Move all the lexer stuff to a module directory
This commit is contained in:
		
							parent
							
								
									d333819dee
								
							
						
					
					
						commit
						d4dee92904
					
				
					 5 changed files with 97 additions and 92 deletions
				
			
		
							
								
								
									
										31
									
								
								src/lexer/char.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								src/lexer/char.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,31 @@
 | 
				
			||||||
 | 
					/* char.rs
 | 
				
			||||||
 | 
					 * Eryn Wells <eryn@erynwells.me>
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use lexer::charset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub trait Lexable {
 | 
				
			||||||
 | 
					    fn is_left_paren(&self) -> bool;
 | 
				
			||||||
 | 
					    fn is_right_paren(&self) -> bool;
 | 
				
			||||||
 | 
					    fn is_identifier_initial(&self) -> bool;
 | 
				
			||||||
 | 
					    fn is_identifier_subsequent(&self) -> bool;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl Lexable for char {
 | 
				
			||||||
 | 
					    fn is_left_paren(&self) -> bool {
 | 
				
			||||||
 | 
					        self == &'('
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn is_right_paren(&self) -> bool {
 | 
				
			||||||
 | 
					        self == &')'
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn is_identifier_initial(&self) -> bool {
 | 
				
			||||||
 | 
					        charset::identifier_initials().contains(&self)
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn is_identifier_subsequent(&self) -> bool {
 | 
				
			||||||
 | 
					        charset::identifier_subsequents().contains(&self)
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										43
									
								
								src/lexer/charset.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										43
									
								
								src/lexer/charset.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,43 @@
 | 
				
			||||||
 | 
					/// Character Sets
 | 
				
			||||||
 | 
					///
 | 
				
			||||||
 | 
					/// Sets of characters valid for making up tokens.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use std::collections::HashSet;
 | 
				
			||||||
 | 
					use std::iter::FromIterator;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub type CharSet = HashSet<char>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TODO: Use std::sync::Once for these sets?
 | 
				
			||||||
 | 
					// https://doc.rust-lang.org/beta/std/sync/struct.Once.html
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn ascii_letters() -> CharSet {
 | 
				
			||||||
 | 
					    let letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".chars();
 | 
				
			||||||
 | 
					    CharSet::from_iter(letters)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn ascii_digits() -> CharSet {
 | 
				
			||||||
 | 
					    let digits = "1234567890".chars();
 | 
				
			||||||
 | 
					    CharSet::from_iter(digits)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// A set of all characters allowed to start Scheme identifiers.
 | 
				
			||||||
 | 
					pub fn identifier_initials() -> CharSet {
 | 
				
			||||||
 | 
					    let letters = ascii_letters();
 | 
				
			||||||
 | 
					    let extras = CharSet::from_iter("!$%&*/:<=>?~_^".chars());
 | 
				
			||||||
 | 
					    let mut initials = CharSet::new();
 | 
				
			||||||
 | 
					    initials.extend(letters.iter());
 | 
				
			||||||
 | 
					    initials.extend(extras.iter());
 | 
				
			||||||
 | 
					    initials
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// A set of all characters allowed to follow an identifier initial.
 | 
				
			||||||
 | 
					pub fn identifier_subsequents() -> CharSet {
 | 
				
			||||||
 | 
					    let initials = identifier_initials();
 | 
				
			||||||
 | 
					    let digits = ascii_digits();
 | 
				
			||||||
 | 
					    let extras = CharSet::from_iter(".+-".chars());
 | 
				
			||||||
 | 
					    let mut subsequents = CharSet::new();
 | 
				
			||||||
 | 
					    subsequents.extend(initials.iter());
 | 
				
			||||||
 | 
					    subsequents.extend(digits.iter());
 | 
				
			||||||
 | 
					    subsequents.extend(extras.iter());
 | 
				
			||||||
 | 
					    subsequents
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -1,11 +1,15 @@
 | 
				
			||||||
//! # Lexer
 | 
					//! # Lexer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub mod token;
 | 
					pub mod token;
 | 
				
			||||||
 | 
					mod char;
 | 
				
			||||||
 | 
					mod charset;
 | 
				
			||||||
 | 
					mod str;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use characters;
 | 
					use self::char::Lexable;
 | 
				
			||||||
use characters::CharAt;
 | 
					use self::str::CharAt;
 | 
				
			||||||
use characters::Lexable;
 | 
					use self::str::RelativeIndexable;
 | 
				
			||||||
use characters::RelativeIndexable;
 | 
					use self::token::Token;
 | 
				
			||||||
 | 
					use self::token::Kind;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
enum State {
 | 
					enum State {
 | 
				
			||||||
    Initial,
 | 
					    Initial,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,79 +1,6 @@
 | 
				
			||||||
//! Characters
 | 
					/* str.rs
 | 
				
			||||||
//!
 | 
					 * Eryn Wells <eryn@erynwells.me>
 | 
				
			||||||
//! Utilities for dealing with chars of various sorts.
 | 
					 */
 | 
				
			||||||
 | 
					 | 
				
			||||||
use std::collections::HashSet;
 | 
					 | 
				
			||||||
use std::iter::FromIterator;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub type CharSet = HashSet<char>;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// TODO: Use std::sync::Once for these sets?
 | 
					 | 
				
			||||||
// https://doc.rust-lang.org/beta/std/sync/struct.Once.html
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn ascii_letters() -> CharSet {
 | 
					 | 
				
			||||||
    let letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".chars();
 | 
					 | 
				
			||||||
    CharSet::from_iter(letters)
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn ascii_digits() -> CharSet {
 | 
					 | 
				
			||||||
    let digits = "1234567890".chars();
 | 
					 | 
				
			||||||
    CharSet::from_iter(digits)
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/// A set of all characters allowed to start Scheme identifiers.
 | 
					 | 
				
			||||||
pub fn identifier_initials() -> CharSet {
 | 
					 | 
				
			||||||
    let letters = ascii_letters();
 | 
					 | 
				
			||||||
    let extras = CharSet::from_iter("!$%&*/:<=>?~_^".chars());
 | 
					 | 
				
			||||||
    let mut initials = CharSet::new();
 | 
					 | 
				
			||||||
    initials.extend(letters.iter());
 | 
					 | 
				
			||||||
    initials.extend(extras.iter());
 | 
					 | 
				
			||||||
    initials
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/// A set of all characters allowed to follow an identifier initial.
 | 
					 | 
				
			||||||
pub fn identifier_subsequents() -> CharSet {
 | 
					 | 
				
			||||||
    let initials = identifier_initials();
 | 
					 | 
				
			||||||
    let digits = ascii_digits();
 | 
					 | 
				
			||||||
    let extras = CharSet::from_iter(".+-".chars());
 | 
					 | 
				
			||||||
    let mut subsequents = CharSet::new();
 | 
					 | 
				
			||||||
    subsequents.extend(initials.iter());
 | 
					 | 
				
			||||||
    subsequents.extend(digits.iter());
 | 
					 | 
				
			||||||
    subsequents.extend(extras.iter());
 | 
					 | 
				
			||||||
    subsequents
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
// char
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub trait Lexable {
 | 
					 | 
				
			||||||
    fn is_left_paren(&self) -> bool;
 | 
					 | 
				
			||||||
    fn is_right_paren(&self) -> bool;
 | 
					 | 
				
			||||||
    fn is_identifier_initial(&self) -> bool;
 | 
					 | 
				
			||||||
    fn is_identifier_subsequent(&self) -> bool;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
impl Lexable for char {
 | 
					 | 
				
			||||||
    fn is_left_paren(&self) -> bool {
 | 
					 | 
				
			||||||
        self == &'('
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fn is_right_paren(&self) -> bool {
 | 
					 | 
				
			||||||
        self == &')'
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fn is_identifier_initial(&self) -> bool {
 | 
					 | 
				
			||||||
        identifier_initials().contains(&self)
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fn is_identifier_subsequent(&self) -> bool {
 | 
					 | 
				
			||||||
        identifier_subsequents().contains(&self)
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
// str and String
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub trait RelativeIndexable {
 | 
					pub trait RelativeIndexable {
 | 
				
			||||||
    /// Get the index of the character boundary preceding the given index. The index does not need to be on a character
 | 
					    /// Get the index of the character boundary preceding the given index. The index does not need to be on a character
 | 
				
			||||||
| 
						 | 
					@ -124,6 +51,17 @@ impl RelativeIndexable for str {
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl CharAt for str {
 | 
				
			||||||
 | 
					    fn char_at(&self, index: usize) -> Option<char> {
 | 
				
			||||||
 | 
					        if !self.is_char_boundary(index) {
 | 
				
			||||||
 | 
					            return None;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        let end = self.index_after(index);
 | 
				
			||||||
 | 
					        let char_str = &self[index .. end];
 | 
				
			||||||
 | 
					        char_str.chars().nth(0)
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[test]
 | 
					#[test]
 | 
				
			||||||
fn index_before_is_well_behaved_for_ascii() {
 | 
					fn index_before_is_well_behaved_for_ascii() {
 | 
				
			||||||
    let s = "abc";
 | 
					    let s = "abc";
 | 
				
			||||||
| 
						 | 
					@ -158,14 +96,3 @@ fn index_after_is_well_behaved_for_ascii() {
 | 
				
			||||||
        assert!(s.is_char_boundary(idx));
 | 
					        assert!(s.is_char_boundary(idx));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					 | 
				
			||||||
impl CharAt for str {
 | 
					 | 
				
			||||||
    fn char_at(&self, index: usize) -> Option<char> {
 | 
					 | 
				
			||||||
        if !self.is_char_boundary(index) {
 | 
					 | 
				
			||||||
            return None;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        let end = self.index_after(index);
 | 
					 | 
				
			||||||
        let char_str = &self[index .. end];
 | 
					 | 
				
			||||||
        char_str.chars().nth(0)
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
| 
						 | 
					@ -14,7 +14,7 @@ pub struct Token {
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
impl Token {
 | 
					impl Token {
 | 
				
			||||||
    fn new(kind: Kind, value: String) -> Token {
 | 
					    pub fn new(kind: Kind, value: String) -> Token {
 | 
				
			||||||
        Token { kind: kind, value: value, }
 | 
					        Token { kind: kind, value: value, }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue