parent
							
								
									0bdfc24abd
								
							
						
					
					
						commit
						e139cf0c6b
					
				
					 5 changed files with 86 additions and 14 deletions
				
			
		|  | @ -3,7 +3,7 @@ | |||
|  */ | ||||
| 
 | ||||
| use std::iter::Peekable; | ||||
| use states::*; | ||||
| use states::{Begin, Resume, StateResult}; | ||||
| 
 | ||||
| mod chars; | ||||
| mod error; | ||||
|  | @ -16,8 +16,12 @@ pub use token::{Lex, Token}; | |||
| pub type Result = std::result::Result<Lex, Error>; | ||||
| 
 | ||||
| /// Lexer over a peekable stream of `char`s. Alongside the input it tracks the line and
 | ||||
| /// offset that are attached to emitted lexes and reported in error messages.
 | ||||
| pub struct Lexer<T> where T: Iterator<Item=char> { | ||||
|     /// The input stream.
 | ||||
|     input: Peekable<T>, | ||||
| 
 | ||||
|     /// Current line number; incremented each time a newline character is consumed.
 | ||||
|     line: usize, | ||||
|     /// Character offset within the current line; reset to 0 when a newline is consumed.
 | ||||
|     offset: usize, | ||||
| } | ||||
| 
 | ||||
|  | @ -32,15 +36,28 @@ impl<T> Lexer<T> where T: Iterator<Item=char> { | |||
| 
 | ||||
|     /// Consumes one character from the input, updating the line/offset counters for it.
 | ||||
|     /// Returns `None`, leaving the counters untouched, once the input is exhausted.
 | ||||
|     fn next(&mut self) -> Option<T::Item> { | ||||
|         match self.input.next() { | ||||
|             Some(consumed) => { | ||||
|                 self.update_offsets(consumed); | ||||
|                 Some(consumed) | ||||
|             }, | ||||
|             None => None, | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn handle_whitespace(&mut self, c: char) { | ||||
|         if c == '\n' { | ||||
|             self.line += 1; | ||||
|             self.offset = 1; | ||||
|         } else { | ||||
|             self.offset += 1; | ||||
|     fn handle_error(&self, err: Error) { | ||||
|         panic!("{}:{}: {}", self.line, self.offset, err.msg()) | ||||
|     } | ||||
| 
 | ||||
|     /// Hook invoked at the start of each `Iterator::next` call, before any input is lexed.
 | ||||
|     /// Currently a no-op.
 | ||||
|     fn prepare_offsets(&mut self) { } | ||||
| 
 | ||||
|     /// Advances the position counters for a character that has just been consumed.
 | ||||
|     ///
 | ||||
|     /// A newline moves to the next line and resets `offset` to 0; any other character
 | ||||
|     /// advances `offset` by exactly one. The counter must only be bumped inside the match:
 | ||||
|     /// an additional unconditional increment would count every ordinary character twice.
 | ||||
|     fn update_offsets(&mut self, c: char) { | ||||
|         match c { | ||||
|             '\n' => { | ||||
|                 self.line += 1; | ||||
|                 self.offset = 0; | ||||
|             }, | ||||
|             _ => { | ||||
|                 self.offset += 1; | ||||
|             }, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | @ -49,8 +66,10 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> { | |||
|     type Item = Result; | ||||
| 
 | ||||
|     fn next(&mut self) -> Option<Self::Item> { | ||||
|         self.prepare_offsets(); | ||||
| 
 | ||||
|         let mut buffer = String::new(); | ||||
|         let mut state: Box<states::State> = Box::new(states::Begin{}); | ||||
|         let mut state: Box<states::State> = Box::new(Begin::new()); | ||||
|         let mut out: Option<Self::Item> = None; | ||||
|         loop { | ||||
|             let peek = self.input.peek().map(char::clone); | ||||
|  | @ -62,7 +81,7 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> { | |||
|                         out = Some(Ok(Lex::new(token, &buffer, self.line, self.offset))); | ||||
|                         break; | ||||
|                     }, | ||||
|                     Err(err) => panic!("{}:{}: {}", self.line, self.offset, err.msg()) | ||||
|                     Err(err) => self.handle_error(err) | ||||
|                 }, | ||||
|                 Some(c) => { | ||||
|                     let result = state.lex(c); | ||||
|  | @ -76,6 +95,13 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> { | |||
|                             self.next(); | ||||
|                             state = to; | ||||
|                         }, | ||||
|                         StateResult::Discard(resume) => { | ||||
|                             buffer.clear(); | ||||
|                             state = Box::new(Begin::new()); | ||||
|                             if resume == Resume::AtNext { | ||||
|                                 self.next(); | ||||
|                             } | ||||
|                         }, | ||||
|                         StateResult::Emit(token, resume) => { | ||||
|                             if resume == Resume::AtNext { | ||||
|                                 buffer.push(c); | ||||
|  | @ -84,9 +110,7 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> { | |||
|                             out = Some(Ok(Lex::new(token, &buffer, self.line, self.offset))); | ||||
|                             break; | ||||
|                         }, | ||||
|                         StateResult::Fail(err) => { | ||||
|                             panic!("{}:{}: {}", self.line, self.offset, err.msg()); | ||||
|                         } | ||||
|                         StateResult::Fail(err) => self.handle_error(err), | ||||
|                     } | ||||
|                 }, | ||||
|             } | ||||
|  |  | |||
|  | @ -9,13 +9,22 @@ use states::{Resume, State, StateResult}; | |||
| use states::id::IdSub; | ||||
| use states::hash::Hash; | ||||
| use states::number::{Builder, Digit}; | ||||
| use states::whitespace::Whitespace; | ||||
| 
 | ||||
| /// Initial lexer state: the lexer starts each token in this state and returns to it after
 | ||||
| /// discarded input.
 | ||||
| #[derive(Debug)] | ||||
| pub struct Begin; | ||||
| 
 | ||||
| impl Begin { | ||||
|     /// Creates the initial lexer state.
 | ||||
|     pub fn new() -> Begin { | ||||
|         Begin | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl State for Begin { | ||||
|     fn lex(&mut self, c: char) -> StateResult { | ||||
|         if c.is_left_paren() { | ||||
|         if c.is_whitespace() { | ||||
|             StateResult::advance(Box::new(Whitespace::new())) | ||||
|         } else if c.is_left_paren() { | ||||
|             StateResult::Emit(Token::LeftParen, Resume::AtNext) | ||||
|         } else if c.is_right_paren() { | ||||
|             StateResult::Emit(Token::RightParen, Resume::AtNext) | ||||
|  |  | |||
|  | @ -11,6 +11,7 @@ mod bool; | |||
| mod hash; | ||||
| mod number; | ||||
| mod id; | ||||
| mod whitespace; | ||||
| 
 | ||||
| pub use self::begin::Begin; | ||||
| 
 | ||||
|  | @ -20,6 +21,8 @@ pub enum StateResult { | |||
|     Continue, | ||||
|     /// Consume the character, advance to the provided state.
 | ||||
|     Advance { to: Box<State> }, | ||||
|     /// Discard the input consumed to this point. Resume as specified.
 | ||||
|     Discard(Resume), | ||||
|     /// Emit a Lex with the provided Token and the accumulated buffer. The Resume value indicates
 | ||||
|     /// whether to revisit the current input character or advance to the next one.
 | ||||
|     Emit(Token, Resume), | ||||
|  |  | |||
							
								
								
									
										30
									
								
								lexer/src/states/whitespace.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								lexer/src/states/whitespace.rs
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,30 @@ | |||
| /* lexer/src/states/whitespace.rs
 | ||||
|  * Eryn Wells <eryn@erynwells.me> | ||||
|  */ | ||||
| 
 | ||||
| use error::Error; | ||||
| use states::{Resume, State, StateResult}; | ||||
| use token::Token; | ||||
| 
 | ||||
| /// Lexer state entered from `Begin` on a whitespace character; it consumes the rest of the
 | ||||
| /// whitespace run so that the run can be discarded.
 | ||||
| #[derive(Debug)] | ||||
| pub struct Whitespace; | ||||
| 
 | ||||
| impl Whitespace { | ||||
|     /// Creates a new whitespace-skipping state.
 | ||||
|     pub fn new() -> Whitespace { | ||||
|         Whitespace | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl State for Whitespace { | ||||
|     /// Keeps consuming while `c` is whitespace. On the first non-whitespace character the
 | ||||
|     /// accumulated input is discarded and lexing resumes at that same character.
 | ||||
|     fn lex(&mut self, c: char) -> StateResult { | ||||
|         if !c.is_whitespace() { | ||||
|             return StateResult::Discard(Resume::Here); | ||||
|         } | ||||
|         StateResult::Continue | ||||
|     } | ||||
| 
 | ||||
|     /// Produces no token and no error.
 | ||||
|     /// NOTE(review): presumably called when the input ends in this state -- confirm in the lexer.
 | ||||
|     fn none(&mut self) -> Result<Option<Token>, Error> { | ||||
|         Ok(None) | ||||
|     } | ||||
| } | ||||
|  | @ -2,7 +2,7 @@ | |||
|  * Eryn Wells <eryn@erynwells.me> | ||||
|  */ | ||||
| 
 | ||||
| #[derive(Debug, Eq, PartialEq)] | ||||
| #[derive(Debug, Eq)] | ||||
| pub struct Lex { | ||||
|     token: Token, | ||||
|     value: String, | ||||
|  | @ -32,3 +32,9 @@ impl Lex { | |||
|     /// Returns this lex's token, by value.
 | ||||
|     pub fn token(&self) -> Token { self.token } | ||||
|     /// Returns the text stored for this lex as a string slice.
 | ||||
|     pub fn value(&self) -> &str { self.value.as_str() } | ||||
| } | ||||
| 
 | ||||
| /// Equality considers only the token and its text.
 | ||||
| /// NOTE(review): a `Lex` is constructed with a line and offset as well; those are presumably
 | ||||
| /// stored in fields that this comparison intentionally ignores -- confirm against the struct.
 | ||||
| impl PartialEq for Lex { | ||||
|     fn eq(&self, rhs: &Lex) -> bool { | ||||
|         let same_token = self.token == rhs.token; | ||||
|         same_token && self.value == rhs.value | ||||
|     } | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue