parent
							
								
									0bdfc24abd
								
							
						
					
					
						commit
						e139cf0c6b
					
				
					 5 changed files with 86 additions and 14 deletions
				
			
		| 
						 | 
					@ -3,7 +3,7 @@
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use std::iter::Peekable;
 | 
					use std::iter::Peekable;
 | 
				
			||||||
use states::*;
 | 
					use states::{Begin, Resume, StateResult};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
mod chars;
 | 
					mod chars;
 | 
				
			||||||
mod error;
 | 
					mod error;
 | 
				
			||||||
| 
						 | 
					@ -16,8 +16,12 @@ pub use token::{Lex, Token};
 | 
				
			||||||
pub type Result = std::result::Result<Lex, Error>;
 | 
					pub type Result = std::result::Result<Lex, Error>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub struct Lexer<T> where T: Iterator<Item=char> {
 | 
					pub struct Lexer<T> where T: Iterator<Item=char> {
 | 
				
			||||||
 | 
					    /// The input stream.
 | 
				
			||||||
    input: Peekable<T>,
 | 
					    input: Peekable<T>,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Current line number.
 | 
				
			||||||
    line: usize,
 | 
					    line: usize,
 | 
				
			||||||
 | 
					    /// Character offset from the start of the input.
 | 
				
			||||||
    offset: usize,
 | 
					    offset: usize,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -32,15 +36,28 @@ impl<T> Lexer<T> where T: Iterator<Item=char> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fn next(&mut self) -> Option<T::Item> {
 | 
					    fn next(&mut self) -> Option<T::Item> {
 | 
				
			||||||
        let out = self.input.next();
 | 
					        let out = self.input.next();
 | 
				
			||||||
 | 
					        if let Some(c) = out {
 | 
				
			||||||
 | 
					            self.update_offsets(c);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
        out
 | 
					        out
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fn handle_whitespace(&mut self, c: char) {
 | 
					    fn handle_error(&self, err: Error) {
 | 
				
			||||||
        if c == '\n' {
 | 
					        panic!("{}:{}: {}", self.line, self.offset, err.msg())
 | 
				
			||||||
            self.line += 1;
 | 
					    }
 | 
				
			||||||
            self.offset = 1;
 | 
					
 | 
				
			||||||
        } else {
 | 
					    fn prepare_offsets(&mut self) { }
 | 
				
			||||||
            self.offset += 1;
 | 
					
 | 
				
			||||||
 | 
					    fn update_offsets(&mut self, c: char) {
 | 
				
			||||||
 | 
					        self.offset += 1;
 | 
				
			||||||
 | 
					        match c {
 | 
				
			||||||
 | 
					            '\n' => {
 | 
				
			||||||
 | 
					                self.line += 1;
 | 
				
			||||||
 | 
					                self.offset = 0;
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            _ => {
 | 
				
			||||||
 | 
					                self.offset += 1;
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -49,8 +66,10 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> {
 | 
				
			||||||
    type Item = Result;
 | 
					    type Item = Result;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fn next(&mut self) -> Option<Self::Item> {
 | 
					    fn next(&mut self) -> Option<Self::Item> {
 | 
				
			||||||
 | 
					        self.prepare_offsets();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        let mut buffer = String::new();
 | 
					        let mut buffer = String::new();
 | 
				
			||||||
        let mut state: Box<states::State> = Box::new(states::Begin{});
 | 
					        let mut state: Box<states::State> = Box::new(Begin::new());
 | 
				
			||||||
        let mut out: Option<Self::Item> = None;
 | 
					        let mut out: Option<Self::Item> = None;
 | 
				
			||||||
        loop {
 | 
					        loop {
 | 
				
			||||||
            let peek = self.input.peek().map(char::clone);
 | 
					            let peek = self.input.peek().map(char::clone);
 | 
				
			||||||
| 
						 | 
					@ -62,7 +81,7 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> {
 | 
				
			||||||
                        out = Some(Ok(Lex::new(token, &buffer, self.line, self.offset)));
 | 
					                        out = Some(Ok(Lex::new(token, &buffer, self.line, self.offset)));
 | 
				
			||||||
                        break;
 | 
					                        break;
 | 
				
			||||||
                    },
 | 
					                    },
 | 
				
			||||||
                    Err(err) => panic!("{}:{}: {}", self.line, self.offset, err.msg())
 | 
					                    Err(err) => self.handle_error(err)
 | 
				
			||||||
                },
 | 
					                },
 | 
				
			||||||
                Some(c) => {
 | 
					                Some(c) => {
 | 
				
			||||||
                    let result = state.lex(c);
 | 
					                    let result = state.lex(c);
 | 
				
			||||||
| 
						 | 
					@ -76,6 +95,13 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> {
 | 
				
			||||||
                            self.next();
 | 
					                            self.next();
 | 
				
			||||||
                            state = to;
 | 
					                            state = to;
 | 
				
			||||||
                        },
 | 
					                        },
 | 
				
			||||||
 | 
					                        StateResult::Discard(resume) => {
 | 
				
			||||||
 | 
					                            buffer.clear();
 | 
				
			||||||
 | 
					                            state = Box::new(Begin::new());
 | 
				
			||||||
 | 
					                            if resume == Resume::AtNext {
 | 
				
			||||||
 | 
					                                self.next();
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
                        StateResult::Emit(token, resume) => {
 | 
					                        StateResult::Emit(token, resume) => {
 | 
				
			||||||
                            if resume == Resume::AtNext {
 | 
					                            if resume == Resume::AtNext {
 | 
				
			||||||
                                buffer.push(c);
 | 
					                                buffer.push(c);
 | 
				
			||||||
| 
						 | 
					@ -84,9 +110,7 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> {
 | 
				
			||||||
                            out = Some(Ok(Lex::new(token, &buffer, self.line, self.offset)));
 | 
					                            out = Some(Ok(Lex::new(token, &buffer, self.line, self.offset)));
 | 
				
			||||||
                            break;
 | 
					                            break;
 | 
				
			||||||
                        },
 | 
					                        },
 | 
				
			||||||
                        StateResult::Fail(err) => {
 | 
					                        StateResult::Fail(err) => self.handle_error(err),
 | 
				
			||||||
                            panic!("{}:{}: {}", self.line, self.offset, err.msg());
 | 
					 | 
				
			||||||
                        }
 | 
					 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                },
 | 
					                },
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -9,13 +9,22 @@ use states::{Resume, State, StateResult};
 | 
				
			||||||
use states::id::IdSub;
 | 
					use states::id::IdSub;
 | 
				
			||||||
use states::hash::Hash;
 | 
					use states::hash::Hash;
 | 
				
			||||||
use states::number::{Builder, Digit};
 | 
					use states::number::{Builder, Digit};
 | 
				
			||||||
 | 
					use states::whitespace::Whitespace;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[derive(Debug)]
 | 
					#[derive(Debug)]
 | 
				
			||||||
pub struct Begin;
 | 
					pub struct Begin;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl Begin {
 | 
				
			||||||
 | 
					    pub fn new() -> Begin {
 | 
				
			||||||
 | 
					        Begin{}
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
impl State for Begin {
 | 
					impl State for Begin {
 | 
				
			||||||
    fn lex(&mut self, c: char) -> StateResult {
 | 
					    fn lex(&mut self, c: char) -> StateResult {
 | 
				
			||||||
        if c.is_left_paren() {
 | 
					        if c.is_whitespace() {
 | 
				
			||||||
 | 
					            StateResult::advance(Box::new(Whitespace::new()))
 | 
				
			||||||
 | 
					        } else if c.is_left_paren() {
 | 
				
			||||||
            StateResult::Emit(Token::LeftParen, Resume::AtNext)
 | 
					            StateResult::Emit(Token::LeftParen, Resume::AtNext)
 | 
				
			||||||
        } else if c.is_right_paren() {
 | 
					        } else if c.is_right_paren() {
 | 
				
			||||||
            StateResult::Emit(Token::RightParen, Resume::AtNext)
 | 
					            StateResult::Emit(Token::RightParen, Resume::AtNext)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -11,6 +11,7 @@ mod bool;
 | 
				
			||||||
mod hash;
 | 
					mod hash;
 | 
				
			||||||
mod number;
 | 
					mod number;
 | 
				
			||||||
mod id;
 | 
					mod id;
 | 
				
			||||||
 | 
					mod whitespace;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub use self::begin::Begin;
 | 
					pub use self::begin::Begin;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -20,6 +21,8 @@ pub enum StateResult {
 | 
				
			||||||
    Continue,
 | 
					    Continue,
 | 
				
			||||||
    /// Consume the character, advance to the provided state.
 | 
					    /// Consume the character, advance to the provided state.
 | 
				
			||||||
    Advance { to: Box<State> },
 | 
					    Advance { to: Box<State> },
 | 
				
			||||||
 | 
					    /// Discard the input consumed to this point. Resume as specified.
 | 
				
			||||||
 | 
					    Discard(Resume),
 | 
				
			||||||
    /// Emit a Lex with the provided Token and the accumulated buffer. The Resume value indicates
 | 
					    /// Emit a Lex with the provided Token and the accumulated buffer. The Resume value indicates
 | 
				
			||||||
    /// whether to revisit the current input character or advance to the next one.
 | 
					    /// whether to revisit the current input character or advance to the next one.
 | 
				
			||||||
    Emit(Token, Resume),
 | 
					    Emit(Token, Resume),
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										30
									
								
								lexer/src/states/whitespace.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								lexer/src/states/whitespace.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,30 @@
 | 
				
			||||||
 | 
					/* lexer/src/states/whitespace.rs
 | 
				
			||||||
 | 
					 * Eryn Wells <eryn@erynwells.me>
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use error::Error;
 | 
				
			||||||
 | 
					use states::{Resume, State, StateResult};
 | 
				
			||||||
 | 
					use token::Token;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Debug)]
 | 
				
			||||||
 | 
					pub struct Whitespace;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl Whitespace {
 | 
				
			||||||
 | 
					    pub fn new() -> Whitespace {
 | 
				
			||||||
 | 
					        Whitespace{}
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl State for Whitespace {
 | 
				
			||||||
 | 
					    fn lex(&mut self, c: char) -> StateResult {
 | 
				
			||||||
 | 
					        if c.is_whitespace() {
 | 
				
			||||||
 | 
					            StateResult::Continue
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            StateResult::Discard(Resume::Here)
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn none(&mut self) -> Result<Option<Token>, Error> {
 | 
				
			||||||
 | 
					        Ok(None)
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -2,7 +2,7 @@
 | 
				
			||||||
 * Eryn Wells <eryn@erynwells.me>
 | 
					 * Eryn Wells <eryn@erynwells.me>
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[derive(Debug, Eq, PartialEq)]
 | 
					#[derive(Debug, Eq)]
 | 
				
			||||||
pub struct Lex {
 | 
					pub struct Lex {
 | 
				
			||||||
    token: Token,
 | 
					    token: Token,
 | 
				
			||||||
    value: String,
 | 
					    value: String,
 | 
				
			||||||
| 
						 | 
					@ -32,3 +32,9 @@ impl Lex {
 | 
				
			||||||
    pub fn token(&self) -> Token { self.token }
 | 
					    pub fn token(&self) -> Token { self.token }
 | 
				
			||||||
    pub fn value(&self) -> &str { self.value.as_str() }
 | 
					    pub fn value(&self) -> &str { self.value.as_str() }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl PartialEq for Lex {
 | 
				
			||||||
 | 
					    fn eq(&self, rhs: &Lex) -> bool {
 | 
				
			||||||
 | 
					        self.token == rhs.token && self.value == rhs.value
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue