diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs index 70de94f..4309278 100644 --- a/lexer/src/lib.rs +++ b/lexer/src/lib.rs @@ -2,29 +2,19 @@ * Eryn Wells */ -use std::iter::Peekable; -use chars::Lexable; - mod chars; mod error; +mod states; mod token; pub use error::Error; pub use token::{Lex, Token}; +use std::iter::Peekable; +use states::*; + pub type Result = std::result::Result; -#[derive(Debug, Eq, PartialEq)] -enum Resume { Here, AtNext } - -#[derive(Debug, Eq, PartialEq)] -enum IterationResult { - Finish, - Continue, - Emit(Token, Resume), - Error(Error), -} - pub struct Lexer where T: Iterator { input: Peekable, line: usize, @@ -39,14 +29,6 @@ impl Lexer where T: Iterator { offset: 0 } } - - fn emit(&self, token: Token, resume: Resume) -> IterationResult { - IterationResult::Emit(token, resume) - } - - fn fail(&self, msg: String) -> IterationResult { - IterationResult::Error(Error::new(msg)) - } } impl Lexer where T: Iterator { @@ -66,59 +48,48 @@ impl Iterator for Lexer where T: Iterator { fn next(&mut self) -> Option { let mut buffer = String::new(); + let mut state: Box = Box::new(states::Begin{}); + let mut out: Option = None; loop { let peek = self.input.peek().map(char::clone); - let result = if buffer.is_empty() { - match peek { - Some(c) if c.is_left_paren() => { - buffer.push(c); - self.emit(Token::LeftParen, Resume::AtNext) + println!("lexing {:?} in state {:?}, buffer = {:?}", peek, state, buffer); + match peek { + // TODO: Give the current state a chance to react. + None => match state.none() { + Ok(None) => break, + Ok(Some(token)) => { + out = Some(Ok(Lex::new(token, &buffer, self.line, self.offset))); + break; }, - Some(c) if c.is_right_paren() => { - buffer.push(c); - self.emit(Token::RightParen, Resume::AtNext) - }, - Some(c) if c.is_whitespace() => { - self.handle_whitespace(c); - IterationResult::Continue - }, - Some(c) if c.is_identifier_initial() => { - buffer.push(c); - IterationResult::Continue - }, - Some(c) => self.fail(format!("Invalid character: {}", c)), - // We found EOF and there's no pending string, so just finish. - None => IterationResult::Finish, + Err(msg) => panic!("{}", msg) + }, + Some(c) => { + let result = state.lex(c); + match result { + StateResult::Continue => { + buffer.push(c); + self.input.next(); + }, + StateResult::Advance { to } => { + buffer.push(c); + self.input.next(); + state = to; + }, + StateResult::Emit(token, resume) => { + if resume == Resume::AtNext { + buffer.push(c); + self.input.next(); + } + out = Some(Ok(Lex::new(token, &buffer, self.line, self.offset))); + break; + }, + StateResult::Fail { msg } => { + panic!("{}", msg); + } + } } } - else { - match peek { - Some(c) if c.is_identifier_subsequent() => { - buffer.push(c); - IterationResult::Continue - } - Some(c) if c.is_identifier_delimiter() => - self.emit(Token::Id, Resume::Here), - Some(c) => self.fail(format!("Invalid character: {}", c)), - // Found EOF. Emit what we have and finish. - // Note: the Resume argument doesn't matter in this case since the input - // iterator will always be None from here on. - None => self.emit(Token::Id, Resume::Here), - } - }; - match result { - IterationResult::Finish => break, - IterationResult::Continue => self.input.next(), - IterationResult::Emit(token, resume) => { - if resume == Resume::AtNext { - self.input.next(); - } - let lex = Lex::new(token, &buffer, self.line, self.offset); - return Some(Ok(lex)) - }, - IterationResult::Error(err) => return Some(Err(err)), - }; } - None + out } } diff --git a/lexer/src/states/begin.rs b/lexer/src/states/begin.rs new file mode 100644 index 0000000..3d25b1a --- /dev/null +++ b/lexer/src/states/begin.rs @@ -0,0 +1,31 @@ +/* lexer/src/states/begin.rs + * Eryn Wells + */ + +use chars::Lexable; +use token::Token; +use states::{Resume, State, StateResult}; +use states::id::IdSub; + +#[derive(Debug)] +pub struct Begin; + +impl State for Begin { + fn lex(&mut self, c: char) -> StateResult { + match c { + c if c.is_left_paren() => StateResult::Emit(Token::LeftParen, Resume::AtNext), + c if c.is_right_paren() => StateResult::Emit(Token::RightParen, Resume::AtNext), + // TODO: Figure out some way to track newlines. + c if c.is_whitespace() => StateResult::Continue, + c if c.is_identifier_initial() => StateResult::Advance { to: Box::new(IdSub{}) }, + _ => { + let msg = format!("Invalid character: {}", c); + StateResult::Fail { msg } + } + } + } + + fn none(&mut self) -> Result, String> { + Ok(None) + } +} diff --git a/lexer/src/states/id.rs b/lexer/src/states/id.rs new file mode 100644 index 0000000..0232c00 --- /dev/null +++ b/lexer/src/states/id.rs @@ -0,0 +1,27 @@ +/* lexer/src/states/id.rs + * Eryn Wells + */ + +use chars::Lexable; +use states::{Resume, State, StateResult}; +use token::Token; + +#[derive(Debug)] +pub struct IdSub; + +impl State for IdSub { + fn lex(&mut self, c: char) -> StateResult { + match c { + c if c.is_identifier_subsequent() => StateResult::Continue, + c if c.is_identifier_delimiter() => StateResult::Emit(Token::Id, Resume::Here), + _ => { + let msg = format!("Invalid character: {}", c); + StateResult::Fail { msg } + } + } + } + + fn none(&mut self) -> Result, String> { + Ok(Some(Token::Id)) + } +} diff --git a/lexer/src/states/mod.rs b/lexer/src/states/mod.rs new file mode 100644 index 0000000..68ffa27 --- /dev/null +++ b/lexer/src/states/mod.rs @@ -0,0 +1,36 @@ +/* lexer/src/states/mod.rs + * Eryn Wells + */ + +mod begin; +mod id; + +pub use self::begin::Begin; + +use std::fmt::Debug; +use token::Token; + +#[derive(Debug)] +pub enum StateResult { + /// Consume the character, remain on this state. + Continue, + /// Consume the character, advance to the provided state. + Advance { to: Box }, + /// Emit a Lex with the provided Token and the accumulated buffer. The Resume value indicates + /// whether to revisit the current input character or advance to the next one. + Emit(Token, Resume), + Fail { msg: String } +} + +#[derive(Debug, Eq, PartialEq)] +pub enum Resume { + /// Revisit the current input character in the next state. + Here, + /// Advance the input to the next character before beginning the next token. + AtNext +} + +pub trait State: Debug { + fn lex(&mut self, c: char) -> StateResult; + fn none(&mut self) -> Result, String>; +}