[lexer] Lex and discard whitespace

Closes #12.
This commit is contained in:
Eryn Wells 2018-09-04 17:58:45 -07:00
parent 0bdfc24abd
commit e139cf0c6b
5 changed files with 86 additions and 14 deletions

View file

@ -3,7 +3,7 @@
*/ */
use std::iter::Peekable; use std::iter::Peekable;
use states::*; use states::{Begin, Resume, StateResult};
mod chars; mod chars;
mod error; mod error;
@ -16,8 +16,12 @@ pub use token::{Lex, Token};
pub type Result = std::result::Result<Lex, Error>; pub type Result = std::result::Result<Lex, Error>;
pub struct Lexer<T> where T: Iterator<Item=char> { pub struct Lexer<T> where T: Iterator<Item=char> {
/// The input stream.
input: Peekable<T>, input: Peekable<T>,
/// Current line number.
line: usize, line: usize,
/// Character offset from the start of the input.
offset: usize, offset: usize,
} }
@ -32,15 +36,28 @@ impl<T> Lexer<T> where T: Iterator<Item=char> {
fn next(&mut self) -> Option<T::Item> { fn next(&mut self) -> Option<T::Item> {
let out = self.input.next(); let out = self.input.next();
if let Some(c) = out {
self.update_offsets(c);
}
out out
} }
fn handle_whitespace(&mut self, c: char) { fn handle_error(&self, err: Error) {
if c == '\n' { panic!("{}:{}: {}", self.line, self.offset, err.msg())
self.line += 1; }
self.offset = 1;
} else { fn prepare_offsets(&mut self) { }
fn update_offsets(&mut self, c: char) {
self.offset += 1; self.offset += 1;
match c {
'\n' => {
self.line += 1;
self.offset = 0;
},
_ => {
self.offset += 1;
},
} }
} }
} }
@ -49,8 +66,10 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> {
type Item = Result; type Item = Result;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.prepare_offsets();
let mut buffer = String::new(); let mut buffer = String::new();
let mut state: Box<states::State> = Box::new(states::Begin{}); let mut state: Box<states::State> = Box::new(Begin::new());
let mut out: Option<Self::Item> = None; let mut out: Option<Self::Item> = None;
loop { loop {
let peek = self.input.peek().map(char::clone); let peek = self.input.peek().map(char::clone);
@ -62,7 +81,7 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> {
out = Some(Ok(Lex::new(token, &buffer, self.line, self.offset))); out = Some(Ok(Lex::new(token, &buffer, self.line, self.offset)));
break; break;
}, },
Err(err) => panic!("{}:{}: {}", self.line, self.offset, err.msg()) Err(err) => self.handle_error(err)
}, },
Some(c) => { Some(c) => {
let result = state.lex(c); let result = state.lex(c);
@ -76,6 +95,13 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> {
self.next(); self.next();
state = to; state = to;
}, },
StateResult::Discard(resume) => {
buffer.clear();
state = Box::new(Begin::new());
if resume == Resume::AtNext {
self.next();
}
},
StateResult::Emit(token, resume) => { StateResult::Emit(token, resume) => {
if resume == Resume::AtNext { if resume == Resume::AtNext {
buffer.push(c); buffer.push(c);
@ -84,9 +110,7 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> {
out = Some(Ok(Lex::new(token, &buffer, self.line, self.offset))); out = Some(Ok(Lex::new(token, &buffer, self.line, self.offset)));
break; break;
}, },
StateResult::Fail(err) => { StateResult::Fail(err) => self.handle_error(err),
panic!("{}:{}: {}", self.line, self.offset, err.msg());
}
} }
}, },
} }

View file

@ -9,13 +9,22 @@ use states::{Resume, State, StateResult};
use states::id::IdSub; use states::id::IdSub;
use states::hash::Hash; use states::hash::Hash;
use states::number::{Builder, Digit}; use states::number::{Builder, Digit};
use states::whitespace::Whitespace;
#[derive(Debug)] #[derive(Debug)]
pub struct Begin; pub struct Begin;
impl Begin {
pub fn new() -> Begin {
Begin{}
}
}
impl State for Begin { impl State for Begin {
fn lex(&mut self, c: char) -> StateResult { fn lex(&mut self, c: char) -> StateResult {
if c.is_left_paren() { if c.is_whitespace() {
StateResult::advance(Box::new(Whitespace::new()))
} else if c.is_left_paren() {
StateResult::Emit(Token::LeftParen, Resume::AtNext) StateResult::Emit(Token::LeftParen, Resume::AtNext)
} else if c.is_right_paren() { } else if c.is_right_paren() {
StateResult::Emit(Token::RightParen, Resume::AtNext) StateResult::Emit(Token::RightParen, Resume::AtNext)

View file

@ -11,6 +11,7 @@ mod bool;
mod hash; mod hash;
mod number; mod number;
mod id; mod id;
mod whitespace;
pub use self::begin::Begin; pub use self::begin::Begin;
@ -20,6 +21,8 @@ pub enum StateResult {
Continue, Continue,
/// Consume the character, advance to the provided state. /// Consume the character, advance to the provided state.
Advance { to: Box<State> }, Advance { to: Box<State> },
/// Discard the input consumed to this point. Resume as specified.
Discard(Resume),
/// Emit a Lex with the provided Token and the accumulated buffer. The Resume value indicates /// Emit a Lex with the provided Token and the accumulated buffer. The Resume value indicates
/// whether to revisit the current input character or advance to the next one. /// whether to revisit the current input character or advance to the next one.
Emit(Token, Resume), Emit(Token, Resume),

View file

@ -0,0 +1,30 @@
/* lexer/src/states/whitespace.rs
* Eryn Wells <eryn@erynwells.me>
*/
use error::Error;
use states::{Resume, State, StateResult};
use token::Token;
/// Lexer state used while skipping over a run of whitespace.
#[derive(Debug)]
pub struct Whitespace;

impl Whitespace {
    /// Create a new `Whitespace` state.
    pub fn new() -> Self {
        Whitespace
    }
}
impl State for Whitespace {
    /// Keep going while `c` is whitespace. On the first non-whitespace character, tell the
    /// lexer to discard what has been consumed and resume at that same character.
    fn lex(&mut self, c: char) -> StateResult {
        if !c.is_whitespace() {
            return StateResult::Discard(Resume::Here);
        }
        StateResult::Continue
    }

    /// Produce neither a token nor an error.
    // NOTE(review): presumably invoked when the input runs out while in this state; confirm
    // against the `State` trait.
    fn none(&mut self) -> Result<Option<Token>, Error> {
        Ok(None)
    }
}

View file

@ -2,7 +2,7 @@
* Eryn Wells <eryn@erynwells.me> * Eryn Wells <eryn@erynwells.me>
*/ */
#[derive(Debug, Eq, PartialEq)] #[derive(Debug, Eq)]
pub struct Lex { pub struct Lex {
token: Token, token: Token,
value: String, value: String,
@ -32,3 +32,9 @@ impl Lex {
pub fn token(&self) -> Token { self.token } pub fn token(&self) -> Token { self.token }
pub fn value(&self) -> &str { self.value.as_str() } pub fn value(&self) -> &str { self.value.as_str() }
} }
/// Two `Lex` values are equal when their tokens and their text match; no other field takes
/// part in the comparison.
impl PartialEq for Lex {
    fn eq(&self, other: &Lex) -> bool {
        (&self.token, &self.value) == (&other.token, &other.value)
    }
}