parent
0bdfc24abd
commit
e139cf0c6b
5 changed files with 86 additions and 14 deletions
|
@ -3,7 +3,7 @@
|
|||
*/
|
||||
|
||||
use std::iter::Peekable;
|
||||
use states::*;
|
||||
use states::{Begin, Resume, StateResult};
|
||||
|
||||
mod chars;
|
||||
mod error;
|
||||
|
@ -16,8 +16,12 @@ pub use token::{Lex, Token};
|
|||
pub type Result = std::result::Result<Lex, Error>;
|
||||
|
||||
/// A lexer over a stream of characters that records its position while consuming input.
pub struct Lexer<T: Iterator<Item = char>> {
    /// Peekable stream of input characters.
    input: Peekable<T>,

    /// Line number of the current position; bumped each time a newline is consumed.
    line: usize,
    /// Character offset of the current position; reset each time a newline is consumed.
    offset: usize,
}
|
||||
|
||||
|
@ -32,15 +36,28 @@ impl<T> Lexer<T> where T: Iterator<Item=char> {
|
|||
|
||||
/// Pull the next character from the input stream.
///
/// Every character that is actually consumed is reported to `update_offsets` so the
/// line/offset bookkeeping stays in step. Returns `None` when the input yields nothing.
fn next(&mut self) -> Option<T::Item> {
    let consumed = self.input.next();
    consumed.map(|c| {
        self.update_offsets(c);
        c
    })
}
|
||||
|
||||
fn handle_whitespace(&mut self, c: char) {
|
||||
if c == '\n' {
|
||||
self.line += 1;
|
||||
self.offset = 1;
|
||||
} else {
|
||||
self.offset += 1;
|
||||
/// Abort by panicking with a `line:offset: message` diagnostic built from the lexer's
/// current position and `err.msg()`.
///
/// NOTE(review): the iterator's item type is a `Result`, so returning the error to the
/// caller instead of panicking may be preferable — confirm intent.
fn handle_error(&self, err: Error) {
    let (line, offset) = (self.line, self.offset);
    panic!("{}:{}: {}", line, offset, err.msg())
}
|
||||
|
||||
/// Hook for position bookkeeping before a token scan begins. It currently does nothing.
fn prepare_offsets(&mut self) {
    // Nothing to do.
}
|
||||
|
||||
fn update_offsets(&mut self, c: char) {
|
||||
self.offset += 1;
|
||||
match c {
|
||||
'\n' => {
|
||||
self.line += 1;
|
||||
self.offset = 0;
|
||||
},
|
||||
_ => {
|
||||
self.offset += 1;
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -49,8 +66,10 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> {
|
|||
type Item = Result;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.prepare_offsets();
|
||||
|
||||
let mut buffer = String::new();
|
||||
let mut state: Box<states::State> = Box::new(states::Begin{});
|
||||
let mut state: Box<states::State> = Box::new(Begin::new());
|
||||
let mut out: Option<Self::Item> = None;
|
||||
loop {
|
||||
let peek = self.input.peek().map(char::clone);
|
||||
|
@ -62,7 +81,7 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> {
|
|||
out = Some(Ok(Lex::new(token, &buffer, self.line, self.offset)));
|
||||
break;
|
||||
},
|
||||
Err(err) => panic!("{}:{}: {}", self.line, self.offset, err.msg())
|
||||
Err(err) => self.handle_error(err)
|
||||
},
|
||||
Some(c) => {
|
||||
let result = state.lex(c);
|
||||
|
@ -76,6 +95,13 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> {
|
|||
self.next();
|
||||
state = to;
|
||||
},
|
||||
StateResult::Discard(resume) => {
|
||||
buffer.clear();
|
||||
state = Box::new(Begin::new());
|
||||
if resume == Resume::AtNext {
|
||||
self.next();
|
||||
}
|
||||
},
|
||||
StateResult::Emit(token, resume) => {
|
||||
if resume == Resume::AtNext {
|
||||
buffer.push(c);
|
||||
|
@ -84,9 +110,7 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> {
|
|||
out = Some(Ok(Lex::new(token, &buffer, self.line, self.offset)));
|
||||
break;
|
||||
},
|
||||
StateResult::Fail(err) => {
|
||||
panic!("{}:{}: {}", self.line, self.offset, err.msg());
|
||||
}
|
||||
StateResult::Fail(err) => self.handle_error(err),
|
||||
}
|
||||
},
|
||||
}
|
||||
|
|
|
@ -9,13 +9,22 @@ use states::{Resume, State, StateResult};
|
|||
use states::id::IdSub;
|
||||
use states::hash::Hash;
|
||||
use states::number::{Builder, Digit};
|
||||
use states::whitespace::Whitespace;
|
||||
|
||||
/// Lexer state with no data of its own.
#[derive(Debug)]
pub struct Begin;

impl Begin {
    /// Create a new `Begin` state.
    pub fn new() -> Begin {
        Begin
    }
}
|
||||
|
||||
impl State for Begin {
|
||||
fn lex(&mut self, c: char) -> StateResult {
|
||||
if c.is_left_paren() {
|
||||
if c.is_whitespace() {
|
||||
StateResult::advance(Box::new(Whitespace::new()))
|
||||
} else if c.is_left_paren() {
|
||||
StateResult::Emit(Token::LeftParen, Resume::AtNext)
|
||||
} else if c.is_right_paren() {
|
||||
StateResult::Emit(Token::RightParen, Resume::AtNext)
|
||||
|
|
|
@ -11,6 +11,7 @@ mod bool;
|
|||
mod hash;
|
||||
mod number;
|
||||
mod id;
|
||||
mod whitespace;
|
||||
|
||||
pub use self::begin::Begin;
|
||||
|
||||
|
@ -20,6 +21,8 @@ pub enum StateResult {
|
|||
Continue,
|
||||
/// Consume the character, advance to the provided state.
|
||||
Advance { to: Box<State> },
|
||||
/// Discard the input consumed to this point. Resume as specified.
|
||||
Discard(Resume),
|
||||
/// Emit a Lex with the provided Token and the accumulated buffer. The Resume value indicates
|
||||
/// whether to revisit the current input character or advance to the next one.
|
||||
Emit(Token, Resume),
|
||||
|
|
30
lexer/src/states/whitespace.rs
Normal file
30
lexer/src/states/whitespace.rs
Normal file
|
@ -0,0 +1,30 @@
|
|||
/* lexer/src/states/whitespace.rs
|
||||
* Eryn Wells <eryn@erynwells.me>
|
||||
*/
|
||||
|
||||
use error::Error;
|
||||
use states::{Resume, State, StateResult};
|
||||
use token::Token;
|
||||
|
||||
/// Lexer state with no data of its own, used while a run of whitespace is being skipped.
#[derive(Debug)]
pub struct Whitespace;

impl Whitespace {
    /// Create a new `Whitespace` state.
    pub fn new() -> Whitespace {
        Whitespace
    }
}
|
||||
|
||||
impl State for Whitespace {
|
||||
fn lex(&mut self, c: char) -> StateResult {
|
||||
if c.is_whitespace() {
|
||||
StateResult::Continue
|
||||
} else {
|
||||
StateResult::Discard(Resume::Here)
|
||||
}
|
||||
}
|
||||
|
||||
fn none(&mut self) -> Result<Option<Token>, Error> {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
|
@ -2,7 +2,7 @@
|
|||
* Eryn Wells <eryn@erynwells.me>
|
||||
*/
|
||||
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
#[derive(Debug, Eq)]
|
||||
pub struct Lex {
|
||||
token: Token,
|
||||
value: String,
|
||||
|
@ -32,3 +32,9 @@ impl Lex {
|
|||
pub fn token(&self) -> Token { self.token }
|
||||
pub fn value(&self) -> &str { self.value.as_str() }
|
||||
}
|
||||
|
||||
impl PartialEq for Lex {
    /// Two `Lex` values are equal when both their token and their text value match.
    /// No other field takes part in the comparison.
    fn eq(&self, rhs: &Lex) -> bool {
        if self.token != rhs.token {
            return false;
        }
        self.value == rhs.value
    }
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue