sibil/lexer/src/lib.rs

120 lines
3.6 KiB
Rust
Raw Normal View History

/* lexer/src/lib.rs
* Eryn Wells <eryn@erynwells.me>
*/
use std::iter::Peekable;
mod chars;
2017-05-13 15:26:41 -07:00
mod error;
pub use error::Error;
use chars::Lexable;
2017-06-18 16:50:14 -07:00
/// The outcome of lexing a single token: `Ok` with the [`Token`] that was recognized, or `Err`
/// with the lexer [`Error`]. This is the `Item` type yielded by the `Lexer` iterator.
pub type Result = std::result::Result<Token, Error>;
/// A lexical token produced by the lexer.
///
/// `Clone` is derived (in addition to `Debug`, `Eq` and `PartialEq`) so that consumers can keep
/// copies of tokens without having to re-lex the input.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Token {
    /// Emitted for a character that satisfies `is_left_paren()`.
    LeftParen,
    /// Emitted for a character that satisfies `is_right_paren()`.
    RightParen,
    /// An identifier; the payload is the identifier's text.
    Id(String),
}
/// Tells the lexer where to pick up again after a token has been emitted.
#[derive(Debug, Eq, PartialEq)]
enum Resume {
    /// Leave the currently peeked character in the input; it is looked at again on the next call.
    Here,
    /// Advance the input past the currently peeked character before returning the token.
    AtNext,
}
#[derive(Debug, Eq, PartialEq)]
enum IterationResult {
Finish,
Continue,
Emit(Token, Resume),
2017-05-13 15:26:41 -07:00
Error(Error),
}
/// A lexer over any stream of `char`s.
///
/// The bound on `T` has to live on the struct itself because `Peekable<T>` requires
/// `T: Iterator`.
pub struct Lexer<T: Iterator<Item = char>> {
    /// The character source, wrapped so the next character can be inspected without consuming it.
    input: Peekable<T>,
    /// Line counter; starts at 0 and is bumped by `handle_whitespace` for each `'\n'`.
    line: usize,
    /// Offset counter; `handle_whitespace` resets it to 0 on `'\n'` and increments it for any
    /// other whitespace character.
    offset: usize,
}
impl<T> Lexer<T> where T: Iterator<Item=char> {
pub fn new(input: T) -> Lexer<T> {
Lexer {
input: input.peekable(),
line: 0,
offset: 0
}
}
fn emit(&self, token: Token, resume: Resume) -> IterationResult {
IterationResult::Emit(token, resume)
}
fn fail(&self, msg: String) -> IterationResult {
2017-05-13 15:26:41 -07:00
IterationResult::Error(Error::new(msg))
}
}
impl<T> Lexer<T> where T: Iterator<Item=char> {
    /// Updates the position counters for the whitespace character `c`.
    ///
    /// A newline starts a new line (line counter up by one, offset back to 0); every other
    /// character passed in advances the offset by one.
    fn handle_whitespace(&mut self, c: char) {
        match c {
            '\n' => {
                self.line += 1;
                self.offset = 0;
            }
            _ => self.offset += 1,
        }
    }
}
impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> {
2017-06-18 16:50:14 -07:00
type Item = Result;
fn next(&mut self) -> Option<Self::Item> {
let mut buffer = String::new();
loop {
let peek = self.input.peek().map(char::clone);
let result = if buffer.is_empty() {
match peek {
Some(c) if c.is_left_paren() => self.emit(Token::LeftParen, Resume::AtNext),
Some(c) if c.is_right_paren() => self.emit(Token::RightParen, Resume::AtNext),
Some(c) if c.is_whitespace() => {
self.handle_whitespace(c);
IterationResult::Continue
},
Some(c) if c.is_identifier_initial() => {
buffer.push(c);
IterationResult::Continue
},
Some(c) => self.fail(format!("Invalid character: {}", c)),
// We found EOF and there's no pending string, so just finish.
None => IterationResult::Finish,
}
}
else {
match peek {
Some(c) if c.is_identifier_subsequent() => {
buffer.push(c);
IterationResult::Continue
}
Some(c) if c.is_identifier_delimiter() =>
self.emit(Token::Id(buffer.clone()), Resume::Here),
Some(c) => self.fail(format!("Invalid character: {}", c)),
// Found EOF. Emit what we have and finish.
// Note: the Resume argument doesn't matter in this case since the input
// iterator will always be None from here on.
None => self.emit(Token::Id(buffer.clone()), Resume::Here),
}
};
match result {
IterationResult::Finish => break,
IterationResult::Continue => self.input.next(),
IterationResult::Emit(token, resume) => {
if resume == Resume::AtNext {
self.input.next();
}
return Some(Ok(token))
},
2017-05-13 15:26:41 -07:00
IterationResult::Error(err) => return Some(Err(err)),
};
}
None
}
}