2017-04-30 17:46:42 -07:00
|
|
|
/* lexer/src/lib.rs
|
|
|
|
* Eryn Wells <eryn@erynwells.me>
|
|
|
|
*/
|
|
|
|
|
|
|
|
use std::iter::Peekable;
|
|
|
|
|
2017-05-13 15:37:01 -07:00
|
|
|
mod chars;
|
2017-05-13 15:26:41 -07:00
|
|
|
mod error;
|
|
|
|
|
|
|
|
pub use error::Error;
|
|
|
|
|
2017-05-13 15:37:01 -07:00
|
|
|
use chars::Lexable;
|
|
|
|
|
2017-06-18 16:50:14 -07:00
|
|
|
/// The item type yielded by the lexer's `Iterator` implementation: a `Token` on success, or an
/// `Error` when lexing fails.
pub type Result = std::result::Result<Token, Error>;
|
|
|
|
|
2017-05-02 21:44:01 -07:00
|
|
|
/// A lexical token produced by the lexer.
///
/// `Clone` is derived so that consumers can keep copies of tokens they have already received.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Token {
    /// Emitted for a character classified as a left parenthesis.
    LeftParen,
    /// Emitted for a character classified as a right parenthesis.
    RightParen,
    /// An identifier, carrying the text that was accumulated for it.
    Id(String),
}
|
|
|
|
|
2017-05-02 21:44:01 -07:00
|
|
|
/// Where lexing should pick up again after a token has been emitted.
#[derive(Debug, Eq, PartialEq)]
enum Resume {
    /// Leave the currently peeked character in the input; it has not been consumed by the token.
    Here,
    /// Consume the currently peeked character before returning the token.
    AtNext,
}
|
|
|
|
|
2017-05-02 21:44:01 -07:00
|
|
|
#[derive(Debug, Eq, PartialEq)]
|
2017-04-30 17:46:42 -07:00
|
|
|
enum IterationResult {
|
2017-05-02 21:44:57 -07:00
|
|
|
Finish,
|
2017-04-30 17:46:42 -07:00
|
|
|
Continue,
|
|
|
|
Emit(Token, Resume),
|
2017-05-13 15:26:41 -07:00
|
|
|
Error(Error),
|
2017-04-30 17:46:42 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/// A lexer over any iterator of characters.
///
/// Tokens are obtained by iterating over the lexer.
pub struct Lexer<T: Iterator<Item = char>> {
    /// The character source, wrapped so the next character can be inspected before consuming it.
    input: Peekable<T>,
    /// Line counter; starts at zero and is bumped whenever a newline is handled as whitespace.
    line: usize,
    /// Offset counter within the current line; reset to zero whenever `line` is bumped.
    offset: usize,
}
|
|
|
|
|
|
|
|
impl<T> Lexer<T> where T: Iterator<Item=char> {
|
|
|
|
pub fn new(input: T) -> Lexer<T> {
|
2017-06-26 21:54:05 -07:00
|
|
|
Lexer {
|
|
|
|
input: input.peekable(),
|
|
|
|
line: 0,
|
|
|
|
offset: 0
|
|
|
|
}
|
2017-04-30 17:46:42 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
fn emit(&self, token: Token, resume: Resume) -> IterationResult {
|
|
|
|
IterationResult::Emit(token, resume)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn fail(&self, msg: String) -> IterationResult {
|
2017-05-13 15:26:41 -07:00
|
|
|
IterationResult::Error(Error::new(msg))
|
2017-04-30 17:46:42 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-26 21:54:05 -07:00
|
|
|
impl<T> Lexer<T> where T: Iterator<Item=char> {
|
|
|
|
fn handle_whitespace(&mut self, c: char) {
|
|
|
|
if c == '\n' {
|
|
|
|
self.line += 1;
|
|
|
|
self.offset = 0;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
self.offset += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-04-30 17:46:42 -07:00
|
|
|
impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> {
|
2017-06-18 16:50:14 -07:00
|
|
|
type Item = Result;
|
2017-04-30 17:46:42 -07:00
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
let mut buffer = String::new();
|
2017-05-02 21:44:57 -07:00
|
|
|
loop {
|
|
|
|
let peek = self.input.peek().map(char::clone);
|
2017-04-30 17:46:42 -07:00
|
|
|
let result = if buffer.is_empty() {
|
|
|
|
match peek {
|
2017-05-13 15:37:01 -07:00
|
|
|
Some(c) if c.is_left_paren() => self.emit(Token::LeftParen, Resume::AtNext),
|
|
|
|
Some(c) if c.is_right_paren() => self.emit(Token::RightParen, Resume::AtNext),
|
2017-06-26 21:54:05 -07:00
|
|
|
Some(c) if c.is_whitespace() => {
|
|
|
|
self.handle_whitespace(c);
|
|
|
|
IterationResult::Continue
|
|
|
|
},
|
2017-05-13 17:21:23 -07:00
|
|
|
Some(c) if c.is_identifier_initial() => {
|
2017-04-30 17:46:42 -07:00
|
|
|
buffer.push(c);
|
|
|
|
IterationResult::Continue
|
|
|
|
},
|
2017-05-02 21:44:57 -07:00
|
|
|
Some(c) => self.fail(format!("Invalid character: {}", c)),
|
|
|
|
// We found EOF and there's no pending string, so just finish.
|
|
|
|
None => IterationResult::Finish,
|
2017-04-30 17:46:42 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
match peek {
|
2017-05-13 17:21:23 -07:00
|
|
|
Some(c) if c.is_identifier_subsequent() => {
|
2017-04-30 17:46:42 -07:00
|
|
|
buffer.push(c);
|
|
|
|
IterationResult::Continue
|
|
|
|
}
|
2017-05-13 17:21:23 -07:00
|
|
|
Some(c) if c.is_identifier_delimiter() =>
|
2017-04-30 17:46:42 -07:00
|
|
|
self.emit(Token::Id(buffer.clone()), Resume::Here),
|
2017-05-02 21:44:57 -07:00
|
|
|
Some(c) => self.fail(format!("Invalid character: {}", c)),
|
|
|
|
// Found EOF. Emit what we have and finish.
|
|
|
|
// Note: the Resume argument doesn't matter in this case since the input
|
|
|
|
// iterator will always be None from here on.
|
|
|
|
None => self.emit(Token::Id(buffer.clone()), Resume::Here),
|
2017-04-30 17:46:42 -07:00
|
|
|
}
|
|
|
|
};
|
|
|
|
match result {
|
2017-05-02 21:44:57 -07:00
|
|
|
IterationResult::Finish => break,
|
2017-04-30 17:46:42 -07:00
|
|
|
IterationResult::Continue => self.input.next(),
|
|
|
|
IterationResult::Emit(token, resume) => {
|
2017-05-02 21:44:01 -07:00
|
|
|
if resume == Resume::AtNext {
|
|
|
|
self.input.next();
|
|
|
|
}
|
2017-04-30 17:46:42 -07:00
|
|
|
return Some(Ok(token))
|
|
|
|
},
|
2017-05-13 15:26:41 -07:00
|
|
|
IterationResult::Error(err) => return Some(Err(err)),
|
2017-04-30 17:46:42 -07:00
|
|
|
};
|
|
|
|
}
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|