From c5b769ff45db00e7615e1cc02ae1f78c739b98bf Mon Sep 17 00:00:00 2001
From: Eryn Wells
Date: Sun, 30 Apr 2017 17:46:42 -0700
Subject: [PATCH] A peekable lexer

Use the chars() iterator on &str, with the Peekable wrapper on
Iterators, to iterate the input, rather than needing to hold the whole
input and do iteration by indexes.
---
 lexer/Cargo.toml  |  1 -
 lexer/src/lib.rs  | 88 +++++++++++++++++++++++++++++++++++++++++++++++
 lexer/src/main.rs | 14 ++++++++
 3 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 lexer/src/main.rs

diff --git a/lexer/Cargo.toml b/lexer/Cargo.toml
index f8eb105..16d75ef 100644
--- a/lexer/Cargo.toml
+++ b/lexer/Cargo.toml
@@ -4,4 +4,3 @@ version = "0.1.0"
 authors = ["Eryn Wells "]
 
 [dependencies]
-sibiltypes = { path = "../types" }
diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs
index e69de29..0d2a0c0 100644
--- a/lexer/src/lib.rs
+++ b/lexer/src/lib.rs
@@ -0,0 +1,88 @@
+/* lexer/src/lib.rs
+ * Eryn Wells
+ */
+
+use std::iter::Peekable;
+
+/// A single lexical token recognized by the lexer.
+#[derive(Debug)]
+pub enum Token { LeftParen, RightParen, Id(String), }
+
+/// Whether scanning should resume at the current character or the next one.
+enum Resume { Here, AtNext }
+
+/// The outcome of examining one peeked character.
+enum IterationResult {
+    Continue,
+    Emit(Token, Resume),
+    Error(String),
+}
+
+/// A lexer over any peekable stream of characters.
+pub struct Lexer<T> where T: Iterator<Item=char> {
+    input: Peekable<T>,
+}
+
+impl<T> Lexer<T> where T: Iterator<Item=char> {
+    pub fn new(input: T) -> Lexer<T> {
+        Lexer { input: input.peekable() }
+    }
+
+    fn emit(&self, token: Token, resume: Resume) -> IterationResult {
+        IterationResult::Emit(token, resume)
+    }
+
+    fn fail(&self, msg: String) -> IterationResult {
+        IterationResult::Error(msg)
+    }
+}
+
+impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> {
+    type Item = Result<Token, String>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut buffer = String::new();
+        while let Some(peek) = self.input.peek().map(char::clone) {
+            let result = if buffer.is_empty() {
+                match peek {
+                    '(' => self.emit(Token::LeftParen, Resume::AtNext),
+                    ')' => self.emit(Token::RightParen, Resume::AtNext),
+                    c if c.is_whitespace() => IterationResult::Continue,
+                    c if c.is_alphabetic() => {
+                        buffer.push(c);
+                        IterationResult::Continue
+                    },
+                    c => self.fail(format!("Invalid character: {}", c)),
+                }
+            }
+            else {
+                match peek {
+                    c if c.is_alphabetic() => {
+                        buffer.push(c);
+                        IterationResult::Continue
+                    }
+                    c if c == '(' || c == ')' || c.is_whitespace() =>
+                        self.emit(Token::Id(buffer.clone()), Resume::Here),
+                    c => self.fail(format!("Invalid character: {}", c)),
+                }
+            };
+            match result {
+                IterationResult::Continue => self.input.next(),
+                IterationResult::Emit(token, resume) => {
+                    match resume {
+                        Resume::AtNext => self.input.next(),
+                        Resume::Here => None,
+                    };
+                    return Some(Ok(token))
+                },
+                IterationResult::Error(msg) => return Some(Err(msg)),
+            };
+        }
+        // The input ran out while an identifier was still being accumulated;
+        // emit it rather than silently dropping the final token.
+        if !buffer.is_empty() {
+            return Some(Ok(Token::Id(buffer)));
+        }
+        None
+    }
+}
diff --git a/lexer/src/main.rs b/lexer/src/main.rs
new file mode 100644
index 0000000..e177f98
--- /dev/null
+++ b/lexer/src/main.rs
@@ -0,0 +1,14 @@
+/* lexer/src/main.rs
+ * Eryn Wells
+ */
+
+extern crate sibillexer;
+
+use sibillexer::Lexer;
+
+fn main() {
+    let lexer = Lexer::new("(ab (cd) ef)".chars());
+    for tok in lexer {
+        println!("found {:?}", tok.unwrap());
+    }
+}