Do actual (quite simplified) lexing!

This commit is contained in:
Eryn Wells 2016-12-23 17:46:28 -07:00
parent 9561e1bf91
commit b1051f5f74

View file

@ -3,6 +3,8 @@
use std::fmt; use std::fmt;
use characters; use characters;
use characters::CharAt;
use characters::Lexable;
use characters::RelativeIndexable; use characters::RelativeIndexable;
pub enum Kind { pub enum Kind {
@ -27,6 +29,15 @@ pub struct Token {
value: String, value: String,
} }
impl Token {
    /// Build a `Token` from its two parts.
    ///
    /// `kind` is stored in the token's `kind` field and `value` in its
    /// `value` field; both are moved into the new token unchanged.
    fn new(kind: Kind, value: String) -> Self {
        Token { kind: kind, value: value }
    }
}
impl fmt::Display for Token { impl fmt::Display for Token {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "({}, \"{}\")", self.kind, self.value) write!(f, "({}, \"{}\")", self.kind, self.value)
@ -64,26 +75,50 @@ impl Lexer {
/// Advance the forward pointer to the next character. /// Advance the forward pointer to the next character.
fn advance(&mut self) { fn advance(&mut self) {
if let Some(next) = self.input.index_after(&self.forward) { self.forward = self.input.index_after(self.forward);
self.forward = next; println!("> forward={}", self.forward);
}
} }
/// Retract the forward pointer to the previous character. /// Retract the forward pointer to the previous character.
fn retract(&mut self) { fn retract(&mut self) {
if let Some(prev) = self.input.index_before(&self.forward) { self.forward = self.input.index_before(self.forward);
self.forward = prev; println!("< forward={}", self.forward);
} }
/// Move the `begin` pointer to the index that follows `forward`.
///
/// The new index is whatever `self.input.index_after(self.forward)` returns
/// (presumably the position of the next character -- confirm against the
/// `RelativeIndexable` implementation). The updated value is printed as a
/// debugging trace.
fn advance_begin(&mut self) {
    let next_begin = self.input.index_after(self.forward);
    self.begin = next_begin;
    println!("> begin={}", self.begin);
}
/// Return the text currently spanned by the lexer's two pointers.
///
/// Copies the half-open range `begin .. forward` of `input` (the element at
/// `forward` is not included) into a freshly allocated `String`.
///
/// NOTE(review): `input` looks like a string type indexed by byte offsets;
/// if so, this slice panics when either index is out of range or not on a
/// character boundary -- confirm against the `Lexer` definition.
fn value(&self) -> String {
self.input[self.begin .. self.forward].to_string()
} }
} }
impl Lexer { impl Lexer {
fn state_initial(&mut self) { fn state_initial(&mut self, c: char, token: &mut Option<Token>) {
println!("Initial!"); println!("Initial! c='{}'", c);
if c.is_left_paren() {
*token = Some(Token::new(Kind::LeftParen, c.to_string()));
}
else if c.is_right_paren() {
*token = Some(Token::new(Kind::RightParen, c.to_string()));
}
else if c.is_identifier_initial() {
self.state = State::Identifier;
self.advance();
}
} }
fn state_identifier(&mut self) { fn state_identifier(&mut self, c: char, token: &mut Option<Token>) {
println!("Identifier!") if c.is_identifier_subsequent() {
// Stay in Identifier state.
self.advance();
}
else {
*token = Some(Token::new(Kind::Identifier, self.value()));
self.retract();
}
} }
} }
@ -92,16 +127,28 @@ impl Iterator for Lexer {
fn next(&mut self) -> Option<Token> { fn next(&mut self) -> Option<Token> {
self.begin_lexing(); self.begin_lexing();
let mut emit = false; if self.begin == self.input.len() {
println!("Lexing '{}'", self.input); return None;
while !emit {
match self.state {
State::Initial => self.state_initial(),
State::Identifier => self.state_identifier(),
}
emit = true;
} }
None let mut token: Option<Token> = None;
println!("Lexing '{}'", &self.input[self.begin ..]);
loop {
if let Some(c) = self.input.char_at(self.forward) {
match self.state {
State::Initial => self.state_initial(c, &mut token),
State::Identifier => self.state_identifier(c, &mut token),
}
}
else {
assert!(false, "Invalid character! :-(");
}
if token.is_some() {
break;
}
}
self.advance_begin();
assert!(token.is_some());
token
} }
} }