Do actual (quite simplified) lexing!
This commit is contained in:
parent
9561e1bf91
commit
b1051f5f74
1 changed files with 66 additions and 19 deletions
85
src/lexer.rs
85
src/lexer.rs
|
@ -3,6 +3,8 @@
|
|||
use std::fmt;
|
||||
|
||||
use characters;
|
||||
use characters::CharAt;
|
||||
use characters::Lexable;
|
||||
use characters::RelativeIndexable;
|
||||
|
||||
pub enum Kind {
|
||||
|
@ -27,6 +29,15 @@ pub struct Token {
|
|||
value: String,
|
||||
}
|
||||
|
||||
impl Token {
|
||||
fn new(kind: Kind, value: String) -> Token {
|
||||
Token {
|
||||
kind: kind,
|
||||
value: value,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Token {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "({}, \"{}\")", self.kind, self.value)
|
||||
|
@ -64,26 +75,50 @@ impl Lexer {
|
|||
|
||||
/// Advance the forward pointer to the next character.
|
||||
fn advance(&mut self) {
|
||||
if let Some(next) = self.input.index_after(&self.forward) {
|
||||
self.forward = next;
|
||||
}
|
||||
self.forward = self.input.index_after(self.forward);
|
||||
println!("> forward={}", self.forward);
|
||||
}
|
||||
|
||||
/// Retract the forward pointer to the previous character.
|
||||
fn retract(&mut self) {
|
||||
if let Some(prev) = self.input.index_before(&self.forward) {
|
||||
self.forward = prev;
|
||||
}
|
||||
self.forward = self.input.index_before(self.forward);
|
||||
println!("< forward={}", self.forward);
|
||||
}
|
||||
|
||||
fn advance_begin(&mut self) {
|
||||
self.begin = self.input.index_after(self.forward);
|
||||
println!("> begin={}", self.begin);
|
||||
}
|
||||
|
||||
fn value(&self) -> String {
|
||||
self.input[self.begin .. self.forward].to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl Lexer {
|
||||
fn state_initial(&mut self) {
|
||||
println!("Initial!");
|
||||
fn state_initial(&mut self, c: char, token: &mut Option<Token>) {
|
||||
println!("Initial! c='{}'", c);
|
||||
if c.is_left_paren() {
|
||||
*token = Some(Token::new(Kind::LeftParen, c.to_string()));
|
||||
}
|
||||
else if c.is_right_paren() {
|
||||
*token = Some(Token::new(Kind::RightParen, c.to_string()));
|
||||
}
|
||||
else if c.is_identifier_initial() {
|
||||
self.state = State::Identifier;
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
fn state_identifier(&mut self) {
|
||||
println!("Identifier!")
|
||||
fn state_identifier(&mut self, c: char, token: &mut Option<Token>) {
|
||||
if c.is_identifier_subsequent() {
|
||||
// State in Identifier state.
|
||||
self.advance();
|
||||
}
|
||||
else {
|
||||
*token = Some(Token::new(Kind::Identifier, self.value()));
|
||||
self.retract();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -92,16 +127,28 @@ impl Iterator for Lexer {
|
|||
|
||||
fn next(&mut self) -> Option<Token> {
|
||||
self.begin_lexing();
|
||||
let mut emit = false;
|
||||
println!("Lexing '{}'", self.input);
|
||||
while !emit {
|
||||
match self.state {
|
||||
State::Initial => self.state_initial(),
|
||||
State::Identifier => self.state_identifier(),
|
||||
}
|
||||
emit = true;
|
||||
if self.begin == self.input.len() {
|
||||
return None;
|
||||
}
|
||||
None
|
||||
let mut token: Option<Token> = None;
|
||||
println!("Lexing '{}'", &self.input[self.begin ..]);
|
||||
loop {
|
||||
if let Some(c) = self.input.char_at(self.forward) {
|
||||
match self.state {
|
||||
State::Initial => self.state_initial(c, &mut token),
|
||||
State::Identifier => self.state_identifier(c, &mut token),
|
||||
}
|
||||
}
|
||||
else {
|
||||
assert!(false, "Invalid character! :-(");
|
||||
}
|
||||
if token.is_some() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
self.advance_begin();
|
||||
assert!(token.is_some());
|
||||
token
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue