2016-12-19 22:23:27 -08:00
|
|
|
//! # Lexer
|
|
|
|
|
2016-12-23 17:45:37 -07:00
|
|
|
use std::fmt;
|
|
|
|
|
2016-12-20 17:38:44 -08:00
|
|
|
use characters;
|
2016-12-23 17:46:28 -07:00
|
|
|
use characters::CharAt;
|
|
|
|
use characters::Lexable;
|
2016-12-22 09:25:31 -08:00
|
|
|
use characters::RelativeIndexable;
|
2016-12-19 22:23:27 -08:00
|
|
|
|
2016-12-20 17:52:29 -08:00
|
|
|
/// The category of a token produced by the lexer.
///
/// The common comparison and debugging traits are derived so that callers
/// can compare kinds, copy them freely, and print them with `{:?}`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Kind {
    /// A left (opening) parenthesis.
    LeftParen,
    /// A right (closing) parenthesis.
    RightParen,
    /// An identifier.
    Identifier,
}
|
|
|
|
|
2016-12-23 17:45:37 -07:00
|
|
|
impl fmt::Display for Kind {
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
|
|
let s = match *self {
|
|
|
|
Kind::LeftParen => "LeftParen",
|
|
|
|
Kind::RightParen => "RightParen",
|
|
|
|
Kind::Identifier => "Identifier",
|
|
|
|
};
|
|
|
|
write!(f, "{}", s)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-12-20 17:52:29 -08:00
|
|
|
pub struct Token {
|
2016-12-19 22:23:27 -08:00
|
|
|
kind: Kind,
|
|
|
|
value: String,
|
|
|
|
}
|
|
|
|
|
2016-12-23 17:46:28 -07:00
|
|
|
impl Token {
|
|
|
|
fn new(kind: Kind, value: String) -> Token {
|
|
|
|
Token {
|
|
|
|
kind: kind,
|
|
|
|
value: value,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-12-23 17:45:37 -07:00
|
|
|
impl fmt::Display for Token {
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
|
|
write!(f, "({}, \"{}\")", self.kind, self.value)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-12-22 09:25:31 -08:00
|
|
|
/// The states of the lexer's scanning state machine.
enum State {
    /// No token is in progress; the next character decides what to lex.
    Initial,
    /// An identifier has been started and is being extended.
    Identifier,
}
|
|
|
|
|
2016-12-20 17:52:29 -08:00
|
|
|
pub struct Lexer {
|
|
|
|
input: String,
|
2016-12-22 09:25:31 -08:00
|
|
|
begin: usize,
|
|
|
|
forward: usize,
|
|
|
|
state: State,
|
2016-12-20 17:38:44 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Lexer {
|
2016-12-20 17:52:29 -08:00
|
|
|
pub fn new(input: String) -> Lexer {
|
2016-12-22 09:25:31 -08:00
|
|
|
Lexer {
|
|
|
|
input: input,
|
|
|
|
begin: 0,
|
|
|
|
forward: 0,
|
|
|
|
state: State::Initial,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Lexer {
|
|
|
|
fn begin_lexing(&mut self) {
|
|
|
|
self.forward = self.begin;
|
|
|
|
self.state = State::Initial;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Advance the forward pointer to the next character.
|
|
|
|
fn advance(&mut self) {
|
2016-12-23 17:46:28 -07:00
|
|
|
self.forward = self.input.index_after(self.forward);
|
|
|
|
println!("> forward={}", self.forward);
|
2016-12-22 09:25:31 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Retract the forward pointer to the previous character.
|
|
|
|
fn retract(&mut self) {
|
2016-12-23 17:46:28 -07:00
|
|
|
self.forward = self.input.index_before(self.forward);
|
|
|
|
println!("< forward={}", self.forward);
|
|
|
|
}
|
|
|
|
|
|
|
|
fn advance_begin(&mut self) {
|
|
|
|
self.begin = self.input.index_after(self.forward);
|
|
|
|
println!("> begin={}", self.begin);
|
|
|
|
}
|
|
|
|
|
|
|
|
fn value(&self) -> String {
|
|
|
|
self.input[self.begin .. self.forward].to_string()
|
2016-12-22 09:25:31 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Lexer {
|
2016-12-23 17:46:28 -07:00
|
|
|
fn state_initial(&mut self, c: char, token: &mut Option<Token>) {
|
|
|
|
println!("Initial! c='{}'", c);
|
|
|
|
if c.is_left_paren() {
|
|
|
|
*token = Some(Token::new(Kind::LeftParen, c.to_string()));
|
|
|
|
}
|
|
|
|
else if c.is_right_paren() {
|
|
|
|
*token = Some(Token::new(Kind::RightParen, c.to_string()));
|
|
|
|
}
|
|
|
|
else if c.is_identifier_initial() {
|
|
|
|
self.state = State::Identifier;
|
|
|
|
self.advance();
|
|
|
|
}
|
2016-12-22 09:25:31 -08:00
|
|
|
}
|
|
|
|
|
2016-12-23 17:46:28 -07:00
|
|
|
fn state_identifier(&mut self, c: char, token: &mut Option<Token>) {
|
|
|
|
if c.is_identifier_subsequent() {
|
|
|
|
// State in Identifier state.
|
|
|
|
self.advance();
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
*token = Some(Token::new(Kind::Identifier, self.value()));
|
|
|
|
self.retract();
|
|
|
|
}
|
2016-12-20 17:52:29 -08:00
|
|
|
}
|
2016-12-20 17:38:44 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Iterator for Lexer {
|
|
|
|
type Item = Token;
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Token> {
|
2016-12-22 09:25:31 -08:00
|
|
|
self.begin_lexing();
|
2016-12-23 17:46:28 -07:00
|
|
|
if self.begin == self.input.len() {
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
let mut token: Option<Token> = None;
|
|
|
|
println!("Lexing '{}'", &self.input[self.begin ..]);
|
|
|
|
loop {
|
|
|
|
if let Some(c) = self.input.char_at(self.forward) {
|
|
|
|
match self.state {
|
|
|
|
State::Initial => self.state_initial(c, &mut token),
|
|
|
|
State::Identifier => self.state_identifier(c, &mut token),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
assert!(false, "Invalid character! :-(");
|
|
|
|
}
|
|
|
|
if token.is_some() {
|
|
|
|
break;
|
2016-12-22 09:25:31 -08:00
|
|
|
}
|
|
|
|
}
|
2016-12-23 17:46:28 -07:00
|
|
|
self.advance_begin();
|
|
|
|
assert!(token.is_some());
|
|
|
|
token
|
2016-12-20 17:38:44 -08:00
|
|
|
}
|
|
|
|
}
|