sibil/src/lexer/mod.rs

153 lines
4 KiB
Rust
Raw Normal View History

2016-12-24 09:05:10 -07:00
/* lexer.rs
* Eryn Wells <eryn@erynwells.me>
*/
pub mod token;
mod char;
mod charset;
mod str;
2016-12-23 17:45:37 -07:00
use self::char::Lexable;
use self::str::CharAt;
use self::str::RelativeIndexable;
use self::token::Lex;
use self::token::Token;
/// The states of the lexer's hand-written state machine.
///
/// The state is reset to `Initial` at the start of every token and is
/// switched by the `state_*` handlers on `Lexer` as characters are consumed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum State {
    /// No character of the current token has been classified yet.
    Initial,
    /// Inside a multi-character identifier.
    Identifier,
    /// A `#` has been read; the next character decides what kind of token follows.
    Hash,
}
2016-12-20 17:52:29 -08:00
pub struct Lexer {
input: String,
begin: usize,
forward: usize,
2016-12-24 09:17:08 -07:00
line: u32,
state: State,
2016-12-20 17:38:44 -08:00
}
impl Lexer {
2016-12-20 17:52:29 -08:00
pub fn new(input: String) -> Lexer {
Lexer {
input: input,
begin: 0,
forward: 0,
2016-12-24 09:17:08 -07:00
line: 1,
state: State::Initial,
}
}
}
/// Cursor bookkeeping shared by the state handlers and the iterator.
///
/// Index arithmetic is delegated to `index_after` / `index_before`, which
/// come from the `RelativeIndexable` trait in the sibling `str` module
/// (presumably stepping by whole characters -- confirm against that module).
impl Lexer {
    /// Prepare to scan a fresh token: rewind `forward` to `begin` and put the
    /// state machine back in `State::Initial`.
    fn begin_lexing(&mut self) {
        self.state = State::Initial;
        self.forward = self.begin;
    }

    /// Move the forward pointer one step toward the end of the input.
    fn advance(&mut self) {
        let next = self.input.index_after(self.forward);
        self.forward = next;
        println!("> forward={}", next);
    }

    /// Move the forward pointer one step back toward the start of the input.
    fn retract(&mut self) {
        let previous = self.input.index_before(self.forward);
        self.forward = previous;
        println!("< forward={}", previous);
    }

    /// Move `begin` to the index just after `forward`, and park `forward`
    /// there too, ready for the next token.
    fn advance_begin(&mut self) {
        let next = self.input.index_after(self.forward);
        self.begin = next;
        self.forward = next;
        println!("> begin={}, forward={}", next, next);
    }

    /// Copy out the text in `begin .. forward` (end exclusive). This is the
    /// value handed to a newly created token.
    fn value(&self) -> String {
        let lexeme: &str = &self.input[self.begin .. self.forward];
        String::from(lexeme)
    }
}
impl Lexer {
2016-12-23 17:53:28 -07:00
/// Handle self.state == State::Initial
fn state_initial(&mut self, c: char, lex: &mut Option<Lex>) {
2016-12-23 17:46:28 -07:00
if c.is_left_paren() {
*lex = Some(Lex::new(Token::LeftParen(c.to_string())));
2016-12-23 17:46:28 -07:00
}
else if c.is_right_paren() {
*lex = Some(Lex::new(Token::RightParen(c.to_string())));
2016-12-23 17:46:28 -07:00
}
2016-12-24 10:29:10 -07:00
2016-12-24 09:07:38 -07:00
else if c.is_identifier_single() {
*lex = Some(Lex::new(Token::Identifier(c.to_string())));
2016-12-24 09:07:38 -07:00
}
2016-12-23 17:46:28 -07:00
else if c.is_identifier_initial() {
self.state = State::Identifier;
self.advance();
}
2016-12-24 10:29:10 -07:00
else if c.is_hash() {
self.state = State::Hash;
self.advance();
}
2016-12-24 09:17:08 -07:00
else if c.is_whitespace() {
if c.is_newline() {
self.line += 1;
}
self.advance_begin();
2016-12-24 09:17:08 -07:00
}
}
2016-12-23 17:53:28 -07:00
/// Handle self.state == State::Identifier
fn state_identifier(&mut self, c: char, lex: &mut Option<Lex>) {
2016-12-23 17:46:28 -07:00
if c.is_identifier_subsequent() {
// State in Identifier state.
self.advance();
}
else {
*lex = Some(Lex::new(Token::Identifier(self.value())));
2016-12-23 17:46:28 -07:00
self.retract();
}
2016-12-20 17:52:29 -08:00
}
2016-12-24 10:29:10 -07:00
fn state_hash(&mut self, c: char, lex: &mut Option<Lex>) {
if c.is_boolean_true() || c.is_boolean_false() {
2016-12-24 10:29:10 -07:00
self.advance();
*lex = Some(Lex::new(Token::Boolean(c.is_boolean_true()));
2016-12-24 10:29:10 -07:00
}
}
2016-12-20 17:38:44 -08:00
}
impl Iterator for Lexer {
type Item = Lex;
2016-12-20 17:38:44 -08:00
fn next(&mut self) -> Option<Lex> {
self.begin_lexing();
2016-12-23 17:46:28 -07:00
if self.begin == self.input.len() {
return None;
}
let mut lex: Option<Lex> = None;
2016-12-23 17:46:28 -07:00
println!("Lexing '{}'", &self.input[self.begin ..]);
while lex.is_none() {
2016-12-23 17:46:28 -07:00
if let Some(c) = self.input.char_at(self.forward) {
println!("{:?}! c='{}'", self.state, c);
2016-12-23 17:46:28 -07:00
match self.state {
State::Initial => self.state_initial(c, &mut lex),
State::Identifier => self.state_identifier(c, &mut lex),
State::Hash => self.state_hash(c, &mut lex),
2016-12-23 17:46:28 -07:00
}
}
else {
assert!(false, "Invalid character! :-(");
}
}
2016-12-23 17:46:28 -07:00
self.advance_begin();
assert!(lex.is_some(), "We quit the lexing loop but didn't actually have a token. :-(");
lex
2016-12-20 17:38:44 -08:00
}
}