[lexer] Lex bools!

Extend the lexer to support Scheme bools like this: #t, #f, #true, #false.
Add some positive tests for this too.
This commit is contained in:
Eryn Wells 2018-08-26 13:40:27 -07:00
parent b0b4699476
commit a23b917785
6 changed files with 144 additions and 2 deletions

View file

@ -54,7 +54,6 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> {
let peek = self.input.peek().map(char::clone);
println!("lexing {:?} in state {:?}, buffer = {:?}", peek, state, buffer);
match peek {
// TODO: Give the current state a chance to react.
None => match state.none() {
Ok(None) => break,
Ok(Some(token)) => {

View file

@ -6,6 +6,7 @@ use chars::Lexable;
use token::Token;
use states::{Resume, State, StateResult};
use states::id::IdSub;
use states::hash::Hash;
/// Lexer state that dispatches on a single character.
///
/// In the `State` implementation arms visible in this file, whitespace keeps the lexer in
/// this state, an identifier-initial character advances to `IdSub`, a `#` advances to
/// `Hash`, and any other character fails with an "Invalid character" message.
#[derive(Debug)]
pub struct Begin;
@ -18,6 +19,7 @@ impl State for Begin {
// TODO: Figure out some way to track newlines.
c if c.is_whitespace() => StateResult::Continue,
c if c.is_identifier_initial() => StateResult::Advance { to: Box::new(IdSub{}) },
c if c.is_hash() => StateResult::Advance { to: Box::new(Hash{}) },
_ => {
let msg = format!("Invalid character: {}", c);
StateResult::Fail { msg }
@ -29,3 +31,11 @@ impl State for Begin {
Ok(None)
}
}
/// Character predicates used only by the `Begin` state.
trait BeginLexable {
    /// Returns `true` when the character is a hash mark (`#`).
    fn is_hash(&self) -> bool;
}

impl BeginLexable for char {
    fn is_hash(&self) -> bool {
        match *self {
            '#' => true,
            _ => false,
        }
    }
}

95
lexer/src/states/hash.rs Normal file
View file

@ -0,0 +1,95 @@
/* lexer/src/states/hash.rs
* Eryn Wells <eryn@erynwells.me>
*/
//! Lexer states for handling tokens that begin with hash marks '#'.
use chars::Lexable;
use states::{Resume, State, StateResult};
use token::Token;
// Spellings of the boolean literals, without the leading `#`.
// `const` items are implicitly `'static`, so the lifetime is not spelled out.

/// Abbreviated spelling of the true literal.
const TRUE_SHORT: &str = "t";
/// Full spelling of the true literal.
const TRUE: &str = "true";
/// Abbreviated spelling of the false literal.
const FALSE_SHORT: &str = "f";
/// Full spelling of the false literal.
const FALSE: &str = "false";
/// Lexer state entered once a `#` has been seen; it decides which kind of hash-prefixed
/// token follows.
#[derive(Debug)]
pub struct Hash;
/// Lexer state for the body of a boolean literal.
///
/// The wrapped `String` holds the characters accumulated after the `#` so far.
#[derive(Debug)]
pub struct BoolSub(String);
impl State for Hash {
    /// Dispatches on the character that immediately follows a `#`.
    ///
    /// A `t` or `f` (the first letters of `true` and `false`) starts a boolean literal and
    /// hands off to `BoolSub`, seeded with that letter. Any other character fails with an
    /// "Invalid character" message.
    fn lex(&mut self, c: char) -> StateResult {
        match c {
            // The prefix test is case-sensitive, so only lowercase `t` / `f` reach this arm
            // and the character can be stored as-is, without case conversion.
            c if TRUE.starts_with(c) || FALSE.starts_with(c) => {
                StateResult::Advance { to: Box::new(BoolSub(c.to_string())) }
            }
            _ => StateResult::Fail { msg: format!("Invalid character: {}", c) },
        }
    }

    /// Handles end of input arriving directly after the `#`.
    ///
    /// A lone `#` is not a complete token. Returning `Ok(None)` here would make the lexer
    /// stop as if the input had ended cleanly, silently discarding the `#`, so the
    /// truncation is reported as an error instead (as `BoolSub::none` does for a truncated
    /// boolean).
    fn none(&mut self) -> Result<Option<Token>, String> {
        Err(String::from("Found EOF while trying to parse a token starting with '#'"))
    }
}
impl BoolSub {
    /// Resolves the accumulated text into a boolean token.
    ///
    /// Returns `Ok((Token::Bool(..), Resume::Here))` when the buffer is exactly one of the
    /// four accepted spellings (`t`, `true`, `f`, `false`), and `Err(())` for anything else,
    /// such as a partial spelling like `tr`.
    fn handle_delimiter(&self) -> Result<(Token, Resume), ()> {
        let spelled = self.0.as_str();
        let value = if [TRUE, TRUE_SHORT].contains(&spelled) {
            true
        } else if [FALSE, FALSE_SHORT].contains(&spelled) {
            false
        } else {
            return Err(());
        };
        Ok((Token::Bool(value), Resume::Here))
    }
}
impl State for BoolSub {
    /// Consumes one more character of a boolean literal.
    ///
    /// An identifier delimiter ends the literal: the buffer is resolved with
    /// `handle_delimiter` and either emitted as a token or rejected. Any other character is
    /// appended to a copy of the buffer, and lexing continues only while the result is still
    /// a prefix of `true` or `false`.
    fn lex(&mut self, c: char) -> StateResult {
        if c.is_identifier_delimiter() {
            return match self.handle_delimiter() {
                Ok((token, resume)) => StateResult::Emit(token, resume),
                Err(()) => StateResult::Fail { msg: format!("Invalid character: {}", c) },
            };
        }

        // Build the candidate spelling without touching the current state; the next state
        // (if any) takes ownership of it.
        let mut candidate = self.0.clone();
        candidate.push(c);

        let still_prefix =
            TRUE.starts_with(candidate.as_str()) || FALSE.starts_with(candidate.as_str());
        if still_prefix {
            StateResult::Advance { to: Box::new(BoolSub(candidate)) }
        } else {
            StateResult::Fail { msg: format!("Invalid character: {}", c) }
        }
    }

    /// Handles end of input while inside a boolean literal.
    ///
    /// A complete spelling yields its token; an incomplete one is an error.
    fn none(&mut self) -> Result<Option<Token>, String> {
        self.handle_delimiter()
            .map(|(token, _)| Some(token))
            .map_err(|_| String::from("Found EOF while trying to parse a bool"))
    }
}
/// Character predicates for the hash-prefixed lexer states.
trait HashLexable {
    /// Returns `true` for `t`, `f`, `T` or `F`.
    fn is_tf(&self) -> bool;
    /// Returns `true` for a backslash (`\`).
    fn is_slash(&self) -> bool;
}

impl HashLexable for char {
    fn is_tf(&self) -> bool {
        ['t', 'f', 'T', 'F'].contains(self)
    }

    fn is_slash(&self) -> bool {
        match *self {
            '\\' => true,
            _ => false,
        }
    }
}

View file

@ -3,6 +3,7 @@
*/
mod begin;
mod hash;
mod id;
pub use self::begin::Begin;

View file

@ -11,7 +11,12 @@ pub struct Lex {
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Token { LeftParen, RightParen, Id, }
/// The kinds of token the lexer can produce.
pub enum Token {
/// A boolean literal; the payload is the value it denotes (`true` for `#t` / `#true`,
/// `false` for `#f` / `#false`).
Bool(bool),
// NOTE(review): presumably an opening parenthesis; the state that emits it is not shown here.
LeftParen,
// NOTE(review): presumably a closing parenthesis; the state that emits it is not shown here.
RightParen,
// NOTE(review): presumably an identifier, emitted by the `IdSub` state; confirm there.
Id
}
impl Lex {
pub fn new(token: Token, value: &str, line: usize, offset: usize) -> Lex {

View file

@ -31,3 +31,35 @@ fn lexer_finds_id() {
assert_eq!(lex.next(), Some(Ok(expected_lex)));
assert_eq!(lex.next(), None);
}
/// `#t` lexes to a single true `Bool` token, after which the stream is exhausted.
#[test]
fn bool_short_true() {
    let mut lexer = Lexer::new("#t".chars());
    let want = Some(Ok(Lex::new(Token::Bool(true), "#t", 0, 0)));
    assert_eq!(lexer.next(), want);
    assert_eq!(lexer.next(), None);
}
/// `#f` lexes to a single false `Bool` token, after which the stream is exhausted.
#[test]
fn bool_short_false() {
    let mut lexer = Lexer::new("#f".chars());
    let want = Some(Ok(Lex::new(Token::Bool(false), "#f", 0, 0)));
    assert_eq!(lexer.next(), want);
    assert_eq!(lexer.next(), None);
}
/// `#true` lexes to a single true `Bool` token, after which the stream is exhausted.
#[test]
fn bool_long_true() {
    let mut lexer = Lexer::new("#true".chars());
    let want = Some(Ok(Lex::new(Token::Bool(true), "#true", 0, 0)));
    assert_eq!(lexer.next(), want);
    assert_eq!(lexer.next(), None);
}
/// `#false` lexes to a single false `Bool` token, after which the stream is exhausted.
#[test]
fn bool_long_false() {
    let mut lexer = Lexer::new("#false".chars());
    let want = Some(Ok(Lex::new(Token::Bool(false), "#false", 0, 0)));
    assert_eq!(lexer.next(), want);
    assert_eq!(lexer.next(), None);
}