From a23b9177852085d9ecef895ef22902d21f9f1940 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Sun, 26 Aug 2018 13:40:27 -0700 Subject: [PATCH] [lexer] Lex bools! Extend the lexer to support Scheme bools like this: #t, #f, #true, #false. Add some positive tests for this too. --- lexer/src/lib.rs | 1 - lexer/src/states/begin.rs | 10 ++++ lexer/src/states/hash.rs | 95 ++++++++++++++++++++++++++++++++++++ lexer/src/states/mod.rs | 1 + lexer/src/token.rs | 7 ++- lexer/tests/single_tokens.rs | 32 ++++++++++++ 6 files changed, 144 insertions(+), 2 deletions(-) create mode 100644 lexer/src/states/hash.rs diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs index 4309278..032d88e 100644 --- a/lexer/src/lib.rs +++ b/lexer/src/lib.rs @@ -54,7 +54,6 @@ impl<T> Iterator for Lexer<T> where T: Iterator<Item=char> { let peek = self.input.peek().map(char::clone); println!("lexing {:?} in state {:?}, buffer = {:?}", peek, state, buffer); match peek { - // TODO: Give the current state a chance to react. None => match state.none() { Ok(None) => break, Ok(Some(token)) => { diff --git a/lexer/src/states/begin.rs b/lexer/src/states/begin.rs index 3d25b1a..432ba9a 100644 --- a/lexer/src/states/begin.rs +++ b/lexer/src/states/begin.rs @@ -6,6 +6,7 @@ use chars::Lexable; use token::Token; use states::{Resume, State, StateResult}; use states::id::IdSub; +use states::hash::Hash; #[derive(Debug)] pub struct Begin; @@ -18,6 +19,7 @@ impl State for Begin { // TODO: Figure out some way to track newlines.
c if c.is_whitespace() => StateResult::Continue, c if c.is_identifier_initial() => StateResult::Advance { to: Box::new(IdSub{}) }, + c if c.is_hash() => StateResult::Advance { to: Box::new(Hash{}) }, _ => { let msg = format!("Invalid character: {}", c); StateResult::Fail { msg } @@ -29,3 +31,11 @@ impl State for Begin { Ok(None) } } + +trait BeginLexable { + fn is_hash(&self) -> bool; +} + +impl BeginLexable for char { + fn is_hash(&self) -> bool { *self == '#' } +} diff --git a/lexer/src/states/hash.rs b/lexer/src/states/hash.rs new file mode 100644 index 0000000..53eb0b3 --- /dev/null +++ b/lexer/src/states/hash.rs @@ -0,0 +1,95 @@ +/* lexer/src/states/hash.rs + * Eryn Wells + */ + +//! Lexer states for handling tokens that begin with hash marks '#'. + +use chars::Lexable; +use states::{Resume, State, StateResult}; +use token::Token; + +const TRUE_SHORT: &'static str = "t"; +const TRUE: &'static str = "true"; +const FALSE_SHORT: &'static str = "f"; +const FALSE: &'static str = "false"; + +#[derive(Debug)] pub struct Hash; +#[derive(Debug)] pub struct BoolSub(String); + +impl State for Hash { + fn lex(&mut self, c: char) -> StateResult { + match c { + c if TRUE.starts_with(c) || FALSE.starts_with(c) => { + let buf = c.to_lowercase().to_string(); + StateResult::Advance { to: Box::new(BoolSub(buf)) } + } + _ => { + let msg = format!("Invalid character: {}", c); + StateResult::Fail { msg } + } + } + } + + fn none(&mut self) -> Result<Option<Token>, String> { + Ok(None) + } +} + +impl BoolSub { + fn handle_delimiter(&self) -> Result<(Token, Resume), ()> { + if self.0 == TRUE || self.0 == TRUE_SHORT { + Ok((Token::Bool(true), Resume::Here)) + } else if self.0 == FALSE || self.0 == FALSE_SHORT { + Ok((Token::Bool(false), Resume::Here)) + } else { + Err(()) + } + } +} + +impl State for BoolSub { + fn lex(&mut self, c: char) -> StateResult { + match c { + c if c.is_identifier_delimiter() => match self.handle_delimiter() { + Ok((token, resume)) => StateResult::Emit(token, resume),
Err(_) => { + let msg = format!("Invalid character: {}", c); + StateResult::Fail { msg } + } + }, + _ => { + let buf = { + let mut b = String::from(self.0.as_str()); + b.push(c); + b + }; + if TRUE.starts_with(&buf) || FALSE.starts_with(&buf) { + StateResult::Advance { to: Box::new(BoolSub(buf)) } + } else { + let msg = format!("Invalid character: {}", c); + StateResult::Fail { msg } + } + } + } + } + + fn none(&mut self) -> Result<Option<Token>, String> { + match self.handle_delimiter() { + Ok((token, _)) => Ok(Some(token)), + Err(_) => { + let msg = format!("Found EOF while trying to parse a bool"); + Err(msg) + } + } + } +} + +trait HashLexable { + fn is_tf(&self) -> bool; + fn is_slash(&self) -> bool; +} + +impl HashLexable for char { + fn is_tf(&self) -> bool { "tfTF".contains(*self) } + fn is_slash(&self) -> bool { *self == '\\' } +} diff --git a/lexer/src/states/mod.rs b/lexer/src/states/mod.rs index 68ffa27..ac9004d 100644 --- a/lexer/src/states/mod.rs +++ b/lexer/src/states/mod.rs @@ -3,6 +3,7 @@ */ mod begin; +mod hash; mod id; pub use self::begin::Begin; diff --git a/lexer/src/token.rs b/lexer/src/token.rs index 0cda4ca..a97925b 100644 --- a/lexer/src/token.rs +++ b/lexer/src/token.rs @@ -11,7 +11,12 @@ pub struct Lex { } #[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum Token { LeftParen, RightParen, Id, } +pub enum Token { + Bool(bool), + LeftParen, + RightParen, + Id +} impl Lex { pub fn new(token: Token, value: &str, line: usize, offset: usize) -> Lex { diff --git a/lexer/tests/single_tokens.rs b/lexer/tests/single_tokens.rs index ef05b7c..25363a8 100644 --- a/lexer/tests/single_tokens.rs +++ b/lexer/tests/single_tokens.rs @@ -31,3 +31,35 @@ fn lexer_finds_id() { assert_eq!(lex.next(), Some(Ok(expected_lex))); assert_eq!(lex.next(), None); } + +#[test] +fn bool_short_true() { + let expected_lex = Lex::new(Token::Bool(true), "#t", 0, 0); + let mut lex = Lexer::new("#t".chars()); + assert_eq!(lex.next(), Some(Ok(expected_lex))); + assert_eq!(lex.next(), None);
+} + +#[test] +fn bool_short_false() { + let expected_lex = Lex::new(Token::Bool(false), "#f", 0, 0); + let mut lex = Lexer::new("#f".chars()); + assert_eq!(lex.next(), Some(Ok(expected_lex))); + assert_eq!(lex.next(), None); +} + +#[test] +fn bool_long_true() { + let expected_lex = Lex::new(Token::Bool(true), "#true", 0, 0); + let mut lex = Lexer::new("#true".chars()); + assert_eq!(lex.next(), Some(Ok(expected_lex))); + assert_eq!(lex.next(), None); +} + +#[test] +fn bool_long_false() { + let expected_lex = Lex::new(Token::Bool(false), "#false", 0, 0); + let mut lex = Lexer::new("#false".chars()); + assert_eq!(lex.next(), Some(Ok(expected_lex))); + assert_eq!(lex.next(), None); +}