From def35966eb19b6434bda8d84de789f5c3aeee207 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Mon, 3 Sep 2018 15:19:28 -0700 Subject: [PATCH] [lexer] Re-kajigger the states for numbers --- lexer/src/chars.rs | 12 ++ lexer/src/states/hash.rs | 9 ++ lexer/src/states/mod.rs | 1 + lexer/src/states/number/mod.rs | 231 ++++++------------------------ lexer/src/states/number/prefix.rs | 68 +++++++++ 5 files changed, 135 insertions(+), 186 deletions(-) create mode 100644 lexer/src/states/number/prefix.rs diff --git a/lexer/src/chars.rs b/lexer/src/chars.rs index 60192c5..6ef0375 100644 --- a/lexer/src/chars.rs +++ b/lexer/src/chars.rs @@ -8,9 +8,16 @@ pub trait Lexable { fn is_identifier_initial(&self) -> bool; fn is_identifier_subsequent(&self) -> bool; fn is_identifier_delimiter(&self) -> bool; + + fn is_exactness(&self) -> bool; + fn is_radix(&self) -> bool; } impl Lexable for char { + fn is_exactness(&self) -> bool { + *self == 'i' || *self == 'e' + } + fn is_left_paren(&self) -> bool { *self == '(' } @@ -30,6 +37,11 @@ impl Lexable for char { fn is_identifier_delimiter(&self) -> bool { self.is_whitespace() || self.is_left_paren() || self.is_right_paren() } + + fn is_radix(&self) -> bool { + let radishes = &['b', 'd', 'o', 'x']; + radishes.contains(self) + } } trait LexableSpecial { diff --git a/lexer/src/states/hash.rs b/lexer/src/states/hash.rs index 14ef37b..04f078b 100644 --- a/lexer/src/states/hash.rs +++ b/lexer/src/states/hash.rs @@ -2,8 +2,10 @@ * Eryn Wells */ +use chars::Lexable; use states::{State, StateResult}; use states::bool::Bool; +use states::number::Prefix; use token::Token; trait HashLexable { @@ -24,6 +26,13 @@ impl State for Hash { let buf = c.to_ascii_lowercase().to_string(); StateResult::advance(Box::new(Bool::new(buf.as_str()))) }, + c if c.is_radix() || c.is_exactness() => { + if let Some(st) = Prefix::with_char(c) { + StateResult::advance(Box::new(st)) + } else { + StateResult::fail(format!("invalid numeric prefix character: {}", c).as_str()) + } + }, _ => { let msg = format!("Invalid character: {}", c); StateResult::fail(msg.as_str()) diff --git a/lexer/src/states/mod.rs b/lexer/src/states/mod.rs index e8b244e..3c6194d 100644 --- a/lexer/src/states/mod.rs +++ b/lexer/src/states/mod.rs @@ -8,6 +8,7 @@ use token::Token; mod begin; mod bool; mod hash; +mod number; mod id; pub use self::begin::Begin; diff --git a/lexer/src/states/number/mod.rs b/lexer/src/states/number/mod.rs index 8c0aec7..27d0315 100644 --- a/lexer/src/states/number/mod.rs +++ b/lexer/src/states/number/mod.rs @@ -1,16 +1,17 @@ -/* lexer/src/states/number.rs +/* lexer/src/states/number/mod.rs * Eryn Wells */ -use std::collections::HashSet; -use chars::Lexable; -use super::{Resume, State, StateResult, Token}; +mod prefix; +mod sign; + +pub use self::prefix::Prefix; trait NumberLexable { /// Returns the value of this character interpreted as the indicator for a /// base. In Scheme, you indicate the base of a number by prefixing it with /// #[bodx]. - fn base_value(&self) -> Option; + fn base_value(&self) -> Option; /// Returns the value of the character interpreted as a numerical digit. fn digit_value(&self) -> Option; fn sign_value(&self) -> Option; @@ -20,219 +21,77 @@ trait NumberLexable { } #[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub enum Base { Bin = 2, Oct = 8, Dec = 10, Hex = 16 } +pub enum Radix { Bin = 2, Oct = 8, Dec = 10, Hex = 16 } #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum Sign { Neg = -1, Pos = 1 } +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum Exact { Yes, No } + #[derive(Copy, Clone, Debug)] pub struct Builder { - base: Option, + radix: Option, sign: Option, - value: i64 + exact: Option, + value: i64, } -#[derive(Debug)] pub struct BeginState(Builder); -#[derive(Debug)] pub struct DigitState(Builder); -#[derive(Debug)] pub struct HashState(Builder); -#[derive(Debug)] pub struct SignState(Builder); +impl Radix { + pub fn from(c: char) -> Option { + match c { + 'b'|'B' => Some(Radix::Bin), + 'o'|'O' => Some(Radix::Oct), + 'd'|'D' => Some(Radix::Dec), + 'x'|'X' => Some(Radix::Hex), + _ => None + } + } +} -impl Base { - pub fn contains(&self, digit: u8) -> bool { - digit < (*self as u8) +impl Exact { + pub fn from(c: char) -> Option { + match c { + 'i'|'I' => Some(Exact::No), + 'e'|'E' => Some(Exact::Yes), + _ => None + } } } impl Builder { pub fn new() -> Builder { Builder { - base: None, + radix: None, sign: None, + exact: None, value: 0 } } - fn base(&self) -> Base { - match self.base { - Some(b) => b, - None => Base::Dec - } + fn push_digit(&mut self, digit: u8) { + //self.value = self.value * self.base_value() as i64 + digit as i64; } - fn sign(&self) -> Sign { - match self.sign { - Some(s) => s, - None => Sign::Pos - } + fn push_exact(&mut self, ex: Exact) { + self.exact = Some(ex); } - fn push_base(&mut self, base: Base) { - self.base = Some(base); + fn push_radix(&mut self, radix: Radix) { + self.radix = Some(radix); } fn push_sign(&mut self, sign: Sign) { self.sign = Some(sign); } - fn push_digit(&mut self, digit: u8) { - self.value = self.value * self.base_value() as i64 + digit as i64; - } - fn resolve(&self) -> i64 { - let sign_factor: i64 = if let Some(sign) = self.sign { sign as i64 } else { 1 }; - self.value * sign_factor + //let sign_factor: i64 = if let Some(sign) = self.sign { sign as i64 } else { 1 }; + //self.value * sign_factor + 0 } - fn seen_base(&self) -> bool { self.base.is_some() } - - fn base_value(&self) -> u8 { self.base() as u8 } -} - -impl BeginState { - pub fn new() -> BeginState { - BeginState (Builder::new()) - } -} - -impl State for BeginState { - fn lex(&mut self, c: char) -> StateResult { - match c { - c if c.is_hash() => StateResult::advance(Box::new(HashState(self.0))), - c if c.is_sign() => { - self.0.push_sign(c.sign_value().unwrap()); - StateResult::advance(Box::new(SignState(self.0))) - }, - c if c.is_digit(self.0.base_value() as u32) => { - let value = c.digit_value().unwrap(); - if self.0.base().contains(value) { - self.0.push_digit(value); - StateResult::advance(Box::new(DigitState(self.0))) - } else { - StateResult::fail(format!("invalid digit for current base: {}", c).as_str()) - } - }, - _ => StateResult::fail(format!("invalid char: {}", c).as_str()) - } - } - - fn none(&mut self) -> Result, String> { - // TODO: Implement. - Err("blah".to_string()) - } -} - -impl State for HashState { - fn lex(&mut self, c: char) -> StateResult { - if let Some(base) = c.base_value() { - if !self.0.seen_base() { - self.0.push_base(base); - StateResult::advance(Box::new(BeginState (self.0))) - } else { - StateResult::fail("got base again, despite already having one") - } - } else { - StateResult::fail(format!("invalid char: {}", c).as_str()) - } - } - - fn none(&mut self) -> Result, String> { - // TODO: Implement. - Err("blah".to_string()) - } -} - -impl SignState { - pub fn initials() -> HashSet { - let mut inits = HashSet::new(); - inits.insert('+'); - inits.insert('-'); - inits - } -} - -impl State for SignState { - fn lex(&mut self, c: char) -> StateResult { - if let Some(digit) = c.digit_value() { - if self.0.base().contains(digit) { - self.0.push_digit(digit); - StateResult::advance(Box::new(DigitState(self.0))) - } else { - StateResult::fail(format!("invalid digit for current base: {}", c).as_str()) - } - } else { - StateResult::fail(format!("invalid char: {}", c).as_str()) - } - } - - fn none(&mut self) -> Result, String> { - // TODO: Implement. - Err("blah".to_string()) - } -} - -impl DigitState { - pub fn initials() -> HashSet { - let foldp = |acc: HashSet, x: u8| { - let c = char::from(x); - acc.insert(c); - acc - }; - '0'..='9'.chain(('a' as u8)..=('f' as u8)).fold(HashSet::new(), foldp) - } -} - -impl State for DigitState { - fn lex(&mut self, c: char) -> StateResult { - if let Some(digit) = c.digit_value() { - if self.0.base().contains(digit) { - self.0.push_digit(digit); - StateResult::Continue - } else { - StateResult::fail(format!("invalid digit for current base: {}", c).as_str()) - } - } else if c.is_identifier_delimiter() { - StateResult::emit(Token::Num(self.0.resolve()), Resume::Here) - } else { - StateResult::fail(format!("invalid char: {}", c).as_str()) - } - } - - fn none(&mut self) -> Result, String> { - // TODO: Implement. - Err("blah".to_string()) - } -} - -impl NumberLexable for char { - fn base_value(&self) -> Option { - match *self { - 'b' => Some(Base::Bin), - 'o' => Some(Base::Oct), - 'd' => Some(Base::Dec), - 'x' => Some(Base::Hex), - _ => None - } - } - - fn digit_value(&self) -> Option { - let ascii_value = *self as u32; - match *self { - '0'...'9' => Some((ascii_value - '0' as u32) as u8), - 'a'...'f' => Some((ascii_value - 'a' as u32 + 10) as u8), - 'A'...'F' => Some((ascii_value - 'A' as u32 + 10) as u8), - _ => None - } - } - - fn sign_value(&self) -> Option { - match *self { - '+' => Some(Sign::Pos), - '-' => Some(Sign::Neg), - _ => None - } - } - - fn is_dot(&self) -> bool { *self == '.' } - fn is_hash(&self) -> bool { *self == '#' } - fn is_sign(&self) -> bool { self.sign_value().is_some() } + fn seen_exact(&self) -> bool { self.exact.is_some() } + fn seen_radix(&self) -> bool { self.radix.is_some() } + fn seen_sign(&self) -> bool { self.sign.is_some() } } diff --git a/lexer/src/states/number/prefix.rs b/lexer/src/states/number/prefix.rs new file mode 100644 index 0000000..9c68043 --- /dev/null +++ b/lexer/src/states/number/prefix.rs @@ -0,0 +1,68 @@ +/* lexer/src/states/number/prefix.rs + * Eryn Wells + */ + +use super::{Radix, Exact}; +use states::{State, StateResult}; +use states::number::Builder; +use token::Token; + +#[derive(Debug)] pub struct Prefix(Builder); +#[derive(Debug)] pub struct Hash(Builder); + +impl Prefix { + pub fn new(b: Builder) -> Prefix { + Prefix(b) + } + + pub fn with_char(c: char) -> Option { + let mut builder = Builder::new(); + if let Some(ex) = Exact::from(c) { + builder.push_exact(ex); + } else if let Some(rx) = Radix::from(c) { + builder.push_radix(rx); + } else { + return None; + } + Some(Prefix::new(builder)) + } +} + +impl State for Prefix { + fn lex(&mut self, c: char) -> StateResult { + match c { + '#' => StateResult::advance(Box::new(Hash(self.0))), + _ => StateResult::fail(format!("invalid char: {}", c).as_str()) + } + } + + fn none(&mut self) -> Result, String> { + Err("blah".to_string()) + } +} + +impl State for Hash { + fn lex(&mut self, c: char) -> StateResult { + if let Some(ex) = Exact::from(c) { + if !self.0.seen_exact() { + self.0.push_exact(ex); + StateResult::advance(Box::new(Prefix::new(self.0))) + } else { + StateResult::fail(format!("invalid char: {}", c).as_str()) + } + } else if let Some(rx) = Radix::from(c) { + if !self.0.seen_radix() { + self.0.push_radix(rx); + StateResult::advance(Box::new(Prefix::new(self.0))) + } else { + StateResult::fail(format!("invalid char: {}", c).as_str()) + } + } else { + StateResult::fail(format!("invalid char: {}", c).as_str()) + } + } + + fn none(&mut self) -> Result, String> { + Err("blah".to_string()) + } +}