[lexer] Re-kajigger the states for numbers

This commit is contained in:
Eryn Wells 2018-09-03 15:19:28 -07:00
parent b759ee4c57
commit def35966eb
5 changed files with 135 additions and 186 deletions

View file

@ -8,9 +8,16 @@ pub trait Lexable {
fn is_identifier_initial(&self) -> bool;
fn is_identifier_subsequent(&self) -> bool;
fn is_identifier_delimiter(&self) -> bool;
fn is_exactness(&self) -> bool;
fn is_radix(&self) -> bool;
}
impl Lexable for char {
/// Returns `true` when this character is an exactness prefix letter
/// (`e` or `i`), in either case.
fn is_exactness(&self) -> bool {
    // Compare case-insensitively so this predicate accepts the same letters
    // as `Exact::from`, which matches both the upper- and lowercase forms.
    match self.to_ascii_lowercase() {
        'i' | 'e' => true,
        _ => false,
    }
}
/// Returns `true` only for the opening parenthesis character `(`.
fn is_left_paren(&self) -> bool {
    match *self {
        '(' => true,
        _ => false,
    }
}
@ -30,6 +37,11 @@ impl Lexable for char {
/// Returns `true` when this character ends an identifier: any whitespace
/// character, or either parenthesis.
fn is_identifier_delimiter(&self) -> bool {
    if self.is_whitespace() {
        return true;
    }
    self.is_left_paren() || self.is_right_paren()
}
/// Returns `true` when this character is a radix prefix letter
/// (`b`, `d`, `o` or `x`), in either case.
fn is_radix(&self) -> bool {
    // Fold to lowercase first so this predicate accepts the same letters as
    // `Radix::from`, which matches both the upper- and lowercase forms.
    let radishes = ['b', 'd', 'o', 'x'];
    radishes.contains(&self.to_ascii_lowercase())
}
}
trait LexableSpecial {

View file

@ -2,8 +2,10 @@
* Eryn Wells <eryn@erynwells.me>
*/
use chars::Lexable;
use states::{State, StateResult};
use states::bool::Bool;
use states::number::Prefix;
use token::Token;
trait HashLexable {
@ -24,6 +26,13 @@ impl State for Hash {
let buf = c.to_ascii_lowercase().to_string();
StateResult::advance(Box::new(Bool::new(buf.as_str())))
},
c if c.is_radix() || c.is_exactness() => {
if let Some(st) = Prefix::with_char(c) {
StateResult::advance(Box::new(st))
} else {
StateResult::fail(format!("invalid numeric prefix character: {}", c).as_str())
}
},
_ => {
let msg = format!("Invalid character: {}", c);
StateResult::fail(msg.as_str())

View file

@ -8,6 +8,7 @@ use token::Token;
mod begin;
mod bool;
mod hash;
mod number;
mod id;
pub use self::begin::Begin;

View file

@ -1,16 +1,17 @@
/* lexer/src/states/number.rs
/* lexer/src/states/number/mod.rs
* Eryn Wells <eryn@erynwells.me>
*/
use std::collections::HashSet;
use chars::Lexable;
use super::{Resume, State, StateResult, Token};
mod prefix;
mod sign;
pub use self::prefix::Prefix;
trait NumberLexable {
/// Returns the value of this character interpreted as the indicator for a
/// base. In Scheme, you indicate the base of a number by prefixing it with
/// #[bodx].
fn base_value(&self) -> Option<Base>;
fn base_value(&self) -> Option<Radix>;
/// Returns the value of the character interpreted as a numerical digit.
fn digit_value(&self) -> Option<u8>;
fn sign_value(&self) -> Option<Sign>;
@ -20,219 +21,77 @@ trait NumberLexable {
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Base { Bin = 2, Oct = 8, Dec = 10, Hex = 16 }
/// The radix of a numeric literal. Each variant's discriminant is the
/// numeric base itself (2, 8, 10 or 16).
pub enum Radix { Bin = 2, Oct = 8, Dec = 10, Hex = 16 }
/// The sign of a numeric literal. The discriminants are -1 and 1, so a
/// variant cast to an integer can serve as a multiplicative sign factor.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Sign { Neg = -1, Pos = 1 }
/// Exactness of a numeric literal: `Yes` is what the `e` prefix letter maps
/// to and `No` is what the `i` prefix letter maps to (see `Exact::from`).
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Exact { Yes, No }
#[derive(Copy, Clone, Debug)]
pub struct Builder {
base: Option<Base>,
radix: Option<Radix>,
sign: Option<Sign>,
value: i64
exact: Option<Exact>,
value: i64,
}
// Number-lexing state types. Each one is a tuple struct wrapping a `Builder`.

/// State wrapping a `Builder`; constructed with a fresh `Builder` by `BeginState::new`.
#[derive(Debug)] pub struct BeginState(Builder);
/// State wrapping a `Builder` while digits are being lexed.
#[derive(Debug)] pub struct DigitState(Builder);
/// State wrapping a `Builder` after a `#` has been read.
#[derive(Debug)] pub struct HashState(Builder);
/// State wrapping a `Builder` after a sign character has been read.
#[derive(Debug)] pub struct SignState(Builder);
impl Radix {
    /// Maps a radix prefix letter (`b`, `o`, `d` or `x`, in either case) to
    /// its `Radix`. Any other character yields `None`.
    pub fn from(c: char) -> Option<Radix> {
        let radix = match c.to_ascii_lowercase() {
            'b' => Radix::Bin,
            'o' => Radix::Oct,
            'd' => Radix::Dec,
            'x' => Radix::Hex,
            _ => return None,
        };
        Some(radix)
    }
}
impl Base {
pub fn contains(&self, digit: u8) -> bool {
digit < (*self as u8)
impl Exact {
    /// Maps an exactness prefix letter to its `Exact` value: `e`/`E` is
    /// `Exact::Yes`, `i`/`I` is `Exact::No`. Any other character yields `None`.
    pub fn from(c: char) -> Option<Exact> {
        let exact = match c.to_ascii_lowercase() {
            'e' => Exact::Yes,
            'i' => Exact::No,
            _ => return None,
        };
        Some(exact)
    }
}
impl Builder {
pub fn new() -> Builder {
Builder {
base: None,
radix: None,
sign: None,
exact: None,
value: 0
}
}
fn base(&self) -> Base {
match self.base {
Some(b) => b,
None => Base::Dec
}
fn push_digit(&mut self, digit: u8) {
//self.value = self.value * self.base_value() as i64 + digit as i64;
}
fn sign(&self) -> Sign {
match self.sign {
Some(s) => s,
None => Sign::Pos
}
fn push_exact(&mut self, ex: Exact) {
self.exact = Some(ex);
}
fn push_base(&mut self, base: Base) {
self.base = Some(base);
fn push_radix(&mut self, radix: Radix) {
self.radix = Some(radix);
}
fn push_sign(&mut self, sign: Sign) {
self.sign = Some(sign);
}
fn push_digit(&mut self, digit: u8) {
self.value = self.value * self.base_value() as i64 + digit as i64;
}
fn resolve(&self) -> i64 {
let sign_factor: i64 = if let Some(sign) = self.sign { sign as i64 } else { 1 };
self.value * sign_factor
//let sign_factor: i64 = if let Some(sign) = self.sign { sign as i64 } else { 1 };
//self.value * sign_factor
0
}
fn seen_base(&self) -> bool { self.base.is_some() }
fn base_value(&self) -> u8 { self.base() as u8 }
}
impl BeginState {
    /// Creates the state around a freshly constructed `Builder`.
    pub fn new() -> BeginState {
        let builder = Builder::new();
        BeginState(builder)
    }
}
impl State for BeginState {
    /// Dispatches on `c`: a hash advances to `HashState`, a sign is recorded
    /// and advances to `SignState`, and a digit valid for the current base is
    /// pushed and advances to `DigitState`. Anything else fails.
    fn lex(&mut self, c: char) -> StateResult {
        if c.is_hash() {
            return StateResult::advance(Box::new(HashState(self.0)));
        }
        if c.is_sign() {
            let sign = c.sign_value().unwrap();
            self.0.push_sign(sign);
            return StateResult::advance(Box::new(SignState(self.0)));
        }
        if !c.is_digit(self.0.base_value() as u32) {
            return StateResult::fail(format!("invalid char: {}", c).as_str());
        }
        let value = c.digit_value().unwrap();
        if !self.0.base().contains(value) {
            return StateResult::fail(format!("invalid digit for current base: {}", c).as_str());
        }
        self.0.push_digit(value);
        StateResult::advance(Box::new(DigitState(self.0)))
    }

    /// Not implemented yet; always returns a placeholder error.
    fn none(&mut self) -> Result<Option<Token>, String> {
        // TODO: Implement.
        Err(String::from("blah"))
    }
}
impl State for HashState {
    /// Accepts a single base indicator character. The base is recorded and
    /// lexing returns to `BeginState`; a second base, or any other character,
    /// fails.
    fn lex(&mut self, c: char) -> StateResult {
        let base = match c.base_value() {
            Some(base) => base,
            None => return StateResult::fail(format!("invalid char: {}", c).as_str()),
        };
        if self.0.seen_base() {
            return StateResult::fail("got base again, despite already having one");
        }
        self.0.push_base(base);
        StateResult::advance(Box::new(BeginState(self.0)))
    }

    /// Not implemented yet; always returns a placeholder error.
    fn none(&mut self) -> Result<Option<Token>, String> {
        // TODO: Implement.
        Err(String::from("blah"))
    }
}
impl SignState {
    /// Returns the set containing the two sign characters, `+` and `-`.
    pub fn initials() -> HashSet<char> {
        ['+', '-'].iter().cloned().collect()
    }
}
impl State for SignState {
    /// Expects a digit valid for the current base, pushes it onto the builder
    /// and advances to `DigitState`. Any other input fails.
    fn lex(&mut self, c: char) -> StateResult {
        let digit = match c.digit_value() {
            Some(digit) => digit,
            None => return StateResult::fail(format!("invalid char: {}", c).as_str()),
        };
        if !self.0.base().contains(digit) {
            return StateResult::fail(format!("invalid digit for current base: {}", c).as_str());
        }
        self.0.push_digit(digit);
        StateResult::advance(Box::new(DigitState(self.0)))
    }

    /// Not implemented yet; always returns a placeholder error.
    fn none(&mut self) -> Result<Option<Token>, String> {
        // TODO: Implement.
        Err(String::from("blah"))
    }
}
impl DigitState {
pub fn initials() -> HashSet<char> {
let foldp = |acc: HashSet<char>, x: u8| {
let c = char::from(x);
acc.insert(c);
acc
};
'0'..='9'.chain(('a' as u8)..=('f' as u8)).fold(HashSet::new(), foldp)
}
}
impl State for DigitState {
    /// Consumes further digits valid for the current base and stays in this
    /// state. An identifier delimiter emits the resolved number as
    /// `Token::Num`, resuming at the delimiter. Any other input fails.
    fn lex(&mut self, c: char) -> StateResult {
        match c.digit_value() {
            Some(digit) if self.0.base().contains(digit) => {
                self.0.push_digit(digit);
                StateResult::Continue
            }
            Some(_) => {
                StateResult::fail(format!("invalid digit for current base: {}", c).as_str())
            }
            None if c.is_identifier_delimiter() => {
                StateResult::emit(Token::Num(self.0.resolve()), Resume::Here)
            }
            None => StateResult::fail(format!("invalid char: {}", c).as_str()),
        }
    }

    /// Not implemented yet; always returns a placeholder error.
    fn none(&mut self) -> Result<Option<Token>, String> {
        // TODO: Implement.
        Err(String::from("blah"))
    }
}
impl NumberLexable for char {
/// Maps a lowercase base indicator (`b`, `o`, `d` or `x`) to its `Base`.
/// Any other character yields `None`.
fn base_value(&self) -> Option<Base> {
    let base = match *self {
        'b' => Base::Bin,
        'o' => Base::Oct,
        'd' => Base::Dec,
        'x' => Base::Hex,
        _ => return None,
    };
    Some(base)
}
fn digit_value(&self) -> Option<u8> {
let ascii_value = *self as u32;
match *self {
'0'...'9' => Some((ascii_value - '0' as u32) as u8),
'a'...'f' => Some((ascii_value - 'a' as u32 + 10) as u8),
'A'...'F' => Some((ascii_value - 'A' as u32 + 10) as u8),
_ => None
}
}
/// Maps `+` to `Sign::Pos` and `-` to `Sign::Neg`. Any other character
/// yields `None`.
fn sign_value(&self) -> Option<Sign> {
    if *self == '+' {
        Some(Sign::Pos)
    } else if *self == '-' {
        Some(Sign::Neg)
    } else {
        None
    }
}
/// Returns `true` only for the `.` character.
fn is_dot(&self) -> bool {
    match *self {
        '.' => true,
        _ => false,
    }
}
/// Returns `true` only for the `#` character.
fn is_hash(&self) -> bool {
    match *self {
        '#' => true,
        _ => false,
    }
}
/// Returns `true` when `sign_value` recognizes this character as a sign.
fn is_sign(&self) -> bool {
    match self.sign_value() {
        Some(_) => true,
        None => false,
    }
}
fn seen_exact(&self) -> bool { self.exact.is_some() }
fn seen_radix(&self) -> bool { self.radix.is_some() }
fn seen_sign(&self) -> bool { self.sign.is_some() }
}

View file

@ -0,0 +1,68 @@
/* lexer/src/states/number/prefix.rs
* Eryn Wells <eryn@erynwells.me>
*/
use super::{Radix, Exact};
use states::{State, StateResult};
use states::number::Builder;
use token::Token;
/// Lexer state holding the `Builder` after one prefix letter (exactness or
/// radix) has been recorded; its `lex` accepts only `#`, advancing to `Hash`.
#[derive(Debug)] pub struct Prefix(Builder);
/// Lexer state holding the `Builder` after a `#`; its `lex` accepts an
/// exactness or radix letter not already recorded and advances to `Prefix`.
#[derive(Debug)] pub struct Hash(Builder);
impl Prefix {
    /// Wraps an existing `Builder` in a `Prefix` state.
    pub fn new(b: Builder) -> Prefix {
        Prefix(b)
    }

    /// Builds a `Prefix` state from a single prefix letter. The letter is
    /// tried as an exactness marker first and as a radix marker second;
    /// `None` is returned when it is neither.
    pub fn with_char(c: char) -> Option<Prefix> {
        let mut builder = Builder::new();
        match Exact::from(c) {
            Some(ex) => builder.push_exact(ex),
            // `?` bails out with `None` when `c` is not a radix letter either.
            None => builder.push_radix(Radix::from(c)?),
        }
        Some(Prefix(builder))
    }
}
impl State for Prefix {
    /// Accepts only `#`, which advances to the `Hash` state carrying the
    /// current builder. Every other character fails.
    fn lex(&mut self, c: char) -> StateResult {
        if c == '#' {
            StateResult::advance(Box::new(Hash(self.0)))
        } else {
            StateResult::fail(format!("invalid char: {}", c).as_str())
        }
    }

    /// Placeholder; always returns an error.
    fn none(&mut self) -> Result<Option<Token>, String> {
        Err(String::from("blah"))
    }
}
impl State for Hash {
    /// Accepts one exactness or radix letter that has not been recorded yet,
    /// stores it in the builder and advances back to `Prefix`. A repeated
    /// kind of prefix, or any other character, fails.
    fn lex(&mut self, c: char) -> StateResult {
        // Exactness is tried before radix; a letter is only recorded when its
        // kind has not been seen on this builder.
        let accepted = if let Some(ex) = Exact::from(c) {
            if self.0.seen_exact() {
                false
            } else {
                self.0.push_exact(ex);
                true
            }
        } else if let Some(rx) = Radix::from(c) {
            if self.0.seen_radix() {
                false
            } else {
                self.0.push_radix(rx);
                true
            }
        } else {
            false
        };

        if accepted {
            StateResult::advance(Box::new(Prefix::new(self.0)))
        } else {
            StateResult::fail(format!("invalid char: {}", c).as_str())
        }
    }

    /// Placeholder; always returns an error.
    fn none(&mut self) -> Result<Option<Token>, String> {
        Err(String::from("blah"))
    }
}