[lexer] Re-kajigger the states for numbers
This commit is contained in:
parent
b759ee4c57
commit
def35966eb
5 changed files with 135 additions and 186 deletions
|
@ -8,9 +8,16 @@ pub trait Lexable {
|
|||
fn is_identifier_initial(&self) -> bool;
|
||||
fn is_identifier_subsequent(&self) -> bool;
|
||||
fn is_identifier_delimiter(&self) -> bool;
|
||||
|
||||
fn is_exactness(&self) -> bool;
|
||||
fn is_radix(&self) -> bool;
|
||||
}
|
||||
|
||||
impl Lexable for char {
|
||||
fn is_exactness(&self) -> bool {
|
||||
*self == 'i' || *self == 'e'
|
||||
}
|
||||
|
||||
fn is_left_paren(&self) -> bool {
|
||||
*self == '('
|
||||
}
|
||||
|
@ -30,6 +37,11 @@ impl Lexable for char {
|
|||
fn is_identifier_delimiter(&self) -> bool {
|
||||
self.is_whitespace() || self.is_left_paren() || self.is_right_paren()
|
||||
}
|
||||
|
||||
fn is_radix(&self) -> bool {
|
||||
let radishes = &['b', 'd', 'o', 'x'];
|
||||
radishes.contains(self)
|
||||
}
|
||||
}
|
||||
|
||||
trait LexableSpecial {
|
||||
|
|
|
@ -2,8 +2,10 @@
|
|||
* Eryn Wells <eryn@erynwells.me>
|
||||
*/
|
||||
|
||||
use chars::Lexable;
|
||||
use states::{State, StateResult};
|
||||
use states::bool::Bool;
|
||||
use states::number::Prefix;
|
||||
use token::Token;
|
||||
|
||||
trait HashLexable {
|
||||
|
@ -24,6 +26,13 @@ impl State for Hash {
|
|||
let buf = c.to_ascii_lowercase().to_string();
|
||||
StateResult::advance(Box::new(Bool::new(buf.as_str())))
|
||||
},
|
||||
c if c.is_radix() || c.is_exactness() => {
|
||||
if let Some(st) = Prefix::with_char(c) {
|
||||
StateResult::advance(Box::new(st))
|
||||
} else {
|
||||
StateResult::fail(format!("invalid numeric prefix character: {}", c).as_str())
|
||||
}
|
||||
},
|
||||
_ => {
|
||||
let msg = format!("Invalid character: {}", c);
|
||||
StateResult::fail(msg.as_str())
|
||||
|
|
|
@ -8,6 +8,7 @@ use token::Token;
|
|||
mod begin;
|
||||
mod bool;
|
||||
mod hash;
|
||||
mod number;
|
||||
mod id;
|
||||
|
||||
pub use self::begin::Begin;
|
||||
|
|
|
@ -1,16 +1,17 @@
|
|||
/* lexer/src/states/number.rs
|
||||
/* lexer/src/states/number/mod.rs
|
||||
* Eryn Wells <eryn@erynwells.me>
|
||||
*/
|
||||
|
||||
use std::collections::HashSet;
|
||||
use chars::Lexable;
|
||||
use super::{Resume, State, StateResult, Token};
|
||||
mod prefix;
|
||||
mod sign;
|
||||
|
||||
pub use self::prefix::Prefix;
|
||||
|
||||
trait NumberLexable {
|
||||
/// Returns the value of this character interpreted as the indicator for a
|
||||
/// base. In Scheme, you indicate the base of a number by prefixing it with
|
||||
/// #[bodx].
|
||||
fn base_value(&self) -> Option<Base>;
|
||||
fn base_value(&self) -> Option<Radix>;
|
||||
/// Returns the value of the character interpreted as a numerical digit.
|
||||
fn digit_value(&self) -> Option<u8>;
|
||||
fn sign_value(&self) -> Option<Sign>;
|
||||
|
@ -20,219 +21,77 @@ trait NumberLexable {
|
|||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
||||
pub enum Base { Bin = 2, Oct = 8, Dec = 10, Hex = 16 }
|
||||
pub enum Radix { Bin = 2, Oct = 8, Dec = 10, Hex = 16 }
|
||||
|
||||
/// Sign of a number.
///
/// The discriminants are `-1` and `1`, so casting a variant to an integer
/// yields a value usable as a multiplier.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Sign {
    Neg = -1,
    Pos = 1,
}
|
||||
|
||||
/// Exactness of a number, as selected by an `e` (`Yes`) or `i` (`No`)
/// prefix character.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Exact {
    Yes,
    No,
}
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub struct Builder {
|
||||
base: Option<Base>,
|
||||
radix: Option<Radix>,
|
||||
sign: Option<Sign>,
|
||||
value: i64
|
||||
exact: Option<Exact>,
|
||||
value: i64,
|
||||
}
|
||||
|
||||
#[derive(Debug)] pub struct BeginState(Builder);
|
||||
#[derive(Debug)] pub struct DigitState(Builder);
|
||||
#[derive(Debug)] pub struct HashState(Builder);
|
||||
#[derive(Debug)] pub struct SignState(Builder);
|
||||
impl Radix {
|
||||
pub fn from(c: char) -> Option<Radix> {
|
||||
match c {
|
||||
'b'|'B' => Some(Radix::Bin),
|
||||
'o'|'O' => Some(Radix::Oct),
|
||||
'd'|'D' => Some(Radix::Dec),
|
||||
'x'|'X' => Some(Radix::Hex),
|
||||
_ => None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Base {
|
||||
pub fn contains(&self, digit: u8) -> bool {
|
||||
digit < (*self as u8)
|
||||
impl Exact {
|
||||
pub fn from(c: char) -> Option<Exact> {
|
||||
match c {
|
||||
'i'|'I' => Some(Exact::No),
|
||||
'e'|'E' => Some(Exact::Yes),
|
||||
_ => None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Builder {
|
||||
pub fn new() -> Builder {
|
||||
Builder {
|
||||
base: None,
|
||||
radix: None,
|
||||
sign: None,
|
||||
exact: None,
|
||||
value: 0
|
||||
}
|
||||
}
|
||||
|
||||
fn base(&self) -> Base {
|
||||
match self.base {
|
||||
Some(b) => b,
|
||||
None => Base::Dec
|
||||
}
|
||||
fn push_digit(&mut self, digit: u8) {
|
||||
//self.value = self.value * self.base_value() as i64 + digit as i64;
|
||||
}
|
||||
|
||||
fn sign(&self) -> Sign {
|
||||
match self.sign {
|
||||
Some(s) => s,
|
||||
None => Sign::Pos
|
||||
}
|
||||
fn push_exact(&mut self, ex: Exact) {
|
||||
self.exact = Some(ex);
|
||||
}
|
||||
|
||||
fn push_base(&mut self, base: Base) {
|
||||
self.base = Some(base);
|
||||
fn push_radix(&mut self, radix: Radix) {
|
||||
self.radix = Some(radix);
|
||||
}
|
||||
|
||||
fn push_sign(&mut self, sign: Sign) {
|
||||
self.sign = Some(sign);
|
||||
}
|
||||
|
||||
fn push_digit(&mut self, digit: u8) {
|
||||
self.value = self.value * self.base_value() as i64 + digit as i64;
|
||||
}
|
||||
|
||||
fn resolve(&self) -> i64 {
|
||||
let sign_factor: i64 = if let Some(sign) = self.sign { sign as i64 } else { 1 };
|
||||
self.value * sign_factor
|
||||
//let sign_factor: i64 = if let Some(sign) = self.sign { sign as i64 } else { 1 };
|
||||
//self.value * sign_factor
|
||||
0
|
||||
}
|
||||
|
||||
fn seen_base(&self) -> bool { self.base.is_some() }
|
||||
|
||||
fn base_value(&self) -> u8 { self.base() as u8 }
|
||||
}
|
||||
|
||||
impl BeginState {
|
||||
pub fn new() -> BeginState {
|
||||
BeginState (Builder::new())
|
||||
}
|
||||
}
|
||||
|
||||
impl State for BeginState {
|
||||
fn lex(&mut self, c: char) -> StateResult {
|
||||
match c {
|
||||
c if c.is_hash() => StateResult::advance(Box::new(HashState(self.0))),
|
||||
c if c.is_sign() => {
|
||||
self.0.push_sign(c.sign_value().unwrap());
|
||||
StateResult::advance(Box::new(SignState(self.0)))
|
||||
},
|
||||
c if c.is_digit(self.0.base_value() as u32) => {
|
||||
let value = c.digit_value().unwrap();
|
||||
if self.0.base().contains(value) {
|
||||
self.0.push_digit(value);
|
||||
StateResult::advance(Box::new(DigitState(self.0)))
|
||||
} else {
|
||||
StateResult::fail(format!("invalid digit for current base: {}", c).as_str())
|
||||
}
|
||||
},
|
||||
_ => StateResult::fail(format!("invalid char: {}", c).as_str())
|
||||
}
|
||||
}
|
||||
|
||||
fn none(&mut self) -> Result<Option<Token>, String> {
|
||||
// TODO: Implement.
|
||||
Err("blah".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl State for HashState {
|
||||
fn lex(&mut self, c: char) -> StateResult {
|
||||
if let Some(base) = c.base_value() {
|
||||
if !self.0.seen_base() {
|
||||
self.0.push_base(base);
|
||||
StateResult::advance(Box::new(BeginState (self.0)))
|
||||
} else {
|
||||
StateResult::fail("got base again, despite already having one")
|
||||
}
|
||||
} else {
|
||||
StateResult::fail(format!("invalid char: {}", c).as_str())
|
||||
}
|
||||
}
|
||||
|
||||
fn none(&mut self) -> Result<Option<Token>, String> {
|
||||
// TODO: Implement.
|
||||
Err("blah".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl SignState {
|
||||
pub fn initials() -> HashSet<char> {
|
||||
let mut inits = HashSet::new();
|
||||
inits.insert('+');
|
||||
inits.insert('-');
|
||||
inits
|
||||
}
|
||||
}
|
||||
|
||||
impl State for SignState {
|
||||
fn lex(&mut self, c: char) -> StateResult {
|
||||
if let Some(digit) = c.digit_value() {
|
||||
if self.0.base().contains(digit) {
|
||||
self.0.push_digit(digit);
|
||||
StateResult::advance(Box::new(DigitState(self.0)))
|
||||
} else {
|
||||
StateResult::fail(format!("invalid digit for current base: {}", c).as_str())
|
||||
}
|
||||
} else {
|
||||
StateResult::fail(format!("invalid char: {}", c).as_str())
|
||||
}
|
||||
}
|
||||
|
||||
fn none(&mut self) -> Result<Option<Token>, String> {
|
||||
// TODO: Implement.
|
||||
Err("blah".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl DigitState {
|
||||
pub fn initials() -> HashSet<char> {
|
||||
let foldp = |acc: HashSet<char>, x: u8| {
|
||||
let c = char::from(x);
|
||||
acc.insert(c);
|
||||
acc
|
||||
};
|
||||
'0'..='9'.chain(('a' as u8)..=('f' as u8)).fold(HashSet::new(), foldp)
|
||||
}
|
||||
}
|
||||
|
||||
impl State for DigitState {
|
||||
fn lex(&mut self, c: char) -> StateResult {
|
||||
if let Some(digit) = c.digit_value() {
|
||||
if self.0.base().contains(digit) {
|
||||
self.0.push_digit(digit);
|
||||
StateResult::Continue
|
||||
} else {
|
||||
StateResult::fail(format!("invalid digit for current base: {}", c).as_str())
|
||||
}
|
||||
} else if c.is_identifier_delimiter() {
|
||||
StateResult::emit(Token::Num(self.0.resolve()), Resume::Here)
|
||||
} else {
|
||||
StateResult::fail(format!("invalid char: {}", c).as_str())
|
||||
}
|
||||
}
|
||||
|
||||
fn none(&mut self) -> Result<Option<Token>, String> {
|
||||
// TODO: Implement.
|
||||
Err("blah".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl NumberLexable for char {
|
||||
fn base_value(&self) -> Option<Base> {
|
||||
match *self {
|
||||
'b' => Some(Base::Bin),
|
||||
'o' => Some(Base::Oct),
|
||||
'd' => Some(Base::Dec),
|
||||
'x' => Some(Base::Hex),
|
||||
_ => None
|
||||
}
|
||||
}
|
||||
|
||||
fn digit_value(&self) -> Option<u8> {
|
||||
let ascii_value = *self as u32;
|
||||
match *self {
|
||||
'0'...'9' => Some((ascii_value - '0' as u32) as u8),
|
||||
'a'...'f' => Some((ascii_value - 'a' as u32 + 10) as u8),
|
||||
'A'...'F' => Some((ascii_value - 'A' as u32 + 10) as u8),
|
||||
_ => None
|
||||
}
|
||||
}
|
||||
|
||||
fn sign_value(&self) -> Option<Sign> {
|
||||
match *self {
|
||||
'+' => Some(Sign::Pos),
|
||||
'-' => Some(Sign::Neg),
|
||||
_ => None
|
||||
}
|
||||
}
|
||||
|
||||
fn is_dot(&self) -> bool { *self == '.' }
|
||||
fn is_hash(&self) -> bool { *self == '#' }
|
||||
fn is_sign(&self) -> bool { self.sign_value().is_some() }
|
||||
fn seen_exact(&self) -> bool { self.exact.is_some() }
|
||||
fn seen_radix(&self) -> bool { self.radix.is_some() }
|
||||
fn seen_sign(&self) -> bool { self.sign.is_some() }
|
||||
}
|
||||
|
|
68
lexer/src/states/number/prefix.rs
Normal file
68
lexer/src/states/number/prefix.rs
Normal file
|
@ -0,0 +1,68 @@
|
|||
/* lexer/src/states/number/prefix.rs
|
||||
* Eryn Wells <eryn@erynwells.me>
|
||||
*/
|
||||
|
||||
use super::{Radix, Exact};
|
||||
use states::{State, StateResult};
|
||||
use states::number::Builder;
|
||||
use token::Token;
|
||||
|
||||
/// Number-lexing state that carries the `Builder` accumulated so far and
/// accepts only a `#` as its next character.
#[derive(Debug)]
pub struct Prefix(Builder);
|
||||
/// Number-lexing state entered once `Prefix` has read a `#`; it expects an
/// exactness or radix character next.
#[derive(Debug)]
pub struct Hash(Builder);
|
||||
|
||||
impl Prefix {
|
||||
pub fn new(b: Builder) -> Prefix {
|
||||
Prefix(b)
|
||||
}
|
||||
|
||||
pub fn with_char(c: char) -> Option<Prefix> {
|
||||
let mut builder = Builder::new();
|
||||
if let Some(ex) = Exact::from(c) {
|
||||
builder.push_exact(ex);
|
||||
} else if let Some(rx) = Radix::from(c) {
|
||||
builder.push_radix(rx);
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
Some(Prefix::new(builder))
|
||||
}
|
||||
}
|
||||
|
||||
impl State for Prefix {
|
||||
fn lex(&mut self, c: char) -> StateResult {
|
||||
match c {
|
||||
'#' => StateResult::advance(Box::new(Hash(self.0))),
|
||||
_ => StateResult::fail(format!("invalid char: {}", c).as_str())
|
||||
}
|
||||
}
|
||||
|
||||
fn none(&mut self) -> Result<Option<Token>, String> {
|
||||
Err("blah".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl State for Hash {
|
||||
fn lex(&mut self, c: char) -> StateResult {
|
||||
if let Some(ex) = Exact::from(c) {
|
||||
if !self.0.seen_exact() {
|
||||
self.0.push_exact(ex);
|
||||
StateResult::advance(Box::new(Prefix::new(self.0)))
|
||||
} else {
|
||||
StateResult::fail(format!("invalid char: {}", c).as_str())
|
||||
}
|
||||
} else if let Some(rx) = Radix::from(c) {
|
||||
if !self.0.seen_radix() {
|
||||
self.0.push_radix(rx);
|
||||
StateResult::advance(Box::new(Prefix::new(self.0)))
|
||||
} else {
|
||||
StateResult::fail(format!("invalid char: {}", c).as_str())
|
||||
}
|
||||
} else {
|
||||
StateResult::fail(format!("invalid char: {}", c).as_str())
|
||||
}
|
||||
}
|
||||
|
||||
fn none(&mut self) -> Result<Option<Token>, String> {
|
||||
Err("blah".to_string())
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue