[lexer] Re-kajigger the states for numbers
This commit is contained in:
		
							parent
							
								
									b759ee4c57
								
							
						
					
					
						commit
						def35966eb
					
				
					 5 changed files with 135 additions and 186 deletions
				
			
		|  | @ -8,9 +8,16 @@ pub trait Lexable { | |||
|     fn is_identifier_initial(&self) -> bool; | ||||
|     fn is_identifier_subsequent(&self) -> bool; | ||||
|     fn is_identifier_delimiter(&self) -> bool; | ||||
| 
 | ||||
|     fn is_exactness(&self) -> bool; | ||||
|     fn is_radix(&self) -> bool; | ||||
| } | ||||
| 
 | ||||
| impl Lexable for char { | ||||
|     fn is_exactness(&self) -> bool { | ||||
|         *self == 'i' || *self == 'e' | ||||
|     } | ||||
| 
 | ||||
|     fn is_left_paren(&self) -> bool { | ||||
|         *self == '(' | ||||
|     } | ||||
|  | @ -30,6 +37,11 @@ impl Lexable for char { | |||
|     fn is_identifier_delimiter(&self) -> bool { | ||||
|         self.is_whitespace() || self.is_left_paren() || self.is_right_paren() | ||||
|     } | ||||
| 
 | ||||
|     fn is_radix(&self) -> bool { | ||||
|         let radishes = &['b', 'd', 'o', 'x']; | ||||
|         radishes.contains(self) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| trait LexableSpecial { | ||||
|  |  | |||
|  | @ -2,8 +2,10 @@ | |||
|  * Eryn Wells <eryn@erynwells.me> | ||||
|  */ | ||||
| 
 | ||||
| use chars::Lexable; | ||||
| use states::{State, StateResult}; | ||||
| use states::bool::Bool; | ||||
| use states::number::Prefix; | ||||
| use token::Token; | ||||
| 
 | ||||
| trait HashLexable { | ||||
|  | @ -24,6 +26,13 @@ impl State for Hash { | |||
|                 let buf = c.to_ascii_lowercase().to_string(); | ||||
|                 StateResult::advance(Box::new(Bool::new(buf.as_str()))) | ||||
|             }, | ||||
|             c if c.is_radix() || c.is_exactness() => { | ||||
|                 if let Some(st) = Prefix::with_char(c) { | ||||
|                     StateResult::advance(Box::new(st)) | ||||
|                 } else { | ||||
|                     StateResult::fail(format!("invalid numeric prefix character: {}", c).as_str()) | ||||
|                 } | ||||
|             }, | ||||
|             _ => { | ||||
|                 let msg = format!("Invalid character: {}", c); | ||||
|                 StateResult::fail(msg.as_str()) | ||||
|  |  | |||
|  | @ -8,6 +8,7 @@ use token::Token; | |||
| mod begin; | ||||
| mod bool; | ||||
| mod hash; | ||||
| mod number; | ||||
| mod id; | ||||
| 
 | ||||
| pub use self::begin::Begin; | ||||
|  |  | |||
|  | @ -1,16 +1,17 @@ | |||
| /* lexer/src/states/number.rs
 | ||||
| /* lexer/src/states/number/mod.rs
 | ||||
|  * Eryn Wells <eryn@erynwells.me> | ||||
|  */ | ||||
| 
 | ||||
| use std::collections::HashSet; | ||||
| use chars::Lexable; | ||||
| use super::{Resume, State, StateResult, Token}; | ||||
| mod prefix; | ||||
| mod sign; | ||||
| 
 | ||||
| pub use self::prefix::Prefix; | ||||
| 
 | ||||
| trait NumberLexable { | ||||
|     /// Returns the value of this character interpreted as the indicator for a
 | ||||
|     /// base. In Scheme, you indicate the base of a number by prefixing it with
 | ||||
|     /// #[bodx].
 | ||||
|     fn base_value(&self) -> Option<Base>; | ||||
|     fn base_value(&self) -> Option<Radix>; | ||||
|     /// Returns the value of the character interpreted as a numerical digit.
 | ||||
|     fn digit_value(&self) -> Option<u8>; | ||||
|     fn sign_value(&self) -> Option<Sign>; | ||||
|  | @ -20,219 +21,77 @@ trait NumberLexable { | |||
| } | ||||
| 
 | ||||
| #[derive(Copy, Clone, Debug, Eq, PartialEq)] | ||||
| pub enum Base { Bin = 2, Oct = 8, Dec = 10, Hex = 16 } | ||||
| pub enum Radix { Bin = 2, Oct = 8, Dec = 10, Hex = 16 } | ||||
| 
 | ||||
| #[derive(Copy, Clone, Debug, Eq, PartialEq)] | ||||
| pub enum Sign { Neg = -1, Pos = 1 } | ||||
| 
 | ||||
| #[derive(Copy, Clone, Debug, Eq, PartialEq)] | ||||
| pub enum Exact { Yes, No } | ||||
| 
 | ||||
| #[derive(Copy, Clone, Debug)] | ||||
| pub struct Builder { | ||||
|     base: Option<Base>, | ||||
|     radix: Option<Radix>, | ||||
|     sign: Option<Sign>, | ||||
|     value: i64 | ||||
|     exact: Option<Exact>, | ||||
|     value: i64, | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug)] pub struct BeginState(Builder); | ||||
| #[derive(Debug)] pub struct DigitState(Builder); | ||||
| #[derive(Debug)] pub struct HashState(Builder); | ||||
| #[derive(Debug)] pub struct SignState(Builder); | ||||
| impl Radix { | ||||
|     pub fn from(c: char) -> Option<Radix> { | ||||
|         match c { | ||||
|             'b'|'B' => Some(Radix::Bin), | ||||
|             'o'|'O' => Some(Radix::Oct), | ||||
|             'd'|'D' => Some(Radix::Dec), | ||||
|             'x'|'X' => Some(Radix::Hex), | ||||
|             _ => None | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Base { | ||||
|     pub fn contains(&self, digit: u8) -> bool { | ||||
|         digit < (*self as u8) | ||||
| impl Exact { | ||||
|     pub fn from(c: char) -> Option<Exact> { | ||||
|         match c { | ||||
|             'i'|'I' => Some(Exact::No), | ||||
|             'e'|'E' => Some(Exact::Yes), | ||||
|             _ => None | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Builder { | ||||
|     pub fn new() -> Builder { | ||||
|         Builder { | ||||
|              base: None, | ||||
|              radix: None, | ||||
|              sign: None, | ||||
|              exact: None, | ||||
|              value: 0 | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn base(&self) -> Base { | ||||
|         match self.base { | ||||
|             Some(b) => b, | ||||
|             None => Base::Dec | ||||
|         } | ||||
|     fn push_digit(&mut self, digit: u8) { | ||||
|         //self.value = self.value * self.base_value() as i64 + digit as i64;
 | ||||
|     } | ||||
| 
 | ||||
|     fn sign(&self) -> Sign { | ||||
|         match self.sign { | ||||
|             Some(s) => s, | ||||
|             None => Sign::Pos | ||||
|         } | ||||
|     fn push_exact(&mut self, ex: Exact) { | ||||
|         self.exact = Some(ex); | ||||
|     } | ||||
| 
 | ||||
|     fn push_base(&mut self, base: Base) { | ||||
|         self.base = Some(base); | ||||
|     fn push_radix(&mut self, radix: Radix) { | ||||
|         self.radix = Some(radix); | ||||
|     } | ||||
| 
 | ||||
|     fn push_sign(&mut self, sign: Sign) { | ||||
|         self.sign = Some(sign); | ||||
|     } | ||||
| 
 | ||||
|     fn push_digit(&mut self, digit: u8) { | ||||
|         self.value = self.value * self.base_value() as i64 + digit as i64; | ||||
|     } | ||||
| 
 | ||||
|     fn resolve(&self) -> i64 { | ||||
|         let sign_factor: i64 = if let Some(sign) = self.sign { sign as i64 } else { 1 }; | ||||
|         self.value * sign_factor | ||||
|         //let sign_factor: i64 = if let Some(sign) = self.sign { sign as i64 } else { 1 };
 | ||||
|         //self.value * sign_factor
 | ||||
|         0 | ||||
|     } | ||||
| 
 | ||||
|     fn seen_base(&self) -> bool { self.base.is_some() } | ||||
| 
 | ||||
|     fn base_value(&self) -> u8 { self.base() as u8 } | ||||
| } | ||||
| 
 | ||||
| impl BeginState { | ||||
|     pub fn new() -> BeginState  { | ||||
|         BeginState (Builder::new()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl State for BeginState  { | ||||
|     fn lex(&mut self, c: char) -> StateResult { | ||||
|         match c { | ||||
|             c if c.is_hash() => StateResult::advance(Box::new(HashState(self.0))), | ||||
|             c if c.is_sign() => { | ||||
|                 self.0.push_sign(c.sign_value().unwrap()); | ||||
|                 StateResult::advance(Box::new(SignState(self.0))) | ||||
|             }, | ||||
|             c if c.is_digit(self.0.base_value() as u32) => { | ||||
|                 let value = c.digit_value().unwrap(); | ||||
|                 if self.0.base().contains(value) { | ||||
|                     self.0.push_digit(value); | ||||
|                     StateResult::advance(Box::new(DigitState(self.0))) | ||||
|                 } else { | ||||
|                     StateResult::fail(format!("invalid digit for current base: {}", c).as_str()) | ||||
|                 } | ||||
|             }, | ||||
|             _ => StateResult::fail(format!("invalid char: {}", c).as_str()) | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn none(&mut self) -> Result<Option<Token>, String> { | ||||
|         // TODO: Implement.
 | ||||
|         Err("blah".to_string()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl State for HashState { | ||||
|     fn lex(&mut self, c: char) -> StateResult { | ||||
|         if let Some(base) = c.base_value() { | ||||
|             if !self.0.seen_base() { | ||||
|                 self.0.push_base(base); | ||||
|                 StateResult::advance(Box::new(BeginState (self.0))) | ||||
|             } else { | ||||
|                 StateResult::fail("got base again, despite already having one") | ||||
|             } | ||||
|         } else { | ||||
|             StateResult::fail(format!("invalid char: {}", c).as_str()) | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn none(&mut self) -> Result<Option<Token>, String> { | ||||
|         // TODO: Implement.
 | ||||
|         Err("blah".to_string()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl SignState { | ||||
|     pub fn initials() -> HashSet<char> { | ||||
|         let mut inits = HashSet::new(); | ||||
|         inits.insert('+'); | ||||
|         inits.insert('-'); | ||||
|         inits | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl State for SignState { | ||||
|     fn lex(&mut self, c: char) -> StateResult { | ||||
|         if let Some(digit) = c.digit_value() { | ||||
|             if self.0.base().contains(digit) { | ||||
|                 self.0.push_digit(digit); | ||||
|                 StateResult::advance(Box::new(DigitState(self.0))) | ||||
|             } else { | ||||
|                 StateResult::fail(format!("invalid digit for current base: {}", c).as_str()) | ||||
|             } | ||||
|         } else { | ||||
|             StateResult::fail(format!("invalid char: {}", c).as_str()) | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn none(&mut self) -> Result<Option<Token>, String> { | ||||
|         // TODO: Implement.
 | ||||
|         Err("blah".to_string()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl DigitState { | ||||
|     pub fn initials() -> HashSet<char> { | ||||
|         let foldp = |acc: HashSet<char>, x: u8| { | ||||
|             let c = char::from(x); | ||||
|             acc.insert(c); | ||||
|             acc | ||||
|         }; | ||||
|         '0'..='9'.chain(('a' as u8)..=('f' as u8)).fold(HashSet::new(), foldp) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl State for DigitState { | ||||
|     fn lex(&mut self, c: char) -> StateResult { | ||||
|         if let Some(digit) = c.digit_value() { | ||||
|             if self.0.base().contains(digit) { | ||||
|                 self.0.push_digit(digit); | ||||
|                 StateResult::Continue | ||||
|             } else { | ||||
|                 StateResult::fail(format!("invalid digit for current base: {}", c).as_str()) | ||||
|             } | ||||
|         } else if c.is_identifier_delimiter() { | ||||
|             StateResult::emit(Token::Num(self.0.resolve()), Resume::Here) | ||||
|         } else { | ||||
|             StateResult::fail(format!("invalid char: {}", c).as_str()) | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn none(&mut self) -> Result<Option<Token>, String> { | ||||
|         // TODO: Implement.
 | ||||
|         Err("blah".to_string()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl NumberLexable for char { | ||||
|     fn base_value(&self) -> Option<Base> { | ||||
|         match *self { | ||||
|             'b' => Some(Base::Bin), | ||||
|             'o' => Some(Base::Oct), | ||||
|             'd' => Some(Base::Dec), | ||||
|             'x' => Some(Base::Hex), | ||||
|             _ => None | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn digit_value(&self) -> Option<u8> { | ||||
|         let ascii_value = *self as u32; | ||||
|         match *self { | ||||
|             '0'...'9' => Some((ascii_value - '0' as u32) as u8), | ||||
|             'a'...'f' => Some((ascii_value - 'a' as u32 + 10) as u8), | ||||
|             'A'...'F' => Some((ascii_value - 'A' as u32 + 10) as u8), | ||||
|             _ => None | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn sign_value(&self) -> Option<Sign> { | ||||
|         match *self { | ||||
|             '+' => Some(Sign::Pos), | ||||
|             '-' => Some(Sign::Neg), | ||||
|             _ => None | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn is_dot(&self) -> bool { *self == '.' } | ||||
|     fn is_hash(&self) -> bool { *self == '#' } | ||||
|     fn is_sign(&self) -> bool { self.sign_value().is_some() } | ||||
|     fn seen_exact(&self) -> bool { self.exact.is_some() } | ||||
|     fn seen_radix(&self) -> bool { self.radix.is_some() } | ||||
|     fn seen_sign(&self) -> bool { self.sign.is_some() } | ||||
| } | ||||
|  |  | |||
							
								
								
									
										68
									
								
								lexer/src/states/number/prefix.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								lexer/src/states/number/prefix.rs
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,68 @@ | |||
| /* lexer/src/states/number/prefix.rs
 | ||||
|  * Eryn Wells <eryn@erynwells.me> | ||||
|  */ | ||||
| 
 | ||||
| use super::{Radix, Exact}; | ||||
| use states::{State, StateResult}; | ||||
| use states::number::Builder; | ||||
| use token::Token; | ||||
| 
 | ||||
| #[derive(Debug)] pub struct Prefix(Builder); | ||||
| #[derive(Debug)] pub struct Hash(Builder); | ||||
| 
 | ||||
| impl Prefix { | ||||
|     pub fn new(b: Builder) -> Prefix { | ||||
|         Prefix(b) | ||||
|     } | ||||
| 
 | ||||
|     pub fn with_char(c: char) -> Option<Prefix> { | ||||
|         let mut builder = Builder::new(); | ||||
|         if let Some(ex) = Exact::from(c) { | ||||
|             builder.push_exact(ex); | ||||
|         } else if let Some(rx) = Radix::from(c) { | ||||
|             builder.push_radix(rx); | ||||
|         } else { | ||||
|             return None; | ||||
|         } | ||||
|         Some(Prefix::new(builder)) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl State for Prefix { | ||||
|     fn lex(&mut self, c: char) -> StateResult { | ||||
|         match c { | ||||
|             '#' => StateResult::advance(Box::new(Hash(self.0))), | ||||
|             _ => StateResult::fail(format!("invalid char: {}", c).as_str()) | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn none(&mut self) -> Result<Option<Token>, String> { | ||||
|         Err("blah".to_string()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl State for Hash { | ||||
|     fn lex(&mut self, c: char) -> StateResult { | ||||
|         if let Some(ex) = Exact::from(c) { | ||||
|             if !self.0.seen_exact() { | ||||
|                 self.0.push_exact(ex); | ||||
|                 StateResult::advance(Box::new(Prefix::new(self.0))) | ||||
|             } else { | ||||
|                 StateResult::fail(format!("invalid char: {}", c).as_str()) | ||||
|             } | ||||
|         } else if let Some(rx) = Radix::from(c) { | ||||
|             if !self.0.seen_radix() { | ||||
|                 self.0.push_radix(rx); | ||||
|                 StateResult::advance(Box::new(Prefix::new(self.0))) | ||||
|             } else { | ||||
|                 StateResult::fail(format!("invalid char: {}", c).as_str()) | ||||
|             } | ||||
|         } else { | ||||
|             StateResult::fail(format!("invalid char: {}", c).as_str()) | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn none(&mut self) -> Result<Option<Token>, String> { | ||||
|         Err("blah".to_string()) | ||||
|     } | ||||
| } | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue