Lex all the special character escapes, without stupid state hacking

This commit is contained in:
Eryn Wells 2017-01-07 16:20:26 -08:00
parent 2f8f71fc28
commit 2a680d07c7

View file

@ -8,6 +8,51 @@ mod charset;
mod number; mod number;
mod str; mod str;
/// Lookup helpers for the spelled-out character names accepted after the
/// character leader (`alarm`, `backspace`, `delete`, `escape`, `newline`,
/// `null`, `return`, `space`, `tab`).
mod named_char {
    use std::collections::HashSet;

    /// Every recognized character name paired with the character it denotes.
    const NAMED_CHARS: [(&'static str, char); 9] = [
        ("alarm", '\x07'),
        ("backspace", '\x08'),
        ("delete", '\x7F'),
        ("escape", '\x1B'),
        ("newline", '\n'),
        ("null", '\0'),
        ("return", '\r'),
        ("space", ' '),
        ("tab", '\t'),
    ];

    /// Returns a freshly built set holding every recognized character name.
    pub fn set() -> HashSet<&'static str> {
        NAMED_CHARS.iter().map(|&(name, _)| name).collect()
    }

    /// Returns the character denoted by `named`.
    ///
    /// # Panics
    ///
    /// Panics when `named` is not one of the names returned by [`set`]; the
    /// comparison is exact, so callers must pass the name in lowercase.
    pub fn char_named_by(named: &str) -> char {
        NAMED_CHARS
            .iter()
            .find(|&&(name, _)| name == named)
            .map(|&(_, ch)| ch)
            .unwrap_or_else(|| panic!("char_named_by called with invalid named char string"))
    }
}
use std::collections::HashSet;
use self::char::Lexable; use self::char::Lexable;
use self::number::Exactness; use self::number::Exactness;
use self::number::NumberBuilder; use self::number::NumberBuilder;
@ -26,9 +71,8 @@ trait HasResult {
#[derive(Debug)] #[derive(Debug)]
enum State { enum State {
Character, Char,
CharacterNewline(NewlineState), NamedChar(HashSet<&'static str>, String),
CharacterSpace(SpaceState),
Comment, Comment,
Initial, Initial,
Identifier, Identifier,
@ -44,11 +88,6 @@ enum State {
StringEscape, StringEscape,
} }
/// Sub-state carried by `State::CharacterNewline`: each variant is named after
/// the prefix of the word `newline` that it stands for, from `N` up to the
/// complete `Newline`.
#[derive(Clone, PartialEq, Debug)]
enum NewlineState { N, Ne, New, Newl, Newli, Newlin, Newline }
/// Sub-state carried by `State::CharacterSpace`: each variant is named after
/// the prefix of the word `space` that it stands for, from `S` up to the
/// complete `Space`.
#[derive(Clone, PartialEq, Debug)]
enum SpaceState { S, Sp, Spa, Spac, Space }
pub fn lex(input: &str) -> Lexer { pub fn lex(input: &str) -> Lexer {
Lexer::new(&input) Lexer::new(&input)
} }
@ -210,67 +249,67 @@ impl Lexer {
Ok(None) Ok(None)
} }
/// Handle self.state == State::Character /// Handle self.state == State::Char
fn state_character(&mut self, c: char) -> StateResult { fn state_char(&mut self, c: char) -> StateResult {
self.advance(); self.advance();
match c { let lower_c = c.to_lowercase().collect::<String>();
'n' => self.state = State::CharacterNewline(NewlineState::N), let mut candidates: HashSet<&str> = HashSet::new();
's' => self.state = State::CharacterSpace(SpaceState::S), for c in named_char::set().iter() {
_ => return self.token_result(Token::Character(c)), if c.starts_with(&lower_c) {
candidates.insert(c);
}
}
if candidates.len() > 0 {
self.state = State::NamedChar(candidates, lower_c);
} else {
return self.token_result(Token::Character(c));
} }
Ok(None) Ok(None)
} }
/// Handle self.state == State::CharacterNewline /// Handle self.state == State::NamedChar
fn state_character_newline(&mut self, c: char) -> StateResult { fn state_named_char(&mut self, c: char) -> StateResult {
let substate = match self.state { let (candidates, mut progress) = match self.state {
State::CharacterNewline(ref substate) => Some(substate.clone()), State::NamedChar(ref candidates, ref progress) => (candidates.clone(), progress.clone()),
_ => None, _ => panic!("Called state_named_char without being in NamedChar state")
}.unwrap(); };
// Assume we'll advance... if c.is_identifier_delimiter() || c.is_eof() {
self.advance(); if progress.len() == 1 {
if substate == NewlineState::N && (c.is_identifier_delimiter() || c.is_eof()) { self.retract();
return self.token_result(Token::Character('n')); return self.token_result(Token::Character(progress.chars().next().unwrap()));
} }
if let Some(next) = substate.next(c) { else {
match next { return self.generic_error(c);
NewlineState::Newline => return self.token_result(Token::Character('\n')),
_ => self.state = State::CharacterNewline(next),
} }
} }
else {
// ... but retract if we failed.
self.retract();
return Err(self.error_string(format!("Invalid character while building #\\newline: '{}'", c)));
}
Ok(None)
}
/// Handle self.state == State::CharacterNewline progress.push(c);
fn state_character_space(&mut self, c: char) -> StateResult {
let substate = match self.state {
State::CharacterSpace(ref substate) => Some(substate.clone()),
_ => None,
}.unwrap();
// Assume we'll advance... let candidates: HashSet<&str> = {
self.advance(); let filtered = candidates.iter().filter(|c| c.starts_with(&progress)).map(|c| *c);
if substate == SpaceState::S && (c.is_identifier_delimiter() || c.is_eof()) { filtered.collect()
return self.token_result(Token::Character('s')); };
}
if let Some(next) = substate.next(c) { if candidates.len() == 1 {
match next { let candidate = *candidates.iter().next().unwrap();
SpaceState::Space => return self.token_result(Token::Character(' ')), if candidate == &progress {
_ => self.state = State::CharacterSpace(next), self.token_result(Token::Character(named_char::char_named_by(&progress)))
}
else {
self.state = State::NamedChar(candidates, progress);
self.advance();
Ok(None)
} }
} }
else { else if candidates.len() > 1 {
// ... but retract if we failed. self.state = State::NamedChar(candidates, progress);
self.retract(); self.advance();
return Err(self.error_string(format!("Invalid character while building #\\space: '{}'", c))); Ok(None)
}
else {
self.generic_error(c)
} }
Ok(None)
} }
/// Handle self.state == State::Dot /// Handle self.state == State::Dot
@ -302,7 +341,7 @@ impl Lexer {
return self.token_result(Token::LeftVectorParen); return self.token_result(Token::LeftVectorParen);
} }
else if c.is_character_leader() { else if c.is_character_leader() {
self.state = State::Character; self.state = State::Char;
self.advance(); self.advance();
} }
else if let Some(radix) = Radix::from_char(c) { else if let Some(radix) = Radix::from_char(c) {
@ -495,9 +534,8 @@ impl Iterator for Lexer {
println!("{:?}! c='{}'", self.state, c); println!("{:?}! c='{}'", self.state, c);
let previous_forward = self.forward; let previous_forward = self.forward;
let result = match self.state { let result = match self.state {
State::Character => self.state_character(c), State::Char=> self.state_char(c),
State::CharacterNewline(_) => self.state_character_newline(c), State::NamedChar(_, _) => self.state_named_char(c),
State::CharacterSpace(_) => self.state_character_space(c),
State::Comment => self.state_comment(c), State::Comment => self.state_comment(c),
State::Dot => self.state_dot(c), State::Dot => self.state_dot(c),
State::Hash => self.state_hash(c), State::Hash => self.state_hash(c),
@ -541,63 +579,6 @@ impl HasResult for StateResult {
} }
} }
impl NewlineState {
    /// Steps one letter further through the word `newline`.
    ///
    /// Returns the following state when `c` is the letter expected after the
    /// current prefix, and `None` for any other character or when the state is
    /// already the complete `Newline`.
    fn next(&self, c: char) -> Option<NewlineState> {
        match (self, c) {
            (&NewlineState::N, 'e') => Some(NewlineState::Ne),
            (&NewlineState::Ne, 'w') => Some(NewlineState::New),
            (&NewlineState::New, 'l') => Some(NewlineState::Newl),
            (&NewlineState::Newl, 'i') => Some(NewlineState::Newli),
            (&NewlineState::Newli, 'n') => Some(NewlineState::Newlin),
            (&NewlineState::Newlin, 'e') => Some(NewlineState::Newline),
            _ => None,
        }
    }
}
impl SpaceState {
    /// Steps one letter further through the word `space`.
    ///
    /// Returns the following state when `c` is the letter expected after the
    /// current prefix, and `None` for any other character or when the state is
    /// already the complete `Space`.
    fn next(&self, c: char) -> Option<SpaceState> {
        match (self, c) {
            (&SpaceState::S, 'p') => Some(SpaceState::Sp),
            (&SpaceState::Sp, 'a') => Some(SpaceState::Spa),
            (&SpaceState::Spa, 'c') => Some(SpaceState::Spac),
            (&SpaceState::Spac, 'e') => Some(SpaceState::Space),
            _ => None,
        }
    }
}
// //
// UNIT TESTING // UNIT TESTING
// //