Lex all the special character escapes, without stupid state hacking
This commit is contained in:
parent
2f8f71fc28
commit
2a680d07c7
1 changed files with 98 additions and 117 deletions
215
src/lexer/mod.rs
215
src/lexer/mod.rs
|
@ -8,6 +8,51 @@ mod charset;
|
||||||
mod number;
|
mod number;
|
||||||
mod str;
|
mod str;
|
||||||
|
|
||||||
|
mod named_char {
    use std::collections::HashSet;

    /// Single source of truth for the named characters: each entry pairs the
    /// spelled-out name with the character it stands for.
    ///
    /// Both `set()` and the lookups below are derived from this table, so a
    /// name can never be accepted by one and rejected by the other.
    const NAMED_CHARS: [(&'static str, char); 9] = [
        ("alarm", '\x07'),
        ("backspace", '\x08'),
        ("delete", '\x7F'),
        ("escape", '\x1B'),
        ("newline", '\n'),
        ("null", '\0'),
        ("return", '\r'),
        ("space", ' '),
        ("tab", '\t'),
    ];

    /// Returns a freshly built set containing every known character name.
    ///
    /// The caller owns the returned set and may filter or mutate it freely.
    pub fn set() -> HashSet<&'static str> {
        NAMED_CHARS.iter().map(|&(name, _)| name).collect()
    }

    /// Returns the character denoted by `named`, or `None` when `named` is not
    /// one of the known names. The comparison is exact (case-sensitive).
    pub fn lookup(named: &str) -> Option<char> {
        NAMED_CHARS
            .iter()
            .find(|&&(name, _)| name == named)
            .map(|&(_, c)| c)
    }

    /// Returns the character denoted by `named`.
    ///
    /// # Panics
    ///
    /// Panics if `named` is not one of the names contained in `set()`. Use
    /// `lookup` when the name has not been validated beforehand.
    pub fn char_named_by(named: &str) -> char {
        match lookup(named) {
            Some(c) => c,
            None => panic!("char_named_by called with invalid named char string"),
        }
    }
}
|
||||||
|
|
||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use self::char::Lexable;
|
use self::char::Lexable;
|
||||||
use self::number::Exactness;
|
use self::number::Exactness;
|
||||||
use self::number::NumberBuilder;
|
use self::number::NumberBuilder;
|
||||||
|
@ -26,9 +71,8 @@ trait HasResult {
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
enum State {
|
enum State {
|
||||||
Character,
|
Char,
|
||||||
CharacterNewline(NewlineState),
|
NamedChar(HashSet<&'static str>, String),
|
||||||
CharacterSpace(SpaceState),
|
|
||||||
Comment,
|
Comment,
|
||||||
Initial,
|
Initial,
|
||||||
Identifier,
|
Identifier,
|
||||||
|
@ -44,11 +88,6 @@ enum State {
|
||||||
StringEscape,
|
StringEscape,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, PartialEq, Debug)]
|
|
||||||
enum NewlineState { N, Ne, New, Newl, Newli, Newlin, Newline }
|
|
||||||
#[derive(Clone, PartialEq, Debug)]
|
|
||||||
enum SpaceState { S, Sp, Spa, Spac, Space }
|
|
||||||
|
|
||||||
pub fn lex(input: &str) -> Lexer {
|
pub fn lex(input: &str) -> Lexer {
|
||||||
Lexer::new(&input)
|
Lexer::new(&input)
|
||||||
}
|
}
|
||||||
|
@ -210,67 +249,67 @@ impl Lexer {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Handle self.state == State::Character
|
/// Handle self.state == State::Char
|
||||||
fn state_character(&mut self, c: char) -> StateResult {
|
fn state_char(&mut self, c: char) -> StateResult {
|
||||||
self.advance();
|
self.advance();
|
||||||
match c {
|
let lower_c = c.to_lowercase().collect::<String>();
|
||||||
'n' => self.state = State::CharacterNewline(NewlineState::N),
|
let mut candidates: HashSet<&str> = HashSet::new();
|
||||||
's' => self.state = State::CharacterSpace(SpaceState::S),
|
for c in named_char::set().iter() {
|
||||||
_ => return self.token_result(Token::Character(c)),
|
if c.starts_with(&lower_c) {
|
||||||
|
candidates.insert(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if candidates.len() > 0 {
|
||||||
|
self.state = State::NamedChar(candidates, lower_c);
|
||||||
|
} else {
|
||||||
|
return self.token_result(Token::Character(c));
|
||||||
}
|
}
|
||||||
Ok(None)
|
Ok(None)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Handle self.state == State::CharacterNewline
|
/// Handle self.state == State::NamedChar
|
||||||
fn state_character_newline(&mut self, c: char) -> StateResult {
|
fn state_named_char(&mut self, c: char) -> StateResult {
|
||||||
let substate = match self.state {
|
let (candidates, mut progress) = match self.state {
|
||||||
State::CharacterNewline(ref substate) => Some(substate.clone()),
|
State::NamedChar(ref candidates, ref progress) => (candidates.clone(), progress.clone()),
|
||||||
_ => None,
|
_ => panic!("Called state_named_char without being in NamedChar state")
|
||||||
}.unwrap();
|
};
|
||||||
|
|
||||||
// Assume we'll advance...
|
if c.is_identifier_delimiter() || c.is_eof() {
|
||||||
self.advance();
|
if progress.len() == 1 {
|
||||||
if substate == NewlineState::N && (c.is_identifier_delimiter() || c.is_eof()) {
|
self.retract();
|
||||||
return self.token_result(Token::Character('n'));
|
return self.token_result(Token::Character(progress.chars().next().unwrap()));
|
||||||
}
|
}
|
||||||
if let Some(next) = substate.next(c) {
|
else {
|
||||||
match next {
|
return self.generic_error(c);
|
||||||
NewlineState::Newline => return self.token_result(Token::Character('\n')),
|
|
||||||
_ => self.state = State::CharacterNewline(next),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
// ... but retract if we failed.
|
|
||||||
self.retract();
|
|
||||||
return Err(self.error_string(format!("Invalid character while building #\\newline: '{}'", c)));
|
|
||||||
}
|
|
||||||
Ok(None)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Handle self.state == State::CharacterNewline
|
progress.push(c);
|
||||||
fn state_character_space(&mut self, c: char) -> StateResult {
|
|
||||||
let substate = match self.state {
|
|
||||||
State::CharacterSpace(ref substate) => Some(substate.clone()),
|
|
||||||
_ => None,
|
|
||||||
}.unwrap();
|
|
||||||
|
|
||||||
// Assume we'll advance...
|
let candidates: HashSet<&str> = {
|
||||||
self.advance();
|
let filtered = candidates.iter().filter(|c| c.starts_with(&progress)).map(|c| *c);
|
||||||
if substate == SpaceState::S && (c.is_identifier_delimiter() || c.is_eof()) {
|
filtered.collect()
|
||||||
return self.token_result(Token::Character('s'));
|
};
|
||||||
}
|
|
||||||
if let Some(next) = substate.next(c) {
|
if candidates.len() == 1 {
|
||||||
match next {
|
let candidate = *candidates.iter().next().unwrap();
|
||||||
SpaceState::Space => return self.token_result(Token::Character(' ')),
|
if candidate == &progress {
|
||||||
_ => self.state = State::CharacterSpace(next),
|
self.token_result(Token::Character(named_char::char_named_by(&progress)))
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
self.state = State::NamedChar(candidates, progress);
|
||||||
|
self.advance();
|
||||||
|
Ok(None)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else if candidates.len() > 1 {
|
||||||
// ... but retract if we failed.
|
self.state = State::NamedChar(candidates, progress);
|
||||||
self.retract();
|
self.advance();
|
||||||
return Err(self.error_string(format!("Invalid character while building #\\space: '{}'", c)));
|
Ok(None)
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
self.generic_error(c)
|
||||||
}
|
}
|
||||||
Ok(None)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Handle self.state == State::Dot
|
/// Handle self.state == State::Dot
|
||||||
|
@ -302,7 +341,7 @@ impl Lexer {
|
||||||
return self.token_result(Token::LeftVectorParen);
|
return self.token_result(Token::LeftVectorParen);
|
||||||
}
|
}
|
||||||
else if c.is_character_leader() {
|
else if c.is_character_leader() {
|
||||||
self.state = State::Character;
|
self.state = State::Char;
|
||||||
self.advance();
|
self.advance();
|
||||||
}
|
}
|
||||||
else if let Some(radix) = Radix::from_char(c) {
|
else if let Some(radix) = Radix::from_char(c) {
|
||||||
|
@ -495,9 +534,8 @@ impl Iterator for Lexer {
|
||||||
println!("{:?}! c='{}'", self.state, c);
|
println!("{:?}! c='{}'", self.state, c);
|
||||||
let previous_forward = self.forward;
|
let previous_forward = self.forward;
|
||||||
let result = match self.state {
|
let result = match self.state {
|
||||||
State::Character => self.state_character(c),
|
State::Char=> self.state_char(c),
|
||||||
State::CharacterNewline(_) => self.state_character_newline(c),
|
State::NamedChar(_, _) => self.state_named_char(c),
|
||||||
State::CharacterSpace(_) => self.state_character_space(c),
|
|
||||||
State::Comment => self.state_comment(c),
|
State::Comment => self.state_comment(c),
|
||||||
State::Dot => self.state_dot(c),
|
State::Dot => self.state_dot(c),
|
||||||
State::Hash => self.state_hash(c),
|
State::Hash => self.state_hash(c),
|
||||||
|
@ -541,63 +579,6 @@ impl HasResult for StateResult {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl NewlineState {
|
|
||||||
fn next(&self, c: char) -> Option<NewlineState> {
|
|
||||||
match *self {
|
|
||||||
NewlineState::N => match c {
|
|
||||||
'e' => Some(NewlineState::Ne),
|
|
||||||
_ => None,
|
|
||||||
},
|
|
||||||
NewlineState::Ne => match c {
|
|
||||||
'w' => Some(NewlineState::New),
|
|
||||||
_ => None,
|
|
||||||
},
|
|
||||||
NewlineState::New => match c {
|
|
||||||
'l' => Some(NewlineState::Newl),
|
|
||||||
_ => None,
|
|
||||||
},
|
|
||||||
NewlineState::Newl => match c {
|
|
||||||
'i' => Some(NewlineState::Newli),
|
|
||||||
_ => None,
|
|
||||||
},
|
|
||||||
NewlineState::Newli => match c {
|
|
||||||
'n' => Some(NewlineState::Newlin),
|
|
||||||
_ => None,
|
|
||||||
},
|
|
||||||
NewlineState::Newlin => match c {
|
|
||||||
'e' => Some(NewlineState::Newline),
|
|
||||||
_ => None,
|
|
||||||
},
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SpaceState {
|
|
||||||
fn next(&self, c: char) -> Option<SpaceState> {
|
|
||||||
match *self {
|
|
||||||
SpaceState::S => match c {
|
|
||||||
'p' => Some(SpaceState::Sp),
|
|
||||||
_ => None,
|
|
||||||
},
|
|
||||||
SpaceState::Sp => match c {
|
|
||||||
'a' => Some(SpaceState::Spa),
|
|
||||||
_ => None,
|
|
||||||
},
|
|
||||||
SpaceState::Spa => match c {
|
|
||||||
'c' => Some(SpaceState::Spac),
|
|
||||||
_ => None,
|
|
||||||
},
|
|
||||||
SpaceState::Spac => match c {
|
|
||||||
'e' => Some(SpaceState::Space),
|
|
||||||
_ => None,
|
|
||||||
},
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// UNIT TESTING
|
// UNIT TESTING
|
||||||
//
|
//
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue