Lex basic strings (no escapes yet)
This commit is contained in:
parent
ea4758e442
commit
81a84a796a
3 changed files with 48 additions and 21 deletions
|
@ -7,14 +7,15 @@ use lexer::charset;
|
|||
/// Character-classification predicates that the lexer calls on each input
/// character (the implementation visible in this listing is for `char`).
///
/// Every method takes `&self` and answers a yes/no question about a single
/// character.
//
// NOTE(review): `is_hash`, `is_newline` and `is_eof` are each declared twice
// in this listing. Presumably the diff view is showing both the old and the
// new position of declarations that were moved; a real trait cannot declare
// the same method twice, so confirm against the actual file.
pub trait Lexable {
|
||||
fn is_left_paren(&self) -> bool;
|
||||
fn is_right_paren(&self) -> bool;
|
||||
/// True for `#` in the `char` implementation; the lexer switches to its
/// hash state when it sees one.
fn is_hash(&self) -> bool;
|
||||
/// True for `"` in the `char` implementation; used both to enter the
/// string state and to detect the closing quote.
fn is_string_quote(&self) -> bool;
|
||||
fn is_newline(&self) -> bool;
|
||||
fn is_eof(&self) -> bool;
|
||||
/// Membership test against `charset::identifier_initials()`.
fn is_identifier_initial(&self) -> bool;
|
||||
/// Membership test against `charset::identifier_subsequents()`.
fn is_identifier_subsequent(&self) -> bool;
|
||||
/// Membership test against `charset::identifier_singles()`; the lexer
/// emits such a character as a one-character identifier token.
fn is_identifier_single(&self) -> bool;
|
||||
// Second listing of `is_hash` (see the note above the trait).
fn is_hash(&self) -> bool;
|
||||
/// True for `t` in the `char` implementation.
fn is_boolean_true(&self) -> bool;
|
||||
fn is_boolean_false(&self) -> bool;
|
||||
// Second listing of `is_newline` (see the note above the trait).
fn is_newline(&self) -> bool;
|
||||
// Second listing of `is_eof` (see the note above the trait).
fn is_eof(&self) -> bool;
|
||||
/// True for `;` in the `char` implementation.
fn is_comment_initial(&self) -> bool;
|
||||
}
|
||||
|
||||
|
@ -27,21 +28,13 @@ impl Lexable for char {
|
|||
*self == ')'
|
||||
}
|
||||
|
||||
/// Returns `true` when this character is contained in
/// `charset::identifier_initials()`.
fn is_identifier_initial(&self) -> bool {
|
||||
charset::identifier_initials().contains(&self)
|
||||
}
|
||||
|
||||
/// Returns `true` when this character is contained in
/// `charset::identifier_subsequents()`.
fn is_identifier_subsequent(&self) -> bool {
|
||||
charset::identifier_subsequents().contains(&self)
|
||||
}
|
||||
|
||||
/// Returns `true` when this character is contained in
/// `charset::identifier_singles()`.
fn is_identifier_single(&self) -> bool {
|
||||
charset::identifier_singles().contains(&self)
|
||||
}
|
||||
|
||||
/// Returns `true` when this character is `#`.
fn is_hash(&self) -> bool {
|
||||
*self == '#'
|
||||
}
|
||||
|
||||
/// Returns `true` when this character is a double quote (`"`), the
/// delimiter the lexer uses to open and close a string token.
fn is_string_quote(&self) -> bool {
|
||||
*self == '"'
|
||||
}
|
||||
|
||||
fn is_boolean_true(&self) -> bool {
|
||||
*self == 't'
|
||||
|
@ -62,5 +55,16 @@ impl Lexable for char {
|
|||
/// Returns `true` when this character is `;`.
fn is_comment_initial(&self) -> bool {
|
||||
*self == ';'
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when this character is contained in
/// `charset::identifier_initials()`.
fn is_identifier_initial(&self) -> bool {
|
||||
charset::identifier_initials().contains(&self)
|
||||
}
|
||||
|
||||
/// Returns `true` when this character is contained in
/// `charset::identifier_subsequents()`.
fn is_identifier_subsequent(&self) -> bool {
|
||||
charset::identifier_subsequents().contains(&self)
|
||||
}
|
||||
|
||||
/// Returns `true` when this character is contained in
/// `charset::identifier_singles()`.
fn is_identifier_single(&self) -> bool {
|
||||
charset::identifier_singles().contains(&self)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ enum State {
|
|||
Identifier,
|
||||
Hash,
|
||||
Comment,
|
||||
String,
|
||||
}
|
||||
|
||||
pub struct Lexer {
|
||||
|
@ -85,6 +86,14 @@ impl Lexer {
|
|||
else if c.is_right_paren() {
|
||||
*token = Some(Token::RightParen(c.to_string()));
|
||||
}
|
||||
else if c.is_hash() {
|
||||
self.state = State::Hash;
|
||||
self.advance();
|
||||
}
|
||||
else if c.is_string_quote() {
|
||||
self.state = State::String;
|
||||
self.advance();
|
||||
}
|
||||
|
||||
else if c.is_identifier_single() {
|
||||
*token = Some(Token::Identifier(c.to_string()));
|
||||
|
@ -94,11 +103,6 @@ impl Lexer {
|
|||
self.advance();
|
||||
}
|
||||
|
||||
else if c.is_hash() {
|
||||
self.state = State::Hash;
|
||||
self.advance();
|
||||
}
|
||||
|
||||
else if c.is_whitespace() {
|
||||
if c.is_newline() {
|
||||
self.handle_newline();
|
||||
|
@ -131,6 +135,15 @@ impl Lexer {
|
|||
}
|
||||
}
|
||||
|
||||
/// Handles one input character while the lexer is in `State::String`.
///
/// `c` is the character being examined and `token` is the output slot:
/// it is set to `Some(Token::String(..))` when `c` is the closing quote
/// and left untouched otherwise.
///
/// No escape handling is done here: the first quote character seen ends
/// the string.
fn state_string(&mut self, c: char, token: &mut Option<Token>) {
|
||||
// Every character is consumed, whether or not it ends the string.
self.advance();
|
||||
if c.is_string_quote() {
|
||||
// The token text comes from `self.value()`; the `lexer_finds_strings`
// test expects it to include both the opening and closing quote.
*token = Some(Token::String(self.value()));
|
||||
}
|
||||
// Any other character needs no further action after `advance()`.
else {
|
||||
}
|
||||
}
|
||||
|
||||
fn state_comment(&mut self, c: char, token: &mut Option<Token>) {
|
||||
if c.is_newline() {
|
||||
self.handle_newline();
|
||||
|
@ -163,6 +176,7 @@ impl Iterator for Lexer {
|
|||
State::Initial => self.state_initial(c, &mut token),
|
||||
State::Identifier => self.state_identifier(c, &mut token),
|
||||
State::Hash => self.state_hash(c, &mut token),
|
||||
State::String => self.state_string(c, &mut token),
|
||||
State::Comment => self.state_comment(c, &mut token),
|
||||
}
|
||||
}
|
||||
|
@ -206,6 +220,14 @@ fn lexer_finds_comments() {
|
|||
assert_next_token(&mut lexer, &Token::Comment(s.to_string()));
|
||||
}
|
||||
|
||||
/// Checks that an empty string literal and a simple `"abc"` literal each
/// come out of the lexer as a `Token::String` whose text includes the
/// surrounding double quotes.
#[test]
|
||||
fn lexer_finds_strings() {
|
||||
// Empty string: the input is just the two quote characters.
let mut lexer = Lexer::new("\"\"");
|
||||
assert_next_token(&mut lexer, &Token::String("\"\"".to_string()));
|
||||
// Non-empty string with no escapes.
let mut lexer = Lexer::new("\"abc\"");
|
||||
assert_next_token(&mut lexer, &Token::String("\"abc\"".to_string()));
|
||||
}
|
||||
|
||||
fn assert_next_token(lexer: &mut Lexer, expected: &Token) {
|
||||
let lex = lexer.next().unwrap();
|
||||
assert_eq!(lex.token, *expected);
|
||||
|
|
|
@ -9,6 +9,7 @@ pub enum Token {
|
|||
RightParen(String),
|
||||
Identifier(String),
|
||||
Boolean(bool),
|
||||
String(String),
|
||||
Comment(String),
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue