Lex basic strings (no escapes yet)

This commit is contained in:
Eryn Wells 2016-12-25 15:03:18 -07:00
parent ea4758e442
commit 81a84a796a
3 changed files with 48 additions and 21 deletions

View file

@ -7,14 +7,15 @@ use lexer::charset;
pub trait Lexable { pub trait Lexable {
fn is_left_paren(&self) -> bool; fn is_left_paren(&self) -> bool;
fn is_right_paren(&self) -> bool; fn is_right_paren(&self) -> bool;
fn is_hash(&self) -> bool;
fn is_string_quote(&self) -> bool;
fn is_newline(&self) -> bool;
fn is_eof(&self) -> bool;
fn is_identifier_initial(&self) -> bool; fn is_identifier_initial(&self) -> bool;
fn is_identifier_subsequent(&self) -> bool; fn is_identifier_subsequent(&self) -> bool;
fn is_identifier_single(&self) -> bool; fn is_identifier_single(&self) -> bool;
fn is_hash(&self) -> bool;
fn is_boolean_true(&self) -> bool; fn is_boolean_true(&self) -> bool;
fn is_boolean_false(&self) -> bool; fn is_boolean_false(&self) -> bool;
fn is_newline(&self) -> bool;
fn is_eof(&self) -> bool;
fn is_comment_initial(&self) -> bool; fn is_comment_initial(&self) -> bool;
} }
@ -27,21 +28,13 @@ impl Lexable for char {
*self == ')' *self == ')'
} }
fn is_identifier_initial(&self) -> bool {
charset::identifier_initials().contains(&self)
}
fn is_identifier_subsequent(&self) -> bool {
charset::identifier_subsequents().contains(&self)
}
fn is_identifier_single(&self) -> bool {
charset::identifier_singles().contains(&self)
}
fn is_hash(&self) -> bool { fn is_hash(&self) -> bool {
*self == '#' *self == '#'
} }
/// Returns `true` when this character is the double quote (`"`), the
/// character the lexer tests for when entering and finishing a string.
fn is_string_quote(&self) -> bool {
*self == '"'
}
fn is_boolean_true(&self) -> bool { fn is_boolean_true(&self) -> bool {
*self == 't' *self == 't'
@ -62,5 +55,16 @@ impl Lexable for char {
fn is_comment_initial(&self) -> bool { fn is_comment_initial(&self) -> bool {
*self == ';' *self == ';'
} }
}
/// Returns whether this character is contained in the collection produced
/// by `charset::identifier_initials()`.
fn is_identifier_initial(&self) -> bool {
charset::identifier_initials().contains(&self)
}
/// Returns whether this character is contained in the collection produced
/// by `charset::identifier_subsequents()`.
fn is_identifier_subsequent(&self) -> bool {
charset::identifier_subsequents().contains(&self)
}
/// Returns whether this character is contained in the collection produced
/// by `charset::identifier_singles()`.
fn is_identifier_single(&self) -> bool {
charset::identifier_singles().contains(&self)
}
}

View file

@ -19,6 +19,7 @@ enum State {
Identifier, Identifier,
Hash, Hash,
Comment, Comment,
String,
} }
pub struct Lexer { pub struct Lexer {
@ -85,6 +86,14 @@ impl Lexer {
else if c.is_right_paren() { else if c.is_right_paren() {
*token = Some(Token::RightParen(c.to_string())); *token = Some(Token::RightParen(c.to_string()));
} }
else if c.is_hash() {
self.state = State::Hash;
self.advance();
}
else if c.is_string_quote() {
self.state = State::String;
self.advance();
}
else if c.is_identifier_single() { else if c.is_identifier_single() {
*token = Some(Token::Identifier(c.to_string())); *token = Some(Token::Identifier(c.to_string()));
@ -94,11 +103,6 @@ impl Lexer {
self.advance(); self.advance();
} }
else if c.is_hash() {
self.state = State::Hash;
self.advance();
}
else if c.is_whitespace() { else if c.is_whitespace() {
if c.is_newline() { if c.is_newline() {
self.handle_newline(); self.handle_newline();
@ -131,6 +135,15 @@ impl Lexer {
} }
} }
/// Handles one character `c` while the lexer is in `State::String`.
///
/// The character is always consumed with `advance()`. When it is a string
/// quote, `token` is set to a `Token::String` built from `self.value()`;
/// any other character needs no further action here.
fn state_string(&mut self, c: char, token: &mut Option<Token>) {
// Advance before inspecting `c`.
// NOTE(review): presumably this makes the closing quote part of
// `self.value()` (the tests expect the quotes in the token text) —
// confirm against `advance`/`value`.
self.advance();
if c.is_string_quote() {
*token = Some(Token::String(self.value()));
}
else {
// Any non-quote character: intentionally empty. Escape sequences are
// not handled yet.
}
}
fn state_comment(&mut self, c: char, token: &mut Option<Token>) { fn state_comment(&mut self, c: char, token: &mut Option<Token>) {
if c.is_newline() { if c.is_newline() {
self.handle_newline(); self.handle_newline();
@ -163,6 +176,7 @@ impl Iterator for Lexer {
State::Initial => self.state_initial(c, &mut token), State::Initial => self.state_initial(c, &mut token),
State::Identifier => self.state_identifier(c, &mut token), State::Identifier => self.state_identifier(c, &mut token),
State::Hash => self.state_hash(c, &mut token), State::Hash => self.state_hash(c, &mut token),
State::String => self.state_string(c, &mut token),
State::Comment => self.state_comment(c, &mut token), State::Comment => self.state_comment(c, &mut token),
} }
} }
@ -206,6 +220,14 @@ fn lexer_finds_comments() {
assert_next_token(&mut lexer, &Token::Comment(s.to_string())); assert_next_token(&mut lexer, &Token::Comment(s.to_string()));
} }
// Verifies that the lexer yields a `Token::String` as its first token for an
// empty string literal and for a simple three-character one. The expected
// token text includes the surrounding double quotes.
#[test]
fn lexer_finds_strings() {
// Input is just `""`.
let mut lexer = Lexer::new("\"\"");
assert_next_token(&mut lexer, &Token::String("\"\"".to_string()));
// Input is `"abc"`.
let mut lexer = Lexer::new("\"abc\"");
assert_next_token(&mut lexer, &Token::String("\"abc\"".to_string()));
}
fn assert_next_token(lexer: &mut Lexer, expected: &Token) { fn assert_next_token(lexer: &mut Lexer, expected: &Token) {
let lex = lexer.next().unwrap(); let lex = lexer.next().unwrap();
assert_eq!(lex.token, *expected); assert_eq!(lex.token, *expected);

View file

@ -9,6 +9,7 @@ pub enum Token {
RightParen(String), RightParen(String),
Identifier(String), Identifier(String),
Boolean(bool), Boolean(bool),
String(String),
Comment(String), Comment(String),
} }