From 81a84a796a34fa3c46190fea2a05757f6452cc20 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Sun, 25 Dec 2016 15:03:18 -0700 Subject: [PATCH] Lex basic strings (no escapes yet) --- src/lexer/char.rs | 36 ++++++++++++++++++++---------------- src/lexer/mod.rs | 32 +++++++++++++++++++++++++++----- src/lexer/token.rs | 1 + 3 files changed, 48 insertions(+), 21 deletions(-) diff --git a/src/lexer/char.rs b/src/lexer/char.rs index 5b6e61a..084467a 100644 --- a/src/lexer/char.rs +++ b/src/lexer/char.rs @@ -7,14 +7,15 @@ use lexer::charset; pub trait Lexable { fn is_left_paren(&self) -> bool; fn is_right_paren(&self) -> bool; + fn is_hash(&self) -> bool; + fn is_string_quote(&self) -> bool; + fn is_newline(&self) -> bool; + fn is_eof(&self) -> bool; fn is_identifier_initial(&self) -> bool; fn is_identifier_subsequent(&self) -> bool; fn is_identifier_single(&self) -> bool; - fn is_hash(&self) -> bool; fn is_boolean_true(&self) -> bool; fn is_boolean_false(&self) -> bool; - fn is_newline(&self) -> bool; - fn is_eof(&self) -> bool; fn is_comment_initial(&self) -> bool; } @@ -27,21 +28,13 @@ impl Lexable for char { *self == ')' } - fn is_identifier_initial(&self) -> bool { - charset::identifier_initials().contains(&self) - } - - fn is_identifier_subsequent(&self) -> bool { - charset::identifier_subsequents().contains(&self) - } - - fn is_identifier_single(&self) -> bool { - charset::identifier_singles().contains(&self) - } - fn is_hash(&self) -> bool { *self == '#' } + + fn is_string_quote(&self) -> bool { + *self == '"' + } fn is_boolean_true(&self) -> bool { *self == 't' @@ -62,5 +55,16 @@ impl Lexable for char { fn is_comment_initial(&self) -> bool { *self == ';' } -} + fn is_identifier_initial(&self) -> bool { + charset::identifier_initials().contains(&self) + } + + fn is_identifier_subsequent(&self) -> bool { + charset::identifier_subsequents().contains(&self) + } + + fn is_identifier_single(&self) -> bool { + charset::identifier_singles().contains(&self) + } +} diff 
--git a/src/lexer/mod.rs b/src/lexer/mod.rs index 24476a8..68fd0c4 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -19,6 +19,7 @@ enum State { Identifier, Hash, Comment, + String, } pub struct Lexer { @@ -85,6 +86,14 @@ impl Lexer { else if c.is_right_paren() { *token = Some(Token::RightParen(c.to_string())); } + else if c.is_hash() { + self.state = State::Hash; + self.advance(); + } + else if c.is_string_quote() { + self.state = State::String; + self.advance(); + } else if c.is_identifier_single() { *token = Some(Token::Identifier(c.to_string())); @@ -94,11 +103,6 @@ impl Lexer { self.advance(); } - else if c.is_hash() { - self.state = State::Hash; - self.advance(); - } - else if c.is_whitespace() { if c.is_newline() { self.handle_newline(); @@ -131,6 +135,15 @@ impl Lexer { } } + fn state_string(&mut self, c: char, token: &mut Option<Token>) { + self.advance(); + if c.is_string_quote() { + *token = Some(Token::String(self.value())); + } + else { + } + } + fn state_comment(&mut self, c: char, token: &mut Option<Token>) { if c.is_newline() { self.handle_newline(); @@ -163,6 +176,7 @@ impl Iterator for Lexer { State::Initial => self.state_initial(c, &mut token), State::Identifier => self.state_identifier(c, &mut token), State::Hash => self.state_hash(c, &mut token), + State::String => self.state_string(c, &mut token), State::Comment => self.state_comment(c, &mut token), } } @@ -206,6 +220,14 @@ fn lexer_finds_comments() { assert_next_token(&mut lexer, &Token::Comment(s.to_string())); } +#[test] +fn lexer_finds_strings() { + let mut lexer = Lexer::new("\"\""); + assert_next_token(&mut lexer, &Token::String("\"\"".to_string())); + let mut lexer = Lexer::new("\"abc\""); + assert_next_token(&mut lexer, &Token::String("\"abc\"".to_string())); +} + fn assert_next_token(lexer: &mut Lexer, expected: &Token) { let lex = lexer.next().unwrap(); assert_eq!(lex.token, *expected); diff --git a/src/lexer/token.rs b/src/lexer/token.rs index ede52bf..aaf1114 100644 --- 
a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -9,6 +9,7 @@ pub enum Token { RightParen(String), Identifier(String), Boolean(bool), + String(String), Comment(String), }