Lex basic strings (no escapes yet)
This commit is contained in:
parent
ea4758e442
commit
81a84a796a
3 changed files with 48 additions and 21 deletions
|
@ -7,14 +7,15 @@ use lexer::charset;
|
||||||
pub trait Lexable {
|
pub trait Lexable {
|
||||||
fn is_left_paren(&self) -> bool;
|
fn is_left_paren(&self) -> bool;
|
||||||
fn is_right_paren(&self) -> bool;
|
fn is_right_paren(&self) -> bool;
|
||||||
|
fn is_hash(&self) -> bool;
|
||||||
|
fn is_string_quote(&self) -> bool;
|
||||||
|
fn is_newline(&self) -> bool;
|
||||||
|
fn is_eof(&self) -> bool;
|
||||||
fn is_identifier_initial(&self) -> bool;
|
fn is_identifier_initial(&self) -> bool;
|
||||||
fn is_identifier_subsequent(&self) -> bool;
|
fn is_identifier_subsequent(&self) -> bool;
|
||||||
fn is_identifier_single(&self) -> bool;
|
fn is_identifier_single(&self) -> bool;
|
||||||
fn is_hash(&self) -> bool;
|
|
||||||
fn is_boolean_true(&self) -> bool;
|
fn is_boolean_true(&self) -> bool;
|
||||||
fn is_boolean_false(&self) -> bool;
|
fn is_boolean_false(&self) -> bool;
|
||||||
fn is_newline(&self) -> bool;
|
|
||||||
fn is_eof(&self) -> bool;
|
|
||||||
fn is_comment_initial(&self) -> bool;
|
fn is_comment_initial(&self) -> bool;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,21 +28,13 @@ impl Lexable for char {
|
||||||
*self == ')'
|
*self == ')'
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_identifier_initial(&self) -> bool {
|
|
||||||
charset::identifier_initials().contains(&self)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_identifier_subsequent(&self) -> bool {
|
|
||||||
charset::identifier_subsequents().contains(&self)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_identifier_single(&self) -> bool {
|
|
||||||
charset::identifier_singles().contains(&self)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_hash(&self) -> bool {
|
fn is_hash(&self) -> bool {
|
||||||
*self == '#'
|
*self == '#'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns `true` when this character is the double-quote character `"`.
fn is_string_quote(&self) -> bool {
|
||||||
|
*self == '"'
|
||||||
|
}
|
||||||
|
|
||||||
fn is_boolean_true(&self) -> bool {
|
fn is_boolean_true(&self) -> bool {
|
||||||
*self == 't'
|
*self == 't'
|
||||||
|
@ -62,5 +55,16 @@ impl Lexable for char {
|
||||||
fn is_comment_initial(&self) -> bool {
|
fn is_comment_initial(&self) -> bool {
|
||||||
*self == ';'
|
*self == ';'
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
/// Returns whether the collection produced by `charset::identifier_initials()`
/// contains this character.
// NOTE(review): presumably the set of characters allowed to start an
// identifier — confirm against the `charset` module.
fn is_identifier_initial(&self) -> bool {
|
||||||
|
charset::identifier_initials().contains(&self)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns whether the collection produced by
/// `charset::identifier_subsequents()` contains this character.
// NOTE(review): presumably the characters allowed after the first one in an
// identifier — confirm against the `charset` module.
fn is_identifier_subsequent(&self) -> bool {
|
||||||
|
charset::identifier_subsequents().contains(&self)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns whether the collection produced by `charset::identifier_singles()`
/// contains this character.
// NOTE(review): presumably characters that form a complete identifier on
// their own — confirm against the `charset` module.
fn is_identifier_single(&self) -> bool {
|
||||||
|
charset::identifier_singles().contains(&self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@ enum State {
|
||||||
Identifier,
|
Identifier,
|
||||||
Hash,
|
Hash,
|
||||||
Comment,
|
Comment,
|
||||||
|
String,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Lexer {
|
pub struct Lexer {
|
||||||
|
@ -85,6 +86,14 @@ impl Lexer {
|
||||||
else if c.is_right_paren() {
|
else if c.is_right_paren() {
|
||||||
*token = Some(Token::RightParen(c.to_string()));
|
*token = Some(Token::RightParen(c.to_string()));
|
||||||
}
|
}
|
||||||
|
else if c.is_hash() {
|
||||||
|
self.state = State::Hash;
|
||||||
|
self.advance();
|
||||||
|
}
|
||||||
|
else if c.is_string_quote() {
|
||||||
|
self.state = State::String;
|
||||||
|
self.advance();
|
||||||
|
}
|
||||||
|
|
||||||
else if c.is_identifier_single() {
|
else if c.is_identifier_single() {
|
||||||
*token = Some(Token::Identifier(c.to_string()));
|
*token = Some(Token::Identifier(c.to_string()));
|
||||||
|
@ -94,11 +103,6 @@ impl Lexer {
|
||||||
self.advance();
|
self.advance();
|
||||||
}
|
}
|
||||||
|
|
||||||
else if c.is_hash() {
|
|
||||||
self.state = State::Hash;
|
|
||||||
self.advance();
|
|
||||||
}
|
|
||||||
|
|
||||||
else if c.is_whitespace() {
|
else if c.is_whitespace() {
|
||||||
if c.is_newline() {
|
if c.is_newline() {
|
||||||
self.handle_newline();
|
self.handle_newline();
|
||||||
|
@ -131,6 +135,15 @@ impl Lexer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Handles the character `c` while the lexer is in the string state.
///
/// The lexer is always advanced first. If `c` is a string quote, `token` is
/// set to `Token::String` built from `self.value()`; for any other character
/// nothing is emitted. Escape sequences are not handled here.
fn state_string(&mut self, c: char, token: &mut Option<Token>) {
|
||||||
|
self.advance();
|
||||||
|
if c.is_string_quote() {
|
||||||
|
*token = Some(Token::String(self.value()));
|
||||||
|
}
|
||||||
|
// NOTE(review): this branch is empty and no state reset is visible in this
// function — confirm that the return to the initial state happens elsewhere.
else {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn state_comment(&mut self, c: char, token: &mut Option<Token>) {
|
fn state_comment(&mut self, c: char, token: &mut Option<Token>) {
|
||||||
if c.is_newline() {
|
if c.is_newline() {
|
||||||
self.handle_newline();
|
self.handle_newline();
|
||||||
|
@ -163,6 +176,7 @@ impl Iterator for Lexer {
|
||||||
State::Initial => self.state_initial(c, &mut token),
|
State::Initial => self.state_initial(c, &mut token),
|
||||||
State::Identifier => self.state_identifier(c, &mut token),
|
State::Identifier => self.state_identifier(c, &mut token),
|
||||||
State::Hash => self.state_hash(c, &mut token),
|
State::Hash => self.state_hash(c, &mut token),
|
||||||
|
State::String => self.state_string(c, &mut token),
|
||||||
State::Comment => self.state_comment(c, &mut token),
|
State::Comment => self.state_comment(c, &mut token),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -206,6 +220,14 @@ fn lexer_finds_comments() {
|
||||||
assert_next_token(&mut lexer, &Token::Comment(s.to_string()));
|
assert_next_token(&mut lexer, &Token::Comment(s.to_string()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that an empty string literal and a three-character string literal
// each come back from the lexer as a `Token::String`. The expected token text
// includes the surrounding double quotes.
#[test]
|
||||||
|
fn lexer_finds_strings() {
|
||||||
|
let mut lexer = Lexer::new("\"\"");
|
||||||
|
assert_next_token(&mut lexer, &Token::String("\"\"".to_string()));
|
||||||
|
let mut lexer = Lexer::new("\"abc\"");
|
||||||
|
assert_next_token(&mut lexer, &Token::String("\"abc\"".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
fn assert_next_token(lexer: &mut Lexer, expected: &Token) {
|
fn assert_next_token(lexer: &mut Lexer, expected: &Token) {
|
||||||
let lex = lexer.next().unwrap();
|
let lex = lexer.next().unwrap();
|
||||||
assert_eq!(lex.token, *expected);
|
assert_eq!(lex.token, *expected);
|
||||||
|
|
|
@ -9,6 +9,7 @@ pub enum Token {
|
||||||
RightParen(String),
|
RightParen(String),
|
||||||
Identifier(String),
|
Identifier(String),
|
||||||
Boolean(bool),
|
Boolean(bool),
|
||||||
|
String(String),
|
||||||
Comment(String),
|
Comment(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue