From d9943163929c0453cf6ab832aa3f6fc997a3a717 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Sat, 13 May 2017 17:21:23 -0700 Subject: [PATCH] Character checks for identifier initial and subsequent --- lexer/src/chars.rs | 44 ++++++++++++++++++++++++++++++++++++++++++-- lexer/src/lib.rs | 6 +++--- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/lexer/src/chars.rs b/lexer/src/chars.rs index 0314e68..60192c5 100644 --- a/lexer/src/chars.rs +++ b/lexer/src/chars.rs @@ -5,9 +5,49 @@ pub trait Lexable { fn is_left_paren(&self) -> bool; fn is_right_paren(&self) -> bool; + fn is_identifier_initial(&self) -> bool; + fn is_identifier_subsequent(&self) -> bool; + fn is_identifier_delimiter(&self) -> bool; } impl Lexable for char { - fn is_left_paren(&self) -> bool { *self == '(' } - fn is_right_paren(&self) -> bool { *self == ')' } + fn is_left_paren(&self) -> bool { + *self == '(' + } + + fn is_right_paren(&self) -> bool { + *self == ')' + } + + fn is_identifier_initial(&self) -> bool { + self.is_alphabetic() || self.is_special_initial() + } + + fn is_identifier_subsequent(&self) -> bool { + self.is_identifier_initial() || self.is_numeric() || self.is_special_subsequent() + } + + fn is_identifier_delimiter(&self) -> bool { + self.is_whitespace() || self.is_left_paren() || self.is_right_paren() + } +} + +trait LexableSpecial { + fn is_special_initial(&self) -> bool; + fn is_special_subsequent(&self) -> bool; + fn is_explicit_sign(&self) -> bool; +} + +impl LexableSpecial for char { + fn is_special_initial(&self) -> bool { + "!$%&*/:<=>?~_^".contains(*self) + } + + fn is_special_subsequent(&self) -> bool { + self.is_explicit_sign() || ".@".contains(*self) + } + + fn is_explicit_sign(&self) -> bool { + *self == '+' || *self == '-' + } } diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs index 7b7c531..489e029 100644 --- a/lexer/src/lib.rs +++ b/lexer/src/lib.rs @@ -55,7 +55,7 @@ impl Iterator for Lexer where T: Iterator { Some(c) if c.is_left_paren() => self.emit(Token::LeftParen, Resume::AtNext), Some(c) if c.is_right_paren() => self.emit(Token::RightParen, Resume::AtNext), Some(c) if c.is_whitespace() => IterationResult::Continue, - Some(c) if c.is_alphabetic() => { + Some(c) if c.is_identifier_initial() => { buffer.push(c); IterationResult::Continue }, @@ -66,11 +66,11 @@ impl Iterator for Lexer where T: Iterator { } else { match peek { - Some(c) if c.is_alphabetic() => { + Some(c) if c.is_identifier_subsequent() => { buffer.push(c); IterationResult::Continue } - Some(c) if c.is_left_paren() || c.is_right_paren() || c.is_whitespace() => + Some(c) if c.is_identifier_delimiter() => self.emit(Token::Id(buffer.clone()), Resume::Here), Some(c) => self.fail(format!("Invalid character: {}", c)), // Found EOF. Emit what we have and finish.