Move the lexer to its own sibillexer module

Lots of failing tests right now, unfortunately. :(
This commit is contained in:
Eryn Wells 2017-04-15 09:37:12 -07:00
parent cea63e8e8e
commit fc947280ae
9 changed files with 264 additions and 244 deletions

7
lexer/Cargo.toml Normal file
View file

@ -0,0 +1,7 @@
[package]
name = "sibillexer"
version = "0.1.0"
authors = ["Eryn Wells <eryn@erynwells.me>"]
[dependencies]
sibiltypes = { path = "../types" }

View file

@ -2,7 +2,7 @@
* Eryn Wells <eryn@erynwells.me>
*/
use lexer::charset;
use charset;
pub trait Lexable {
fn is_character_leader(&self) -> bool;

View file

@ -2,69 +2,19 @@
* Eryn Wells <eryn@erynwells.me>
*/
pub mod token;
pub use self::token::Lex;
pub use self::token::Token;
mod char;
mod charset;
mod number;
mod str;
/// Named characters: the set of recognized names and the mapping from each
/// name to the character it stands for.
mod named_char {
    use std::collections::HashSet;
    use types::Char;

    // Every name `char_named_by` understands. `set` must list exactly these.
    const ALARM: &'static str = "alarm";
    const BACKSPACE: &'static str = "backspace";
    const DELETE: &'static str = "delete";
    const ESCAPE: &'static str = "escape";
    const NEWLINE: &'static str = "newline";
    const NULL: &'static str = "null";
    const RETURN: &'static str = "return";
    const SPACE: &'static str = "space";
    const TAB: &'static str = "tab";

    /// Returns a freshly built set holding every recognized character name.
    pub fn set() -> HashSet<&'static str> {
        [ALARM, BACKSPACE, DELETE, ESCAPE, NEWLINE, NULL, RETURN, SPACE, TAB]
            .iter()
            .cloned()
            .collect()
    }

    /// Returns the `Char` that the name `named` stands for.
    ///
    /// # Panics
    ///
    /// Panics if `named` is not one of the names returned by `set`.
    pub fn char_named_by(named: &str) -> Char {
        let value = match named {
            ALARM => '\x07',
            BACKSPACE => '\x08',
            DELETE => '\x7F',
            ESCAPE => '\x1B',
            NEWLINE => '\n',
            NULL => '\0',
            RETURN => '\r',
            SPACE => ' ',
            TAB => '\t',
            _ => panic!("char_named_by called with invalid named char string"),
        };
        Char::new(value)
    }
}
use std::collections::HashSet;
use sibiltypes::{Bool, Char};
use types::{Bool, Char};
use self::char::Lexable;
use self::number::Exactness;
use self::number::NumberBuilder;
use self::number::Radix;
use self::number::Sign;
use self::str::CharAt;
use self::str::RelativeIndexable;
use char::Lexable;
use named_char;
use number::Exactness;
use number::NumberBuilder;
use number::Radix;
use number::Sign;
use str::CharAt;
use str::RelativeIndexable;
use token::Lex;
use token::Token;
type StateResult = Result<Option<Token>, String>;
@ -265,7 +215,7 @@ impl Lexer {
if candidates.len() > 0 {
self.state = State::NamedChar(candidates, lower_c);
} else {
return self.token_result(Token::Character(Char::new(c)));
return self.token_result(Token::Character(Char(c)));
}
Ok(None)
}
@ -280,7 +230,7 @@ impl Lexer {
if c.is_identifier_delimiter() || c.is_eof() {
if progress.len() == 1 {
self.retract();
return self.token_result(Token::Character(Char::new(progress.chars().next().unwrap())));
return self.token_result(Token::Character(Char(progress.chars().next().unwrap())));
}
else {
return self.generic_error(c);
@ -337,7 +287,7 @@ impl Lexer {
fn state_hash(&mut self, c: char) -> StateResult {
if c.is_boolean_true() || c.is_boolean_false() {
self.advance();
return self.token_result(Token::Boolean(Bool::new(c.is_boolean_true())));
return self.token_result(Token::Boolean(Bool(c.is_boolean_true())));
}
else if c.is_left_paren() {
self.advance();
@ -580,166 +530,3 @@ impl HasResult for StateResult {
}
}
}
//
// UNIT TESTING
//
/// Lexer tests. Each test feeds a source string to `Lexer::new` and compares
/// the tokens it yields against the expected `Token` values.
#[cfg(test)]
mod tests {
    use types::{Bool, Char, Number};
    use std::iter::Iterator;
    use super::*;

    #[test]
    fn finds_parens() {
        check_single_token("(", Token::LeftParen);
        check_single_token(")", Token::RightParen);
        check_single_token("#(", Token::LeftVectorParen);
    }

    #[test]
    fn finds_characters() {
        check_single_token("#\\a", Token::Character(Char::new('a')));
        check_single_token("#\\n", Token::Character(Char::new('n')));
        check_single_token("#\\s", Token::Character(Char::new('s')));
    }

    #[test]
    fn finds_named_characters() {
        check_single_token("#\\newline", Token::Character(Char::new('\n')));
        check_single_token("#\\null", Token::Character(Char::new('\0')));
        check_single_token("#\\space", Token::Character(Char::new(' ')));
    }

    #[test]
    fn finds_dots() {
        check_single_token(".", Token::Dot);

        let mut lexer = Lexer::new("abc . abc");
        assert_next_token(&mut lexer, &Token::Id(String::from("abc")));
        assert_next_token(&mut lexer, &Token::Dot);
        assert_next_token(&mut lexer, &Token::Id(String::from("abc")));
    }

    #[test]
    fn finds_identifiers() {
        let tok = |s: &str| { check_single_token(s, Token::Id(String::from(s))); };
        tok("abc");
        tok("number?");
        tok("+");
        tok("-");
    }

    #[test]
    fn finds_booleans() {
        check_single_token("#t", Token::Boolean(Bool::new(true)));
        check_single_token("#f", Token::Boolean(Bool::new(false)));
    }

    #[test]
    fn finds_comments() {
        let s = "; a comment";
        check_single_token(s, Token::Comment(String::from(s)));
    }

    #[test]
    fn finds_escaped_characters_in_strings() {
        check_single_token("\"\\\\\"", Token::String(String::from("\\")));
        check_single_token("\"\\\"\"", Token::String(String::from("\"")));
        check_single_token("\"\\n\"", Token::String(String::from("\n")));
    }

    #[test]
    fn finds_numbers() {
        check_single_token("34", Token::Number(Number::from_float(34.0)));
        check_single_token(".34", Token::Number(Number::from_float(0.34)));
        check_single_token("0.34", Token::Number(Number::from_float(0.34)));
    }

    #[test]
    fn finds_negative_numbers() {
        check_single_token("-3", Token::Number(Number::from_int(-3)));
        check_single_token("-0", Token::Number(Number::from_int(-0)));
        check_single_token("-0.56", Token::Number(Number::from_float(-0.56)));
        check_single_token("-3.14159", Token::Number(Number::from_float(-3.14159)));
    }

    #[test]
    fn finds_bin_numbers() {
        check_single_token("#b0", Token::Number(Number::from_int(0b0)));
        check_single_token("#b01011", Token::Number(Number::from_int(0b01011)));
    }

    #[test]
    fn finds_dec_numbers() {
        check_single_token("34", Token::Number(Number::from_float(34.0)));
        check_single_token("#d89", Token::Number(Number::from_int(89)));
    }

    #[test]
    fn finds_oct_numbers() {
        check_single_token("#o45", Token::Number(Number::from_int(0o45)));
    }

    #[test]
    fn finds_exact_numbers() {
        check_single_token("#e45", Token::Number(Number::from_int(45)));
        check_single_token("#e-45", Token::Number(Number::from_int(-45)));
    }

    #[test]
    fn finds_hex_numbers() {
        check_single_token("#h4A65", Token::Number(Number::from_int(0x4A65)));
    }

    #[test]
    fn finds_quote() {
        check_single_token("'", Token::Quote);
    }

    #[test]
    fn finds_strings() {
        check_single_token("\"\"", Token::String(String::from("")));
        check_single_token("\"abc\"", Token::String(String::from("abc")));
    }

    #[test]
    fn lexes_simple_expression() {
        check_tokens("(+ 3.4 6.8)", vec![
            Token::LeftParen,
            Token::Id(String::from("+")),
            Token::Number(Number::from_float(3.4)),
            Token::Number(Number::from_float(6.8)),
            Token::RightParen]);
    }

    #[test]
    fn lexes_quoted_identifier() {
        check_tokens("'abc", vec![Token::Quote, Token::Id(String::from("abc"))]);
    }

    /// Asserts that the first token lexed from `input` equals `expected`.
    fn check_single_token(input: &str, expected: Token) {
        let mut lexer = Lexer::new(input);
        assert_next_token(&mut lexer, &expected);
    }

    /// Asserts that lexing `input` yields exactly the tokens in `expected`,
    /// in order: no extra tokens and no missing ones.
    fn check_tokens(input: &str, expected: Vec<Token>) {
        let lexer = Lexer::new(input);
        let mut expected_iter = expected.iter();
        for lex in lexer {
            match expected_iter.next() {
                Some(expected_token) => assert_eq!(lex.token, *expected_token),
                None => panic!("Found a token we didn't expect: {:?}", lex.token),
            }
        }
        // Anything left over means the lexer stopped early; without this
        // check a lexer that yields too few tokens would pass silently.
        let missing: Vec<&Token> = expected_iter.collect();
        assert!(missing.is_empty(), "Lexer stopped before producing expected tokens: {:?}", missing);
    }

    /// Pulls the next item from `lexer` and asserts its token equals `expected`.
    /// Panics if the lexer has nothing more to yield.
    fn assert_next_token(lexer: &mut Lexer, expected: &Token) {
        let lex = lexer.next().expect("lexer ended before producing the expected token");
        assert_eq!(lex.token, *expected);
    }
}

169
lexer/src/lib.rs Normal file
View file

@ -0,0 +1,169 @@
extern crate sibiltypes;
mod char;
mod charset;
mod lexer;
mod named_char;
mod number;
mod str;
mod token;
/// Lexer tests. Each test feeds a source string to `Lexer::new` and compares
/// the tokens it yields against the expected `Token` values.
#[cfg(test)]
mod tests {
    use sibiltypes::{Bool, Char, Number};
    use std::iter::Iterator;
    use lexer::Lexer;
    use token::Token;

    #[test]
    fn finds_parens() {
        check_single_token("(", Token::LeftParen);
        check_single_token(")", Token::RightParen);
        check_single_token("#(", Token::LeftVectorParen);
    }

    #[test]
    fn finds_characters() {
        check_single_token("#\\a", Token::Character(Char('a')));
        check_single_token("#\\n", Token::Character(Char('n')));
        check_single_token("#\\s", Token::Character(Char('s')));
    }

    #[test]
    fn finds_named_characters() {
        check_single_token("#\\newline", Token::Character(Char('\n')));
        check_single_token("#\\null", Token::Character(Char('\0')));
        check_single_token("#\\space", Token::Character(Char(' ')));
    }

    #[test]
    fn finds_dots() {
        check_single_token(".", Token::Dot);

        let mut lexer = Lexer::new("abc . abc");
        assert_next_token(&mut lexer, &Token::Id(String::from("abc")));
        assert_next_token(&mut lexer, &Token::Dot);
        assert_next_token(&mut lexer, &Token::Id(String::from("abc")));
    }

    #[test]
    fn finds_identifiers() {
        let tok = |s: &str| { check_single_token(s, Token::Id(String::from(s))); };
        tok("abc");
        tok("number?");
        tok("+");
        tok("-");
    }

    #[test]
    fn finds_booleans() {
        check_single_token("#t", Token::Boolean(Bool(true)));
        check_single_token("#f", Token::Boolean(Bool(false)));
    }

    #[test]
    fn finds_comments() {
        let s = "; a comment";
        check_single_token(s, Token::Comment(String::from(s)));
    }

    #[test]
    fn finds_escaped_characters_in_strings() {
        check_single_token("\"\\\\\"", Token::String(String::from("\\")));
        check_single_token("\"\\\"\"", Token::String(String::from("\"")));
        check_single_token("\"\\n\"", Token::String(String::from("\n")));
    }

    #[test]
    fn finds_numbers() {
        check_single_token("34", Token::Number(Number::from_int(34, true)));
        check_single_token(".34", Token::Number(Number::from_float(0.34, false)));
        check_single_token("0.34", Token::Number(Number::from_float(0.34, false)));
    }

    #[test]
    fn finds_negative_numbers() {
        check_single_token("-3", Token::Number(Number::from_int(-3, true)));
        check_single_token("-0", Token::Number(Number::from_int(-0, true)));
        check_single_token("-0.56", Token::Number(Number::from_float(-0.56, false)));
        check_single_token("-3.14159", Token::Number(Number::from_float(-3.14159, false)));
    }

    #[test]
    fn finds_bin_numbers() {
        check_single_token("#b0", Token::Number(Number::from_int(0b0, true)));
        check_single_token("#b01011", Token::Number(Number::from_int(0b01011, true)));
    }

    #[test]
    fn finds_dec_numbers() {
        check_single_token("34", Token::Number(Number::from_int(34, true)));
        check_single_token("#d89", Token::Number(Number::from_int(89, true)));
    }

    #[test]
    fn finds_oct_numbers() {
        check_single_token("#o45", Token::Number(Number::from_int(0o45, true)));
    }

    #[test]
    fn finds_exact_numbers() {
        check_single_token("#e45", Token::Number(Number::from_int(45, true)));
        check_single_token("#e-45", Token::Number(Number::from_int(-45, true)));
    }

    #[test]
    fn finds_hex_numbers() {
        check_single_token("#h4A65", Token::Number(Number::from_int(0x4A65, true)));
    }

    #[test]
    fn finds_quote() {
        check_single_token("'", Token::Quote);
    }

    #[test]
    fn finds_strings() {
        check_single_token("\"\"", Token::String(String::from("")));
        check_single_token("\"abc\"", Token::String(String::from("abc")));
    }

    #[test]
    fn lexes_simple_expression() {
        check_tokens("(+ 3.4 6.8)", vec![
            Token::LeftParen,
            Token::Id(String::from("+")),
            Token::Number(Number::from_float(3.4, false)),
            Token::Number(Number::from_float(6.8, false)),
            Token::RightParen]);
    }

    #[test]
    fn lexes_quoted_identifier() {
        check_tokens("'abc", vec![Token::Quote, Token::Id(String::from("abc"))]);
    }

    /// Asserts that the first token lexed from `input` equals `expected`.
    fn check_single_token(input: &str, expected: Token) {
        let mut lexer = Lexer::new(input);
        assert_next_token(&mut lexer, &expected);
    }

    /// Asserts that lexing `input` yields exactly the tokens in `expected`,
    /// in order: no extra tokens and no missing ones.
    fn check_tokens(input: &str, expected: Vec<Token>) {
        let lexer = Lexer::new(input);
        let mut expected_iter = expected.iter();
        for lex in lexer {
            match expected_iter.next() {
                Some(expected_token) => assert_eq!(lex.token, *expected_token),
                None => panic!("Found a token we didn't expect: {:?}", lex.token),
            }
        }
        // Anything left over means the lexer stopped early; without this
        // check a lexer that yields too few tokens would pass silently.
        let missing: Vec<&Token> = expected_iter.collect();
        assert!(missing.is_empty(), "Lexer stopped before producing expected tokens: {:?}", missing);
    }

    /// Pulls the next item from `lexer` and asserts its token equals `expected`.
    /// Panics if the lexer has nothing more to yield.
    fn assert_next_token(lexer: &mut Lexer, expected: &Token) {
        let lex = lexer.next().expect("lexer ended before producing the expected token");
        assert_eq!(lex.token, *expected);
    }
}

45
lexer/src/named_char.rs Normal file
View file

@ -0,0 +1,45 @@
/* lexer/src/named_char.rs
* Eryn Wells <eryn@erynwells.me>
*/
use std::collections::HashSet;
use sibiltypes::Char;
// Every character name this module understands. `set` must list exactly
// these, and `char_named_by` maps each one to its character.
const ALARM: &'static str = "alarm";
const BACKSPACE: &'static str = "backspace";
const DELETE: &'static str = "delete";
const ESCAPE: &'static str = "escape";
const NEWLINE: &'static str = "newline";
const NULL: &'static str = "null";
const RETURN: &'static str = "return";
const SPACE: &'static str = "space";
const TAB: &'static str = "tab";

/// Returns a freshly built set holding every recognized character name.
pub fn set() -> HashSet<&'static str> {
    [ALARM, BACKSPACE, DELETE, ESCAPE, NEWLINE, NULL, RETURN, SPACE, TAB]
        .iter()
        .cloned()
        .collect()
}
/// Returns the `Char` that the name `named` stands for.
///
/// # Panics
///
/// Panics if `named` is not one of the names returned by `set`.
pub fn char_named_by(named: &str) -> Char {
    let value = match named {
        ALARM => '\x07',
        BACKSPACE => '\x08',
        DELETE => '\x7F',
        ESCAPE => '\x1B',
        NEWLINE => '\n',
        NULL => '\0',
        RETURN => '\r',
        SPACE => ' ',
        TAB => '\t',
        _ => panic!("char_named_by called with invalid named char string"),
    };
    Char(value)
}

View file

@ -2,7 +2,7 @@
* Eryn Wells <eryn@erynwells.me>
*/
use types::Number;
use sibiltypes::Number;
#[derive(Debug)]
pub enum Radix { Bin, Oct, Dec, Hex }
@ -67,9 +67,20 @@ impl NumberBuilder {
pub fn resolve(&self) -> Number {
// TODO: Convert fields to Number type.
let value = if self.point > 0 { self.value / 10u32.pow(self.point) as f64 } else { self.value };
let value = if self.sign == Sign::Neg { value * -1.0 } else { value };
Number::from_float(value)
let value = if self.point > 0 {
self.value / 10u32.pow(self.point) as f64
}
else {
self.value
};
let value = if self.sign == Sign::Neg {
value * -1.0
}
else {
value
};
// TODO: Use an integer if we can.
Number::from_float(value, self.exact == Exactness::Exact)
}
pub fn radix_value(&self) -> u32 {
@ -133,42 +144,43 @@ impl Exactness {
#[cfg(test)]
mod tests {
use sibiltypes::Number;
use super::*;
#[test]
fn builds_integers() {
let mut b = NumberBuilder::new();
b.extend_value('3');
assert_eq!(b.resolve().value, 3.0);
assert_eq!(b.resolve(), Number::from_int(3, true));
b.extend_value('4');
assert_eq!(b.resolve().value, 34.0);
assert_eq!(b.resolve(), Number::from_int(34, true));
}
#[test]
fn builds_negative_integers() {
let num = NumberBuilder::new().sign(Sign::Neg).extend_value('3').resolve();
assert_eq!(num.value, -3.0);
assert_eq!(num, Number::from_int(-3, true));
}
#[test]
fn builds_pointy_numbers() {
let mut b = NumberBuilder::new();
b.extend_value('5');
assert_eq!(b.resolve().value, 5.0);
assert_eq!(b.resolve(), Number::from_int(5, true));
b.extend_decimal_value('3');
assert_eq!(b.resolve().value, 5.3);
assert_eq!(b.resolve(), Number::from_float(5.3, false));
b.extend_decimal_value('4');
assert_eq!(b.resolve().value, 5.34);
assert_eq!(b.resolve(), Number::from_float(5.34, false));
}
#[test]
fn builds_hex() {
let mut b = NumberBuilder::new();
b.radix(Radix::Hex).extend_value('4');
assert_eq!(b.resolve().value, 0x4 as f64);
assert_eq!(b.resolve(), Number::from_int(0x4, true));
b.extend_value('A');
assert_eq!(b.resolve().value, 0x4A as f64);
assert_eq!(b.resolve(), Number::from_int(0x4A, true));
b.extend_value('6');
assert_eq!(b.resolve().value, 0x4A6 as f64);
assert_eq!(b.resolve(), Number::from_int(0x4A6, true));
}
}

View file

@ -2,7 +2,7 @@
* Eryn Wells <eryn@erynwells.me>
*/
use types::{Bool, Char, Number};
use sibiltypes::{Bool, Char, Number};
#[derive(Debug, PartialEq)]
pub enum Token {
@ -19,8 +19,8 @@ pub enum Token {
String(String),
}
/// A Lex is a Token extracted from a specific position in an input. It contains useful information about the token's
/// place.
/// A Lex is a Token extracted from a specific position in an input stream. It
/// contains useful information about the token's place.
#[derive(Debug)]
pub struct Lex {
pub token: Token,