Move the lexer to its own sibillexer module

Lots of failing tests right now, unfortunately. :(
Eryn Wells 2017-04-15 09:37:12 -07:00
parent cea63e8e8e
commit fc947280ae
9 changed files with 264 additions and 244 deletions

lexer/Cargo.toml (new file, 7 lines)

@@ -0,0 +1,7 @@
[package]
name = "sibillexer"
version = "0.1.0"
authors = ["Eryn Wells <eryn@erynwells.me>"]
[dependencies]
sibiltypes = { path = "../types" }

lexer/src/char.rs (new file, 103 lines)

@@ -0,0 +1,103 @@
/* char.rs
* Eryn Wells <eryn@erynwells.me>
*/
use charset;
pub trait Lexable {
fn is_character_leader(&self) -> bool;
fn is_dot(&self) -> bool;
fn is_hash(&self) -> bool;
fn is_quote(&self) -> bool;
fn is_left_paren(&self) -> bool;
fn is_right_paren(&self) -> bool;
fn is_string_quote(&self) -> bool;
fn is_string_escape_leader(&self) -> bool;
fn is_string_escaped(&self) -> bool;
fn is_newline(&self) -> bool;
fn is_eof(&self) -> bool;
fn is_identifier_initial(&self) -> bool;
fn is_identifier_subsequent(&self) -> bool;
fn is_identifier_delimiter(&self) -> bool;
fn is_boolean_true(&self) -> bool;
fn is_boolean_false(&self) -> bool;
fn is_comment_initial(&self) -> bool;
}
impl Lexable for char {
fn is_left_paren(&self) -> bool {
*self == '('
}
fn is_right_paren(&self) -> bool {
*self == ')'
}
fn is_character_leader(&self) -> bool {
*self == '\\'
}
fn is_dot(&self) -> bool {
*self == '.'
}
fn is_hash(&self) -> bool {
*self == '#'
}
fn is_quote(&self) -> bool {
*self == '\''
}
fn is_string_quote(&self) -> bool {
*self == '"'
}
fn is_string_escape_leader(&self) -> bool {
*self == '\\'
}
fn is_string_escaped(&self) -> bool {
*self == '"' || *self == '\\'
}
fn is_boolean_true(&self) -> bool {
*self == 't'
}
fn is_boolean_false(&self) -> bool {
*self == 'f'
}
fn is_newline(&self) -> bool {
*self == '\n'
}
fn is_eof(&self) -> bool {
*self == '\0'
}
fn is_comment_initial(&self) -> bool {
*self == ';'
}
fn is_identifier_initial(&self) -> bool {
charset::identifier_initials().contains(self)
}
fn is_identifier_subsequent(&self) -> bool {
charset::identifier_subsequents().contains(self)
}
fn is_identifier_delimiter(&self) -> bool {
self.is_whitespace()
|| self.is_comment_initial()
|| self.is_left_paren()
|| self.is_right_paren()
|| self.is_string_quote()
|| self.is_eof()
}
}
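
For quick reference, a small usage sketch of the classification helpers above (not part of this commit); it assumes the Lexable trait is in scope:

    use char::Lexable;

    fn classify_demo() {
        // Single-character structural tokens.
        assert!('('.is_left_paren());
        assert!(';'.is_comment_initial());
        // Identifier characters, and the delimiters that end an identifier.
        assert!('a'.is_identifier_initial());
        assert!(' '.is_identifier_delimiter());
    }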

lexer/src/charset.rs (new file, 43 lines)

@@ -0,0 +1,43 @@
/* charset.rs
* Eryn Wells <eryn@erynwells.me>
*/
use std::collections::HashSet;
use std::iter::FromIterator;
pub type CharSet = HashSet<char>;
// TODO: Use std::sync::Once for these sets?
// https://doc.rust-lang.org/beta/std/sync/struct.Once.html
fn ascii_letters() -> CharSet {
let letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".chars();
CharSet::from_iter(letters)
}
fn ascii_digits() -> CharSet {
let digits = "1234567890".chars();
CharSet::from_iter(digits)
}
/// A set of all characters allowed to start Scheme identifiers.
pub fn identifier_initials() -> CharSet {
let letters = ascii_letters();
let extras = CharSet::from_iter("!$%&*/:<=>?~_^".chars());
let mut initials = CharSet::new();
initials.extend(letters.iter());
initials.extend(extras.iter());
initials
}
/// A set of all characters allowed to follow an identifier initial.
pub fn identifier_subsequents() -> CharSet {
let initials = identifier_initials();
let digits = ascii_digits();
let extras = CharSet::from_iter(".+-".chars());
let mut subsequents = CharSet::new();
subsequents.extend(initials.iter());
subsequents.extend(digits.iter());
subsequents.extend(extras.iter());
subsequents
}
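
The TODO above asks about building these sets only once. A minimal sketch of one option (not part of this commit), assuming the lazy_static crate were added as a dependency and the extern crate line placed in the crate root:

    #[macro_use] extern crate lazy_static;

    lazy_static! {
        // Built on first use and reused afterwards, instead of rebuilt on every call.
        static ref IDENTIFIER_INITIALS: CharSet = identifier_initials();
        static ref IDENTIFIER_SUBSEQUENTS: CharSet = identifier_subsequents();
    }

    fn is_initial_cached(c: char) -> bool {
        IDENTIFIER_INITIALS.contains(&c)
    }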

lexer/src/lexer.rs (new file, 532 lines)

@@ -0,0 +1,532 @@
/* lexer.rs
* Eryn Wells <eryn@erynwells.me>
*/
use std::collections::HashSet;
use sibiltypes::{Bool, Char};
use char::Lexable;
use named_char;
use number::Exactness;
use number::NumberBuilder;
use number::Radix;
use number::Sign;
use str::CharAt;
use str::RelativeIndexable;
use token::Lex;
use token::Token;
type StateResult = Result<Option<Token>, String>;
trait HasResult {
fn has_token(&self) -> bool;
}
#[derive(Debug)]
enum State {
Char,
NamedChar(HashSet<&'static str>, String),
Comment,
Initial,
Id,
Dot,
Hash,
Number,
NumberExactness,
NumberDecimal,
NumberRadix,
NumberSign,
Sign,
String,
StringEscape,
}
pub fn lex(input: &str) -> Lexer {
Lexer::new(&input)
}
pub struct Lexer {
input: String,
begin: usize,
forward: usize,
line: usize,
line_offset: usize,
state: State,
number_builder: NumberBuilder,
string_value: String,
}
impl Lexer {
pub fn new(input: &str) -> Lexer {
Lexer {
input: String::from(input),
begin: 0,
forward: 0,
line: 1,
line_offset: 1,
state: State::Initial,
number_builder: NumberBuilder::new(),
string_value: String::new(),
}
}
}
impl Lexer {
fn begin_lexing(&mut self) {
self.forward = self.begin;
self.state = State::Initial;
}
/// Advance the forward pointer to the next character.
fn advance(&mut self) {
self.forward = self.input.index_after(self.forward);
self.line_offset += 1;
println!("> forward={}", self.forward);
}
/// Retract the forward pointer to the previous character.
fn retract(&mut self) {
self.forward = self.input.index_before(self.forward);
self.line_offset -= 1;
println!("< forward={}", self.forward);
}
/// Advance the begin pointer to prepare for the next iteration.
fn advance_begin(&mut self) {
self.begin = self.input.index_after(self.forward);
self.forward = self.begin;
println!("> begin={}, forward={}", self.begin, self.forward);
}
/// Update lexer state when it encounters a newline.
fn handle_newline(&mut self) {
self.line += 1;
self.line_offset = 1;
}
/// Get the substring between the two input indexes. This is the value to give to a new Token instance.
fn value(&self) -> String {
self.input[self.begin .. self.forward].to_string()
}
fn error_string(&self, message: String) -> String {
format!("{}:{}: {}", self.line, self.line_offset, message)
}
fn token_result(&self, token: Token) -> StateResult {
Ok(Some(token))
}
fn generic_error(&self, c: char) -> StateResult {
Err(self.error_string(format!("Invalid token character: {}", c)))
}
}
impl Lexer {
/// Handle self.state == State::Initial
fn state_initial(&mut self, c: char) -> StateResult {
if c.is_left_paren() {
return self.token_result(Token::LeftParen);
}
else if c.is_right_paren() {
return self.token_result(Token::RightParen);
}
else if c.is_dot() {
self.state = State::Dot;
self.advance();
}
else if c.is_hash() {
self.state = State::Hash;
self.advance();
}
else if c.is_quote() {
return self.token_result(Token::Quote);
}
else if c.is_string_quote() {
self.string_value = String::from("");
self.state = State::String;
self.advance();
}
else if let Some(sign) = Sign::from_char(c) {
self.number_builder = NumberBuilder::new();
self.number_builder.sign(sign);
self.state = State::Sign;
self.advance();
}
else if c.is_identifier_initial() {
self.state = State::Id;
self.advance();
}
else if c.is_digit(10) {
self.number_builder = NumberBuilder::new();
self.number_builder.extend_value(c);
self.state = State::Number;
self.advance();
}
else if c.is_whitespace() {
if c.is_newline() {
self.handle_newline();
}
self.advance_begin();
}
else if c.is_comment_initial() {
self.state = State::Comment;
self.advance();
}
else {
return self.generic_error(c);
}
Ok(None)
}
/// Handle self.state == State::Id
fn state_identifier(&mut self, c: char) -> StateResult {
if c.is_identifier_subsequent() {
// Stay in Id state.
self.advance();
}
else if c.is_identifier_delimiter() {
let value = self.value();
self.retract();
return self.token_result(Token::Id(value));
}
else {
return self.generic_error(c);
}
Ok(None)
}
/// Handle self.state == State::Char
fn state_char(&mut self, c: char) -> StateResult {
self.advance();
let lower_c = c.to_lowercase().collect::<String>();
let mut candidates: HashSet<&str> = HashSet::new();
for c in named_char::set().iter() {
if c.starts_with(&lower_c) {
candidates.insert(c);
}
}
if candidates.len() > 0 {
self.state = State::NamedChar(candidates, lower_c);
} else {
return self.token_result(Token::Character(Char(c)));
}
Ok(None)
}
/// Handle self.state == State::NamedChar
fn state_named_char(&mut self, c: char) -> StateResult {
let (candidates, mut progress) = match self.state {
State::NamedChar(ref candidates, ref progress) => (candidates.clone(), progress.clone()),
_ => panic!("Called state_named_char without being in NamedChar state")
};
if c.is_identifier_delimiter() || c.is_eof() {
if progress.len() == 1 {
self.retract();
return self.token_result(Token::Character(Char(progress.chars().next().unwrap())));
}
else {
return self.generic_error(c);
}
}
progress.push(c);
let candidates: HashSet<&str> = {
let filtered = candidates.iter().filter(|c| c.starts_with(&progress)).map(|c| *c);
filtered.collect()
};
if candidates.len() == 1 {
let candidate = *candidates.iter().next().unwrap();
if candidate == &progress {
self.token_result(Token::Character(named_char::char_named_by(&progress)))
}
else {
self.state = State::NamedChar(candidates, progress);
self.advance();
Ok(None)
}
}
else if candidates.len() > 1 {
self.state = State::NamedChar(candidates, progress);
self.advance();
Ok(None)
}
else {
self.generic_error(c)
}
}
/// Handle self.state == State::Dot
fn state_dot(&mut self, c: char) -> StateResult {
if c.is_identifier_delimiter() {
self.retract();
return self.token_result(Token::Dot);
}
else if c.is_digit(10) {
self.number_builder = NumberBuilder::new();
self.number_builder.extend_decimal_value(c);
self.state = State::NumberDecimal;
self.advance();
}
else {
return self.generic_error(c);
}
Ok(None)
}
/// Handle self.state == State::Hash
fn state_hash(&mut self, c: char) -> StateResult {
if c.is_boolean_true() || c.is_boolean_false() {
self.advance();
return self.token_result(Token::Boolean(Bool(c.is_boolean_true())));
}
else if c.is_left_paren() {
self.advance();
return self.token_result(Token::LeftVectorParen);
}
else if c.is_character_leader() {
self.state = State::Char;
self.advance();
}
else if let Some(radix) = Radix::from_char(c) {
self.number_builder.radix(radix);
self.state = State::NumberRadix;
self.advance();
}
else if let Some(exactness) = Exactness::from_char(c) {
self.number_builder.exact(exactness);
self.state = State::NumberExactness;
self.advance();
}
else {
return self.generic_error(c);
}
Ok(None)
}
/// Handle self.state == State::Number
fn state_number(&mut self, c: char) -> StateResult {
if c.is_digit(self.number_builder.radix_value()) {
self.number_builder.extend_value(c);
self.advance();
}
else if c.is_dot() {
self.state = State::NumberDecimal;
self.advance();
}
else if c.is_identifier_delimiter() {
self.retract();
return self.token_result(Token::Number(self.number_builder.resolve()));
}
else {
return self.generic_error(c);
}
Ok(None)
}
fn state_number_exactness(&mut self, c: char) -> StateResult {
if c.is_hash() {
self.state = State::Hash;
self.advance();
}
else if let Some(sign) = Sign::from_char(c) {
self.number_builder.sign(sign);
self.state = State::NumberSign;
self.advance();
}
else if c.is_digit(self.number_builder.radix_value()) {
self.number_builder.extend_value(c);
self.state = State::Number;
self.advance();
}
else {
return self.generic_error(c);
}
Ok(None)
}
fn state_number_decimal(&mut self, c: char) -> StateResult {
if c.is_digit(Radix::Dec.value()) {
self.number_builder.extend_decimal_value(c);
self.advance();
}
else if c.is_identifier_delimiter() {
self.retract();
return self.token_result(Token::Number(self.number_builder.resolve()));
}
else {
return self.generic_error(c);
}
Ok(None)
}
fn state_number_radix(&mut self, c: char) -> StateResult {
if c.is_digit(self.number_builder.radix_value()) {
self.number_builder.extend_value(c);
self.state = State::Number;
self.advance();
}
else if c.is_dot() {
self.state = State::NumberDecimal;
self.advance();
}
else if c.is_hash() {
self.state = State::Hash;
self.advance();
}
else if let Some(sign) = Sign::from_char(c) {
self.number_builder.sign(sign);
self.state = State::NumberSign;
self.advance();
}
else {
return self.generic_error(c);
}
Ok(None)
}
fn state_number_sign(&mut self, c: char) -> StateResult {
if c.is_digit(self.number_builder.radix_value()) {
self.number_builder.extend_value(c);
self.state = State::Number;
self.advance();
}
else if c.is_dot() {
self.state = State::NumberDecimal;
self.advance();
}
else {
return self.generic_error(c);
}
Ok(None)
}
fn state_sign(&mut self, c: char) -> StateResult {
if c.is_digit(Radix::Dec.value()) {
self.number_builder.extend_value(c);
self.state = State::Number;
self.advance();
}
else if c.is_identifier_delimiter() {
let value = self.value();
self.retract();
return self.token_result(Token::Id(value));
}
else {
return self.generic_error(c);
}
Ok(None)
}
fn state_string(&mut self, c: char) -> StateResult {
self.advance();
if c.is_string_quote() {
return self.token_result(Token::String(self.string_value.clone()));
}
else if c.is_string_escape_leader() {
self.state = State::StringEscape;
}
else {
self.string_value.push(c);
}
Ok(None)
}
fn state_string_escape(&mut self, c: char) -> StateResult {
let char_to_push = match c {
'0' => '\0',
'n' => '\n',
't' => '\t',
'"' => '"',
'\\' => '\\',
_ => return Err(self.error_string(format!("Invalid string escape character: {}", c))),
};
self.string_value.push(char_to_push);
self.state = State::String;
self.advance();
Ok(None)
}
fn state_comment(&mut self, c: char) -> StateResult {
if c.is_newline() {
self.handle_newline();
return self.token_result(Token::Comment(self.value()));
}
else if c.is_eof() {
return self.token_result(Token::Comment(self.value()));
}
self.advance();
Ok(None)
}
}
impl Iterator for Lexer {
type Item = Lex;
fn next(&mut self) -> Option<Lex> {
self.begin_lexing();
if self.begin == self.input.len() {
return None;
}
let mut token: Option<Token> = None;
println!("Lexing '{}'", &self.input[self.begin ..]);
while token.is_none() {
let c = match self.input.char_at(self.forward) {
Some(c) => c,
None => '\0',
};
println!("{:?}! c='{}'", self.state, c);
let previous_forward = self.forward;
let result = match self.state {
State::Char => self.state_char(c),
State::NamedChar(_, _) => self.state_named_char(c),
State::Comment => self.state_comment(c),
State::Dot => self.state_dot(c),
State::Hash => self.state_hash(c),
State::Id => self.state_identifier(c),
State::Initial => self.state_initial(c),
State::Number => self.state_number(c),
State::NumberDecimal => self.state_number_decimal(c),
State::NumberExactness => self.state_number_exactness(c),
State::NumberRadix => self.state_number_radix(c),
State::NumberSign => self.state_number_sign(c),
State::Sign => self.state_sign(c),
State::String => self.state_string(c),
State::StringEscape => self.state_string_escape(c),
};
debug_assert!(result.has_token() || self.forward != previous_forward, "No lexing progress made!");
if result.has_token() {
token = result.ok().unwrap();
}
else if result.is_err() {
assert!(false, "{}", result.err().unwrap());
}
}
self.advance_begin();
match token {
Some(t) => Some(Lex::new(t, self.line, self.line_offset)),
None => None,
}
}
}
impl HasResult for StateResult {
fn has_token(&self) -> bool {
match *self {
Ok(ref token) => match *token {
Some(_) => true,
None => false,
},
Err(_) => false
}
}
}
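
A short usage sketch (not part of this commit) of driving the lexer from inside the crate; each item the iterator yields is a Lex carrying the token plus the line and offset where it was found:

    use lexer::Lexer;

    fn dump_tokens(input: &str) {
        for lex in Lexer::new(input) {
            println!("{}:{}: {:?}", lex.line, lex.offset, lex.token);
        }
    }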

lexer/src/lib.rs (new file, 169 lines)

@@ -0,0 +1,169 @@
extern crate sibiltypes;
mod char;
mod charset;
mod lexer;
mod named_char;
mod number;
mod str;
mod token;
#[cfg(test)]
mod tests {
use sibiltypes::{Bool, Char, Number};
use std::iter::Iterator;
use lexer::Lexer;
use token::Token;
#[test]
fn finds_parens() {
check_single_token("(", Token::LeftParen);
check_single_token(")", Token::RightParen);
check_single_token("#(", Token::LeftVectorParen);
}
#[test]
fn finds_characters() {
check_single_token("#\\a", Token::Character(Char('a')));
check_single_token("#\\n", Token::Character(Char('n')));
check_single_token("#\\s", Token::Character(Char('s')));
}
#[test]
fn finds_named_characters() {
check_single_token("#\\newline", Token::Character(Char('\n')));
check_single_token("#\\null", Token::Character(Char('\0')));
check_single_token("#\\space", Token::Character(Char(' ')));
}
#[test]
fn finds_dots() {
check_single_token(".", Token::Dot);
let mut lexer = Lexer::new("abc . abc");
assert_next_token(&mut lexer, &Token::Id(String::from("abc")));
assert_next_token(&mut lexer, &Token::Dot);
assert_next_token(&mut lexer, &Token::Id(String::from("abc")));
}
#[test]
fn finds_identifiers() {
let tok = |s: &str| { check_single_token(s, Token::Id(String::from(s))); };
tok("abc");
tok("number?");
tok("+");
tok("-");
}
#[test]
fn finds_booleans() {
check_single_token("#t", Token::Boolean(Bool(true)));
check_single_token("#f", Token::Boolean(Bool(false)));
}
#[test]
fn finds_comments() {
let s = "; a comment";
check_single_token(s, Token::Comment(String::from(s)));
}
#[test]
fn finds_escaped_characters_in_strings() {
check_single_token("\"\\\\\"", Token::String(String::from("\\")));
check_single_token("\"\\\"\"", Token::String(String::from("\"")));
check_single_token("\"\\n\"", Token::String(String::from("\n")));
}
#[test]
fn finds_numbers() {
check_single_token("34", Token::Number(Number::from_int(34, true)));
check_single_token(".34", Token::Number(Number::from_float(0.34, false)));
check_single_token("0.34", Token::Number(Number::from_float(0.34, false)));
}
#[test]
fn finds_negative_numbers() {
check_single_token("-3", Token::Number(Number::from_int(-3, true)));
check_single_token("-0", Token::Number(Number::from_int(-0, true)));
check_single_token("-0.56", Token::Number(Number::from_float(-0.56, false)));
check_single_token("-3.14159", Token::Number(Number::from_float(-3.14159, false)));
}
#[test]
fn finds_bin_numbers() {
check_single_token("#b0", Token::Number(Number::from_int(0b0, true)));
check_single_token("#b01011", Token::Number(Number::from_int(0b01011, true)));
}
#[test]
fn finds_dec_numbers() {
check_single_token("34", Token::Number(Number::from_int(34, true)));
check_single_token("#d89", Token::Number(Number::from_int(89, true)));
}
#[test]
fn finds_oct_numbers() {
check_single_token("#o45", Token::Number(Number::from_int(0o45, true)));
}
#[test]
fn finds_exact_numbers() {
check_single_token("#e45", Token::Number(Number::from_int(45, true)));
check_single_token("#e-45", Token::Number(Number::from_int(-45, true)));
}
#[test]
fn finds_hex_numbers() {
check_single_token("#h4A65", Token::Number(Number::from_int(0x4A65, true)));
}
#[test]
fn finds_quote() {
check_single_token("'", Token::Quote);
}
#[test]
fn finds_strings() {
check_single_token("\"\"", Token::String(String::from("")));
check_single_token("\"abc\"", Token::String(String::from("abc")));
}
#[test]
fn lexes_simple_expression() {
check_tokens("(+ 3.4 6.8)", vec![
Token::LeftParen,
Token::Id(String::from("+")),
Token::Number(Number::from_float(3.4, false)),
Token::Number(Number::from_float(6.8, false)),
Token::RightParen]);
}
#[test]
fn lexes_quoted_identifier() {
check_tokens("'abc", vec![Token::Quote, Token::Id(String::from("abc"))]);
}
fn check_single_token(input: &str, expected: Token) {
let mut lexer = Lexer::new(input);
assert_next_token(&mut lexer, &expected);
}
fn check_tokens(input: &str, expected: Vec<Token>) {
let lexer = Lexer::new(input);
let mut expected_iter = expected.iter();
for lex in lexer {
if let Some(expected_token) = expected_iter.next() {
assert_eq!(lex.token, *expected_token);
}
else {
assert!(false, "Found a token we didn't expect: {:?}", lex.token);
}
}
// TODO: Check that all expected tokens are consumed.
}
fn assert_next_token(lexer: &mut Lexer, expected: &Token) {
let lex = lexer.next().unwrap();
assert_eq!(lex.token, *expected);
}
}
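
One way to close the TODO in check_tokens above (a sketch, not part of this commit) is to assert, after the for loop, that the expected iterator has been exhausted:

    // At the end of check_tokens: every expected token must have been matched.
    assert!(expected_iter.next().is_none(),
            "Lexer did not produce all of the expected tokens");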

lexer/src/named_char.rs (new file, 45 lines)

@@ -0,0 +1,45 @@
/* lexer/src/named_char.rs
* Eryn Wells <eryn@erynwells.me>
*/
use std::collections::HashSet;
use sibiltypes::Char;
const ALARM: &'static str = "alarm";
const BACKSPACE: &'static str = "backspace";
const DELETE: &'static str = "delete";
const ESCAPE: &'static str = "escape";
const NEWLINE: &'static str = "newline";
const NULL: &'static str = "null";
const RETURN: &'static str = "return";
const SPACE: &'static str = "space";
const TAB: &'static str = "tab";
pub fn set() -> HashSet<&'static str> {
let mut set: HashSet<&'static str> = HashSet::new();
set.insert(ALARM);
set.insert(BACKSPACE);
set.insert(DELETE);
set.insert(ESCAPE);
set.insert(NEWLINE);
set.insert(NULL);
set.insert(RETURN);
set.insert(SPACE);
set.insert(TAB);
set
}
pub fn char_named_by(named: &str) -> Char {
Char(match named {
ALARM => '\x07',
BACKSPACE => '\x08',
DELETE => '\x7F',
ESCAPE => '\x1B',
NEWLINE => '\n',
NULL => '\0',
RETURN => '\r',
SPACE => ' ',
TAB => '\t',
_ => panic!("char_named_by called with invalid named char string")
})
}
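
A tiny sketch (not part of this commit) of the name-to-character mapping above, from inside the crate:

    use sibiltypes::Char;

    fn named_char_demo() {
        assert!(named_char::set().contains("newline"));
        assert_eq!(named_char::char_named_by("newline"), Char('\n'));
    }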

lexer/src/number.rs (new file, 186 lines)

@@ -0,0 +1,186 @@
/* number.rs
* Eryn Wells <eryn@erynwells.me>
*/
use sibiltypes::Number;
#[derive(Debug)]
pub enum Radix { Bin, Oct, Dec, Hex }
#[derive(PartialEq, Debug)]
pub enum Sign { Pos, Neg }
#[derive(PartialEq, Debug)]
pub enum Exactness { Exact, Inexact }
#[derive(Debug)]
pub struct NumberBuilder {
exact: Exactness,
radix: Radix,
sign: Sign,
value: f64,
point: u32,
}
impl NumberBuilder {
pub fn new() -> NumberBuilder {
NumberBuilder {
exact: Exactness::Inexact,
radix: Radix::Dec,
sign: Sign::Pos,
value: 0.0,
point: 0,
}
}
pub fn exact<'a>(&'a mut self, ex: Exactness) -> &'a mut NumberBuilder {
self.exact = ex;
self
}
pub fn radix<'a>(&'a mut self, r: Radix) -> &'a mut NumberBuilder {
self.radix = r;
self
}
pub fn sign<'a>(&'a mut self, s: Sign) -> &'a mut NumberBuilder {
self.sign = s;
self
}
pub fn extend_value<'a>(&'a mut self, digit: char) -> &'a mut Self {
if let Some(place) = NumberBuilder::place_value(digit) {
self.value = self.radix.float_value() * self.value + place;
}
else {
// TODO: Indicate an error.
}
self
}
pub fn extend_decimal_value<'a>(&'a mut self, digit: char) -> &'a mut Self {
self.extend_value(digit);
self.point += 1;
println!("value = {}, point = {}", self.value, self.point);
self
}
pub fn resolve(&self) -> Number {
// TODO: Convert fields to Number type.
let value = if self.point > 0 {
self.value / 10u32.pow(self.point) as f64
}
else {
self.value
};
let value = if self.sign == Sign::Neg {
value * -1.0
}
else {
value
};
// TODO: Use an integer if we can.
Number::from_float(value, self.exact == Exactness::Exact)
}
pub fn radix_value(&self) -> u32 {
self.radix.value()
}
fn place_value(digit: char) -> Option<f64> {
match digit {
'0' ... '9' => Some((digit as u32 - '0' as u32) as f64),
'a' ... 'f' => Some((digit as u32 - 'a' as u32 + 10) as f64),
'A' ... 'F' => Some((digit as u32 - 'A' as u32 + 10) as f64),
_ => None,
}
}
}
impl Radix {
pub fn from_char(c: char) -> Option<Radix> {
match c {
'b' => Some(Radix::Bin),
'o' => Some(Radix::Oct),
'd' => Some(Radix::Dec),
'h' => Some(Radix::Hex),
_ => None,
}
}
pub fn value(&self) -> u32 {
match *self {
Radix::Bin => 2,
Radix::Oct => 8,
Radix::Dec => 10,
Radix::Hex => 16,
}
}
pub fn float_value(&self) -> f64 {
self.value() as f64
}
}
impl Sign {
pub fn from_char(c: char) -> Option<Sign> {
match c {
'+' => Some(Sign::Pos),
'-' => Some(Sign::Neg),
_ => None,
}
}
}
impl Exactness {
pub fn from_char(c: char) -> Option<Exactness> {
match c {
'i' => Some(Exactness::Inexact),
'e' => Some(Exactness::Exact),
_ => None,
}
}
}
#[cfg(test)]
mod tests {
use sibiltypes::Number;
use super::*;
#[test]
fn builds_integers() {
let mut b = NumberBuilder::new();
b.extend_value('3');
assert_eq!(b.resolve(), Number::from_int(3, true));
b.extend_value('4');
assert_eq!(b.resolve(), Number::from_int(34, true));
}
#[test]
fn builds_negative_integers() {
let num = NumberBuilder::new().sign(Sign::Neg).extend_value('3').resolve();
assert_eq!(num, Number::from_int(-3, true));
}
#[test]
fn builds_pointy_numbers() {
let mut b = NumberBuilder::new();
b.extend_value('5');
assert_eq!(b.resolve(), Number::from_int(5, true));
b.extend_decimal_value('3');
assert_eq!(b.resolve(), Number::from_float(5.3, false));
b.extend_decimal_value('4');
assert_eq!(b.resolve(), Number::from_float(5.34, false));
}
#[test]
fn builds_hex() {
let mut b = NumberBuilder::new();
b.radix(Radix::Hex).extend_value('4');
assert_eq!(b.resolve(), Number::from_int(0x4, true));
b.extend_value('A');
assert_eq!(b.resolve(), Number::from_int(0x4A, true));
b.extend_value('6');
assert_eq!(b.resolve(), Number::from_int(0x4A6, true));
}
}
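
A worked trace (not part of this commit) of how the builder arrives at 5.34, mirroring the builds_pointy_numbers test above and written from inside this module:

    use sibiltypes::Number;

    fn trace_5_34() {
        let mut b = NumberBuilder::new();
        b.extend_value('5');         // value = 10.0 * 0.0 + 5.0 = 5.0
        b.extend_decimal_value('3'); // value = 10.0 * 5.0 + 3.0 = 53.0, point = 1
        b.extend_decimal_value('4'); // value = 10.0 * 53.0 + 4.0 = 534.0, point = 2
        // resolve() divides by 10^point (534.0 / 100.0 = 5.34) and negates if the sign is Neg.
        assert_eq!(b.resolve(), Number::from_float(5.34, false));
    }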

lexer/src/str.rs (new file, 103 lines)

@@ -0,0 +1,103 @@
/* str.rs
* Eryn Wells <eryn@erynwells.me>
*/
pub trait RelativeIndexable {
/// Get the index of the character boundary preceding the given index. The index does not need to be on a character
/// boundary.
fn index_before(&self, usize) -> usize;
/// Get the index of the character boundary following the given index. The index does not need to be on a character
/// boundary.
fn index_after(&self, usize) -> usize;
}
pub trait CharAt {
/// Get the character at the given byte index. This index must be at a character boundary as defined by
/// `is_char_boundary()`.
fn char_at(&self, usize) -> Option<char>;
}
impl RelativeIndexable for str {
fn index_before(&self, index: usize) -> usize {
if index == 0 {
return 0;
}
let mut index = index;
if index > self.len() {
index = self.len();
}
loop {
index -= 1;
if self.is_char_boundary(index) {
break;
}
}
index
}
fn index_after(&self, index: usize) -> usize {
if index >= self.len() {
return self.len();
}
let mut index = index;
loop {
index += 1;
if self.is_char_boundary(index) {
break;
}
}
index
}
}
impl CharAt for str {
fn char_at(&self, index: usize) -> Option<char> {
if !self.is_char_boundary(index) {
return None;
}
let end = self.index_after(index);
let char_str = &self[index .. end];
char_str.chars().nth(0)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn index_before_is_well_behaved_for_ascii() {
let s = "abc";
// Sanity
assert_eq!(s.index_before(0), 0);
assert_eq!(s.index_before(2), 1);
// An index beyond the string bounds returns the index of the last character in the string.
{
let idx = s.index_before(4);
assert_eq!(idx, 2);
assert!(s.is_char_boundary(idx));
let last_char = &s[idx ..];
assert_eq!(last_char.len(), 1);
assert_eq!(last_char.chars().nth(0), Some('c'));
}
}
#[test]
fn index_after_is_well_behaved_for_ascii() {
let s = "abc";
// Sanity
assert_eq!(s.index_after(0), 1);
assert_eq!(s.index_after(2), 3);
// An index beyond the string bounds returns the length of the string
{
let idx = s.index_after(4);
assert_eq!(idx, s.len());
assert!(s.is_char_boundary(idx));
}
}
}
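
A small sketch (not part of this commit) of how these helpers behave on multi-byte UTF-8, assuming both traits are in scope:

    use str::{CharAt, RelativeIndexable};

    fn utf8_demo() {
        let s = "héllo";                  // 'é' occupies byte indexes 1 and 2
        assert_eq!(s.index_after(1), 3);  // steps over both bytes of 'é'
        assert_eq!(s.index_before(3), 1); // back to the start of 'é'
        assert_eq!(s.char_at(1), Some('é'));
        assert_eq!(s.char_at(2), None);   // 2 is not a character boundary
    }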

lexer/src/token.rs (new file, 35 lines)

@@ -0,0 +1,35 @@
/* token.rs
* Eryn Wells <eryn@erynwells.me>
*/
use sibiltypes::{Bool, Char, Number};
#[derive(Debug, PartialEq)]
pub enum Token {
Boolean(Bool),
Character(Char),
Comment(String),
Dot,
Id(String),
LeftParen,
LeftVectorParen,
Number(Number),
Quote,
RightParen,
String(String),
}
/// A Lex is a Token extracted from a specific position in an input stream. It
/// records where the token was found: the line number and the offset within that line.
#[derive(Debug)]
pub struct Lex {
pub token: Token,
pub line: usize,
pub offset: usize,
}
impl Lex {
pub fn new(token: Token, line: usize, offset: usize) -> Lex {
Lex { token: token, line: line, offset: offset }
}
}