diff --git a/lexer/src/char.rs b/lexer/src/char.rs deleted file mode 100644 index 06fdd82..0000000 --- a/lexer/src/char.rs +++ /dev/null @@ -1,108 +0,0 @@ -/* char.rs - * Eryn Wells - */ - -use std::marker::Sized; -use charset; - -pub trait FromChar { - fn from_char(c: char) -> Option where Self: Sized; -} - -pub trait Lexable { - fn is_character_leader(&self) -> bool; - fn is_dot(&self) -> bool; - fn is_hash(&self) -> bool; - fn is_quote(&self) -> bool; - fn is_left_paren(&self) -> bool; - fn is_right_paren(&self) -> bool; - fn is_string_quote(&self) -> bool; - fn is_string_escape_leader(&self) -> bool; - fn is_string_escaped(&self) -> bool; - fn is_newline(&self) -> bool; - fn is_eof(&self) -> bool; - - fn is_identifier_initial(&self) -> bool; - fn is_identifier_subsequent(&self) -> bool; - fn is_identifier_delimiter(&self) -> bool; - - fn is_boolean_true(&self) -> bool; - fn is_boolean_false(&self) -> bool; - - fn is_comment_initial(&self) -> bool; -} - -impl Lexable for char { - fn is_left_paren(&self) -> bool { - *self == '(' - } - - fn is_right_paren(&self) -> bool { - *self == ')' - } - - fn is_character_leader(&self) -> bool { - *self == '\\' - } - - fn is_dot(&self) -> bool { - *self == '.' - } - - fn is_hash(&self) -> bool { - *self == '#' - } - - fn is_quote(&self) -> bool { - *self == '\'' - } - - fn is_string_quote(&self) -> bool { - *self == '"' - } - - fn is_string_escape_leader(&self) -> bool { - *self == '\\' - } - - fn is_string_escaped(&self) -> bool { - *self == '"' || *self == '\\' - } - - fn is_boolean_true(&self) -> bool { - *self == 't' - } - - fn is_boolean_false(&self) -> bool { - *self == 'f' - } - - fn is_newline(&self) -> bool { - *self == '\n' - } - - fn is_eof(&self) -> bool { - *self == '\0' - } - - fn is_comment_initial(&self) -> bool { - *self == ';' - } - - fn is_identifier_initial(&self) -> bool { - charset::identifier_initials().contains(&self) - } - - fn is_identifier_subsequent(&self) -> bool { - charset::identifier_subsequents().contains(&self) - } - - fn is_identifier_delimiter(&self) -> bool { - self.is_whitespace() - || self.is_comment_initial() - || self.is_left_paren() - || self.is_right_paren() - || self.is_string_quote() - || self.is_eof() - } -} diff --git a/lexer/src/charset.rs b/lexer/src/charset.rs deleted file mode 100644 index 63aaaaf..0000000 --- a/lexer/src/charset.rs +++ /dev/null @@ -1,43 +0,0 @@ -/* charset.rs - * Eryn Wells - */ - -use std::collections::HashSet; -use std::iter::FromIterator; - -pub type CharSet = HashSet; - -// TODO: Use std::sync::Once for these sets? -// https://doc.rust-lang.org/beta/std/sync/struct.Once.html - -fn ascii_letters() -> CharSet { - let letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".chars(); - CharSet::from_iter(letters) -} - -fn ascii_digits() -> CharSet { - let digits = "1234567890".chars(); - CharSet::from_iter(digits) -} - -/// A set of all characters allowed to start Scheme identifiers. -pub fn identifier_initials() -> CharSet { - let letters = ascii_letters(); - let extras = CharSet::from_iter("!$%&*/:<=>?~_^".chars()); - let mut initials = CharSet::new(); - initials.extend(letters.iter()); - initials.extend(extras.iter()); - initials -} - -/// A set of all characters allowed to follow an identifier initial. -pub fn identifier_subsequents() -> CharSet { - let initials = identifier_initials(); - let digits = ascii_digits(); - let extras = CharSet::from_iter(".+-".chars()); - let mut subsequents = CharSet::new(); - subsequents.extend(initials.iter()); - subsequents.extend(digits.iter()); - subsequents.extend(extras.iter()); - subsequents -} diff --git a/lexer/src/lexer.rs b/lexer/src/lexer.rs deleted file mode 100644 index 3b8940e..0000000 --- a/lexer/src/lexer.rs +++ /dev/null @@ -1,526 +0,0 @@ -/* lexer.rs - * Eryn Wells - */ - -use std::collections::HashSet; -use sibiltypes::Object; -use sibiltypes::number::Exact; - -use char::{FromChar, Lexable}; -use number::{NumberBuilder, Radix, Sign}; -use str::{CharAt, RelativeIndexable}; -use token::{Lex, Token}; - -type StateResult = Result, String>; - -trait HasResult { - fn has_token(&self) -> bool; -} - -#[derive(Debug)] -enum State { - Char, - NamedChar(HashSet<&'static str>, String), - Comment, - Initial, - Id, - Dot, - Hash, - Number, - NumberExact, - NumberDecimal, - NumberRadix, - NumberSign, - Sign, - String, - StringEscape, -} - -pub struct Lexer { - input: String, - begin: usize, - forward: usize, - line: usize, - line_offset: usize, - state: State, - number_builder: NumberBuilder, - string_value: String, -} - -impl Lexer { - pub fn new(input: &str) -> Lexer { - Lexer { - input: String::from(input), - begin: 0, - forward: 0, - line: 1, - line_offset: 1, - state: State::Initial, - number_builder: NumberBuilder::new(), - string_value: String::new(), - } - } -} - -impl Lexer { - fn begin_lexing(&mut self) { - self.forward = self.begin; - self.state = State::Initial; - } - - /// Advance the forward pointer to the next character. - fn advance(&mut self) { - self.forward = self.input.index_after(self.forward); - self.line_offset += 1; - println!("> forward={}", self.forward); - } - - /// Retract the forward pointer to the previous character. - fn retract(&mut self) { - self.forward = self.input.index_before(self.forward); - self.line_offset -= 1; - println!("< forward={}", self.forward); - } - - /// Advance the begin pointer to prepare for the next iteration. - fn advance_begin(&mut self) { - self.begin = self.input.index_after(self.forward); - self.forward = self.begin; - println!("> begin={}, forward={}", self.begin, self.forward); - } - - /// Update lexer state when it encounters a newline. - fn handle_newline(&mut self) { - self.line += 1; - self.line_offset = 1; - } - - /// Get the substring between the two input indexes. This is the value to give to a new Token instance. - fn value(&self) -> String { - self.input[self.begin .. self.forward].to_string() - } - - fn error_string(&self, message: String) -> String { - format!("{}:{}: {}", self.line, self.line_offset, message) - } - - fn token_result(&self, token: Token) -> StateResult { - Ok(Some(token)) - } - - fn generic_error(&self, c: char) -> StateResult { - Err(self.error_string(format!("Invalid token character: {}", c))) - } -} - -impl Lexer { - /// Handle self.state == State::Initial - fn state_initial(&mut self, c: char) -> StateResult { - if c.is_left_paren() { - return self.token_result(Token::LeftParen); - } - else if c.is_right_paren() { - return self.token_result(Token::RightParen); - } - else if c.is_dot() { - self.state = State::Dot; - self.advance(); - } - else if c.is_hash() { - self.state = State::Hash; - self.advance(); - } - else if c.is_quote() { - return self.token_result(Token::Quote); - } - else if c.is_string_quote() { - self.string_value = String::from(""); - self.state = State::String; - self.advance(); - } - - else if let Some(sign) = Sign::from_char(c) { - self.number_builder = NumberBuilder::new(); - self.number_builder.sign(sign); - self.state = State::Sign; - self.advance(); - } - else if c.is_identifier_initial() { - self.state = State::Id; - self.advance(); - } - - else if c.is_digit(10) { - self.number_builder = NumberBuilder::new(); - self.number_builder.extend_value(c); - self.state = State::Number; - self.advance(); - } - - else if c.is_whitespace() { - if c.is_newline() { - self.handle_newline(); - } - self.advance_begin(); - } - - else if c.is_comment_initial() { - self.state = State::Comment; - self.advance(); - } - - else { - return self.generic_error(c); - } - - Ok(None) - } - - /// Handle self.state == State::Id - fn state_identifier(&mut self, c: char) -> StateResult { - if c.is_identifier_subsequent() { - // Stay in Id state. - self.advance(); - } - else if c.is_identifier_delimiter() { - let value = self.value(); - self.retract(); - return self.token_result(Token::Id(value)); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - /// Handle self.state == State::Char - fn state_char(&mut self, c: char) -> StateResult { - self.advance(); - let lower_c = c.to_lowercase().collect::(); - let mut candidates: HashSet<&str> = HashSet::new(); - for c in names::set().iter() { - if c.starts_with(&lower_c) { - candidates.insert(c); - } - } - if candidates.len() > 0 { - self.state = State::NamedChar(candidates, lower_c); - } else { - return self.token_result(Token::Character(Object::Char(c))); - } - Ok(None) - } - - /// Handle self.state == State::NamedChar - fn state_named_char(&mut self, c: char) -> StateResult { - let (candidates, mut progress) = match self.state { - State::NamedChar(ref candidates, ref progress) => (candidates.clone(), progress.clone()), - _ => panic!("Called state_named_char without being in NamedChar state") - }; - - if c.is_identifier_delimiter() || c.is_eof() { - if progress.len() == 1 { - self.retract(); - let token_char = Object::Char(progress.chars().next().unwrap()); - return self.token_result(Token::Character(token_char)); - } - else { - return self.generic_error(c); - } - } - - progress.push(c); - - let candidates: HashSet<&str> = { - let filtered = candidates.iter().filter(|c| c.starts_with(&progress)).map(|c| *c); - filtered.collect() - }; - - if candidates.len() == 1 { - let candidate = *candidates.iter().next().unwrap(); - if candidate == &progress { - let token_char = Object::from_char_named(&progress); - self.token_result(Token::Character(token_char)) - } - else { - self.state = State::NamedChar(candidates, progress); - self.advance(); - Ok(None) - } - } - else if candidates.len() > 1 { - self.state = State::NamedChar(candidates, progress); - self.advance(); - Ok(None) - } - else { - self.generic_error(c) - } - } - - /// Handle self.state == State::Dot - fn state_dot(&mut self, c: char) -> StateResult { - if c.is_identifier_delimiter() { - self.retract(); - return self.token_result(Token::Dot); - } - else if c.is_digit(10) { - self.number_builder = NumberBuilder::new(); - self.number_builder.extend_decimal_value(c); - self.state = State::NumberDecimal; - self.advance(); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - /// Handle self.state == State::Hash - fn state_hash(&mut self, c: char) -> StateResult { - if c.is_boolean_true() || c.is_boolean_false() { - self.advance(); - let token_bool = Object::Bool(c.is_boolean_true()); - return self.token_result(Token::Boolean(token_bool)); - } - else if c.is_left_paren() { - self.advance(); - return self.token_result(Token::LeftVectorParen); - } - else if c.is_character_leader() { - self.state = State::Char; - self.advance(); - } - else if let Some(radix) = Radix::from_char(c) { - self.number_builder.radix(radix); - self.state = State::NumberRadix; - self.advance(); - } - else if let Some(exactness) = Exact::from_char(c) { - self.number_builder.exact(exactness); - self.state = State::NumberExact; - self.advance(); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - /// Handle self.state == State::Number - fn state_number(&mut self, c: char) -> StateResult { - if c.is_digit(self.number_builder.radix_value()) { - self.number_builder.extend_value(c); - self.advance(); - } - else if c.is_dot() { - self.state = State::NumberDecimal; - self.advance(); - } - else if c.is_identifier_delimiter() { - self.retract(); - return self.token_result(Token::Number(self.number_builder.resolve())); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - fn state_number_exactness(&mut self, c: char) -> StateResult { - if c.is_hash() { - self.state = State::Hash; - self.advance(); - } - else if let Some(sign) = Sign::from_char(c) { - self.number_builder.sign(sign); - self.state = State::NumberSign; - self.advance(); - } - else if c.is_digit(self.number_builder.radix_value()) { - self.number_builder.extend_value(c); - self.state = State::Number; - self.advance(); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - fn state_number_decimal(&mut self, c: char) -> StateResult { - if c.is_digit(Radix::Dec.value()) { - self.number_builder.extend_decimal_value(c); - self.advance(); - } - else if c.is_identifier_delimiter() { - self.retract(); - return self.token_result(Token::Number(self.number_builder.resolve())); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - fn state_number_radix(&mut self, c: char) -> StateResult { - if c.is_digit(self.number_builder.radix_value()) { - self.number_builder.extend_value(c); - self.state = State::Number; - self.advance(); - } - else if c.is_dot() { - self.state = State::NumberDecimal; - self.advance(); - } - else if c.is_hash() { - self.state = State::Hash; - self.advance(); - } - else if let Some(sign) = Sign::from_char(c) { - self.number_builder.sign(sign); - self.state = State::NumberSign; - self.advance(); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - fn state_number_sign(&mut self, c: char) -> StateResult { - if c.is_digit(self.number_builder.radix_value()) { - self.number_builder.extend_value(c); - self.state = State::Number; - self.advance(); - } - else if c.is_dot() { - self.state = State::NumberDecimal; - self.advance(); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - fn state_sign(&mut self, c: char) -> StateResult { - if c.is_digit(Radix::Dec.value()) { - self.number_builder.extend_value(c); - self.state = State::Number; - self.advance(); - } - else if c.is_identifier_delimiter() { - let value = self.value(); - self.retract(); - return self.token_result(Token::Id(value)); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - fn state_string(&mut self, c: char) -> StateResult { - self.advance(); - if c.is_string_quote() { - return self.token_result(Token::String(self.string_value.clone())); - } - else if c.is_string_escape_leader() { - self.state = State::StringEscape; - } - else { - self.string_value.push(c); - } - Ok(None) - } - - fn state_string_escape(&mut self, c: char) -> StateResult { - let char_to_push = match c { - '0' => '\0', - 'n' => '\n', - 't' => '\t', - '"' => '"', - '\\' => '\\', - _ => return Err(self.error_string(format!("Invalid string escape character: {}", c))), - }; - self.string_value.push(char_to_push); - self.state = State::String; - self.advance(); - Ok(None) - } - - fn state_comment(&mut self, c: char) -> StateResult { - if c.is_newline() { - self.handle_newline(); - return self.token_result(Token::Comment(self.value())); - } - else if c.is_eof() { - return self.token_result(Token::Comment(self.value())); - } - self.advance(); - Ok(None) - } -} - -impl Iterator for Lexer { - type Item = Lex; - - fn next(&mut self) -> Option { - self.begin_lexing(); - if self.begin == self.input.len() { - return None; - } - let mut token: Option = None; - println!("Lexing '{}'", &self.input[self.begin ..]); - while token.is_none() { - let c = match self.input.char_at(self.forward) { - Some(c) => c, - None => '\0', - }; - println!("state={:?} c='{}'", self.state, c); - let previous_forward = self.forward; - let result = match self.state { - State::Char=> self.state_char(c), - State::NamedChar(_, _) => self.state_named_char(c), - State::Comment => self.state_comment(c), - State::Dot => self.state_dot(c), - State::Hash => self.state_hash(c), - State::Id => self.state_identifier(c), - State::Initial => self.state_initial(c), - State::Number => self.state_number(c), - State::NumberDecimal => self.state_number_decimal(c), - State::NumberExact => self.state_number_exactness(c), - State::NumberRadix => self.state_number_radix(c), - State::NumberSign => self.state_number_sign(c), - State::Sign => self.state_sign(c), - State::String => self.state_string(c), - State::StringEscape => self.state_string_escape(c), - }; - debug_assert!(result.has_token() || self.forward != previous_forward, "No lexing progress made!"); - if result.has_token() { - token = result.ok().unwrap(); - } - else if result.is_err() { - assert!(false, "{}", result.err().unwrap()); - } - } - self.advance_begin(); - match token { - Some(t) => Some(Lex::new(t, self.line, self.line_offset)), - None => None, - } - } -} - -impl HasResult for StateResult { - fn has_token(&self) -> bool { - match *self { - Ok(ref token) => match *token { - Some(_) => true, - None => false, - }, - Err(_) => false - } - } -} diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs index 0a35bae..e69de29 100644 --- a/lexer/src/lib.rs +++ b/lexer/src/lib.rs @@ -1,183 +0,0 @@ -extern crate sibiltypes; - -mod char; -mod charset; -mod lexer; -mod number; -mod str; -mod token; - -pub use lexer::Lexer; -pub use token::Token; - -pub fn lex(input: &str) -> Lexer { - Lexer::new(&input) -} - -#[cfg(test)] -mod tests { - use sibiltypes::{Bool, Char, Number}; - use std::iter::Iterator; - use super::lex; - use lexer::Lexer; - use token::Token; - - #[test] - fn finds_parens() { - check_single_token("(", Token::LeftParen); - check_single_token(")", Token::RightParen); - check_single_token("#(", Token::LeftVectorParen); - } - - #[test] - fn finds_characters() { - check_single_token("#\\a", Token::Character(Char('a'))); - check_single_token("#\\n", Token::Character(Char('n'))); - check_single_token("#\\s", Token::Character(Char('s'))); - } - - #[test] - fn finds_named_characters() { - check_single_token("#\\newline", Token::Character(Char('\n'))); - check_single_token("#\\null", Token::Character(Char('\0'))); - check_single_token("#\\space", Token::Character(Char(' '))); - } - - #[test] - fn finds_dots() { - check_single_token(".", Token::Dot); - - let mut lexer = Lexer::new("abc . abc"); - assert_next_token(&mut lexer, &Token::Id(String::from("abc"))); - assert_next_token(&mut lexer, &Token::Dot); - assert_next_token(&mut lexer, &Token::Id(String::from("abc"))); - } - - #[test] - fn finds_identifiers() { - let tok = |s: &str| { check_single_token(s, Token::Id(String::from(s))); }; - tok("abc"); - tok("number?"); - tok("+"); - tok("-"); - } - - #[test] - fn finds_booleans() { - check_single_token("#t", Token::Boolean(Bool(true))); - check_single_token("#f", Token::Boolean(Bool(false))); - } - - #[test] - fn finds_comments() { - let s = "; a comment"; - check_single_token(s, Token::Comment(String::from(s))); - } - - #[test] - fn finds_escaped_characters_in_strings() { - check_single_token("\"\\\\\"", Token::String(String::from("\\"))); - check_single_token("\"\\\"\"", Token::String(String::from("\""))); - check_single_token("\"\\n\"", Token::String(String::from("\n"))); - } - - #[test] - fn finds_numbers() { - check_single_token("34", Token::Number(Number::from_int(34, true))); - check_single_token(".34", Token::Number(Number::from_float(0.34, false))); - check_single_token("0.34", Token::Number(Number::from_float(0.34, false))); - } - - #[test] - fn finds_rational_numbers() { - check_single_token("3/2", Token::Number(Number::from_quotient(3, 2, true))); - check_single_token("-3/2", Token::Number(Number::from_quotient(-3, 2, true))); - } - - #[test] - fn finds_negative_numbers() { - check_single_token("-3", Token::Number(Number::from_int(-3, true))); - check_single_token("-0", Token::Number(Number::from_int(-0, true))); - check_single_token("-0.56", Token::Number(Number::from_float(-0.56, false))); - check_single_token("-3.14159", Token::Number(Number::from_float(-3.14159, false))); - } - - #[test] - fn finds_bin_numbers() { - check_single_token("#b0", Token::Number(Number::from_int(0b0, true))); - check_single_token("#b01011", Token::Number(Number::from_int(0b01011, true))); - } - - #[test] - fn finds_dec_numbers() { - check_single_token("34", Token::Number(Number::from_int(34, true))); - check_single_token("#d89", Token::Number(Number::from_int(89, true))); - } - - #[test] - fn finds_oct_numbers() { - check_single_token("#o45", Token::Number(Number::from_int(0o45, true))); - } - - #[test] - fn finds_exact_numbers() { - check_single_token("#e45", Token::Number(Number::from_int(45, true))); - check_single_token("#e-45", Token::Number(Number::from_int(-45, true))); - check_single_token("#e4.5", Token::Number(Number::from_float(4.5, true))); - } - - #[test] - fn finds_hex_numbers() { - check_single_token("#h4A65", Token::Number(Number::from_int(0x4A65, true))); - } - - #[test] - fn finds_quote() { - check_single_token("'", Token::Quote); - } - - #[test] - fn finds_strings() { - check_single_token("\"\"", Token::String(String::from(""))); - check_single_token("\"abc\"", Token::String(String::from("abc"))); - } - - #[test] - fn lexes_simple_expression() { - check_tokens("(+ 3.4 6.8)", vec![ - Token::LeftParen, - Token::Id(String::from("+")), - Token::Number(Number::from_float(3.4, false)), - Token::Number(Number::from_float(6.8, false)), - Token::RightParen]); - } - - #[test] - fn lexes_quoted_identifier() { - check_tokens("'abc", vec![Token::Quote, Token::Id(String::from("abc"))]); - } - - fn check_single_token(input: &str, expected: Token) { - let mut lexer = Lexer::new(input); - assert_next_token(&mut lexer, &expected); - } - - fn check_tokens(input: &str, expected: Vec) { - let lexer = lex(input); - let mut expected_iter = expected.iter(); - for lex in lexer { - if let Some(expected_token) = expected_iter.next() { - assert_eq!(lex.token, *expected_token); - } - else { - assert!(false, "Found a token we didn't expect: {:?}", lex.token); - } - } - // TODO: Check that all expected tokens are consumed. - } - - fn assert_next_token(lexer: &mut Lexer, expected: &Token) { - let lex = lexer.next().unwrap(); - assert_eq!(lex.token, *expected); - } -} diff --git a/lexer/src/number.rs b/lexer/src/number.rs deleted file mode 100644 index 5051a5c..0000000 --- a/lexer/src/number.rs +++ /dev/null @@ -1,176 +0,0 @@ -/* number.rs - * Eryn Wells - */ - -use sibiltypes::Object; -use sibiltypes::number::{Number, Exact}; -use char::FromChar; - -#[derive(Debug)] -pub enum Radix { Bin, Oct, Dec, Hex } - -#[derive(Eq, PartialEq, Debug)] -pub enum Sign { Pos, Neg } - -#[derive(Debug)] -pub struct NumberBuilder { - exact: Exact, - radix: Radix, - sign: Sign, - value: f64, - point: u32, -} - -impl NumberBuilder { - pub fn new() -> NumberBuilder { - NumberBuilder { - exact: Exact::Yes, - radix: Radix::Dec, - sign: Sign::Pos, - value: 0.0, - point: 0, - } - } - - pub fn exact<'a>(&'a mut self, ex: Exact) -> &'a mut NumberBuilder { - self.exact = ex; - self - } - - pub fn radix<'a>(&'a mut self, r: Radix) -> &'a mut NumberBuilder { - self.radix = r; - self - } - - pub fn sign<'a>(&'a mut self, s: Sign) -> &'a mut NumberBuilder { - self.sign = s; - self - } - - pub fn extend_value<'a>(&'a mut self, digit: char) -> &'a mut Self { - if let Some(place) = NumberBuilder::place_value(digit) { - self.value = self.radix.float_value() * self.value + place; - } - else { - // TODO: Indicate an error. - } - self - } - - pub fn extend_decimal_value<'a>(&'a mut self, digit: char) -> &'a mut Self { - self.extend_value(digit); - self.point += 1; - self - } - - pub fn resolve(&self) -> Number { - // TODO: Convert fields to Number type. - let value = if self.point > 0 { self.value / 10u32.pow(self.point) as f64 } else { self.value }; - let value = if self.sign == Sign::Neg { value * -1.0 } else { value }; - // TODO: Use an integer if we can. - Number::from_float(value, self.exact) - } - - pub fn radix_value(&self) -> u32 { - self.radix.value() - } - - fn place_value(digit: char) -> Option { - match digit { - '0' ... '9' => Some((digit as u32 - '0' as u32) as f64), - 'a' ... 'f' => Some((digit as u32 - 'a' as u32 + 10) as f64), - 'A' ... 'F' => Some((digit as u32 - 'A' as u32 + 10) as f64), - _ => None, - } - } -} - -impl Radix { - pub fn value(&self) -> u32 { - match *self { - Radix::Bin => 2, - Radix::Oct => 8, - Radix::Dec => 10, - Radix::Hex => 16, - } - } - - pub fn float_value(&self) -> f64 { - self.value() as f64 - } -} - -impl FromChar for Radix { - fn from_char(c: char) -> Option { - match c { - 'b' => Some(Radix::Bin), - 'o' => Some(Radix::Oct), - 'd' => Some(Radix::Dec), - 'h' => Some(Radix::Hex), - _ => None, - } - } -} - -impl FromChar for Sign { - fn from_char(c: char) -> Option { - match c { - '+' => Some(Sign::Pos), - '-' => Some(Sign::Neg), - _ => None, - } - } -} - -impl FromChar for Exact { - fn from_char(c: char) -> Option { - match c { - 'i' => Some(Exact::No), - 'e' => Some(Exact::Yes), - _ => None, - } - } -} - -#[cfg(test)] -mod tests { - use sibiltypes::Number; - use super::*; - - #[test] - fn builds_integers() { - let mut b = NumberBuilder::new(); - b.extend_value('3'); - assert_eq!(b.resolve(), Number::from_int(3, true)); - b.extend_value('4'); - assert_eq!(b.resolve(), Number::from_int(34, true)); - } - - #[test] - fn builds_negative_integers() { - let num = NumberBuilder::new().sign(Sign::Neg).extend_value('3').resolve(); - assert_eq!(num, Number::from_int(-3, true)); - } - - #[test] - fn builds_pointy_numbers() { - let mut b = NumberBuilder::new(); - b.extend_value('5'); - assert_eq!(b.resolve(), Number::from_int(5, true)); - b.extend_decimal_value('3'); - assert_eq!(b.resolve(), Number::from_float(5.3, false)); - b.extend_decimal_value('4'); - assert_eq!(b.resolve(), Number::from_float(5.34, false)); - } - - #[test] - fn builds_hex() { - let mut b = NumberBuilder::new(); - b.radix(Radix::Hex).extend_value('4'); - assert_eq!(b.resolve(), Number::from_int(0x4, true)); - b.extend_value('A'); - assert_eq!(b.resolve(), Number::from_int(0x4A, true)); - b.extend_value('6'); - assert_eq!(b.resolve(), Number::from_int(0x4A6, true)); - } -} diff --git a/lexer/src/str.rs b/lexer/src/str.rs deleted file mode 100644 index 76ed1a9..0000000 --- a/lexer/src/str.rs +++ /dev/null @@ -1,103 +0,0 @@ -/* str.rs - * Eryn Wells - */ - -pub trait RelativeIndexable { - /// Get the index of the character boundary preceding the given index. The index does not need to be on a character - /// boundary. - fn index_before(&self, usize) -> usize; - - /// Get the index of the character boundary following the given index. The index does not need to be on a character - /// boundary. - fn index_after(&self, usize) -> usize; -} - -pub trait CharAt { - /// Get the character at the given byte index. This index must be at a character boundary as defined by - /// `is_char_boundary()`. - fn char_at(&self, usize) -> Option; -} - -impl RelativeIndexable for str { - fn index_before(&self, index: usize) -> usize { - if index == 0 { - return 0; - } - let mut index = index; - if index > self.len() { - index = self.len(); - } - loop { - index -= 1; - if self.is_char_boundary(index) { - break; - } - } - index - } - - fn index_after(&self, index: usize) -> usize { - if index >= self.len() { - return self.len(); - } - let mut index = index; - loop { - index += 1; - if self.is_char_boundary(index) { - break; - } - } - index - } -} - -impl CharAt for str { - fn char_at(&self, index: usize) -> Option { - if !self.is_char_boundary(index) { - return None; - } - let end = self.index_after(index); - let char_str = &self[index .. end]; - char_str.chars().nth(0) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn index_before_is_well_behaved_for_ascii() { - let s = "abc"; - - // Sanity - assert_eq!(s.index_before(0), 0); - assert_eq!(s.index_before(2), 1); - - // An index beyond the string bounds returns the index of the last character in the string. - { - let idx = s.index_before(4); - assert_eq!(idx, 2); - assert!(s.is_char_boundary(idx)); - let last_char = &s[idx ..]; - assert_eq!(last_char.len(), 1); - assert_eq!(last_char.chars().nth(0), Some('c')); - } - } - - #[test] - fn index_after_is_well_behaved_for_ascii() { - let s = "abc"; - - // Sanity - assert_eq!(s.index_after(0), 1); - assert_eq!(s.index_after(2), 3); - - // An index beyond the string bounds returns the length of the string - { - let idx = s.index_after(4); - assert_eq!(idx, s.len()); - assert!(s.is_char_boundary(idx)); - } - } -} diff --git a/lexer/src/token.rs b/lexer/src/token.rs deleted file mode 100644 index 88a1867..0000000 --- a/lexer/src/token.rs +++ /dev/null @@ -1,39 +0,0 @@ -/* token.rs - * Eryn Wells - */ - -use sibiltypes::Object; - -#[derive(Debug, PartialEq)] -pub enum Token { - Boolean(Object), - Character(Object), - Comment(Object), - Dot, - Id(Object), - LeftParen, - LeftVectorParen, - Number(Object), - Quote, - RightParen, - String(Object), -} - -/// A Lex is a Token extracted from a specific position in an input string. It contains useful -/// information about the token's place in that input. -#[derive(Debug)] -pub struct Lex { - token: Token, - line: usize, - offset: usize, -} - -impl Lex { - pub fn new(token: Token, line: usize, offset: usize) -> Lex { - Lex { token: token, line: line, offset: offset } - } - - pub fn token(&self) -> &Token { &self.token } - pub fn line(&self) -> usize { self.line } - pub fn offset(&self) -> usize { self.offset } -}