From a4282e77604984aa8ad526be4de79999bf67f934 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Sun, 30 Apr 2017 16:32:31 -0700 Subject: [PATCH 01/25] Blow away the lexer code =o --- lexer/src/char.rs | 108 --------- lexer/src/charset.rs | 43 ---- lexer/src/lexer.rs | 526 ------------------------------------------- lexer/src/lib.rs | 183 --------------- lexer/src/number.rs | 176 --------------- lexer/src/str.rs | 103 --------- lexer/src/token.rs | 39 ---- 7 files changed, 1178 deletions(-) delete mode 100644 lexer/src/char.rs delete mode 100644 lexer/src/charset.rs delete mode 100644 lexer/src/lexer.rs delete mode 100644 lexer/src/number.rs delete mode 100644 lexer/src/str.rs delete mode 100644 lexer/src/token.rs diff --git a/lexer/src/char.rs b/lexer/src/char.rs deleted file mode 100644 index 06fdd82..0000000 --- a/lexer/src/char.rs +++ /dev/null @@ -1,108 +0,0 @@ -/* char.rs - * Eryn Wells - */ - -use std::marker::Sized; -use charset; - -pub trait FromChar { - fn from_char(c: char) -> Option where Self: Sized; -} - -pub trait Lexable { - fn is_character_leader(&self) -> bool; - fn is_dot(&self) -> bool; - fn is_hash(&self) -> bool; - fn is_quote(&self) -> bool; - fn is_left_paren(&self) -> bool; - fn is_right_paren(&self) -> bool; - fn is_string_quote(&self) -> bool; - fn is_string_escape_leader(&self) -> bool; - fn is_string_escaped(&self) -> bool; - fn is_newline(&self) -> bool; - fn is_eof(&self) -> bool; - - fn is_identifier_initial(&self) -> bool; - fn is_identifier_subsequent(&self) -> bool; - fn is_identifier_delimiter(&self) -> bool; - - fn is_boolean_true(&self) -> bool; - fn is_boolean_false(&self) -> bool; - - fn is_comment_initial(&self) -> bool; -} - -impl Lexable for char { - fn is_left_paren(&self) -> bool { - *self == '(' - } - - fn is_right_paren(&self) -> bool { - *self == ')' - } - - fn is_character_leader(&self) -> bool { - *self == '\\' - } - - fn is_dot(&self) -> bool { - *self == '.' - } - - fn is_hash(&self) -> bool { - *self == '#' - } - - fn is_quote(&self) -> bool { - *self == '\'' - } - - fn is_string_quote(&self) -> bool { - *self == '"' - } - - fn is_string_escape_leader(&self) -> bool { - *self == '\\' - } - - fn is_string_escaped(&self) -> bool { - *self == '"' || *self == '\\' - } - - fn is_boolean_true(&self) -> bool { - *self == 't' - } - - fn is_boolean_false(&self) -> bool { - *self == 'f' - } - - fn is_newline(&self) -> bool { - *self == '\n' - } - - fn is_eof(&self) -> bool { - *self == '\0' - } - - fn is_comment_initial(&self) -> bool { - *self == ';' - } - - fn is_identifier_initial(&self) -> bool { - charset::identifier_initials().contains(&self) - } - - fn is_identifier_subsequent(&self) -> bool { - charset::identifier_subsequents().contains(&self) - } - - fn is_identifier_delimiter(&self) -> bool { - self.is_whitespace() - || self.is_comment_initial() - || self.is_left_paren() - || self.is_right_paren() - || self.is_string_quote() - || self.is_eof() - } -} diff --git a/lexer/src/charset.rs b/lexer/src/charset.rs deleted file mode 100644 index 63aaaaf..0000000 --- a/lexer/src/charset.rs +++ /dev/null @@ -1,43 +0,0 @@ -/* charset.rs - * Eryn Wells - */ - -use std::collections::HashSet; -use std::iter::FromIterator; - -pub type CharSet = HashSet; - -// TODO: Use std::sync::Once for these sets? 
-// https://doc.rust-lang.org/beta/std/sync/struct.Once.html - -fn ascii_letters() -> CharSet { - let letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".chars(); - CharSet::from_iter(letters) -} - -fn ascii_digits() -> CharSet { - let digits = "1234567890".chars(); - CharSet::from_iter(digits) -} - -/// A set of all characters allowed to start Scheme identifiers. -pub fn identifier_initials() -> CharSet { - let letters = ascii_letters(); - let extras = CharSet::from_iter("!$%&*/:<=>?~_^".chars()); - let mut initials = CharSet::new(); - initials.extend(letters.iter()); - initials.extend(extras.iter()); - initials -} - -/// A set of all characters allowed to follow an identifier initial. -pub fn identifier_subsequents() -> CharSet { - let initials = identifier_initials(); - let digits = ascii_digits(); - let extras = CharSet::from_iter(".+-".chars()); - let mut subsequents = CharSet::new(); - subsequents.extend(initials.iter()); - subsequents.extend(digits.iter()); - subsequents.extend(extras.iter()); - subsequents -} diff --git a/lexer/src/lexer.rs b/lexer/src/lexer.rs deleted file mode 100644 index 3b8940e..0000000 --- a/lexer/src/lexer.rs +++ /dev/null @@ -1,526 +0,0 @@ -/* lexer.rs - * Eryn Wells - */ - -use std::collections::HashSet; -use sibiltypes::Object; -use sibiltypes::number::Exact; - -use char::{FromChar, Lexable}; -use number::{NumberBuilder, Radix, Sign}; -use str::{CharAt, RelativeIndexable}; -use token::{Lex, Token}; - -type StateResult = Result, String>; - -trait HasResult { - fn has_token(&self) -> bool; -} - -#[derive(Debug)] -enum State { - Char, - NamedChar(HashSet<&'static str>, String), - Comment, - Initial, - Id, - Dot, - Hash, - Number, - NumberExact, - NumberDecimal, - NumberRadix, - NumberSign, - Sign, - String, - StringEscape, -} - -pub struct Lexer { - input: String, - begin: usize, - forward: usize, - line: usize, - line_offset: usize, - state: State, - number_builder: NumberBuilder, - string_value: String, -} - -impl Lexer { - pub fn new(input: &str) -> Lexer { - Lexer { - input: String::from(input), - begin: 0, - forward: 0, - line: 1, - line_offset: 1, - state: State::Initial, - number_builder: NumberBuilder::new(), - string_value: String::new(), - } - } -} - -impl Lexer { - fn begin_lexing(&mut self) { - self.forward = self.begin; - self.state = State::Initial; - } - - /// Advance the forward pointer to the next character. - fn advance(&mut self) { - self.forward = self.input.index_after(self.forward); - self.line_offset += 1; - println!("> forward={}", self.forward); - } - - /// Retract the forward pointer to the previous character. - fn retract(&mut self) { - self.forward = self.input.index_before(self.forward); - self.line_offset -= 1; - println!("< forward={}", self.forward); - } - - /// Advance the begin pointer to prepare for the next iteration. - fn advance_begin(&mut self) { - self.begin = self.input.index_after(self.forward); - self.forward = self.begin; - println!("> begin={}, forward={}", self.begin, self.forward); - } - - /// Update lexer state when it encounters a newline. - fn handle_newline(&mut self) { - self.line += 1; - self.line_offset = 1; - } - - /// Get the substring between the two input indexes. This is the value to give to a new Token instance. - fn value(&self) -> String { - self.input[self.begin .. 
self.forward].to_string() - } - - fn error_string(&self, message: String) -> String { - format!("{}:{}: {}", self.line, self.line_offset, message) - } - - fn token_result(&self, token: Token) -> StateResult { - Ok(Some(token)) - } - - fn generic_error(&self, c: char) -> StateResult { - Err(self.error_string(format!("Invalid token character: {}", c))) - } -} - -impl Lexer { - /// Handle self.state == State::Initial - fn state_initial(&mut self, c: char) -> StateResult { - if c.is_left_paren() { - return self.token_result(Token::LeftParen); - } - else if c.is_right_paren() { - return self.token_result(Token::RightParen); - } - else if c.is_dot() { - self.state = State::Dot; - self.advance(); - } - else if c.is_hash() { - self.state = State::Hash; - self.advance(); - } - else if c.is_quote() { - return self.token_result(Token::Quote); - } - else if c.is_string_quote() { - self.string_value = String::from(""); - self.state = State::String; - self.advance(); - } - - else if let Some(sign) = Sign::from_char(c) { - self.number_builder = NumberBuilder::new(); - self.number_builder.sign(sign); - self.state = State::Sign; - self.advance(); - } - else if c.is_identifier_initial() { - self.state = State::Id; - self.advance(); - } - - else if c.is_digit(10) { - self.number_builder = NumberBuilder::new(); - self.number_builder.extend_value(c); - self.state = State::Number; - self.advance(); - } - - else if c.is_whitespace() { - if c.is_newline() { - self.handle_newline(); - } - self.advance_begin(); - } - - else if c.is_comment_initial() { - self.state = State::Comment; - self.advance(); - } - - else { - return self.generic_error(c); - } - - Ok(None) - } - - /// Handle self.state == State::Id - fn state_identifier(&mut self, c: char) -> StateResult { - if c.is_identifier_subsequent() { - // Stay in Id state. 
- self.advance(); - } - else if c.is_identifier_delimiter() { - let value = self.value(); - self.retract(); - return self.token_result(Token::Id(value)); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - /// Handle self.state == State::Char - fn state_char(&mut self, c: char) -> StateResult { - self.advance(); - let lower_c = c.to_lowercase().collect::(); - let mut candidates: HashSet<&str> = HashSet::new(); - for c in names::set().iter() { - if c.starts_with(&lower_c) { - candidates.insert(c); - } - } - if candidates.len() > 0 { - self.state = State::NamedChar(candidates, lower_c); - } else { - return self.token_result(Token::Character(Object::Char(c))); - } - Ok(None) - } - - /// Handle self.state == State::NamedChar - fn state_named_char(&mut self, c: char) -> StateResult { - let (candidates, mut progress) = match self.state { - State::NamedChar(ref candidates, ref progress) => (candidates.clone(), progress.clone()), - _ => panic!("Called state_named_char without being in NamedChar state") - }; - - if c.is_identifier_delimiter() || c.is_eof() { - if progress.len() == 1 { - self.retract(); - let token_char = Object::Char(progress.chars().next().unwrap()); - return self.token_result(Token::Character(token_char)); - } - else { - return self.generic_error(c); - } - } - - progress.push(c); - - let candidates: HashSet<&str> = { - let filtered = candidates.iter().filter(|c| c.starts_with(&progress)).map(|c| *c); - filtered.collect() - }; - - if candidates.len() == 1 { - let candidate = *candidates.iter().next().unwrap(); - if candidate == &progress { - let token_char = Object::from_char_named(&progress); - self.token_result(Token::Character(token_char)) - } - else { - self.state = State::NamedChar(candidates, progress); - self.advance(); - Ok(None) - } - } - else if candidates.len() > 1 { - self.state = State::NamedChar(candidates, progress); - self.advance(); - Ok(None) - } - else { - self.generic_error(c) - } - } - - /// Handle self.state == State::Dot - fn state_dot(&mut self, c: char) -> StateResult { - if c.is_identifier_delimiter() { - self.retract(); - return self.token_result(Token::Dot); - } - else if c.is_digit(10) { - self.number_builder = NumberBuilder::new(); - self.number_builder.extend_decimal_value(c); - self.state = State::NumberDecimal; - self.advance(); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - /// Handle self.state == State::Hash - fn state_hash(&mut self, c: char) -> StateResult { - if c.is_boolean_true() || c.is_boolean_false() { - self.advance(); - let token_bool = Object::Bool(c.is_boolean_true()); - return self.token_result(Token::Boolean(token_bool)); - } - else if c.is_left_paren() { - self.advance(); - return self.token_result(Token::LeftVectorParen); - } - else if c.is_character_leader() { - self.state = State::Char; - self.advance(); - } - else if let Some(radix) = Radix::from_char(c) { - self.number_builder.radix(radix); - self.state = State::NumberRadix; - self.advance(); - } - else if let Some(exactness) = Exact::from_char(c) { - self.number_builder.exact(exactness); - self.state = State::NumberExact; - self.advance(); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - /// Handle self.state == State::Number - fn state_number(&mut self, c: char) -> StateResult { - if c.is_digit(self.number_builder.radix_value()) { - self.number_builder.extend_value(c); - self.advance(); - } - else if c.is_dot() { - self.state = State::NumberDecimal; - self.advance(); - } - else if c.is_identifier_delimiter() { - 
self.retract(); - return self.token_result(Token::Number(self.number_builder.resolve())); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - fn state_number_exactness(&mut self, c: char) -> StateResult { - if c.is_hash() { - self.state = State::Hash; - self.advance(); - } - else if let Some(sign) = Sign::from_char(c) { - self.number_builder.sign(sign); - self.state = State::NumberSign; - self.advance(); - } - else if c.is_digit(self.number_builder.radix_value()) { - self.number_builder.extend_value(c); - self.state = State::Number; - self.advance(); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - fn state_number_decimal(&mut self, c: char) -> StateResult { - if c.is_digit(Radix::Dec.value()) { - self.number_builder.extend_decimal_value(c); - self.advance(); - } - else if c.is_identifier_delimiter() { - self.retract(); - return self.token_result(Token::Number(self.number_builder.resolve())); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - fn state_number_radix(&mut self, c: char) -> StateResult { - if c.is_digit(self.number_builder.radix_value()) { - self.number_builder.extend_value(c); - self.state = State::Number; - self.advance(); - } - else if c.is_dot() { - self.state = State::NumberDecimal; - self.advance(); - } - else if c.is_hash() { - self.state = State::Hash; - self.advance(); - } - else if let Some(sign) = Sign::from_char(c) { - self.number_builder.sign(sign); - self.state = State::NumberSign; - self.advance(); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - fn state_number_sign(&mut self, c: char) -> StateResult { - if c.is_digit(self.number_builder.radix_value()) { - self.number_builder.extend_value(c); - self.state = State::Number; - self.advance(); - } - else if c.is_dot() { - self.state = State::NumberDecimal; - self.advance(); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - fn state_sign(&mut self, c: char) -> StateResult { - if c.is_digit(Radix::Dec.value()) { - self.number_builder.extend_value(c); - self.state = State::Number; - self.advance(); - } - else if c.is_identifier_delimiter() { - let value = self.value(); - self.retract(); - return self.token_result(Token::Id(value)); - } - else { - return self.generic_error(c); - } - Ok(None) - } - - fn state_string(&mut self, c: char) -> StateResult { - self.advance(); - if c.is_string_quote() { - return self.token_result(Token::String(self.string_value.clone())); - } - else if c.is_string_escape_leader() { - self.state = State::StringEscape; - } - else { - self.string_value.push(c); - } - Ok(None) - } - - fn state_string_escape(&mut self, c: char) -> StateResult { - let char_to_push = match c { - '0' => '\0', - 'n' => '\n', - 't' => '\t', - '"' => '"', - '\\' => '\\', - _ => return Err(self.error_string(format!("Invalid string escape character: {}", c))), - }; - self.string_value.push(char_to_push); - self.state = State::String; - self.advance(); - Ok(None) - } - - fn state_comment(&mut self, c: char) -> StateResult { - if c.is_newline() { - self.handle_newline(); - return self.token_result(Token::Comment(self.value())); - } - else if c.is_eof() { - return self.token_result(Token::Comment(self.value())); - } - self.advance(); - Ok(None) - } -} - -impl Iterator for Lexer { - type Item = Lex; - - fn next(&mut self) -> Option { - self.begin_lexing(); - if self.begin == self.input.len() { - return None; - } - let mut token: Option = None; - println!("Lexing '{}'", &self.input[self.begin ..]); - while token.is_none() { - let c = match 
self.input.char_at(self.forward) { - Some(c) => c, - None => '\0', - }; - println!("state={:?} c='{}'", self.state, c); - let previous_forward = self.forward; - let result = match self.state { - State::Char=> self.state_char(c), - State::NamedChar(_, _) => self.state_named_char(c), - State::Comment => self.state_comment(c), - State::Dot => self.state_dot(c), - State::Hash => self.state_hash(c), - State::Id => self.state_identifier(c), - State::Initial => self.state_initial(c), - State::Number => self.state_number(c), - State::NumberDecimal => self.state_number_decimal(c), - State::NumberExact => self.state_number_exactness(c), - State::NumberRadix => self.state_number_radix(c), - State::NumberSign => self.state_number_sign(c), - State::Sign => self.state_sign(c), - State::String => self.state_string(c), - State::StringEscape => self.state_string_escape(c), - }; - debug_assert!(result.has_token() || self.forward != previous_forward, "No lexing progress made!"); - if result.has_token() { - token = result.ok().unwrap(); - } - else if result.is_err() { - assert!(false, "{}", result.err().unwrap()); - } - } - self.advance_begin(); - match token { - Some(t) => Some(Lex::new(t, self.line, self.line_offset)), - None => None, - } - } -} - -impl HasResult for StateResult { - fn has_token(&self) -> bool { - match *self { - Ok(ref token) => match *token { - Some(_) => true, - None => false, - }, - Err(_) => false - } - } -} diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs index 0a35bae..e69de29 100644 --- a/lexer/src/lib.rs +++ b/lexer/src/lib.rs @@ -1,183 +0,0 @@ -extern crate sibiltypes; - -mod char; -mod charset; -mod lexer; -mod number; -mod str; -mod token; - -pub use lexer::Lexer; -pub use token::Token; - -pub fn lex(input: &str) -> Lexer { - Lexer::new(&input) -} - -#[cfg(test)] -mod tests { - use sibiltypes::{Bool, Char, Number}; - use std::iter::Iterator; - use super::lex; - use lexer::Lexer; - use token::Token; - - #[test] - fn finds_parens() { - check_single_token("(", Token::LeftParen); - check_single_token(")", Token::RightParen); - check_single_token("#(", Token::LeftVectorParen); - } - - #[test] - fn finds_characters() { - check_single_token("#\\a", Token::Character(Char('a'))); - check_single_token("#\\n", Token::Character(Char('n'))); - check_single_token("#\\s", Token::Character(Char('s'))); - } - - #[test] - fn finds_named_characters() { - check_single_token("#\\newline", Token::Character(Char('\n'))); - check_single_token("#\\null", Token::Character(Char('\0'))); - check_single_token("#\\space", Token::Character(Char(' '))); - } - - #[test] - fn finds_dots() { - check_single_token(".", Token::Dot); - - let mut lexer = Lexer::new("abc . 
abc"); - assert_next_token(&mut lexer, &Token::Id(String::from("abc"))); - assert_next_token(&mut lexer, &Token::Dot); - assert_next_token(&mut lexer, &Token::Id(String::from("abc"))); - } - - #[test] - fn finds_identifiers() { - let tok = |s: &str| { check_single_token(s, Token::Id(String::from(s))); }; - tok("abc"); - tok("number?"); - tok("+"); - tok("-"); - } - - #[test] - fn finds_booleans() { - check_single_token("#t", Token::Boolean(Bool(true))); - check_single_token("#f", Token::Boolean(Bool(false))); - } - - #[test] - fn finds_comments() { - let s = "; a comment"; - check_single_token(s, Token::Comment(String::from(s))); - } - - #[test] - fn finds_escaped_characters_in_strings() { - check_single_token("\"\\\\\"", Token::String(String::from("\\"))); - check_single_token("\"\\\"\"", Token::String(String::from("\""))); - check_single_token("\"\\n\"", Token::String(String::from("\n"))); - } - - #[test] - fn finds_numbers() { - check_single_token("34", Token::Number(Number::from_int(34, true))); - check_single_token(".34", Token::Number(Number::from_float(0.34, false))); - check_single_token("0.34", Token::Number(Number::from_float(0.34, false))); - } - - #[test] - fn finds_rational_numbers() { - check_single_token("3/2", Token::Number(Number::from_quotient(3, 2, true))); - check_single_token("-3/2", Token::Number(Number::from_quotient(-3, 2, true))); - } - - #[test] - fn finds_negative_numbers() { - check_single_token("-3", Token::Number(Number::from_int(-3, true))); - check_single_token("-0", Token::Number(Number::from_int(-0, true))); - check_single_token("-0.56", Token::Number(Number::from_float(-0.56, false))); - check_single_token("-3.14159", Token::Number(Number::from_float(-3.14159, false))); - } - - #[test] - fn finds_bin_numbers() { - check_single_token("#b0", Token::Number(Number::from_int(0b0, true))); - check_single_token("#b01011", Token::Number(Number::from_int(0b01011, true))); - } - - #[test] - fn finds_dec_numbers() { - check_single_token("34", Token::Number(Number::from_int(34, true))); - check_single_token("#d89", Token::Number(Number::from_int(89, true))); - } - - #[test] - fn finds_oct_numbers() { - check_single_token("#o45", Token::Number(Number::from_int(0o45, true))); - } - - #[test] - fn finds_exact_numbers() { - check_single_token("#e45", Token::Number(Number::from_int(45, true))); - check_single_token("#e-45", Token::Number(Number::from_int(-45, true))); - check_single_token("#e4.5", Token::Number(Number::from_float(4.5, true))); - } - - #[test] - fn finds_hex_numbers() { - check_single_token("#h4A65", Token::Number(Number::from_int(0x4A65, true))); - } - - #[test] - fn finds_quote() { - check_single_token("'", Token::Quote); - } - - #[test] - fn finds_strings() { - check_single_token("\"\"", Token::String(String::from(""))); - check_single_token("\"abc\"", Token::String(String::from("abc"))); - } - - #[test] - fn lexes_simple_expression() { - check_tokens("(+ 3.4 6.8)", vec![ - Token::LeftParen, - Token::Id(String::from("+")), - Token::Number(Number::from_float(3.4, false)), - Token::Number(Number::from_float(6.8, false)), - Token::RightParen]); - } - - #[test] - fn lexes_quoted_identifier() { - check_tokens("'abc", vec![Token::Quote, Token::Id(String::from("abc"))]); - } - - fn check_single_token(input: &str, expected: Token) { - let mut lexer = Lexer::new(input); - assert_next_token(&mut lexer, &expected); - } - - fn check_tokens(input: &str, expected: Vec) { - let lexer = lex(input); - let mut expected_iter = expected.iter(); - for lex in lexer { - if 
let Some(expected_token) = expected_iter.next() { - assert_eq!(lex.token, *expected_token); - } - else { - assert!(false, "Found a token we didn't expect: {:?}", lex.token); - } - } - // TODO: Check that all expected tokens are consumed. - } - - fn assert_next_token(lexer: &mut Lexer, expected: &Token) { - let lex = lexer.next().unwrap(); - assert_eq!(lex.token, *expected); - } -} diff --git a/lexer/src/number.rs b/lexer/src/number.rs deleted file mode 100644 index 5051a5c..0000000 --- a/lexer/src/number.rs +++ /dev/null @@ -1,176 +0,0 @@ -/* number.rs - * Eryn Wells - */ - -use sibiltypes::Object; -use sibiltypes::number::{Number, Exact}; -use char::FromChar; - -#[derive(Debug)] -pub enum Radix { Bin, Oct, Dec, Hex } - -#[derive(Eq, PartialEq, Debug)] -pub enum Sign { Pos, Neg } - -#[derive(Debug)] -pub struct NumberBuilder { - exact: Exact, - radix: Radix, - sign: Sign, - value: f64, - point: u32, -} - -impl NumberBuilder { - pub fn new() -> NumberBuilder { - NumberBuilder { - exact: Exact::Yes, - radix: Radix::Dec, - sign: Sign::Pos, - value: 0.0, - point: 0, - } - } - - pub fn exact<'a>(&'a mut self, ex: Exact) -> &'a mut NumberBuilder { - self.exact = ex; - self - } - - pub fn radix<'a>(&'a mut self, r: Radix) -> &'a mut NumberBuilder { - self.radix = r; - self - } - - pub fn sign<'a>(&'a mut self, s: Sign) -> &'a mut NumberBuilder { - self.sign = s; - self - } - - pub fn extend_value<'a>(&'a mut self, digit: char) -> &'a mut Self { - if let Some(place) = NumberBuilder::place_value(digit) { - self.value = self.radix.float_value() * self.value + place; - } - else { - // TODO: Indicate an error. - } - self - } - - pub fn extend_decimal_value<'a>(&'a mut self, digit: char) -> &'a mut Self { - self.extend_value(digit); - self.point += 1; - self - } - - pub fn resolve(&self) -> Number { - // TODO: Convert fields to Number type. - let value = if self.point > 0 { self.value / 10u32.pow(self.point) as f64 } else { self.value }; - let value = if self.sign == Sign::Neg { value * -1.0 } else { value }; - // TODO: Use an integer if we can. - Number::from_float(value, self.exact) - } - - pub fn radix_value(&self) -> u32 { - self.radix.value() - } - - fn place_value(digit: char) -> Option { - match digit { - '0' ... '9' => Some((digit as u32 - '0' as u32) as f64), - 'a' ... 'f' => Some((digit as u32 - 'a' as u32 + 10) as f64), - 'A' ... 
'F' => Some((digit as u32 - 'A' as u32 + 10) as f64), - _ => None, - } - } -} - -impl Radix { - pub fn value(&self) -> u32 { - match *self { - Radix::Bin => 2, - Radix::Oct => 8, - Radix::Dec => 10, - Radix::Hex => 16, - } - } - - pub fn float_value(&self) -> f64 { - self.value() as f64 - } -} - -impl FromChar for Radix { - fn from_char(c: char) -> Option { - match c { - 'b' => Some(Radix::Bin), - 'o' => Some(Radix::Oct), - 'd' => Some(Radix::Dec), - 'h' => Some(Radix::Hex), - _ => None, - } - } -} - -impl FromChar for Sign { - fn from_char(c: char) -> Option { - match c { - '+' => Some(Sign::Pos), - '-' => Some(Sign::Neg), - _ => None, - } - } -} - -impl FromChar for Exact { - fn from_char(c: char) -> Option { - match c { - 'i' => Some(Exact::No), - 'e' => Some(Exact::Yes), - _ => None, - } - } -} - -#[cfg(test)] -mod tests { - use sibiltypes::Number; - use super::*; - - #[test] - fn builds_integers() { - let mut b = NumberBuilder::new(); - b.extend_value('3'); - assert_eq!(b.resolve(), Number::from_int(3, true)); - b.extend_value('4'); - assert_eq!(b.resolve(), Number::from_int(34, true)); - } - - #[test] - fn builds_negative_integers() { - let num = NumberBuilder::new().sign(Sign::Neg).extend_value('3').resolve(); - assert_eq!(num, Number::from_int(-3, true)); - } - - #[test] - fn builds_pointy_numbers() { - let mut b = NumberBuilder::new(); - b.extend_value('5'); - assert_eq!(b.resolve(), Number::from_int(5, true)); - b.extend_decimal_value('3'); - assert_eq!(b.resolve(), Number::from_float(5.3, false)); - b.extend_decimal_value('4'); - assert_eq!(b.resolve(), Number::from_float(5.34, false)); - } - - #[test] - fn builds_hex() { - let mut b = NumberBuilder::new(); - b.radix(Radix::Hex).extend_value('4'); - assert_eq!(b.resolve(), Number::from_int(0x4, true)); - b.extend_value('A'); - assert_eq!(b.resolve(), Number::from_int(0x4A, true)); - b.extend_value('6'); - assert_eq!(b.resolve(), Number::from_int(0x4A6, true)); - } -} diff --git a/lexer/src/str.rs b/lexer/src/str.rs deleted file mode 100644 index 76ed1a9..0000000 --- a/lexer/src/str.rs +++ /dev/null @@ -1,103 +0,0 @@ -/* str.rs - * Eryn Wells - */ - -pub trait RelativeIndexable { - /// Get the index of the character boundary preceding the given index. The index does not need to be on a character - /// boundary. - fn index_before(&self, usize) -> usize; - - /// Get the index of the character boundary following the given index. The index does not need to be on a character - /// boundary. - fn index_after(&self, usize) -> usize; -} - -pub trait CharAt { - /// Get the character at the given byte index. This index must be at a character boundary as defined by - /// `is_char_boundary()`. - fn char_at(&self, usize) -> Option; -} - -impl RelativeIndexable for str { - fn index_before(&self, index: usize) -> usize { - if index == 0 { - return 0; - } - let mut index = index; - if index > self.len() { - index = self.len(); - } - loop { - index -= 1; - if self.is_char_boundary(index) { - break; - } - } - index - } - - fn index_after(&self, index: usize) -> usize { - if index >= self.len() { - return self.len(); - } - let mut index = index; - loop { - index += 1; - if self.is_char_boundary(index) { - break; - } - } - index - } -} - -impl CharAt for str { - fn char_at(&self, index: usize) -> Option { - if !self.is_char_boundary(index) { - return None; - } - let end = self.index_after(index); - let char_str = &self[index .. 
end]; - char_str.chars().nth(0) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn index_before_is_well_behaved_for_ascii() { - let s = "abc"; - - // Sanity - assert_eq!(s.index_before(0), 0); - assert_eq!(s.index_before(2), 1); - - // An index beyond the string bounds returns the index of the last character in the string. - { - let idx = s.index_before(4); - assert_eq!(idx, 2); - assert!(s.is_char_boundary(idx)); - let last_char = &s[idx ..]; - assert_eq!(last_char.len(), 1); - assert_eq!(last_char.chars().nth(0), Some('c')); - } - } - - #[test] - fn index_after_is_well_behaved_for_ascii() { - let s = "abc"; - - // Sanity - assert_eq!(s.index_after(0), 1); - assert_eq!(s.index_after(2), 3); - - // An index beyond the string bounds returns the length of the string - { - let idx = s.index_after(4); - assert_eq!(idx, s.len()); - assert!(s.is_char_boundary(idx)); - } - } -} diff --git a/lexer/src/token.rs b/lexer/src/token.rs deleted file mode 100644 index 88a1867..0000000 --- a/lexer/src/token.rs +++ /dev/null @@ -1,39 +0,0 @@ -/* token.rs - * Eryn Wells - */ - -use sibiltypes::Object; - -#[derive(Debug, PartialEq)] -pub enum Token { - Boolean(Object), - Character(Object), - Comment(Object), - Dot, - Id(Object), - LeftParen, - LeftVectorParen, - Number(Object), - Quote, - RightParen, - String(Object), -} - -/// A Lex is a Token extracted from a specific position in an input string. It contains useful -/// information about the token's place in that input. -#[derive(Debug)] -pub struct Lex { - token: Token, - line: usize, - offset: usize, -} - -impl Lex { - pub fn new(token: Token, line: usize, offset: usize) -> Lex { - Lex { token: token, line: line, offset: offset } - } - - pub fn token(&self) -> &Token { &self.token } - pub fn line(&self) -> usize { self.line } - pub fn offset(&self) -> usize { self.offset } -} From c5b769ff45db00e7615e1cc02ae1f78c739b98bf Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Sun, 30 Apr 2017 17:46:42 -0700 Subject: [PATCH 02/25] A peekable lexer Use the chars() iterator on &str, with the Peekable wrapper on Iterators, to iterate the input, rather than needing to hold the whole input and do iteration by indexes. 
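
As a rough standalone sketch of the peek-then-consume pattern this patch adopts (the take_word helper below is illustrative only, not part of this crate), the lexer can look at the next character with peek() and decide whether it belongs to the current token before consuming it:

    use std::iter::Peekable;
    use std::str::Chars;

    // Collect a run of alphabetic characters, leaving the delimiter unconsumed.
    fn take_word(input: &mut Peekable<Chars>) -> String {
        let mut word = String::new();
        while let Some(&c) = input.peek() {
            if c.is_alphabetic() {
                word.push(c);
                input.next(); // consume only once we know the char belongs
            } else {
                break; // leave the delimiter for the caller to handle
            }
        }
        word
    }

    fn main() {
        let mut chars = "ab (cd)".chars().peekable();
        assert_eq!(take_word(&mut chars), "ab");
        assert_eq!(chars.next(), Some(' ')); // the delimiter is still available
    }

Leaving the delimiter unconsumed is what lets the caller start the next token cleanly; the Resume::Here case in the patch below relies on the same idea.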
--- lexer/Cargo.toml | 1 - lexer/src/lib.rs | 79 +++++++++++++++++++++++++++++++++++++++++++++++ lexer/src/main.rs | 14 +++++++++ 3 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 lexer/src/main.rs diff --git a/lexer/Cargo.toml b/lexer/Cargo.toml index f8eb105..16d75ef 100644 --- a/lexer/Cargo.toml +++ b/lexer/Cargo.toml @@ -4,4 +4,3 @@ version = "0.1.0" authors = ["Eryn Wells "] [dependencies] -sibiltypes = { path = "../types" } diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs index e69de29..0d2a0c0 100644 --- a/lexer/src/lib.rs +++ b/lexer/src/lib.rs @@ -0,0 +1,79 @@ +/* lexer/src/lib.rs + * Eryn Wells + */ + +use std::iter::Peekable; + +#[derive(Debug)] +pub enum Token { LeftParen, RightParen, Id(String), } + +enum Resume { Here, AtNext } + +enum IterationResult { + Continue, + Emit(Token, Resume), + Error(String), +} + +pub struct Lexer where T: Iterator { + input: Peekable, +} + +impl Lexer where T: Iterator { + pub fn new(input: T) -> Lexer { + Lexer { input: input.peekable() } + } + + fn emit(&self, token: Token, resume: Resume) -> IterationResult { + IterationResult::Emit(token, resume) + } + + fn fail(&self, msg: String) -> IterationResult { + IterationResult::Error(msg) + } +} + +impl Iterator for Lexer where T: Iterator { + type Item = Result; + + fn next(&mut self) -> Option { + let mut buffer = String::new(); + while let Some(peek) = self.input.peek().map(char::clone) { + let result = if buffer.is_empty() { + match peek { + '(' => self.emit(Token::LeftParen, Resume::AtNext), + ')' => self.emit(Token::RightParen, Resume::AtNext), + c if c.is_whitespace() => IterationResult::Continue, + c if c.is_alphabetic() => { + buffer.push(c); + IterationResult::Continue + }, + c => self.fail(format!("Invalid character: {}", c)), + } + } + else { + match peek { + c if c.is_alphabetic() => { + buffer.push(c); + IterationResult::Continue + } + c if c == '(' || c == ')' || c.is_whitespace() => + self.emit(Token::Id(buffer.clone()), Resume::Here), + c => self.fail(format!("Invalid character: {}", c)), + } + }; + match result { + IterationResult::Continue => self.input.next(), + IterationResult::Emit(token, resume) => { + match resume { + Resume::AtNext => self.input.next(), + Resume::Here => None, + }; + return Some(Ok(token)) + }, + IterationResult::Error(msg) => return Some(Err(msg)), + }; + } + None + } +} diff --git a/lexer/src/main.rs b/lexer/src/main.rs new file mode 100644 index 0000000..e177f98 --- /dev/null +++ b/lexer/src/main.rs @@ -0,0 +1,14 @@ +/* lexer/src/main.rs + * Eryn Wells + */ + +extern crate sibillexer; + +use sibillexer::Lexer; + +fn main() { + let lexer = Lexer::new("(ab (cd) ef)".chars()); + for tok in lexer { + println!("found {:?}", tok.unwrap()); + } +} From 5fe10fe0022fa025082c7794829b447d726df005 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Tue, 2 May 2017 21:44:01 -0700 Subject: [PATCH 03/25] Simplify the resume check -- just use an if --- lexer/src/lib.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs index 0d2a0c0..b3d66ef 100644 --- a/lexer/src/lib.rs +++ b/lexer/src/lib.rs @@ -4,11 +4,13 @@ use std::iter::Peekable; -#[derive(Debug)] +#[derive(Debug, Eq, PartialEq)] pub enum Token { LeftParen, RightParen, Id(String), } +#[derive(Debug, Eq, PartialEq)] enum Resume { Here, AtNext } +#[derive(Debug, Eq, PartialEq)] enum IterationResult { Continue, Emit(Token, Resume), @@ -65,10 +67,9 @@ impl Iterator for Lexer where T: Iterator { match result { IterationResult::Continue => 
self.input.next(), IterationResult::Emit(token, resume) => { - match resume { - Resume::AtNext => self.input.next(), - Resume::Here => None, - }; + if resume == Resume::AtNext { + self.input.next(); + } return Some(Ok(token)) }, IterationResult::Error(msg) => return Some(Err(msg)), From 28e5814101099b56594d50a4bbb04b676a143c6d Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Tue, 2 May 2017 21:44:57 -0700 Subject: [PATCH 04/25] Check for EOF (aka input.next() -> None) and emit what we have before finishing forever --- lexer/src/lib.rs | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs index b3d66ef..f7ad002 100644 --- a/lexer/src/lib.rs +++ b/lexer/src/lib.rs @@ -12,6 +12,7 @@ enum Resume { Here, AtNext } #[derive(Debug, Eq, PartialEq)] enum IterationResult { + Finish, Continue, Emit(Token, Resume), Error(String), @@ -40,31 +41,39 @@ impl Iterator for Lexer where T: Iterator { fn next(&mut self) -> Option { let mut buffer = String::new(); - while let Some(peek) = self.input.peek().map(char::clone) { + loop { + let peek = self.input.peek().map(char::clone); let result = if buffer.is_empty() { match peek { - '(' => self.emit(Token::LeftParen, Resume::AtNext), - ')' => self.emit(Token::RightParen, Resume::AtNext), - c if c.is_whitespace() => IterationResult::Continue, - c if c.is_alphabetic() => { + Some('(') => self.emit(Token::LeftParen, Resume::AtNext), + Some(')') => self.emit(Token::RightParen, Resume::AtNext), + Some(c) if c.is_whitespace() => IterationResult::Continue, + Some(c) if c.is_alphabetic() => { buffer.push(c); IterationResult::Continue }, - c => self.fail(format!("Invalid character: {}", c)), + Some(c) => self.fail(format!("Invalid character: {}", c)), + // We found EOF and there's no pending string, so just finish. + None => IterationResult::Finish, } } else { match peek { - c if c.is_alphabetic() => { + Some(c) if c.is_alphabetic() => { buffer.push(c); IterationResult::Continue } - c if c == '(' || c == ')' || c.is_whitespace() => + Some(c) if c == '(' || c == ')' || c.is_whitespace() => self.emit(Token::Id(buffer.clone()), Resume::Here), - c => self.fail(format!("Invalid character: {}", c)), + Some(c) => self.fail(format!("Invalid character: {}", c)), + // Found EOF. Emit what we have and finish. + // Note: the Resume argument doesn't matter in this case since the input + // iterator will always be None from here on. + None => self.emit(Token::Id(buffer.clone()), Resume::Here), } }; match result { + IterationResult::Finish => break, IterationResult::Continue => self.input.next(), IterationResult::Emit(token, resume) => { if resume == Resume::AtNext { From b5f76deb9830d08dd804e7d1a2edbcdc706bed78 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Tue, 2 May 2017 21:45:10 -0700 Subject: [PATCH 05/25] Add some single token tests for the tokens we have --- lexer/tests/single_tokens.rs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 lexer/tests/single_tokens.rs diff --git a/lexer/tests/single_tokens.rs b/lexer/tests/single_tokens.rs new file mode 100644 index 0000000..bbfc724 --- /dev/null +++ b/lexer/tests/single_tokens.rs @@ -0,0 +1,30 @@ +/* lexer/tests/single_token.rs + * Eryn Wells + */ + +//! Tests that single tokens are matches by the lexer. 
+ +extern crate sibillexer; + +use sibillexer::{Lexer, Token}; + +#[test] +fn lexer_finds_left_paren() { + let mut lex = Lexer::new("(".chars()); + assert_eq!(lex.next(), Some(Ok(Token::LeftParen))); + assert_eq!(lex.next(), None); +} + +#[test] +fn lexer_finds_right_paren() { + let mut lex = Lexer::new(")".chars()); + assert_eq!(lex.next(), Some(Ok(Token::RightParen))); + assert_eq!(lex.next(), None); +} + +#[test] +fn lexer_finds_id() { + let mut lex = Lexer::new("abc".chars()); + assert_eq!(lex.next(), Some(Ok(Token::Id("abc".to_string())))); + assert_eq!(lex.next(), None); +} From 1dfdc001b34a6555f161f0526f8ba21ac8e9c3c7 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Sat, 13 May 2017 15:26:41 -0700 Subject: [PATCH 06/25] Add an error class --- lexer/src/error.rs | 16 ++++++++++++++++ lexer/src/lib.rs | 12 ++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 lexer/src/error.rs diff --git a/lexer/src/error.rs b/lexer/src/error.rs new file mode 100644 index 0000000..93aa3d2 --- /dev/null +++ b/lexer/src/error.rs @@ -0,0 +1,16 @@ +/* lexer/src/error.rs + * Eryn Wells + */ + +#[derive(Debug, Eq, PartialEq)] +pub struct Error { + message: String +} + +impl Error { + pub fn new(msg: String) -> Error { + Error { + message: msg + } + } +} diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs index f7ad002..e01c242 100644 --- a/lexer/src/lib.rs +++ b/lexer/src/lib.rs @@ -4,6 +4,10 @@ use std::iter::Peekable; +mod error; + +pub use error::Error; + #[derive(Debug, Eq, PartialEq)] pub enum Token { LeftParen, RightParen, Id(String), } @@ -15,7 +19,7 @@ enum IterationResult { Finish, Continue, Emit(Token, Resume), - Error(String), + Error(Error), } pub struct Lexer where T: Iterator { @@ -32,12 +36,12 @@ impl Lexer where T: Iterator { } fn fail(&self, msg: String) -> IterationResult { - IterationResult::Error(msg) + IterationResult::Error(Error::new(msg)) } } impl Iterator for Lexer where T: Iterator { - type Item = Result; + type Item = Result; fn next(&mut self) -> Option { let mut buffer = String::new(); @@ -81,7 +85,7 @@ impl Iterator for Lexer where T: Iterator { } return Some(Ok(token)) }, - IterationResult::Error(msg) => return Some(Err(msg)), + IterationResult::Error(err) => return Some(Err(err)), }; } None From 237dca4b4b7716bb0db9586e188c6340433c7f6e Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Sat, 13 May 2017 15:37:01 -0700 Subject: [PATCH 07/25] Add some character class methods to a Lexable trait for char --- lexer/src/chars.rs | 13 +++++++++++++ lexer/src/lib.rs | 9 ++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 lexer/src/chars.rs diff --git a/lexer/src/chars.rs b/lexer/src/chars.rs new file mode 100644 index 0000000..0314e68 --- /dev/null +++ b/lexer/src/chars.rs @@ -0,0 +1,13 @@ +/* lexer/src/chars.rs + * Eryn Wells + */ + +pub trait Lexable { + fn is_left_paren(&self) -> bool; + fn is_right_paren(&self) -> bool; +} + +impl Lexable for char { + fn is_left_paren(&self) -> bool { *self == '(' } + fn is_right_paren(&self) -> bool { *self == ')' } +} diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs index e01c242..7b7c531 100644 --- a/lexer/src/lib.rs +++ b/lexer/src/lib.rs @@ -4,10 +4,13 @@ use std::iter::Peekable; +mod chars; mod error; pub use error::Error; +use chars::Lexable; + #[derive(Debug, Eq, PartialEq)] pub enum Token { LeftParen, RightParen, Id(String), } @@ -49,8 +52,8 @@ impl Iterator for Lexer where T: Iterator { let peek = self.input.peek().map(char::clone); let result = if buffer.is_empty() { match peek { - 
Some('(') => self.emit(Token::LeftParen, Resume::AtNext), - Some(')') => self.emit(Token::RightParen, Resume::AtNext), + Some(c) if c.is_left_paren() => self.emit(Token::LeftParen, Resume::AtNext), + Some(c) if c.is_right_paren() => self.emit(Token::RightParen, Resume::AtNext), Some(c) if c.is_whitespace() => IterationResult::Continue, Some(c) if c.is_alphabetic() => { buffer.push(c); @@ -67,7 +70,7 @@ impl Iterator for Lexer where T: Iterator { buffer.push(c); IterationResult::Continue } - Some(c) if c == '(' || c == ')' || c.is_whitespace() => + Some(c) if c.is_left_paren() || c.is_right_paren() || c.is_whitespace() => self.emit(Token::Id(buffer.clone()), Resume::Here), Some(c) => self.fail(format!("Invalid character: {}", c)), // Found EOF. Emit what we have and finish. From d9943163929c0453cf6ab832aa3f6fc997a3a717 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Sat, 13 May 2017 17:21:23 -0700 Subject: [PATCH 08/25] Character checks for identifier initial and subsequent --- lexer/src/chars.rs | 44 ++++++++++++++++++++++++++++++++++++++++++-- lexer/src/lib.rs | 6 +++--- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/lexer/src/chars.rs b/lexer/src/chars.rs index 0314e68..60192c5 100644 --- a/lexer/src/chars.rs +++ b/lexer/src/chars.rs @@ -5,9 +5,49 @@ pub trait Lexable { fn is_left_paren(&self) -> bool; fn is_right_paren(&self) -> bool; + fn is_identifier_initial(&self) -> bool; + fn is_identifier_subsequent(&self) -> bool; + fn is_identifier_delimiter(&self) -> bool; } impl Lexable for char { - fn is_left_paren(&self) -> bool { *self == '(' } - fn is_right_paren(&self) -> bool { *self == ')' } + fn is_left_paren(&self) -> bool { + *self == '(' + } + + fn is_right_paren(&self) -> bool { + *self == ')' + } + + fn is_identifier_initial(&self) -> bool { + self.is_alphabetic() || self.is_special_initial() + } + + fn is_identifier_subsequent(&self) -> bool { + self.is_identifier_initial() || self.is_numeric() || self.is_special_subsequent() + } + + fn is_identifier_delimiter(&self) -> bool { + self.is_whitespace() || self.is_left_paren() || self.is_right_paren() + } +} + +trait LexableSpecial { + fn is_special_initial(&self) -> bool; + fn is_special_subsequent(&self) -> bool; + fn is_explicit_sign(&self) -> bool; +} + +impl LexableSpecial for char { + fn is_special_initial(&self) -> bool { + "!$%&*/:<=>?~_^".contains(*self) + } + + fn is_special_subsequent(&self) -> bool { + self.is_explicit_sign() || ".@".contains(*self) + } + + fn is_explicit_sign(&self) -> bool { + *self == '+' || *self == '-' + } } diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs index 7b7c531..489e029 100644 --- a/lexer/src/lib.rs +++ b/lexer/src/lib.rs @@ -55,7 +55,7 @@ impl Iterator for Lexer where T: Iterator { Some(c) if c.is_left_paren() => self.emit(Token::LeftParen, Resume::AtNext), Some(c) if c.is_right_paren() => self.emit(Token::RightParen, Resume::AtNext), Some(c) if c.is_whitespace() => IterationResult::Continue, - Some(c) if c.is_alphabetic() => { + Some(c) if c.is_identifier_initial() => { buffer.push(c); IterationResult::Continue }, @@ -66,11 +66,11 @@ impl Iterator for Lexer where T: Iterator { } else { match peek { - Some(c) if c.is_alphabetic() => { + Some(c) if c.is_identifier_subsequent() => { buffer.push(c); IterationResult::Continue } - Some(c) if c.is_left_paren() || c.is_right_paren() || c.is_whitespace() => + Some(c) if c.is_identifier_delimiter() => self.emit(Token::Id(buffer.clone()), Resume::Here), Some(c) => self.fail(format!("Invalid character: {}", c)), // Found EOF. 
Emit what we have and finish. From 2a7626c75f72ed65b49baf9ec735c1a811b319f3 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Sun, 18 Jun 2017 16:50:14 -0700 Subject: [PATCH 09/25] Add a type def for lexer Result --- lexer/src/lib.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs index 489e029..ec6e03d 100644 --- a/lexer/src/lib.rs +++ b/lexer/src/lib.rs @@ -11,6 +11,8 @@ pub use error::Error; use chars::Lexable; +pub type Result = std::result::Result; + #[derive(Debug, Eq, PartialEq)] pub enum Token { LeftParen, RightParen, Id(String), } @@ -44,7 +46,7 @@ impl Lexer where T: Iterator { } impl Iterator for Lexer where T: Iterator { - type Item = Result; + type Item = Result; fn next(&mut self) -> Option { let mut buffer = String::new(); From efe0c27d934501529f8e7a8ac9f782e79e234ab2 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Sun, 18 Jun 2017 17:34:46 -0700 Subject: [PATCH 10/25] Make the Parser dump tokens and quit Write a main() for sibilparser that just does that. --- parser/src/lib.rs | 35 ++++++++++++++++++++++++++++++----- parser/src/main.rs | 15 +++++++++++++++ 2 files changed, 45 insertions(+), 5 deletions(-) create mode 100644 parser/src/main.rs diff --git a/parser/src/lib.rs b/parser/src/lib.rs index b4aaf3f..7fa7069 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -5,14 +5,39 @@ extern crate sibillexer; extern crate sibiltypes; -mod program; - -use sibillexer::Lexer; +use std::iter::Peekable; +use sibillexer::Result as LexerResult; use sibiltypes::Object; -struct ParseError { } +pub struct ParseError; -type Result = std::result::Result; +pub type Result = std::result::Result; + +pub struct Parser where T: Iterator { + input: Peekable, +} + +impl Parser where T: Iterator { + pub fn new(input: T) -> Parser { + Parser { input: input.peekable() } + } +} + +impl Iterator for Parser where T: Iterator { + type Item = Result; + + fn next(&mut self) -> Option { + loop { + if let Some(lex) = self.input.next() { + println!("{:?}", lex) + } + else { + break; + } + } + None + } +} #[cfg(test)] mod tests { diff --git a/parser/src/main.rs b/parser/src/main.rs new file mode 100644 index 0000000..17d9d4d --- /dev/null +++ b/parser/src/main.rs @@ -0,0 +1,15 @@ +/* parser/src/main.rs + * Eryn Wells + */ + +extern crate sibillexer; +extern crate sibilparser; + +use sibillexer::Lexer; +use sibilparser::Parser; + +fn main() { + let lexer = Lexer::new("(ab)".chars()); + let parser = Parser::new(lexer); + for thing in parser { } +} From 5f3770914f3d9c997502f17799850d67e057c26a Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Mon, 26 Jun 2017 21:54:05 -0700 Subject: [PATCH 11/25] [lexer] Basic handling of input offsets --- lexer/src/lib.rs | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs index ec6e03d..279e40d 100644 --- a/lexer/src/lib.rs +++ b/lexer/src/lib.rs @@ -29,11 +29,17 @@ enum IterationResult { pub struct Lexer where T: Iterator { input: Peekable, + line: usize, + offset: usize, } impl Lexer where T: Iterator { pub fn new(input: T) -> Lexer { - Lexer { input: input.peekable() } + Lexer { + input: input.peekable(), + line: 0, + offset: 0 + } } fn emit(&self, token: Token, resume: Resume) -> IterationResult { @@ -45,6 +51,18 @@ impl Lexer where T: Iterator { } } +impl Lexer where T: Iterator { + fn handle_whitespace(&mut self, c: char) { + if c == '\n' { + self.line += 1; + self.offset = 0; + } + else { + self.offset += 1; + } + } +} + impl Iterator 
for Lexer where T: Iterator { type Item = Result; @@ -56,7 +74,10 @@ impl Iterator for Lexer where T: Iterator { match peek { Some(c) if c.is_left_paren() => self.emit(Token::LeftParen, Resume::AtNext), Some(c) if c.is_right_paren() => self.emit(Token::RightParen, Resume::AtNext), - Some(c) if c.is_whitespace() => IterationResult::Continue, + Some(c) if c.is_whitespace() => { + self.handle_whitespace(c); + IterationResult::Continue + }, Some(c) if c.is_identifier_initial() => { buffer.push(c); IterationResult::Continue From cc43ffd135c60e09c72dfe872bd7a53dd6baa88f Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Mon, 26 Jun 2017 21:54:57 -0700 Subject: [PATCH 12/25] [lexer] Lexer emits Lexes instead of Tokens --- lexer/src/lib.rs | 27 ++++++++++++++++----------- lexer/src/token.rs | 25 +++++++++++++++++++++++++ lexer/tests/single_tokens.rs | 11 +++++++---- 3 files changed, 48 insertions(+), 15 deletions(-) create mode 100644 lexer/src/token.rs diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs index 279e40d..70de94f 100644 --- a/lexer/src/lib.rs +++ b/lexer/src/lib.rs @@ -3,18 +3,16 @@ */ use std::iter::Peekable; +use chars::Lexable; mod chars; mod error; +mod token; pub use error::Error; +pub use token::{Lex, Token}; -use chars::Lexable; - -pub type Result = std::result::Result; - -#[derive(Debug, Eq, PartialEq)] -pub enum Token { LeftParen, RightParen, Id(String), } +pub type Result = std::result::Result; #[derive(Debug, Eq, PartialEq)] enum Resume { Here, AtNext } @@ -72,8 +70,14 @@ impl Iterator for Lexer where T: Iterator { let peek = self.input.peek().map(char::clone); let result = if buffer.is_empty() { match peek { - Some(c) if c.is_left_paren() => self.emit(Token::LeftParen, Resume::AtNext), - Some(c) if c.is_right_paren() => self.emit(Token::RightParen, Resume::AtNext), + Some(c) if c.is_left_paren() => { + buffer.push(c); + self.emit(Token::LeftParen, Resume::AtNext) + }, + Some(c) if c.is_right_paren() => { + buffer.push(c); + self.emit(Token::RightParen, Resume::AtNext) + }, Some(c) if c.is_whitespace() => { self.handle_whitespace(c); IterationResult::Continue @@ -94,12 +98,12 @@ impl Iterator for Lexer where T: Iterator { IterationResult::Continue } Some(c) if c.is_identifier_delimiter() => - self.emit(Token::Id(buffer.clone()), Resume::Here), + self.emit(Token::Id, Resume::Here), Some(c) => self.fail(format!("Invalid character: {}", c)), // Found EOF. Emit what we have and finish. // Note: the Resume argument doesn't matter in this case since the input // iterator will always be None from here on. 
- None => self.emit(Token::Id(buffer.clone()), Resume::Here), + None => self.emit(Token::Id, Resume::Here), } }; match result { @@ -109,7 +113,8 @@ impl Iterator for Lexer where T: Iterator { if resume == Resume::AtNext { self.input.next(); } - return Some(Ok(token)) + let lex = Lex::new(token, &buffer, self.line, self.offset); + return Some(Ok(lex)) }, IterationResult::Error(err) => return Some(Err(err)), }; diff --git a/lexer/src/token.rs b/lexer/src/token.rs new file mode 100644 index 0000000..fda26fc --- /dev/null +++ b/lexer/src/token.rs @@ -0,0 +1,25 @@ +/* lexer/src/token.rs + * Eryn Wells + */ + +#[derive(Debug, Eq, PartialEq)] +pub struct Lex { + token: Token, + value: String, + line: usize, + offset: usize, +} + +#[derive(Debug, Eq, PartialEq)] +pub enum Token { LeftParen, RightParen, Id, } + +impl Lex { + pub fn new(token: Token, value: &str, line: usize, offset: usize) -> Lex { + Lex { + token: token, + value: String::from(value), + line: line, + offset: offset, + } + } +} diff --git a/lexer/tests/single_tokens.rs b/lexer/tests/single_tokens.rs index bbfc724..ef05b7c 100644 --- a/lexer/tests/single_tokens.rs +++ b/lexer/tests/single_tokens.rs @@ -6,25 +6,28 @@ extern crate sibillexer; -use sibillexer::{Lexer, Token}; +use sibillexer::{Lexer, Lex, Token}; #[test] fn lexer_finds_left_paren() { + let expected_lex = Lex::new(Token::LeftParen, "(", 0, 0); let mut lex = Lexer::new("(".chars()); - assert_eq!(lex.next(), Some(Ok(Token::LeftParen))); + assert_eq!(lex.next(), Some(Ok(expected_lex))); assert_eq!(lex.next(), None); } #[test] fn lexer_finds_right_paren() { + let expected_lex = Lex::new(Token::RightParen, ")", 0, 0); let mut lex = Lexer::new(")".chars()); - assert_eq!(lex.next(), Some(Ok(Token::RightParen))); + assert_eq!(lex.next(), Some(Ok(expected_lex))); assert_eq!(lex.next(), None); } #[test] fn lexer_finds_id() { + let expected_lex = Lex::new(Token::Id, "abc", 0, 0); let mut lex = Lexer::new("abc".chars()); - assert_eq!(lex.next(), Some(Ok(Token::Id("abc".to_string())))); + assert_eq!(lex.next(), Some(Ok(expected_lex))); assert_eq!(lex.next(), None); } From 6de9ff6695d8d6a6f7cfa9c11382cb650244f587 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Fri, 17 Aug 2018 17:18:01 -0700 Subject: [PATCH 13/25] [types] Fix the tests --- types/src/number/mod.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/types/src/number/mod.rs b/types/src/number/mod.rs index 8b90c3b..f79ad46 100644 --- a/types/src/number/mod.rs +++ b/types/src/number/mod.rs @@ -90,13 +90,14 @@ impl fmt::Display for Number { #[cfg(test)] mod tests { + use super::Exact; use super::Number; use super::real::Real; #[test] fn exact_numbers_are_exact() { - assert!(Number::from_int(3, true).is_exact()); - assert!(!Number::from_int(3, false).is_exact()); + assert!(Number::from_int(3, Exact::Yes).is_exact()); + assert!(!Number::from_int(3, Exact::No).is_exact()); } #[test] From 52ede10d5e4baf035f0c61328fc1e2c09576059a Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Sun, 19 Aug 2018 21:15:05 -0700 Subject: [PATCH 14/25] [parser] Remove program.rs --- parser/src/program.rs | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 parser/src/program.rs diff --git a/parser/src/program.rs b/parser/src/program.rs deleted file mode 100644 index f7b2039..0000000 --- a/parser/src/program.rs +++ /dev/null @@ -1,8 +0,0 @@ -/* parser/src/program.rs - * Eryn Wells - */ - -use sibillexer::Lexer; -use super::Result; -use super::ParseError; - From d7bffdc432ea922173255137e2d2bce22092c070 Mon Sep 17 00:00:00 
2001 From: Eryn Wells Date: Sun, 19 Aug 2018 22:04:03 -0700 Subject: [PATCH 15/25] [parser] Add parsers field to Parser This will be a stack of node-specific parsers. As you descend the tree, a new parser will be created for each node we visit. --- parser/src/lib.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 7fa7069..6145f75 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -9,17 +9,25 @@ use std::iter::Peekable; use sibillexer::Result as LexerResult; use sibiltypes::Object; +#[derive(Debug)] pub struct ParseError; pub type Result = std::result::Result; +trait NodeParser { +} + pub struct Parser where T: Iterator { input: Peekable, + parsers: Vec>, } impl Parser where T: Iterator { pub fn new(input: T) -> Parser { - Parser { input: input.peekable() } + Parser { + input: input.peekable(), + parsers: Vec::new(), + } } } @@ -35,6 +43,7 @@ impl Iterator for Parser where T: Iterator { break; } } + assert_eq!(self.parsers.len(), 0); None } } From 027854d162a623f7b4e45be4e7a1640654ae9140 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Sun, 19 Aug 2018 22:11:08 -0700 Subject: [PATCH 16/25] [parser] Print things in main() as the thing --- parser/src/main.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/parser/src/main.rs b/parser/src/main.rs index 17d9d4d..588f552 100644 --- a/parser/src/main.rs +++ b/parser/src/main.rs @@ -11,5 +11,7 @@ use sibilparser::Parser; fn main() { let lexer = Lexer::new("(ab)".chars()); let parser = Parser::new(lexer); - for thing in parser { } + for thing in parser { + println!("{:?}", thing); + } } From 281c29ee26c3322331651a898fe64db20aa4bce9 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Sun, 19 Aug 2018 22:30:19 -0700 Subject: [PATCH 17/25] [lexer] Expose token field via method; Clone and Copy Tokens --- lexer/src/token.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lexer/src/token.rs b/lexer/src/token.rs index fda26fc..5ea2aa4 100644 --- a/lexer/src/token.rs +++ b/lexer/src/token.rs @@ -10,7 +10,7 @@ pub struct Lex { offset: usize, } -#[derive(Debug, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Token { LeftParen, RightParen, Id, } impl Lex { @@ -22,4 +22,6 @@ impl Lex { offset: offset, } } + + pub fn token(&self) -> Token { self.token } } From cabe40bd7a0a576fc3a73c45f490007f16d91fb9 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Sun, 19 Aug 2018 22:30:49 -0700 Subject: [PATCH 18/25] [parser] Skeleton IdParser and ListParser for Ids and Lists respectively --- parser/src/lib.rs | 18 ++++++++++-------- parser/src/node_parser.rs | 30 ++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 8 deletions(-) create mode 100644 parser/src/node_parser.rs diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 6145f75..1005821 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -5,17 +5,18 @@ extern crate sibillexer; extern crate sibiltypes; +mod node_parser; + use std::iter::Peekable; use sibillexer::Result as LexerResult; +use sibillexer::Token; use sibiltypes::Object; - -#[derive(Debug)] -pub struct ParseError; +use node_parser::{NodeParser, IdParser, ListParser}; pub type Result = std::result::Result; -trait NodeParser { -} +#[derive(Debug)] +pub struct ParseError; pub struct Parser where T: Iterator { input: Peekable, @@ -37,9 +38,10 @@ impl Iterator for Parser where T: Iterator { fn next(&mut self) -> Option { loop { if let Some(lex) = self.input.next() { - println!("{:?}", lex) - } - else { + 
if let Ok(lex) = lex { + } else { + } + } else { break; } } diff --git a/parser/src/node_parser.rs b/parser/src/node_parser.rs new file mode 100644 index 0000000..ac4c69d --- /dev/null +++ b/parser/src/node_parser.rs @@ -0,0 +1,30 @@ +/* node_parser.rs + * Eryn Wells + */ + +pub trait NodeParser { +} + +pub struct IdParser { +} + +impl IdParser { + pub fn new() -> IdParser { + IdParser { } + } +} + +impl NodeParser for IdParser { +} + +pub struct ListParser { +} + +impl ListParser { + pub fn new() -> ListParser { + ListParser { } + } +} + +impl NodeParser for ListParser { +} From 8475720a71b69debd0d4aefd43edf9de4938f01e Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Mon, 20 Aug 2018 15:21:20 -0700 Subject: [PATCH 19/25] Add accessor for Lex::value --- lexer/src/token.rs | 1 + parser/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lexer/src/token.rs b/lexer/src/token.rs index 5ea2aa4..0cda4ca 100644 --- a/lexer/src/token.rs +++ b/lexer/src/token.rs @@ -24,4 +24,5 @@ impl Lex { } pub fn token(&self) -> Token { self.token } + pub fn value(&self) -> &str { self.value.as_str() } } diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 1005821..e8517e2 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -9,10 +9,10 @@ mod node_parser; use std::iter::Peekable; use sibillexer::Result as LexerResult; -use sibillexer::Token; use sibiltypes::Object; use node_parser::{NodeParser, IdParser, ListParser}; +/// The output of calling `parse()` on a Parser is one of these Result objects. pub type Result = std::result::Result; #[derive(Debug)] From 5a5a0c9c071f505f791188110ad553f75d8d5da0 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Mon, 20 Aug 2018 15:21:38 -0700 Subject: [PATCH 20/25] Add ObjectPtr::new_pair() --- types/src/object.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/types/src/object.rs b/types/src/object.rs index 6af9773..8f17050 100644 --- a/types/src/object.rs +++ b/types/src/object.rs @@ -40,7 +40,13 @@ pub enum Object { } impl ObjectPtr { - pub fn new(obj: Object) -> ObjectPtr { ObjectPtr::Ptr(Box::new(obj)) } + pub fn new(obj: Object) -> ObjectPtr { + ObjectPtr::Ptr(Box::new(obj)) + } + + pub fn new_pair() -> ObjectPtr { + ObjectPtr::new(Object::Pair(ObjectPtr::Null, ObjectPtr::Null)) + } } impl fmt::Display for ObjectPtr { From 0208ecb4d2811afd84d611054073cb6b6cfab13d Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Mon, 20 Aug 2018 16:14:17 -0700 Subject: [PATCH 21/25] [parser] Continuing to sketch the parser --- parser/src/node_parser.rs | 65 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/parser/src/node_parser.rs b/parser/src/node_parser.rs index ac4c69d..15ee573 100644 --- a/parser/src/node_parser.rs +++ b/parser/src/node_parser.rs @@ -2,9 +2,50 @@ * Eryn Wells */ -pub trait NodeParser { +use std::fmt::Debug; +use sibillexer::{Lex, Token}; +use sibiltypes::{Object, ObjectPtr}; + +#[derive(Debug)] +pub enum NodeParseResult { + /// Continue parsing with this NodeParser. The passed in Lex was consumed. + Continue, + /// This NodeParser has completed its work and has produced the given Object + /// as a result. + Complete { obj: ObjectPtr }, + /// Push a new NodeParser onto the parsing stack and let that parser proceed + /// with the current Lex. + Push { next: Box }, + /// There was an error parsing with the current Lex. + Error { msg: String }, } +/// A `NodeParser` is responsible for parsing one particular thing in the Scheme +/// parse tree. 
Roughly, there should be one `XParser` for each variant of the +/// `sibiltypes::Object` enum. As the top-level `Parser` object progresses +/// through the stream of tokens, new NodeParsers are created to handle the +/// nodes it encounters. +pub trait NodeParser: Debug { + fn parse(&mut self, lex: Lex) -> NodeParseResult; +} + +#[derive(Debug)] +pub struct ProgramParser { +} + +impl ProgramParser { + pub fn new() -> ProgramParser { + ProgramParser { } + } +} + +impl NodeParser for ProgramParser { + fn parse(&mut self, lex: Lex) -> NodeParseResult { + NodeParseResult::Error { msg: "womp".to_string() } + } +} + +#[derive(Debug)] pub struct IdParser { } @@ -15,16 +56,36 @@ impl IdParser { } impl NodeParser for IdParser { + fn parse(&mut self, lex: Lex) -> NodeParseResult { + match lex.token() { + Token::Id => { + let value = String::from(lex.value()); + let obj = ObjectPtr::new(Object::Symbol(value)); + NodeParseResult::Complete { obj: obj } + } + _ => { + let msg = String::from(format!("Invalid token: {:?}", lex)); + NodeParseResult::Error { msg: msg } + } + } + } } +#[derive(Debug)] pub struct ListParser { + list: ObjectPtr } impl ListParser { pub fn new() -> ListParser { - ListParser { } + ListParser { + list: ObjectPtr::Null + } } } impl NodeParser for ListParser { + fn parse(&mut self, lex: Lex) -> NodeParseResult { + NodeParseResult::Error { msg: "womp".to_string() } + } } From a81e9b4258748f883a5df76fdb1138a26efd25ae Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Mon, 20 Aug 2018 16:30:59 -0700 Subject: [PATCH 22/25] I guess we're doing this again... --- types/src/object.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/types/src/object.rs b/types/src/object.rs index 8f17050..d8f9517 100644 --- a/types/src/object.rs +++ b/types/src/object.rs @@ -25,6 +25,11 @@ pub enum ObjectPtr { Ptr(Box), } +pub trait Object : + fmt::Display, + +{ } + #[derive(Debug, PartialEq)] pub enum Object { Bool(bool), From 234b60dde9e14fffd74a6b09d27eb177be1c81c5 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Mon, 20 Aug 2018 16:31:10 -0700 Subject: [PATCH 23/25] Some spacing tweaks to predicates --- types/src/predicates.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/types/src/predicates.rs b/types/src/predicates.rs index 5ab20f8..28e40cb 100644 --- a/types/src/predicates.rs +++ b/types/src/predicates.rs @@ -20,12 +20,16 @@ pub trait IsChar { pub trait IsNumber { /// Is this thing a number? fn is_number(&self) -> bool { self.is_complex() || self.is_real() || self.is_rational() || self.is_integer() } + /// Should return `true` if this Value is a complex number type. fn is_complex(&self) -> bool { self.is_real() } + /// Should return `true` if this Value is a real number type. fn is_real(&self) -> bool { self.is_rational() } + /// Should return `true` if this Value is a rational number type. fn is_rational(&self) -> bool { self.is_integer() } + /// Should return `true` if this Value is a integer number type. 
fn is_integer(&self) -> bool { false } } From f197f1ba8bbc44e9a61f907ff77b8ef326a447ed Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Thu, 23 Aug 2018 07:53:04 -0700 Subject: [PATCH 24/25] Rename predicates -> preds --- types/src/{predicates.rs => preds.rs} | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) rename types/src/{predicates.rs => preds.rs} (79%) diff --git a/types/src/predicates.rs b/types/src/preds.rs similarity index 79% rename from types/src/predicates.rs rename to types/src/preds.rs index 28e40cb..f10d347 100644 --- a/types/src/predicates.rs +++ b/types/src/preds.rs @@ -1,7 +1,10 @@ -/* types/src/predicates.rs +/* types/src/preds.rs * Eryn Wells */ +//! This module defines several important predicates for determing what kind of +//! a thing this Object is. + pub trait IsNull { /// Is this thing null? fn is_null(&self) -> bool { false } @@ -33,3 +36,8 @@ pub trait IsNumber { /// Should return `true` if this Value is a integer number type. fn is_integer(&self) -> bool { false } } + +pub trait IsPair { + /// Should return `true` if this Value is a pair. + fn is_pair(&self) -> bool { false } +} From d825d0ec8a954692b9df9ab4cb5f2656238dac24 Mon Sep 17 00:00:00 2001 From: Eryn Wells Date: Thu, 23 Aug 2018 17:05:29 -0700 Subject: [PATCH 25/25] [types] Clean up Pair and Sym types Reconfigure the top-level object types. - Obj is an enum pointer type - Object is a trait that all Scheme types should implement Define Sym, a symbol type. Define Pair, a pair/cons/list type. Implement Display for all types above. Implement casting methods for the above. --- types/src/lib.rs | 13 ++- types/src/object.rs | 209 ++++++++++++++++++++++++-------------------- types/src/pair.rs | 41 +++++++++ types/src/sym.rs | 22 +++++ 4 files changed, 181 insertions(+), 104 deletions(-) create mode 100644 types/src/pair.rs create mode 100644 types/src/sym.rs diff --git a/types/src/lib.rs b/types/src/lib.rs index bb00a11..746f594 100644 --- a/types/src/lib.rs +++ b/types/src/lib.rs @@ -1,13 +1,10 @@ -pub mod number; -pub mod char; - -mod bool; mod object; -mod predicates; +mod pair; +mod sym; -pub use object::Object; -pub use object::ObjectPtr; -pub use predicates::*; +pub use object::Obj; +pub use pair::Pair; +pub use sym::Sym; #[cfg(test)] mod tests { diff --git a/types/src/object.rs b/types/src/object.rs index d8f9517..2a26aac 100644 --- a/types/src/object.rs +++ b/types/src/object.rs @@ -4,8 +4,8 @@ //! # Objects //! -//! All scheme types are represented by the `Object` enum defined in this -//! module. Most references to objects are going to be through an `ObjectPtr`. +//! All Scheme types implement the `Object` trait defined in this module. Most +//! references to objects are going to be through an `ObjectPtr`. //! //! ## Type Predicates //! @@ -13,116 +13,133 @@ //! available types in Scheme. These predicates are implemented as `is_*` //! methods in a bunch of `Is*` traits defined below. +use std::any::Any; use std::fmt; -use std::ops::Deref; -use number::Number; +use super::*; -#[derive(Debug, PartialEq)] -pub enum ObjectPtr { - /// Absence of a value. A null pointer. +pub enum Obj { Null, - /// A pointer to an object. 
- Ptr(Box), + Ptr(Box) } -pub trait Object : - fmt::Display, - -{ } - -#[derive(Debug, PartialEq)] -pub enum Object { - Bool(bool), - ByteVector(Vec), - Char(char), - Number(Number), - Pair(ObjectPtr, ObjectPtr), - //Procedure/*( TODO: Something )*/, - //Record/*( TODO: Something )*/, - String(String), - Symbol(String), - Vector(Vec), +pub trait Object: + fmt::Display +{ + fn as_any(&self) -> &Any; + fn as_pair(&self) -> Option<&Pair>; + fn as_sym(&self) -> Option<&Sym>; } -impl ObjectPtr { - pub fn new(obj: Object) -> ObjectPtr { - ObjectPtr::Ptr(Box::new(obj)) - } - - pub fn new_pair() -> ObjectPtr { - ObjectPtr::new(Object::Pair(ObjectPtr::Null, ObjectPtr::Null)) - } -} - -impl fmt::Display for ObjectPtr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - ObjectPtr::Null => write!(f, "()"), - ObjectPtr::Ptr(ref bx) => write!(f, "{}", bx.deref()), +impl Obj { + pub fn unbox_as(&self) -> Option<&T> { + match self { + Obj::Null => None, + Obj::Ptr(obj) => obj.as_any().downcast_ref::() } } } -impl fmt::Display for Object { +impl fmt::Display for Obj { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Object::Bool(ref v) => { - let out = if *v { "#t" } else { "#f" }; - write!(f, "{}", out) - }, - - Object::ByteVector(ref vec) => { - // TODO: Actually write the vector values. - write!(f, "#u8(").and_then(|_| write!(f, ")")) - }, - - Object::Char(ref c) => { - // TODO: This is not correct for all cases. See section 6.6 of the spec. - write!(f, "#\\{}", c) - }, - - Object::Number(ref n) => { - // TODO: Implement Display for Number - write!(f, "{}", n) - } - - Object::Pair(ref car, ref cdr) => { - write!(f, "(").and_then(|_| Object::fmt_pair(car, cdr, f)) - .and_then(|_| write!(f, ")")) - }, - - Object::String(ref st) => { - write!(f, "\"{}\"", st) - }, - - Object::Symbol(ref sym) => { - write!(f, "{}", sym) - }, - - Object::Vector(ref vec) => { - // TODO: Actually write the vector values. - vec.iter().enumerate().fold(write!(f, "#("), |acc, (i, obj)| { - let space = if i == (vec.len() - 1) { " " } else { "" }; - acc.and(write!(f, "{}{}", obj, space)) - }).and(write!(f, ")")) - } + match self { + Obj::Null => write!(f, "null"), + Obj::Ptr(obj) => write!(f, "{}", obj) } } } -impl Object { - fn fmt_pair(car: &ObjectPtr, cdr: &ObjectPtr, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", car).and_then(|r| match cdr { - &ObjectPtr::Null => Ok(r), // Don't write anything. - &ObjectPtr::Ptr(ref ptr) => match ptr.deref() { - &Object::Pair(ref next_car, ref next_cdr) => { - write!(f, " ").and_then(|_| Object::fmt_pair(next_car, next_cdr, f)) - }, - _ => write!(f, " . 
{}", ptr) - } - }) - } -} +//#[derive(Debug, PartialEq)] +//pub enum Object { +// ByteVector(Vec), +// Char(char), +// Number(Number), +// Pair(ObjectPtr, ObjectPtr), +// //Procedure/*( TODO: Something )*/, +// //Record/*( TODO: Something )*/, +// String(String), +// Symbol(String), +// Vector(Vec), +//} +// +//impl ObjectPtr { +// pub fn new(obj: Object) -> ObjectPtr { +// ObjectPtr::Ptr(Box::new(obj)) +// } +// +// pub fn new_pair() -> ObjectPtr { +// ObjectPtr::new(Object::Pair(ObjectPtr::Null, ObjectPtr::Null)) +// } +//} +// +//impl fmt::Display for ObjectPtr { +// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +// match *self { +// ObjectPtr::Null => write!(f, "()"), +// ObjectPtr::Ptr(ref bx) => write!(f, "{}", bx.deref()), +// } +// } +//} +// +//impl fmt::Display for Object { +// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +// match *self { +// Object::Bool(ref v) => { +// let out = if *v { "#t" } else { "#f" }; +// write!(f, "{}", out) +// }, +// +// Object::ByteVector(ref vec) => { +// // TODO: Actually write the vector values. +// write!(f, "#u8(").and_then(|_| write!(f, ")")) +// }, +// +// Object::Char(ref c) => { +// // TODO: This is not correct for all cases. See section 6.6 of the spec. +// write!(f, "#\\{}", c) +// }, +// +// Object::Number(ref n) => { +// // TODO: Implement Display for Number +// write!(f, "{}", n) +// } +// +// Object::Pair(ref car, ref cdr) => { +// write!(f, "(").and_then(|_| Object::fmt_pair(car, cdr, f)) +// .and_then(|_| write!(f, ")")) +// }, +// +// Object::String(ref st) => { +// write!(f, "\"{}\"", st) +// }, +// +// Object::Symbol(ref sym) => { +// write!(f, "{}", sym) +// }, +// +// Object::Vector(ref vec) => { +// // TODO: Actually write the vector values. +// vec.iter().enumerate().fold(write!(f, "#("), |acc, (i, obj)| { +// let space = if i == (vec.len() - 1) { " " } else { "" }; +// acc.and(write!(f, "{}{}", obj, space)) +// }).and(write!(f, ")")) +// } +// } +// } +//} +// +//impl Object { +// fn fmt_pair(car: &ObjectPtr, cdr: &ObjectPtr, f: &mut fmt::Formatter) -> fmt::Result { +// write!(f, "{}", car).and_then(|r| match cdr { +// &ObjectPtr::Null => Ok(r), // Don't write anything. +// &ObjectPtr::Ptr(ref ptr) => match ptr.deref() { +// &Object::Pair(ref next_car, ref next_cdr) => { +// write!(f, " ").and_then(|_| Object::fmt_pair(next_car, next_cdr, f)) +// }, +// _ => write!(f, " . {}", ptr) +// } +// }) +// } +//} #[cfg(test)] mod tests { diff --git a/types/src/pair.rs b/types/src/pair.rs new file mode 100644 index 0000000..039d61f --- /dev/null +++ b/types/src/pair.rs @@ -0,0 +1,41 @@ +/* types/src/pair.rs + * Eryn Wells + */ + +use std::any::Any; +use std::fmt; +use super::*; +use object::Object; + +pub struct Pair { + car: Obj, + cdr: Obj +} + +impl Pair { + fn fmt_pair(&self, f: &mut fmt::Formatter) -> fmt::Result { + let r = write!(f, "{}", self.car); + r.and_then(|r| match self.cdr { + Obj::Null => Ok(r), // Don't write anything. + Obj::Ptr(ref next) => { + match next.as_pair() { + Some(next_pair) => write!(f, " ").and_then(|_| next_pair.fmt_pair(f)), + None => write!(f, " . 
{}", next) + } + } + }) + } +} + +impl Object for Pair { + fn as_any(&self) -> &Any { self } + fn as_pair(&self) -> Option<&Pair> { Some(self) } + fn as_sym(&self) -> Option<&Sym> { None } +} + +impl fmt::Display for Pair { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "(").and_then(|_| self.fmt_pair(f)) + .and_then(|_| write!(f, ")")) + } +} diff --git a/types/src/sym.rs b/types/src/sym.rs new file mode 100644 index 0000000..4ab1782 --- /dev/null +++ b/types/src/sym.rs @@ -0,0 +1,22 @@ +/* types/src/symbol.rs + * Eryn Wells + */ + +use std::any::Any; +use std::fmt; +use object::Object; +use super::*; + +pub struct Sym(String); + +impl Object for Sym { + fn as_any(&self) -> &Any { self } + fn as_pair(&self) -> Option<&Pair> { None } + fn as_sym(&self) -> Option<&Sym> { Some(self) } +} + +impl fmt::Display for Sym { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +}