diff --git a/doc/.gitignore b/doc/.gitignore new file mode 100644 index 0000000..5e99328 --- /dev/null +++ b/doc/.gitignore @@ -0,0 +1 @@ +lexer*.pdf diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..73a0d44 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,2 @@ +lexer.pdf: lexer.dot + dot -Tpdf -O$@ $^ diff --git a/doc/lexer.dot b/doc/lexer.dot new file mode 100644 index 0000000..f571e2e --- /dev/null +++ b/doc/lexer.dot @@ -0,0 +1,94 @@ +digraph lexer { + rankdir=LR; + node [shape = doublecircle] LP RP B0 BF DP1 DD0 DD1 DD2 DR0 DR1 INF NAN EXD; + node [shape = circle]; + BEGIN -> LP [ label = "(" ]; + BEGIN -> RP [ label = ")" ]; + BEGIN -> H [ label = "#" ]; + + subgraph bools { + H -> B0 [ label = "t,f" ]; + B0 -> BF [ label = "rue,alse" ]; + } + +/* + subgraph chars { + H -> SL [ label = "\\" ]; + SL -> CH1 [ label = "*" ]; + SL -> NMCH [ label = "alarm" ]; + SL -> NMCH [ label = "backspace" ]; + SL -> NMCH [ label = "delete" ]; + SL -> NMCH [ label = "escape" ]; + SL -> NMCH [ label = "newline" ]; + SL -> NMCH [ label = "null" ]; + SL -> NMCH [ label = "return" ]; + SL -> NMCH [ label = "space" ]; + SL -> NMCH [ label = "tab" ]; + SL -> XC [ label = "x" ]; + } +*/ + + subgraph numbers { + BEGIN -> DD0 [ label = "0-9" ]; + BEGIN -> SN0 [ label = "+,-" ]; + BEGIN -> DP0 [ label = "." ]; + DD0 -> DD0 [ label = "0-9" ]; + DD0 -> DP1 [ label = "." ]; + DP1 -> DD1 [ label = "0-9" ]; + DP0 -> DD1 [ label = "0-9" ]; + DD1 -> DD1 [ label = "0-9" ]; + SN0 -> DD0 [ label = "0-9" ]; + SN0 -> DP0 [ label = "." ]; + SN0 -> INF [ label = "inf.0" ]; + SN0 -> NAN [ label = "nan.0" ]; + + H -> NEX [ label = "i,e" ]; + NEX -> DD0 [ label = "0-9" ]; + NEX -> SN0 [ label = "+,-" ]; + NEX -> NXH1 [ label = "#" ]; + NXH1 -> NXD1 [ label = "d" ]; + NXD1 -> DD0 [ label = "0-9" ]; + NXD1 -> SN0 [ label = "+,-" ]; + NXH1 -> NXX1 [ label = "b,o,x" ]; + NXX1 -> SN1 [ label = "+,-" ]; + NXX1 -> DR0 [ label = "Dr" ]; + + H -> NBD [ label = "d" ]; + NBD -> DD0 [ label = "0-9" ]; + NBD -> SN0 [ label = "+,-" ]; + NBD -> NBH [ label = "#" ]; + NBH -> NBX [ label = "i,e" ]; + NBX -> SN0 [ label = "+,-" ]; + NBX -> DD0 [ label = "0-9" ]; + + H -> NBS [ label = "b,o,x" ]; + NBS -> DR0 [ label = "Dr" ]; + DR0 -> DR0 [ label = "Dr" ]; + NBS -> NXH [ label = "#" ]; + NXH -> NXX [ label = "i,e" ]; + NXX -> DR0 [ label = "Dr" ]; + + NBS -> SN1 [ label = "+,-" ]; + NXX -> SN1 [ label = "+,-" ]; + SN1 -> DR0 [ label = "Dr" ]; + SN1 -> INF [ label = "inf.0" ]; + SN1 -> NAN [ label = "nan.0" ]; + + DD0 -> EXP [ label = "e" ]; + DP1 -> EXP [ label = "e" ]; + DD1 -> EXP [ label = "e" ]; + + EXP -> EXS [ label = "+,-" ]; + EXS -> EXD [ label = "0-9" ]; + EXP -> EXD [ label = "0-9" ]; + EXD -> EXD [ label = "0-9" ]; + + DR0 -> FR0 [ label = "/" ]; + FR0 -> DR1 [ label = "Dr" ]; + DR1 -> DR1 [ label = "Dr" ]; + + DD0 -> FR1 [ label = "/" ]; + FR1 -> DD2 [ label = "0-9" ]; + DD2 -> DD2 [ label = "0-9" ]; + } +} diff --git a/lexer/Cargo.toml b/lexer/Cargo.toml index 16d75ef..f8eb105 100644 --- a/lexer/Cargo.toml +++ b/lexer/Cargo.toml @@ -4,3 +4,4 @@ version = "0.1.0" authors = ["Eryn Wells "] [dependencies] +sibiltypes = { path = "../types" } diff --git a/lexer/src/chars.rs b/lexer/src/chars.rs index 60192c5..70be6c7 100644 --- a/lexer/src/chars.rs +++ b/lexer/src/chars.rs @@ -3,14 +3,27 @@ */ pub trait Lexable { - fn is_left_paren(&self) -> bool; - fn is_right_paren(&self) -> bool; + fn is_dot(&self) -> bool; + fn is_exactness(&self) -> bool; + fn is_hash(&self) -> bool; + fn is_identifier_delimiter(&self) -> bool; fn is_identifier_initial(&self) -> bool; fn is_identifier_subsequent(&self) -> bool; - fn is_identifier_delimiter(&self) -> bool; + fn is_left_paren(&self) -> bool; + fn is_quote(&self) -> bool; + fn is_radix(&self) -> bool; + fn is_right_paren(&self) -> bool; } impl Lexable for char { + fn is_dot(&self) -> bool { + *self == '.' + } + + fn is_exactness(&self) -> bool { + *self == 'i' || *self == 'e' + } + fn is_left_paren(&self) -> bool { *self == '(' } @@ -20,7 +33,7 @@ impl Lexable for char { } fn is_identifier_initial(&self) -> bool { - self.is_alphabetic() || self.is_special_initial() + self.is_alphabetic() || self.is_special_initial() || self.is_explicit_sign() } fn is_identifier_subsequent(&self) -> bool { @@ -30,6 +43,19 @@ impl Lexable for char { fn is_identifier_delimiter(&self) -> bool { self.is_whitespace() || self.is_left_paren() || self.is_right_paren() } + + fn is_quote(&self) -> bool { + *self == '\'' + } + + fn is_radix(&self) -> bool { + let radishes = &['b', 'd', 'o', 'x']; + radishes.contains(self) + } + + fn is_hash(&self) -> bool { + *self == '#' + } } trait LexableSpecial { diff --git a/lexer/src/error.rs b/lexer/src/error.rs index 73a71bc..ca05e36 100644 --- a/lexer/src/error.rs +++ b/lexer/src/error.rs @@ -14,5 +14,13 @@ impl Error { } } + pub fn invalid_char(c: char) -> Error { + Error::new(format!("invalid character: {}", c)) + } + + pub fn unexpected_eof() -> Error { + Error::new("unexpected EOF".to_string()) + } + pub fn msg(&self) -> &str { &self.message } } diff --git a/lexer/src/lib.rs b/lexer/src/lib.rs index 032d88e..dd7f050 100644 --- a/lexer/src/lib.rs +++ b/lexer/src/lib.rs @@ -2,6 +2,11 @@ * Eryn Wells */ +extern crate sibiltypes; + +use std::iter::Peekable; +use states::{Begin, Resume, StateResult}; + mod chars; mod error; mod states; @@ -10,14 +15,15 @@ mod token; pub use error::Error; pub use token::{Lex, Token}; -use std::iter::Peekable; -use states::*; - pub type Result = std::result::Result; pub struct Lexer where T: Iterator { + /// The input stream. input: Peekable, + + /// Current line number. line: usize, + /// Character offset from the start of the input. offset: usize, } @@ -26,20 +32,33 @@ impl Lexer where T: Iterator { Lexer { input: input.peekable(), line: 0, - offset: 0 + offset: 0, } } -} -impl Lexer where T: Iterator { - fn handle_whitespace(&mut self, c: char) { - if c == '\n' { - self.line += 1; - self.offset = 0; + fn next(&mut self) -> Option { + let out = self.input.next(); + if let Some(c) = out { + self.update_offsets(c); } - else { - self.offset += 1; + out + } + + fn handle_error(&self, err: Error) { + panic!("{}:{}: {}", self.line, self.offset, err.msg()) + } + + fn prepare_offsets(&mut self) { } + + fn update_offsets(&mut self, c: char) { + match c { + '\n' => { + self.line += 1; + self.offset = 0; + }, + _ => self.offset += 1 } + println!("incremented offsets {}:{}", self.line, self.offset); } } @@ -47,8 +66,14 @@ impl Iterator for Lexer where T: Iterator { type Item = Result; fn next(&mut self) -> Option { + self.prepare_offsets(); + + let mut token_line = self.line; + let mut token_offset = self.offset; + println!("beginning token at {}:{}", token_line, token_offset); + let mut buffer = String::new(); - let mut state: Box = Box::new(states::Begin{}); + let mut state: Box = Box::new(Begin::new()); let mut out: Option = None; loop { let peek = self.input.peek().map(char::clone); @@ -57,38 +82,46 @@ impl Iterator for Lexer where T: Iterator { None => match state.none() { Ok(None) => break, Ok(Some(token)) => { - out = Some(Ok(Lex::new(token, &buffer, self.line, self.offset))); + out = Some(Ok(Lex::new(token, &buffer, token_line, token_offset))); break; }, - Err(msg) => panic!("{}", msg) + Err(err) => self.handle_error(err) }, Some(c) => { let result = state.lex(c); match result { StateResult::Continue => { buffer.push(c); - self.input.next(); + self.next(); }, StateResult::Advance { to } => { buffer.push(c); - self.input.next(); + self.next(); state = to; }, + StateResult::Discard(resume) => { + buffer.clear(); + state = Box::new(Begin::new()); + if resume == Resume::AtNext { + self.next(); + } + token_line = self.line; + token_offset = self.offset; + }, StateResult::Emit(token, resume) => { if resume == Resume::AtNext { buffer.push(c); - self.input.next(); + self.next(); } - out = Some(Ok(Lex::new(token, &buffer, self.line, self.offset))); + out = Some(Ok(Lex::new(token, &buffer, token_line, token_offset))); break; }, - StateResult::Fail { msg } => { - panic!("{}", msg); - } + StateResult::Fail(err) => self.handle_error(err), } - } + }, } } + println!("emitting {:?}", out); out } } diff --git a/lexer/src/main.rs b/lexer/src/main.rs index 366ef92..2079c06 100644 --- a/lexer/src/main.rs +++ b/lexer/src/main.rs @@ -4,8 +4,8 @@ extern crate sibillexer; -use std::io::prelude::*; use std::io; +use std::io::Write; use sibillexer::Lexer; fn main() { diff --git a/lexer/src/states/begin.rs b/lexer/src/states/begin.rs index 432ba9a..5f839d0 100644 --- a/lexer/src/states/begin.rs +++ b/lexer/src/states/begin.rs @@ -3,39 +3,47 @@ */ use chars::Lexable; +use error::Error; use token::Token; use states::{Resume, State, StateResult}; +use states::dot::Dot; use states::id::IdSub; use states::hash::Hash; +use states::number::{Builder, Digit}; +use states::whitespace::Whitespace; -#[derive(Debug)] -pub struct Begin; +#[derive(Debug)] pub struct Begin; + +impl Begin { + pub fn new() -> Begin { + Begin{} + } +} impl State for Begin { fn lex(&mut self, c: char) -> StateResult { - match c { - c if c.is_left_paren() => StateResult::Emit(Token::LeftParen, Resume::AtNext), - c if c.is_right_paren() => StateResult::Emit(Token::RightParen, Resume::AtNext), - // TODO: Figure out some way to track newlines. - c if c.is_whitespace() => StateResult::Continue, - c if c.is_identifier_initial() => StateResult::Advance { to: Box::new(IdSub{}) }, - c if c.is_hash() => StateResult::Advance { to: Box::new(Hash{}) }, - _ => { - let msg = format!("Invalid character: {}", c); - StateResult::Fail { msg } - } + if c.is_whitespace() { + StateResult::advance(Box::new(Whitespace::new())) + } else if c.is_left_paren() { + StateResult::Emit(Token::LeftParen, Resume::AtNext) + } else if c.is_right_paren() { + StateResult::Emit(Token::RightParen, Resume::AtNext) + } else if c.is_dot() { + StateResult::advance(Box::new(Dot::new())) + } else if c.is_identifier_initial() { + StateResult::advance(Box::new(IdSub{})) + } else if c.is_hash() { + StateResult::advance(Box::new(Hash::new())) + } else if let Some(st) = Digit::with_char(&Builder::new(), c) { + StateResult::advance(Box::new(st)) + } else if c.is_quote() { + StateResult::Emit(Token::Quote, Resume::AtNext) + } else { + StateResult::fail(Error::invalid_char(c)) } } - fn none(&mut self) -> Result, String> { + fn none(&mut self) -> Result, Error> { Ok(None) } } - -trait BeginLexable { - fn is_hash(&self) -> bool; -} - -impl BeginLexable for char { - fn is_hash(&self) -> bool { *self == '#' } -} diff --git a/lexer/src/states/bool.rs b/lexer/src/states/bool.rs new file mode 100644 index 0000000..c9a52e0 --- /dev/null +++ b/lexer/src/states/bool.rs @@ -0,0 +1,62 @@ +/* lexer/src/states/bool.rs + * Eryn Wells + */ + +use error::Error; +use chars::Lexable; +use states::{Resume, State, StateResult}; +use token::Token; + +const TRUE_SHORT: &'static str = "t"; +const TRUE: &'static str = "true"; +const FALSE_SHORT: &'static str = "f"; +const FALSE: &'static str = "false"; + +#[derive(Debug)] pub struct Bool(String); + +impl Bool { + pub fn new(buf: &str) -> Bool { + Bool(buf.to_string()) + } + + fn handle_delimiter(&self) -> Option { + if self.0 == TRUE || self.0 == TRUE_SHORT { + Some(Token::Bool(true)) + } else if self.0 == FALSE || self.0 == FALSE_SHORT { + Some(Token::Bool(false)) + } else { + None + } + } +} + +impl State for Bool { + fn lex(&mut self, c: char) -> StateResult { + match c { + c if c.is_identifier_delimiter() => match self.handle_delimiter() { + Some(token) => StateResult::Emit(token, Resume::Here), + None => StateResult::fail(Error::invalid_char(c)), + }, + _ => { + let buf = { + let mut b = String::from(self.0.as_str()); + b.push(c); + b + }; + if TRUE.starts_with(&buf) || FALSE.starts_with(&buf) { + StateResult::advance(Box::new(Bool(buf))) + } else { + StateResult::fail(Error::invalid_char(c)) + } + }, + } + } + + fn none(&mut self) -> Result, Error> { + match self.handle_delimiter() { + Some(token) => Ok(Some(token)), + None => Err(Error::new("Found EOF while trying to parse a bool".to_string())) + } + } +} + diff --git a/lexer/src/states/dot.rs b/lexer/src/states/dot.rs new file mode 100644 index 0000000..45bdfc8 --- /dev/null +++ b/lexer/src/states/dot.rs @@ -0,0 +1,33 @@ +/* lexer/src/states/dot.rs + * Eryn Wells + */ + +use chars::Lexable; +use error::Error; +use states::{Resume, State, StateResult}; +use states::number::{Builder, Digit}; +use token::Token; + +#[derive(Debug)] pub struct Dot; + +impl Dot { + pub fn new() -> Dot { + Dot{} + } +} + +impl State for Dot { + fn lex(&mut self, c: char) -> StateResult { + if c.is_identifier_delimiter() { + StateResult::emit(Token::Dot, Resume::Here) + } else if let Some(st) = Digit::with_char(&Builder::new(), c) { + StateResult::advance(Box::new(st)) + } else { + StateResult::fail(Error::invalid_char(c)) + } + } + + fn none(&mut self) -> Result, Error> { + Ok(Some(Token::Dot)) + } +} diff --git a/lexer/src/states/hash.rs b/lexer/src/states/hash.rs index 53eb0b3..cd96cd3 100644 --- a/lexer/src/states/hash.rs +++ b/lexer/src/states/hash.rs @@ -2,94 +2,48 @@ * Eryn Wells */ -//! Lexer states for handling tokens that begin with hash marks '#'. - use chars::Lexable; -use states::{Resume, State, StateResult}; +use error::Error; +use states::{State, StateResult}; +use states::bool::Bool; +use states::number::{Builder, Prefix}; use token::Token; -const TRUE_SHORT: &'static str = "t"; -const TRUE: &'static str = "true"; -const FALSE_SHORT: &'static str = "f"; -const FALSE: &'static str = "false"; +trait HashLexable { + fn is_bool_initial(&self) -> bool; + fn is_slash(&self) -> bool; +} #[derive(Debug)] pub struct Hash; -#[derive(Debug)] pub struct BoolSub(String); + +impl Hash { + pub fn new() -> Hash { Hash{} } +} impl State for Hash { fn lex(&mut self, c: char) -> StateResult { match c { - c if TRUE.starts_with(c) || FALSE.starts_with(c) => { - let buf = c.to_lowercase().to_string(); - StateResult::Advance { to: Box::new(BoolSub(buf)) } - } - _ => { - let msg = format!("Invalid character: {}", c); - StateResult::Fail { msg } - } - } - } - - fn none(&mut self) -> Result, String> { - Ok(None) - } -} - -impl BoolSub { - fn handle_delimiter(&self) -> Result<(Token, Resume), ()> { - if self.0 == TRUE || self.0 == TRUE_SHORT { - Ok((Token::Bool(true), Resume::Here)) - } else if self.0 == FALSE || self.0 == FALSE_SHORT { - Ok((Token::Bool(false), Resume::Here)) - } else { - Err(()) - } - } -} - -impl State for BoolSub { - fn lex(&mut self, c: char) -> StateResult { - match c { - c if c.is_identifier_delimiter() => match self.handle_delimiter() { - Ok((token, resume)) => StateResult::Emit(token, resume), - Err(_) => { - let msg = format!("Invalid character: {}", c); - StateResult::Fail { msg } + c if c.is_bool_initial() => { + let buf = c.to_ascii_lowercase().to_string(); + StateResult::advance(Box::new(Bool::new(buf.as_str()))) + }, + c if c.is_radix() || c.is_exactness() => { + if let Some(st) = Prefix::with_char(&Builder::new(), c) { + StateResult::advance(Box::new(st)) + } else { + StateResult::fail(Error::new(format!("invalid numeric prefix character: {}", c))) } }, - _ => { - let buf = { - let mut b = String::from(self.0.as_str()); - b.push(c); - b - }; - if TRUE.starts_with(&buf) || FALSE.starts_with(&buf) { - StateResult::Advance { to: Box::new(BoolSub(buf)) } - } else { - let msg = format!("Invalid character: {}", c); - StateResult::Fail { msg } - } - } + _ => StateResult::fail(Error::invalid_char(c)), } } - fn none(&mut self) -> Result, String> { - match self.handle_delimiter() { - Ok((token, _)) => Ok(Some(token)), - Err(_) => { - let msg = format!("Found EOF while trying to parse a bool"); - Err(msg) - } - } + fn none(&mut self) -> Result, Error> { + Err(Error::unexpected_eof()) } } -trait HashLexable { - fn is_tf(&self) -> bool; - fn is_slash(&self) -> bool; -} - impl HashLexable for char { - fn is_tf(&self) -> bool { "tfTF".contains(*self) } + fn is_bool_initial(&self) -> bool { "tf".contains(self.to_ascii_lowercase()) } fn is_slash(&self) -> bool { *self == '\\' } } diff --git a/lexer/src/states/id.rs b/lexer/src/states/id.rs index 0232c00..bd792ce 100644 --- a/lexer/src/states/id.rs +++ b/lexer/src/states/id.rs @@ -3,6 +3,7 @@ */ use chars::Lexable; +use error::Error; use states::{Resume, State, StateResult}; use token::Token; @@ -14,14 +15,11 @@ impl State for IdSub { match c { c if c.is_identifier_subsequent() => StateResult::Continue, c if c.is_identifier_delimiter() => StateResult::Emit(Token::Id, Resume::Here), - _ => { - let msg = format!("Invalid character: {}", c); - StateResult::Fail { msg } - } + _ => StateResult::fail(Error::invalid_char(c)), } } - fn none(&mut self) -> Result, String> { + fn none(&mut self) -> Result, Error> { Ok(Some(Token::Id)) } } diff --git a/lexer/src/states/mod.rs b/lexer/src/states/mod.rs index ac9004d..5b8c788 100644 --- a/lexer/src/states/mod.rs +++ b/lexer/src/states/mod.rs @@ -2,25 +2,32 @@ * Eryn Wells */ +use std::fmt::Debug; +use error::Error; +use token::Token; + mod begin; +mod bool; +mod dot; mod hash; +mod number; mod id; +mod whitespace; pub use self::begin::Begin; -use std::fmt::Debug; -use token::Token; - #[derive(Debug)] pub enum StateResult { /// Consume the character, remain on this state. Continue, /// Consume the character, advance to the provided state. Advance { to: Box }, + /// Discard the input consumed to this point. Resume as specified. + Discard(Resume), /// Emit a Lex with the provided Token and the accumulated buffer. The Resume value indicates /// whether to revisit the current input character or advance to the next one. Emit(Token, Resume), - Fail { msg: String } + Fail(Error) } #[derive(Debug, Eq, PartialEq)] @@ -33,5 +40,19 @@ pub enum Resume { pub trait State: Debug { fn lex(&mut self, c: char) -> StateResult; - fn none(&mut self) -> Result, String>; + fn none(&mut self) -> Result, Error>; +} + +impl StateResult { + pub fn advance(to: Box) -> StateResult { + StateResult::Advance { to } + } + + pub fn emit(token: Token, at: Resume) -> StateResult { + StateResult::Emit(token, at) + } + + pub fn fail(err: Error) -> StateResult { + StateResult::Fail(err) + } } diff --git a/lexer/src/states/number/digit.rs b/lexer/src/states/number/digit.rs new file mode 100644 index 0000000..d2823c0 --- /dev/null +++ b/lexer/src/states/number/digit.rs @@ -0,0 +1,45 @@ +/* lexer/src/states/number/digit.rs + * Eryn Wells + */ + +use chars::Lexable; +use error::Error; +use states::{State, StateResult, Resume}; +use states::number::{Builder, Radix}; +use token::Token; + +#[derive(Debug)] pub struct Digit(Builder); + +impl Digit { + pub fn new(b: Builder) -> Digit { + Digit(b) + } + + pub fn with_char(b: &Builder, c: char) -> Option { + let mut b = b.clone(); + if !b.seen_radix() { + b.push_radix(Radix::Dec); + } + match b.push_digit(c) { + Ok(_) => Some(Digit::new(b)), + // TODO: Deal with this error properly. Don't just ignore it. + Err(_) => None, + } + } +} + +impl State for Digit { + fn lex(&mut self, c: char) -> StateResult { + if self.0.push_digit(c).is_ok() { + StateResult::Continue + } else if c.is_identifier_delimiter() { + StateResult::emit(Token::Num(self.0.resolve()), Resume::Here) + } else { + StateResult::fail(Error::invalid_char(c)) + } + } + + fn none(&mut self) -> Result, Error> { + Ok(Some(Token::Num(self.0.resolve()))) + } +} diff --git a/lexer/src/states/number/mod.rs b/lexer/src/states/number/mod.rs new file mode 100644 index 0000000..9852460 --- /dev/null +++ b/lexer/src/states/number/mod.rs @@ -0,0 +1,97 @@ +/* lexer/src/states/number/mod.rs + * Eryn Wells + */ + +use error::Error; + +mod digit; +mod prefix; +mod sign; + +pub use self::prefix::Prefix; +pub use self::digit::Digit; + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum Radix { Bin = 2, Oct = 8, Dec = 10, Hex = 16 } + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum Sign { Neg = -1, Pos = 1 } + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum Exact { Yes, No } + +#[derive(Clone, Debug)] +pub struct Builder { + radix: Option, + sign: Option, + exact: Option, + value: i64, +} + +impl Radix { + pub fn from(c: char) -> Option { + match c { + 'b'|'B' => Some(Radix::Bin), + 'o'|'O' => Some(Radix::Oct), + 'd'|'D' => Some(Radix::Dec), + 'x'|'X' => Some(Radix::Hex), + _ => None + } + } +} + +impl Exact { + pub fn from(c: char) -> Option { + match c { + 'i'|'I' => Some(Exact::No), + 'e'|'E' => Some(Exact::Yes), + _ => None + } + } +} + +impl Builder { + pub fn new() -> Builder { + Builder { + radix: None, + sign: None, + exact: None, + value: 0, + } + } + + fn push_digit(&mut self, c: char) -> Result<(), Error> { + let rx = self.radix_value(); + match c.to_digit(rx as u32) { + Some(d) => { + self.value = self.value * rx as i64 + d as i64; + Ok(()) + }, + None => Err(Error::invalid_char(c)) + } + } + + fn push_exact(&mut self, ex: Exact) { + self.exact = Some(ex); + } + + fn push_radix(&mut self, radix: Radix) { + self.radix = Some(radix); + } + + fn push_sign(&mut self, sign: Sign) { + self.sign = Some(sign); + } + + fn resolve(&self) -> i64 { + let sign_factor = self.sign_value() as i64; + self.value * sign_factor + } + + fn seen_exact(&self) -> bool { self.exact.is_some() } + fn seen_radix(&self) -> bool { self.radix.is_some() } + fn seen_sign(&self) -> bool { self.sign.is_some() } + + fn radix_value(&self) -> u8 { self.radix.unwrap_or(Radix::Dec) as u8 } + fn sign_value(&self) -> u8 { self.sign.unwrap_or(Sign::Pos) as u8 } +} diff --git a/lexer/src/states/number/prefix.rs b/lexer/src/states/number/prefix.rs new file mode 100644 index 0000000..001abd2 --- /dev/null +++ b/lexer/src/states/number/prefix.rs @@ -0,0 +1,78 @@ +/* lexer/src/states/number/prefix.rs + * Eryn Wells + */ + +use chars::Lexable; +use error::Error; +use states::{State, StateResult}; +use states::number::{Builder, Radix, Exact}; +use states::number::digit::Digit; +use states::number::sign::Sign; +use token::Token; + +#[derive(Debug)] pub struct Prefix(Builder); +#[derive(Debug)] pub struct Hash(Builder); + +impl Prefix { + pub fn new(b: Builder) -> Prefix { + Prefix(b) + } + + pub fn with_char(b: &Builder, c: char) -> Option { + if let Some(ex) = Exact::from(c) { + if b.seen_exact() { + return None; + } + let mut b = b.clone(); + b.push_exact(ex); + Some(Prefix::new(b)) + } else if let Some(rx) = Radix::from(c) { + if b.seen_radix() { + return None; + } + let mut b = b.clone(); + b.push_radix(rx); + Some(Prefix::new(b)) + } else { + None + } + } +} + +impl State for Prefix { + fn lex(&mut self, c: char) -> StateResult { + if c.is_hash() { + StateResult::advance(Box::new(Hash::new(&self.0))) + } else if let Some(st) = Sign::with_char(&self.0, c) { + StateResult::advance(Box::new(st)) + } else if let Some(st) = Digit::with_char(&self.0, c) { + StateResult::advance(Box::new(st)) + } else { + StateResult::fail(Error::invalid_char(c)) + } + } + + fn none(&mut self) -> Result, Error> { + Err(Error::unexpected_eof()) + } +} + +impl Hash { + fn new(b: &Builder) -> Hash { + Hash(b.clone()) + } +} + +impl State for Hash { + fn lex(&mut self, c: char) -> StateResult { + if let Some(st) = Prefix::with_char(&self.0, c) { + StateResult::advance(Box::new(st)) + } else { + StateResult::fail(Error::invalid_char(c)) + } + } + + fn none(&mut self) -> Result, Error> { + Err(Error::new("blah".to_string())) + } +} diff --git a/lexer/src/states/number/sign.rs b/lexer/src/states/number/sign.rs new file mode 100644 index 0000000..6e5d1a4 --- /dev/null +++ b/lexer/src/states/number/sign.rs @@ -0,0 +1,47 @@ +/* lexer/src/states/number/sign.rs + * Eryn Wells + */ + +use error::Error; +use states::{State, StateResult}; +use states::number::Builder; +use states::number::Sign as Sgn; +use token::Token; + +#[derive(Debug)] pub struct Sign(Builder); + +impl Sign { + pub fn new(b: Builder) -> Sign { + Sign(b) + } + + pub fn with_char(b: &Builder, c: char) -> Option { + if !b.seen_sign() { + match c { + '+' => { + let mut b = b.clone(); + b.push_sign(Sgn::Pos); + Some(Sign::new(b)) + }, + '-' => { + let mut b = b.clone(); + b.push_sign(Sgn::Neg); + Some(Sign::new(b)) + }, + _ => None + } + } else { + None + } + } +} + +impl State for Sign { + fn lex(&mut self, c: char) -> StateResult { + StateResult::fail(Error::invalid_char(c)) + } + + fn none(&mut self) -> Result, Error> { + Err(Error::unexpected_eof()) + } +} diff --git a/lexer/src/states/whitespace.rs b/lexer/src/states/whitespace.rs new file mode 100644 index 0000000..556590d --- /dev/null +++ b/lexer/src/states/whitespace.rs @@ -0,0 +1,30 @@ +/* lexer/src/states/whitespace.rs + * Eryn Wells + */ + +use error::Error; +use states::{Resume, State, StateResult}; +use token::Token; + +#[derive(Debug)] +pub struct Whitespace; + +impl Whitespace { + pub fn new() -> Whitespace { + Whitespace{} + } +} + +impl State for Whitespace { + fn lex(&mut self, c: char) -> StateResult { + if c.is_whitespace() { + StateResult::Continue + } else { + StateResult::Discard(Resume::Here) + } + } + + fn none(&mut self) -> Result, Error> { + Ok(None) + } +} diff --git a/lexer/src/token.rs b/lexer/src/token.rs index a97925b..5d79222 100644 --- a/lexer/src/token.rs +++ b/lexer/src/token.rs @@ -13,9 +13,12 @@ pub struct Lex { #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Token { Bool(bool), + Dot, + Id, LeftParen, + Num(i64), + Quote, RightParen, - Id } impl Lex { @@ -31,3 +34,4 @@ impl Lex { pub fn token(&self) -> Token { self.token } pub fn value(&self) -> &str { self.value.as_str() } } + diff --git a/lexer/tests/expressions.rs b/lexer/tests/expressions.rs new file mode 100644 index 0000000..1ee9712 --- /dev/null +++ b/lexer/tests/expressions.rs @@ -0,0 +1,29 @@ +/* lexer/tests/expressions.rs + * Eryn Wells + */ + +extern crate sibillexer; + +use sibillexer::{Lexer, Lex, Token}; + +#[test] +fn addition() { + let mut lex = Lexer::new("(+ 3 4)".chars()); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::LeftParen, "(", 0, 0)))); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::Id, "+", 0, 1)))); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::Num(3), "3", 0, 3)))); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::Num(4), "4", 0, 5)))); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::RightParen, ")", 0, 6)))); + assert_eq!(lex.next(), None); +} + +#[test] +fn subtraction() { + let mut lex = Lexer::new("(- 3 4)".chars()); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::LeftParen, "(", 0, 0)))); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::Id, "-", 0, 1)))); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::Num(3), "3", 0, 3)))); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::Num(4), "4", 0, 5)))); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::RightParen, ")", 0, 6)))); + assert_eq!(lex.next(), None); +} diff --git a/lexer/tests/numbers.rs b/lexer/tests/numbers.rs new file mode 100644 index 0000000..2940a7c --- /dev/null +++ b/lexer/tests/numbers.rs @@ -0,0 +1,35 @@ +/* lexer/tests/numbers.rs + * Eryn Wells + */ + +//! Tests for lexing numbers. + +extern crate sibillexer; + +use sibillexer::{Lexer, Lex, Token}; + +#[test] +fn ints_simple() { + let mut lex = Lexer::new("23 42 0".chars()); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::Num(23), "23", 0, 0)))); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::Num(42), "42", 0, 3)))); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::Num(0), "0", 0, 6)))); + assert_eq!(lex.next(), None); +} + +#[test] +fn ints_negative() { + let mut lex = Lexer::new("-56".chars()); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::Num(-56), "-56", 0, 0)))); + assert_eq!(lex.next(), None); +} + +#[test] +fn ints_alternative_bases() { + let mut lex = Lexer::new("#x2A #b11001 #o56 #d78".chars()); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::Num(0x2A), "#x2A", 0, 0)))); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::Num(0b11001), "#b11001", 0, 5)))); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::Num(0o56), "#o56", 0, 13)))); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::Num(78), "#d78", 0, 18)))); + assert_eq!(lex.next(), None); +} diff --git a/lexer/tests/single_tokens.rs b/lexer/tests/single_tokens.rs index 0bcf2b0..819e203 100644 --- a/lexer/tests/single_tokens.rs +++ b/lexer/tests/single_tokens.rs @@ -67,8 +67,22 @@ fn bool_long_false() { #[test] fn bool_with_spaces() { // See issue #12 - let expected_lex = Lex::new(Token::Bool(false), "#f", 0, 0); + let expected_lex = Lex::new(Token::Bool(false), "#f", 0, 2); let mut lex = Lexer::new(" #f ".chars()); assert_eq!(lex.next(), Some(Ok(expected_lex))); assert_eq!(lex.next(), None); } + +#[test] +fn dot() { + let mut lex = Lexer::new(".".chars()); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::Dot, ".", 0, 0)))); + assert_eq!(lex.next(), None); +} + +#[test] +fn quote() { + let mut lex = Lexer::new("'".chars()); + assert_eq!(lex.next(), Some(Ok(Lex::new(Token::Quote, "'", 0, 0)))); + assert_eq!(lex.next(), None); +} diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 1fa8032..1a957e3 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -53,7 +53,7 @@ impl Parser where T: Iterator { } fn pop_parser(&mut self) { - let popped = self.parsers.pop(); + self.parsers.pop(); println!("popped parser stack --> {:?}", self.parsers); } diff --git a/parser/src/parsers/list.rs b/parser/src/parsers/list.rs index f15f492..14ec700 100644 --- a/parser/src/parsers/list.rs +++ b/parser/src/parsers/list.rs @@ -10,20 +10,25 @@ use parsers::sym::SymParser; #[derive(Debug)] pub struct ListParser { - pairs: Option> + pairs: Option>, + waiting_for_final: bool, } impl ListParser { pub fn new() -> ListParser { - ListParser { pairs: None } + ListParser { + pairs: None, + waiting_for_final: false, + } } fn assemble(&mut self) -> Result { match self.pairs.take() { - Some(pairs) => { - let obj = pairs.into_iter().rfold(Obj::Null, |acc, mut pair| { + Some(mut pairs) => { + let last = pairs.last_mut().and_then(|p| Some(p.cdr.take())).unwrap_or(Obj::Null); + let obj = pairs.into_iter().rfold(last, |acc, mut pair| { pair.cdr = acc; - Obj::Ptr(Box::new(pair)) + Obj::new(pair) }); Ok(obj) }, @@ -38,7 +43,11 @@ impl NodeParser for ListParser { Token::Bool(_) => { let parser = BoolParser{}; NodeParseResult::Push { next: Box::new(parser) } - } + }, + Token::Dot => { + self.waiting_for_final = true; + NodeParseResult::Continue + }, Token::LeftParen => { match self.pairs { None => { @@ -57,6 +66,12 @@ impl NodeParser for ListParser { let next = Box::new(SymParser{}); NodeParseResult::Push { next } }, + Token::Num(n) => { + panic!("TODO: Handle numbrs."); + }, + Token::Quote => { + panic!("TODO: Handle quotes."); + }, Token::RightParen => { match self.pairs { None => { @@ -71,7 +86,7 @@ impl NodeParser for ListParser { } } } - } + }, } } @@ -81,12 +96,26 @@ impl NodeParser for ListParser { } fn subparser_completed(&mut self, obj: Obj) -> NodeParseResult { - if let Some(ref mut pairs) = self.pairs { - pairs.push(Pair::with_car(obj)); - NodeParseResult::Continue - } else { - let msg = format!("what happened here???"); - NodeParseResult::error(msg) + match self.pairs { + Some(ref mut pairs) if self.waiting_for_final => match pairs.last_mut() { + Some(ref mut last) => { + last.cdr = obj; + // Waiting for RightParen to close list. + NodeParseResult::Continue + }, + None => { + let msg = "Found dot before any pairs parsed".to_string(); + NodeParseResult::error(msg) + }, + }, + Some(ref mut pairs) => { + pairs.push(Pair::with_car(obj)); + NodeParseResult::Continue + }, + None => { + let msg = "While attempting to parse list, found token before opening paren".to_string(); + NodeParseResult::error(msg) + }, } } } diff --git a/parser/src/parsers/program.rs b/parser/src/parsers/program.rs index 6165468..3a8c610 100644 --- a/parser/src/parsers/program.rs +++ b/parser/src/parsers/program.rs @@ -37,7 +37,10 @@ impl NodeParser for ProgramParser { let parser = SymParser{}; let parser = Box::new(parser); NodeParseResult::Push { next: parser } - } + }, + _ => { + panic!("unhandled symbol"); + }, } } diff --git a/parser/tests/lists.rs b/parser/tests/lists.rs index e3ade69..e990c5a 100644 --- a/parser/tests/lists.rs +++ b/parser/tests/lists.rs @@ -9,7 +9,6 @@ extern crate sibilparser; extern crate sibiltypes; use sibillexer::{Lex, Token}; -use sibillexer::Result as LexerResult; use sibilparser::Parser; use sibiltypes::{Obj, Pair, Sym}; @@ -32,3 +31,31 @@ fn list_of_four_tokens() { assert_eq!(parser.next(), Some(Ok(ex_list))); assert_eq!(parser.next(), None); } + +#[test] +fn single_dotted_pair() { + let tokens = vec![Ok(Lex::new(Token::LeftParen, "(", 0, 0)), + Ok(Lex::new(Token::Id, "ab", 0, 0)), + Ok(Lex::new(Token::Dot, ".", 0, 0)), + Ok(Lex::new(Token::Id, "cd", 0, 0)), + Ok(Lex::new(Token::RightParen, ")", 0, 0))].into_iter(); + let mut parser = Parser::new(tokens); + let ex_list = Obj::new(Pair::new(Obj::new(Sym::with_str("ab")), Obj::new(Sym::with_str("cd")))); + assert_eq!(parser.next(), Some(Ok(ex_list))); + assert_eq!(parser.next(), None); +} + +#[test] +fn three_element_dotted_pair() { + let tokens = vec![Ok(Lex::new(Token::LeftParen, "(", 0, 0)), + Ok(Lex::new(Token::Id, "ab", 0, 0)), + Ok(Lex::new(Token::Id, "cd", 0, 0)), + Ok(Lex::new(Token::Dot, ".", 0, 0)), + Ok(Lex::new(Token::Id, "ef", 0, 0)), + Ok(Lex::new(Token::RightParen, ")", 0, 0))].into_iter(); + let mut parser = Parser::new(tokens); + let ex_list = Obj::new(Pair::new(Obj::new(Sym::with_str("ab")), Obj::new( + Pair::new(Obj::new(Sym::with_str("cd")), Obj::new(Sym::with_str("ef")))))); + assert_eq!(parser.next(), Some(Ok(ex_list))); + assert_eq!(parser.next(), None); +} diff --git a/parser/tests/single_item.rs b/parser/tests/single_item.rs index 108fc37..795aaea 100644 --- a/parser/tests/single_item.rs +++ b/parser/tests/single_item.rs @@ -11,7 +11,7 @@ extern crate sibiltypes; use sibillexer::{Lex, Token}; use sibillexer::Result as LexerResult; use sibilparser::Parser; -use sibiltypes::{Bool, Obj, Pair, Sym}; +use sibiltypes::{Bool, Obj, Sym}; #[test] fn single_sym() { diff --git a/types/src/lib.rs b/types/src/lib.rs index 7632128..c077ee5 100644 --- a/types/src/lib.rs +++ b/types/src/lib.rs @@ -9,4 +9,5 @@ pub use object::Obj; pub use pair::Pair; pub use sym::Sym; +pub use self::number::Number; pub use self::number::Int; diff --git a/types/src/number/arith.rs b/types/src/number/arith.rs new file mode 100644 index 0000000..f33a6c6 --- /dev/null +++ b/types/src/number/arith.rs @@ -0,0 +1,50 @@ +/* types/src/number/arith.rs + * Eryn Wells + */ + +use std::ops::{Add, Div, Mul, Sub, Rem}; +use number::{Int, Irr, Number}; + +pub trait GCD { + /// Find the greatest common divisor of `self` and another number. + fn gcd(self, other: Self) -> Self; +} + +pub trait LCM { + /// Find the least common multiple of `self` and another number. + fn lcm(self, other: Self) -> Self; +} + +macro_rules! impl_newtype_arith_op { + ($id:ident, $opt:ident, $opm:ident, $op:tt) => { + impl $opt for $id where T: Number + Into<$id> { + type Output = $id; + #[inline] + fn $opm(self, rhs: T) -> Self::Output { + let rhs: $id = rhs.into(); + $id(self.0 $op rhs.0) + } + } + impl<'a, T> $opt for &'a $id where T: Number + Into<$id> { + type Output = $id; + #[inline] + fn $opm(self, rhs: T) -> Self::Output { + let rhs: $id = rhs.into(); + $id(self.0 $op rhs.0) + } + } + } +} + +macro_rules! impl_newtype_arith { + ($($id:ident)*) => ($( + impl_newtype_arith_op!{$id, Add, add, +} + impl_newtype_arith_op!{$id, Div, div, /} + impl_newtype_arith_op!{$id, Mul, mul, *} + impl_newtype_arith_op!{$id, Sub, sub, -} + )*) +} + +impl_newtype_arith!{ Int Irr } +impl_newtype_arith_op!{Int, Rem, rem, %} +impl_newtype_arith_op!{Irr, Rem, rem, %} diff --git a/types/src/number/frac.rs b/types/src/number/frac.rs new file mode 100644 index 0000000..67aa476 --- /dev/null +++ b/types/src/number/frac.rs @@ -0,0 +1,197 @@ +/* types/src/number/frac.rs + * Eryn Wells + */ + +use std::any::Any; +use std::fmt; +use std::ops::{Add, Mul}; +use number::arith::GCD; +use number::{Int, Number}; +use object::{Obj, Object}; + +/// A fraction of two integers. +#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub struct Frac { + /// The numerator. + p: Int, + /// The denominator. + q: Int +} + +impl Frac { + pub fn new(p: Int, q: Int) -> Result { + if q.is_zero() { + // TODO: Return a more specific error about dividing by zero. + Err(()) + } else { + Ok(Frac{p, q}.reduced()) + } + } + + pub fn from_ints(p: i64, q: i64) -> Result { + Frac::new(Int(p), Int(q)) + } + + pub fn quotient(&self) -> f64 { + self.p.0 as f64 / self.q.0 as f64 + } + + fn reduced(self) -> Frac { + let gcd = self.p.gcd(self.q); + Frac { p: self.p / gcd, q: self.q / gcd } + } + + fn _add(self, rhs: Frac) -> Frac { + let p = self.p * rhs.q + rhs.p * self.q; + let q = self.q * rhs.q; + Frac{p,q}.reduced() + } + + fn _mul(self, rhs: Frac) -> Frac { + let p = self.p * rhs.p; + let q = self.q * rhs.q; + Frac{p,q}.reduced() + } +} + +impl Add for Frac { + type Output = Frac; + fn add(self, rhs: Self) -> Self::Output { + self._add(rhs) + } +} + +impl<'a> Add for &'a Frac { + type Output = Frac; + fn add(self, rhs: Frac) -> Self::Output { + self._add(rhs) + } +} + +impl<'a, 'b> Add<&'a Frac> for &'b Frac { + type Output = Frac; + fn add(self, rhs: &Frac) -> Self::Output { + self._add(*rhs) + } +} + +impl fmt::Display for Frac { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}/{}", self.p, self.q) + } +} + +impl From for Frac { + fn from(i: Int) -> Frac { + Frac{p: i, q: Int(1)} + } +} + +impl Mul for Frac { + type Output = Frac; + fn mul(self, rhs: Self) -> Self::Output { + self._mul(rhs) + } +} + +impl<'a> Mul for &'a Frac { + type Output = Frac; + fn mul(self, rhs: Frac) -> Self::Output { + self._mul(rhs) + } +} + +impl<'a, 'b> Mul<&'a Frac> for &'b Frac { + type Output = Frac; + fn mul(self, rhs: &Frac) -> Self::Output { + self._mul(*rhs) + } +} + +impl Number for Frac { + fn as_int(&self) -> Option { + if self.q == Int(1) { + Some(self.p) + } else { + None + } + } + + fn as_frac(&self) -> Option { Frac::new(self.p, self.q).ok() } + + fn is_zero(&self) -> bool { self.p.is_zero() } +} + +impl Object for Frac { + fn as_any(&self) -> &Any { self } + fn as_num(&self) -> Option<&Number> { Some(self) } +} + +impl PartialEq for Frac { + fn eq<'a>(&self, rhs: &'a Obj) -> bool { + match rhs.obj().and_then(Object::as_num) { + Some(num) => self == num, + None => false + } + } +} + +impl<'a> PartialEq for Frac { + fn eq(&self, rhs: &(Number + 'a)) -> bool { + match rhs.as_frac() { + Some(rhs) => *self == rhs, + None => false + } + } +} + +#[cfg(test)] +mod tests { + use number::Number; + use super::*; + + #[test] + fn fracs_with_zero_q_are_invalid() { + assert!(Frac::from_ints(3, 0).is_err()) + } + + #[test] + fn equal_fracs_are_equal() { + assert_eq!(Frac::from_ints(3, 2), Frac::from_ints(3, 2)); + assert_ne!(Frac::from_ints(12, 4), Frac::from_ints(9, 7)); + } + + #[test] + fn fracs_should_reduce_to_ints_where_possible() { + let fr = Frac::from_ints(3, 1).unwrap(); + assert_eq!(fr.as_int(), Some(Int(3))); + } + + #[test] + fn fracs_should_not_reduce_to_ints_where_impossible() { + let fr = Frac::from_ints(3, 2).unwrap(); + assert_eq!(fr.as_int(), None); + } + + #[test] + fn fracs_are_exact() { + let fr = Frac::from_ints(4, 2).unwrap(); + assert!(fr.is_exact()); + } + + #[test] + fn fracs_can_add() { + let a = Frac::from_ints(5, 6).unwrap(); + let b = Frac::from_ints(2, 3).unwrap(); + let r = Frac::from_ints(3, 2).unwrap(); + assert_eq!(a + b, r); + } + + #[test] + fn fracs_can_multiply() { + let a = Frac::from_ints(4, 3).unwrap(); + let b = Frac::from_ints(3, 8).unwrap(); + let r = Frac::from_ints(1, 2).unwrap(); + assert_eq!(a * b, r); + } +} diff --git a/types/src/number/integer.rs b/types/src/number/integer.rs index ab725b7..72e1fa5 100644 --- a/types/src/number/integer.rs +++ b/types/src/number/integer.rs @@ -3,24 +3,64 @@ */ use std::any::Any; -use number::Number; +use std::fmt; +use number::arith::{GCD, LCM}; use object::{Obj, Object}; +use super::{Frac, Number}; -pub type Int = i64; +#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub struct Int(pub i64); + +impl Int { + pub fn zero() -> Int { Int(0) } +} + +impl fmt::Display for Int { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl GCD for Int { + fn gcd(self, other: Int) -> Int { + let (mut a, mut b) = if self > other { + (self, other) + } else { + (other, self) + }; + while !b.is_zero() { + let r = a % b; + a = b; + b = r; + } + a + } +} + +impl LCM for Int { + fn lcm(self, other: Int) -> Int { + if self.0 == 0 && other.0 == 0 { + Int::zero() + } else { + self * other / self.gcd(other) + } + } +} impl Object for Int { fn as_any(&self) -> &Any { self } + fn as_num(&self) -> Option<&Number> { Some(self) } } impl Number for Int { - fn as_int(&self) -> Option<&Int> { Some(self) } + fn as_int(&self) -> Option { Some(*self) } + fn as_frac(&self) -> Option { Frac::new(*self, Int(1)).ok() } + fn is_zero(&self) -> bool { self.0 == 0 } } impl PartialEq for Int { fn eq<'a>(&self, rhs: &'a Obj) -> bool { - let obj: Option<&'a Object> = rhs.obj(); - let num: Option<&'a Number> = obj.and_then(Object::as_num); - match num { + match rhs.obj().and_then(Object::as_num) { Some(num) => self == num, None => false } @@ -30,7 +70,7 @@ impl PartialEq for Int { impl<'a> PartialEq for Int { fn eq(&self, rhs: &(Number + 'a)) -> bool { match rhs.as_int() { - Some(rhs) => *self == *rhs, + Some(rhs) => *self == rhs, None => false } } @@ -38,32 +78,56 @@ impl<'a> PartialEq for Int { #[cfg(test)] mod tests { - use super::Int; - use number::*; - use value::*; + use super::*; #[test] fn equal_integers_are_equal() { - assert_eq!(Integer(3), Integer(3)); - assert_ne!(Integer(12), Integer(9)); - assert_eq!(Integer(4).as_value(), Integer(4).as_value()); - assert_ne!(Integer(5).as_value(), Integer(7).as_value()); + assert_eq!(Int(3), Int(3)); + assert_ne!(Int(12), Int(9)); + assert_eq!(Obj::new(Int(3)), Obj::new(Int(3))); + assert_ne!(Obj::new(Int(3)), Obj::new(Int(4))); } #[test] fn integers_are_integers() { - assert!(Integer(4).is_complex()); - assert!(Integer(4).is_real()); - assert!(Integer(4).is_rational()); - assert!(Integer(4).is_integer()); - assert!(Integer(4).is_number()); - assert!(!Integer(6).is_char()); - assert!(!Integer(6).is_bool()); + assert_eq!(Int(4).as_bool(), None); } #[test] fn integers_are_exact() { - assert!(Integer(4).is_exact()); - assert!(!Integer(4).is_inexact()); + assert!(Int(4).is_exact()); + } + + #[test] + fn integers_add() { + assert_eq!(Int(4) + Int(8), Int(12)); + } + + #[test] + fn integers_multiply() { + assert_eq!(Int(4) * Int(5), Int(20)); + } + + #[test] + fn integer_modulo_divide() { + assert_eq!(Int(20) % Int(5), Int(0)); + assert_eq!(Int(20) % Int(6), Int(2)); + } + + #[test] + fn finding_int_gcd() { + assert_eq!(Int(0), Int(0).gcd(Int(0))); + assert_eq!(Int(10), Int(10).gcd(Int(0))); + assert_eq!(Int(10), Int(0).gcd(Int(10))); + assert_eq!(Int(10), Int(10).gcd(Int(20))); + assert_eq!(Int(44), Int(2024).gcd(Int(748))); + } + + #[test] + fn finding_int_lcm() { + assert_eq!(Int(0), Int(0).lcm(Int(0))); + assert_eq!(Int(0), Int(10).lcm(Int(0))); + assert_eq!(Int(0), Int(10).lcm(Int(0))); + assert_eq!(Int(42), Int(21).lcm(Int(6))); } } diff --git a/types/src/number/irr.rs b/types/src/number/irr.rs new file mode 100644 index 0000000..be19b8a --- /dev/null +++ b/types/src/number/irr.rs @@ -0,0 +1,72 @@ +/* types/src/number/irr.rs + * Eryn Wells + */ + +use std::any::Any; +use std::fmt; +use number::{Frac, Int, Number}; +use object::{Obj, Object}; + +#[derive(Copy, Clone, Debug, PartialEq)] +pub struct Irr(pub f64); + +impl Irr { + pub fn zero() -> Irr { Irr(0.0) } +} + +impl fmt::Display for Irr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl From for Irr { + fn from(i: Int) -> Irr { Irr(i.0 as f64) } +} + +impl From for Irr { + fn from(f: Frac) -> Irr { + Irr(f.quotient()) + } +} + +impl Number for Irr { + fn as_int(&self) -> Option { + if self.0.trunc() == self.0 { + Some(Int(self.0.trunc() as i64)) + } else { + None + } + } + + fn as_frac(&self) -> Option { + if !self.0.is_infinite() && !self.0.is_nan() { + // TODO + None + } else { + None + } + } + + fn is_zero(&self) -> bool { self.0 == 0.0 } +} + +impl Object for Irr { + fn as_any(&self) -> &Any { self } + fn as_num(&self) -> Option<&Number> { Some(self) } +} + +impl PartialEq for Irr { + fn eq<'a>(&self, rhs: &'a Obj) -> bool { + match rhs.obj().and_then(Object::as_num) { + Some(num) => self == num, + None => false + } + } +} + +impl<'a> PartialEq for Irr { + fn eq(&self, rhs: &(Number + 'a)) -> bool { + false + } +} diff --git a/types/src/number/math.rs b/types/src/number/math.rs deleted file mode 100644 index a5ef5b7..0000000 --- a/types/src/number/math.rs +++ /dev/null @@ -1,95 +0,0 @@ -/* types/src/number/math.rs - * Eryn Wells - */ - -use number::{Int, Flt}; - -pub trait GCD { - /// Find the greatest common divisor of `self` and another number. - fn gcd(self, other: Self) -> Self; -} - -pub trait LCM { - /// Find the least common multiple of `self` and another number. - fn lcm(self, other: Self) -> Self; -} - -pub trait Rational { - /// Convert `self` into a rational number -- the quotient of two whole numbers. - fn to_rational(self) -> (Int, Int); -} - -impl GCD for Int { - fn gcd(self, other: Int) -> Int { - let (mut a, mut b) = if self > other { - (self, other) - } else { - (other, self) - }; - - while b != 0 { - let r = a % b; - a = b; - b = r; - } - - a - } -} - -impl LCM for Int { - fn lcm(self, other: Int) -> Int { - if self == 0 && other == 0 { - 0 - } - else { - self * other / self.gcd(other) - } - } -} - -impl Rational for Int { - fn to_rational(self) -> (Int, Int) { (self, 1) } -} - -impl Rational for Flt { - fn to_rational(self) -> (Int, Int) { - // Convert the float to a fraction by iteratively multiplying by 10 until the fractional part of the float is 0.0. - let whole_part = self.trunc(); - let mut p = self.fract(); - let mut q = 1.0; - while p.fract() != 0.0 { - p *= 10.0; - q *= 10.0; - } - p += whole_part * q; - - // Integers from here down. Reduce the fraction before returning. - let p = p as Int; - let q = q as Int; - let gcd = p.gcd(q); - (p / gcd, q / gcd) - } -} - -#[cfg(test)] -mod tests { - use super::{LCM, GCD}; - - #[test] - fn gcd_works() { - assert_eq!(0, 0.gcd(0)); - assert_eq!(10, 10.gcd(0)); - assert_eq!(10, 0.gcd(10)); - assert_eq!(10, 10.gcd(20)); - assert_eq!(44, 2024.gcd(748)); - } - - #[test] - fn lcm_works() { - assert_eq!(0, 0.lcm(0)); - assert_eq!(0, 10.lcm(0)); - assert_eq!(0, 10.lcm(0)); - assert_eq!(42, 21.lcm(6)); - } -} diff --git a/types/src/number/mod.rs b/types/src/number/mod.rs index 122214e..afacf0f 100644 --- a/types/src/number/mod.rs +++ b/types/src/number/mod.rs @@ -2,109 +2,35 @@ * Eryn Wells */ -/// # Numbers -/// -/// Scheme numbers are complex, literally. +//! # Numbers +//! +//! Scheme numbers are complex, literally. The model it uses is a hierarchy of types called the +//! Number Tower. It consists of four types, in order: Integers, Rationals (or Fractionals), +//! Irrationals (or Reals), and Complex Numbers. Each type going down the tower can be +//! unequivocally cast to the type below it, but the reverse is not necessarily true. So, an +//! Integer can be cast as a Rational (by putting its value over 1), but a Rational like 1/2 cannot +//! be represented as an Integer. -mod integer; - -use std::fmt; use object::Object; +mod arith; +mod frac; +mod integer; +mod irr; + +pub use self::frac::Frac; pub use self::integer::Int; +pub use self::irr::Irr; pub trait Number: - Object + Object { - fn as_int(&self) -> Option<&Int> { None } + /// Cast this Number to an Int if possible. + fn as_int(&self) -> Option { None } + /// Cast this Number to a Frac if possible. + fn as_frac(&self) -> Option { None } + /// Return `true` if this Number is an exact representation of its value. + fn is_exact(&self) -> bool { true } + /// Return `true` if this Number is equal to 0. + fn is_zero(&self) -> bool; } - -#[derive(Debug, Eq, PartialEq)] -pub enum Exact { Yes, No } - -impl fmt::Display for Exact { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", match *self { - Exact::Yes => "#e", - Exact::No => "#i", - }) - } -} - -// TODO: Implement PartialEq myself cause there are some weird nuances to comparing numbers. -//#[derive(Debug, PartialEq)] -//pub struct Number { -// real: Real, -// imag: Option, -// exact: Exact, -//} - -//impl Number { -// fn new(real: Real, imag: Option, exact: Exact) -> Number { -// Number { -// real: real.reduce(), -// imag: imag.map(|n| n.reduce()), -// exact: exact, -// } -// } -// -// pub fn from_int(value: Int, exact: Exact) -> Number { -// Number::new(Real::Integer(value), None, exact) -// } -// -// pub fn from_quotient(p: Int, q: Int, exact: Exact) -> Number { -// let real = if exact == Exact::Yes { -// // Make an exact rational an integer if possible. -// Real::Rational(p, q).demote() -// } -// else { -// // Make an inexact rational an irrational. -// Real::Rational(p, q).promote_once() -// }; -// Number::new(real, None, exact) -// } -// -// pub fn from_float(value: Flt, exact: Exact) -> Number { -// let real = if exact == Exact::Yes { -// // Attempt to demote irrationals. -// Real::Irrational(value).demote() -// } -// else { -// Real::Irrational(value) -// }; -// Number::new(real, None, exact) -// } -// -// pub fn is_exact(&self) -> bool { -// match self.exact { -// Exact::Yes => true, -// Exact::No => false, -// } -// } -//} -// -//impl fmt::Display for Number { -// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -// write!(f, "{}", self.real).and_then( -// |r| self.imag.map(|i| write!(f, "{:+}i", i)).unwrap_or(Ok(r))) -// } -//} -// -//#[cfg(test)] -//mod tests { -// use super::Exact; -// use super::Number; -// use super::real::Real; -// -// #[test] -// fn exact_numbers_are_exact() { -// assert!(Number::from_int(3, Exact::Yes).is_exact()); -// assert!(!Number::from_int(3, Exact::No).is_exact()); -// } -// -// #[test] -// fn exact_irrationals_are_reduced() { -// let real = Real::Rational(3, 2); -// assert_eq!(Number::from_float(1.5, Exact::Yes), Number::new(real, None, Exact::Yes)); -// } -//} diff --git a/types/src/number/rational.rs b/types/src/number/rational.rs deleted file mode 100644 index 0ddbef2..0000000 --- a/types/src/number/rational.rs +++ /dev/null @@ -1,90 +0,0 @@ -/* types/src/number/rational.rs - * Eryn Wells - */ - -use std::any::Any; -use value::*; -use super::*; - -#[derive(Debug, Eq, PartialEq)] -pub struct Rational(pub Int, pub Int); - -impl Number for Rational { - fn convert_down(&self) -> Option> { - if self.1 == 1 { - Some(Box::new(Integer(self.0))) - } - else { - None - } - } - - fn is_exact(&self) -> bool { true } -} - -impl Value for Rational { - fn as_value(&self) -> &Value { self } -} - -impl IsBool for Rational { } -impl IsChar for Rational { } - -impl IsNumber for Rational { - fn is_rational(&self) -> bool { true } -} - -impl ValueEq for Rational { - fn eq(&self, other: &Value) -> bool { - other.as_any().downcast_ref::().map_or(false, |x| x == self) - } - - fn as_any(&self) -> &Any { self } -} - -#[cfg(test)] -mod tests { - use std::ops::Deref; - use number::*; - use value::*; - - #[test] - fn equal_rationals_are_equal() { - assert_eq!(Rational(3, 2), Rational(3, 2)); - assert_ne!(Rational(12, 4), Rational(9, 7)); - assert_eq!(Rational(4, 5).as_value(), Rational(4, 5).as_value()); - assert_ne!(Rational(5, 6).as_value(), Rational(7, 6).as_value()); - } - - #[test] - fn rationals_are_rationals() { - assert!(Rational(4, 3).is_complex()); - assert!(Rational(4, 3).is_real()); - assert!(Rational(4, 3).is_rational()); - assert!(!Rational(4, 3).is_integer()); - assert!(Rational(4, 3).is_number()); - assert!(!Rational(6, 8).is_char()); - assert!(!Rational(6, 9).is_bool()); - } - - #[test] - fn rationals_should_reduce_to_integers_where_possible() { - let rational_as_integer = Rational(3, 1).convert_down(); - assert!(rational_as_integer.is_some()); - // Oh my god this line is so dumb. - let rational_as_integer = rational_as_integer.unwrap(); - let rational_as_integer = rational_as_integer.as_value(); - assert_eq!(rational_as_integer.deref(), Integer(3).as_value()); - } - - #[test] - fn rationals_should_not_reduce_to_integers_where_impossible() { - let rational_as_integer = Rational(3, 2).convert_down(); - assert!(rational_as_integer.is_none()); - } - - #[test] - fn rationals_are_exact() { - assert!(Rational(4, 2).is_exact()); - assert!(!Rational(4, 2).is_inexact()); - } -} diff --git a/types/src/object.rs b/types/src/object.rs index 584421a..3680ef7 100644 --- a/types/src/object.rs +++ b/types/src/object.rs @@ -188,8 +188,6 @@ impl PartialEq for Obj { #[cfg(test)] mod tests { - use super::Obj; - // #[test] // fn display_bools() { // assert_eq!(format!("{}", Object::Bool(true)), "#t"); diff --git a/types/src/pair.rs b/types/src/pair.rs index 28e4565..3927e69 100644 --- a/types/src/pair.rs +++ b/types/src/pair.rs @@ -4,13 +4,12 @@ use std::any::Any; use std::fmt; -use super::*; -use object::Object; +use object::{Obj, Object}; #[derive(Debug, PartialEq)] pub struct Pair { pub car: Obj, - pub cdr: Obj + pub cdr: Obj, } impl Pair { @@ -72,11 +71,46 @@ impl PartialEq for Pair { #[cfg(test)] mod tests { use super::Pair; + use object::Obj; + use sym::Sym; #[test] - fn empty_pairs_are_equal() { + fn eq_empty_pairs() { let a = Pair::empty(); let b = Pair::empty(); assert_eq!(a, b); } + + #[test] + fn display_empty_pair() { + let a = Pair::empty(); + let disp = format!("{}", a); + assert_eq!(disp, "(())"); + } + + #[test] + fn display_single_element_pair() { + let a = Pair::with_car(Obj::new(Sym::new("abc".to_string()))); + let disp = format!("{}", a); + assert_eq!(disp, "(abc)"); + } + + #[test] + fn display_dotted_pair() { + let car = Obj::new(Sym::new("abc".to_string())); + let cdr = Obj::new(Sym::new("def".to_string())); + let p = Pair::new(car, cdr); + let disp = format!("{}", p); + assert_eq!(disp, "(abc . def)"); + } + + #[test] + fn display_long_dotted_pair() { + let a = Obj::new(Sym::new("abc".to_string())); + let d = Obj::new(Sym::new("def".to_string())); + let g = Obj::new(Sym::new("ghi".to_string())); + let p = Pair::new(a, Obj::new(Pair::new(d, g))); + let disp = format!("{}", p); + assert_eq!(disp, "(abc def . ghi)"); + } } diff --git a/types/src/sym.rs b/types/src/sym.rs index c318e08..1310ccd 100644 --- a/types/src/sym.rs +++ b/types/src/sym.rs @@ -54,7 +54,7 @@ mod tests { use super::Sym; #[test] - fn syms_with_the_same_name_are_equal() { + fn eq_syms_with_same_name() { let a = Sym::with_str("abc"); let b = Sym::with_str("abc"); assert_eq!(a, b);