Move all the lexer stuff to a module directory
This commit is contained in:
parent
d333819dee
commit
d4dee92904
5 changed files with 97 additions and 92 deletions
31
src/lexer/char.rs
Normal file
31
src/lexer/char.rs
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
/* char.rs
|
||||||
|
* Eryn Wells <eryn@erynwells.me>
|
||||||
|
*/
|
||||||
|
|
||||||
|
use lexer::charset;
|
||||||
|
|
||||||
|
/// Character-classification predicates used while lexing.
///
/// Each method answers a yes/no question about a single value; the `char`
/// implementation in this module is what the visible lexer code relies on.
pub trait Lexable {
|
||||||
|
/// Whether this value is an opening parenthesis, `(`.
fn is_left_paren(&self) -> bool;
|
||||||
|
/// Whether this value is a closing parenthesis, `)`.
fn is_right_paren(&self) -> bool;
|
||||||
|
/// Whether this value is allowed as the first character of an identifier.
fn is_identifier_initial(&self) -> bool;
|
||||||
|
/// Whether this value is allowed after the first character of an identifier.
fn is_identifier_subsequent(&self) -> bool;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Lexable for char {
|
||||||
|
fn is_left_paren(&self) -> bool {
|
||||||
|
self == &'('
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_right_paren(&self) -> bool {
|
||||||
|
self == &')'
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_identifier_initial(&self) -> bool {
|
||||||
|
charset::identifier_initials().contains(&self)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_identifier_subsequent(&self) -> bool {
|
||||||
|
charset::identifier_subsequents().contains(&self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
43
src/lexer/charset.rs
Normal file
43
src/lexer/charset.rs
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
/// Character Sets
|
||||||
|
///
|
||||||
|
/// Sets of characters valid for making up tokens.
|
||||||
|
|
||||||
|
use std::collections::HashSet;
|
||||||
|
use std::iter::FromIterator;
|
||||||
|
|
||||||
|
pub type CharSet = HashSet<char>;
|
||||||
|
|
||||||
|
// TODO: Use std::sync::Once for these sets?
|
||||||
|
// https://doc.rust-lang.org/beta/std/sync/struct.Once.html
|
||||||
|
|
||||||
|
fn ascii_letters() -> CharSet {
|
||||||
|
let letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".chars();
|
||||||
|
CharSet::from_iter(letters)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ascii_digits() -> CharSet {
|
||||||
|
let digits = "1234567890".chars();
|
||||||
|
CharSet::from_iter(digits)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A set of all characters allowed to start Scheme identifiers.
|
||||||
|
pub fn identifier_initials() -> CharSet {
|
||||||
|
let letters = ascii_letters();
|
||||||
|
let extras = CharSet::from_iter("!$%&*/:<=>?~_^".chars());
|
||||||
|
let mut initials = CharSet::new();
|
||||||
|
initials.extend(letters.iter());
|
||||||
|
initials.extend(extras.iter());
|
||||||
|
initials
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A set of all characters allowed to follow an identifier initial.
|
||||||
|
pub fn identifier_subsequents() -> CharSet {
|
||||||
|
let initials = identifier_initials();
|
||||||
|
let digits = ascii_digits();
|
||||||
|
let extras = CharSet::from_iter(".+-".chars());
|
||||||
|
let mut subsequents = CharSet::new();
|
||||||
|
subsequents.extend(initials.iter());
|
||||||
|
subsequents.extend(digits.iter());
|
||||||
|
subsequents.extend(extras.iter());
|
||||||
|
subsequents
|
||||||
|
}
|
||||||
|
|
@ -1,11 +1,15 @@
|
||||||
//! # Lexer
|
//! # Lexer
|
||||||
|
|
||||||
pub mod token;
|
pub mod token;
|
||||||
|
mod char;
|
||||||
|
mod charset;
|
||||||
|
mod str;
|
||||||
|
|
||||||
use characters;
|
use self::char::Lexable;
|
||||||
use characters::CharAt;
|
use self::str::CharAt;
|
||||||
use characters::Lexable;
|
use self::str::RelativeIndexable;
|
||||||
use characters::RelativeIndexable;
|
use self::token::Token;
|
||||||
|
use self::token::Kind;
|
||||||
|
|
||||||
enum State {
|
enum State {
|
||||||
Initial,
|
Initial,
|
||||||
|
|
|
||||||
|
|
@ -1,79 +1,6 @@
|
||||||
//! Characters
|
/* str.rs
|
||||||
//!
|
* Eryn Wells <eryn@erynwells.me>
|
||||||
//! Utilities for dealing with chars of various sorts.
|
*/
|
||||||
|
|
||||||
use std::collections::HashSet;
|
|
||||||
use std::iter::FromIterator;
|
|
||||||
|
|
||||||
pub type CharSet = HashSet<char>;
|
|
||||||
|
|
||||||
// TODO: Use std::sync::Once for these sets?
|
|
||||||
// https://doc.rust-lang.org/beta/std/sync/struct.Once.html
|
|
||||||
|
|
||||||
fn ascii_letters() -> CharSet {
|
|
||||||
let letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".chars();
|
|
||||||
CharSet::from_iter(letters)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn ascii_digits() -> CharSet {
|
|
||||||
let digits = "1234567890".chars();
|
|
||||||
CharSet::from_iter(digits)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A set of all characters allowed to start Scheme identifiers.
|
|
||||||
pub fn identifier_initials() -> CharSet {
|
|
||||||
let letters = ascii_letters();
|
|
||||||
let extras = CharSet::from_iter("!$%&*/:<=>?~_^".chars());
|
|
||||||
let mut initials = CharSet::new();
|
|
||||||
initials.extend(letters.iter());
|
|
||||||
initials.extend(extras.iter());
|
|
||||||
initials
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A set of all characters allowed to follow an identifier initial.
|
|
||||||
pub fn identifier_subsequents() -> CharSet {
|
|
||||||
let initials = identifier_initials();
|
|
||||||
let digits = ascii_digits();
|
|
||||||
let extras = CharSet::from_iter(".+-".chars());
|
|
||||||
let mut subsequents = CharSet::new();
|
|
||||||
subsequents.extend(initials.iter());
|
|
||||||
subsequents.extend(digits.iter());
|
|
||||||
subsequents.extend(extras.iter());
|
|
||||||
subsequents
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// char
|
|
||||||
//
|
|
||||||
|
|
||||||
pub trait Lexable {
|
|
||||||
fn is_left_paren(&self) -> bool;
|
|
||||||
fn is_right_paren(&self) -> bool;
|
|
||||||
fn is_identifier_initial(&self) -> bool;
|
|
||||||
fn is_identifier_subsequent(&self) -> bool;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Lexable for char {
|
|
||||||
fn is_left_paren(&self) -> bool {
|
|
||||||
self == &'('
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_right_paren(&self) -> bool {
|
|
||||||
self == &')'
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_identifier_initial(&self) -> bool {
|
|
||||||
identifier_initials().contains(&self)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_identifier_subsequent(&self) -> bool {
|
|
||||||
identifier_subsequents().contains(&self)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// str and String
|
|
||||||
//
|
|
||||||
|
|
||||||
pub trait RelativeIndexable {
|
pub trait RelativeIndexable {
|
||||||
/// Get the index of the character boundary preceding the given index. The index does not need to be on a character
|
/// Get the index of the character boundary preceding the given index. The index does not need to be on a character
|
||||||
|
|
@ -124,6 +51,17 @@ impl RelativeIndexable for str {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl CharAt for str {
|
||||||
|
fn char_at(&self, index: usize) -> Option<char> {
|
||||||
|
if !self.is_char_boundary(index) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let end = self.index_after(index);
|
||||||
|
let char_str = &self[index .. end];
|
||||||
|
char_str.chars().nth(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn index_before_is_well_behaved_for_ascii() {
|
fn index_before_is_well_behaved_for_ascii() {
|
||||||
let s = "abc";
|
let s = "abc";
|
||||||
|
|
@ -158,14 +96,3 @@ fn index_after_is_well_behaved_for_ascii() {
|
||||||
assert!(s.is_char_boundary(idx));
|
assert!(s.is_char_boundary(idx));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CharAt for str {
|
|
||||||
fn char_at(&self, index: usize) -> Option<char> {
|
|
||||||
if !self.is_char_boundary(index) {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
let end = self.index_after(index);
|
|
||||||
let char_str = &self[index .. end];
|
|
||||||
char_str.chars().nth(0)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -14,7 +14,7 @@ pub struct Token {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Token {
|
impl Token {
|
||||||
fn new(kind: Kind, value: String) -> Token {
|
pub fn new(kind: Kind, value: String) -> Token {
|
||||||
Token { kind: kind, value: value, }
|
Token { kind: kind, value: value, }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue