2016-12-20 17:38:44 -08:00
|
|
|
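//! Characters
//!
//! Utilities for dealing with characters of various sorts: the character
//! sets that make up Scheme identifiers, lexer-oriented predicates on `char`,
//! and helpers for stepping between character boundaries in a `str`.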
|
|
|
|
|
|
|
|
use std::collections::HashSet;
|
|
|
|
use std::iter::FromIterator;
|
|
|
|
|
|
|
|
/// A set of characters, backed by a `HashSet<char>`.
pub type CharSet = HashSet<char>;
|
|
|
|
|
2016-12-22 09:24:53 -08:00
|
|
|
// TODO: Use std::sync::Once so that these sets are built only once, instead
// of being rebuilt on every call?
// https://doc.rust-lang.org/std/sync/struct.Once.html
|
|
|
|
|
2016-12-20 17:51:43 -08:00
|
|
|
fn ascii_letters() -> CharSet {
|
2016-12-20 17:38:44 -08:00
|
|
|
let letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".chars();
|
2016-12-20 17:51:13 -08:00
|
|
|
CharSet::from_iter(letters)
|
2016-12-20 17:38:44 -08:00
|
|
|
}
|
|
|
|
|
2016-12-20 17:51:43 -08:00
|
|
|
fn ascii_digits() -> CharSet {
|
2016-12-20 17:38:44 -08:00
|
|
|
let digits = "1234567890".chars();
|
2016-12-20 17:51:13 -08:00
|
|
|
CharSet::from_iter(digits)
|
2016-12-20 17:38:44 -08:00
|
|
|
}
|
|
|
|
|
2016-12-22 09:24:53 -08:00
|
|
|
/// A set of all characters allowed to start Scheme identifiers.
|
2016-12-20 17:38:44 -08:00
|
|
|
pub fn identifier_initials() -> CharSet {
|
|
|
|
let letters = ascii_letters();
|
|
|
|
let extras = CharSet::from_iter("!$%&*/:<=>?~_^".chars());
|
|
|
|
let mut initials = CharSet::new();
|
|
|
|
initials.extend(letters.iter());
|
|
|
|
initials.extend(extras.iter());
|
2016-12-20 17:51:13 -08:00
|
|
|
initials
|
2016-12-20 17:38:44 -08:00
|
|
|
}
|
|
|
|
|
2016-12-22 09:24:53 -08:00
|
|
|
/// A set of all characters allowed to follow an identifier initial.
|
2016-12-20 17:38:44 -08:00
|
|
|
pub fn identifier_subsequents() -> CharSet {
|
|
|
|
let initials = identifier_initials();
|
|
|
|
let digits = ascii_digits();
|
|
|
|
let extras = CharSet::from_iter(".+-".chars());
|
|
|
|
let mut subsequents = CharSet::new();
|
|
|
|
subsequents.extend(initials.iter());
|
|
|
|
subsequents.extend(digits.iter());
|
|
|
|
subsequents.extend(extras.iter());
|
2016-12-20 17:51:13 -08:00
|
|
|
subsequents
|
2016-12-20 17:38:44 -08:00
|
|
|
}
|
|
|
|
|
2016-12-23 17:38:33 -07:00
|
|
|
//
// char
//
|
|
|
|
|
|
|
|
/// Classification predicates a lexer needs to ask of a value.
pub trait Lexable {
    /// Returns `true` if this value is a left (opening) parenthesis.
    fn is_left_paren(&self) -> bool;

    /// Returns `true` if this value is a right (closing) parenthesis.
    fn is_right_paren(&self) -> bool;

    /// Returns `true` if this value may start an identifier.
    fn is_identifier_initial(&self) -> bool;

    /// Returns `true` if this value may appear in an identifier after its
    /// first character.
    fn is_identifier_subsequent(&self) -> bool;
}
|
|
|
|
|
|
|
|
impl Lexable for char {
|
|
|
|
fn is_left_paren(&self) -> bool {
|
|
|
|
self == &'('
|
|
|
|
}
|
|
|
|
|
|
|
|
fn is_right_paren(&self) -> bool {
|
|
|
|
self == &')'
|
|
|
|
}
|
|
|
|
|
|
|
|
fn is_identifier_initial(&self) -> bool {
|
|
|
|
identifier_initials().contains(&self)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn is_identifier_subsequent(&self) -> bool {
|
|
|
|
identifier_subsequents().contains(&self)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//
// str and String
//
|
|
|
|
|
2016-12-20 17:38:44 -08:00
|
|
|
/// Lookup of the valid indices adjacent to a given index.
pub trait RelativeIndexable {
    /// Returns the valid index preceding `index`, or `None` if there is none.
    fn index_before(&self, index: &usize) -> Option<usize>;

    /// Returns the valid index following `index`, or `None` if there is none.
    fn index_after(&self, index: &usize) -> Option<usize>;
}
|
|
|
|
|
|
|
|
impl RelativeIndexable for str {
|
2016-12-22 09:25:10 -08:00
|
|
|
fn index_before(&self, index: &usize) -> Option<usize> {
|
2016-12-20 17:38:44 -08:00
|
|
|
let mut prev_index = index - 1;
|
|
|
|
if prev_index <= 0 {
|
2016-12-22 09:25:10 -08:00
|
|
|
return None;
|
2016-12-20 17:38:44 -08:00
|
|
|
}
|
|
|
|
while !self.is_char_boundary(prev_index) {
|
|
|
|
prev_index -= 1;
|
|
|
|
}
|
2016-12-22 09:25:10 -08:00
|
|
|
Some(prev_index)
|
2016-12-20 17:38:44 -08:00
|
|
|
}
|
|
|
|
|
2016-12-22 09:25:10 -08:00
|
|
|
fn index_after(&self, index: &usize) -> Option<usize> {
|
2016-12-20 17:38:44 -08:00
|
|
|
let mut next_index = index + 1;
|
2016-12-20 17:52:00 -08:00
|
|
|
if next_index >= self.len() {
|
2016-12-22 09:25:10 -08:00
|
|
|
return None;
|
2016-12-20 17:38:44 -08:00
|
|
|
}
|
|
|
|
while !self.is_char_boundary(next_index) {
|
|
|
|
next_index += 1;
|
|
|
|
}
|
2016-12-22 09:25:10 -08:00
|
|
|
Some(next_index)
|
2016-12-20 17:38:44 -08:00
|
|
|
}
|
|
|
|
}
|