Fiddling with state, I dunno

This commit is contained in:
Eryn Wells 2016-12-17 10:04:35 -08:00
parent aed0d3e43d
commit 35a5cf1dc8

View file

@ -40,6 +40,12 @@ class Lexer {
extension Lexer: Sequence, IteratorProtocol {
typealias Element = Token
/// States of the lexer's scanning loop.
///
/// - `Initial`: the state the scan starts in.
/// - `Identifier`: entered once an identifier-initial character has been seen.
/// - `Emit`: set once a token has been built; the scanning loop runs until this state is reached.
private enum State {
    case Initial, Identifier, Emit
}
/// Returns the lexer itself as the iterator; the lexer plays both the
/// `Sequence` and `IteratorProtocol` roles.
func makeIterator() -> Lexer {
    let iterator = self
    return iterator
}
@ -49,19 +55,84 @@ extension Lexer: Sequence, IteratorProtocol {
return nil
}
var state = State.Initial
var token: Token?
while token == nil {
var forward = index
let toState = { (nextState: State) in
state = nextState
}
let retract = {
forward = self.input.index(before: forward)
}
let advance = {
forward = self.input.index(after: forward)
}
let emit = { (kind: Token.Kind) in
let valueRange = Range(uncheckedBounds: (lower: self.index, upper: forward))
let value = self.input.substring(with: valueRange)
token = Token(kind: kind, value: value)
toState(.Emit)
}
while state != .Emit {
let c = input[index]
switch c {
case "(":
token = Token(kind: .LeftParen, value: String(c))
case ")":
token = Token(kind: .RightParen, value: String(c))
default:
switch state {
case .Initial:
if c.isLeftParen {
}
else if c.isRightParen {
}
else if c.isIdentifierInitial {
advance()
toState(.Identifier)
}
case .Identifier:
if c.isIdentifierSubsequent
case .Emit:
// Nothing to do for this state
break
}
index = input.index(after: index)
}
return token
}
}
// Character classification helpers used by the lexer.
extension Character {
    /// Scalars that may begin an identifier: Unicode letters plus the
    /// symbol characters `!$%&*/:<=>?~_^`.
    static let identifierInitialSet: CharacterSet = {
        let letters = CharacterSet.letters
        let extras = CharacterSet(charactersIn: "!$%&*/:<=>?~_^")
        let initials = letters.union(extras)
        return initials
    }()

    /// Scalars that may follow the first character of an identifier:
    /// everything in `identifierInitialSet`, decimal digits, and `.+-`.
    static let identifierSubsequentSet: CharacterSet = {
        let initials = Character.identifierInitialSet
        let digits = CharacterSet.decimalDigits
        let extras = CharacterSet(charactersIn: ".+-")
        let subsequents = initials.union(digits).union(extras)
        return subsequents
    }()

    /// `true` when this character is an opening parenthesis.
    var isLeftParen: Bool {
        return self == "("
    }

    /// `true` when this character is a closing parenthesis.
    var isRightParen: Bool {
        return self == ")"
    }

    /// `true` when this character may start an identifier.
    var isIdentifierInitial: Bool {
        return isComposed(of: Character.identifierInitialSet)
    }

    /// `true` when this character may appear after the first character of
    /// an identifier.
    var isIdentifierSubsequent: Bool {
        return isComposed(of: Character.identifierSubsequentSet)
    }

    /// Reports whether every Unicode scalar making up this character is a
    /// member of `set`.
    ///
    /// `CharacterSet` tests membership per scalar, while a `Character` can
    /// consist of several scalars, so each one is checked. Going through
    /// `String(self)` avoids depending on `Character.unicodeScalars`, which
    /// older Swift toolchains do not expose.
    private func isComposed(of set: CharacterSet) -> Bool {
        let scalars = String(self).unicodeScalars
        return !scalars.contains { !set.contains($0) }
    }
}