diff --git a/Sibil/Lexer.swift b/Sibil/Lexer.swift
index eec7719..b3b9788 100644
--- a/Sibil/Lexer.swift
+++ b/Sibil/Lexer.swift
@@ -40,6 +40,14 @@ class Lexer {
 extension Lexer: Sequence, IteratorProtocol {
     typealias Element = Token
 
+    /// States of the scanner driven by `next()`. `Emit` is the terminal
+    /// state: a token has been produced and the scan loop stops.
+    private enum State {
+        case Initial
+        case Identifier
+        case Emit
+    }
+
     func makeIterator() -> Lexer {
         return self
     }
@@ -49,19 +57,127 @@ extension Lexer: Sequence, IteratorProtocol {
             return nil
         }
 
+        var state = State.Initial
         var token: Token?
-        while token == nil {
-            let c = input[index]
-            switch c {
-            case "(":
-                token = Token(kind: .LeftParen, value: String(c))
-            case ")":
-                token = Token(kind: .RightParen, value: String(c))
-            default:
+        // `forward` scans ahead of `index`; the lexeme under construction is
+        // `input[index..<forward]`.
+        var forward = index
+
+        let toState = { (nextState: State) in
+            state = nextState
+        }
+
+        let advance = {
+            forward = self.input.index(after: forward)
+        }
+
+        // Builds the token from the scanned lexeme, commits the scan position
+        // so the next call resumes after it, and ends the scan loop.
+        let emit = { (kind: Token.Kind) in
+            let value = self.input.substring(with: self.index..<forward)
+            token = Token(kind: kind, value: value)
+            self.index = forward
+            toState(.Emit)
+        }
+
+        while state != .Emit {
+            guard forward < input.endIndex else {
+                // Input ended: flush an identifier in progress, otherwise
+                // there is no further token and `nil` is returned.
+                if state == .Identifier {
+                    // NOTE(review): assumes Token.Kind declares an
+                    // `Identifier` case -- confirm against Token.swift.
+                    emit(.Identifier)
+                }
+                break
+            }
+
+            let c = input[forward]
+            switch state {
+            case .Initial:
+                if c.isLeftParen {
+                    advance()
+                    emit(.LeftParen)
+                }
+                else if c.isRightParen {
+                    advance()
+                    emit(.RightParen)
+                }
+                else if c.isIdentifierInitial {
+                    advance()
+                    toState(.Identifier)
+                }
+                else {
+                    // Unrecognised character: skip it, as the previous
+                    // implementation's `default` branch did.
+                    advance()
+                    index = forward
+                }
+            case .Identifier:
+                if c.isIdentifierSubsequent {
+                    advance()
+                }
+                else {
+                    // `c` is not part of the identifier; leave it for the
+                    // next call.
+                    emit(.Identifier)
+                }
+            case .Emit:
+                // Nothing to do for this state
                 break
             }
-            index = input.index(after: index)
         }
+
         return token
     }
 }
+
+extension Character {
+    /// Characters that may start an identifier: letters plus a set of symbols.
+    static let identifierInitialSet: CharacterSet = {
+        let letters = CharacterSet.letters
+        let extras = CharacterSet(charactersIn: "!$%&*/:<=>?~_^")
+        let initials = letters.union(extras)
+        return initials
+    }()
+
+    /// Characters that may follow the first character of an identifier: the
+    /// initial set plus decimal digits and `.`, `+`, `-`.
+    static let identifierSubsequentSet: CharacterSet = {
+        let initials = Character.identifierInitialSet
+        let digits = CharacterSet.decimalDigits
+        let extras = CharacterSet(charactersIn: ".+-")
+        let subsequents = initials.union(digits).union(extras)
+        return subsequents
+    }()
+
+    var isLeftParen: Bool {
+        return self == "("
+    }
+
+    var isRightParen: Bool {
+        return self == ")"
+    }
+
+    /// Whether the character may start an identifier.
+    var isIdentifierInitial: Bool {
+        return isMember(of: Character.identifierInitialSet)
+    }
+
+    /// Whether the character may appear after the first character of an
+    /// identifier.
+    var isIdentifierSubsequent: Bool {
+        return isMember(of: Character.identifierSubsequentSet)
+    }
+
+    /// `CharacterSet` holds Unicode scalars, not `Character`s, so a character
+    /// is a member only when every scalar it is composed of is in `set`.
+    private func isMember(of set: CharacterSet) -> Bool {
+        for scalar in String(self).unicodeScalars {
+            if !set.contains(scalar) {
+                return false
+            }
+        }
+        return true
+    }
+}