Let's start by defining the rules of production and post-production, in which all elements are implemented with Swift.
Production and Post-production rules, context-free grammar
::= <> Backus-Naur Form
Context Free <A> ::= 0 | <A><B>
A => 0
A => AB
Is a set of production rules that describe all posible strings,
defined as simple replacements
::= "Separate replacement symbols"
Terminal -> Simple replacement, only one rule
<NON TERMINAL> -> Multiple replacement, set of rules
with context free (position of symbols in text)
<letter> ::= A-Z,a-z
<var> ::= [<letter>]
<var> ::= <E>
<Number> ::= <Decimal>
<Operator> ::= + | - | * | /
<Decimal> ::= 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 0 | <Decimal <Decimal>
<E> ::= <Number> | <Number> <Operator> <E> | (<E>)
Recursive Decent Parsing (no left recursion)
Parser without backtracking, allows to decide which production to use
by examining only the next n tokens of input.
Utilities
This set of helpers are mainly for the node
Helpers.swift
import Foundation
var identifiers: [String: Float] = [:]
extension Float: Node {
public func interpret() throws -> Float {
return self
}
}
extension String: Node {
public func interpret() throws -> Float {
guard let value = identifiers[self] else {
throw Parser.Error.undefined(self)
}
return value
}
}
extension String {
func getPrefix(regex: String) -> String? {
let expression = try! NSRegularExpression(pattern: "^\(regex)", options: [])
let range = expression.rangeOfFirstMatch(in: self, options: [], range: NSRange(location: 0, length: self.utf16.count))
if range.location == 0 {
return (self as NSString).substring(with: range)
}
return nil
}
mutating func trimLeadingWhitespace() {
let i = startIndex
while i < endIndex {
guard CharacterSet.whitespacesAndNewlines.contains(self[i].unicodeScalars.first!) else {
return
}
self.remove(at: i)
}
}
}
Token
Each token represents a pattern within the chain of characters, let it be data types, symbols, identifiers, etc.
Token.swift
import Foundation
public enum Token {
case `var`
case equals
case op(String)
case number(Int)
case parenthesesOpen
case parenthesesClose
case identifier(String)
typealias Generator = (String) -> Token?
static var generators: [String: Generator] {
return [
"\\=": { _ in .equals },
"\\+|\\-|\\*|\\/": { .op($0) },
"\\(": { _ in .parenthesesOpen },
"\\)": { _ in .parenthesesClose },
"\\-?([1-9][0-9]*)": { .number(Int($0)!) },
"[a-zA-Z][a-zA-Z_0-9]*": {
guard $0 != "var" else { return .var }
return .identifier($0)
},
//"\\-?([1-9]*\\.[0-9]+|[0-9]+)": { .number(Float($0)!) } //FLOAT
]
}
}
Lexer
Process the input String that it receives as argument and outputs a collection of tokens.
Lexer.swift
import Foundation
public class Lexer {
public let tokens: [Token]
public init(code: String) {
var code = code
code.trimLeadingWhitespace()
var tokens: [Token] = []
while let next = Lexer.getNextPrefix(code: code) {
let (regex, prefix) = next
code = String(code[prefix.endIndex...])
code.trimLeadingWhitespace()
guard let generator = Token.generators[regex],
let token = generator(prefix) else {
fatalError()
}
tokens.append(token)
}
self.tokens = tokens
}
public static func getNextPrefix(code: String) -> (regex: String, prefix: String)? {
let keyValue = Token.generators
.first(where: { regex, generator in
code.getPrefix(regex: regex) != nil
})
guard let regex = keyValue?.key,
keyValue?.value != nil else {
return nil
}
return (regex, code.getPrefix(regex: regex)!)
}
}
Parser
From the collection of tokens returned by the Lexer the Parser generates a tree which holds a syntactic structure of the source code of the programming language. At this level is possible to return information about any errors that may be found.
Parser.swift
import Foundation
public class Parser {
enum Error: Swift.Error {
case expected(String)
case undefined(String)
case expectedNumber
case expectedOperator
case expectedIdentifier
case expectedExpression
}
var index = 0
let tokens: [Token]
var canPop: Bool {
return index < tokens.count
}
public init(tokens: [Token]) {
self.tokens = tokens
}
func peek() -> Token {
return tokens[index]
}
func peekPrecedence() throws -> Int {
guard canPop, case let .op(op) = peek() else {
return -1
}
switch op {
case "+", "-": return 1
case "*", "/": return 2
default: return 0
}
}
func popToken() -> Token {
let token = tokens[index]
index += 1
return token
}
func parseNumber() throws -> Float {
guard case let .number(float) = popToken() else {
throw Error.expectedNumber
}
return Float(float)
}
func parseParentheses() throws -> Node {
guard case .parenthesesOpen = popToken() else {
throw Error.expected("(")
}
let expressionNode = try parseExpression()
guard case .parenthesesClose = popToken() else {
throw Error.expected(")")
}
return expressionNode
}
func parseIdentifier() throws -> Node {
guard case let .identifier(name) = popToken() else {
throw Error.expectedIdentifier
}
return name
}
func parseValue() throws -> Node {
switch (peek()) {
case .number:
return try parseNumber()
case .identifier:
return try parseIdentifier()
case .parenthesesOpen:
return try parseParentheses()
default:
throw Error.expected("<Expression> not valid")
}
}
public func parseExpression() throws -> Node {
guard canPop else { throw Error.expectedExpression }
let node = try parseValue()
return try ast(node: node)
}
func parseVariableDeclaration() throws -> Node {
guard case .var = popToken() else {
throw Error.expected("\"var\" in variable declaration")
}
guard case let .identifier(name) = popToken() else {
throw Error.expectedIdentifier
}
guard case .equals = popToken() else {
throw Error.expected("= not found for variable <\(name)>")
}
let expression = try parseExpression()
return VariableDeclaration(name: name, value: expression)
}
func parse() throws -> Node {
var nodes: [Node] = []
while canPop {
let token = peek()
switch token {
case .var:
let declaration = try parseVariableDeclaration()
nodes.append(declaration)
default:
let expression = try parseExpression()
nodes.append(expression)
}
}
return Block(nodes: nodes)
}
func ast(node: Node, nodePrecedence: Int = 0) throws -> Node { //Abstract Syntax Tree
var leftNode = node
while let precedence = try peekPrecedence() as Int?, precedence >= nodePrecedence {
guard case let .op(op) = popToken() else {
throw Error.expectedOperator
}
let rightNode = try parseValue()
/*
// ************************** PRECEDENCE ************************** //
var rightNode = try parseValue()
let nextPrecedence = try peekPrecedence()
if precedence < nextPrecedence {
rightNode = try ast(node: rightNode, nodePrecedence: precedence + 1)
}
// ************************** PRECEDENCE ************************** //
*/
leftNode = InfixOperation(lhs: leftNode, rhs: rightNode, op: op)
}
return leftNode
}
}
The Parser returns an AST (Abstract Syntax Tree) that has to be interpreted, and it's composed by Nodes
Node
Nodes define the structure for the declaration of variables with a keyword or for any INFIX operation that are characterized by an operator between two operands like 2+2
and blocks like (1 + 2) - 2
as well.
Either side of the operator can contain different operations
Node.swift
import Foundation
public protocol Node {
func interpret() throws -> Float
}
struct VariableDeclaration: Node {
let name: String
let value: Node
func interpret() throws -> Float {
let val = try value.interpret()
identifiers[name] = val
return val
}
}
public struct InfixOperation: Node {
let lhs: Node
let rhs: Node
let op: String
public func interpret() throws -> Float {
let left = try lhs.interpret()
let right = try rhs.interpret()
switch op {
case "+": return left + right
case "-": return left - right
case "*": return left * right
case "+/": return left / right
default: return 0
}
}
}
// Node wrapper around multiple nodes
struct Block: Node {
let nodes: [Node]
func interpret() throws -> Float {
for line in nodes[0..<(nodes.endIndex - 1)] {
_ = try line.interpret()
}
guard let last = nodes.last else {
throw Parser.Error.expectedExpression
}
// All nodes have been interpreted before returning
return try last.interpret()
}
}
Interpeter
Is the implementation of arithmetic operations and the order of expressions based on their priority.
Main Program
Here we have some simple examples to test the Lexer and Parser according to the production and post-production rules.
main.swift
//let code = "(4+2)*4+6" //30
//let code = "(4/2)*4+2" //10
let code = "4+2*4+6" //18 with precedence, 30 no precedence
/*
var code = """
var a = 4
var b = 2
var c = 6
var z = a + b * a + c
"""
*/
let lexer = Lexer(code: code)
let parser = Parser(tokens: lexer.tokens)
do {
let ast = try parser.parse()
print(try ast.interpret())
}
catch {
print((error as? Parser.Error)!)
}
With such a simple example is easy to follow each stage and see what output is expected from each.