Pableins | ME

Pableins | ME

Formal Languages

From November 25, 2019

Let's start by defining the rules of production and post-production, in which all elements are implemented with Swift.

Production and Post-production rules, context-free grammar

::= <> Backus-Naur Form

 Context Free <A> ::= 0 | <A><B>
 A => 0
 A => AB

 Is a set of production rules that describe all posible strings,
 defined as simple replacements

  ::= "Separate replacement symbols"

 Terminal -> Simple replacement, only one rule
 <NON TERMINAL> -> Multiple replacement, set of rules
 with context free (position of symbols in text)

 <letter>   ::= A-Z,a-z
 <var>      ::= [<letter>]
 <var>      ::= <E>
 <Number>   ::= <Decimal>
 <Operator> ::= + | - | * | /
 <Decimal>  ::= 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 0 | <Decimal <Decimal>

 <E> ::= <Number> | <Number> <Operator> <E> | (<E>)

 Recursive Decent Parsing (no left recursion)
 Parser without backtracking, allows to decide which production to use
 by examining only the next n tokens of input.

Utilities

This set of helpers are mainly for the node

Helpers.swift

import Foundation

var identifiers: [String: Float] = [:]

extension Float: Node {
    public func interpret() throws -> Float {
        return self
    }
}

extension String: Node {
    public func interpret() throws -> Float {
        guard let value = identifiers[self] else {
            throw Parser.Error.undefined(self)
        }

        return value
    }
}

extension String {
    func getPrefix(regex: String) -> String? {
        let expression = try! NSRegularExpression(pattern: "^\(regex)", options: [])
        let range = expression.rangeOfFirstMatch(in: self, options: [], range: NSRange(location: 0, length: self.utf16.count))

        if range.location == 0 {
            return (self as NSString).substring(with: range)
        }
        return nil
    }

    mutating func trimLeadingWhitespace() {
        let i = startIndex

        while i < endIndex {
            guard CharacterSet.whitespacesAndNewlines.contains(self[i].unicodeScalars.first!) else {
                return
            }
            self.remove(at: i)
        }
    }
}

Token

Each token represents a pattern within the chain of characters, let it be data types, symbols, identifiers, etc.

Token.swift

import Foundation

public enum Token {
    case `var`
    case equals
    case op(String)
    case number(Int)
    case parenthesesOpen
    case parenthesesClose
    case identifier(String)

    typealias Generator = (String) -> Token?

    static var generators: [String: Generator] {
        return [
            "\\=": { _ in .equals },
            "\\+|\\-|\\*|\\/": { .op($0) },
            "\\(": { _ in .parenthesesOpen },
            "\\)": { _ in .parenthesesClose },
            "\\-?([1-9][0-9]*)": { .number(Int($0)!) },
            "[a-zA-Z][a-zA-Z_0-9]*": {
                guard $0 != "var" else { return .var }
                return .identifier($0)
            },
            //"\\-?([1-9]*\\.[0-9]+|[0-9]+)": { .number(Float($0)!) } //FLOAT
        ]
    }
}

Lexer

Process the input String that it receives as argument and outputs a collection of tokens.

Lexer.swift

import Foundation

public class Lexer {
    public let tokens: [Token]

    public init(code: String) {
        var code = code
        code.trimLeadingWhitespace()

        var tokens: [Token] = []

        while let next = Lexer.getNextPrefix(code: code) {
            let (regex, prefix) = next

            code = String(code[prefix.endIndex...])
            code.trimLeadingWhitespace()

            guard let generator = Token.generators[regex],
                let token = generator(prefix) else {
                fatalError()
            }
            tokens.append(token)
        }
        self.tokens = tokens
    }

    public static func getNextPrefix(code: String) -> (regex: String, prefix: String)? {
        let keyValue = Token.generators
            .first(where: { regex, generator in
                code.getPrefix(regex: regex) != nil
            })

        guard let regex = keyValue?.key,
            keyValue?.value != nil else {
            return nil
        }

        return (regex, code.getPrefix(regex: regex)!)
    }
}

Parser

From the collection of tokens returned by the Lexer the Parser generates a tree which holds a syntactic structure of the source code of the programming language. At this level is possible to return information about any errors that may be found.

Parser.swift

import Foundation

public class Parser {
    enum Error: Swift.Error {
        case expected(String)
        case undefined(String)
        case expectedNumber
        case expectedOperator
        case expectedIdentifier
        case expectedExpression
    }

    var index = 0
    let tokens: [Token]

    var canPop: Bool {
        return index < tokens.count
    }

    public init(tokens: [Token]) {
        self.tokens = tokens
    }

    func peek() -> Token {
        return tokens[index]
    }

    func peekPrecedence() throws -> Int {
        guard canPop, case let .op(op) = peek() else {
            return -1
        }

        switch op {
            case "+", "-": return 1
            case "*", "/": return 2
            default: return 0
        }
    }

    func popToken() -> Token {
        let token = tokens[index]

        index += 1

        return token
    }

    func parseNumber() throws -> Float {
        guard case let .number(float) = popToken() else {
            throw Error.expectedNumber
        }

        return Float(float)
    }

    func parseParentheses() throws -> Node {
        guard case .parenthesesOpen = popToken() else {
            throw Error.expected("(")
        }

        let expressionNode = try parseExpression()

        guard case .parenthesesClose = popToken() else {
            throw Error.expected(")")
        }

        return expressionNode
    }

    func parseIdentifier() throws -> Node {
        guard case let .identifier(name) = popToken() else {
            throw Error.expectedIdentifier
        }

        return name
    }

    func parseValue() throws -> Node {
        switch (peek()) {
            case .number:
                return try parseNumber()
            case .identifier:
                return try parseIdentifier()
            case .parenthesesOpen:
                return try parseParentheses()
            default:
                throw Error.expected("<Expression> not valid")
        }
    }

    public func parseExpression() throws -> Node {
        guard canPop else { throw Error.expectedExpression }

        let node = try parseValue()

        return try ast(node: node)
    }

    func parseVariableDeclaration() throws -> Node {
        guard case .var = popToken() else {
            throw Error.expected("\"var\" in variable declaration")
        }

        guard case let .identifier(name) = popToken() else {
            throw Error.expectedIdentifier
        }

        guard case .equals = popToken() else {
            throw Error.expected("= not found for variable <\(name)>")
        }

        let expression = try parseExpression()

        return VariableDeclaration(name: name, value: expression)
    }

    func parse() throws -> Node {
        var nodes: [Node] = []

        while canPop {
            let token = peek()

            switch token {
                case .var:
                    let declaration = try parseVariableDeclaration()
                    nodes.append(declaration)
                default:
                    let expression = try parseExpression()
                    nodes.append(expression)
            }
        }
        return Block(nodes: nodes)
    }

    func ast(node: Node, nodePrecedence: Int = 0) throws -> Node { //Abstract Syntax Tree
        var leftNode = node

        while let precedence = try peekPrecedence() as Int?, precedence >= nodePrecedence {
            guard case let .op(op) = popToken() else {
                throw Error.expectedOperator
            }

            let rightNode = try parseValue()

            /*

            // **************************  PRECEDENCE **************************   //
            var rightNode = try parseValue()
            let nextPrecedence = try peekPrecedence()

            if precedence < nextPrecedence {
                rightNode = try ast(node: rightNode, nodePrecedence: precedence + 1)
            }

            // **************************  PRECEDENCE **************************   //
            */

            leftNode = InfixOperation(lhs: leftNode, rhs: rightNode, op: op)
        }

        return leftNode
    }
}

The Parser returns an AST (Abstract Syntax Tree) that has to be interpreted, and it's composed by Nodes

Node

Nodes define the structure for the declaration of variables with a keyword or for any INFIX operation that are characterized by an operator between two operands like 2+2 and blocks like (1 + 2) - 2 as well.

Either side of the operator can contain different operations

Node.swift

import Foundation

public protocol Node {
    func interpret() throws -> Float
}

struct VariableDeclaration: Node {
    let name: String
    let value: Node

    func interpret() throws -> Float {
        let val = try value.interpret()
        identifiers[name] = val
        return val
    }
}

public struct InfixOperation: Node {
    let lhs: Node
    let rhs: Node
    let op: String

    public func interpret() throws -> Float {
        let left = try lhs.interpret()
        let right = try rhs.interpret()

        switch op {
            case "+": return left + right
            case "-": return left - right
            case "*": return left * right
            case "+/": return left / right
            default: return 0
        }
    }
}

// Node wrapper around multiple nodes
struct Block: Node {
    let nodes: [Node]

    func interpret() throws -> Float {
        for line in nodes[0..<(nodes.endIndex - 1)] {
            _ = try line.interpret()
        }

        guard let last = nodes.last else {
            throw Parser.Error.expectedExpression
        }

        // All nodes have been interpreted before returning
        return try last.interpret()
    }
}

Interpeter

Is the implementation of arithmetic operations and the order of expressions based on their priority.

Main Program

Here we have some simple examples to test the Lexer and Parser according to the production and post-production rules.

main.swift

//let code = "(4+2)*4+6" //30
//let code = "(4/2)*4+2" //10

let code = "4+2*4+6" //18 with precedence, 30 no precedence

/*
var code = """
    var a = 4
    var b = 2
    var c = 6
    var z = a + b * a + c
"""
*/

let lexer = Lexer(code: code)
let parser = Parser(tokens: lexer.tokens)

do {
    let ast = try parser.parse()

    print(try ast.interpret())
}
catch {
    print((error as? Parser.Error)!)
}

With such a simple example is easy to follow each stage and see what output is expected from each.

 
Share this