diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/Lexer/Lexer.php | 81 | ||||
| -rw-r--r-- | src/Parser/Parser.php | 501 | 
2 files changed, 572 insertions, 10 deletions
diff --git a/src/Lexer/Lexer.php b/src/Lexer/Lexer.php index 747b0c9..fba9623 100644 --- a/src/Lexer/Lexer.php +++ b/src/Lexer/Lexer.php @@ -27,7 +27,7 @@ class Lexer              "(", ")",              "[", "]",              "{", "}", -            "$", +            "$", ".",          ];          $lastPosition = -1; @@ -58,6 +58,7 @@ class Lexer                  // equals                  if ($nextChar == "=") {                      $output[] = new Token( +                        TokenType::Comparator,                          $currentChar . $nextChar,                          $currentChar . $nextChar,                          $this->line, @@ -69,6 +70,7 @@ class Lexer                  // pipe                  else if ($nextChar == ">") {                      $output[] = new Token( +                        TokenType::Pipe,                          $currentChar . $nextChar,                          $currentChar . $nextChar,                          $this->line, @@ -80,6 +82,7 @@ class Lexer                  // assign                  else {                      $output[] = new Token( +                        TokenType::Assign,                          $currentChar,                          $currentChar,                          $this->line, @@ -107,6 +110,7 @@ class Lexer                  // divide                  else {                      $output[] = new Token( +                        TokenType::Operator,                          $currentChar,                          $currentChar,                          $this->line, @@ -124,6 +128,7 @@ class Lexer                  // exponential                  if ($nextChar == "*") {                      $output[] = new Token( +                        TokenType::Operator,                          $currentChar . $nextChar,                          $currentChar . 
$nextChar,                          $this->line, @@ -135,6 +140,7 @@ class Lexer                  // multiply                  else {                      $output[] = new Token( +                        TokenType::Operator,                          $currentChar,                          $currentChar,                          $this->line, @@ -152,6 +158,7 @@ class Lexer                  // lower equals                  if ($nextChar == "=") {                      $output[] = new Token( +                        TokenType::Comparator,                          $currentChar . $nextChar,                          $currentChar . $nextChar,                          $this->line, @@ -162,6 +169,7 @@ class Lexer                  else {                      $output[] = new Token( +                        TokenType::Comparator,                          $currentChar,                          $currentChar,                          $this->line, @@ -179,6 +187,7 @@ class Lexer                  // greater equals                  if ($nextChar == "=") {                      $output[] = new Token( +                        TokenType::Comparator,                          $currentChar . $nextChar,                          $currentChar . 
$nextChar,                          $this->line, @@ -189,6 +198,37 @@ class Lexer                  else {                      $output[] = new Token( +                        TokenType::Comparator, +                        $currentChar, +                        $currentChar, +                        $this->line, +                        $startColumn, +                    ); +                    $this->advance(1); +                } +            } + +            // exclamation mark +            else if ($currentChar == "!") { +                $nextChar = $this->getNextChar(); +                $startColumn = $this->column; + +                // not equal +                if ($nextChar == "=") { +                    $output[] = new Token( +                        TokenType::Comparator, +                        $currentChar . $nextChar, +                        $currentChar . $nextChar, +                        $this->line, +                        $startColumn, +                    ); +                    $this->advance(2); +                } + +                // not +                else { +                    $output[] = new Token( +                        TokenType::Operator,                          $currentChar,                          $currentChar,                          $this->line, @@ -201,6 +241,7 @@ class Lexer              // single char tokens              else if (in_array($currentChar, $singleCharTokens)) {                  $output[] = new Token( +                    TokenType::Operator, # TODO                      $currentChar,                      $currentChar,                      $this->line, @@ -225,6 +266,15 @@ class Lexer              }          } +        // add end of file token +        $output[] = new Token( +            TokenType::EndOfFile, +            "", +            "", +            $this->line, +            $this->column +        ); +          return $output;      } @@ -238,13 +288,6 @@ class Lexer          return 
$this->input[$this->position + 1] ?? null;      } -    /* -    public function seek(int $position): void -    { -        $this->position = $position; -    } -    */ -      public function advance(int $steps): void      {          $this->position += $steps; @@ -283,7 +326,7 @@ class Lexer          $startPosition = $this->position;          $endPosition = $this->position; -        while ($this->isIdentifierChar($this->getCurrentChar())) { +        while ($this->isIdentifierChar($this->getCurrentChar()) or $this->isNumberChar($this->getCurrentChar())) {              $endPosition = $this->position;              $this->advance(1);          } @@ -291,6 +334,7 @@ class Lexer          $value = substr($this->input, $startPosition, $endPosition - $startPosition + 1);          return new Token( +            TokenType::Identifier,              $value,              $value,              $this->line, @@ -322,8 +366,9 @@ class Lexer          $value = substr($this->input, $startPosition, $endPosition - $startPosition + 1);          return new Token( +            TokenType::Number,              $value, -            $value, +            str_replace("_", "", $value),              $this->line,              $startColumn,          ); @@ -357,6 +402,7 @@ class Lexer          $rawValue = substr($this->input, $startPosition, $endPosition - $startPosition + 1);          return new Token( +            TokenType::String,              $rawValue,              substr($rawValue, 1, strlen($rawValue) - 2),              $startLine, @@ -379,6 +425,7 @@ class Lexer          $rawValue = substr($this->input, $startPosition, $endPosition - $startPosition + 1);          return new Token( +            TokenType::Comment,              $rawValue,              trim(substr($rawValue, 2)),              $this->line, @@ -414,6 +461,7 @@ class Lexer          $rawValue = substr($this->input, $startPosition, $endPosition - $startPosition + 1);          return new Token( +            TokenType::Comment,              
declare(strict_types=1);

namespace Mnml\Lexer;

/**
 * A single lexical token handed from the lexer to the parser.
 */
class Token
{
    /**
     * @param TokenType $type    Category of the token.
     * @param string    $literal Text the parser matches against (e.g. "const", "[", "==").
     * @param string    $value   Processed text of the token; the parser reads it for numbers.
     * @param string    $line    Line of the token. NOTE(review): declared as string but printed
     *                           with %d by the parser - confirm what the lexer passes.
     * @param string    $column  Column of the token (same note as $line).
     */
    public function __construct(
        public TokenType $type,
        public string $literal,
        public string $value,
        public string $line,
        public string $column,
    ) {}
}

/**
 * Categories a Token can belong to.
 */
enum TokenType
{
    case Identifier;
    case Number;
    case String;
    case Comment;
    case Comparator;
    case Operator;
    case Assign;
    case Pipe;
    case EndOfFile;
}

namespace Mnml\Parser;

use Mnml\Lexer\Token;
use Mnml\Lexer\TokenType;
use OutOfBoundsException;
use ReflectionClass;

/**
 * Turns a flat list of lexer tokens into a list of syntax tree nodes.
 *
 * Only `const` declarations are recognised at the top level; every other
 * token is skipped. Recoverable problems are echoed and collected, see
 * getErrors().
 */
class Parser
{
    /** Literals that combine two operands into an OperatorExpression. */
    private const ARITHMETIC_OPERATORS = ['+', '-', '*', '**', '/'];

    /** Literals that combine two operands into a CompareExpression. */
    private const COMPARISON_OPERATORS = ['<', '>', '<=', '>=', '==', '!='];

    /** @var Token[] */
    private array $tokens;

    /** Index of the token currently being looked at. */
    private int $position;

    /** @var string[] */
    private array $errors;

    /** @var Node[] */
    private array $nodes;

    /**
     * @param Token[] $tokens
     */
    public function __construct(array $tokens)
    {
        $this->tokens = $tokens;
        $this->position = 0;
        $this->errors = [];
        $this->nodes = [];
    }

    /**
     * Parses the token list and returns the top-level nodes found so far.
     *
     * The last token of the list is never inspected by this loop.
     *
     * @return Node[]
     *
     * @throws OutOfBoundsException when a declaration runs past the last token
     */
    public function parse(): array
    {
        $lastIndex = count($this->tokens) - 1;

        while ($this->position < $lastIndex) {
            if ($this->getCurrentToken()->literal === 'const') {
                $this->nodes[] = $this->parseConst();
            } else {
                // anything that is not a const declaration is skipped
                $this->position += 1;
            }
        }

        return $this->nodes;
    }

    /**
     * Returns the messages of all problems reported while parsing.
     *
     * @return string[]
     */
    public function getErrors(): array
    {
        return $this->errors;
    }

    /**
     * Echoes the parsed nodes as an indented tree ("> " per nesting level).
     */
    public function printTree(): void
    {
        $tree = new Tree($this->nodes);
        $this->printNodeRecursive($tree, 0);
    }

    /**
     * Echoes every public property of $node, descending into nested nodes and lists.
     *
     * Only null is shown as "_empty_", so a number value of 0 is printed as "0".
     */
    private function printNodeRecursive(Node $node, int $level = 0): void
    {
        $indent = str_repeat('> ', $level);

        foreach (get_object_vars($node) as $name => $value) {
            if ($value instanceof Node) {
                echo sprintf("%s%s: %s\n", $indent, $name, $this->shortName($value));
                $this->printNodeRecursive($value, $level + 1);
            } elseif (is_array($value)) {
                $this->printList((string) $name, $value, $level);
            } elseif ($value instanceof Token) {
                echo sprintf("%s%s: %s\n", $indent, $name, $this->describeToken($value));
            } elseif ($value === null) {
                echo sprintf("%s%s: %s\n", $indent, $name, '_empty_');
            } else {
                echo sprintf("%s%s: %s\n", $indent, $name, $value);
            }
        }
    }

    /**
     * Echoes a list-valued property: a header line, then one entry per item.
     *
     * @param array<int, mixed> $items
     */
    private function printList(string $name, array $items, int $level): void
    {
        $indent = str_repeat('> ', $level);
        $itemIndent = str_repeat('> ', $level + 1);

        echo sprintf("%s%s[%d]:\n", $indent, $name, count($items));

        if ($items === []) {
            echo $itemIndent . "None\n";

            return;
        }

        foreach ($items as $index => $item) {
            if ($item instanceof Node) {
                echo sprintf("%s[%d]%s\n", $itemIndent, $index + 1, $this->shortName($item));
                $this->printNodeRecursive($item, $level + 2);
            } elseif ($item instanceof Token) {
                // list items may be bare tokens (parseExpression can return one)
                echo sprintf("%s[%d]Token: %s\n", $itemIndent, $index + 1, $this->describeToken($item));
            } else {
                echo sprintf("%s[%d]%s\n", $itemIndent, $index + 1, get_debug_type($item));
            }
        }
    }

    /**
     * Returns the class name of $object without its namespace.
     */
    private function shortName(object $object): string
    {
        return (new ReflectionClass($object))->getShortName();
    }

    /**
     * Formats a token as "literal - line:column".
     */
    private function describeToken(Token $token): string
    {
        return sprintf('%s - %d:%d', $token->literal, $token->line, $token->column);
    }

    /**
     * Returns the token at the current position.
     *
     * @throws OutOfBoundsException when the position does not point at a token
     */
    private function getCurrentToken(): Token
    {
        return $this->tokens[$this->position]
            ?? throw new OutOfBoundsException(sprintf('No token at position %d.', $this->position));
    }

    /**
     * Returns the token after the current one, or null at the end of the list.
     */
    private function getNextToken(): ?Token
    {
        return $this->tokens[$this->position + 1] ?? null;
    }

    /**
     * Tells whether at least one token follows the current one.
     */
    private function hasNextToken(): bool
    {
        return $this->position + 1 < count($this->tokens);
    }

    /**
     * Moves the position forward by $steps tokens.
     *
     * @throws OutOfBoundsException when the move would leave the token list
     */
    private function advance(int $steps): void
    {
        // a real check instead of assert(): assertions can be switched off
        if ($this->position + $steps >= count($this->tokens)) {
            throw new OutOfBoundsException("Can't advance. Position out of bounds.");
        }

        $this->position += $steps;
    }

    /**
     * Records and echoes an "expected X, got Y" message for $actual.
     */
    private function reportUnexpectedToken(string $expected, Token $actual): void
    {
        $error = sprintf(
            "Expected \"%s\" at position %d,%d - got %s instead" . PHP_EOL,
            $expected,
            $actual->line,
            $actual->column,
            $actual->literal,
        );

        $this->errors[] = $error;

        echo $error;
    }

    /**
     * Parses `const <identifier> : <type> = <expression>`.
     *
     * The ":" and "=" tokens are skipped by position; their literals are not verified.
     */
    private function parseConst(): Node
    {
        // skip const
        $this->advance(1);

        $identifier = $this->getCurrentToken();

        // skip identifier and :
        $this->advance(2);

        $type = $this->parseType();

        // skip =
        $this->advance(1);

        $expression = $this->parseExpression(
            shouldBeMap: $type instanceof MapTypeDeclaration,
            // only a plain TypeDeclaration has a `left` token to look at
            shouldBeFunction: $type instanceof TypeDeclaration && $type->left->literal === 'function',
        );

        return new ConstVariableDeclaration($identifier, $type, $expression);
    }

    /**
     * Parses a type: `[...]` array/map types, `a and b` / `a or b` chains, or a single name.
     */
    private function parseType(): Node
    {
        $currentToken = $this->getCurrentToken();

        if ($currentToken->literal === '[') {
            return $this->parseArrayOrMapType();
        }

        $nextToken = $this->getNextToken();

        if ($nextToken !== null && in_array($nextToken->literal, ['and', 'or'], true)) {
            // skip the type name and the and/or keyword
            $this->advance(2);

            return new TypeDeclaration($currentToken, $nextToken, $this->parseType());
        }

        $this->advance(1);

        return new TypeDeclaration($currentToken);
    }

    /**
     * Parses `[type]` into an ArrayTypeDeclaration or `[key][value]` into a MapTypeDeclaration.
     */
    private function parseArrayOrMapType(): Node
    {
        // skip first [
        $this->advance(1);

        $key = $this->parseType();

        $isMap = $this->getCurrentToken()->literal === ']'
            && $this->getNextToken()?->literal === '[';

        if ($isMap) {
            // skip "][" to reach the value type
            $this->advance(2);

            $value = $this->parseType();

            // skip last ]
            $this->advance(1);

            return new MapTypeDeclaration($key, $value);
        }

        // skip last ]
        $this->advance(1);

        return new ArrayTypeDeclaration($key);
    }

    /**
     * Parses one operand and, when an operator follows, the rest of the expression.
     *
     * The right operand comes from a recursive call, so chains nest to the right
     * and no operator precedence is applied. A token that is neither an array,
     * number, string, function definition nor identifier is returned as-is
     * without being consumed.
     */
    private function parseExpression(bool $shouldBeMap = false, bool $shouldBeFunction = false): Node|Token
    {
        $currentToken = $this->getCurrentToken();
        $currentExpression = $currentToken;

        if ($currentToken->literal === '[') {
            $currentExpression = $this->parseArrayOrMap($shouldBeMap);
        } elseif ($currentToken->type === TokenType::Number) {
            $currentExpression = $this->parseNumber();
        } elseif ($currentToken->type === TokenType::String) {
            $currentExpression = new StringNode($currentToken);
            $this->advance(1);
        } elseif ($currentToken->literal === '(') {
            if ($shouldBeFunction) {
                $currentExpression = $this->parseFunctionDefinition();
            }
        } elseif ($currentToken->type === TokenType::Identifier && $this->hasNextToken()) {
            // consume the identifier so that a following operator, "=" or "," is seen next
            $this->advance(1);
        }

        $operatorToken = $this->getCurrentToken();

        if (in_array($operatorToken->literal, self::ARITHMETIC_OPERATORS, true)) {
            $this->advance(1);

            return new OperatorExpression(
                $currentExpression,
                $operatorToken,
                $this->parseExpression(shouldBeMap: $shouldBeMap),
            );
        }

        if (in_array($operatorToken->literal, self::COMPARISON_OPERATORS, true)) {
            $this->advance(1);

            return new CompareExpression(
                $currentExpression,
                $operatorToken,
                $this->parseExpression(shouldBeMap: $shouldBeMap),
            );
        }

        return $currentExpression;
    }

    /**
     * Parses a bracketed, comma separated list into an ArrayNode or a MapNode.
     *
     * The result is a MapNode when $shouldBeMap is set or the first item is a
     * MapItemNode. A missing "," or a missing closing "]" is reported and ends
     * the list early.
     */
    private function parseArrayOrMap(bool $shouldBeMap = false): Node
    {
        $values = [];

        // skip first [
        if ($this->getCurrentToken()->literal === '[') {
            $this->advance(1);
        }

        $tokenShouldBeComma = false;

        while (true) {
            $currentToken = $this->getCurrentToken();

            if ($currentToken->literal === ']') {
                break;
            }

            if ($currentToken->type === TokenType::EndOfFile) {
                $this->reportUnexpectedToken(']', $currentToken);
                break;
            }

            // comments may appear anywhere in the list, also before a comma
            if ($currentToken->type === TokenType::Comment) {
                $this->advance(1);
                continue;
            }

            if ($tokenShouldBeComma) {
                if ($currentToken->literal === ',') {
                    $this->advance(1);
                    $tokenShouldBeComma = false;
                    continue;
                }

                // , missing => error
                $this->reportUnexpectedToken(',', $currentToken);
                break;
            }

            $values[] = $currentToken->literal === '['
                ? $this->parseArrayOrMap()
                : $this->parseArrayOrMapItem();
            $tokenShouldBeComma = true;
        }

        // skip the closing ] (after a missing comma: the offending token); never step past the last token
        if ($this->hasNextToken()) {
            $this->advance(1);
        }

        if ($shouldBeMap || ($values[0] ?? null) instanceof MapItemNode) {
            return new MapNode($values);
        }

        return new ArrayNode($values);
    }

    /**
     * Parses one list entry: `key = value` becomes a MapItemNode, anything else is an array item.
     */
    private function parseArrayOrMapItem(): Node|Token
    {
        $key = $this->parseExpression();

        if ($this->getCurrentToken()->literal !== '=') {
            // is array item
            return $key;
        }

        // is map item
        $this->advance(1);

        return new MapItemNode($key, $this->parseExpression());
    }

    /**
     * Converts the current number token into a NumberNode (float when its value contains ".").
     */
    private function parseNumber(): Node
    {
        $currentToken = $this->getCurrentToken();

        $value = str_contains($currentToken->value, '.')
            ? floatval($currentToken->value)
            : intval($currentToken->value);

        // step to next token
        $this->advance(1);

        return new NumberNode($currentToken, $value);
    }

    /**
     * Parses `( <parameters> ) : <return type>` followed by the function body.
     */
    private function parseFunctionDefinition(): Node
    {
        // skip first (
        $this->advance(1);

        $parameters = $this->parseFunctionParameters();

        // skip :
        $this->advance(1);

        $returnType = $this->parseType();

        $body = $this->parseFunctionBody();

        return new FunctionDefinition($parameters, $returnType, $body);
    }

    /**
     * Parses the parameters up to and including the closing ")".
     *
     * NOTE(review): assumes each parameter is written `name : type`, like a
     * const declaration, and that "," separates parameters - confirm against
     * the language grammar.
     *
     * @return FunctionParameter[]
     */
    private function parseFunctionParameters(): array
    {
        $parameters = [];

        while (true) {
            $currentToken = $this->getCurrentToken();

            if ($currentToken->literal === ')' || $currentToken->type === TokenType::EndOfFile) {
                break;
            }

            if ($currentToken->literal === ',' || $currentToken->type === TokenType::Comment) {
                $this->advance(1);
                continue;
            }

            $identifier = $currentToken;

            // skip identifier and :
            $this->advance(2);

            $parameters[] = new FunctionParameter($identifier, $this->parseType());
        }

        if ($this->getCurrentToken()->literal === ')' && $this->hasNextToken()) {
            // skip last )
            $this->advance(1);
        } else {
            $this->reportUnexpectedToken(')', $this->getCurrentToken());
        }

        return $parameters;
    }

    /**
     * Placeholder for function bodies.
     *
     * TODO: not implemented yet - returns an empty list and consumes no tokens.
     *
     * @return Node[]
     */
    private function parseFunctionBody(): array
    {
        return [];
    }

    /**
     * Placeholder for function calls.
     *
     * TODO: not implemented yet.
     */
    private function parseFunctionCall(): void
    {
    }
}

/**
 * Base class of everything the parser puts into the syntax tree.
 */
class Node
{}

/**
 * Root wrapper around the top-level nodes; used by Parser::printTree().
 */
class Tree extends Node
{
    /**
     * @param Node[] $nodes
     */
    public function __construct(
        public array $nodes,
    ) {}
}

/**
 * `const <identifier> : <type> = <expression>`.
 */
class ConstVariableDeclaration extends Node
{
    public function __construct(
        public Token $identifier,
        public Node|Token $type,
        public Node|Token $expression,
    ) {}
}

/**
 * A type name, optionally chained to another type with `and` / `or`.
 */
class TypeDeclaration extends Node
{
    public function __construct(
        public Token $left,
        public ?Token $operator = null,
        // any node parseType() can produce, e.g. `int or [int]`
        public Token|Node|null $right = null,
    ) {}
}

/**
 * `[type]`.
 */
class ArrayTypeDeclaration extends Node
{
    public function __construct(
        public TypeDeclaration|ArrayTypeDeclaration|MapTypeDeclaration $value,
    ) {}
}

/**
 * `[key][value]`.
 */
class MapTypeDeclaration extends Node
{
    public function __construct(
        public TypeDeclaration|ArrayTypeDeclaration|MapTypeDeclaration $key,
        public TypeDeclaration|ArrayTypeDeclaration|MapTypeDeclaration $value,
    ) {}
}

/**
 * A bracketed list of values.
 */
class ArrayNode extends Node
{
    /**
     * @param array<Token|Node> $values
     */
    public function __construct(
        public array $values,
    ) {}
}

/**
 * A bracketed list of `key = value` items.
 */
class MapNode extends Node
{
    /**
     * @param MapItemNode[] $values
     */
    public function __construct(
        public array $values,
    ) {}
}

/**
 * One `key = value` entry of a map.
 */
class MapItemNode extends Node
{
    public function __construct(
        public Token|Node $key,
        public Token|Node $value,
    ) {}
}

/**
 * Two operands joined by an arithmetic operator.
 */
class OperatorExpression extends Node
{
    public function __construct(
        public Token|Node $left,
        public Token $operator,
        public Token|Node $right,
    ) {}
}

/**
 * Two operands joined by a comparison operator.
 */
class CompareExpression extends Node
{
    public function __construct(
        public Token|Node $left,
        public Token $operator,
        public Token|Node $right,
    ) {}
}

/**
 * A number literal together with its converted int/float value.
 */
class NumberNode extends Node
{
    public function __construct(
        public Token $token,
        public int|float $value,
    ) {}
}

/**
 * A string literal.
 */
class StringNode extends Node
{
    public function __construct(
        public Token $token,
    ) {}
}

/**
 * A function definition: parameters, return type and body.
 */
class FunctionDefinition extends Node
{
    /**
     * @param FunctionParameter[] $parameters
     * @param Node                $returnType type node as produced by Parser::parseType()
     * @param Node[]              $body
     */
    public function __construct(
        public array $parameters,
        public Node $returnType,
        public array $body,
    ) {}
}

/**
 * One `name : type` parameter of a function definition.
 */
class FunctionParameter extends Node
{
    /**
     * @param Node $type type node as produced by Parser::parseType()
     */
    public function __construct(
        public Token $identifier,
        public Node $type,
    ) {}
}
