diff options
-rwxr-xr-x[-rw-r--r--] | mnml | 11 | ||||
-rw-r--r-- | src/Lexer/Lexer.php | 81 | ||||
-rw-r--r-- | src/Parser/Parser.php | 501 | ||||
-rw-r--r-- | test/const-array-complex.mnml | 1 | ||||
-rw-r--r-- | test/const-array.mnml | 2 | ||||
-rw-r--r-- | test/const-function.mnml | 6 | ||||
-rw-r--r-- | test/const-map.mnml | 11 | ||||
-rw-r--r-- | test/const-simple.mnml | 2 | ||||
-rw-r--r-- | test/const-union.mnml | 1 | ||||
-rw-r--r-- | test/test.test | 78 |
10 files changed, 682 insertions, 12 deletions
@@ -1,10 +1,17 @@ +#!/bin/env php + <?php require "vendor/autoload.php"; use Mnml\Lexer\Lexer; +use Mnml\Parser\Parser; + +$input = file_get_contents(realpath($argv[1])); -$input = file_get_contents(__DIR__ . "/test/test.mnml"); $lexer = new Lexer($input); $tokens = $lexer->lex(); -#var_dump($tokens); + +$parser = new Parser($tokens); +$nodes = $parser->parse(); +$parser->printTree(); diff --git a/src/Lexer/Lexer.php b/src/Lexer/Lexer.php index 747b0c9..fba9623 100644 --- a/src/Lexer/Lexer.php +++ b/src/Lexer/Lexer.php @@ -27,7 +27,7 @@ class Lexer "(", ")", "[", "]", "{", "}", - "$", + "$", ".", ]; $lastPosition = -1; @@ -58,6 +58,7 @@ class Lexer // equals if ($nextChar == "=") { $output[] = new Token( + TokenType::Comparator, $currentChar . $nextChar, $currentChar . $nextChar, $this->line, @@ -69,6 +70,7 @@ class Lexer // pipe else if ($nextChar == ">") { $output[] = new Token( + TokenType::Pipe, $currentChar . $nextChar, $currentChar . $nextChar, $this->line, @@ -80,6 +82,7 @@ class Lexer // assign else { $output[] = new Token( + TokenType::Assign, $currentChar, $currentChar, $this->line, @@ -107,6 +110,7 @@ class Lexer // divide else { $output[] = new Token( + TokenType::Operator, $currentChar, $currentChar, $this->line, @@ -124,6 +128,7 @@ class Lexer // exponential if ($nextChar == "*") { $output[] = new Token( + TokenType::Operator, $currentChar . $nextChar, $currentChar . $nextChar, $this->line, @@ -135,6 +140,7 @@ class Lexer // multiply else { $output[] = new Token( + TokenType::Operator, $currentChar, $currentChar, $this->line, @@ -152,6 +158,7 @@ class Lexer // lower equals if ($nextChar == "=") { $output[] = new Token( + TokenType::Comparator, $currentChar . $nextChar, $currentChar . $nextChar, $this->line, @@ -162,6 +169,7 @@ class Lexer else { $output[] = new Token( + TokenType::Comparator, $currentChar, $currentChar, $this->line, @@ -179,6 +187,7 @@ class Lexer // greater equals if ($nextChar == "=") { $output[] = new Token( + TokenType::Comparator, $currentChar . $nextChar, $currentChar . $nextChar, $this->line, @@ -189,6 +198,37 @@ class Lexer else { $output[] = new Token( + TokenType::Comparator, + $currentChar, + $currentChar, + $this->line, + $startColumn, + ); + $this->advance(1); + } + } + + // exclamation mark + else if ($currentChar == "!") { + $nextChar = $this->getNextChar(); + $startColumn = $this->column; + + // not equal + if ($nextChar == "=") { + $output[] = new Token( + TokenType::Comparator, + $currentChar . $nextChar, + $currentChar . $nextChar, + $this->line, + $startColumn, + ); + $this->advance(2); + } + + // not + else { + $output[] = new Token( + TokenType::Operator, $currentChar, $currentChar, $this->line, @@ -201,6 +241,7 @@ class Lexer // single char tokens else if (in_array($currentChar, $singleCharTokens)) { $output[] = new Token( + TokenType::Operator, # TODO $currentChar, $currentChar, $this->line, @@ -225,6 +266,15 @@ class Lexer } } + // add end of file token + $output[] = new Token( + TokenType::EndOfFile, + "", + "", + $this->line, + $this->column + ); + return $output; } @@ -238,13 +288,6 @@ class Lexer return $this->input[$this->position + 1] ?? null; } - /* - public function seek(int $position): void - { - $this->position = $position; - } - */ - public function advance(int $steps): void { $this->position += $steps; @@ -283,7 +326,7 @@ class Lexer $startPosition = $this->position; $endPosition = $this->position; - while ($this->isIdentifierChar($this->getCurrentChar())) { + while ($this->isIdentifierChar($this->getCurrentChar()) or $this->isNumberChar($this->getCurrentChar())) { $endPosition = $this->position; $this->advance(1); } @@ -291,6 +334,7 @@ class Lexer $value = substr($this->input, $startPosition, $endPosition - $startPosition + 1); return new Token( + TokenType::Identifier, $value, $value, $this->line, @@ -322,8 +366,9 @@ class Lexer $value = substr($this->input, $startPosition, $endPosition - $startPosition + 1); return new Token( + TokenType::Number, $value, - $value, + str_replace("_", "", $value), $this->line, $startColumn, ); @@ -357,6 +402,7 @@ class Lexer $rawValue = substr($this->input, $startPosition, $endPosition - $startPosition + 1); return new Token( + TokenType::String, $rawValue, substr($rawValue, 1, strlen($rawValue) - 2), $startLine, @@ -379,6 +425,7 @@ class Lexer $rawValue = substr($this->input, $startPosition, $endPosition - $startPosition + 1); return new Token( + TokenType::Comment, $rawValue, trim(substr($rawValue, 2)), $this->line, @@ -414,6 +461,7 @@ class Lexer $rawValue = substr($this->input, $startPosition, $endPosition - $startPosition + 1); return new Token( + TokenType::Comment, $rawValue, trim(substr($rawValue, 2, strlen($rawValue) - 4)), $startLine, @@ -424,9 +472,22 @@ class Lexer class Token { public function __construct( + public TokenType $type, public string $literal, public string $value, public string $line, public string $column, ) {} } + +enum TokenType { + case Identifier; + case Number; + case String; + case Comment; + case Comparator; + case Operator; + case Assign; + case Pipe; + case EndOfFile; +} diff --git a/src/Parser/Parser.php b/src/Parser/Parser.php new file mode 100644 index 0000000..7d780b0 --- /dev/null +++ b/src/Parser/Parser.php @@ -0,0 +1,501 @@ +<?php + +namespace Mnml\Parser; + +use Mnml\Lexer\Token; +use Mnml\Lexer\TokenType; + +class Parser +{ + private array $tokens; + private int $position; + private array $errors; + private array $nodes; + + /** + * @param Token[] $tokens + */ + public function __construct(array $tokens) + { + $this->tokens = $tokens; + $this->position = 0; + $this->errors = []; + $this->nodes = []; + } + + public function parse(): array + { + while ($this->position < count($this->tokens) - 1) { + $currentToken = $this->getCurrentToken(); + + if ($currentToken->literal == "const") { + $this->nodes[] = $this->parseConst(); + } + + else { + $this->position += 1; + } + } + + return $this->nodes; + } + + public function printTree(): void + { + $tree = new Tree($this->nodes); + $this->printNodeRecursive($tree, 0); + } + + private function printNodeRecursive(Node $node, int $level = 0): void + { + $properties = get_object_vars($node); + + foreach ($properties as $propertyKey => $propertyValue) { + if ($propertyValue instanceof Node) { + echo sprintf(str_repeat("> ", $level) . "%s: %s\n", $propertyKey, (new \ReflectionClass($propertyValue))->getShortName()); + $this->printNodeRecursive($propertyValue, $level + 1); + } + + else if (is_array($propertyValue)) { + $length = count($propertyValue); + + echo sprintf(str_repeat("> ", $level) . "%s[%d]:\n", $propertyKey, $length); + + if ($length == 0) { + echo str_repeat("> ", $level + 1) . "None"; + } else { + foreach ($propertyValue as $idx => $item) { + echo sprintf(str_repeat("> ", $level + 1) . "[%d]%s\n", $idx + 1, (new \ReflectionClass($item))->getShortName()); + $this->printNodeRecursive($item, $level + 2); + } + } + } + + else { + if (! empty($propertyValue)) { + if ($propertyValue instanceof Token) { + echo sprintf(str_repeat("> ", $level) . "%s: %s - %d:%d\n", $propertyKey, $propertyValue->literal, $propertyValue->line, $propertyValue->column); + } else { + echo sprintf(str_repeat("> ", $level) . "%s: %s\n", $propertyKey, $propertyValue); + } + } else { + echo sprintf(str_repeat("> ", $level) . "%s: %s\n", $propertyKey, "_empty_"); + } + } + } + } + + private function getCurrentToken(): Token + { + return $this->tokens[$this->position]; + } + + private function getNextToken(): ?Token + { + return $this->tokens[$this->position + 1] ?? null; + } + + private function advance(int $steps): void + { + assert( + $this->position + $steps < count($this->tokens), + "Can't advance. Position out of bounds." + ); + + $this->position += $steps; + } + + private function parseConst(): Node + { + // skip const + $this->advance(1); + + $identifier = $this->getCurrentToken(); + + // skip : + $this->advance(2); + + $type = $this->parseType(); + + // skip = + $this->advance(1); + + $expression = $this->parseExpression( + shouldBeMap: $type instanceof MapTypeDeclaration, + shouldBeFunction: $type->left->literal == "function", + ); + + return new ConstVariableDeclaration( + $identifier, + $type, + $expression, + ); + } + + private function parseType(): Node + { + $currentToken = $this->getCurrentToken(); + $nextToken = $this->getNextToken(); + + if ($currentToken->literal == "[") { + return $this->parseArrayOrMapType(); + } + + else if (in_array($nextToken->literal, ["and", "or"])) { + $this->advance(2); + + return new TypeDeclaration($currentToken, $nextToken, $this->parseType()); + } + + else { + $this->advance(1); + + return new TypeDeclaration($currentToken); + } + } + + private function parseArrayOrMapType(): Node + { + // skip first [ + $this->advance(1); + + $key = $this->parseType(); + + $currentToken = $this->getCurrentToken(); + $nextToken = $this->getNextToken(); + + if ($currentToken->literal == "]" && $nextToken->literal == "[") { + // skip to first type + $this->advance(2); + + $value = $this->parseType(); + + // skip last ] + $this->advance(1); + + return new MapTypeDeclaration($key, $value); + } else { + // skip last ] + $this->advance(1); + + return new ArrayTypeDeclaration($key); + } + } + + private function parseExpression($shouldBeMap = false, $shouldBeFunction = false): Node|Token + { + $currentToken = $this->getCurrentToken(); + $currentExpression = $currentToken; + + if ($currentToken->literal == "[") { + $currentExpression = $this->parseArrayOrMap($shouldBeMap); + } + + else if ($currentToken->type == TokenType::Number) { + $currentExpression = $this->parseNumber(); + } + + else if ($currentToken->type == TokenType::String) { + $currentExpression = new StringNode($currentToken); + $this->advance(1); + } + + else if ($currentToken->literal == "(") { + if ($shouldBeFunction) { + $currentExpression = $this->parseFunctionDefinition(); + } + } + + $nextToken = $this->getCurrentToken(); + + if (in_array($nextToken->literal, ["+", "-", "*", "**", "/"])) { + $this->advance(1); + + return new OperatorExpression($currentExpression, $nextToken, $this->parseExpression(shouldBeMap: $shouldBeMap)); + } + + else if (in_array($nextToken->literal, ["<", ">", "<=", ">=", "=="])) { + $this->advance(1); + + return new CompareExpression($currentExpression, $nextToken, $this->parseExpression(shouldBeMap: $shouldBeMap)); + } + + else { + return $currentExpression; + } + } + + private function parseArrayOrMap($shouldBeMap = false): Node + { + $values = []; + + // skip first [ + if ($this->getCurrentToken()->literal == "[") { + $this->advance(1); + } + + $tokenShouldBeComma = false; + while ($this->getCurrentToken()->literal != "]") { + $currentToken = $this->getCurrentToken(); + + // skip , + if ($tokenShouldBeComma) { + if ($currentToken->literal == ",") { + $this->advance(1); + $tokenShouldBeComma = false; + continue; + } + + // , missing => error + else { + $error = sprintf( + "Expected \",\" at position %d,%d - got %s instead" . PHP_EOL, + $currentToken->line, + $currentToken->column, + $currentToken->literal + ); + $this->errors[] = $error; + + echo $error; + + break; + } + } + + // nested array or map + if ($currentToken->literal == "[") { + $values[] = $this->parseArrayOrMap(); + $tokenShouldBeComma = true; + } + + // skip comments + else if ($currentToken->type == TokenType::Comment) { + $this->advance(1); + continue; + } + + else { + $values[] = $this->parseArrayOrMapItem(); + $tokenShouldBeComma = true; + } + } + + // skip last ] + $this->advance(1); + + if ((count($values) > 0 and $values[0] instanceof MapItemNode) or $shouldBeMap) { + return new MapNode($values); + } else { + return new ArrayNode($values); + } + } + + private function parseArrayOrMapItem(): Node|Token + { + $key = $this->parseExpression(); + + // is map item + if ($this->getCurrentToken()->literal == "=") { + $this->advance(1); + + $value = $this->parseExpression(); + + return new MapItemNode($key, $value); + } + + // is array item + else { + return $key; + } + } + + private function parseNumber(): Node + { + $currentToken = $this->getCurrentToken(); + + $value = $currentToken->value; + if (str_contains($value, ".")) { + $value = floatval($value); + } else { + $value = intval($value); + } + + // step to next token + $this->advance(1); + + return new NumberNode($currentToken, $value); + } + + private function parseFunctionDefinition(): Node + { + // skip first ( + $this->advance(1); + + $parameters = $this->parseFunctionParameters(); + + // skip : + $this->advance(1); + + $returnType = $this->parseType(); + + $body = $this->parseFunctionBody(); + + return new FunctionDefinition( + $parameters, + $returnType, + ); + } + + private function parseFunctionParameters(): array + { + while ($this->getCurrentToken() != ")") { + # TODO + } + + $identifier = $this->getCurrentToken(); + + // skip : + $this->advance(1); + + $type = $this->parseType(); + + return new FunctionParameter( + + ); + } + + private function parseFunctionBody(): array + { + # TODO + } + + private function parseFunctionCall(){} +} + +class Node +{} + +class Tree extends Node +{ + /** + * @param Node[] $nodes + */ + public function __construct( + public array $nodes, + ) {} +} + +class ConstVariableDeclaration extends Node +{ + public function __construct( + public Token $identifier, + public Node|Token $type, + public Node|Token $expression, + ) {} +} + +class TypeDeclaration extends Node +{ + public function __construct( + public Token $left, + public ?Token $operator = null, + public Token|TypeDeclaration|null $right = null, + ) {} +} + +class ArrayTypeDeclaration extends Node +{ + public function __construct( + public TypeDeclaration|ArrayTypeDeclaration $value, + ) {}} + +class MapTypeDeclaration extends Node +{ + public function __construct( + public TypeDeclaration $key, + public TypeDeclaration|ArrayTypeDeclaration $value, + ) {} +} + +class ArrayNode extends Node +{ + /** + * @param Array<Token|Node> $values + */ + public function __construct( + public array $values, + ) {} +} + +class MapNode extends Node +{ + /** + * @param MapItemNode[] $values + */ + public function __construct( + public array $values, + ) {} +} +class MapItemNode extends Node +{ + public function __construct( + public Token|Node $key, + public Token|Node $value, + ) {} +} + +class OperatorExpression extends Node +{ + public function __construct( + public Token|Node $left, + public Token $operator, + public Token|Node $right, + ) {} +} + +class CompareExpression extends Node +{ + public function __construct( + public Token|Node $left, + public Token $operator, + public Token|Node $right, + ) {} +} + +class NumberNode extends Node +{ + public function __construct( + public Token $token, + public int|float $value, + ) {} +} + +class StringNode extends Node +{ + public function __construct( + public Token $token, + ) {} +} + +class FunctionDefinition extends Node +{ + public function __construct( + /** + * @param FunctionParameter[] $parameters + */ + public array $parameters, + public TypeDefinition $returnType, + /** + * @param Node[] $body + */ + public array $body, + ) {} +} + +class FunctionParameter extends Node +{ + public function __construct( + public Token $identifier, + public TypeDefinition $type, + ) {} +} diff --git a/test/const-array-complex.mnml b/test/const-array-complex.mnml new file mode 100644 index 0000000..07e2286 --- /dev/null +++ b/test/const-array-complex.mnml @@ -0,0 +1 @@ +const array: [[integer]] = [[1, 2], [3, 4], [5, 6, 7]] diff --git a/test/const-array.mnml b/test/const-array.mnml new file mode 100644 index 0000000..03c7163 --- /dev/null +++ b/test/const-array.mnml @@ -0,0 +1,2 @@ +const array: [integer] = [1, 2, 3] +const array2: [integer] = [3, 4, 5] diff --git a/test/const-function.mnml b/test/const-function.mnml new file mode 100644 index 0000000..1f77a73 --- /dev/null +++ b/test/const-function.mnml @@ -0,0 +1,6 @@ +const main: function = (input: string or integer): void { + const hello: string = input + " world!" + const bye: string = "bye!" + + return hello + " " + bye +} diff --git a/test/const-map.mnml b/test/const-map.mnml new file mode 100644 index 0000000..b63b68b --- /dev/null +++ b/test/const-map.mnml @@ -0,0 +1,11 @@ +const map: [string][string or integer or bool] = [ + "first" = 1 <= 2, + "second" = "two", + "third" = "3", + "fourth" = 4 > 3, +/* "fifth" = (): void { + return "5." + },*/ + "sixth" = 20_000, + "seventh" = 20.02, +] diff --git a/test/const-simple.mnml b/test/const-simple.mnml new file mode 100644 index 0000000..5da0a0f --- /dev/null +++ b/test/const-simple.mnml @@ -0,0 +1,2 @@ +const henshin: integer = 2 +const new: integer = henshin + 5 * 10 diff --git a/test/const-union.mnml b/test/const-union.mnml new file mode 100644 index 0000000..ed9fe81 --- /dev/null +++ b/test/const-union.mnml @@ -0,0 +1 @@ +const henshin: integer or string or bool = "2" diff --git a/test/test.test b/test/test.test new file mode 100644 index 0000000..9fea5b2 --- /dev/null +++ b/test/test.test @@ -0,0 +1,78 @@ +const henshin: integer = 2 // comment +// next comment +var ply: string = "cool!" +ply = "way cooler!!" + +const new: integer = henshin + 5 * 10 + +const test: integer = 1 + 1 + +const main: function = (input: string or integer): void { + const hello: string = input + " world!" + const bye: string = "bye!" + + return hello + " " + bye +} + +const x: string = main(input = "hello!") +print(x) + +const array: [integer] = [1, 2, 3] +const map: [string][string or integer] = [ + "first" = 1, + "second" = "two", + "third" = "3", + "fourth" = 4, + "fifth" = (): void { + return "5." + }, +] + +main("pipe1!") => print($) +main("pipe2!") => main($) => print($) + +const test_type: type = { + const test_field: string = "test" + var another_field: integer = 4 + + const test_function: function = (): void { + return self.test_field + } + + const another_function: function = (add: integer): integer { + return self.another_field + add + } +} + +const other_int: type = integer + +const inherited_type: type[test_type] = { + const nested: test_type = test_type() + + const new_function: function = (): void { + return self.another_field + 4 + } +} + +const object: inherited_type = inherited_type() +object.test_field = "hey" +object.test_function() => print($) +object.nested.another_field = 5 +object.nested.another_function(add = 2) => print($) + +if (henshin == 2) { + print("nice") +} +print(henshin == 3 or henshin > 1) +henshin = 4 +if (henshin == 3) { + print("shouldn't be") +} else if (henshin == 2 or henshin != 3) { + print("else will") +} else if (henshin != 3 and henshin != 2) { + print("else won't") +} + +const scoped: [string][string or function] = import("test_import.test") +print(scoped) +scoped.exported_function() => print($) |