summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Weipert <git@mail.dweipert.de>2025-01-02 11:14:26 +0100
committerDaniel Weipert <git@mail.dweipert.de>2025-01-02 11:14:26 +0100
commit64f56807a090a2d3e7a565caf63cfd0e265b41ca (patch)
treebbedfcea6718d091575e6cb613b42ec67299c52f
parent02864c8c29aee80d59cbd7251046f77a3e8e4093 (diff)
intermediate parser commit
-rwxr-xr-x[-rw-r--r--]mnml11
-rw-r--r--src/Lexer/Lexer.php81
-rw-r--r--src/Parser/Parser.php501
-rw-r--r--test/const-array-complex.mnml1
-rw-r--r--test/const-array.mnml2
-rw-r--r--test/const-function.mnml6
-rw-r--r--test/const-map.mnml11
-rw-r--r--test/const-simple.mnml2
-rw-r--r--test/const-union.mnml1
-rw-r--r--test/test.test78
10 files changed, 682 insertions, 12 deletions
diff --git a/mnml b/mnml
index ae5f43c..33f7ca0 100644..100755
--- a/mnml
+++ b/mnml
@@ -1,10 +1,17 @@
+#!/usr/bin/env php
<?php
require "vendor/autoload.php";
use Mnml\Lexer\Lexer;
+use Mnml\Parser\Parser;
+
+// The file to parse is taken from the first command line argument.
+// Nothing may stand between the shebang line and the opening tag: text outside
+// of PHP tags (even a blank line) is written to the output.
+$path = isset($argv[1]) ? realpath($argv[1]) : false;
+$input = $path === false ? false : file_get_contents($path);
+if ($input === false) {
+    fwrite(STDERR, "Usage: mnml <file>" . PHP_EOL);
+    exit(1);
+}
-$input = file_get_contents(__DIR__ . "/test/test.mnml");
$lexer = new Lexer($input);
$tokens = $lexer->lex();
-#var_dump($tokens);
+
+// Parse the token stream and print the resulting tree.
+$parser = new Parser($tokens);
+$nodes = $parser->parse();
+$parser->printTree();
diff --git a/src/Lexer/Lexer.php b/src/Lexer/Lexer.php
index 747b0c9..fba9623 100644
--- a/src/Lexer/Lexer.php
+++ b/src/Lexer/Lexer.php
@@ -27,7 +27,7 @@ class Lexer
"(", ")",
"[", "]",
"{", "}",
- "$",
+ "$", ".",
];
$lastPosition = -1;
@@ -58,6 +58,7 @@ class Lexer
// equals
if ($nextChar == "=") {
$output[] = new Token(
+ TokenType::Comparator,
$currentChar . $nextChar,
$currentChar . $nextChar,
$this->line,
@@ -69,6 +70,7 @@ class Lexer
// pipe
else if ($nextChar == ">") {
$output[] = new Token(
+ TokenType::Pipe,
$currentChar . $nextChar,
$currentChar . $nextChar,
$this->line,
@@ -80,6 +82,7 @@ class Lexer
// assign
else {
$output[] = new Token(
+ TokenType::Assign,
$currentChar,
$currentChar,
$this->line,
@@ -107,6 +110,7 @@ class Lexer
// divide
else {
$output[] = new Token(
+ TokenType::Operator,
$currentChar,
$currentChar,
$this->line,
@@ -124,6 +128,7 @@ class Lexer
// exponential
if ($nextChar == "*") {
$output[] = new Token(
+ TokenType::Operator,
$currentChar . $nextChar,
$currentChar . $nextChar,
$this->line,
@@ -135,6 +140,7 @@ class Lexer
// multiply
else {
$output[] = new Token(
+ TokenType::Operator,
$currentChar,
$currentChar,
$this->line,
@@ -152,6 +158,7 @@ class Lexer
// lower equals
if ($nextChar == "=") {
$output[] = new Token(
+ TokenType::Comparator,
$currentChar . $nextChar,
$currentChar . $nextChar,
$this->line,
@@ -162,6 +169,7 @@ class Lexer
else {
$output[] = new Token(
+ TokenType::Comparator,
$currentChar,
$currentChar,
$this->line,
@@ -179,6 +187,7 @@ class Lexer
// greater equals
if ($nextChar == "=") {
$output[] = new Token(
+ TokenType::Comparator,
$currentChar . $nextChar,
$currentChar . $nextChar,
$this->line,
@@ -189,6 +198,37 @@ class Lexer
else {
$output[] = new Token(
+ TokenType::Comparator,
+ $currentChar,
+ $currentChar,
+ $this->line,
+ $startColumn,
+ );
+ $this->advance(1);
+ }
+ }
+
+ // exclamation mark
+ else if ($currentChar == "!") {
+ $nextChar = $this->getNextChar();
+ $startColumn = $this->column;
+
+ // not equal
+ if ($nextChar == "=") {
+ $output[] = new Token(
+ TokenType::Comparator,
+ $currentChar . $nextChar,
+ $currentChar . $nextChar,
+ $this->line,
+ $startColumn,
+ );
+ $this->advance(2);
+ }
+
+ // not
+ else {
+ $output[] = new Token(
+ TokenType::Operator,
$currentChar,
$currentChar,
$this->line,
@@ -201,6 +241,7 @@ class Lexer
// single char tokens
else if (in_array($currentChar, $singleCharTokens)) {
$output[] = new Token(
+ TokenType::Operator, # TODO
$currentChar,
$currentChar,
$this->line,
@@ -225,6 +266,15 @@ class Lexer
}
}
+ // add end of file token
+ $output[] = new Token(
+ TokenType::EndOfFile,
+ "",
+ "",
+ $this->line,
+ $this->column
+ );
+
return $output;
}
@@ -238,13 +288,6 @@ class Lexer
return $this->input[$this->position + 1] ?? null;
}
- /*
- public function seek(int $position): void
- {
- $this->position = $position;
- }
- */
-
public function advance(int $steps): void
{
$this->position += $steps;
@@ -283,7 +326,7 @@ class Lexer
$startPosition = $this->position;
$endPosition = $this->position;
- while ($this->isIdentifierChar($this->getCurrentChar())) {
+ while ($this->isIdentifierChar($this->getCurrentChar()) or $this->isNumberChar($this->getCurrentChar())) {
$endPosition = $this->position;
$this->advance(1);
}
@@ -291,6 +334,7 @@ class Lexer
$value = substr($this->input, $startPosition, $endPosition - $startPosition + 1);
return new Token(
+ TokenType::Identifier,
$value,
$value,
$this->line,
@@ -322,8 +366,9 @@ class Lexer
$value = substr($this->input, $startPosition, $endPosition - $startPosition + 1);
return new Token(
+ TokenType::Number,
$value,
- $value,
+ str_replace("_", "", $value),
$this->line,
$startColumn,
);
@@ -357,6 +402,7 @@ class Lexer
$rawValue = substr($this->input, $startPosition, $endPosition - $startPosition + 1);
return new Token(
+ TokenType::String,
$rawValue,
substr($rawValue, 1, strlen($rawValue) - 2),
$startLine,
@@ -379,6 +425,7 @@ class Lexer
$rawValue = substr($this->input, $startPosition, $endPosition - $startPosition + 1);
return new Token(
+ TokenType::Comment,
$rawValue,
trim(substr($rawValue, 2)),
$this->line,
@@ -414,6 +461,7 @@ class Lexer
$rawValue = substr($this->input, $startPosition, $endPosition - $startPosition + 1);
return new Token(
+ TokenType::Comment,
$rawValue,
trim(substr($rawValue, 2, strlen($rawValue) - 4)),
$startLine,
@@ -424,9 +472,22 @@ class Lexer
+/**
+ * A single token produced by the Lexer.
+ *
+ * $type is the token category, $literal the raw text taken from the input and
+ * $value the processed text (the lexer strips the first and last character of
+ * strings, removes "_" from numbers and trims comment markers).
+ * $line and $column are the position values the lexer passes in.
+ */
class Token {
public function __construct(
+ public TokenType $type,
public string $literal,
public string $value,
public string $line,
public string $column,
) {}
}
+
+/**
+ * Category the Lexer assigns to each Token.
+ */
+enum TokenType {
+ // produced by the lexer's identifier reader
+ case Identifier;
+ // numeric literal; the token value has "_" separators removed
+ case Number;
+ // string literal; the token value has its first and last character stripped
+ case String;
+ // line or block comment; the token value is the trimmed comment text
+ case Comment;
+ // equals, lower/greater (or equal) and "!=" comparisons
+ case Comparator;
+ // divide, multiply, exponential, "!" and (for now, see the lexer TODO) every single char token
+ case Operator;
+ // assignment
+ case Assign;
+ // pipe
+ case Pipe;
+ // appended once after the last real token
+ case EndOfFile;
+}
diff --git a/src/Parser/Parser.php b/src/Parser/Parser.php
new file mode 100644
index 0000000..7d780b0
--- /dev/null
+++ b/src/Parser/Parser.php
@@ -0,0 +1,501 @@
+<?php
+
+namespace Mnml\Parser;
+
+use Mnml\Lexer\Token;
+use Mnml\Lexer\TokenType;
+
+class Parser
+{
+ private array $tokens;
+ private int $position;
+ private array $errors;
+ private array $nodes;
+
+    /**
+     * Sets up a parser over an already lexed token list.
+     *
+     * The cursor starts on the first token; the error and node lists start empty.
+     *
+     * @param Token[] $tokens Tokens to parse.
+     */
+    public function __construct(array $tokens)
+    {
+        [$this->tokens, $this->position, $this->errors, $this->nodes] = [$tokens, 0, [], []];
+    }
+
+    /**
+     * Walks the token list and collects a node for every `const` declaration found.
+     *
+     * Tokens that do not start a `const` declaration are stepped over one at a time.
+     * The walk ends once the cursor reaches the last token of the list.
+     *
+     * @return Node[] All nodes collected on this parser so far.
+     */
+    public function parse(): array
+    {
+        $lastIndex = count($this->tokens) - 1;
+
+        while ($this->position < $lastIndex) {
+            // anything but a const declaration is skipped
+            if ($this->getCurrentToken()->literal != "const") {
+                $this->position += 1;
+                continue;
+            }
+
+            $this->nodes[] = $this->parseConst();
+        }
+
+        return $this->nodes;
+    }
+
+    /**
+     * Prints the collected nodes, wrapped in a Tree root, starting at nesting level 0.
+     */
+    public function printTree(): void
+    {
+        $this->printNodeRecursive(new Tree($this->nodes), 0);
+    }
+
+    /**
+     * Prints the properties of $node, one line each, prefixed with "> " per nesting level.
+     *
+     * Node values (as properties or as array items) are printed with their short class
+     * name and then descended into. Token values are printed as "literal - line:column".
+     * null and "" are printed as "_empty_"; every other value is printed as is.
+     *
+     * @param Node $node  Node whose properties are printed.
+     * @param int  $level Current nesting depth, i.e. the number of "> " prefixes.
+     */
+    private function printNodeRecursive(Node $node, int $level = 0): void
+    {
+        $indent = str_repeat("> ", $level);
+        $itemIndent = str_repeat("> ", $level + 1);
+
+        foreach (get_object_vars($node) as $propertyKey => $propertyValue) {
+            if ($propertyValue instanceof Node) {
+                echo sprintf("%s%s: %s\n", $indent, $propertyKey, (new \ReflectionClass($propertyValue))->getShortName());
+                $this->printNodeRecursive($propertyValue, $level + 1);
+            }
+
+            else if (is_array($propertyValue)) {
+                echo sprintf("%s%s[%d]:\n", $indent, $propertyKey, count($propertyValue));
+
+                if (count($propertyValue) == 0) {
+                    // end the line, otherwise the next property is printed on the same line
+                    echo $itemIndent . "None\n";
+                }
+
+                foreach ($propertyValue as $idx => $item) {
+                    if ($item instanceof Node) {
+                        echo sprintf("%s[%d]%s\n", $itemIndent, $idx + 1, (new \ReflectionClass($item))->getShortName());
+                        $this->printNodeRecursive($item, $level + 2);
+                    }
+
+                    // items can be bare tokens (parseArrayOrMapItem() may return one);
+                    // they are not nodes, so they are printed instead of descended into
+                    else if ($item instanceof Token) {
+                        echo sprintf("%s[%d]Token: %s - %d:%d\n", $itemIndent, $idx + 1, $item->literal, $item->line, $item->column);
+                    }
+
+                    else {
+                        echo sprintf("%s[%d]%s\n", $itemIndent, $idx + 1, get_debug_type($item));
+                    }
+                }
+            }
+
+            else if ($propertyValue instanceof Token) {
+                echo sprintf("%s%s: %s - %d:%d\n", $indent, $propertyKey, $propertyValue->literal, $propertyValue->line, $propertyValue->column);
+            }
+
+            // only null and "" count as empty; a number node holding 0 has a value
+            else if ($propertyValue === null || $propertyValue === "") {
+                echo sprintf("%s%s: %s\n", $indent, $propertyKey, "_empty_");
+            }
+
+            else {
+                echo sprintf("%s%s: %s\n", $indent, $propertyKey, $propertyValue);
+            }
+        }
+    }
+
+    /**
+     * Returns the token the cursor is on.
+     */
+    private function getCurrentToken(): Token
+    {
+        $index = $this->position;
+
+        return $this->tokens[$index];
+    }
+
+    /**
+     * Returns the token right after the cursor, or null when there is none.
+     */
+    private function getNextToken(): ?Token
+    {
+        $lookahead = $this->position + 1;
+
+        return $this->tokens[$lookahead] ?? null;
+    }
+
+    /**
+     * Moves the cursor forward by $steps tokens.
+     *
+     * The bounds check is an assert() and therefore only effective while
+     * assertions are enabled.
+     *
+     * @param int $steps Number of tokens to move forward.
+     */
+    private function advance(int $steps): void
+    {
+        $target = $this->position + $steps;
+
+        assert(
+            $target < count($this->tokens),
+            "Can't advance. Position out of bounds."
+        );
+
+        $this->position = $target;
+    }
+
+    /**
+     * Parses `const <identifier>: <type> = <expression>` starting at the `const` token.
+     *
+     * The declared type decides how the expression is read: a map type forces a
+     * map node, a plain type named "function" allows a function definition.
+     *
+     * @return Node The resulting ConstVariableDeclaration.
+     */
+    private function parseConst(): Node
+    {
+        // skip const
+        $this->advance(1);
+
+        $identifier = $this->getCurrentToken();
+
+        // skip identifier and :
+        $this->advance(2);
+
+        $type = $this->parseType();
+
+        // skip =
+        $this->advance(1);
+
+        // only a plain TypeDeclaration has a `left` token; array and map type
+        // nodes do not, so they can never name the function type
+        $isFunctionType = $type instanceof TypeDeclaration
+            && $type->left->literal == "function";
+
+        $expression = $this->parseExpression(
+            shouldBeMap: $type instanceof MapTypeDeclaration,
+            shouldBeFunction: $isFunctionType,
+        );
+
+        return new ConstVariableDeclaration(
+            $identifier,
+            $type,
+            $expression,
+        );
+    }
+
+    /**
+     * Parses a type starting at the current token.
+     *
+     * A leading "[" is handed to parseArrayOrMapType(). Otherwise the current
+     * token is the type name; when it is followed by "and" / "or" the type after
+     * that keyword is parsed recursively and attached as the right side.
+     *
+     * @return Node The parsed type node.
+     */
+    private function parseType(): Node
+    {
+        $typeToken = $this->getCurrentToken();
+
+        // array and map types have their own parser
+        if ($typeToken->literal == "[") {
+            return $this->parseArrayOrMapType();
+        }
+
+        $keywordToken = $this->getNextToken();
+        $isCombined = in_array($keywordToken->literal, ["and", "or"]);
+
+        // single type: step over its name
+        if (! $isCombined) {
+            $this->advance(1);
+
+            return new TypeDeclaration($typeToken);
+        }
+
+        // combined type: step over the name and the keyword, then parse the rest
+        $this->advance(2);
+
+        return new TypeDeclaration($typeToken, $keywordToken, $this->parseType());
+    }
+
+    /**
+     * Parses `[<type>]` (array type) or `[<key type>][<value type>]` (map type),
+     * starting at the opening "[".
+     *
+     * @return Node An ArrayTypeDeclaration or a MapTypeDeclaration.
+     */
+    private function parseArrayOrMapType(): Node
+    {
+        // step over the opening [
+        $this->advance(1);
+
+        $firstType = $this->parseType();
+
+        // "][" directly after the first type means a value type follows
+        $isMap = $this->getCurrentToken()->literal == "]"
+            && $this->getNextToken()->literal == "[";
+
+        if (! $isMap) {
+            // step over the closing ]
+            $this->advance(1);
+
+            return new ArrayTypeDeclaration($firstType);
+        }
+
+        // step over "][" to reach the value type
+        $this->advance(2);
+
+        $valueType = $this->parseType();
+
+        // step over the closing ]
+        $this->advance(1);
+
+        return new MapTypeDeclaration($firstType, $valueType);
+    }
+
+    /**
+     * Parses an expression starting at the current token.
+     *
+     * The first operand is an array/map, a number, a string, a function definition
+     * (only when $shouldBeFunction is set) or an identifier token. When an arithmetic
+     * or comparison operator follows, the rest is parsed recursively as the right
+     * operand, so expressions nest to the right and no operator precedence is applied.
+     *
+     * @param bool $shouldBeMap      Passed on to parseArrayOrMap() to force a map node.
+     * @param bool $shouldBeFunction Whether a leading "(" starts a function definition.
+     *
+     * @return Node|Token The expression node, or the bare token for operands without a node type.
+     */
+    private function parseExpression(bool $shouldBeMap = false, bool $shouldBeFunction = false): Node|Token
+    {
+        $currentToken = $this->getCurrentToken();
+        $currentExpression = $currentToken;
+
+        if ($currentToken->literal == "[") {
+            $currentExpression = $this->parseArrayOrMap($shouldBeMap);
+        }
+
+        else if ($currentToken->type == TokenType::Number) {
+            $currentExpression = $this->parseNumber();
+        }
+
+        else if ($currentToken->type == TokenType::String) {
+            $currentExpression = new StringNode($currentToken);
+            $this->advance(1);
+        }
+
+        else if ($currentToken->literal == "(") {
+            if ($shouldBeFunction) {
+                $currentExpression = $this->parseFunctionDefinition();
+            }
+        }
+
+        // an identifier operand has to be stepped over as well, otherwise the
+        // operator behind it is never seen (e.g. `henshin + 5 * 10`)
+        else if ($currentToken->type == TokenType::Identifier) {
+            $this->advance(1);
+        }
+
+        // the token right behind the operand that was just read
+        $followingToken = $this->getCurrentToken();
+
+        if (in_array($followingToken->literal, ["+", "-", "*", "**", "/"], true)) {
+            $this->advance(1);
+
+            return new OperatorExpression($currentExpression, $followingToken, $this->parseExpression(shouldBeMap: $shouldBeMap));
+        }
+
+        // "!=" is emitted by the lexer as a comparator too
+        else if (in_array($followingToken->literal, ["<", ">", "<=", ">=", "==", "!="], true)) {
+            $this->advance(1);
+
+            return new CompareExpression($currentExpression, $followingToken, $this->parseExpression(shouldBeMap: $shouldBeMap));
+        }
+
+        else {
+            return $currentExpression;
+        }
+    }
+
+    /**
+     * Parses a comma separated list of items between "[" and "]".
+     *
+     * Nested "[" start a nested array or map, comments inside the list are skipped.
+     * A missing "," or a missing closing "]" is recorded in the error list, echoed,
+     * and ends the list early.
+     *
+     * @param bool $shouldBeMap Forces a MapNode even when the first item is not a map item.
+     *
+     * @return Node A MapNode when forced or when the first item is a MapItemNode, otherwise an ArrayNode.
+     */
+    private function parseArrayOrMap(bool $shouldBeMap = false): Node
+    {
+        $values = [];
+
+        // skip first [
+        if ($this->getCurrentToken()->literal == "[") {
+            $this->advance(1);
+        }
+
+        $tokenShouldBeComma = false;
+        while ($this->getCurrentToken()->literal != "]") {
+            $currentToken = $this->getCurrentToken();
+
+            // input ended before the closing ] => error
+            if ($currentToken->type == TokenType::EndOfFile) {
+                $error = sprintf(
+                    "Expected \"]\" at position %d,%d - got end of file instead" . PHP_EOL,
+                    $currentToken->line,
+                    $currentToken->column
+                );
+                $this->errors[] = $error;
+
+                echo $error;
+
+                break;
+            }
+
+            // skip ,
+            if ($tokenShouldBeComma) {
+                if ($currentToken->literal == ",") {
+                    $this->advance(1);
+                    $tokenShouldBeComma = false;
+                    continue;
+                }
+
+                // , missing => error
+                else {
+                    $error = sprintf(
+                        "Expected \",\" at position %d,%d - got %s instead" . PHP_EOL,
+                        $currentToken->line,
+                        $currentToken->column,
+                        $currentToken->literal
+                    );
+                    $this->errors[] = $error;
+
+                    echo $error;
+
+                    break;
+                }
+            }
+
+            // nested array or map
+            if ($currentToken->literal == "[") {
+                $values[] = $this->parseArrayOrMap();
+                $tokenShouldBeComma = true;
+            }
+
+            // skip comments
+            else if ($currentToken->type == TokenType::Comment) {
+                $this->advance(1);
+                continue;
+            }
+
+            else {
+                $values[] = $this->parseArrayOrMapItem();
+                $tokenShouldBeComma = true;
+            }
+        }
+
+        // skip last ] (or the offending token after a missing ","); the end of
+        // file token is the last one, so there is nothing to step to behind it
+        if ($this->getCurrentToken()->type != TokenType::EndOfFile) {
+            $this->advance(1);
+        }
+
+        if ((count($values) > 0 and $values[0] instanceof MapItemNode) or $shouldBeMap) {
+            return new MapNode($values);
+        } else {
+            return new ArrayNode($values);
+        }
+    }
+
+    /**
+     * Parses one list item: either a plain expression or `<key> = <value>`.
+     *
+     * @return Node|Token A MapItemNode when "=" follows the first expression, otherwise that expression.
+     */
+    private function parseArrayOrMapItem(): Node|Token
+    {
+        $first = $this->parseExpression();
+
+        // no "=" behind it: plain array item
+        if ($this->getCurrentToken()->literal != "=") {
+            return $first;
+        }
+
+        // map item: step over the "=" and read the value
+        $this->advance(1);
+
+        return new MapItemNode($first, $this->parseExpression());
+    }
+
+    /**
+     * Turns the current number token into a NumberNode and steps to the next token.
+     *
+     * The token value becomes a float when it contains ".", otherwise an int.
+     */
+    private function parseNumber(): Node
+    {
+        $numberToken = $this->getCurrentToken();
+        $text = $numberToken->value;
+
+        $parsed = str_contains($text, ".") ? floatval($text) : intval($text);
+
+        // move on to the token behind the number
+        $this->advance(1);
+
+        return new NumberNode($numberToken, $parsed);
+    }
+
+    /**
+     * Parses `(<parameters>): <return type> <body>` starting at the opening "(".
+     *
+     * @return Node The resulting FunctionDefinition.
+     */
+    private function parseFunctionDefinition(): Node
+    {
+        // skip first (
+        $this->advance(1);
+
+        $parameters = $this->parseFunctionParameters();
+
+        // skip :
+        $this->advance(1);
+
+        $returnType = $this->parseType();
+
+        $body = $this->parseFunctionBody();
+
+        // FunctionDefinition takes the body as its third argument
+        return new FunctionDefinition(
+            $parameters,
+            $returnType,
+            $body,
+        );
+    }
+
+    /**
+     * Parses `<identifier>: <type>` parameters, separated by ",", up to and
+     * including the closing ")". Expects the cursor behind the opening "(".
+     *
+     * A missing ":" or a missing ")" is recorded in the error list, echoed, and
+     * ends the list early.
+     *
+     * @return FunctionParameter[] The parameters in source order.
+     */
+    private function parseFunctionParameters(): array
+    {
+        $parameters = [];
+
+        while ($this->getCurrentToken()->literal != ")") {
+            $currentToken = $this->getCurrentToken();
+
+            // input ended before the closing ) => error
+            if ($currentToken->type == TokenType::EndOfFile) {
+                $error = sprintf(
+                    "Expected \")\" at position %d,%d - got end of file instead" . PHP_EOL,
+                    $currentToken->line,
+                    $currentToken->column
+                );
+                $this->errors[] = $error;
+
+                echo $error;
+
+                break;
+            }
+
+            // skip , between parameters
+            if ($currentToken->literal == ",") {
+                $this->advance(1);
+                continue;
+            }
+
+            $identifier = $currentToken;
+
+            // skip identifier
+            $this->advance(1);
+
+            // : missing => error
+            $separator = $this->getCurrentToken();
+            if ($separator->literal != ":") {
+                $error = sprintf(
+                    "Expected \":\" at position %d,%d - got %s instead" . PHP_EOL,
+                    $separator->line,
+                    $separator->column,
+                    $separator->literal
+                );
+                $this->errors[] = $error;
+
+                echo $error;
+
+                break;
+            }
+
+            // skip :
+            $this->advance(1);
+
+            $parameters[] = new FunctionParameter($identifier, $this->parseType());
+        }
+
+        // skip last ) so the caller continues at the ":" of the return type
+        if ($this->getCurrentToken()->literal == ")") {
+            $this->advance(1);
+        }
+
+        return $parameters;
+    }
+
+    /**
+     * Steps over a function body enclosed in "{" and "}", including nested braces.
+     *
+     * The statements inside the body are not parsed yet, so the returned list is
+     * always empty. A body that is not closed before the end of the input is
+     * recorded in the error list and echoed.
+     *
+     * @return Node[] The body nodes (currently always empty).
+     */
+    private function parseFunctionBody(): array
+    {
+        $body = [];
+
+        // no body to step over
+        if ($this->getCurrentToken()->literal != "{") {
+            return $body;
+        }
+
+        // skip first {
+        $this->advance(1);
+
+        # TODO: parse the statements of the body instead of only skipping them
+        $depth = 1;
+        while ($depth > 0) {
+            $currentToken = $this->getCurrentToken();
+
+            // input ended before the closing } => error
+            if ($currentToken->type == TokenType::EndOfFile) {
+                $error = sprintf(
+                    "Expected \"}\" at position %d,%d - got end of file instead" . PHP_EOL,
+                    $currentToken->line,
+                    $currentToken->column
+                );
+                $this->errors[] = $error;
+
+                echo $error;
+
+                break;
+            }
+
+            if ($currentToken->literal == "{") {
+                $depth += 1;
+            } else if ($currentToken->literal == "}") {
+                $depth -= 1;
+            }
+
+            $this->advance(1);
+        }
+
+        return $body;
+    }
+
+ // Placeholder for parsing function calls: empty so far and not called from within this class.
+ private function parseFunctionCall(){}
+}
+
+/**
+ * Base class of all parse tree nodes; holds no data itself.
+ */
+class Node
+{}
+
+/**
+ * Node that only holds a list of nodes. Parser::printTree() wraps the parsed
+ * nodes in it to have a single node to start printing from.
+ */
+class Tree extends Node
+{
+ /**
+ * @param Node[] $nodes The nodes held by this tree.
+ */
+ public function __construct(
+ public array $nodes,
+ ) {}
+}
+
+/**
+ * A `const <identifier>: <type> = <expression>` declaration as built by Parser::parseConst().
+ */
+class ConstVariableDeclaration extends Node
+{
+ /**
+ * @param Token $identifier Token naming the constant.
+ * @param Node|Token $type Declared type.
+ * @param Node|Token $expression Value assigned to the constant.
+ */
+ public function __construct(
+ public Token $identifier,
+ public Node|Token $type,
+ public Node|Token $expression,
+ ) {}
+}
+
+/**
+ * A named type, optionally combined with a further type through "and" / "or".
+ */
+class TypeDeclaration extends Node
+{
+    /**
+     * @param Token           $left     Token naming the type.
+     * @param Token|null      $operator The "and" / "or" token, when a further type follows.
+     * @param Token|Node|null $right    The further type. Parser::parseType() can return any
+     *                                  type node here (plain, array or map), hence Node.
+     */
+    public function __construct(
+        public Token $left,
+        public ?Token $operator = null,
+        public Token|Node|null $right = null,
+    ) {}
+}
+
+/**
+ * An array type `[<type>]`.
+ */
+class ArrayTypeDeclaration extends Node
+{
+    /**
+     * @param TypeDeclaration|ArrayTypeDeclaration|MapTypeDeclaration $value Type of the items.
+     *        Map types are accepted as well, since Parser::parseType() returns one for `[[a][b]]`.
+     */
+    public function __construct(
+        public TypeDeclaration|ArrayTypeDeclaration|MapTypeDeclaration $value,
+    ) {}
+}
+
+/**
+ * A map type `[<key type>][<value type>]`.
+ */
+class MapTypeDeclaration extends Node
+{
+    /**
+     * @param TypeDeclaration $key Type of the keys.
+     * @param TypeDeclaration|ArrayTypeDeclaration|MapTypeDeclaration $value Type of the values.
+     *        Map types are accepted as well, since Parser::parseType() returns one for `[a][[b][c]]`.
+     */
+    public function __construct(
+        public TypeDeclaration $key,
+        public TypeDeclaration|ArrayTypeDeclaration|MapTypeDeclaration $value,
+    ) {}
+}
+
+/**
+ * An array literal; built by Parser::parseArrayOrMap() when the list is not treated as a map.
+ */
+class ArrayNode extends Node
+{
+ /**
+ * @param Array<Token|Node> $values The items in source order.
+ */
+ public function __construct(
+ public array $values,
+ ) {}
+}
+
+/**
+ * A map literal; built by Parser::parseArrayOrMap() when the first item is a
+ * MapItemNode or when a map was asked for.
+ */
+class MapNode extends Node
+{
+ /**
+ * NOTE(review): when the parser is asked for a map, the items are not checked
+ * and may be something other than MapItemNode.
+ *
+ * @param MapItemNode[] $values
+ */
+ public function __construct(
+ public array $values,
+ ) {}
+}
+/**
+ * One `<key> = <value>` item of a map literal.
+ */
+class MapItemNode extends Node
+{
+ /**
+ * @param Token|Node $key Expression in front of the "=".
+ * @param Token|Node $value Expression behind the "=".
+ */
+ public function __construct(
+ public Token|Node $key,
+ public Token|Node $value,
+ ) {}
+}
+
+/**
+ * Two operands joined by one of the operators "+", "-", "*", "**", "/".
+ */
+class OperatorExpression extends Node
+{
+ /**
+ * @param Token|Node $left Operand in front of the operator.
+ * @param Token $operator The operator token.
+ * @param Token|Node $right Everything parsed behind the operator.
+ */
+ public function __construct(
+ public Token|Node $left,
+ public Token $operator,
+ public Token|Node $right,
+ ) {}
+}
+
+/**
+ * Two operands joined by a comparison operator.
+ */
+class CompareExpression extends Node
+{
+ /**
+ * @param Token|Node $left Operand in front of the comparator.
+ * @param Token $operator The comparator token.
+ * @param Token|Node $right Everything parsed behind the comparator.
+ */
+ public function __construct(
+ public Token|Node $left,
+ public Token $operator,
+ public Token|Node $right,
+ ) {}
+}
+
+/**
+ * A number literal together with its converted value.
+ */
+class NumberNode extends Node
+{
+ /**
+ * @param Token $token The number token.
+ * @param int|float $value The token value converted by Parser::parseNumber().
+ */
+ public function __construct(
+ public Token $token,
+ public int|float $value,
+ ) {}
+}
+
+/**
+ * A string literal.
+ */
+class StringNode extends Node
+{
+ /**
+ * @param Token $token The string token.
+ */
+ public function __construct(
+ public Token $token,
+ ) {}
+}
+
+/**
+ * A function literal: parameters, return type and body.
+ */
+class FunctionDefinition extends Node
+{
+    /**
+     * @param FunctionParameter[] $parameters The declared parameters.
+     * @param Node                $returnType The return type node as returned by Parser::parseType().
+     * @param Node[]              $body       The nodes of the body; defaults to an empty list.
+     */
+    public function __construct(
+        public array $parameters,
+        // no class named TypeDefinition is defined in this file; the parser's type
+        // nodes (TypeDeclaration, ArrayTypeDeclaration, MapTypeDeclaration) are Nodes
+        public Node $returnType,
+        public array $body = [],
+    ) {}
+}
+
+/**
+ * One `<identifier>: <type>` parameter of a function definition.
+ */
+class FunctionParameter extends Node
+{
+    /**
+     * @param Token $identifier Token naming the parameter.
+     * @param Node  $type       The parameter type node as returned by Parser::parseType().
+     */
+    public function __construct(
+        public Token $identifier,
+        // no class named TypeDefinition is defined in this file; the parser's type
+        // nodes (TypeDeclaration, ArrayTypeDeclaration, MapTypeDeclaration) are Nodes
+        public Node $type,
+    ) {}
+}
diff --git a/test/const-array-complex.mnml b/test/const-array-complex.mnml
new file mode 100644
index 0000000..07e2286
--- /dev/null
+++ b/test/const-array-complex.mnml
@@ -0,0 +1 @@
+const array: [[integer]] = [[1, 2], [3, 4], [5, 6, 7]]
diff --git a/test/const-array.mnml b/test/const-array.mnml
new file mode 100644
index 0000000..03c7163
--- /dev/null
+++ b/test/const-array.mnml
@@ -0,0 +1,2 @@
+const array: [integer] = [1, 2, 3]
+const array2: [integer] = [3, 4, 5]
diff --git a/test/const-function.mnml b/test/const-function.mnml
new file mode 100644
index 0000000..1f77a73
--- /dev/null
+++ b/test/const-function.mnml
@@ -0,0 +1,6 @@
+const main: function = (input: string or integer): void {
+ const hello: string = input + " world!"
+ const bye: string = "bye!"
+
+ return hello + " " + bye
+}
diff --git a/test/const-map.mnml b/test/const-map.mnml
new file mode 100644
index 0000000..b63b68b
--- /dev/null
+++ b/test/const-map.mnml
@@ -0,0 +1,11 @@
+const map: [string][string or integer or bool] = [
+ "first" = 1 <= 2,
+ "second" = "two",
+ "third" = "3",
+ "fourth" = 4 > 3,
+/* "fifth" = (): void {
+ return "5."
+ },*/
+ "sixth" = 20_000,
+ "seventh" = 20.02,
+]
diff --git a/test/const-simple.mnml b/test/const-simple.mnml
new file mode 100644
index 0000000..5da0a0f
--- /dev/null
+++ b/test/const-simple.mnml
@@ -0,0 +1,2 @@
+const henshin: integer = 2
+const new: integer = henshin + 5 * 10
diff --git a/test/const-union.mnml b/test/const-union.mnml
new file mode 100644
index 0000000..ed9fe81
--- /dev/null
+++ b/test/const-union.mnml
@@ -0,0 +1 @@
+const henshin: integer or string or bool = "2"
diff --git a/test/test.test b/test/test.test
new file mode 100644
index 0000000..9fea5b2
--- /dev/null
+++ b/test/test.test
@@ -0,0 +1,78 @@
+const henshin: integer = 2 // comment
+// next comment
+var ply: string = "cool!"
+ply = "way cooler!!"
+
+const new: integer = henshin + 5 * 10
+
+const test: integer = 1 + 1
+
+const main: function = (input: string or integer): void {
+ const hello: string = input + " world!"
+ const bye: string = "bye!"
+
+ return hello + " " + bye
+}
+
+const x: string = main(input = "hello!")
+print(x)
+
+const array: [integer] = [1, 2, 3]
+const map: [string][string or integer] = [
+ "first" = 1,
+ "second" = "two",
+ "third" = "3",
+ "fourth" = 4,
+ "fifth" = (): void {
+ return "5."
+ },
+]
+
+main("pipe1!") => print($)
+main("pipe2!") => main($) => print($)
+
+const test_type: type = {
+ const test_field: string = "test"
+ var another_field: integer = 4
+
+ const test_function: function = (): void {
+ return self.test_field
+ }
+
+ const another_function: function = (add: integer): integer {
+ return self.another_field + add
+ }
+}
+
+const other_int: type = integer
+
+const inherited_type: type[test_type] = {
+ const nested: test_type = test_type()
+
+ const new_function: function = (): void {
+ return self.another_field + 4
+ }
+}
+
+const object: inherited_type = inherited_type()
+object.test_field = "hey"
+object.test_function() => print($)
+object.nested.another_field = 5
+object.nested.another_function(add = 2) => print($)
+
+if (henshin == 2) {
+ print("nice")
+}
+print(henshin == 3 or henshin > 1)
+henshin = 4
+if (henshin == 3) {
+ print("shouldn't be")
+} else if (henshin == 2 or henshin != 3) {
+ print("else will")
+} else if (henshin != 3 and henshin != 2) {
+ print("else won't")
+}
+
+const scoped: [string][string or function] = import("test_import.test")
+print(scoped)
+scoped.exported_function() => print($)