summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Daniel Weipert <git@mail.dweipert.de> 2024-12-31 15:17:23 +0100
committer: Daniel Weipert <git@mail.dweipert.de> 2024-12-31 15:17:23 +0100
commit: 02864c8c29aee80d59cbd7251046f77a3e8e4093 (patch)
tree: 514f17bfdfa14538f50031e3ed0cb71e39e8f454
lexer
-rw-r--r-- .gitignore 1
-rw-r--r-- composer.json 15
-rw-r--r-- composer.lock 18
-rw-r--r-- mnml 10
-rw-r--r-- src/Lexer/Lexer.php 432
-rw-r--r-- test/test.mnml 36
6 files changed, 512 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..57872d0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/vendor/
diff --git a/composer.json b/composer.json
new file mode 100644
index 0000000..4ba5a1e
--- /dev/null
+++ b/composer.json
@@ -0,0 +1,15 @@
+{
+ "name": "mnml/mnml",
+ "authors": [
+ {
+ "name": "Daniel Weipert",
+ "email": "git@mail.dweipert.de"
+ }
+ ],
+ "require": {},
+ "autoload": {
+ "psr-4": {
+ "Mnml\\": "src/"
+ }
+ }
+}
diff --git a/composer.lock b/composer.lock
new file mode 100644
index 0000000..5277af2
--- /dev/null
+++ b/composer.lock
@@ -0,0 +1,18 @@
+{
+ "_readme": [
+ "This file locks the dependencies of your project to a known state",
+ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
+ "This file is @generated automatically"
+ ],
+ "content-hash": "4d9e6aadd972d1d9b5e8c54714babfa2",
+ "packages": [],
+ "packages-dev": [],
+ "aliases": [],
+ "minimum-stability": "stable",
+ "stability-flags": {},
+ "prefer-stable": false,
+ "prefer-lowest": false,
+ "platform": {},
+ "platform-dev": {},
+ "plugin-api-version": "2.6.0"
+}
diff --git a/mnml b/mnml
new file mode 100644
index 0000000..ae5f43c
--- /dev/null
+++ b/mnml
@@ -0,0 +1,10 @@
+<?php
+
+// Entry script: runs the lexer over the bundled sample file test/test.mnml.
+
+// Resolve the autoloader relative to this file so the script also works
+// when it is started from a different working directory.
+require __DIR__ . "/vendor/autoload.php";
+
+use Mnml\Lexer\Lexer;
+
+$path = __DIR__ . "/test/test.mnml";
+$input = file_get_contents($path);
+
+// file_get_contents() returns false on failure; stop here instead of
+// handing a non-string to the lexer.
+if ($input === false) {
+    fwrite(STDERR, "Could not read " . $path . PHP_EOL);
+    exit(1);
+}
+
+$lexer = new Lexer($input);
+$tokens = $lexer->lex();
+#var_dump($tokens);
diff --git a/src/Lexer/Lexer.php b/src/Lexer/Lexer.php
new file mode 100644
index 0000000..747b0c9
--- /dev/null
+++ b/src/Lexer/Lexer.php
@@ -0,0 +1,432 @@
+<?php
+
+namespace Mnml\Lexer;
+
+/**
+ * Byte-oriented lexer that turns mnml source text into a flat list of Token objects.
+ *
+ * Positions are byte offsets into the input and columns are counted in bytes,
+ * so a multi-byte UTF-8 character advances the column by more than one.
+ * Comments are emitted as tokens; whitespace is skipped.
+ */
+class Lexer
+{
+    /** Two-character operators; these are tried before their one-character prefixes. */
+    private const TWO_CHAR_TOKENS = [
+        "==", "=>",
+        "**",
+        "<=", ">=",
+    ];
+
+    /** Characters that form a token on their own. */
+    private const SINGLE_CHAR_TOKENS = [
+        ":", ",",
+        "+", "-",
+        "(", ")",
+        "[", "]",
+        "{", "}",
+        "$",
+        "=", "/", "*", "<", ">",
+    ];
+
+    /** Characters skipped between tokens. */
+    private const WHITESPACE = [" ", "\t", "\n", "\r"];
+
+    public string $input;
+    public int $position;
+    public int $line;
+    public int $column;
+
+    /** @var string[] error messages collected while lexing, each terminated by PHP_EOL */
+    public array $errors;
+
+    /**
+     * @param string $input complete source text to tokenize
+     */
+    public function __construct(string $input)
+    {
+        $this->input = $input;
+        $this->position = 0;
+        $this->line = 1;
+        $this->column = 1;
+        // Initialise so that reading $errors after an error-free run does not throw.
+        $this->errors = [];
+    }
+
+    /**
+     * Tokenizes the input from the current position to the end.
+     *
+     * Characters that start no token and are not whitespace are reported
+     * (stored in $errors and echoed) and skipped.
+     *
+     * @return Token[]
+     */
+    public function lex(): array
+    {
+        $output = [];
+        $length = strlen($this->input);
+
+        // Run up to and including the last byte of the input.
+        while ($this->position < $length) {
+            $lastPosition = $this->position;
+            $currentChar = $this->getCurrentChar();
+            $pair = $currentChar . $this->getNextChar();
+
+            if ($this->isIdentifierChar($currentChar)) {
+                $output[] = $this->lexIdentifier();
+            } elseif ($this->isNumberChar($currentChar)) {
+                $output[] = $this->lexNumber();
+            } elseif ($currentChar === "\"") {
+                $output[] = $this->lexString();
+            } elseif ($pair === "//") {
+                $output[] = $this->lexSingleLineComment();
+            } elseif ($pair === "/*") {
+                $output[] = $this->lexMultiLineComment();
+            } elseif (in_array($pair, self::TWO_CHAR_TOKENS, true)) {
+                $output[] = $this->lexSymbol(2);
+            } elseif (in_array($currentChar, self::SINGLE_CHAR_TOKENS, true)) {
+                $output[] = $this->lexSymbol(1);
+            } else {
+                $this->consumeWhitespace();
+            }
+
+            // Nothing consumed: the character is neither a token start nor whitespace.
+            if ($this->position === $lastPosition) {
+                $this->reportError(sprintf(
+                    "Unknown character %s at position %d,%d" . PHP_EOL,
+                    $this->getCurrentChar(),
+                    $this->line,
+                    $this->column
+                ));
+                $this->advance(1);
+            }
+        }
+
+        return $output;
+    }
+
+    /**
+     * Returns the byte at the current position, or an empty string at end of input.
+     */
+    public function getCurrentChar(): string
+    {
+        return $this->input[$this->position] ?? "";
+    }
+
+    /**
+     * Returns the byte after the current position, or null when there is none.
+     */
+    public function getNextChar(): ?string
+    {
+        return $this->input[$this->position + 1] ?? null;
+    }
+
+    /**
+     * Moves position and column forward by $steps bytes. Line tracking is not touched.
+     */
+    public function advance(int $steps): void
+    {
+        $this->position += $steps;
+        $this->column += $steps;
+    }
+
+    /**
+     * Skips spaces, tabs, line feeds and carriage returns starting at the current position.
+     */
+    public function consumeWhitespace(): void
+    {
+        while (in_array($this->getCurrentChar(), self::WHITESPACE, true)) {
+            $this->step();
+        }
+    }
+
+    /**
+     * Tells whether $char is an ASCII letter or an underscore. Digits are not accepted.
+     */
+    public function isIdentifierChar(string $char): bool
+    {
+        return ("a" <= $char && $char <= "z")
+            || ("A" <= $char && $char <= "Z")
+            || $char === "_";
+    }
+
+    /**
+     * Consumes a run of identifier characters and returns it as a token.
+     */
+    public function lexIdentifier(): Token
+    {
+        $startColumn = $this->column;
+        $startPosition = $this->position;
+
+        while ($this->isIdentifierChar($this->getCurrentChar())) {
+            $this->advance(1);
+        }
+
+        $value = substr($this->input, $startPosition, $this->position - $startPosition);
+
+        return new Token(
+            $value,
+            $value,
+            $this->line,
+            $startColumn,
+        );
+    }
+
+    /**
+     * Tells whether $char is an ASCII digit.
+     */
+    private function isNumberChar(string $char): bool
+    {
+        return "0" <= $char && $char <= "9";
+    }
+
+    /**
+     * Consumes a run of digits, "_" and "." and returns it as a token.
+     *
+     * The placement of "_" and "." is not validated here, so text such as
+     * "1..2" is returned as a single token.
+     */
+    private function lexNumber(): Token
+    {
+        $startColumn = $this->column;
+        $startPosition = $this->position;
+
+        while (
+            $this->isNumberChar($this->getCurrentChar())
+            || $this->getCurrentChar() === "_"
+            || $this->getCurrentChar() === "."
+        ) {
+            $this->advance(1);
+        }
+
+        $value = substr($this->input, $startPosition, $this->position - $startPosition);
+
+        return new Token(
+            $value,
+            $value,
+            $this->line,
+            $startColumn,
+        );
+    }
+
+    /**
+     * Consumes a double-quoted string, which may span several lines.
+     *
+     * The token literal includes the quotes, the token value does not.
+     * No escape sequences are interpreted. If the input ends before the
+     * closing quote an error is reported and the token covers the rest of
+     * the input.
+     */
+    private function lexString(): Token
+    {
+        $startLine = $this->line;
+        $startColumn = $this->column;
+        $startPosition = $this->position;
+
+        // skip opening "
+        $this->advance(1);
+
+        while (!$this->isAtEnd() && $this->getCurrentChar() !== "\"") {
+            $this->step();
+        }
+
+        $terminated = !$this->isAtEnd();
+        if ($terminated) {
+            // consume closing "
+            $this->advance(1);
+        } else {
+            $this->reportError(sprintf(
+                "Unterminated string starting at position %d,%d" . PHP_EOL,
+                $startLine,
+                $startColumn
+            ));
+        }
+
+        $rawValue = substr($this->input, $startPosition, $this->position - $startPosition);
+        $value = $terminated
+            ? substr($rawValue, 1, strlen($rawValue) - 2)
+            : substr($rawValue, 1);
+
+        return new Token(
+            $rawValue,
+            $value,
+            $startLine,
+            $startColumn,
+        );
+    }
+
+    /**
+     * Consumes a "//" comment up to, but not including, the line break or end of input.
+     *
+     * The token value is the comment text without the leading slashes, trimmed.
+     */
+    private function lexSingleLineComment(): Token
+    {
+        $startColumn = $this->column;
+        $startPosition = $this->position;
+
+        while (
+            !$this->isAtEnd()
+            && $this->getCurrentChar() !== "\n"
+            && $this->getCurrentChar() !== "\r"
+        ) {
+            $this->advance(1);
+        }
+
+        $rawValue = substr($this->input, $startPosition, $this->position - $startPosition);
+
+        return new Token(
+            $rawValue,
+            trim(substr($rawValue, 2)),
+            $this->line,
+            $startColumn,
+        );
+    }
+
+    /**
+     * Consumes a block comment including its delimiters.
+     *
+     * The token value is the text between the delimiters, trimmed. If the
+     * input ends before the closing delimiter an error is reported and the
+     * token covers the rest of the input.
+     */
+    private function lexMultiLineComment(): Token
+    {
+        $startLine = $this->line;
+        $startColumn = $this->column;
+        $startPosition = $this->position;
+
+        // skip opening delimiter
+        $this->advance(2);
+
+        while (!$this->isAtEnd() && ($this->getCurrentChar() . $this->getNextChar()) !== "*/") {
+            $this->step();
+        }
+
+        $terminated = !$this->isAtEnd();
+        if ($terminated) {
+            // consume closing delimiter
+            $this->advance(2);
+        } else {
+            $this->reportError(sprintf(
+                "Unterminated comment starting at position %d,%d" . PHP_EOL,
+                $startLine,
+                $startColumn
+            ));
+        }
+
+        // Slice by consumed length so an empty comment keeps its full literal.
+        $rawValue = substr($this->input, $startPosition, $this->position - $startPosition);
+        $value = $terminated
+            ? substr($rawValue, 2, strlen($rawValue) - 4)
+            : substr($rawValue, 2);
+
+        return new Token(
+            $rawValue,
+            trim($value),
+            $startLine,
+            $startColumn,
+        );
+    }
+
+    /**
+     * Builds a token from the next $length bytes and consumes them.
+     */
+    private function lexSymbol(int $length): Token
+    {
+        $symbol = substr($this->input, $this->position, $length);
+        $token = new Token(
+            $symbol,
+            $symbol,
+            $this->line,
+            $this->column,
+        );
+        $this->advance($length);
+
+        return $token;
+    }
+
+    /**
+     * Consumes one byte and updates line/column bookkeeping when it is a line feed.
+     */
+    private function step(): void
+    {
+        if ($this->getCurrentChar() === "\n") {
+            $this->line += 1;
+            // advance() below moves the column to 1 for the first byte of the new line.
+            $this->column = 0;
+        }
+
+        $this->advance(1);
+    }
+
+    /**
+     * Tells whether the current position is past the last byte of the input.
+     */
+    private function isAtEnd(): bool
+    {
+        return $this->position >= strlen($this->input);
+    }
+
+    /**
+     * Stores an error message in $errors and echoes it.
+     */
+    private function reportError(string $message): void
+    {
+        $this->errors[] = $message;
+
+        echo $message;
+    }
+}
+
+/**
+ * A single lexical token together with the place where it starts in the input.
+ */
+class Token {
+    /**
+     * @param string $literal source text of the token exactly as it appears in the input
+     * @param string $value   processed text of the token (for example a string without its quotes)
+     * @param int    $line    line on which the token starts
+     * @param int    $column  column at which the token starts
+     */
+    public function __construct(
+        public string $literal,
+        public string $value,
+        // Typed as int: the lexer passes its integer counters here.
+        public int $line,
+        public int $column,
+    ) {}
+}
diff --git a/test/test.mnml b/test/test.mnml
new file mode 100644
index 0000000..9659081
--- /dev/null
+++ b/test/test.mnml
@@ -0,0 +1,36 @@
+const henshin: integer = 2 // comment
+// next comment
+var ply: string = "abc"
+ply = "way cooler!!"
+
+const new: integer = henshin + 5 * 10
+
+const main: function = (input: string or integer): void {
+ const hello: string = input + " world!"
+ const bye: string = "bye!"
+
+ return hello + " " + bye
+}
+
+main()/*
+ * mlc
+*/
+
+main("pipe1!") => print($)
+
+var mls: string = "alphabet
+ende
+gelände"
+
+const array: [integer] = [1, 2, 3]
+const map: [string][string or integer or bool] = [
+ "first" = 1 <= 2,
+ "second" = "two",
+ "third" = "3",
+ "fourth" = 4 > 3,
+ "fifth" = (): void {
+ return "5."
+ },
+ "sixth" = 20_000,
+ "seventh" = 20.02,
+]