diff options
Diffstat (limited to 'src/Lexer/Lexer.php')
| -rw-r--r-- | src/Lexer/Lexer.php | 432 | 
1 files changed, 432 insertions, 0 deletions
<?php

namespace Mnml\Lexer;

/**
 * Hand-written, single-pass lexer that turns a source string into a flat
 * list of Token objects.
 *
 * The lexer walks the input byte by byte while tracking a 1-based line and
 * column. Problems (unknown characters, unterminated strings/comments) are
 * appended to $errors AND echoed; lexing always continues so that as many
 * tokens as possible are produced.
 */
class Lexer
{
    /**
     * Operators that may be extended by exactly one following character.
     * Key: first character. Value: characters that turn it into a
     * two-character operator ("==", "=>", "**", "<=", ">=").
     */
    private const COMPOUND_OPERATORS = [
        "=" => ["=", ">"],
        "*" => ["*"],
        "<" => ["="],
        ">" => ["="],
    ];

    /** Characters that always form a token on their own. */
    private const SINGLE_CHAR_TOKENS = [
        ":", ",",
        "+", "-",
        "(", ")",
        "[", "]",
        "{", "}",
        "$",
    ];

    /**
     * Characters skipped between tokens. "\n" is matched literally instead of
     * PHP_EOL: PHP_EOL is "\r\n" on Windows and can never equal one character.
     */
    private const WHITESPACE = [" ", "\t", "\r", "\n"];

    public string $input;
    public int $position;
    public int $line;
    public int $column;

    /** @var list<string> Messages collected while lexing (each ends with PHP_EOL). */
    public array $errors = [];

    public function __construct(string $input)
    {
        $this->input = $input;
        $this->position = 0;
        $this->line = 1;
        $this->column = 1;
    }

    /**
     * Lexes the remaining input, starting at the current position.
     *
     * Comments are emitted as tokens as well (their literal starts with "//"
     * or "/*"); whitespace is dropped.
     *
     * @return list<Token>
     */
    public function lex(): array
    {
        $output = [];
        $length = strlen($this->input);

        // Run up to and including the last character of the input.
        while ($this->position < $length) {
            $lastPosition = $this->position;
            $currentChar = $this->getCurrentChar();

            if ($this->isIdentifierChar($currentChar)) {
                $output[] = $this->lexIdentifier();
            } elseif ($this->isNumberChar($currentChar)) {
                $output[] = $this->lexNumber();
            } elseif ($currentChar === "\"") {
                $output[] = $this->lexString();
            } elseif ($currentChar === "/") {
                $output[] = $this->lexSlash();
            } elseif (array_key_exists($currentChar, self::COMPOUND_OPERATORS)) {
                $output[] = $this->lexOperator(self::COMPOUND_OPERATORS[$currentChar]);
            } elseif (in_array($currentChar, self::SINGLE_CHAR_TOKENS, true)) {
                $output[] = $this->lexOperator([]);
            } else {
                $this->consumeWhitespace();
            }

            // Nothing above consumed input: the character is neither a token
            // start nor whitespace. Report it and skip it to guarantee progress.
            if ($this->position === $lastPosition) {
                $this->reportError(sprintf(
                    "Unknown character %s at position %d,%d" . PHP_EOL,
                    $currentChar,
                    $this->line,
                    $this->column,
                ));
                $this->advance(1);
            }
        }

        return $output;
    }

    /**
     * Returns the character at the current position, or "" once the end of
     * the input has been reached (no out-of-range warning is raised).
     */
    public function getCurrentChar(): string
    {
        return $this->input[$this->position] ?? "";
    }

    /**
     * Returns the character after the current one, or null if there is none.
     */
    public function getNextChar(): ?string
    {
        return $this->input[$this->position + 1] ?? null;
    }

    /**
     * Moves the cursor forward by $steps characters on the current line.
     * Line bookkeeping is NOT done here; see step().
     */
    public function advance(int $steps): void
    {
        $this->position += $steps;
        $this->column += $steps;
    }

    /**
     * Skips a run of whitespace, updating line/column across newlines.
     * Does nothing if the current character is not whitespace.
     */
    public function consumeWhitespace(): void
    {
        while (in_array($this->getCurrentChar(), self::WHITESPACE, true)) {
            $this->step();
        }
    }

    /**
     * Tells whether $char may appear in an identifier: ASCII letters and "_".
     * Digits are not identifier characters.
     */
    public function isIdentifierChar(string $char): bool
    {
        return ("a" <= $char && $char <= "z")
            || ("A" <= $char && $char <= "Z")
            || $char === "_";
    }

    /**
     * Consumes a run of identifier characters starting at the current
     * position. Intended to be called with the cursor on an identifier
     * character; otherwise the returned token is empty.
     */
    public function lexIdentifier(): Token
    {
        $startColumn = $this->column;
        $startPosition = $this->position;

        while ($this->isIdentifierChar($this->getCurrentChar())) {
            $this->advance(1);
        }

        $value = $this->sliceFrom($startPosition);

        return new Token(
            $value,
            $value,
            $this->line,
            $startColumn,
        );
    }

    private function isNumberChar(string $char): bool
    {
        return "0" <= $char && $char <= "9";
    }

    /**
     * Consumes a number: a run of digits, "_" and ".". The run is not
     * validated further (e.g. "1.2.3" is returned as a single token).
     */
    private function lexNumber(): Token
    {
        $startColumn = $this->column;
        $startPosition = $this->position;

        while (
            $this->isNumberChar($this->getCurrentChar())
            || $this->getCurrentChar() === "_"
            || $this->getCurrentChar() === "."
        ) {
            $this->advance(1);
        }

        $value = $this->sliceFrom($startPosition);

        return new Token(
            $value,
            $value,
            $this->line,
            $startColumn,
        );
    }

    /**
     * Consumes a double-quoted string, which may span several lines. There
     * are no escape sequences: the next '"' always ends the string.
     *
     * The literal includes the quotes, the value excludes them. If the input
     * ends before the closing quote an error is reported and the token covers
     * everything up to the end of the input.
     */
    private function lexString(): Token
    {
        $startLine = $this->line;
        $startColumn = $this->column;
        $startPosition = $this->position;

        // skip opening "
        $this->advance(1);

        while (!$this->isAtEnd() && $this->getCurrentChar() !== "\"") {
            $this->step();
        }

        $terminated = !$this->isAtEnd();
        if ($terminated) {
            // consume closing "
            $this->advance(1);
        } else {
            $this->reportError(sprintf(
                "Unterminated string starting at position %d,%d" . PHP_EOL,
                $startLine,
                $startColumn,
            ));
        }

        $rawValue = $this->sliceFrom($startPosition);

        return new Token(
            $rawValue,
            $terminated ? substr($rawValue, 1, -1) : substr($rawValue, 1),
            $startLine,
            $startColumn,
        );
    }

    /**
     * Dispatches on the character after "/": a "//" comment, a slash-star
     * comment, or the plain divide operator.
     */
    private function lexSlash(): Token
    {
        return match ($this->getNextChar()) {
            "/" => $this->lexSingleLineComment(),
            "*" => $this->lexMultiLineComment(),
            default => $this->lexOperator([]),
        };
    }

    /**
     * Emits an operator token for the current character, extended to two
     * characters when the next character is one of $followers.
     *
     * @param list<string> $followers characters that form a two-char operator
     */
    private function lexOperator(array $followers): Token
    {
        $startColumn = $this->column;
        $literal = $this->getCurrentChar();
        $nextChar = $this->getNextChar();

        if ($nextChar !== null && in_array($nextChar, $followers, true)) {
            $literal .= $nextChar;
        }

        $this->advance(strlen($literal));

        return new Token(
            $literal,
            $literal,
            $this->line,
            $startColumn,
        );
    }

    /**
     * Consumes a "//" comment up to (not including) the line break or the
     * end of the input. The value is the trimmed text after "//".
     */
    private function lexSingleLineComment(): Token
    {
        $startColumn = $this->column;
        $startPosition = $this->position;

        while (
            !$this->isAtEnd()
            && $this->getCurrentChar() !== "\n"
            && $this->getCurrentChar() !== "\r"
        ) {
            $this->advance(1);
        }

        $rawValue = $this->sliceFrom($startPosition);

        return new Token(
            $rawValue,
            trim(substr($rawValue, 2)),
            $this->line,
            $startColumn,
        );
    }

    /**
     * Consumes a slash-star comment including its closing star-slash. The
     * value is the trimmed text between the delimiters.
     *
     * If the input ends before the closing delimiter an error is reported
     * and the token covers everything up to the end of the input.
     */
    private function lexMultiLineComment(): Token
    {
        $startLine = $this->line;
        $startColumn = $this->column;
        $startPosition = $this->position;

        // skip opening delimiter
        $this->advance(2);

        while (
            !$this->isAtEnd()
            && ($this->getCurrentChar() . $this->getNextChar()) !== "*/"
        ) {
            $this->step();
        }

        $terminated = !$this->isAtEnd();
        if ($terminated) {
            // skip closing delimiter
            $this->advance(2);
        } else {
            $this->reportError(sprintf(
                "Unterminated comment starting at position %d,%d" . PHP_EOL,
                $startLine,
                $startColumn,
            ));
        }

        // Slicing by cursor position keeps the literal right for an empty
        // comment as well, where no body character is ever visited.
        $rawValue = $this->sliceFrom($startPosition);

        return new Token(
            $rawValue,
            trim($terminated ? substr($rawValue, 2, -2) : substr($rawValue, 2)),
            $startLine,
            $startColumn,
        );
    }

    /** Tells whether the cursor is past the last character of the input. */
    private function isAtEnd(): bool
    {
        return $this->position >= strlen($this->input);
    }

    /**
     * Consumes one character; when it is a newline the line counter is
     * bumped and the column restarts at 1.
     */
    private function step(): void
    {
        if ($this->getCurrentChar() === "\n") {
            $this->line += 1;
            // advance() below brings this to 1 for the first char of the line
            $this->column = 0;
        }

        $this->advance(1);
    }

    /** Returns the input between $startPosition and the current position. */
    private function sliceFrom(int $startPosition): string
    {
        return substr($this->input, $startPosition, $this->position - $startPosition);
    }

    /** Records an error message and echoes it. */
    private function reportError(string $message): void
    {
        $this->errors[] = $message;

        echo $message;
    }
}

/**
 * A lexed token: the exact source text ($literal), its processed content
 * ($value, e.g. a string without its quotes) and where it starts.
 *
 * NOTE(review): $line and $column are declared as string while Lexer passes
 * ints; this only works because the file does not enable strict_types, so the
 * ints are coerced to numeric strings. Switching both to int looks like the
 * intent - confirm against the consumers of Token before changing it.
 */
class Token
{
    public function __construct(
        public string $literal,
        public string $value,
        public string $line,
        public string $column,
    ) {}
}
