input = $input;
        $this->position = 0;
        $this->line = 1;
        $this->column = 1;
    }

    /**
     * Splits the whole input into tokens.
     *
     * Scanning starts at the current position and runs to the end of the
     * input. Characters that match no rule are reported through the error
     * list (and echoed) and then skipped, so lexing always terminates. The
     * returned list always ends with an EndOfFile token.
     *
     * @return list<Token>
     */
    public function lex(): array
    {
        $output = [];

        // Two-character tokens keyed by their literal text. They are tried
        // before the one-character table so that "==" wins over "=".
        $twoCharTokens = [
            "==" => TokenType::Comparator,
            "=>" => TokenType::Pipe,
            "**" => TokenType::Operator,
            "<=" => TokenType::Comparator,
            ">=" => TokenType::Comparator,
            "!=" => TokenType::Comparator,
        ];

        $oneCharTokens = [
            "=" => TokenType::Assign,
            "/" => TokenType::Operator,
            "*" => TokenType::Operator,
            "<" => TokenType::Comparator,
            ">" => TokenType::Comparator,
            "!" => TokenType::Operator,
            "$" => TokenType::PipePlaceholder,
            // TODO: punctuation below is classified as Operator for now
            ":" => TokenType::Operator,
            "," => TokenType::Operator,
            "+" => TokenType::Operator,
            "-" => TokenType::Operator,
            "(" => TokenType::Operator,
            ")" => TokenType::Operator,
            "[" => TokenType::Operator,
            "]" => TokenType::Operator,
            "{" => TokenType::Operator,
            "}" => TokenType::Operator,
            "." => TokenType::Operator,
        ];

        $length = strlen($this->input);

        // Scan up to and including the last character; stopping one short
        // would silently drop a single-character final token.
        while ($this->position < $length) {
            $lastPosition = $this->position;
            $currentChar = $this->getCurrentChar();
            $pair = $currentChar . ($this->getNextChar() ?? "");

            // identifier
            if ($this->isIdentifierChar($currentChar)) {
                $output[] = $this->lexIdentifier();
            }
            // number
            elseif ($this->isNumberChar($currentChar)) {
                $output[] = $this->lexNumber();
            }
            // string
            elseif ($currentChar === "\"") {
                $output[] = $this->lexString();
            }
            // single line comment
            elseif ($pair === "//") {
                $output[] = $this->lexSingleLineComment();
            }
            // multiline comment
            elseif ($pair === "/*") {
                $output[] = $this->lexMultiLineComment();
            }
            // two character operators, comparators and the pipe
            elseif (isset($twoCharTokens[$pair])) {
                $output[] = new Token(
                    $twoCharTokens[$pair],
                    $pair,
                    $pair,
                    $this->line,
                    $this->column,
                );
                $this->advance(2);
            }
            // one character operators, assign, pipe placeholder, punctuation
            elseif (isset($oneCharTokens[$currentChar])) {
                $output[] = new Token(
                    $oneCharTokens[$currentChar],
                    $currentChar,
                    $currentChar,
                    $this->line,
                    $this->column,
                );
                $this->advance(1);
            }
            // whitespace
            else {
                $this->consumeWhitespace();
            }

            // unknown character: nothing above consumed any input
            if ($this->position === $lastPosition) {
                $this->reportError(sprintf(
                    "Unknown character %s at position %d,%d" . PHP_EOL,
                    $this->getCurrentChar(),
                    $this->line,
                    $this->column,
                ));
                $this->advance(1);
            }
        }

        // add end of file token
        $output[] = new Token(
            TokenType::EndOfFile,
            "",
            "",
            $this->line,
            $this->column,
        );

        return $output;
    }

    /**
     * Returns the character at the current position, or an empty string when
     * the position is at or past the end of the input.
     */
    public function getCurrentChar(): string
    {
        return $this->input[$this->position] ?? "";
    }

    /**
     * Returns the character right after the current one, or null when there
     * is none.
     */
    public function getNextChar(): ?string
    {
        return $this->input[$this->position + 1] ?? null;
    }

    /**
     * Moves the position and the column forward by the given number of
     * characters. Line tracking is left to the callers.
     */
    public function advance(int $steps): void
    {
        $this->position += $steps;
        $this->column += $steps;
    }

    /**
     * Skips spaces, tabs, carriage returns and line feeds, bumping the line
     * counter on every line feed. Does nothing when the current character is
     * not whitespace.
     */
    public function consumeWhitespace(): void
    {
        // A single input byte can never equal a multi-byte PHP_EOL, so line
        // breaks are matched on "\n" and "\r" is skipped as plain whitespace.
        $whitespace = [" ", "\t", "\r", "\n"];

        while (in_array($char = $this->getCurrentChar(), $whitespace, true)) {
            if ($char === "\n") {
                $this->line += 1;
                // advance() below moves the column to 1
                $this->column = 0;
            }

            $this->advance(1);
        }
    }

    /**
     * Tells whether the character may start or continue an identifier:
     * an ASCII letter or an underscore.
     */
    public function isIdentifierChar(string $char): bool
    {
        return ("a" <= $char && $char <= "z")
            || ("A" <= $char && $char <= "Z")
            || $char === "_";
    }

    /**
     * Consumes a run of identifier characters and digits starting at the
     * current position and returns it as an Identifier token.
     */
    public function lexIdentifier(): Token
    {
        $startColumn = $this->column;
        $startPosition = $this->position;

        while (
            $this->isIdentifierChar($char = $this->getCurrentChar())
            || $this->isNumberChar($char)
        ) {
            $this->advance(1);
        }

        $value = substr($this->input, $startPosition, $this->position - $startPosition);

        return new Token(
            TokenType::Identifier,
            $value,
            $value,
            $this->line,
            $startColumn,
        );
    }

    /**
     * Tells whether the character is an ASCII digit.
     */
    private function isNumberChar(string $char): bool
    {
        return "0" <= $char && $char <= "9";
    }

    /**
     * Consumes a run of digits, underscores and dots and returns it as a
     * Number token. The token value has the underscores removed.
     *
     * NOTE: any number of dots is accepted, so "1.2.3" is lexed as one token.
     */
    private function lexNumber(): Token
    {
        $startColumn = $this->column;
        $startPosition = $this->position;

        while (
            $this->isNumberChar($char = $this->getCurrentChar())
            || $char === "_"
            || $char === "."
        ) {
            $this->advance(1);
        }

        $value = substr($this->input, $startPosition, $this->position - $startPosition);

        return new Token(
            TokenType::Number,
            $value,
            str_replace("_", "", $value),
            $this->line,
            $startColumn,
        );
    }

    /**
     * Consumes a double-quoted string, which may span several lines, and
     * returns it as a String token positioned at the opening quote.
     *
     * The literal keeps the quotes, the value does not. No escape sequences
     * are interpreted. If the input ends before the closing quote, an error
     * is reported and the token holds everything up to the end of the input.
     */
    private function lexString(): Token
    {
        $startLine = $this->line;
        $startColumn = $this->column;
        $startPosition = $this->position;

        // skip first "
        $this->advance(1);

        // getCurrentChar() yields "" at the end of the input, which stops the
        // scan instead of looping forever on an unterminated string
        while (($char = $this->getCurrentChar()) !== "" && $char !== "\"") {
            if ($char === "\n") {
                $this->line += 1;
                $this->column = 0;
            }

            $this->advance(1);
        }

        $terminated = $this->getCurrentChar() === "\"";

        if ($terminated) {
            // consume last "
            $this->advance(1);
        } else {
            $this->reportError(sprintf(
                "Unterminated string starting at position %d,%d" . PHP_EOL,
                $startLine,
                $startColumn,
            ));
        }

        $rawValue = substr($this->input, $startPosition, $this->position - $startPosition);
        $value = $terminated
            ? substr($rawValue, 1, strlen($rawValue) - 2)
            : substr($rawValue, 1);

        return new Token(
            TokenType::String,
            $rawValue,
            $value,
            $startLine,
            $startColumn,
        );
    }

    /**
     * Consumes a "//" comment up to, but not including, the line break (or
     * the end of the input) and returns it as a Comment token. The value is
     * the comment text without the leading slashes, trimmed.
     */
    private function lexSingleLineComment(): Token
    {
        $startColumn = $this->column;
        $startPosition = $this->position;

        while (
            ($char = $this->getCurrentChar()) !== ""
            && $char !== "\n"
            && $char !== "\r"
        ) {
            $this->advance(1);
        }

        $rawValue = substr($this->input, $startPosition, $this->position - $startPosition);

        return new Token(
            TokenType::Comment,
            $rawValue,
            trim(substr($rawValue, 2)),
            $this->line,
            $startColumn,
        );
    }

    /**
     * Consumes a slash-star comment, which may span several lines, and
     * returns it as a Comment token positioned at its opening delimiter.
     *
     * The literal keeps the delimiters; the value is the trimmed text between
     * them. If the input ends before the closing delimiter, an error is
     * reported and the token holds everything up to the end of the input.
     */
    private function lexMultiLineComment(): Token
    {
        $startLine = $this->line;
        $startColumn = $this->column;
        $startPosition = $this->position;

        // skip start
        $this->advance(2);

        while (
            ($char = $this->getCurrentChar()) !== ""
            && !($char === "*" && $this->getNextChar() === "/")
        ) {
            if ($char === "\n") {
                $this->line += 1;
                $this->column = 0;
            }

            $this->advance(1);
        }

        // the loop stops either on the closing delimiter or at end of input
        $terminated = $char !== "";

        if ($terminated) {
            // skip end
            $this->advance(2);
        } else {
            $this->reportError(sprintf(
                "Unterminated comment starting at position %d,%d" . PHP_EOL,
                $startLine,
                $startColumn,
            ));
        }

        $rawValue = substr($this->input, $startPosition, $this->position - $startPosition);
        $value = $terminated
            ? substr($rawValue, 2, strlen($rawValue) - 4)
            : substr($rawValue, 2);

        return new Token(
            TokenType::Comment,
            $rawValue,
            trim($value),
            $startLine,
            $startColumn,
        );
    }

    /**
     * Records a lexing error in the error list and echoes it.
     */
    private function reportError(string $message): void
    {
        $this->errors[] = $message;
        echo $message;
    }
}

/**
 * A single lexical token.
 *
 * `literal` is the text exactly as it appears in the input; `value` is the
 * processed form (e.g. a string without its quotes, a number without
 * underscores). `line` and `column` locate the start of the token.
 */
class Token
{
    public function __construct(
        public TokenType $type,
        public string $literal,
        public string $value,
        public int $line,
        public int $column,
    ) {}
}

/**
 * The kinds of token the lexer produces.
 */
enum TokenType
{
    case Identifier;
    case Number;
    case String;
    case Comment;
    case Comparator;
    case Operator;
    case Assign;
    case Pipe;
    case PipePlaceholder;
    case EndOfFile;
}