From b5cc10cff15797bc9f89724ab53ac7d296fbbc0c Mon Sep 17 00:00:00 2001 From: Daniel Weipert Date: Fri, 25 Aug 2023 16:24:59 +0200 Subject: trying ply --- .gitignore | 4 + Makefile | 2 + Readme.txt | 4 + grammar.y | 43 +++-------- parse.py | 197 +++++++++++++++++++++++++++++++++++++++++++++++++ test/hello-world.hnshn | 28 +++++++ test/test.test | 10 +-- 7 files changed, 252 insertions(+), 36 deletions(-) create mode 100755 parse.py create mode 100644 test/hello-world.hnshn diff --git a/.gitignore b/.gitignore index 84c048a..78b98dd 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ /build/ + +__pycache__ +parser.out +parsetab.py diff --git a/Makefile b/Makefile index 58f4163..1c63b58 100644 --- a/Makefile +++ b/Makefile @@ -2,9 +2,11 @@ build: grammar.y lex.l ast.h ast.c bison \ -o build/grammar.tab.c \ -d grammar.y \ + -t \ #-Wcounterexamples flex \ -o build/lex.yy.c \ + -d \ lex.l gcc \ -o build/henshin \ diff --git a/Readme.txt b/Readme.txt index 09f9e49..5f0d3a7 100644 --- a/Readme.txt +++ b/Readme.txt @@ -6,3 +6,7 @@ - https://github.com/labis7/Flex_Bison_Transpiler - https://gnuu.org/2009/09/18/writing-your-own-toy-compiler/ - https://lloydrochester.com/post/flex-bison/json-parse-ast/ + +- https://news.ycombinator.com/item?id=37252334 +- https://tomassetti.me/why-you-should-not-use-flex-yacc-and-bison/ +- http://www.dabeaz.com/ply/ply.html#ply_nn27 diff --git a/grammar.y b/grammar.y index 8d660a9..a37dae6 100644 --- a/grammar.y +++ b/grammar.y @@ -21,13 +21,13 @@ ast_node* yyast = NULL; %token OPERATOR_PLUS %token OPERATOR_MINUS -%token ASSIGN +%token ASSIGN %token FUNCTION %token IF %token ELSE %token RETURN -%token CONST +%token CONST %token VAR %token TYPE_INTEGER @@ -42,7 +42,7 @@ ast_node* yyast = NULL; %token BRACKET_RIGHT %token COMMA; -%token COLON; +%token COLON; %token NUMBER; %token IDENTIFIER @@ -63,31 +63,12 @@ ast_node* yyast = NULL; %start program %% -program: { $$ = NULL; } - | program statement END_OF_LINE { $$ = create_program_node($1, $2); } - - -// function henshin(): void {} -// function henshin(hen: integer, shin: integer): void {} -function: - FUNCTION IDENTIFIER PARENTHESIS_LEFT PARENTHESIS_RIGHT COLON return_type BRACE_LEFT statements BRACE_RIGHT - | FUNCTION IDENTIFIER PARENTHESIS_LEFT arguments PARENTHESIS_RIGHT COLON return_type BRACE_LEFT statements BRACE_RIGHT - -arguments: - argument - | arguments COMMA argument - -argument: - IDENTIFIER COLON variable_type { $$ = create_argument_node($1, $3); } - - -return_type: - TYPE_INTEGER { $$ = create_type_node($1); } - | TYPE_STRING { $$ = create_type_node($1); } - | TYPE_VOID { $$ = create_type_node($1); } +program: + statement END_OF_LINE + | program program { $$ = create_program_node($1, $2); } variable_type: - TYPE_INTEGER { $$ = create_type_node($1); } + TYPE_INTEGER { printf("variable_type:: %s\n", $1); $$ = create_type_node($1); } | TYPE_STRING { $$ = create_type_node($1); } @@ -97,19 +78,19 @@ statements: statement: // const henshin: integer = 2 - | CONST IDENTIFIER COLON variable_type ASSIGN expression - | RETURN expression - | IDENTIFIER PARENTHESIS_LEFT PARENTHESIS_RIGHT - | function + CONST IDENTIFIER COLON variable_type ASSIGN expression { printf("$1: %s -- $2: %s -- $3: %s -- $4: %s -- $5: %s -- $6: %s\n", $1, $2, $3, "4", $5, "6"); } expression: NUMBER - | IDENTIFIER %% void main (int argc, char **argv) { + #ifdef YYDEBUG + yydebug = 1; + #endif + //henshin_lex(); yyin = fopen(argv[1], "r"); yyparse(); diff --git a/parse.py b/parse.py new file mode 100755 index 0000000..17e3be1 --- /dev/null +++ b/parse.py @@ -0,0 +1,197 @@ +#!/bin/env python3 + + +import sys +import ply.lex as lex +import ply.yacc as yacc + + +input = open(sys.argv[1]).read() + + +reserved = { + "function": "FUNCTION", + "if": "IF", + "else": "ELSE", + "return": "RETURN", + "const": "CONST", + "var": "VAR", + + + "integer": "TYPE_INTEGER", + "string": "TYPE_STRING", + "void": "TYPE_VOID", +} + +tokens = [ + "OPERATOR_PLUS", + "OPERATOR_MINUS", + "OPERATOR_MULTIPLY", + "OPERATOR_DIVIDE", + "ASSIGN", + + "OPERATOR_PIPE", + "OPERATOR_PIPE_REPLACEMENT", + + + "PARENTHESIS_LEFT", + "PARENTHESIS_RIGHT", + "BRACE_LEFT", + "BRACE_RIGHT", + "BRACKET_LEFT", + "BRACKET_RIGHT", + + + "BIT_OR", + + + "NAMESPACE_ACCESSOR", + + + "COMMA", + "COLON", + "SEMICOLON", + + + "IDENTIFIER", + "NUMBER", + "STRING", + "COMMENT", +] + list(reserved.values()) + + +t_OPERATOR_PLUS = r'\+' +t_OPERATOR_MINUS = r'-' +t_OPERATOR_MULTIPLY = r'\*' +t_OPERATOR_DIVIDE = r'/' +t_ASSIGN = '=' + +t_OPERATOR_PIPE = r'\|>' +t_OPERATOR_PIPE_REPLACEMENT = r'\$' + + +t_PARENTHESIS_LEFT = r'\(' +t_PARENTHESIS_RIGHT = r'\)' +t_BRACE_LEFT = r'\[' +t_BRACE_RIGHT = r'\]' +t_BRACKET_LEFT = r'{' +t_BRACKET_RIGHT = r'}' + + +t_BIT_OR = r'\|' + + +t_NAMESPACE_ACCESSOR = r'\.' + + +t_COMMA = r',' +t_COLON = r':' +t_SEMICOLON = r';' + + +def t_IDENTIFIER(t): + r'[a-zA-Z][a-zA-Z0-9_]*' + + t.type = reserved.get(t.value, 'IDENTIFIER') + + return t + +def t_NUMBER(t): + r'[0-9]+' + + t.value = int(t.value) + + return t + +def t_STRING(t): + r'(".+"|\'.*\')' + + return t + +def t_COMMENT(t): + r'//.*' + + pass + + +def t_newline(t): + r'\n+' + + t.lexer.lineno += len(t.value) + +t_ignore = ' \t' + +def t_error(t): + print("undefined: '%s'" % t.value[0]) + t.lexer.skip(1) + + +lexer = lex.lex() +lexer.input(input) + +lineno = 0 +for token in lexer: + if token.lineno != lineno: + lineno = token.lineno + print("\nLine %s:" % token.lineno) + + print('%s: "%s" --' % (token.type, token.value), end=' ') +print("\n") + + +precedence = ( + ('left', 'OPERATOR_PLUS', 'OPERATOR_MINUS'), + ('left', 'OPERATOR_MULTIPLY', 'OPERATOR_DIVIDE'), +) + + +def p_statements(p): + '''statements : statement + | statement statements''' + pass + +def p_statement(p): + '''statement : variable_declaration_statement + | variable_reassignment_statement''' + pass + +# def p_function(p): +# '''function : FUNCTION IDENTIFIER PARENTHESIS_LEFT function_parameters PARENTHESIS_RIGHT COLON return_type BRACKET_LEFT statements BRACKET_RIGHT''' +# pass + +def p_variable_declaration_statement(p): + '''variable_declaration_statement : variable_declarator IDENTIFIER COLON variable_type ASSIGN expression SEMICOLON''' + pass + +def p_variable_reassignment_statement(p): + '''variable_reassignment_statement : IDENTIFIER ASSIGN expression SEMICOLON''' + pass + +def p_variable_declarator(p): + '''variable_declarator : CONST + | VAR''' + pass + +def p_variable_type(p): + '''variable_type : TYPE_INTEGER + | TYPE_STRING''' + pass + +def p_expression(p): + '''expression : IDENTIFIER + | NUMBER + | STRING + | expression OPERATOR_PLUS expression + | expression OPERATOR_MINUS expression + | expression OPERATOR_MULTIPLY expression + | expression OPERATOR_DIVIDE expression''' + pass + + +def p_error(p): + print("Syntax error in input!", p) + + +parser = yacc.yacc() +result = parser.parse(input) +print(result) diff --git a/test/hello-world.hnshn b/test/hello-world.hnshn new file mode 100644 index 0000000..4d4af3d --- /dev/null +++ b/test/hello-world.hnshn @@ -0,0 +1,28 @@ +const std = import('@std'); + +function main(): void { + const integer: integer32 = 123; + const string: string = '123'; + const array: [integer32][3] = [1, 2, 3]; + const map: [string][string|integer32] = [ + 'first': 1, + 'second': 'two', + 'third': 3, + ]; + + for (array) |index, value| { + // cool + } + + for (map) |key, value| { + // also cool + } + + for (string) |index, char| { + // cool? + const char2 = std.str.get_char_at_index(string, index); + } + + std.str.format('cool %s', string) + |> print($); +} diff --git a/test/test.test b/test/test.test index 44580c7..5bbd0c0 100644 --- a/test/test.test +++ b/test/test.test @@ -1,6 +1,6 @@ -function henshin(hen: integer, shin: integer): void { - const x: integer = 2 - const b: string = x +const henshin: integer = 2; // comment +// next comment +var ply: string = "cool!"; +ply = "way cooler!!"; - return b -} +const new: integer = henshin * 5 + 10; -- cgit v1.2.3