summary refs log tree commit diff
diff options
context:
space:
mode:
author Daniel Weipert <code@drogueronin.de> 2023-08-25 16:24:59 +0200
committer Daniel Weipert <code@drogueronin.de> 2023-08-25 16:24:59 +0200
commit b5cc10cff15797bc9f89724ab53ac7d296fbbc0c (patch)
tree 00f2ecd44cfe1c66149ec9217b60c2c9aaf262a2
parent f7fddeaf9f0453054d4666d8a5b544d4c4cefebd (diff)
trying ply
-rw-r--r-- .gitignore 4
-rw-r--r-- Makefile 2
-rw-r--r-- Readme.txt 4
-rw-r--r-- grammar.y 43
-rwxr-xr-x parse.py 197
-rw-r--r-- test/hello-world.hnshn 28
-rw-r--r-- test/test.test 10
7 files changed, 252 insertions, 36 deletions
diff --git a/.gitignore b/.gitignore
index 84c048a..78b98dd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,5 @@
/build/
+
+__pycache__
+parser.out
+parsetab.py
diff --git a/Makefile b/Makefile
index 58f4163..1c63b58 100644
--- a/Makefile
+++ b/Makefile
@@ -2,9 +2,11 @@ build: grammar.y lex.l ast.h ast.c
bison \
-o build/grammar.tab.c \
-d grammar.y \
+ -t \
#-Wcounterexamples
flex \
-o build/lex.yy.c \
+ -d \
lex.l
gcc \
-o build/henshin \
diff --git a/Readme.txt b/Readme.txt
index 09f9e49..5f0d3a7 100644
--- a/Readme.txt
+++ b/Readme.txt
@@ -6,3 +6,7 @@
- https://github.com/labis7/Flex_Bison_Transpiler
- https://gnuu.org/2009/09/18/writing-your-own-toy-compiler/
- https://lloydrochester.com/post/flex-bison/json-parse-ast/
+
+- https://news.ycombinator.com/item?id=37252334
+- https://tomassetti.me/why-you-should-not-use-flex-yacc-and-bison/
+- http://www.dabeaz.com/ply/ply.html#ply_nn27
diff --git a/grammar.y b/grammar.y
index 8d660a9..a37dae6 100644
--- a/grammar.y
+++ b/grammar.y
@@ -21,13 +21,13 @@ ast_node* yyast = NULL;
%token OPERATOR_PLUS
%token OPERATOR_MINUS
-%token ASSIGN
+%token <string> ASSIGN
%token FUNCTION
%token IF
%token ELSE
%token RETURN
-%token CONST
+%token <string> CONST
%token VAR
%token <string> TYPE_INTEGER
@@ -42,7 +42,7 @@ ast_node* yyast = NULL;
%token BRACKET_RIGHT
%token COMMA;
-%token COLON;
+%token <string> COLON;
%token <number> NUMBER;
%token <string> IDENTIFIER
@@ -63,31 +63,12 @@ ast_node* yyast = NULL;
%start program
%%
-program: { $$ = NULL; }
- | program statement END_OF_LINE { $$ = create_program_node($1, $2); }
-
-
-// function henshin(): void {}
-// function henshin(hen: integer, shin: integer): void {}
-function:
- FUNCTION IDENTIFIER PARENTHESIS_LEFT PARENTHESIS_RIGHT COLON return_type BRACE_LEFT statements BRACE_RIGHT
- | FUNCTION IDENTIFIER PARENTHESIS_LEFT arguments PARENTHESIS_RIGHT COLON return_type BRACE_LEFT statements BRACE_RIGHT
-
-arguments:
- argument
- | arguments COMMA argument
-
-argument:
- IDENTIFIER COLON variable_type { $$ = create_argument_node($1, $3); }
-
-
-return_type:
- TYPE_INTEGER { $$ = create_type_node($1); }
- | TYPE_STRING { $$ = create_type_node($1); }
- | TYPE_VOID { $$ = create_type_node($1); }
+program:
+ statement END_OF_LINE
+ | program program { $$ = create_program_node($1, $2); }
variable_type:
- TYPE_INTEGER { $$ = create_type_node($1); }
+ TYPE_INTEGER { printf("variable_type:: %s\n", $1); $$ = create_type_node($1); }
| TYPE_STRING { $$ = create_type_node($1); }
@@ -97,19 +78,19 @@ statements:
statement:
// const henshin: integer = 2
- | CONST IDENTIFIER COLON variable_type ASSIGN expression
- | RETURN expression
- | IDENTIFIER PARENTHESIS_LEFT PARENTHESIS_RIGHT
- | function
+ CONST IDENTIFIER COLON variable_type ASSIGN expression { printf("$1: %s -- $2: %s -- $3: %s -- $4: %s -- $5: %s -- $6: %s\n", $1, $2, $3, "4", $5, "6"); }
expression:
NUMBER
- | IDENTIFIER
%%
void main (int argc, char **argv)
{
+ #ifdef YYDEBUG
+ yydebug = 1;
+ #endif
+
//henshin_lex();
yyin = fopen(argv[1], "r");
yyparse();
diff --git a/parse.py b/parse.py
new file mode 100755
index 0000000..17e3be1
--- /dev/null
+++ b/parse.py
@@ -0,0 +1,197 @@
+#!/bin/env python3
+
+
+import sys
+import ply.lex as lex
+import ply.yacc as yacc
+
+
+input = open(sys.argv[1]).read()
+
+
+reserved = {
+ "function": "FUNCTION",
+ "if": "IF",
+ "else": "ELSE",
+ "return": "RETURN",
+ "const": "CONST",
+ "var": "VAR",
+
+
+ "integer": "TYPE_INTEGER",
+ "string": "TYPE_STRING",
+ "void": "TYPE_VOID",
+}
+
+tokens = [
+ "OPERATOR_PLUS",
+ "OPERATOR_MINUS",
+ "OPERATOR_MULTIPLY",
+ "OPERATOR_DIVIDE",
+ "ASSIGN",
+
+ "OPERATOR_PIPE",
+ "OPERATOR_PIPE_REPLACEMENT",
+
+
+ "PARENTHESIS_LEFT",
+ "PARENTHESIS_RIGHT",
+ "BRACE_LEFT",
+ "BRACE_RIGHT",
+ "BRACKET_LEFT",
+ "BRACKET_RIGHT",
+
+
+ "BIT_OR",
+
+
+ "NAMESPACE_ACCESSOR",
+
+
+ "COMMA",
+ "COLON",
+ "SEMICOLON",
+
+
+ "IDENTIFIER",
+ "NUMBER",
+ "STRING",
+ "COMMENT",
+] + list(reserved.values())
+
+
+t_OPERATOR_PLUS = r'\+'
+t_OPERATOR_MINUS = r'-'
+t_OPERATOR_MULTIPLY = r'\*'
+t_OPERATOR_DIVIDE = r'/'
+t_ASSIGN = '='
+
+t_OPERATOR_PIPE = r'\|>'
+t_OPERATOR_PIPE_REPLACEMENT = r'\$'
+
+
+t_PARENTHESIS_LEFT = r'\('
+t_PARENTHESIS_RIGHT = r'\)'
+t_BRACE_LEFT = r'\['
+t_BRACE_RIGHT = r'\]'
+t_BRACKET_LEFT = r'{'
+t_BRACKET_RIGHT = r'}'
+
+
+t_BIT_OR = r'\|'
+
+
+t_NAMESPACE_ACCESSOR = r'\.'
+
+
+t_COMMA = r','
+t_COLON = r':'
+t_SEMICOLON = r';'
+
+
+def t_IDENTIFIER(t):
+ r'[a-zA-Z][a-zA-Z0-9_]*'
+
+ t.type = reserved.get(t.value, 'IDENTIFIER')
+
+ return t
+
+def t_NUMBER(t):
+ r'[0-9]+'
+
+ t.value = int(t.value)
+
+ return t
+
+def t_STRING(t):
+ r'(".+"|\'.*\')'
+
+ return t
+
+def t_COMMENT(t):
+ r'//.*'
+
+ pass
+
+
+def t_newline(t):
+ r'\n+'
+
+ t.lexer.lineno += len(t.value)
+
+t_ignore = ' \t'
+
+def t_error(t):
+ print("undefined: '%s'" % t.value[0])
+ t.lexer.skip(1)
+
+
+lexer = lex.lex()
+lexer.input(input)
+
+lineno = 0
+for token in lexer:
+ if token.lineno != lineno:
+ lineno = token.lineno
+ print("\nLine %s:" % token.lineno)
+
+ print('%s: "%s" --' % (token.type, token.value), end=' ')
+print("\n")
+
+
+precedence = (
+ ('left', 'OPERATOR_PLUS', 'OPERATOR_MINUS'),
+ ('left', 'OPERATOR_MULTIPLY', 'OPERATOR_DIVIDE'),
+)
+
+
+def p_statements(p):
+ '''statements : statement
+ | statement statements'''
+ pass
+
+def p_statement(p):
+ '''statement : variable_declaration_statement
+ | variable_reassignment_statement'''
+ pass
+
+# def p_function(p):
+# '''function : FUNCTION IDENTIFIER PARENTHESIS_LEFT function_parameters PARENTHESIS_RIGHT COLON return_type BRACKET_LEFT statements BRACKET_RIGHT'''
+# pass
+
+def p_variable_declaration_statement(p):
+ '''variable_declaration_statement : variable_declarator IDENTIFIER COLON variable_type ASSIGN expression SEMICOLON'''
+ pass
+
+def p_variable_reassignment_statement(p):
+ '''variable_reassignment_statement : IDENTIFIER ASSIGN expression SEMICOLON'''
+ pass
+
+def p_variable_declarator(p):
+ '''variable_declarator : CONST
+ | VAR'''
+ pass
+
+def p_variable_type(p):
+ '''variable_type : TYPE_INTEGER
+ | TYPE_STRING'''
+ pass
+
+def p_expression(p):
+ '''expression : IDENTIFIER
+ | NUMBER
+ | STRING
+ | expression OPERATOR_PLUS expression
+ | expression OPERATOR_MINUS expression
+ | expression OPERATOR_MULTIPLY expression
+ | expression OPERATOR_DIVIDE expression'''
+ pass
+
+
+def p_error(p):
+ print("Syntax error in input!", p)
+
+
+parser = yacc.yacc()
+result = parser.parse(input)
+print(result)
diff --git a/test/hello-world.hnshn b/test/hello-world.hnshn
new file mode 100644
index 0000000..4d4af3d
--- /dev/null
+++ b/test/hello-world.hnshn
@@ -0,0 +1,28 @@
+const std = import('@std');
+
+function main(): void {
+ const integer: integer32 = 123;
+ const string: string = '123';
+ const array: [integer32][3] = [1, 2, 3];
+ const map: [string][string|integer32] = [
+ 'first': 1,
+ 'second': 'two',
+ 'third': 3,
+ ];
+
+ for (array) |index, value| {
+ // cool
+ }
+
+ for (map) |key, value| {
+ // also cool
+ }
+
+ for (string) |index, char| {
+ // cool?
+ const char2 = std.str.get_char_at_index(string, index);
+ }
+
+ std.str.format('cool %s', string)
+ |> print($);
+}
diff --git a/test/test.test b/test/test.test
index 44580c7..5bbd0c0 100644
--- a/test/test.test
+++ b/test/test.test
@@ -1,6 +1,6 @@
-function henshin(hen: integer, shin: integer): void {
- const x: integer = 2
- const b: string = x
+const henshin: integer = 2; // comment
+// next comment
+var ply: string = "cool!";
+ply = "way cooler!!";
- return b
-}
+const new: integer = henshin * 5 + 10;