summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Daniel Weipert <code@drogueronin.de> 2023-08-26 13:44:07 +0200
committer Daniel Weipert <code@drogueronin.de> 2023-08-26 13:44:07 +0200
commit 5f8c1d144b3c91f2c4ba75d709a74d83c1f3d5a0 (patch)
tree f842fc870c24e5b913aa277747985f68f99b951a
parent b5cc10cff15797bc9f89724ab53ac7d296fbbc0c (diff)
ast and cleanup
-rw-r--r-- .gitignore 3
-rw-r--r-- Makefile 15
-rw-r--r-- ast.c 45
-rw-r--r-- ast.h 26
-rw-r--r-- grammar.y 99
-rwxr-xr-x henshin (renamed from parse.py) 126
-rw-r--r-- lex.l 53
-rw-r--r-- test/test.test 2
8 files changed, 115 insertions, 254 deletions
diff --git a/.gitignore b/.gitignore
index 78b98dd..dfdcc7a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,4 @@
-/build/
-
__pycache__
parser.out
+parselog.txt
parsetab.py
diff --git a/Makefile b/Makefile
deleted file mode 100644
index 1c63b58..0000000
--- a/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-build: grammar.y lex.l ast.h ast.c
- bison \
- -o build/grammar.tab.c \
- -d grammar.y \
- -t \
- #-Wcounterexamples
- flex \
- -o build/lex.yy.c \
- -d \
- lex.l
- gcc \
- -o build/henshin \
- build/grammar.tab.c build/lex.yy.c \
- -I ./ \
- -lfl -ly
diff --git a/ast.c b/ast.c
deleted file mode 100644
index 2287b5c..0000000
--- a/ast.c
+++ /dev/null
@@ -1,45 +0,0 @@
-#include "ast.h"
-
-
-ast_node* create_node() {
- ast_node* node = malloc(sizeof(ast_node));
-
- return node;
-}
-
-ast_node* create_program_node(ast_node* previous_node, ast_node* current_node) {
- ast_node* node = malloc(sizeof(ast_node));
-
- node->type = PROGRAM_NODE;
- node->left = previous_node;
- node->right = current_node;
-
- return node;
-}
-
-ast_node* create_type_node(char* type) {
- ast_node* node = malloc(sizeof(ast_node));
- node->type = TYPE_NODE;
-
- ast_node_data* data = malloc(sizeof(ast_node_data));
- data->value = type;
- node->data = data;
-
- printf("type:: %s\n", type);
-
- return node;
-}
-
-ast_node* create_argument_node(char* identifier, ast_node* type) {
- ast_node* node = malloc(sizeof(ast_node));
- node->type = ARGUMENT_NODE;
-
- ast_node_data* data = malloc(sizeof(ast_node_data));
- data->value = identifier;
- data->type = type;
- node->data = data;
-
- printf("argument:: %s -- %s\n", identifier, type->data->value);
-
- return node;
-}
diff --git a/ast.h b/ast.h
deleted file mode 100644
index 7b2a35e..0000000
--- a/ast.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef AST_H
-#define AST_H
-
-#include <stdlib.h>
-
-enum NODE_TYPE {
- PROGRAM_NODE,
- TYPE_NODE,
- ARGUMENT_NODE
-};
-
-typedef struct ast_node_data {
- void* value;
- void* type;
-} ast_node_data;
-
-typedef struct ast_node {
- int type;
- ast_node_data* data;
- struct ast_node* left;
- struct ast_node* right;
-} ast_node;
-
-ast_node* create_program_node(ast_node* previous_node, ast_node* current_node);
-
-#endif // AST_H
diff --git a/grammar.y b/grammar.y
deleted file mode 100644
index a37dae6..0000000
--- a/grammar.y
+++ /dev/null
@@ -1,99 +0,0 @@
-%{
-#include <stdio.h>
-
-#include "ast.c"
-
-extern FILE *yyin;
-
-extern int yylex();
-extern void yyerror();
-
-ast_node* yyast = NULL;
-%}
-
-%define parse.error verbose
-
-%union {
- char* string;
- int number;
- ast_node* node;
-}
-
-%token OPERATOR_PLUS
-%token OPERATOR_MINUS
-%token <string> ASSIGN
-
-%token FUNCTION
-%token IF
-%token ELSE
-%token RETURN
-%token <string> CONST
-%token VAR
-
-%token <string> TYPE_INTEGER
-%token <string> TYPE_STRING
-%token <string> TYPE_VOID
-
-%token PARENTHESIS_LEFT
-%token PARENTHESIS_RIGHT
-%token BRACE_LEFT
-%token BRACE_RIGHT
-%token BRACKET_LEFT
-%token BRACKET_RIGHT
-
-%token COMMA;
-%token <string> COLON;
-
-%token <number> NUMBER;
-%token <string> IDENTIFIER
-%token COMMENT;
-
-%token END_OF_LINE;
-
-%type <node> program
-%type <node> function
-%type <node> arguments
-%type <node> argument
-%type <node> return_type
-%type <node> variable_type
-%type <node> statements
-%type <node> statement
-%type <node> expression
-
-%start program
-
-%%
-program:
- statement END_OF_LINE
- | program program { $$ = create_program_node($1, $2); }
-
-variable_type:
- TYPE_INTEGER { printf("variable_type:: %s\n", $1); $$ = create_type_node($1); }
- | TYPE_STRING { $$ = create_type_node($1); }
-
-
-statements:
- statement
- | statements END_OF_LINE statement
-
-statement:
- // const henshin: integer = 2
- CONST IDENTIFIER COLON variable_type ASSIGN expression { printf("$1: %s -- $2: %s -- $3: %s -- $4: %s -- $5: %s -- $6: %s\n", $1, $2, $3, "4", $5, "6"); }
-
-
-expression:
- NUMBER
-%%
-
-void main (int argc, char **argv)
-{
- #ifdef YYDEBUG
- yydebug = 1;
- #endif
-
- //henshin_lex();
- yyin = fopen(argv[1], "r");
- yyparse();
-
- printf("%s", yyast);
-}
diff --git a/parse.py b/henshin
index 17e3be1..9958da3 100755
--- a/parse.py
+++ b/henshin
@@ -2,12 +2,42 @@
import sys
+import argparse
+import logging
import ply.lex as lex
import ply.yacc as yacc
+from enum import Enum
-input = open(sys.argv[1]).read()
+# args
+
+# Command line: one positional argument naming the source file to parse.
+parser = argparse.ArgumentParser(
+    prog='henshin'
+)
+
+parser.add_argument('filename')
+
+args = parser.parse_args()
+
+
+# Read the whole source up front. The context manager closes the file handle
+# as soon as the text is in memory instead of leaving it open until the
+# interpreter happens to collect it.
+# NOTE(review): `input` shadows the builtin, but lexer.input(input) and
+# parser.parse(input) further down rely on this name, so it is kept.
+with open(args.filename) as source_file:
+    input = source_file.read()
+
+
+
+# log
+
+# Configure the root logger: DEBUG level, written to parselog.txt, with
+# filemode "w" so the file is overwritten on every run.
+logging.basicConfig(
+ level = logging.DEBUG,
+ filename = "parselog.txt",
+ filemode = "w",
+)
+
+# Root logger; passed as `debuglog` to lex.lex() and yacc.yacc() below.
+log = logging.getLogger()
+
+
+
+# Lexer
reserved = {
"function": "FUNCTION",
@@ -126,7 +156,7 @@ def t_error(t):
t.lexer.skip(1)
-lexer = lex.lex()
+lexer = lex.lex(debug=True, debuglog=log)
lexer.input(input)
lineno = 0
@@ -135,10 +165,45 @@ for token in lexer:
lineno = token.lineno
print("\nLine %s:" % token.lineno)
- print('%s: "%s" --' % (token.type, token.value), end=' ')
+ # print('%s: "%s" --' % (token.type, token.value), end=' ')
+ print(token.value, end=' ')
print("\n")
+
+# Parser
+
+class AstNodeExpressionType(Enum):
+ # Kind tag stored on AstNodeExpression leaves by p_identifier, p_number and p_string.
+ IDENTIFIER = 'identifier'
+ NUMBER = 'number'
+ STRING = 'string'
+
+# Common base class of every AST node. The tree dump in p_statements uses
+# isinstance(..., AstNode) to decide which attributes to descend into.
+class AstNode: pass
+
+class AstNodeVariableDeclarationStatement(AstNode):
+    """Variable declaration, e.g. ``const test: integer = 1 + 1;``.
+
+    Args:
+        type: Declarator as produced by ``variable_declarator`` (CONST or VAR).
+        name: Value of the IDENTIFIER token being declared.
+        value_type: Declared type as produced by ``variable_type``.
+        value: Node produced by the ``expression`` rule for the initializer.
+    """
+
+    def __init__(self, type, name, value_type, value):
+        # Attribute creation order is kept (type, name, value_type, value):
+        # the debug dump in p_statements iterates __dict__ in that order.
+        self.type, self.name = type, name
+        self.value_type, self.value = value_type, value
+
+class AstNodeVariableReassignmentStatement(AstNode):
+    """Assignment to an already declared variable, e.g. ``ply = "way cooler!!";``.
+
+    Args:
+        name: Identifier of the variable being assigned to.
+        value: Right-hand side of the assignment.
+    """
+
+    def __init__(self, name, value):
+        # A reassignment carries no declarator or type, so only the target
+        # name and the new value are stored. (There is no `type` parameter
+        # here; referring to `type` would pick up the Python builtin.)
+        self.name = name
+        self.value = value
+
+class AstNodeExpression(AstNode):
+    """Leaf expression node.
+
+    Args:
+        type: Kind tag; the grammar rules pass an AstNodeExpressionType member.
+        value: Value of the matched token.
+    """
+
+    def __init__(self, type, value):
+        # Same attribute order as before: type first, then value.
+        self.type, self.value = type, value
+
+class AstNodeOperatorExpression(AstNode):
+    """Binary operation node.
+
+    Args:
+        type: The operator; p_expression passes the operator token's value.
+        left: Node for the left operand.
+        right: Node for the right operand.
+    """
+
+    def __init__(self, type, left, right):
+        # Same attribute order as before: type, left, right.
+        self.type = type
+        self.left, self.right = left, right
+
+
precedence = (
('left', 'OPERATOR_PLUS', 'OPERATOR_MINUS'),
('left', 'OPERATOR_MULTIPLY', 'OPERATOR_DIVIDE'),
@@ -148,12 +213,25 @@ precedence = (
def p_statements(p):
'''statements : statement
| statement statements'''
-pass
+
+    # Debug output only: prints the first statement of this reduction as an
+    # indented tree. p[0] is not assigned here.
+    def resolve_nodes(node, level):
+        # One "> " per nesting level, computed once per node.
+        prefix = '> '*level
+        for attr_name, attr_value in vars(node).items():
+            if not isinstance(attr_value, AstNode):
+                # Plain value: print it as-is.
+                print(prefix, attr_name, attr_value)
+                continue
+            # Child node: print its class name, then descend one level.
+            print(prefix, attr_name, attr_value.__class__.__name__)
+            resolve_nodes(attr_value, level+1)
+
+    if p[1]:
+        print(p[1].__class__.__name__)
+        resolve_nodes(p[1], 0)
def p_statement(p):
'''statement : variable_declaration_statement
| variable_reassignment_statement'''
-pass
+
+ # Each alternative is a single non-terminal; pass its value through unchanged.
+ p[0] = p[1]
# def p_function(p):
# '''function : FUNCTION IDENTIFIER PARENTHESIS_LEFT function_parameters PARENTHESIS_RIGHT COLON return_type BRACKET_LEFT statements BRACKET_RIGHT'''
@@ -161,7 +239,8 @@ def p_statement(p):
def p_variable_declaration_statement(p):
'''variable_declaration_statement : variable_declarator IDENTIFIER COLON variable_type ASSIGN expression SEMICOLON'''
-pass
+
+ # p[1]=declarator, p[2]=name, p[4]=declared type, p[6]=initializer expression;
+ # the COLON (p[3]), ASSIGN (p[5]) and SEMICOLON (p[7]) symbols are not stored.
+ p[0] = AstNodeVariableDeclarationStatement(p[1], p[2], p[4], p[6])
def p_variable_reassignment_statement(p):
'''variable_reassignment_statement : IDENTIFIER ASSIGN expression SEMICOLON'''
@@ -170,28 +249,47 @@ def p_variable_reassignment_statement(p):
def p_variable_declarator(p):
'''variable_declarator : CONST
| VAR'''
-pass
+# Forward the matched CONST/VAR token value; the declaration rule stores it on its node.
+p[0] = p[1]
def p_variable_type(p):
'''variable_type : TYPE_INTEGER
| TYPE_STRING'''
-pass
+# Forward the matched type token value; the declaration rule stores it as value_type.
+p[0] = p[1]
+
+def p_identifier(p):
+    '''identifier : IDENTIFIER'''
+
+    # Wrap the IDENTIFIER token value in a leaf node tagged as an identifier.
+    leaf = AstNodeExpression(AstNodeExpressionType.IDENTIFIER, p[1])
+    p[0] = leaf
+
+def p_number(p):
+    '''number : NUMBER'''
+
+    # Wrap the NUMBER token value in a leaf node tagged as a number.
+    leaf = AstNodeExpression(AstNodeExpressionType.NUMBER, p[1])
+    p[0] = leaf
+
+def p_string(p):
+    '''string : STRING'''
+
+    # Wrap the STRING token value in a leaf node tagged as a string.
+    leaf = AstNodeExpression(AstNodeExpressionType.STRING, p[1])
+    p[0] = leaf
def p_expression(p):
-'''expression : IDENTIFIER
-| NUMBER
-| STRING
+ '''expression : identifier
+ | number
+ | string
| expression OPERATOR_PLUS expression
| expression OPERATOR_MINUS expression
| expression OPERATOR_MULTIPLY expression
| expression OPERATOR_DIVIDE expression'''
-pass
+
+    # len(p) == 4 only for the "expression OP expression" alternatives
+    # (result slot plus three symbols); every other alternative is a single
+    # already-built leaf node that is passed through.
+    is_binary = len(p) == 4
+    p[0] = AstNodeOperatorExpression(p[2], p[1], p[3]) if is_binary else p[1]
def p_error(p):
print("Syntax error in input!", p)
-parser = yacc.yacc()
+parser = yacc.yacc(debug=True, debuglog=log)
result = parser.parse(input)
-print(result)
+# print(result)
diff --git a/lex.l b/lex.l
deleted file mode 100644
index 64c3765..0000000
--- a/lex.l
+++ /dev/null
@@ -1,53 +0,0 @@
-%{
-#include "ast.h"
-#include "grammar.tab.h"
-%}
-
-%%
-"+" { return OPERATOR_PLUS; }
-"-" { return OPERATOR_MINUS; }
-"=" { return ASSIGN; }
-
-"function" { return FUNCTION; }
-"if" { return IF; }
-"else" { return ELSE; }
-"return" { return RETURN; }
-"const" { return CONST; }
-"var" { return VAR; }
-
-"integer" { return TYPE_INTEGER; }
-"string" { return TYPE_STRING; }
-"void" { return TYPE_VOID; }
-
-"(" { return PARENTHESIS_LEFT; }
-")" { return PARENTHESIS_RIGHT; }
-"{" { return BRACE_LEFT; }
-"}" { return BRACE_RIGHT; }
-"[" { return BRACKET_LEFT; }
-"]" { return BRACKET_RIGHT; }
-
-"," { return COMMA; }
-":" { return COLON; }
-
-[a-zA-Z][a-zA-Z0-9]* { yylval.string = yytext; return IDENTIFIER; }
-[0-9]+ { yylval.number = atoi(yytext); return NUMBER; }
-"//".* { return COMMENT; }
-
-\n { return END_OF_LINE; }
-[ \t] {}
-
-. { printf("undefined: %c\n", *yytext); }
-%%
-
-void henshin_lex(int argc, char **argv)
-{
- int tok;
-
- while (tok = yylex()) {
- printf("%d", tok);
- if (tok == NUMBER) {
- printf(" = %d", yylval);
- }
- printf("\n");
- }
-}
diff --git a/test/test.test b/test/test.test
index 5bbd0c0..5e3f420 100644
--- a/test/test.test
+++ b/test/test.test
@@ -4,3 +4,5 @@ var ply: string = "cool!";
ply = "way cooler!!";
const new: integer = henshin * 5 + 10;
+
+const test: integer = 1 + 1;