diff options
author | Daniel Weipert <code@drogueronin.de> | 2023-08-26 13:44:07 +0200 |
---|---|---|
committer | Daniel Weipert <code@drogueronin.de> | 2023-08-26 13:44:07 +0200 |
commit | 5f8c1d144b3c91f2c4ba75d709a74d83c1f3d5a0 (patch) | |
tree | f842fc870c24e5b913aa277747985f68f99b951a | |
parent | b5cc10cff15797bc9f89724ab53ac7d296fbbc0c (diff) |
ast and cleanup
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | Makefile | 15 | ||||
-rw-r--r-- | ast.c | 45 | ||||
-rw-r--r-- | ast.h | 26 | ||||
-rw-r--r-- | grammar.y | 99 | ||||
-rwxr-xr-x | henshin (renamed from parse.py) | 126 | ||||
-rw-r--r-- | lex.l | 53 | ||||
-rw-r--r-- | test/test.test | 2 |
8 files changed, 115 insertions, 254 deletions
@@ -1,5 +1,4 @@ -/build/ - __pycache__ parser.out +parselog.txt parsetab.py diff --git a/Makefile b/Makefile deleted file mode 100644 index 1c63b58..0000000 --- a/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -build: grammar.y lex.l ast.h ast.c - bison \ - -o build/grammar.tab.c \ - -d grammar.y \ - -t \ - #-Wcounterexamples - flex \ - -o build/lex.yy.c \ - -d \ - lex.l - gcc \ - -o build/henshin \ - build/grammar.tab.c build/lex.yy.c \ - -I ./ \ - -lfl -ly @@ -1,45 +0,0 @@ -#include "ast.h" - - -ast_node* create_node() { - ast_node* node = malloc(sizeof(ast_node)); - - return node; -} - -ast_node* create_program_node(ast_node* previous_node, ast_node* current_node) { - ast_node* node = malloc(sizeof(ast_node)); - - node->type = PROGRAM_NODE; - node->left = previous_node; - node->right = current_node; - - return node; -} - -ast_node* create_type_node(char* type) { - ast_node* node = malloc(sizeof(ast_node)); - node->type = TYPE_NODE; - - ast_node_data* data = malloc(sizeof(ast_node_data)); - data->value = type; - node->data = data; - - printf("type:: %s\n", type); - - return node; -} - -ast_node* create_argument_node(char* identifier, ast_node* type) { - ast_node* node = malloc(sizeof(ast_node)); - node->type = ARGUMENT_NODE; - - ast_node_data* data = malloc(sizeof(ast_node_data)); - data->value = identifier; - data->type = type; - node->data = data; - - printf("argument:: %s -- %s\n", identifier, type->data->value); - - return node; -} @@ -1,26 +0,0 @@ -#ifndef AST_H -#define AST_H - -#include <stdlib.h> - -enum NODE_TYPE { - PROGRAM_NODE, - TYPE_NODE, - ARGUMENT_NODE -}; - -typedef struct ast_node_data { - void* value; - void* type; -} ast_node_data; - -typedef struct ast_node { - int type; - ast_node_data* data; - struct ast_node* left; - struct ast_node* right; -} ast_node; - -ast_node* create_program_node(ast_node* previous_node, ast_node* current_node); - -#endif // AST_H diff --git a/grammar.y b/grammar.y deleted file mode 100644 index a37dae6..0000000 --- a/grammar.y +++ /dev/null @@ -1,99 +0,0 @@ -%{ -#include <stdio.h> - -#include "ast.c" - -extern FILE *yyin; - -extern int yylex(); -extern void yyerror(); - -ast_node* yyast = NULL; -%} - -%define parse.error verbose - -%union { - char* string; - int number; - ast_node* node; -} - -%token OPERATOR_PLUS -%token OPERATOR_MINUS -%token <string> ASSIGN - -%token FUNCTION -%token IF -%token ELSE -%token RETURN -%token <string> CONST -%token VAR - -%token <string> TYPE_INTEGER -%token <string> TYPE_STRING -%token <string> TYPE_VOID - -%token PARENTHESIS_LEFT -%token PARENTHESIS_RIGHT -%token BRACE_LEFT -%token BRACE_RIGHT -%token BRACKET_LEFT -%token BRACKET_RIGHT - -%token COMMA; -%token <string> COLON; - -%token <number> NUMBER; -%token <string> IDENTIFIER -%token COMMENT; - -%token END_OF_LINE; - -%type <node> program -%type <node> function -%type <node> arguments -%type <node> argument -%type <node> return_type -%type <node> variable_type -%type <node> statements -%type <node> statement -%type <node> expression - -%start program - -%% -program: - statement END_OF_LINE - | program program { $$ = create_program_node($1, $2); } - -variable_type: - TYPE_INTEGER { printf("variable_type:: %s\n", $1); $$ = create_type_node($1); } - | TYPE_STRING { $$ = create_type_node($1); } - - -statements: - statement - | statements END_OF_LINE statement - -statement: - // const henshin: integer = 2 - CONST IDENTIFIER COLON variable_type ASSIGN expression { printf("$1: %s -- $2: %s -- $3: %s -- $4: %s -- $5: %s -- $6: %s\n", $1, $2, $3, "4", $5, "6"); } - - -expression: - NUMBER -%% - -void main (int argc, char **argv) -{ - #ifdef YYDEBUG - yydebug = 1; - #endif - - //henshin_lex(); - yyin = fopen(argv[1], "r"); - yyparse(); - - printf("%s", yyast); -} @@ -2,12 +2,42 @@ import sys +import argparse +import logging import ply.lex as lex import ply.yacc as yacc +from enum import Enum -input = open(sys.argv[1]).read() +# args + +parser = argparse.ArgumentParser( + prog='henshin' +) + +parser.add_argument('filename') + +args = parser.parse_args() + + +input = open(args.filename).read() + + + +# log + +logging.basicConfig( + level = logging.DEBUG, + filename = "parselog.txt", + filemode = "w", +) + +log = logging.getLogger() + + + +# Lexer reserved = { "function": "FUNCTION", @@ -126,7 +156,7 @@ def t_error(t): t.lexer.skip(1) -lexer = lex.lex() +lexer = lex.lex(debug=True, debuglog=log) lexer.input(input) lineno = 0 @@ -135,10 +165,45 @@ for token in lexer: lineno = token.lineno print("\nLine %s:" % token.lineno) - print('%s: "%s" --' % (token.type, token.value), end=' ') + # print('%s: "%s" --' % (token.type, token.value), end=' ') + print(token.value, end=' ') print("\n") + +# Parser + +class AstNodeExpressionType(Enum): + IDENTIFIER = 'identifier' + NUMBER = 'number' + STRING = 'string' + +class AstNode: pass + +class AstNodeVariableDeclarationStatement(AstNode): + def __init__(self, type, name, value_type, value): + self.type = type + self.name = name + self.value_type = value_type + self.value = value + +class AstNodeVariableReassignmentStatement(AstNode): + def __init__(self, name, value): + self.type = type + self.value = value + +class AstNodeExpression(AstNode): + def __init__(self, type, value): + self.type = type + self.value = value + +class AstNodeOperatorExpression(AstNode): + def __init__(self, type, left, right): + self.type = type + self.left = left + self.right = right + + precedence = ( ('left', 'OPERATOR_PLUS', 'OPERATOR_MINUS'), ('left', 'OPERATOR_MULTIPLY', 'OPERATOR_DIVIDE'), @@ -148,12 +213,25 @@ precedence = ( def p_statements(p): '''statements : statement | statement statements''' - pass + + def resolve_nodes(node, level): + node_dict = node.__dict__ + for property in node_dict: + if isinstance(node_dict[property], AstNode): + print('> '*level, property, node_dict[property].__class__.__name__) + resolve_nodes(node_dict[property], level+1) + else: + print('> '*level, property, node_dict[property]) + + if p[1]: + print(p[1].__class__.__name__) + resolve_nodes(p[1], 0) def p_statement(p): '''statement : variable_declaration_statement | variable_reassignment_statement''' - pass + + p[0] = p[1] # def p_function(p): # '''function : FUNCTION IDENTIFIER PARENTHESIS_LEFT function_parameters PARENTHESIS_RIGHT COLON return_type BRACKET_LEFT statements BRACKET_RIGHT''' @@ -161,7 +239,8 @@ def p_statement(p): def p_variable_declaration_statement(p): '''variable_declaration_statement : variable_declarator IDENTIFIER COLON variable_type ASSIGN expression SEMICOLON''' - pass + + p[0] = AstNodeVariableDeclarationStatement(p[1], p[2], p[4], p[6]) def p_variable_reassignment_statement(p): '''variable_reassignment_statement : IDENTIFIER ASSIGN expression SEMICOLON''' @@ -170,28 +249,47 @@ def p_variable_reassignment_statement(p): def p_variable_declarator(p): '''variable_declarator : CONST | VAR''' - pass + p[0] = p[1] def p_variable_type(p): '''variable_type : TYPE_INTEGER | TYPE_STRING''' - pass + p[0] = p[1] + +def p_identifier(p): + '''identifier : IDENTIFIER''' + + p[0] = AstNodeExpression(AstNodeExpressionType.IDENTIFIER, p[1]) + +def p_number(p): + '''number : NUMBER''' + + p[0] = AstNodeExpression(AstNodeExpressionType.NUMBER, p[1]) + +def p_string(p): + '''string : STRING''' + + p[0] = AstNodeExpression(AstNodeExpressionType.STRING, p[1]) def p_expression(p): - '''expression : IDENTIFIER - | NUMBER - | STRING + '''expression : identifier + | number + | string | expression OPERATOR_PLUS expression | expression OPERATOR_MINUS expression | expression OPERATOR_MULTIPLY expression | expression OPERATOR_DIVIDE expression''' - pass + + if len(p) == 4: + p[0] = AstNodeOperatorExpression(p[2], p[1], p[3]) + else: + p[0] = p[1] def p_error(p): print("Syntax error in input!", p) -parser = yacc.yacc() +parser = yacc.yacc(debug=True, debuglog=log) result = parser.parse(input) -print(result) +# print(result) @@ -1,53 +0,0 @@ -%{ -#include "ast.h" -#include "grammar.tab.h" -%} - -%% -"+" { return OPERATOR_PLUS; } -"-" { return OPERATOR_MINUS; } -"=" { return ASSIGN; } - -"function" { return FUNCTION; } -"if" { return IF; } -"else" { return ELSE; } -"return" { return RETURN; } -"const" { return CONST; } -"var" { return VAR; } - -"integer" { return TYPE_INTEGER; } -"string" { return TYPE_STRING; } -"void" { return TYPE_VOID; } - -"(" { return PARENTHESIS_LEFT; } -")" { return PARENTHESIS_RIGHT; } -"{" { return BRACE_LEFT; } -"}" { return BRACE_RIGHT; } -"[" { return BRACKET_LEFT; } -"]" { return BRACKET_RIGHT; } - -"," { return COMMA; } -":" { return COLON; } - -[a-zA-Z][a-zA-Z0-9]* { yylval.string = yytext; return IDENTIFIER; } -[0-9]+ { yylval.number = atoi(yytext); return NUMBER; } -"//".* { return COMMENT; } - -\n { return END_OF_LINE; } -[ \t] {} - -. { printf("undefined: %c\n", *yytext); } -%% - -void henshin_lex(int argc, char **argv) -{ - int tok; - - while (tok = yylex()) { - printf("%d", tok); - if (tok == NUMBER) { - printf(" = %d", yylval); - } - printf("\n"); - } -} diff --git a/test/test.test b/test/test.test index 5bbd0c0..5e3f420 100644 --- a/test/test.test +++ b/test/test.test @@ -4,3 +4,5 @@ var ply: string = "cool!"; ply = "way cooler!!"; const new: integer = henshin * 5 + 10; + +const test: integer = 1 + 1; |