From 5f8c1d144b3c91f2c4ba75d709a74d83c1f3d5a0 Mon Sep 17 00:00:00 2001 From: Daniel Weipert Date: Sat, 26 Aug 2023 13:44:07 +0200 Subject: ast and cleanup --- .gitignore | 3 +- Makefile | 15 --- ast.c | 45 --------- ast.h | 26 ----- grammar.y | 99 ------------------- henshin | 295 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ lex.l | 53 ----------- parse.py | 197 -------------------------------------- test/test.test | 2 + 9 files changed, 298 insertions(+), 437 deletions(-) delete mode 100644 Makefile delete mode 100644 ast.c delete mode 100644 ast.h delete mode 100644 grammar.y create mode 100755 henshin delete mode 100644 lex.l delete mode 100755 parse.py diff --git a/.gitignore b/.gitignore index 78b98dd..dfdcc7a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ -/build/ - __pycache__ parser.out +parselog.txt parsetab.py diff --git a/Makefile b/Makefile deleted file mode 100644 index 1c63b58..0000000 --- a/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -build: grammar.y lex.l ast.h ast.c - bison \ - -o build/grammar.tab.c \ - -d grammar.y \ - -t \ - #-Wcounterexamples - flex \ - -o build/lex.yy.c \ - -d \ - lex.l - gcc \ - -o build/henshin \ - build/grammar.tab.c build/lex.yy.c \ - -I ./ \ - -lfl -ly diff --git a/ast.c b/ast.c deleted file mode 100644 index 2287b5c..0000000 --- a/ast.c +++ /dev/null @@ -1,45 +0,0 @@ -#include "ast.h" - - -ast_node* create_node() { - ast_node* node = malloc(sizeof(ast_node)); - - return node; -} - -ast_node* create_program_node(ast_node* previous_node, ast_node* current_node) { - ast_node* node = malloc(sizeof(ast_node)); - - node->type = PROGRAM_NODE; - node->left = previous_node; - node->right = current_node; - - return node; -} - -ast_node* create_type_node(char* type) { - ast_node* node = malloc(sizeof(ast_node)); - node->type = TYPE_NODE; - - ast_node_data* data = malloc(sizeof(ast_node_data)); - data->value = type; - node->data = data; - - printf("type:: %s\n", type); - - return node; -} - -ast_node* create_argument_node(char* identifier, ast_node* type) { - ast_node* node = malloc(sizeof(ast_node)); - node->type = ARGUMENT_NODE; - - ast_node_data* data = malloc(sizeof(ast_node_data)); - data->value = identifier; - data->type = type; - node->data = data; - - printf("argument:: %s -- %s\n", identifier, type->data->value); - - return node; -} diff --git a/ast.h b/ast.h deleted file mode 100644 index 7b2a35e..0000000 --- a/ast.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef AST_H -#define AST_H - -#include - -enum NODE_TYPE { - PROGRAM_NODE, - TYPE_NODE, - ARGUMENT_NODE -}; - -typedef struct ast_node_data { - void* value; - void* type; -} ast_node_data; - -typedef struct ast_node { - int type; - ast_node_data* data; - struct ast_node* left; - struct ast_node* right; -} ast_node; - -ast_node* create_program_node(ast_node* previous_node, ast_node* current_node); - -#endif // AST_H diff --git a/grammar.y b/grammar.y deleted file mode 100644 index a37dae6..0000000 --- a/grammar.y +++ /dev/null @@ -1,99 +0,0 @@ -%{ -#include - -#include "ast.c" - -extern FILE *yyin; - -extern int yylex(); -extern void yyerror(); - -ast_node* yyast = NULL; -%} - -%define parse.error verbose - -%union { - char* string; - int number; - ast_node* node; -} - -%token OPERATOR_PLUS -%token OPERATOR_MINUS -%token ASSIGN - -%token FUNCTION -%token IF -%token ELSE -%token RETURN -%token CONST -%token VAR - -%token TYPE_INTEGER -%token TYPE_STRING -%token TYPE_VOID - -%token PARENTHESIS_LEFT -%token PARENTHESIS_RIGHT -%token BRACE_LEFT -%token BRACE_RIGHT -%token BRACKET_LEFT -%token BRACKET_RIGHT - -%token COMMA; -%token COLON; - -%token NUMBER; -%token IDENTIFIER -%token COMMENT; - -%token END_OF_LINE; - -%type program -%type function -%type arguments -%type argument -%type return_type -%type variable_type -%type statements -%type statement -%type expression - -%start program - -%% -program: - statement END_OF_LINE - | program program { $$ = create_program_node($1, $2); } - -variable_type: - TYPE_INTEGER { printf("variable_type:: %s\n", $1); $$ = create_type_node($1); } - | TYPE_STRING { $$ = create_type_node($1); } - - -statements: - statement - | statements END_OF_LINE statement - -statement: - // const henshin: integer = 2 - CONST IDENTIFIER COLON variable_type ASSIGN expression { printf("$1: %s -- $2: %s -- $3: %s -- $4: %s -- $5: %s -- $6: %s\n", $1, $2, $3, "4", $5, "6"); } - - -expression: - NUMBER -%% - -void main (int argc, char **argv) -{ - #ifdef YYDEBUG - yydebug = 1; - #endif - - //henshin_lex(); - yyin = fopen(argv[1], "r"); - yyparse(); - - printf("%s", yyast); -} diff --git a/henshin b/henshin new file mode 100755 index 0000000..9958da3 --- /dev/null +++ b/henshin @@ -0,0 +1,295 @@ +#!/bin/env python3 + + +import sys +import argparse +import logging +import ply.lex as lex +import ply.yacc as yacc +from enum import Enum + + + +# args + +parser = argparse.ArgumentParser( + prog='henshin' +) + +parser.add_argument('filename') + +args = parser.parse_args() + + +input = open(args.filename).read() + + + +# log + +logging.basicConfig( + level = logging.DEBUG, + filename = "parselog.txt", + filemode = "w", +) + +log = logging.getLogger() + + + +# Lexer + +reserved = { + "function": "FUNCTION", + "if": "IF", + "else": "ELSE", + "return": "RETURN", + "const": "CONST", + "var": "VAR", + + + "integer": "TYPE_INTEGER", + "string": "TYPE_STRING", + "void": "TYPE_VOID", +} + +tokens = [ + "OPERATOR_PLUS", + "OPERATOR_MINUS", + "OPERATOR_MULTIPLY", + "OPERATOR_DIVIDE", + "ASSIGN", + + "OPERATOR_PIPE", + "OPERATOR_PIPE_REPLACEMENT", + + + "PARENTHESIS_LEFT", + "PARENTHESIS_RIGHT", + "BRACE_LEFT", + "BRACE_RIGHT", + "BRACKET_LEFT", + "BRACKET_RIGHT", + + + "BIT_OR", + + + "NAMESPACE_ACCESSOR", + + + "COMMA", + "COLON", + "SEMICOLON", + + + "IDENTIFIER", + "NUMBER", + "STRING", + "COMMENT", +] + list(reserved.values()) + + +t_OPERATOR_PLUS = r'\+' +t_OPERATOR_MINUS = r'-' +t_OPERATOR_MULTIPLY = r'\*' +t_OPERATOR_DIVIDE = r'/' +t_ASSIGN = '=' + +t_OPERATOR_PIPE = r'\|>' +t_OPERATOR_PIPE_REPLACEMENT = r'\$' + + +t_PARENTHESIS_LEFT = r'\(' +t_PARENTHESIS_RIGHT = r'\)' +t_BRACE_LEFT = r'\[' +t_BRACE_RIGHT = r'\]' +t_BRACKET_LEFT = r'{' +t_BRACKET_RIGHT = r'}' + + +t_BIT_OR = r'\|' + + +t_NAMESPACE_ACCESSOR = r'\.' + + +t_COMMA = r',' +t_COLON = r':' +t_SEMICOLON = r';' + + +def t_IDENTIFIER(t): + r'[a-zA-Z][a-zA-Z0-9_]*' + + t.type = reserved.get(t.value, 'IDENTIFIER') + + return t + +def t_NUMBER(t): + r'[0-9]+' + + t.value = int(t.value) + + return t + +def t_STRING(t): + r'(".+"|\'.*\')' + + return t + +def t_COMMENT(t): + r'//.*' + + pass + + +def t_newline(t): + r'\n+' + + t.lexer.lineno += len(t.value) + +t_ignore = ' \t' + +def t_error(t): + print("undefined: '%s'" % t.value[0]) + t.lexer.skip(1) + + +lexer = lex.lex(debug=True, debuglog=log) +lexer.input(input) + +lineno = 0 +for token in lexer: + if token.lineno != lineno: + lineno = token.lineno + print("\nLine %s:" % token.lineno) + + # print('%s: "%s" --' % (token.type, token.value), end=' ') + print(token.value, end=' ') +print("\n") + + + +# Parser + +class AstNodeExpressionType(Enum): + IDENTIFIER = 'identifier' + NUMBER = 'number' + STRING = 'string' + +class AstNode: pass + +class AstNodeVariableDeclarationStatement(AstNode): + def __init__(self, type, name, value_type, value): + self.type = type + self.name = name + self.value_type = value_type + self.value = value + +class AstNodeVariableReassignmentStatement(AstNode): + def __init__(self, name, value): + self.type = type + self.value = value + +class AstNodeExpression(AstNode): + def __init__(self, type, value): + self.type = type + self.value = value + +class AstNodeOperatorExpression(AstNode): + def __init__(self, type, left, right): + self.type = type + self.left = left + self.right = right + + +precedence = ( + ('left', 'OPERATOR_PLUS', 'OPERATOR_MINUS'), + ('left', 'OPERATOR_MULTIPLY', 'OPERATOR_DIVIDE'), +) + + +def p_statements(p): + '''statements : statement + | statement statements''' + + def resolve_nodes(node, level): + node_dict = node.__dict__ + for property in node_dict: + if isinstance(node_dict[property], AstNode): + print('> '*level, property, node_dict[property].__class__.__name__) + resolve_nodes(node_dict[property], level+1) + else: + print('> '*level, property, node_dict[property]) + + if p[1]: + print(p[1].__class__.__name__) + resolve_nodes(p[1], 0) + +def p_statement(p): + '''statement : variable_declaration_statement + | variable_reassignment_statement''' + + p[0] = p[1] + +# def p_function(p): +# '''function : FUNCTION IDENTIFIER PARENTHESIS_LEFT function_parameters PARENTHESIS_RIGHT COLON return_type BRACKET_LEFT statements BRACKET_RIGHT''' +# pass + +def p_variable_declaration_statement(p): + '''variable_declaration_statement : variable_declarator IDENTIFIER COLON variable_type ASSIGN expression SEMICOLON''' + + p[0] = AstNodeVariableDeclarationStatement(p[1], p[2], p[4], p[6]) + +def p_variable_reassignment_statement(p): + '''variable_reassignment_statement : IDENTIFIER ASSIGN expression SEMICOLON''' + pass + +def p_variable_declarator(p): + '''variable_declarator : CONST + | VAR''' + p[0] = p[1] + +def p_variable_type(p): + '''variable_type : TYPE_INTEGER + | TYPE_STRING''' + p[0] = p[1] + +def p_identifier(p): + '''identifier : IDENTIFIER''' + + p[0] = AstNodeExpression(AstNodeExpressionType.IDENTIFIER, p[1]) + +def p_number(p): + '''number : NUMBER''' + + p[0] = AstNodeExpression(AstNodeExpressionType.NUMBER, p[1]) + +def p_string(p): + '''string : STRING''' + + p[0] = AstNodeExpression(AstNodeExpressionType.STRING, p[1]) + +def p_expression(p): + '''expression : identifier + | number + | string + | expression OPERATOR_PLUS expression + | expression OPERATOR_MINUS expression + | expression OPERATOR_MULTIPLY expression + | expression OPERATOR_DIVIDE expression''' + + if len(p) == 4: + p[0] = AstNodeOperatorExpression(p[2], p[1], p[3]) + else: + p[0] = p[1] + + +def p_error(p): + print("Syntax error in input!", p) + + +parser = yacc.yacc(debug=True, debuglog=log) +result = parser.parse(input) +# print(result) diff --git a/lex.l b/lex.l deleted file mode 100644 index 64c3765..0000000 --- a/lex.l +++ /dev/null @@ -1,53 +0,0 @@ -%{ -#include "ast.h" -#include "grammar.tab.h" -%} - -%% -"+" { return OPERATOR_PLUS; } -"-" { return OPERATOR_MINUS; } -"=" { return ASSIGN; } - -"function" { return FUNCTION; } -"if" { return IF; } -"else" { return ELSE; } -"return" { return RETURN; } -"const" { return CONST; } -"var" { return VAR; } - -"integer" { return TYPE_INTEGER; } -"string" { return TYPE_STRING; } -"void" { return TYPE_VOID; } - -"(" { return PARENTHESIS_LEFT; } -")" { return PARENTHESIS_RIGHT; } -"{" { return BRACE_LEFT; } -"}" { return BRACE_RIGHT; } -"[" { return BRACKET_LEFT; } -"]" { return BRACKET_RIGHT; } - -"," { return COMMA; } -":" { return COLON; } - -[a-zA-Z][a-zA-Z0-9]* { yylval.string = yytext; return IDENTIFIER; } -[0-9]+ { yylval.number = atoi(yytext); return NUMBER; } -"//".* { return COMMENT; } - -\n { return END_OF_LINE; } -[ \t] {} - -. { printf("undefined: %c\n", *yytext); } -%% - -void henshin_lex(int argc, char **argv) -{ - int tok; - - while (tok = yylex()) { - printf("%d", tok); - if (tok == NUMBER) { - printf(" = %d", yylval); - } - printf("\n"); - } -} diff --git a/parse.py b/parse.py deleted file mode 100755 index 17e3be1..0000000 --- a/parse.py +++ /dev/null @@ -1,197 +0,0 @@ -#!/bin/env python3 - - -import sys -import ply.lex as lex -import ply.yacc as yacc - - -input = open(sys.argv[1]).read() - - -reserved = { - "function": "FUNCTION", - "if": "IF", - "else": "ELSE", - "return": "RETURN", - "const": "CONST", - "var": "VAR", - - - "integer": "TYPE_INTEGER", - "string": "TYPE_STRING", - "void": "TYPE_VOID", -} - -tokens = [ - "OPERATOR_PLUS", - "OPERATOR_MINUS", - "OPERATOR_MULTIPLY", - "OPERATOR_DIVIDE", - "ASSIGN", - - "OPERATOR_PIPE", - "OPERATOR_PIPE_REPLACEMENT", - - - "PARENTHESIS_LEFT", - "PARENTHESIS_RIGHT", - "BRACE_LEFT", - "BRACE_RIGHT", - "BRACKET_LEFT", - "BRACKET_RIGHT", - - - "BIT_OR", - - - "NAMESPACE_ACCESSOR", - - - "COMMA", - "COLON", - "SEMICOLON", - - - "IDENTIFIER", - "NUMBER", - "STRING", - "COMMENT", -] + list(reserved.values()) - - -t_OPERATOR_PLUS = r'\+' -t_OPERATOR_MINUS = r'-' -t_OPERATOR_MULTIPLY = r'\*' -t_OPERATOR_DIVIDE = r'/' -t_ASSIGN = '=' - -t_OPERATOR_PIPE = r'\|>' -t_OPERATOR_PIPE_REPLACEMENT = r'\$' - - -t_PARENTHESIS_LEFT = r'\(' -t_PARENTHESIS_RIGHT = r'\)' -t_BRACE_LEFT = r'\[' -t_BRACE_RIGHT = r'\]' -t_BRACKET_LEFT = r'{' -t_BRACKET_RIGHT = r'}' - - -t_BIT_OR = r'\|' - - -t_NAMESPACE_ACCESSOR = r'\.' - - -t_COMMA = r',' -t_COLON = r':' -t_SEMICOLON = r';' - - -def t_IDENTIFIER(t): - r'[a-zA-Z][a-zA-Z0-9_]*' - - t.type = reserved.get(t.value, 'IDENTIFIER') - - return t - -def t_NUMBER(t): - r'[0-9]+' - - t.value = int(t.value) - - return t - -def t_STRING(t): - r'(".+"|\'.*\')' - - return t - -def t_COMMENT(t): - r'//.*' - - pass - - -def t_newline(t): - r'\n+' - - t.lexer.lineno += len(t.value) - -t_ignore = ' \t' - -def t_error(t): - print("undefined: '%s'" % t.value[0]) - t.lexer.skip(1) - - -lexer = lex.lex() -lexer.input(input) - -lineno = 0 -for token in lexer: - if token.lineno != lineno: - lineno = token.lineno - print("\nLine %s:" % token.lineno) - - print('%s: "%s" --' % (token.type, token.value), end=' ') -print("\n") - - -precedence = ( - ('left', 'OPERATOR_PLUS', 'OPERATOR_MINUS'), - ('left', 'OPERATOR_MULTIPLY', 'OPERATOR_DIVIDE'), -) - - -def p_statements(p): - '''statements : statement - | statement statements''' - pass - -def p_statement(p): - '''statement : variable_declaration_statement - | variable_reassignment_statement''' - pass - -# def p_function(p): -# '''function : FUNCTION IDENTIFIER PARENTHESIS_LEFT function_parameters PARENTHESIS_RIGHT COLON return_type BRACKET_LEFT statements BRACKET_RIGHT''' -# pass - -def p_variable_declaration_statement(p): - '''variable_declaration_statement : variable_declarator IDENTIFIER COLON variable_type ASSIGN expression SEMICOLON''' - pass - -def p_variable_reassignment_statement(p): - '''variable_reassignment_statement : IDENTIFIER ASSIGN expression SEMICOLON''' - pass - -def p_variable_declarator(p): - '''variable_declarator : CONST - | VAR''' - pass - -def p_variable_type(p): - '''variable_type : TYPE_INTEGER - | TYPE_STRING''' - pass - -def p_expression(p): - '''expression : IDENTIFIER - | NUMBER - | STRING - | expression OPERATOR_PLUS expression - | expression OPERATOR_MINUS expression - | expression OPERATOR_MULTIPLY expression - | expression OPERATOR_DIVIDE expression''' - pass - - -def p_error(p): - print("Syntax error in input!", p) - - -parser = yacc.yacc() -result = parser.parse(input) -print(result) diff --git a/test/test.test b/test/test.test index 5bbd0c0..5e3f420 100644 --- a/test/test.test +++ b/test/test.test @@ -4,3 +4,5 @@ var ply: string = "cool!"; ply = "way cooler!!"; const new: integer = henshin * 5 + 10; + +const test: integer = 1 + 1; -- cgit v1.2.3