#!/usr/bin/env python3
"""Experimental PLY lexer and parser for a small scripting language.

Usage: parse.py SOURCE_FILE

The script prints the token stream of SOURCE_FILE grouped by source line,
then runs the grammar below over the same text and prints the parse result.
The grammar rules carry no semantic actions yet, so the result is ``None``.
"""

# Recovered from commit b5cc10cff15797bc9f89724ab53ac7d296fbbc0c
# ("trying ply", Daniel Weipert, 2023-08-25), which created parse.py.

import sys

import ply.lex as lex
import ply.yacc as yacc


# --------------------------------------------------------------------------
# Lexer
# --------------------------------------------------------------------------

# Keywords are lexed by t_IDENTIFIER and re-typed through this table, so no
# dedicated regex rule exists for any of them.
reserved = {
    "function": "FUNCTION",
    "if": "IF",
    "else": "ELSE",
    "return": "RETURN",
    "const": "CONST",
    "var": "VAR",

    "integer": "TYPE_INTEGER",
    "string": "TYPE_STRING",
    "void": "TYPE_VOID",
}

tokens = [
    "OPERATOR_PLUS",
    "OPERATOR_MINUS",
    "OPERATOR_MULTIPLY",
    "OPERATOR_DIVIDE",
    "ASSIGN",

    "OPERATOR_PIPE",
    "OPERATOR_PIPE_REPLACEMENT",

    "PARENTHESIS_LEFT",
    "PARENTHESIS_RIGHT",
    "BRACE_LEFT",
    "BRACE_RIGHT",
    "BRACKET_LEFT",
    "BRACKET_RIGHT",

    "BIT_OR",

    "NAMESPACE_ACCESSOR",

    "COMMA",
    "COLON",
    "SEMICOLON",

    "IDENTIFIER",
    "NUMBER",
    "STRING",
    "COMMENT",
] + list(reserved.values())


# String-defined rules are tried by PLY in order of decreasing regex length,
# so OPERATOR_PIPE (`|>`) is matched before BIT_OR (`|`).
t_OPERATOR_PLUS = r'\+'
t_OPERATOR_MINUS = r'-'
t_OPERATOR_MULTIPLY = r'\*'
t_OPERATOR_DIVIDE = r'/'
t_ASSIGN = r'='

t_OPERATOR_PIPE = r'\|>'
t_OPERATOR_PIPE_REPLACEMENT = r'\$'

# Braces are `{ }` and brackets are `[ ]`.
t_PARENTHESIS_LEFT = r'\('
t_PARENTHESIS_RIGHT = r'\)'
t_BRACE_LEFT = r'\{'
t_BRACE_RIGHT = r'\}'
t_BRACKET_LEFT = r'\['
t_BRACKET_RIGHT = r'\]'

t_BIT_OR = r'\|'

t_NAMESPACE_ACCESSOR = r'\.'

t_COMMA = r','
t_COLON = r':'
t_SEMICOLON = r';'

# Blanks and tabs between tokens are skipped without producing a token.
t_ignore = ' \t'


# NOTE: for every t_* function the docstring IS the token's regular
# expression, so explanatory text has to live in comments like this one.
# Function rules are tried in definition order, before all string rules.

# Identifiers and keywords: a keyword is an identifier found in `reserved`.
def t_IDENTIFIER(t):
    r'[a-zA-Z][a-zA-Z0-9_]*'
    t.type = reserved.get(t.value, 'IDENTIFIER')
    return t


# Unsigned decimal integer literal; the token value becomes an int.
def t_NUMBER(t):
    r'[0-9]+'
    t.value = int(t.value)
    return t


# Single-line string literal in double or single quotes (quotes are kept in
# the token value). The negated character classes stop at the first closing
# quote, so two literals on one line are lexed as two tokens, and the empty
# literal is accepted for both quote styles.
def t_STRING(t):
    r'("[^"\n]*"|\'[^\'\n]*\')'
    return t


# Line comment. Nothing is returned, so PLY discards the match. Because this
# is a function rule it wins over the string rule for OPERATOR_DIVIDE.
def t_COMMENT(t):
    r'//.*'


# Newlines produce no token; they only advance the line counter.
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)


# Report a character no rule matches, then skip it and keep lexing.
def t_error(t):
    print("undefined: '%s'" % t.value[0])
    t.lexer.skip(1)


# --------------------------------------------------------------------------
# Parser
# --------------------------------------------------------------------------

precedence = (
    ('left', 'OPERATOR_PLUS', 'OPERATOR_MINUS'),
    ('left', 'OPERATOR_MULTIPLY', 'OPERATOR_DIVIDE'),
)


# NOTE: for every p_* function the docstring IS the grammar production. The
# first p_* function in the file (p_statements) defines the start symbol.
# None of the rules assigns p[0] yet, so parsing only validates the input.

def p_statements(p):
    '''statements : statement
                  | statement statements'''


def p_statement(p):
    '''statement : variable_declaration_statement
                 | variable_reassignment_statement'''


# TODO: function definitions are not part of the grammar yet. Draft rule
# (function_parameters and return_type still have to be defined):
#
# def p_function(p):
#     '''function : FUNCTION IDENTIFIER PARENTHESIS_LEFT function_parameters PARENTHESIS_RIGHT COLON return_type BRACE_LEFT statements BRACE_RIGHT'''


def p_variable_declaration_statement(p):
    '''variable_declaration_statement : variable_declarator IDENTIFIER COLON variable_type ASSIGN expression SEMICOLON'''


def p_variable_reassignment_statement(p):
    '''variable_reassignment_statement : IDENTIFIER ASSIGN expression SEMICOLON'''


def p_variable_declarator(p):
    '''variable_declarator : CONST
                           | VAR'''


def p_variable_type(p):
    '''variable_type : TYPE_INTEGER
                     | TYPE_STRING'''


def p_expression(p):
    '''expression : IDENTIFIER
                  | NUMBER
                  | STRING
                  | expression OPERATOR_PLUS expression
                  | expression OPERATOR_MINUS expression
                  | expression OPERATOR_MULTIPLY expression
                  | expression OPERATOR_DIVIDE expression'''


# Syntax-error callback. PLY passes the offending token, or None when the
# input ended while the parser still expected more tokens.
def p_error(p):
    if p is None:
        print("Syntax error in input! Unexpected end of input")
    else:
        print("Syntax error in input!", p)


# --------------------------------------------------------------------------
# Driver
# --------------------------------------------------------------------------

def dump_tokens(lexer, source):
    """Print every token of *source*, grouped under a "Line N:" heading.

    The lexer is fed *source* and consumed completely; afterwards its
    ``lineno`` counter points past the last line of the input.
    """
    lexer.input(source)

    current_line = 0
    for token in lexer:
        if token.lineno != current_line:
            current_line = token.lineno
            print("\nLine %s:" % token.lineno)

        print('%s: "%s" --' % (token.type, token.value), end=' ')
    print("\n")


def main(argv=None):
    """Tokenize and parse the file named by the single command-line argument.

    *argv* is the argument list without the program name and defaults to
    ``sys.argv[1:]``. Returns the process exit status: 0 after a run, 2 when
    the argument count is wrong.
    """
    args = sys.argv[1:] if argv is None else argv
    if len(args) != 1:
        print("usage: parse.py SOURCE_FILE", file=sys.stderr)
        return 2

    with open(args[0], encoding="utf-8") as handle:
        source = handle.read()

    # Pass the module explicitly so PLY collects the t_* / p_* definitions
    # from this file regardless of which frame calls lex() / yacc().
    this_module = sys.modules[__name__]

    lexer = lex.lex(module=this_module)
    dump_tokens(lexer, source)

    # Lexer.input() rewinds the position but not the line counter; reset it
    # so tokens seen by the parser carry line numbers starting at 1 again.
    lexer.lineno = 1

    parser = yacc.yacc(module=this_module)
    result = parser.parse(source, lexer=lexer)
    print(result)
    return 0


if __name__ == "__main__":
    sys.exit(main())