#!/usr/bin/env python3
"""Lexer and parser front end, built with PLY, for a small scripting language.

Usage: pass the path of the source file to process as the first
command-line argument.
"""
import sys

import ply.lex as lex
import ply.yacc as yacc

if len(sys.argv) < 2:
    sys.exit("usage: %s <source-file>" % sys.argv[0])

# Read the whole program text up front; the context manager closes the file
# as soon as the text is in memory.
# NOTE: the name `input` shadows the builtin, but it is kept because the
# lexer/parser driver code later in this file refers to it by this name.
with open(sys.argv[1], encoding="utf-8") as source_file:
    input = source_file.read()

# Keyword spelling -> token type. Keywords are matched by the identifier
# rule and then re-tagged through this table.
reserved = {
    "function": "FUNCTION",
    "if": "IF",
    "else": "ELSE",
    "return": "RETURN",
    "const": "CONST",
    "var": "VAR",
    "integer": "TYPE_INTEGER",
    "string": "TYPE_STRING",
    "void": "TYPE_VOID",
}

# Every token type the lexer may produce (PLY requires this list).
tokens = [
    "OPERATOR_PLUS",
    "OPERATOR_MINUS",
    "OPERATOR_MULTIPLY",
    "OPERATOR_DIVIDE",
    "ASSIGN",
    "OPERATOR_PIPE",
    "OPERATOR_PIPE_REPLACEMENT",
    "PARENTHESIS_LEFT",
    "PARENTHESIS_RIGHT",
    "BRACE_LEFT",
    "BRACE_RIGHT",
    "BRACKET_LEFT",
    "BRACKET_RIGHT",
    "BIT_OR",
    "NAMESPACE_ACCESSOR",
    "COMMA",
    "COLON",
    "SEMICOLON",
    "IDENTIFIER",
    "NUMBER",
    "STRING",
    "COMMENT",
] + list(reserved.values())

# Simple tokens defined as regex strings. PLY tries string-defined patterns
# in order of decreasing regex length, so the two-character `|>` pipe is
# attempted before the single `|`.
t_OPERATOR_PLUS = r'\+'
t_OPERATOR_MINUS = r'-'
t_OPERATOR_MULTIPLY = r'\*'
t_OPERATOR_DIVIDE = r'/'
t_ASSIGN = r'='
t_OPERATOR_PIPE = r'\|>'
t_OPERATOR_PIPE_REPLACEMENT = r'\$'
t_PARENTHESIS_LEFT = r'\('
t_PARENTHESIS_RIGHT = r'\)'
# NOTE(review): these names are swapped relative to common usage -- BRACE_*
# matches '[' / ']' and BRACKET_* matches '{' / '}'. Left unchanged because
# grammar rules refer to the tokens by these names.
t_BRACE_LEFT = r'\['
t_BRACE_RIGHT = r'\]'
t_BRACKET_LEFT = r'{'
t_BRACKET_RIGHT = r'}'
t_BIT_OR = r'\|'
t_NAMESPACE_ACCESSOR = r'\.'
t_COMMA = r','
t_COLON = r':'
t_SEMICOLON = r';'


# In the function-based token rules below, the docstring IS the token regex
# (PLY reads it), so explanatory notes are written as comments instead.

# Identifiers and keywords share one pattern; keywords are re-tagged with
# their own token type via the `reserved` table.
def t_IDENTIFIER(t):
    r'[a-zA-Z][a-zA-Z0-9_]*'
    t.type = reserved.get(t.value, 'IDENTIFIER')
    return t


# Decimal integer literal; the token value is converted to int.
def t_NUMBER(t):
    r'[0-9]+'
    t.value = int(t.value)
    return t


# Single- or double-quoted string literal on one line. The token value keeps
# its surrounding quotes. The negated character classes stop at the FIRST
# closing quote, so `"a" + "b"` yields two STRING tokens rather than one
# greedy match, and the empty string is accepted for both quote styles.
# No escape sequences are recognised.
def t_STRING(t):
    r'("[^"\n]*"|\'[^\'\n]*\')'
    return t


# Line comment: matched and discarded (nothing is returned).
def t_COMMENT(t):
    r'//.*'
    pass


# Track line numbers; newlines themselves produce no token.
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)


# Characters skipped between tokens.
t_ignore = ' \t'


# Report an unrecognised character and resume lexing after it.
def t_error(t):
    print("undefined: '%s'" % t.value[0])
    t.lexer.skip(1)


lexer = lex.lex()

# Debug dump: print every token, grouped under a heading per source line.
lexer.input(input)
lineno = 0
for token in lexer:
    if token.lineno != lineno:
        lineno = token.lineno
        print("\nLine %s:" % token.lineno)
    print('%s: "%s" --' % (token.type, token.value), end=' ')
print("\n")

# Operator precedence, lowest first; all four operators are left-associative.
precedence = (
    ('left', 'OPERATOR_PLUS', 'OPERATOR_MINUS'),
    ('left', 'OPERATOR_MULTIPLY', 'OPERATOR_DIVIDE'),
)


# Grammar rules: each docstring is the production PLY builds the parser from.
# The first rule defined is the start symbol. Left recursion is used here so
# the parser stack does not grow with the number of statements.
def p_statements(p):
    '''statements : statement
                  | statements statement'''
    pass


def p_statement(p):
    '''statement : variable_declaration_statement
                 | variable_reassignment_statement'''
    pass


# TODO: function definitions are not wired into the grammar yet.
# def p_function(p):
#     '''function : FUNCTION IDENTIFIER PARENTHESIS_LEFT function_parameters PARENTHESIS_RIGHT COLON return_type BRACKET_LEFT statements BRACKET_RIGHT'''
#     pass


def p_variable_declaration_statement(p):
    '''variable_declaration_statement : variable_declarator IDENTIFIER COLON variable_type ASSIGN expression SEMICOLON'''
    pass


def p_variable_reassignment_statement(p):
    '''variable_reassignment_statement : IDENTIFIER ASSIGN expression SEMICOLON'''
    pass


def p_variable_declarator(p):
    '''variable_declarator : CONST
                           | VAR'''
    pass


def p_variable_type(p):
    '''variable_type : TYPE_INTEGER
                     | TYPE_STRING'''
    pass


def p_expression(p):
    '''expression : IDENTIFIER
                  | NUMBER
                  | STRING
                  | expression OPERATOR_PLUS expression
                  | expression OPERATOR_MINUS expression
                  | expression OPERATOR_MULTIPLY expression
                  | expression OPERATOR_DIVIDE expression'''
    pass


# Called by PLY on a syntax error; `p` is the offending token, or None when
# the error is at end of input.
def p_error(p):
    print("Syntax error in input!", p)


parser = yacc.yacc()

# The token dump above advanced lexer.lineno, and Lexer.input() (called again
# by parse) does not reset it. Restart the count so tokens seen during parsing
# carry correct line numbers.
lexer.lineno = 1
# None of the grammar actions assign p[0] yet, so the result is None.
result = parser.parse(input, lexer=lexer)
print(result)