From b858cc223a936a522bb5f88065f6686489841955 Mon Sep 17 00:00:00 2001 From: Daniel Weipert Date: Mon, 8 Jul 2024 15:03:42 +0200 Subject: array and map --- henshin | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++------- test/test.test | 10 ++- 2 files changed, 176 insertions(+), 23 deletions(-) diff --git a/henshin b/henshin index c780b17..d8a1de8 100755 --- a/henshin +++ b/henshin @@ -37,6 +37,9 @@ log = logging.getLogger() + + + # Lexer reserved = { @@ -51,6 +54,9 @@ reserved = { "integer": "TYPE_INTEGER", "string": "TYPE_STRING", "void": "TYPE_VOID", + + "and": "AND", + "or": "OR", } tokens = [ @@ -72,12 +78,6 @@ tokens = [ "BRACKET_RIGHT", - "AND", - "OR", - "BIT_AND", - "BIT_OR", - - "NAMESPACE_ACCESSOR", @@ -93,6 +93,7 @@ tokens = [ ] + list(reserved.values()) + t_OPERATOR_PLUS = r'\+' t_OPERATOR_MINUS = r'-' t_OPERATOR_MULTIPLY = r'\*' @@ -111,12 +112,6 @@ t_BRACE_LEFT = r'{' t_BRACE_RIGHT = r'}' -t_AND = r'\&\&' -t_OR = r'\|\|' -t_BIT_AND = r'\&' -t_BIT_OR = r'\|' - - t_NAMESPACE_ACCESSOR = r'\.' @@ -125,6 +120,7 @@ t_COLON = r':' t_SEMICOLON = r';' + def t_IDENTIFIER(t): r'[a-zA-Z][a-zA-Z0-9_]*' @@ -140,7 +136,7 @@ def t_NUMBER(t): return t def t_STRING(t): - r'(".+"|\'.*\')' + r'(".*?")' return t @@ -157,6 +153,7 @@ def t_newline(t): t_ignore = ' \t' + def t_error(t): print("undefined: '%s'" % t.value[0]) t.lexer.skip(1) @@ -171,18 +168,23 @@ for token in lexer: lineno = token.lineno print("\nLine %s:" % token.lineno) - # print('%s: "%s" --' % (token.type, token.value), end=' ') - print(token.value, end=' ') + print('%s: "%s" --' % (token.type, token.value), end=' ') + # print(token.value, end=' ') print("\n") + + + # Parser class AstNodeExpressionType(Enum): IDENTIFIER = 'identifier' NUMBER = 'number' STRING = 'string' + ARRAY = 'array' + MAP = 'map' class AstNode: pass @@ -198,6 +200,27 @@ class AstNodeVariableReassignmentStatement(AstNode): self.name = name self.value = value +class AstNodeVariableTypes(AstNode): + def __init__(self, operator, left, right): + self.operator = operator + self.left = left + self.right = right + +class AstNodeVariableTypeArray(AstNode): + def __init__(self, type, length): + self.type = type + self.length = length + +class AstNodeVariableTypeMap(AstNode): + def __init__(self, key_type, value_type): + self.key_type = key_type + self.value_type = value_type + +class AstNodeMapElement(AstNode): + def __init__(self, key, value): + self.key = key + self.value = value + class AstNodeExpression(AstNode): def __init__(self, type, value): self.type = type @@ -236,12 +259,16 @@ class AstNodeFunctionCallParameter(AstNode): self.value = value + precedence = ( ('left', 'OPERATOR_PLUS', 'OPERATOR_MINUS'), ('left', 'OPERATOR_MULTIPLY', 'OPERATOR_DIVIDE'), + ('left', 'OR', 'OR'), + ('left', 'AND', 'AND'), ) + def p_statements(p): '''statements : statement | statement statements''' @@ -275,114 +302,219 @@ def p_statements(p): statements.extend(p[2]) p[0] = statements + def p_statement(p): '''statement : variable_declaration_statement | variable_reassignment_statement | function_declaration + | function_call | return_statement''' p[0] = p[1] + + def p_function_declaration(p): '''function_declaration : FUNCTION IDENTIFIER PARENTHESIS_LEFT function_declaration_parameters PARENTHESIS_RIGHT COLON return_type BRACE_LEFT statements BRACE_RIGHT''' p[0] = AstNodeFunctionDeclaration(p[2], p[4], p[7], p[9]) + def p_function_declaration_parameters(p): '''function_declaration_parameters : function_declaration_parameter COMMA function_declaration_parameters + | function_declaration_parameter COMMA | function_declaration_parameter |''' parameters = [p[1]] - if len(p) > 2: - parameters.append(p[3]) + if len(p) == 4: + parameters.extend(p[3]) p[0] = parameters + def p_function_declaration_parameter(p): '''function_declaration_parameter : IDENTIFIER COLON variable_type''' p[0] = AstNodeFunctionDeclarationParameter(p[1], p[3]) + + def p_function_call(p): '''function_call : IDENTIFIER PARENTHESIS_LEFT function_call_parameters PARENTHESIS_RIGHT SEMICOLON''' p[0] = AstNodeFunctionCall(p[1], p[3]) + def p_function_call_parameters(p): '''function_call_parameters : function_call_parameter COMMA function_call_parameters + | function_call_parameter COMMA | function_call_parameter |''' parameters = [p[1]] - if len(p) > 2: - parameters.append(p[3]) + if len(p) == 4: + parameters.extend(p[3]) p[0] = parameters + def p_function_call_parameter(p): '''function_call_parameter : IDENTIFIER ASSIGN expression | expression''' - if len(p) > 2: + if len(p) == 4: p[0] = AstNodeFunctionCallParameter(p[1], p[3]) else: p[0] = AstNodeFunctionCallParameter("", p[1]) + + def p_variable_declaration_statement(p): '''variable_declaration_statement : variable_declarator IDENTIFIER COLON variable_type ASSIGN expression SEMICOLON''' p[0] = AstNodeVariableDeclarationStatement(p[1], p[2], p[4], p[6]) + def p_variable_reassignment_statement(p): '''variable_reassignment_statement : IDENTIFIER ASSIGN expression SEMICOLON''' p[0] = AstNodeVariableReassignmentStatement(p[1], p[3]) + def p_variable_declarator(p): '''variable_declarator : CONST | VAR''' p[0] = p[1] + def p_variable_type(p): - '''variable_type : TYPE_INTEGER + '''variable_type : variable_type_array + | variable_type_map + | TYPE_INTEGER | TYPE_STRING''' p[0] = p[1] +def p_variable_types(p): + '''variable_types : variable_type OR variable_types + | variable_type''' + + if len(p) == 4: + p[0] = AstNodeVariableTypes(p[2], p[1], p[3]) + else: + p[0] = p[1] + + +def p_variable_type_array(p): + '''variable_type_array : BRACKET_LEFT variable_types BRACKET_RIGHT BRACKET_LEFT NUMBER BRACKET_RIGHT''' + + p[0] = AstNodeVariableTypeArray(p[2], p[5]) + + +def p_variable_type_map(p): + '''variable_type_map : BRACKET_LEFT variable_types BRACKET_RIGHT BRACKET_LEFT variable_types BRACKET_RIGHT''' + + p[0] = AstNodeVariableTypeMap(p[2], p[5]) + + + def p_return_statement(p): '''return_statement : RETURN expression SEMICOLON''' p[0] = AstNodeReturnStatement(p[2]) + def p_return_type(p): '''return_type : variable_type | TYPE_VOID''' p[0] = p[1] + + def p_identifier(p): '''identifier : IDENTIFIER''' p[0] = AstNodeExpression(AstNodeExpressionType.IDENTIFIER, p[1]) + def p_number(p): '''number : NUMBER''' p[0] = AstNodeExpression(AstNodeExpressionType.NUMBER, p[1]) + def p_string(p): '''string : STRING''' p[0] = AstNodeExpression(AstNodeExpressionType.STRING, p[1]) + +def p_array(p): + '''array : BRACKET_LEFT array_elements BRACKET_RIGHT''' + + p[0] = AstNodeExpression(AstNodeExpressionType.ARRAY, p[2]) + + +def p_array_elements(p): + '''array_elements : array_element COMMA array_elements + | array_element COMMA + | array_element''' + + elements = [p[1]] + if len(p) == 4: + elements.extend(p[3]) + + p[0] = elements + + +def p_array_element(p): + '''array_element : expression''' + + p[0] = p[1] + + +def p_map(p): + '''map : BRACKET_LEFT map_elements BRACKET_RIGHT''' + + p[0] = AstNodeExpression(AstNodeExpressionType.MAP, p[2]) + + +def p_map_elements(p): + '''map_elements : map_element COMMA map_elements + | map_element COMMA + | map_element''' + + elements = [p[1]] + if len(p) == 4: + elements.extend(p[3]) + + p[0] = elements + + +def p_map_element(p): + '''map_element : map_element_key COLON expression''' + + p[0] = AstNodeMapElement(p[1], p[3]) + +def p_map_element_key(p): + '''map_element_key : identifier + | number + | string''' + + p[0] = p[1] + + def p_expression(p): '''expression : identifier | number | string + | array + | map | function_call | expression OPERATOR_PLUS expression | expression OPERATOR_MINUS expression @@ -395,13 +527,26 @@ def p_expression(p): p[0] = p[1] + + def p_error(p): print("Syntax error in input!", p) + + parser = yacc.yacc(debug=True, debuglog=log) result = parser.parse(input) -# print(result) + + + + +if result: + for ast_node in result: + print(ast_node) + + + # if __name__ == "__main__": # lex.runmain() diff --git a/test/test.test b/test/test.test index 2ffe743..af1bf76 100644 --- a/test/test.test +++ b/test/test.test @@ -14,4 +14,12 @@ function main(input: string): void { return hello; } -main(test); +main(input = "hello!"); + +const array: [integer][3] = [1, 2, 3]; +const map: [string][string or integer] = [ + "first": 1, + "second": "two", + "third": "3", + "fourth": 4, +]; -- cgit v1.2.3