# parser definition for the Jackal compiler
# Author: Josh Holtrop
# Date: 2010-09-14

from nodes import *

###########################################################################
# Lexer Definition                                                        #
###########################################################################

# Maps each keyword's source spelling to its token type.  t_ID consults
# this table so that keywords are not reported as plain identifiers.
reserved = {
    # control
    'if': 'IF',
    'else': 'ELSE',
    'while': 'WHILE',
    'return': 'RETURN',

    # types
    'byte': 'BYTE',
    'short': 'SHORT',
    'int': 'INT',
    'long': 'LONG',
    'string': 'STRING',
    'class': 'CLASS',
}

# Every token type the lexer can produce: the fixed tokens first, then
# one entry per reserved word.
tokens = [
    # identifiers
    'ID',

    # literals
    'NUMBER',

    # assignment
    'ASSIGN',

    # operators
    'EQUALS',
    'DEQUALS',
    'TIMES',
    'DIVIDE',
    'PLUS',
    'MINUS',
    'LESS',
    'GREATER',

    # punctuation
    'LPAREN',
    'RPAREN',
    'LCURLY',
    'RCURLY',
    'LBRACKET',
    'RBRACKET',
    'DOT',
    'COLON',
    'SEMICOLON',
    'COMMA',
]
tokens.extend(reserved.values())

# Simple tokens defined by a regular expression string.  PLY tries
# string-defined rules in order of decreasing pattern length, so '=='
# is attempted before '='.
t_ASSIGN = r':='
t_NUMBER = r'[0-9]+'
t_EQUALS = r'='
t_DEQUALS = r'=='
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_PLUS = r'\+'
t_MINUS = r'-'
t_LESS = r'<'
t_GREATER = r'>'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LCURLY = r'\{'
t_RCURLY = r'\}'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_DOT = r'\.'
# Single-character punctuation tokens.
t_COLON = r':'
t_SEMICOLON = r';'
t_COMMA = r','


# Identifiers and keywords share one pattern; the `reserved` table decides
# which token type a matched word receives.
# NOTE: the docstring below is the token's regular expression (PLY reads it
# at lexer build time), so it must stay the first statement in the function.
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    t.type = reserved.get(t.value, 'ID')  # check for reserved words
    return t


# Count lines for error reporting.  Nothing is returned, so no token is
# produced for the newline itself.
def t_newline(t):
    r'\n'
    t.lexer.lineno += 1


# Characters skipped between tokens (spaces and tabs).
t_ignore = " \t"

###########################################################################
# Parser Definition                                                       #
###########################################################################

# Operator precedence, listed from lowest to highest binding strength.
precedence = (
    ('right', 'ASSIGN'),
    ('left', 'PLUS', 'MINUS'),
    ('left', 'TIMES', 'DIVIDE'),
)

# NOTE: in every p_* function the docstring is the grammar production that
# PLY reads at parser build time; explanatory text therefore lives in `#`
# comments.  p[0] is the value of the production, p[1..n] are the values of
# its right-hand-side symbols.


# A module is a sequence of class definitions.
def p_module(p):
    '''module : classes'''
    p[0] = ModuleNode(p[1])


def p_classes_empty(p):
    '''classes : empty'''
    p[0] = []


# Right-recursive list: prepend this class to the remaining ones.
def p_classes_class(p):
    '''classes : class classes'''
    p[0] = [p[1]] + p[2]


# ClassNode receives the class name and the list of its items.
def p_class(p):
    '''class : CLASS ID LCURLY class_items RCURLY'''
    p[0] = ClassNode(p[2], p[4])


def p_class_items_empty(p):
    '''class_items : empty'''
    p[0] = []


def p_class_items_item(p):
    '''class_items : class_item class_items'''
    p[0] = [p[1]] + p[2]


# A class item is a variable specification or a function; pass it through.
def p_class_item(p):
    '''class_item : varspec
                  | function'''
    p[0] = p[1]


# VarspecNode receives the variable name and its type.
def p_varspec(p):
    '''varspec : ID COLON type SEMICOLON'''
    p[0] = VarspecNode(p[1], p[3])


# FunctionNode receives name, parameter list, return type and body items.
def p_function(p):
    '''function : ID LPAREN param_list RPAREN COLON type LCURLY function_items RCURLY'''
    p[0] = FunctionNode(p[1], p[3], p[6], p[8])


def p_param_list_empty(p):
    '''param_list : empty'''
    p[0] = []


def p_param_list_param(p):
    '''param_list : param param_list_more'''
    p[0] = [p[1]] + p[2]


def p_param_list_more_empty(p):
    '''param_list_more : empty'''
    p[0] = []


# p[1] is the COMMA separator; only the parameter and the tail are kept.
def p_param_list_more_param(p):
    '''param_list_more : COMMA param param_list_more'''
    p[0] = [p[2]] + p[3]


# Written "type name" in the source; ParamNode is called with (name, type).
def p_param(p):
    '''param : type ID'''
    p[0] = ParamNode(p[2], p[1])


def p_function_items_empty(p):
    '''function_items : empty'''
    p[0] = []


def p_function_items_item(p):
    '''function_items : function_item function_items'''
    p[0] = [p[1]] + p[2]


# Function bodies may contain nested functions as well as statements.
def p_function_item(p):
    '''function_item : function
                     | statement'''
    p[0] = p[1]


def p_statement_varspec(p):
    '''statement : varspec'''
    p[0] = p[1]


# An expression statement evaluates to the expression itself.
def p_statement_expression(p):
    '''statement : expression SEMICOLON'''
    p[0] = p[1]


def p_statement_block(p):
    '''statement : LCURLY function_items RCURLY'''
    p[0] = BlockNode(p[2])


def p_statement_return(p):
    '''statement : RETURN expression SEMICOLON'''
    p[0] = ReturnNode(p[2])


# Built-in type keywords, or an ID naming a user-defined type.
def p_type(p):
    '''type : BYTE
            | SHORT
            | INT
            | LONG
            | STRING
            | ID'''
    p[0] = TypeNode(p[1])


# Identifiers and number literals are passed through as their raw text.
def p_expression_id(p):
    '''expression : ID'''
    p[0] = p[1]


def p_expression_literal(p):
    '''expression : NUMBER'''
    p[0] = p[1]


# The operator terminals are spelled out in this production on purpose.
# PLY assigns a production the precedence of its right-most terminal, so
# routing the operators through a separate `binop` non-terminal left this
# rule without any precedence: the `precedence` table was ignored and every
# conflict was resolved by shifting (e.g. "1 * 2 + 3" grouped as
# "1 * (2 + 3)").  The former `binop` rule is no longer needed.
def p_expression_binop(p):
    '''expression : expression TIMES expression
                  | expression DIVIDE expression
                  | expression PLUS expression
                  | expression MINUS expression
                  | expression ASSIGN expression'''
    p[0] = BinOpNode(p[2], p[1], p[3])


# Matches nothing; its value is left as None.
def p_empty(p):
    '''empty :'''
    pass