# Reconstructed from git patch 1661d11c8fe55a020df3dc8152c8290d90544d9c
# From: josh
# Date: Tue, 14 Sep 2010 20:18:58 +0000
# Subject: [PATCH] initial skeleton parser with simple driver
# git-svn-id: svn://anubis/jackalc/trunk@2 2d0ce1a6-f80c-425e-916c-c881d0336438
#
# The patch creates three files: jackalc.py, nodes.py and parserdef.py.

# ======================================================================
# jackalc.py
# ======================================================================

# Simple interactive driver for the Jackal compiler front end.


def main():
    """Read one line of Jackal source, parse it and print the result.

    The PLY lexer and parser are both built from the ``parserdef`` module.
    The parse result (whatever the start rule of the grammar produces) is
    printed to standard output.
    """
    # Imported inside the function so that importing this module does not
    # require PLY and does not prompt for input.
    import ply.yacc as yacc
    import ply.lex as lex
    import parserdef
    import nodes  # kept from the original driver; not used directly here

    lexer = lex.lex(module=parserdef)
    parser = yacc.yacc(module=parserdef)

    # raw_input() only exists on Python 2; input() is its Python 3 name.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    s = read_line("give input > ")
    result = parser.parse(s, lexer=lexer)

    # A parenthesised single argument prints identically on Python 2 and 3.
    print(result)


if __name__ == "__main__":
    main()


# ======================================================================
# nodes.py
# ======================================================================

# AST nodes for the Jackal compiler
# Author: Josh Holtrop
# Date: 2010-09-14


class Node(object):
    """Base class shared by every AST node."""

    def __repr__(self):
        # Show the class name and all instance attributes (sorted by name so
        # the output is deterministic), which makes printed parse results
        # readable instead of "<... instance at 0x...>".
        fields = ", ".join(
            "%s=%r" % item for item in sorted(vars(self).items()))
        return "%s(%s)" % (type(self).__name__, fields)


class BinOpNode(Node):
    """Binary operation: operator ``binop`` applied to ``s1`` and ``s2``."""

    def __init__(self, binop, s1, s2):
        self.binop = binop
        self.s1 = s1
        self.s2 = s2


class ModuleNode(Node):
    """Root of the tree; ``classes`` holds the module's class nodes."""

    def __init__(self, classes):
        self.classes = classes


class ClassNode(Node):
    """Class definition with a ``name`` and its member ``items``."""

    def __init__(self, name, items):
        self.name = name
        self.items = items


class TypeNode(Node):
    """Reference to a type, identified by ``name``."""

    def __init__(self, name):
        self.name = name


class FunctionNode(Node):
    """Function definition: name, parameters, return type and body."""

    def __init__(self, name, parameters, rtype, body):
        self.name = name
        self.parameters = parameters
        self.rtype = rtype
        self.body = body


class BlockNode(Node):
    """Block statement wrapping the items in ``body``."""

    def __init__(self, body):
        self.body = body


class VarspecNode(Node):
    """Variable declaration: ``name`` declared with type ``typ``."""

    def __init__(self, name, typ):
        self.name = name
        self.typ = typ


class ReturnNode(Node):
    """Return statement carrying the returned ``expr``."""

    def __init__(self, expr):
        self.expr = expr


class ParamNode(Node):
    """Function parameter: ``name`` with type ``typ``."""

    def __init__(self, name, typ):
        self.name = name
        self.typ = typ
# ======================================================================
# parserdef.py (new file in this patch)
# ======================================================================

# parser definition for the Jackal compiler
# Author: Josh Holtrop
# Date: 2010-09-14

from nodes import *

###########################################################################
# Lexer Definition                                                        #
###########################################################################

# Maps each reserved word, as spelled in source text, to its token type.
reserved = dict([
    # control
    ('if', 'IF'),
    ('else', 'ELSE'),
    ('while', 'WHILE'),
    ('return', 'RETURN'),

    # types
    ('byte', 'BYTE'),
    ('short', 'SHORT'),
    ('int', 'INT'),
    ('long', 'LONG'),
    ('string', 'STRING'),
    ('class', 'CLASS'),
])

# Every token type the lexer can produce: the fixed tokens listed here,
# followed by one token type per reserved word.
tokens = [
    # identifiers
    'ID',

    # literals
    'NUMBER',

    # assignment
    'ASSIGN',

    # operators
    'EQUALS', 'DEQUALS',
    'TIMES', 'DIVIDE',
    'PLUS', 'MINUS',
    'LESS', 'GREATER',

    # punctuation
    'LPAREN', 'RPAREN',
    'LCURLY', 'RCURLY',
    'LBRACKET', 'RBRACKET',
    'DOT', 'COLON', 'SEMICOLON', 'COMMA',
]
tokens.extend(reserved.values())

# Simple tokens, each defined by a regular expression string.
# NOTE(review): PLY is documented to try string-defined rules in order of
# decreasing regex length, which is what lets '==' win over '=' here.

# assignment
t_ASSIGN = r':='

# literals
t_NUMBER = r'[0-9]+'

# operators
t_EQUALS = r'='
t_DEQUALS = r'=='
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_PLUS = r'\+'
t_MINUS = r'-'
t_LESS = r'<'
t_GREATER = r'>'

# punctuation
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LCURLY = r'\{'
t_RCURLY = r'\}'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_DOT = r'\.'
t_COLON = r':'
t_SEMICOLON = r';'
t_COMMA = r','

# NOTE: PLY reads the docstring of every t_* function as the token's regular
# expression and the docstring of every p_* function as its grammar
# production, so all documentation in this section lives in comments.


# Identifiers and keywords share one rule: the text is matched as an
# identifier and then looked up in the module-level `reserved` table.
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    t.type = reserved.get(t.value, 'ID')    # check for reserved words
    return t


# Newlines produce no token (nothing is returned); they only advance the
# lexer's line counter.
def t_newline(t):
    r'\n'
    t.lexer.lineno += 1


# Characters skipped between tokens.
t_ignore = " \t"


# Called by PLY when no token rule matches; t.value holds the remaining
# input, so t.value[0] is the offending character.
def t_error(t):
    raise SyntaxError("line %d: illegal character %r"
                      % (t.lexer.lineno, t.value[0]))


###########################################################################
# Parser Definition                                                       #
###########################################################################

# Operator precedence, lowest first.
precedence = (
    ('right', 'ASSIGN'),
    ('left', 'PLUS', 'MINUS'),
    ('left', 'TIMES', 'DIVIDE'),
    )


# Start rule: a module is a list of classes.
def p_module(p):
    '''module : classes'''
    p[0] = ModuleNode(p[1])


# `classes` builds a Python list of class nodes, in source order.
def p_classes_empty(p):
    '''classes : empty'''
    p[0] = []


def p_classes_class(p):
    '''classes : class classes'''
    p[0] = [p[1]] + p[2]


def p_class(p):
    '''class : CLASS ID LCURLY class_items RCURLY'''
    p[0] = ClassNode(p[2], p[4])


# `class_items` builds a list of the members of one class.
def p_class_items_empty(p):
    '''class_items : empty'''
    p[0] = []


def p_class_items_item(p):
    '''class_items : class_item class_items'''
    p[0] = [p[1]] + p[2]


def p_class_item(p):
    '''class_item : varspec
                  | function'''
    p[0] = p[1]


# Variable declaration: name first, then its type.
def p_varspec(p):
    '''varspec : ID COLON type SEMICOLON'''
    p[0] = VarspecNode(p[1], p[3])


# Function definition: name, parameter list, return type, body items.
def p_function(p):
    '''function : ID LPAREN param_list RPAREN COLON type LCURLY function_items RCURLY'''
    p[0] = FunctionNode(p[1], p[3], p[6], p[8])


# `param_list` / `param_list_more` build the comma-separated parameter list.
def p_param_list_empty(p):
    '''param_list : empty'''
    p[0] = []


def p_param_list_param(p):
    '''param_list : param param_list_more'''
    p[0] = [p[1]] + p[2]


def p_param_list_more_empty(p):
    '''param_list_more : empty'''
    p[0] = []


def p_param_list_more_param(p):
    '''param_list_more : COMMA param param_list_more'''
    p[0] = [p[2]] + p[3]


# A parameter is written "type name"; ParamNode takes (name, type).
def p_param(p):
    '''param : type ID'''
    p[0] = ParamNode(p[2], p[1])


# `function_items` builds the list of items inside a function or block.
def p_function_items_empty(p):
    '''function_items : empty'''
    p[0] = []


def p_function_items_item(p):
    '''function_items : function_item function_items'''
    p[0] = [p[1]] + p[2]


def p_function_item(p):
    '''function_item : function
                     | statement'''
    p[0] = p[1]


def p_statement_varspec(p):
    '''statement : varspec'''
    p[0] = p[1]


def p_statement_expression(p):
    '''statement : expression SEMICOLON'''
    p[0] = p[1]


def p_statement_block(p):
    '''statement : LCURLY function_items RCURLY'''
    p[0] = BlockNode(p[2])


def p_statement_return(p):
    '''statement : RETURN expression SEMICOLON'''
    p[0] = ReturnNode(p[2])


# Built-in type keywords and user-defined type names are both wrapped in a
# TypeNode holding the matched text.
def p_type(p):
    '''type : BYTE
            | SHORT
            | INT
            | LONG
            | STRING
            | ID'''
    p[0] = TypeNode(p[1])


# Identifier and number expressions are passed through as the token text.
def p_expression_id(p):
    '''expression : ID'''
    p[0] = p[1]


def p_expression_literal(p):
    '''expression : NUMBER'''
    p[0] = p[1]


# Binary operations.  The operator terminals are spelled out in this rule
# rather than hidden behind a separate `binop` nonterminal: PLY derives a
# rule's precedence from the rightmost terminal in the production, so a rule
# of the form "expression binop expression" contains no terminal and the
# `precedence` table would never be applied to it.
def p_expression_binop(p):
    '''expression : expression TIMES expression
                  | expression DIVIDE expression
                  | expression PLUS expression
                  | expression MINUS expression
                  | expression ASSIGN expression'''
    p[0] = BinOpNode(p[2], p[1], p[3])


# Empty production; p[0] is left unset.
def p_empty(p):
    '''empty :'''


# Called by PLY on a syntax error; p is the offending token, or None when
# the input ended unexpectedly.
def p_error(p):
    if p is None:
        raise SyntaxError("unexpected end of input")
    raise SyntaxError("line %d: unexpected token %s (%r)"
                      % (p.lineno, p.type, p.value))