jackalc/parserdef.py
josh b4dd8064a3 improved integer literals, added string literals
git-svn-id: svn://anubis/jackalc/trunk@3 2d0ce1a6-f80c-425e-916c-c881d0336438
2010-09-14 21:12:56 +00:00

270 lines
5.5 KiB
Python

# parser definition for the Jackal compiler
# Author: Josh Holtrop
# Date: 2010-09-14
from nodes import *
###########################################################################
# Lexer Definition #
###########################################################################
# Reserved words of the language. Each keyword maps to the token type that
# is reported for it, which is simply the keyword in upper case.
reserved = dict(
    (word, word.upper())
    for word in (
        # control
        'if', 'else', 'while', 'return',
        # types
        'byte', 'short', 'int', 'long', 'string', 'class',
    )
)
# Names of every token type the lexer can produce.
tokens = [
    # identifiers
    'ID',
    # literals
    'NUMBERLIT', 'HEXNUMBERLIT', 'STRINGLIT',
    # assignment
    'ASSIGN',
    # operators
    'EQUALS', 'DEQUALS',
    'TIMES', 'DIVIDE', 'PLUS', 'MINUS',
    'LESS', 'GREATER',
    # punctuation
    'LPAREN', 'RPAREN',
    'LCURLY', 'RCURLY',
    'LBRACKET', 'RBRACKET',
    'DOT', 'COLON', 'SEMICOLON', 'COMMA',
]
# Each reserved word contributes its own token type as well.
tokens.extend(reserved.values())
# Extra lexer states: a single exclusive state named 'string', entered by
# t_begin_stringlit and left by t_string_end.
states = (('string', 'exclusive'),)
# Simple tokens: each t_<NAME> string is the regular expression for the token
# type <NAME> listed in `tokens`.
# NOTE(review): ':=' / ':' and '==' / '=' share a prefix; presumably the lexer
# generator tries the longer pattern first -- confirm before reordering.

# assignment
t_ASSIGN = r':='
# operators
t_EQUALS = r'='
t_DEQUALS = r'=='
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_PLUS = r'\+'
t_MINUS = r'-'
t_LESS = r'<'
t_GREATER = r'>'
# punctuation
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LCURLY = r'\{'
t_RCURLY = r'\}'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_DOT = r'\.'
t_COLON = r':'
t_SEMICOLON = r';'
t_COMMA = r','
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    # A word that appears in `reserved` is reported under its keyword token
    # type; anything else is a plain identifier.
    if t.value in reserved:
        t.type = reserved[t.value]
    else:
        t.type = 'ID'
    return t
# The hexadecimal rule must be defined before the decimal one: function rules
# are tried in definition order, and the decimal pattern would otherwise
# consume the leading '0' of '0x1F', leaving 'x1F' to lex as an identifier.
def t_HEXNUMBERLIT(t):
    r'0x[0-9a-fA-F]+'
    # Convert the matched text to its integer value. Base 16 is required;
    # int() with the default base 10 raises ValueError on a '0x' prefix.
    t.value = int(t.value, 16)
    return t


def t_NUMBERLIT(t):
    r'[0-9]+'
    # Convert the matched decimal digits to their integer value.
    t.value = int(t.value)
    return t
def t_begin_stringlit(t):
    r'"'
    global gather_string
    # Opening quote: switch the lexer into the 'string' state and start with
    # an empty accumulation buffer. Nothing is returned, so the quote itself
    # yields no token.
    t.lexer.push_state('string')
    gather_string = ''
def t_string_elem(t):
    r'[^\\\n"]'
    global gather_string
    # An ordinary character (not a backslash, newline or quote) is copied
    # into the buffer as-is; no token is returned.
    gather_string = gather_string + t.value
def t_string_escape(t):
    r'\\[^\n]'
    global gather_string
    # The character that follows the backslash selects the escape.
    escaped = t.value[1]
    translations = {'n': "\n", 't': "\t", 'b': "\b", 'v': "\v", 'r': "\r"}
    if escaped in translations:
        gather_string += translations[escaped]
    else:
        # Any other escaped character (e.g. a quote or a backslash) stands
        # for itself.
        gather_string += escaped
def t_string_end(t):
    r'"'
    # Closing quote: emit the accumulated text as one string-literal token
    # and return to the lexer state that was active before the opening quote.
    #
    # The token type must be 'STRINGLIT' (declared in `tokens`). 'STRING' is
    # the token type of the `string` type keyword, so using it here would
    # make every string literal look like a type name to the parser.
    t.type = 'STRINGLIT'
    t.value = gather_string
    t.lexer.pop_state()
    return t
# No characters are skipped inside the 'string' state: spaces and tabs are
# part of the literal being gathered.
t_string_ignore = ""
def t_newline(t):
    r'\n'
    # Keep the lexer's line counter current; newlines yield no token.
    lexer = t.lexer
    lexer.lineno = lexer.lineno + 1
# Characters skipped between tokens outside string literals: space and tab.
t_ignore = ' \t'
###########################################################################
# Parser Definition #
###########################################################################
# Operator associativity table: ASSIGN is right-associative, the arithmetic
# operators are left-associative.
# NOTE(review): presumably rows are ordered from loosest to tightest binding
# (yacc convention) -- confirm against the parser generator's documentation.
precedence = (
    ('right', 'ASSIGN'),
    ('left', 'PLUS', 'MINUS'),
    ('left', 'TIMES', 'DIVIDE'),
)
def p_module(p):
    '''module : classes'''
    # A module is the list of classes it contains, wrapped in a ModuleNode.
    class_list = p[1]
    p[0] = ModuleNode(class_list)
def p_classes_empty(p):
    '''classes : empty'''
    # End of the class sequence: start from a fresh empty list.
    p[0] = list()
def p_classes_class(p):
    '''classes : class classes'''
    # Put the class just parsed in front of the ones that follow it.
    collected = [p[1]]
    collected.extend(p[2])
    p[0] = collected
def p_class(p):
    '''class : CLASS ID LCURLY class_items RCURLY'''
    # Symbol 2 is the class name, symbol 4 the list of items in its body.
    class_name, members = p[2], p[4]
    p[0] = ClassNode(class_name, members)
def p_class_items_empty(p):
    '''class_items : empty'''
    # No (more) items in the class body.
    p[0] = list()
def p_class_items_item(p):
    '''class_items : class_item class_items'''
    # Put this item in front of the remaining items of the class body.
    members = [p[1]]
    members.extend(p[2])
    p[0] = members
def p_class_item(p):
    '''class_item : varspec
                  | function'''
    # Either alternative is passed through unchanged.
    item = p[1]
    p[0] = item
def p_varspec(p):
    '''varspec : ID COLON type SEMICOLON'''
    # Variable declaration: the name comes first, the type after the colon.
    var_name, var_type = p[1], p[3]
    p[0] = VarspecNode(var_name, var_type)
def p_function(p):
    '''function : ID LPAREN param_list RPAREN COLON type LCURLY function_items RCURLY'''
    # Pick out the function name, parameter list, return type and body items
    # from their positions in the production.
    func_name, params, return_type, body = p[1], p[3], p[6], p[8]
    p[0] = FunctionNode(func_name, params, return_type, body)
def p_param_list_empty(p):
    '''param_list : empty'''
    # A function declared without parameters.
    p[0] = list()
def p_param_list_param(p):
    '''param_list : param param_list_more'''
    # First parameter followed by any comma-separated parameters after it.
    params = [p[1]]
    params.extend(p[2])
    p[0] = params
def p_param_list_more_empty(p):
    '''param_list_more : empty'''
    # No further parameters follow.
    p[0] = list()
def p_param_list_more_param(p):
    '''param_list_more : COMMA param param_list_more'''
    # Skip the comma (symbol 1); keep the parameter and whatever follows it.
    params = [p[2]]
    params.extend(p[3])
    p[0] = params
def p_param(p):
    '''param : type ID'''
    # The source spells a parameter as "type name"; ParamNode receives the
    # name first and the type second.
    param_type, param_name = p[1], p[2]
    p[0] = ParamNode(param_name, param_type)
def p_function_items_empty(p):
    '''function_items : empty'''
    # No (more) items in the function body or block.
    p[0] = list()
def p_function_items_item(p):
    '''function_items : function_item function_items'''
    # Put this item in front of the remaining items of the body.
    body = [p[1]]
    body.extend(p[2])
    p[0] = body
def p_function_item(p):
    '''function_item : function
                     | statement'''
    # A nested function or a statement; either is passed through unchanged.
    item = p[1]
    p[0] = item
def p_statement_varspec(p):
    '''statement : varspec'''
    # A variable declaration used as a statement is passed through unchanged.
    declaration = p[1]
    p[0] = declaration
def p_statement_expression(p):
    '''statement : expression SEMICOLON'''
    # The statement's value is the expression; the semicolon is dropped.
    expr = p[1]
    p[0] = expr
def p_statement_block(p):
    '''statement : LCURLY function_items RCURLY'''
    # A braced block wraps the list of items found between the braces.
    block_items = p[2]
    p[0] = BlockNode(block_items)
def p_statement_return(p):
    '''statement : RETURN expression SEMICOLON'''
    # Wrap the returned expression (symbol 2) in a ReturnNode.
    returned = p[2]
    p[0] = ReturnNode(returned)
def p_type(p):
    '''type : BYTE
            | SHORT
            | INT
            | LONG
            | STRING
            | ID'''
    # Built-in type keywords and identifiers used as type names are handled
    # alike: the matched text becomes the TypeNode's argument.
    type_name = p[1]
    p[0] = TypeNode(type_name)
def p_expression_id(p):
    '''expression : ID'''
    # An identifier used as an expression is represented by the ID token's
    # value itself; no node is created for it.
    identifier = p[1]
    p[0] = identifier
def p_expression_literal(p):
    '''expression : integer'''
    # The value already built for the integer literal is reused as the
    # expression.
    literal = p[1]
    p[0] = literal
def p_expression_binop(p):
    '''expression : expression TIMES expression
                  | expression DIVIDE expression
                  | expression PLUS expression
                  | expression MINUS expression
                  | expression ASSIGN expression'''
    # The operator terminals are spelled out in the production itself rather
    # than reached through a `binop` nonterminal. A production takes its
    # precedence from the last terminal it contains, so a production made
    # only of nonterminals gets none: the `precedence` table would never be
    # consulted and e.g. "1 * 2 + 3" would group as "1 * (2 + 3)".
    #
    # p[2] is the operator token's text, the same value the `binop` rule
    # passed along, so BinOpNode receives the same arguments as before:
    # operator, left operand, right operand.
    operator, left, right = p[2], p[1], p[3]
    p[0] = BinOpNode(operator, left, right)
def p_integer(p):
    '''integer : NUMBERLIT
               | HEXNUMBERLIT'''
    # Decimal and hexadecimal literals both end up as an IntegerNode built
    # from the token's value.
    literal_value = p[1]
    p[0] = IntegerNode(literal_value)
def p_binop(p):
    '''binop : TIMES
             | DIVIDE
             | PLUS
             | MINUS
             | ASSIGN'''
    # Pass the operator token's value along unchanged.
    operator = p[1]
    p[0] = operator
def p_empty(p):
    '''empty :'''
    # The empty production matches nothing and produces no value.
    pass