From 9fe95aa7f2c9abe138b5383a4df06679fc37f70e Mon Sep 17 00:00:00 2001
From: Josh Holtrop
Date: Tue, 30 Aug 2011 16:03:13 -0400
Subject: [PATCH] convert lexer and parser into classes

---
 parser/Lexer.py      |   76 ++++++++++++++++++++++++++++++++++++++++++++
 parser/Parser.py     |   46 +++++++++++++++++++++++++++
 parser/__init__.py   |   10 ++----
 parser/lexrules.py   |   64 -------------------------------------
 parser/parserules.py |   35 --------------------
 5 files changed, 124 insertions(+), 107 deletions(-)
 create mode 100644 parser/Lexer.py
 create mode 100644 parser/Parser.py
 delete mode 100644 parser/lexrules.py
 delete mode 100644 parser/parserules.py

diff --git a/parser/Lexer.py b/parser/Lexer.py
new file mode 100644
index 0000000..261831d
--- /dev/null
+++ b/parser/Lexer.py
@@ -0,0 +1,76 @@
+
+import ply.lex as lex
+
+class Lexer(object):
+    def __init__(self):
+        self.reserved = {
+            'C': 'C',
+        }
+
+        self.tokens = [
+            'LPAREN',
+            'RPAREN',
+            'SEMICOLON',
+            'STRING',
+            'ID',
+        ] + list(self.reserved.values())
+
+        self.states = (
+            ('string', 'exclusive'),
+        )
+
+        self.t_LPAREN = r'\('
+        self.t_RPAREN = r'\)'
+        self.t_SEMICOLON = r';'
+
+        self.t_ignore = ' \t\r'
+
+        self.lexer = lex.lex(module = self)
+
+    def t_ID(self, t):
+        r'[a-zA-Z_][a-zA-Z_0-9]*'
+        t.type = self.reserved.get(t.value, 'ID')
+        return t
+
+    def t_STRING(self, t):
+        r'\"'
+        self.str_buildup = ''
+        t.lexer.begin('string')
+
+    def t_string_1(self, t):
+        r'[^\\\n\"]'
+        self.str_buildup += t.value
+
+    def t_string_2(self, t):
+        r'\\[^\n]'
+        c = {
+            't': '\t',
+            'r': '\r',
+            'n': '\n',
+            'f': '\f',
+            'b': '\b',
+            'v': '\v',
+        }.get(t.value[1], t.value[1])
+        self.str_buildup += c
+
+    def t_string_3(self, t):
+        r'\"'
+        t.type = 'STRING'
+        t.value = self.str_buildup
+        t.lexer.begin('INITIAL')
+        return t
+
+    def t_newline(self, t):
+        r'\n+'
+        t.lexer.lineno += len(t.value)
+        return None
+
+    def t_error(self, t):
+        print('Illegal character "%s"' % t.value[0])
+        t.lexer.skip(1)
+
+    def input(self, *args):
+        self.lexer.input(*args)
+
+    def token(self, *args):
+        return self.lexer.token(*args)
diff --git a/parser/Parser.py b/parser/Parser.py
new file mode 100644
index 0000000..9f0fd1e
--- /dev/null
+++ b/parser/Parser.py
@@ -0,0 +1,46 @@
+
+import ply.yacc as yacc
+from Lexer import Lexer
+from nodes import *
+
+class Parser(object):
+    def __init__(self, input):
+        self.input = input
+        self.lexer = Lexer()
+        self.tokens = self.lexer.tokens
+        self.parser = yacc.yacc(module = self, outputdir = 'parser')
+
+    def p_unit(self, p):
+        'unit : unit_items'
+        p[0] = UnitNode([p[1]])
+
+    def p_unit_items(self, p):
+        'unit_items : unit_item unit_items'
+        p[0] = Node([p[1]] + p[2].children)
+
+    def p_unit_items_empty(self, p):
+        'unit_items : empty'
+        p[0] = p[1]
+
+    def p_unit_item_c_stmt(self, p):
+        'unit_item : c_expr SEMICOLON'
+        p[0] = p[1]
+
+    def p_statement(self, p):
+        'statement : expr SEMICOLON'
+        p[0] = StatementNode([p[1]])
+
+    def p_expr(self, p):
+        'expr : c_expr'
+        p[0] = p[1]
+
+    def p_c_expr(self, p):
+        'c_expr : C LPAREN STRING RPAREN'
+        p[0] = CExprNode(p[3])
+
+    def p_empty(self, p):
+        'empty :'
+        p[0] = Node()
+
+    def parse(self):
+        return self.parser.parse(self.input, lexer = self.lexer)
diff --git a/parser/__init__.py b/parser/__init__.py
index 59a2587..690fdf0 100644
--- a/parser/__init__.py
+++ b/parser/__init__.py
@@ -1,11 +1,5 @@
 
-import ply.lex as lex
-import ply.yacc as yacc
-import lexrules
-import parserules
+from Parser import Parser
 
 def parse(input):
-    lexer = lex.lex(module = lexrules)
-    parser = yacc.yacc(module = parserules, outputdir = 'parser')
-    result = parser.parse(input)
-    return result
+    return Parser(input).parse()
diff --git a/parser/lexrules.py b/parser/lexrules.py
deleted file mode 100644
index 4334a36..0000000
--- a/parser/lexrules.py
+++ /dev/null
@@ -1,64 +0,0 @@
-
-reserved = {
-    'C': 'C',
-}
-
-tokens = [
-    'LPAREN',
-    'RPAREN',
-    'SEMICOLON',
-    'STRING',
-    'ID',
-] + list(reserved.values())
-
-states = (
-    ('string', 'exclusive'),
-)
-
-t_LPAREN = r'\('
-t_RPAREN = r'\)'
-t_SEMICOLON = r';'
-
-t_ignore = ' \t\r'
-
-def t_ID(t):
-    r'[a-zA-Z_][a-zA-Z_0-9]*'
-    t.type = reserved.get(t.value, 'ID')
-    return t
-
-def t_STRING(t):
-    r'\"'
-    t.lexer.str_buildup = ''
-    t.lexer.begin('string')
-
-def t_string_1(t):
-    r'[^\\\n\"]'
-    t.lexer.str_buildup += t.value
-
-def t_string_2(t):
-    r'\\[^\n]'
-    c = {
-        't': '\t',
-        'r': '\r',
-        'n': '\n',
-        'f': '\f',
-        'b': '\b',
-        'v': '\v',
-    }.get(t.value[1], t.value[1])
-    t.lexer.str_buildup += c
-
-def t_string_3(t):
-    r'\"'
-    t.type = 'STRING'
-    t.value = t.lexer.str_buildup
-    t.lexer.begin('INITIAL')
-    return t
-
-def t_newline(t):
-    r'\n+'
-    t.lexer.lineno += len(t.value)
-    return None
-
-def t_error(t):
-    print 'Illegal character "%s"' % t.value[0]
-    t.lexer.skip(1)
diff --git a/parser/parserules.py b/parser/parserules.py
deleted file mode 100644
index 9c72947..0000000
--- a/parser/parserules.py
+++ /dev/null
@@ -1,35 +0,0 @@
-
-from lexrules import tokens
-from nodes import *
-
-def p_unit(p):
-    'unit : unit_items'
-    p[0] = UnitNode([p[1]])
-
-def p_unit_items(p):
-    'unit_items : unit_item unit_items'
-    p[0] = Node([p[1]] + p[2].children)
-
-def p_unit_items_empty(p):
-    'unit_items : empty'
-    p[0] = p[1]
-
-def p_unit_item_c_stmt(p):
-    'unit_item : c_expr SEMICOLON'
-    p[0] = p[1]
-
-def p_statement(p):
-    'statement : expr SEMICOLON'
-    p[0] = StatementNode([p[1]])
-
-def p_expr(p):
-    'expr : c_expr'
-    p[0] = p[1]
-
-def p_c_expr(p):
-    'c_expr : C LPAREN STRING RPAREN'
-    p[0] = CExprNode(p[3])
-
-def p_empty(p):
-    'empty :'
-    p[0] = Node()
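-- 

A note on usage (commentary after the signature delimiter, not part of
the applied patch): the package's public entry point is unchanged by
this refactoring. The sketch below is a minimal illustration, assuming
the existing `nodes` module defines Node, UnitNode, and CExprNode as
used in the grammar actions above, and that the package's parent
directory is on sys.path so that this `parser` package (not the stdlib
module of the same name) is imported; the input string itself is
hypothetical.

    from parser import parse
    from parser.Lexer import Lexer

    # Drive the Lexer directly: 'C' lexes as the reserved word, and
    # the exclusive 'string' state collects the quoted literal
    # (processing backslash escapes) into a single STRING token.
    lexer = Lexer()
    lexer.input('C("return 0;");')
    while True:
        tok = lexer.token()
        if tok is None:
            break
        print(tok)

    # Or parse a whole unit: C LPAREN STRING RPAREN SEMICOLON reduces
    # through 'unit_item : c_expr SEMICOLON' to a CExprNode, and the
    # start rule wraps the collected items in a UnitNode.
    ast = parse('C("return 0;");')

Because lex.lex() and yacc.yacc() are called with module = self, PLY
discovers the t_*/p_* rules as bound methods on the Lexer and Parser
instances rather than as module-level functions, and outputdir =
'parser' keeps the generated parser tables inside the package
directory.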