import ply.lex as lex


class Lexer(object):
    """Tokenizer built on PLY's ``lex`` module.

    Recognises brackets, semicolons, identifiers, a handful of reserved
    words and double-quoted string literals. String literals are scanned in
    a dedicated exclusive ``string`` state so that escape sequences can be
    decoded while the literal's value is accumulated.

    NOTE: PLY uses the docstring of every ``t_*`` method as that rule's
    regular expression, so those methods are documented with ``#`` comments
    rather than docstrings.
    """

    # Letters that have a special meaning after a backslash inside a string
    # literal. Any other escaped character stands for itself.
    _ESCAPES = {
        't': '\t',
        'r': '\r',
        'n': '\n',
        'f': '\f',
        'b': '\b',
        'v': '\v',
    }

    def __init__(self):
        """Declare the token set and simple rules, then build the PLY lexer."""
        # Maps the source spelling of a reserved word to its token type.
        self.reserved = {
            'C': 'C',

            # types
            'char': 'CHAR',
            'short': 'SHORT',
            'int': 'INT',
            'long': 'LONG',

            # control
            'if': 'IF',
            'else': 'ELSE',
            'while': 'WHILE',
            'for': 'FOR',
            'return': 'RETURN',
        }

        self.tokens = [
            'LPAREN',
            'RPAREN',
            'LBRACKET',
            'RBRACKET',
            'LCURLY',
            'RCURLY',
            'SEMICOLON',
            'STRING',
            'ID',
        ] + list(self.reserved.values())

        # Exclusive state: while inside a string literal only the
        # ``t_string_*`` / ``t_ANY_*`` rules are active.
        self.states = (
            ('string', 'exclusive'),
        )

        # Single-pattern tokens.
        self.t_LPAREN = r'\('
        self.t_RPAREN = r'\)'
        self.t_LBRACKET = r'\['
        self.t_RBRACKET = r'\]'
        self.t_LCURLY = r'\{'
        self.t_RCURLY = r'\}'
        self.t_SEMICOLON = r';'

        # Characters skipped between tokens; nothing is skipped inside a
        # string literal.
        self.t_ignore = ' \t\r'
        self.t_string_ignore = ''

        # Accumulates the decoded contents of the string literal currently
        # being scanned. Initialised here so the attribute always exists.
        self.str_buildup = ''

        self.lexer = lex.lex(module=self)

    # Identifier or reserved word: reserved spellings get their own token
    # type, everything else is an ID.
    def t_ID(self, t):
        r'[a-zA-Z_][a-zA-Z_0-9]*'
        t.type = self.reserved.get(t.value, 'ID')
        return t

    # Opening quote: reset the accumulator and switch to the 'string' state.
    # Nothing is returned, so no token is emitted for the opening quote; the
    # STRING token is produced by the closing-quote rule below.
    def t_STRING(self, t):
        r'\"'
        self.str_buildup = ''
        t.lexer.begin('string')

    # Ordinary character inside a string literal (anything except a
    # backslash, a newline or a double quote).
    def t_string_1(self, t):
        r'[^\\\n\"]'
        self.str_buildup += t.value

    # Backslash escape inside a string literal: known letters are translated
    # via _ESCAPES, any other escaped character is taken literally.
    def t_string_2(self, t):
        r'\\[^\n]'
        escaped = t.value[1]
        self.str_buildup += self._ESCAPES.get(escaped, escaped)

    # Closing quote: emit the accumulated text as a STRING token and return
    # to the default state.
    def t_string_3(self, t):
        r'\"'
        t.type = 'STRING'
        t.value = self.str_buildup
        t.lexer.begin('INITIAL')
        return t

    # Newlines outside string literals only advance the line counter; no
    # token is emitted.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # Error handler for every state: report the offending character and
    # skip past it. The parenthesised single-argument form prints the same
    # text on Python 2 and Python 3.
    def t_ANY_error(self, t):
        print('Illegal character "%s"' % t.value[0])
        t.lexer.skip(1)

    def input(self, *args):
        """Forward the arguments to the underlying PLY lexer's ``input``."""
        self.lexer.input(*args)

    def token(self, *args):
        """Return whatever the underlying PLY lexer's ``token`` returns."""
        return self.lexer.token(*args)