import ply.lex as lex


class Lexer(object):
    """PLY-based lexer for parentheses, semicolons, identifiers and strings.

    Tokens produced: LPAREN, RPAREN, SEMICOLON, STRING, ID, plus one token
    type per entry in ``reserved`` (currently only ``C``).

    Double-quoted string literals are scanned in a separate exclusive lexer
    state named ``string``; the decoded text is accumulated in
    ``self.str_buildup`` and returned as the value of the STRING token.

    NOTE: PLY uses the docstring of every ``t_`` rule function as that rule's
    regular expression, so the rule methods below are documented with
    comments rather than docstrings.
    """

    # Characters that have a special meaning after a backslash inside a
    # string literal.  Any other escaped character stands for itself.
    _ESCAPES = {
        't': '\t',
        'r': '\r',
        'n': '\n',
        'f': '\f',
        'b': '\b',
        'v': '\v',
    }

    def __init__(self):
        """Declare the token set and simple rules, then build the PLY lexer."""
        # Maps reserved identifier text to its token type.
        self.reserved = {
            'C': 'C',
        }
        self.tokens = [
            'LPAREN',
            'RPAREN',
            'SEMICOLON',
            'STRING',
            'ID',
        ] + list(self.reserved.values())
        self.states = (
            ('string', 'exclusive'),
        )

        # Simple (string-defined) rules for the INITIAL state.
        self.t_LPAREN = r'\('
        self.t_RPAREN = r'\)'
        self.t_SEMICOLON = r';'
        self.t_ignore = ' \t\r'

        # An exclusive state does not inherit t_ignore.  Declare an explicit
        # empty ignore set so that PLY does not warn about a missing rule;
        # it must stay empty, otherwise blanks inside strings would be lost.
        self.t_string_ignore = ''

        # Text of the string literal currently being scanned.
        self.str_buildup = ''

        # lex.lex() collects the t_* attributes and methods of this instance.
        self.lexer = lex.lex(module=self)

    # --- INITIAL state -----------------------------------------------------

    # Identifier; re-typed to the reserved token type when the text is a
    # key of self.reserved.
    def t_ID(self, t):
        r'[a-zA-Z_][a-zA-Z_0-9]*'
        t.type = self.reserved.get(t.value, 'ID')
        return t

    # Opening quote: reset the accumulator and switch to the 'string' state.
    # Nothing is returned, so no token is emitted for the quote itself.
    def t_STRING(self, t):
        r'\"'
        self.str_buildup = ''
        t.lexer.begin('string')

    # --- 'string' state ----------------------------------------------------

    # One ordinary character (anything but backslash, newline or quote).
    def t_string_1(self, t):
        r'[^\\\n\"]'
        self.str_buildup += t.value

    # Backslash escape: translate known escapes, keep any other escaped
    # character as-is (so \" yields " and \\ yields \).
    def t_string_2(self, t):
        r'\\[^\n]'
        escaped = t.value[1]
        self.str_buildup += self._ESCAPES.get(escaped, escaped)

    # Closing quote: emit the STRING token carrying the accumulated text and
    # return to the INITIAL state.
    def t_string_3(self, t):
        r'\"'
        t.type = 'STRING'
        t.value = self.str_buildup
        t.lexer.begin('INITIAL')
        return t

    # Error rule for the 'string' state (reached on a raw newline or on a
    # backslash that is not followed by a non-newline character).  PLY
    # raises LexError when a state has no error rule; the same exception
    # type is raised here explicitly, after resetting the lexer to INITIAL
    # so that it is not left stuck in the 'string' state.
    def t_string_error(self, t):
        t.lexer.begin('INITIAL')
        raise lex.LexError(
            "Illegal character %r in string literal at index %d"
            % (t.value[0], t.lexpos),
            t.value,
        )

    # --- INITIAL state (continued) -----------------------------------------

    # Newlines produce no token; they only advance the line counter.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)
        return None

    # Error rule for the INITIAL state: report the character and skip it.
    def t_error(self, t):
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print('Illegal character "%s"' % t.value[0])
        t.lexer.skip(1)

    # --- Pass-through API --------------------------------------------------

    def input(self, *args):
        """Forward all arguments to the underlying PLY lexer's ``input``."""
        self.lexer.input(*args)

    def token(self, *args):
        """Return the result of the underlying PLY lexer's ``token``."""
        return self.lexer.token(*args)