# Token rules for a small lexer written against the PLY (``ply.lex``)
# conventions: a ``tokens`` list, a ``states`` tuple and ``t_*`` rules.
#
# NOTE: the docstring of every ``t_*`` function below is the rule's regular
# expression, not documentation.  Do not edit those strings as prose; use
# ``#`` comments for explanations instead.

# Reserved words: identifier text -> token type emitted for it.
reserved = {
    'C': 'C',
}

# Every token type the lexer may emit.
tokens = [
    'LPAREN',
    'RPAREN',
    'SEMICOLON',
    'STRING',
    'ID',
] + list(reserved.values())

# 'string' is exclusive: while it is active only the t_string_* rules apply.
states = (
    ('string', 'exclusive'),
)

t_LPAREN = r'\('
t_RPAREN = r'\)'
t_SEMICOLON = r';'

# Characters silently skipped between tokens in the INITIAL state.
t_ignore = ' \t\r'

# Inside a string literal nothing is skipped: whitespace is string content.
# Declared explicitly because an exclusive state does not use ``t_ignore``.
t_string_ignore = ''

# Escape letter -> character it denotes.  Any other escaped character stands
# for itself (e.g. \" and \\).
_STRING_ESCAPES = {
    't': '\t',
    'r': '\r',
    'n': '\n',
    'f': '\f',
    'b': '\b',
    'v': '\v',
}


# Identifier or reserved word; reserved words get their own token type.
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    t.type = reserved.get(t.value, 'ID')
    return t


# Opening quote: reset the accumulator and switch to the 'string' state.
# Returns nothing; the STRING token is produced by t_string_3 at the closing
# quote.
def t_STRING(t):
    r'\"'
    t.lexer.str_buildup = ''
    t.lexer.begin('string')


# Ordinary string character (anything but backslash, newline or quote).
def t_string_1(t):
    r'[^\\\n\"]'
    t.lexer.str_buildup += t.value


# Backslash escape: translate known escape letters, keep others verbatim.
def t_string_2(t):
    r'\\[^\n]'
    escaped = t.value[1]
    t.lexer.str_buildup += _STRING_ESCAPES.get(escaped, escaped)


# Closing quote: emit the accumulated text as a STRING token and return to
# the INITIAL state.
def t_string_3(t):
    r'\"'
    t.type = 'STRING'
    t.value = t.lexer.str_buildup
    t.lexer.begin('INITIAL')
    return t


# Error rule for the 'string' state.  The only inputs no t_string_* rule
# matches are a bare newline (unterminated literal) and a backslash that is
# followed by a newline or by end of input.
def t_string_error(t):
    if t.value[0] == '\n':
        print('Unterminated string literal at line %d' % t.lexer.lineno)
        # t_newline is inactive in this exclusive state, so count the line
        # here, drop the partial literal and resume normal scanning.
        t.lexer.lineno += 1
        t.lexer.begin('INITIAL')
    else:
        print('Illegal character "%s"' % t.value[0])
    # The error rule must advance the input position.
    t.lexer.skip(1)


# Runs of newlines only advance the line counter; no token is produced.
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
    return None


# Error rule for the INITIAL state: report the offending character and skip
# it so scanning can continue.
def t_error(t):
    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print('Illegal character "%s"' % t.value[0])
    t.lexer.skip(1)