From f534f3e753182f37ec8ffbe1953eacbecb65abb3 Mon Sep 17 00:00:00 2001
From: Josh Holtrop
Date: Tue, 30 Aug 2011 13:59:15 -0400
Subject: [PATCH] properly lex a string with escaped characters

---
Reviewer notes (free-form commentary; not part of the commit message or diff):

  * Line breaks and indentation in this copy were reconstructed from the
    hunk line counts; verify the exact leading whitespace against the
    repository before applying.
  * The removed rule r'"([^"])*"' stopped at the first double quote, so a
    string could not contain an escaped quote. The replacement lexes a
    string piece by piece in a new exclusive state named 'string'.
  * t_STRING now matches only the opening quote: it resets
    t.lexer.str_buildup to '' and calls t.lexer.begin('string'). It has no
    return statement.
  * t_string_1 appends one character that is not a backslash, a newline or
    a double quote to the buffer.
  * t_string_2 matches a backslash followed by any non-newline character.
    t, r, n, f, b and v map to the corresponding control characters; any
    other character is appended unchanged (so \" yields " and \\ yields \).
  * t_string_3 matches the closing quote, sets the token type to 'STRING',
    sets the token value to the accumulated buffer, switches back to
    'INITIAL' and returns the token.
  * NOTE(review): the t_<state>_<name> rule naming and t.lexer.begin() look
    like PLY's conditional-lexing API - confirm. This diff adds no rule
    that matches a raw newline inside the 'string' state and no error or
    ignore rule for that state; check how an unterminated string is handled.

 parser/lexrules.py | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/parser/lexrules.py b/parser/lexrules.py
index 686c9cc..4334a36 100644
--- a/parser/lexrules.py
+++ b/parser/lexrules.py
@@ -11,6 +11,10 @@ tokens = [
     'ID',
     ] + list(reserved.values())
 
+states = (
+    ('string', 'exclusive'),
+    )
+
 t_LPAREN = r'\('
 t_RPAREN = r'\)'
 t_SEMICOLON = r';'
@@ -23,8 +27,31 @@ def t_ID(t):
     return t
 
 def t_STRING(t):
-    r'"([^"])*"'
-    t.value = t.value[1:-1]
+    r'\"'
+    t.lexer.str_buildup = ''
+    t.lexer.begin('string')
+
+def t_string_1(t):
+    r'[^\\\n\"]'
+    t.lexer.str_buildup += t.value
+
+def t_string_2(t):
+    r'\\[^\n]'
+    c = {
+        't': '\t',
+        'r': '\r',
+        'n': '\n',
+        'f': '\f',
+        'b': '\b',
+        'v': '\v',
+        }.get(t.value[1], t.value[1])
+    t.lexer.str_buildup += c
+
+def t_string_3(t):
+    r'\"'
+    t.type = 'STRING'
+    t.value = t.lexer.str_buildup
+    t.lexer.begin('INITIAL')
     return t
 
 def t_newline(t):