jtlcpy/parser/Lexer.py
2011-08-31 10:41:31 -04:00

100 lines
2.3 KiB
Python

import ply.lex as lex
class Lexer(object):
    """Tokenizer built on ``ply.lex``.

    Recognises brackets, semicolons, identifiers, a small set of reserved
    words and double-quoted string literals.  String literals are scanned in
    an exclusive ``string`` lexer state: the opening quote switches into that
    state, the body is accumulated in ``self.str_buildup``, and the closing
    quote emits a single ``STRING`` token and switches back to ``INITIAL``.

    NOTE: PLY takes each rule function's regular expression from the
    function's docstring and orders function rules by where they are defined.
    The ``t_*`` methods below are therefore documented with ``#`` comments
    only, and their order must not be changed casually.
    """

    # Single-character escapes recognised inside string literals, keyed by
    # the character that follows the backslash.  Any other escaped character
    # stands for itself (so \" yields a quote and \\ yields a backslash).
    _ESCAPES = {
        't': '\t',
        'r': '\r',
        'n': '\n',
        'f': '\f',
        'b': '\b',
        'v': '\v',
    }

    def __init__(self):
        """Declare the token tables and simple rules, then build the lexer."""
        # Reserved words: source spelling -> token type.
        self.reserved = {
            'C': 'C',
            # types
            'char': 'CHAR',
            'short': 'SHORT',
            'int': 'INT',
            'long': 'LONG',
            # control
            'if': 'IF',
            'else': 'ELSE',
            'while': 'WHILE',
            'for': 'FOR',
            'return': 'RETURN',
        }
        # Every token type this lexer can emit.
        self.tokens = [
            'LPAREN',
            'RPAREN',
            'LBRACKET',
            'RBRACKET',
            'LCURLY',
            'RCURLY',
            'SEMICOLON',
            'STRING',
            'ID',
        ] + list(self.reserved.values())
        # Exclusive state: while inside a string literal only the
        # t_string_* (and t_ANY_*) rules apply.
        self.states = (
            ('string', 'exclusive'),
        )
        # Simple (string-valued) rules for punctuation.
        self.t_LPAREN = r'\('
        self.t_RPAREN = r'\)'
        self.t_LBRACKET = r'\['
        self.t_RBRACKET = r'\]'
        self.t_LCURLY = r'\{'
        self.t_RCURLY = r'\}'
        self.t_SEMICOLON = r';'
        # Blanks, tabs and carriage returns are skipped between tokens...
        self.t_ignore = ' \t\r'
        # ...but nothing is skipped inside a string literal.
        self.t_string_ignore = ''
        # Must come last: lex.lex() inspects the attributes set above.
        self.lexer = lex.lex(module = self)

    # Identifier or reserved word: reserved spellings get their own token
    # type, everything else is an ID.
    def t_ID(self, t):
        r'[a-zA-Z_][a-zA-Z_0-9]*'
        t.type = self.reserved.get(t.value, 'ID')
        return t

    # Opening quote: reset the accumulator and enter the 'string' state.
    # No token is returned here; the STRING token is emitted by t_string_3.
    def t_STRING(self, t):
        r'\"'
        self.str_buildup = ''
        t.lexer.begin('string')

    # Ordinary character inside a string literal: append it verbatim.
    def t_string_1(self, t):
        r'[^\\\n\"]'
        self.str_buildup += t.value

    # Backslash escape inside a string literal: translate known escapes,
    # otherwise keep the escaped character itself.
    def t_string_2(self, t):
        r'\\[^\n]'
        escaped = t.value[1]
        self.str_buildup += self._ESCAPES.get(escaped, escaped)

    # Closing quote: emit the accumulated text as one STRING token and
    # return to the INITIAL state.
    def t_string_3(self, t):
        r'\"'
        t.type = 'STRING'
        t.value = self.str_buildup
        t.lexer.begin('INITIAL')
        return t

    # Newlines outside string literals: count them, emit no token.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)
        return None

    # Error rule for every state: report the offending character and skip it.
    def t_ANY_error(self, t):
        bad_char = t.value[0]
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Illegal character "%s"' % bad_char)
        # t_newline only applies in the INITIAL state, so a raw newline
        # inside a string literal ends up here; count it so that later
        # line numbers stay correct.
        if bad_char == '\n':
            t.lexer.lineno += 1
        t.lexer.skip(1)

    def input(self, *args):
        """Feed input to the underlying PLY lexer (arguments passed through)."""
        self.lexer.input(*args)

    def token(self, *args):
        """Return the next token from the underlying PLY lexer."""
        return self.lexer.token(*args)