# Source-viewer metadata: 100 lines, 2.3 KiB, Python.
import ply.lex as lex
|
|
|
|
class Lexer(object):
    """Tokenizer built on PLY for a small C-like token set.

    Produces tokens for parentheses, square brackets, curly braces,
    semicolons, identifiers, the reserved words listed in ``self.reserved``
    and double-quoted string literals.

    String literals are scanned in a dedicated exclusive ``string`` state:
    the opening quote switches into it, the body is accumulated piece by
    piece in ``self.str_buildup``, and the closing quote emits one ``STRING``
    token whose value is the decoded text (without the quotes).

    NOTE: PLY takes each rule's regular expression from the rule function's
    docstring and orders function rules by their position in the file, so
    the ``t_*`` methods below must keep their regex as the docstring and
    must not be reordered.  They are documented with comments instead.
    """

    # Backslash escapes that decode to a control character.  Any other
    # escaped character stands for itself (e.g. \" -> " and \\ -> \).
    _ESCAPES = {
        't': '\t',
        'r': '\r',
        'n': '\n',
        'f': '\f',
        'b': '\b',
        'v': '\v',
    }

    def __init__(self):
        """Declare the token tables and build the underlying PLY lexer."""
        # Maps a reserved word as written in the input to its token type.
        self.reserved = {
            'C': 'C',

            # types
            'char': 'CHAR',
            'short': 'SHORT',
            'int': 'INT',
            'long': 'LONG',

            # control
            'if': 'IF',
            'else': 'ELSE',
            'while': 'WHILE',
            'for': 'FOR',
            'return': 'RETURN',
        }

        # Every token type the lexer may emit.
        self.tokens = [
            'LPAREN',
            'RPAREN',
            'LBRACKET',
            'RBRACKET',
            'LCURLY',
            'RCURLY',
            'SEMICOLON',
            'STRING',
            'ID',
        ] + list(self.reserved.values())

        # 'exclusive': only the t_string_* rules apply inside a literal.
        self.states = (
            ('string', 'exclusive'),
        )

        # Simple single-character tokens.
        self.t_LPAREN = r'\('
        self.t_RPAREN = r'\)'
        self.t_LBRACKET = r'\['
        self.t_RBRACKET = r'\]'
        self.t_LCURLY = r'\{'
        self.t_RCURLY = r'\}'
        self.t_SEMICOLON = r';'

        # Whitespace skipped between tokens in the default state.
        self.t_ignore = ' \t\r'

        # Nothing is skipped inside a string literal.
        self.t_string_ignore = ''

        # Text of the string literal currently being scanned.  Created here
        # so the attribute exists before the first literal is seen.
        self.str_buildup = ''

        self.lexer = lex.lex(module=self)

    # Identifier or reserved word: reserved words get their own token type,
    # everything else is an ID.
    def t_ID(self, t):
        r'[a-zA-Z_][a-zA-Z_0-9]*'
        t.type = self.reserved.get(t.value, 'ID')
        return t

    # Opening quote: start a fresh buffer and enter the 'string' state.
    # Returns nothing, so no token is emitted for the quote itself.
    def t_STRING(self, t):
        r'\"'
        self.str_buildup = ''
        t.lexer.begin('string')

    # Ordinary character inside a literal (not backslash, newline or quote).
    def t_string_1(self, t):
        r'[^\\\n\"]'
        self.str_buildup += t.value

    # Backslash escape inside a literal: decode known escapes, otherwise
    # keep the escaped character as-is.
    def t_string_2(self, t):
        r'\\[^\n]'
        escaped = t.value[1]
        self.str_buildup += self._ESCAPES.get(escaped, escaped)

    # Closing quote: emit the accumulated text as one STRING token and go
    # back to the default state.
    def t_string_3(self, t):
        r'\"'
        t.type = 'STRING'
        t.value = self.str_buildup
        t.lexer.begin('INITIAL')
        return t

    # Newlines (default state only): keep the line counter current and emit
    # no token.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # Error handler shared by all states: report the offending character
    # and skip past it so lexing can continue.
    def t_ANY_error(self, t):
        # Parenthesised single-argument form prints the same text under
        # both Python 2 and Python 3.
        print('Illegal character "%s"' % t.value[0])
        t.lexer.skip(1)

    def input(self, *args):
        """Feed new input text to the underlying PLY lexer."""
        self.lexer.input(*args)

    def token(self, *args):
        """Return the next token from the underlying PLY lexer."""
        return self.lexer.token(*args)