Store tokens and drop tokens separately
parent 6f1ce32775
commit f37801ec9e
@@ -6,10 +6,8 @@ class <%= classname %>
 {
     enum
     {
-        <% @tokens.each_with_index do |(name, token), index| %>
-        <% if token.name %>
+        <% @grammar.tokens.each_with_index do |token, index| %>
         TOKEN_<%= token.c_name %> = <%= index %>,
         <% end %>
-        <% end %>
         TOKEN_EOF = <%= TOKEN_EOF %>,
         TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>,
@@ -18,12 +16,8 @@ class <%= classname %>
     }

     static immutable string TokenNames[] = [
-        <% @tokens.each_with_index do |(name, token), index| %>
-        <% if token.name %>
+        <% @grammar.tokens.each_with_index do |token, index| %>
         "<%= token.name %>",
-        <% else %>
-        null,
-        <% end %>
         <% end %>
     ];

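With drop tokens no longer interleaved in @tokens, the template loop needs neither the <% if token.name %> guard nor the null placeholder entries in TokenNames. A minimal standalone sketch of the simplified loop, with hypothetical token names and the same trim_mode the generator uses:

require "erb"

Token = Struct.new(:name, :c_name)
tokens = [Token.new("int", "INT"), Token.new("plus", "PLUS")] # hypothetical tokens

template = <<~TEMPLATE
  <% tokens.each_with_index do |token, index| %>
  TOKEN_<%= token.c_name %> = <%= index %>,
  <% end %>
TEMPLATE

puts ERB.new(template, trim_mode: "<>").result(binding)
# Prints:
#   TOKEN_INT = 0,
#   TOKEN_PLUS = 1,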
@@ -37,17 +37,16 @@ class Propane
   end

   def initialize(input)
-    grammar = Grammar.new(input)
-    @classname = grammar.classname
-    @modulename = grammar.modulename
-    @tokens = grammar.tokens
-    @rule_sets = grammar.rule_sets
+    @grammar = Grammar.new(input)
+    @classname = @grammar.classname
+    @modulename = @grammar.modulename
+    @rule_sets = @grammar.rule_sets
   end

   def generate(output_file, log_file)
     expand_rules
-    lexer = Lexer.new(@tokens)
-    parser = Parser.new(@tokens, @rule_sets)
+    lexer = Lexer.new(@grammar.tokens, @grammar.drop_tokens)
+    parser = Parser.new(@rule_sets)
     classname = @classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
     erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../assets/parser.d.erb")), trim_mode: "<>")
     result = erb.result(binding.clone)
@@ -59,9 +58,16 @@ class Propane
   private

   def expand_rules
+    tokens_by_name = {}
+    @grammar.tokens.each do |token|
+      if tokens_by_name.include?(token.name)
+        raise Error.new("Duplicate token name #{token.name.inspect}")
+      end
+      tokens_by_name[token.name] = token
+    end
     @rule_sets.each do |rule_name, rule_set|
-      if @tokens.include?(rule_name)
-        raise Error.new("Rule name collides with token name #{rule_name}")
+      if tokens_by_name.include?(rule_name)
+        raise Error.new("Rule name collides with token name #{rule_name.inspect}")
       end
     end
     unless @rule_sets["Start"]
@@ -70,8 +76,8 @@ class Propane
     @rule_sets.each do |rule_name, rule_set|
       rule_set.rules.each do |rule|
         rule.components.map! do |component|
-          if @tokens[component]
-            @tokens[component]
+          if tokens_by_name[component]
+            tokens_by_name[component]
           elsif @rule_sets[component]
             @rule_sets[component]
           else
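Because @tokens is now an Array rather than a name-keyed Hash, expand_rules builds a temporary index for both duplicate-name detection and rule-component lookup. A self-contained sketch of that index, with Token reduced to a bare struct:

Token = Struct.new(:name)

tokens = [Token.new("int"), Token.new("plus")]
tokens_by_name = {}
tokens.each do |token|
  if tokens_by_name.include?(token.name)
    raise "Duplicate token name #{token.name.inspect}"
  end
  tokens_by_name[token.name] = token
end

tokens_by_name["int"]   # => #<struct Token name="int">
tokens_by_name["bogus"] # => nil, so rule components fall through to @rule_sets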
@@ -3,12 +3,14 @@ class Propane
   class Grammar

     attr_reader :classname
+    attr_reader :drop_tokens
     attr_reader :modulename
     attr_reader :rule_sets
     attr_reader :tokens

     def initialize(input)
-      @tokens = {}
+      @tokens = []
+      @drop_tokens = []
       @rule_sets = {}
       input = input.gsub("\r\n", "\n")
       parse_grammar(input)
@@ -35,14 +37,10 @@ class Propane
         unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
           raise Error.new("Invalid token name #{name}")
         end
-        if @tokens[name]
-          raise Error.new("Duplicate token name #{name}")
-        else
-          @tokens[name] = Token.new(name, pattern, @tokens.size, line_number)
-        end
+        @tokens << Token.new(name, pattern, @tokens.size, line_number)
       elsif sliced = input.slice!(/\Adrop\s+(\S+)\n/)
         pattern = $1
-        @tokens[@tokens.size] = Token.new(nil, pattern, @tokens.size, line_number)
+        @drop_tokens << Token.new(nil, pattern, nil, line_number)
       elsif sliced = input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
         rule_name, components, code = $1, $2, $3
         components = components.strip.split(/\s+/)
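The parsing change above means named token declarations and drop directives land in different collections: named tokens keep sequential IDs, while drop tokens get a nil name and no ID at all. A reduced sketch of that split — the named-token line format here is invented for illustration; only the drop form matches the grammar syntax shown in the diff:

Token = Struct.new(:name, :pattern, :id, :line_number)

tokens = []
drop_tokens = []

source = "int \\d+\ndrop \\s+\n"
source.each_line.with_index(1) do |line, line_number|
  if line =~ /\Adrop\s+(\S+)/
    drop_tokens << Token.new(nil, $1, nil, line_number)
  elsif line =~ /\A(\S+)\s+(\S+)/
    tokens << Token.new($1, $2, tokens.size, line_number)
  end
end

tokens.map { |t| [t.name, t.id] }      # => [["int", 0]]
drop_tokens.map { |t| [t.name, t.id] } # => [[nil, nil]]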
@@ -5,8 +5,8 @@ class Propane
     # Lexer DFA.
     attr_accessor :dfa

-    def initialize(tokens)
-      @dfa = DFA.new(tokens)
+    def initialize(tokens, drop_tokens)
+      @dfa = DFA.new(tokens, drop_tokens)
     end

     def build_tables
@@ -17,10 +17,10 @@ class Propane
         accepts =
           if state.accepts.nil?
            TOKEN_NONE
-          elsif state.accepts.name
-            state.accepts.id
-          else
+          elsif state.accepts.drop?
             TOKEN_DROP
+          else
+            state.accepts.id
           end
         state_table << {
           transition_table_index: transition_table.size,
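The accepting-state classification is now a three-way decision: no token accepted, a drop token accepted, or an ordinary token accepted. A standalone sketch, with the TOKEN_NONE and TOKEN_DROP sentinel values assumed for illustration:

TOKEN_NONE = 0xFFFFFFFF # sentinel values assumed for illustration
TOKEN_DROP = 0xFFFFFFFE

Token = Struct.new(:name, :id) do
  def drop?
    name.nil?
  end
end

def accepts_value(accepted)
  if accepted.nil?
    TOKEN_NONE      # state accepts nothing
  elsif accepted.drop?
    TOKEN_DROP      # matched text is discarded by the lexer
  else
    accepted.id     # ordinary token: emit its ID
  end
end

accepts_value(nil)                 # => TOKEN_NONE
accepts_value(Token.new(nil, nil)) # => TOKEN_DROP
accepts_value(Token.new("int", 0)) # => 0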
@@ -3,10 +3,10 @@ class Propane

     class DFA < FA

-      def initialize(tokens)
+      def initialize(tokens, drop_tokens)
         super()
         start_nfa = Regex::NFA.new
-        tokens.each do |name, token|
+        (tokens + drop_tokens).each do |token|
           start_nfa.start_state.add_transition(nil, token.nfa.start_state)
         end
         @nfa_state_sets = {}
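Concatenating the two lists means named and drop tokens are treated identically at DFA-construction time: each token's NFA hangs off a shared start state via an epsilon (nil) transition. A sketch with minimal stand-in types — the real ones come from Regex::NFA:

State = Struct.new(:transitions) do
  def add_transition(symbol, destination)
    transitions << [symbol, destination]
  end
end
NFA = Struct.new(:start_state)

start_state = State.new([])
tokens      = [NFA.new(State.new([]))] # stand-ins for each token.nfa
drop_tokens = [NFA.new(State.new([]))]

(tokens + drop_tokens).each do |nfa|
  start_state.add_transition(nil, nfa.start_state)
end

start_state.transitions.size # => 2 epsilon edges, one per token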
@@ -2,7 +2,7 @@ class Propane

   class Parser

-    def initialize(tokens, rule_sets)
+    def initialize(rule_sets)
       @token_eof = Token.new("$", nil, TOKEN_EOF, nil)
       @item_sets = []
       @item_sets_set = {}
@@ -48,6 +48,14 @@ class Propane
       @name.upcase
     end

+    # Whether the token is a drop token.
+    #
+    # @return [Boolean]
+    #   Whether the token is a drop token.
+    def drop?
+      @name.nil?
+    end
+
     def to_s
       @name
     end
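The drop? predicate leans on the convention the grammar parser keeps from the old code: drop tokens are constructed with a nil name. A quick standalone illustration:

Token = Struct.new(:name, :pattern, :id, :line_number) do
  def drop?
    name.nil?
  end
end

named   = Token.new("int", "\\d+", 0, 1)
dropped = Token.new(nil, "\\s+", nil, 2)

named.drop?   # => false
dropped.drop? # => true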
@@ -50,8 +50,8 @@ class TestLexer
   end

   def run(grammar, input)
-    propane = Propane.new(grammar)
-    token_dfa = Propane::Lexer::DFA.new(propane.instance_variable_get(:@tokens))
+    grammar = Propane::Grammar.new(grammar)
+    token_dfa = Propane::Lexer::DFA.new(grammar.tokens, grammar.drop_tokens)
     test_lexer = TestLexer.new(token_dfa)
     test_lexer.lex(input)
   end