diff --git a/assets/parser.d.erb b/assets/parser.d.erb
index cc0563d..fed8a2f 100644
--- a/assets/parser.d.erb
+++ b/assets/parser.d.erb
@@ -6,10 +6,8 @@ class <%= classname %>
 {
     enum
     {
-<% @tokens.each_with_index do |(name, token), index| %>
-<% if token.name %>
+<% @grammar.tokens.each_with_index do |token, index| %>
         TOKEN_<%= token.c_name %> = <%= index %>,
-<% end %>
 <% end %>
         TOKEN_EOF = <%= TOKEN_EOF %>,
         TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>,
@@ -18,12 +16,8 @@ class <%= classname %>
     }
 
     static immutable string TokenNames[] = [
-<% @tokens.each_with_index do |(name, token), index| %>
-<% if token.name %>
+<% @grammar.tokens.each_with_index do |token, index| %>
         "<%= token.name %>",
-<% else %>
-        null,
-<% end %>
 <% end %>
     ];
 
diff --git a/lib/propane.rb b/lib/propane.rb
index 00e7240..dc488fd 100644
--- a/lib/propane.rb
+++ b/lib/propane.rb
@@ -37,17 +37,16 @@ class Propane
   end
 
   def initialize(input)
-    grammar = Grammar.new(input)
-    @classname = grammar.classname
-    @modulename = grammar.modulename
-    @tokens = grammar.tokens
-    @rule_sets = grammar.rule_sets
+    @grammar = Grammar.new(input)
+    @classname = @grammar.classname
+    @modulename = @grammar.modulename
+    @rule_sets = @grammar.rule_sets
   end
 
   def generate(output_file, log_file)
     expand_rules
-    lexer = Lexer.new(@tokens)
-    parser = Parser.new(@tokens, @rule_sets)
+    lexer = Lexer.new(@grammar.tokens, @grammar.drop_tokens)
+    parser = Parser.new(@rule_sets)
     classname = @classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
     erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../assets/parser.d.erb")), trim_mode: "<>")
     result = erb.result(binding.clone)
@@ -59,9 +58,16 @@ class Propane
   private
 
   def expand_rules
+    tokens_by_name = {}
+    @grammar.tokens.each do |token|
+      if tokens_by_name.include?(token.name)
+        raise Error.new("Duplicate token name #{token.name.inspect}")
+      end
+      tokens_by_name[token.name] = token
+    end
     @rule_sets.each do |rule_name, rule_set|
-      if @tokens.include?(rule_name)
-        raise Error.new("Rule name collides with token name #{rule_name}")
+      if tokens_by_name.include?(rule_name)
+        raise Error.new("Rule name collides with token name #{rule_name.inspect}")
       end
     end
     unless @rule_sets["Start"]
@@ -70,8 +76,8 @@ class Propane
     @rule_sets.each do |rule_name, rule_set|
       rule_set.rules.each do |rule|
         rule.components.map! do |component|
-          if @tokens[component]
-            @tokens[component]
+          if tokens_by_name[component]
+            tokens_by_name[component]
           elsif @rule_sets[component]
             @rule_sets[component]
           else
diff --git a/lib/propane/grammar.rb b/lib/propane/grammar.rb
index 79ccf61..97f8a05 100644
--- a/lib/propane/grammar.rb
+++ b/lib/propane/grammar.rb
@@ -3,12 +3,14 @@ class Propane
   class Grammar
 
     attr_reader :classname
+    attr_reader :drop_tokens
    attr_reader :modulename
     attr_reader :rule_sets
     attr_reader :tokens
 
     def initialize(input)
-      @tokens = {}
+      @tokens = []
+      @drop_tokens = []
       @rule_sets = {}
       input = input.gsub("\r\n", "\n")
       parse_grammar(input)
@@ -35,14 +37,10 @@ class Propane
         unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
           raise Error.new("Invalid token name #{name}")
         end
-        if @tokens[name]
-          raise Error.new("Duplicate token name #{name}")
-        else
-          @tokens[name] = Token.new(name, pattern, @tokens.size, line_number)
-        end
+        @tokens << Token.new(name, pattern, @tokens.size, line_number)
       elsif sliced = input.slice!(/\Adrop\s+(\S+)\n/)
         pattern = $1
-        @tokens[@tokens.size] = Token.new(nil, pattern, @tokens.size, line_number)
+        @drop_tokens << Token.new(nil, pattern, nil, line_number)
       elsif sliced = input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
         rule_name, components, code = $1, $2, $3
         components = components.strip.split(/\s+/)
diff --git a/lib/propane/lexer.rb b/lib/propane/lexer.rb
index f3983c8..7dc5921 100644
--- a/lib/propane/lexer.rb
+++ b/lib/propane/lexer.rb
@@ -5,8 +5,8 @@ class Propane
     # Lexer DFA.
     attr_accessor :dfa
 
-    def initialize(tokens)
-      @dfa = DFA.new(tokens)
+    def initialize(tokens, drop_tokens)
+      @dfa = DFA.new(tokens, drop_tokens)
     end
 
     def build_tables
@@ -17,10 +17,10 @@ class Propane
         accepts =
           if state.accepts.nil?
             TOKEN_NONE
-          elsif state.accepts.name
-            state.accepts.id
-          else
+          elsif state.accepts.drop?
             TOKEN_DROP
+          else
+            state.accepts.id
           end
         state_table << {
           transition_table_index: transition_table.size,
diff --git a/lib/propane/lexer/dfa.rb b/lib/propane/lexer/dfa.rb
index d20248b..b10dee7 100644
--- a/lib/propane/lexer/dfa.rb
+++ b/lib/propane/lexer/dfa.rb
@@ -3,10 +3,10 @@ class Propane
 
     class DFA < FA
 
-      def initialize(tokens)
+      def initialize(tokens, drop_tokens)
         super()
         start_nfa = Regex::NFA.new
-        tokens.each do |name, token|
+        (tokens + drop_tokens).each do |token|
           start_nfa.start_state.add_transition(nil, token.nfa.start_state)
         end
         @nfa_state_sets = {}
diff --git a/lib/propane/parser.rb b/lib/propane/parser.rb
index e2459f8..d51b1d8 100644
--- a/lib/propane/parser.rb
+++ b/lib/propane/parser.rb
@@ -2,7 +2,7 @@ class Propane
 
   class Parser
 
-    def initialize(tokens, rule_sets)
+    def initialize(rule_sets)
       @token_eof = Token.new("$", nil, TOKEN_EOF, nil)
       @item_sets = []
       @item_sets_set = {}
diff --git a/lib/propane/token.rb b/lib/propane/token.rb
index dced646..98130d1 100644
--- a/lib/propane/token.rb
+++ b/lib/propane/token.rb
@@ -48,6 +48,14 @@ class Propane
       @name.upcase
     end
 
+    # Whether the token is a drop token.
+    #
+    # @return [Boolean]
+    #   Whether the token is a drop token.
+    def drop?
+      @name.nil?
+    end
+
     def to_s
       @name
     end
diff --git a/spec/propane/lexer/dfa_spec.rb b/spec/propane/lexer/dfa_spec.rb
index 7adf125..f7698ac 100644
--- a/spec/propane/lexer/dfa_spec.rb
+++ b/spec/propane/lexer/dfa_spec.rb
@@ -50,8 +50,8 @@ class TestLexer
 end
 
 def run(grammar, input)
-  propane = Propane.new(grammar)
-  token_dfa = Propane::Lexer::DFA.new(propane.instance_variable_get(:@tokens))
+  grammar = Propane::Grammar.new(grammar)
+  token_dfa = Propane::Lexer::DFA.new(grammar.tokens, grammar.drop_tokens)
   test_lexer = TestLexer.new(token_dfa)
   test_lexer.lex(input)
 end