From f46b5b3f4d82cba21107db2c73108d0c8e1c5842 Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Sun, 2 Oct 2022 10:07:44 -0400 Subject: [PATCH] Remove TOKEN_EOF; define EOF token and start rule in Generator --- assets/parser.d.erb | 17 ++++------------- lib/propane.rb | 3 --- lib/propane/generator.rb | 28 ++++++++++++++++++---------- lib/propane/grammar.rb | 7 +++---- lib/propane/parser.rb | 14 ++++++-------- lib/propane/rule.rb | 7 ++----- lib/propane/token.rb | 31 ++++++++++++++++++++++++------- spec/propane/grammar_spec.rb | 8 -------- spec/test_d_lexer.d | 4 ++-- 9 files changed, 59 insertions(+), 60 deletions(-) diff --git a/assets/parser.d.erb b/assets/parser.d.erb index ca6821c..c1182f4 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -10,10 +10,9 @@ class <%= @classname %> enum { <% @grammar.tokens.each_with_index do |token, index| %> - TOKEN_<%= token.name %> = <%= index %>, + TOKEN_<%= token.code_name %> = <%= index %>, <% end %> _TOKEN_COUNT = <%= @grammar.tokens.size %>, - _TOKEN_EOF = <%= TOKEN_EOF %>, _TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>, _TOKEN_DROP = <%= TOKEN_DROP %>, _TOKEN_NONE = <%= TOKEN_NONE %>, @@ -247,7 +246,7 @@ class <%= @classname %> } else if (attempt_match_info.length == 0u) { - lt.token = _TOKEN_EOF; + lt.token = TOKEN_0EOF; break; } if (!lex_continue) @@ -378,7 +377,7 @@ class <%= @classname %> } if (shift_state != 0xFFFFFFFFu) { - if (token == _TOKEN_EOF) + if (token == TOKEN_0EOF) { /* Successful parse. */ return true; @@ -405,11 +404,7 @@ class <%= @classname %> /* Error, unexpected token. */ write("Unexpected token "); - if (token == _TOKEN_EOF) - { - writeln("{EOF}"); - } - else if (token < _TOKEN_COUNT) + if (token < _TOKEN_COUNT) { writeln(token_names[token]); } @@ -457,10 +452,6 @@ class <%= @classname %> // { // writeln(token_names[token]); // } -// else if (token == _TOKEN_EOF) -// { -// writeln("{EOF}"); -// } // else // { // writeln("{other}"); diff --git a/lib/propane.rb b/lib/propane.rb index 2de0f54..b381fa8 100644 --- a/lib/propane.rb +++ b/lib/propane.rb @@ -25,9 +25,6 @@ require_relative "propane/version" class Propane - # EOF. - TOKEN_EOF = 0xFFFFFFFC - # Decoding error. TOKEN_DECODE_ERROR = 0xFFFFFFFD diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb index b89e667..0b14f28 100644 --- a/lib/propane/generator.rb +++ b/lib/propane/generator.rb @@ -26,33 +26,41 @@ class Propane private def process_grammar! + # Add EOF token. + @grammar.tokens << Token.new("$EOF", nil) tokens_by_name = {} - @grammar.tokens.each do |token| + @grammar.tokens.each_with_index do |token, token_id| + # Assign token ID. + token.id = token_id # Check for token name conflicts. if tokens_by_name.include?(token.name) raise Error.new("Duplicate token name #{token.name.inspect}") end tokens_by_name[token.name] = token end + # Check for user start rule. + unless @grammar.rules.find {|rule| rule.name == "Start"} + raise Error.new("Start rule not found") + end + # Add "real" start rule. + @grammar.rules.unshift(Rule.new("$Start", ["Start", "$EOF"], nil, nil)) rule_sets = {} - @grammar.rules.each do |rule| + rule_set_id = @grammar.tokens.size + @grammar.rules.each_with_index do |rule, rule_id| + # Assign rule ID. + rule.id = rule_id # Check for token/rule name conflict. if tokens_by_name.include?(rule.name) raise Error.new("Rule name collides with token name #{rule.name.inspect}") end # Build rule sets of all rules with the same name. - @_rule_set_id ||= @grammar.tokens.size unless rule_sets[rule.name] - rule_sets[rule.name] = RuleSet.new(rule.name, @_rule_set_id) - @_rule_set_id += 1 + rule_sets[rule.name] = RuleSet.new(rule.name, rule_set_id) + rule_set_id += 1 end rule.rule_set = rule_sets[rule.name] rule_sets[rule.name] << rule end - # Check for start rule. - unless rule_sets["Start"] - raise Error.new("Start rule not found") - end # Generate lexer user code IDs for lexer patterns with user code blocks. @grammar.patterns.select do |pattern| pattern.code @@ -75,7 +83,7 @@ class Propane # Generate the lexer. @lexer = Lexer.new(@grammar.patterns) # Generate the parser. - @parser = Parser.new(@grammar, rule_sets, rule_sets["Start"], @log) + @parser = Parser.new(@grammar, rule_sets, @log) end # Determine which grammar rules could expand to empty sequences. diff --git a/lib/propane/grammar.rb b/lib/propane/grammar.rb index 06e5d2b..eb24b35 100644 --- a/lib/propane/grammar.rb +++ b/lib/propane/grammar.rb @@ -80,7 +80,7 @@ class Propane unless code = parse_code_block! consume!(/;/, "expected pattern or `;' or code block") end - token = Token.new(name: name, id: @tokens.size, line_number: @line_number) + token = Token.new(name, @line_number) @tokens << token pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code) @patterns << pattern @@ -93,7 +93,7 @@ class Propane unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/ raise Error.new("Invalid token name #{name.inspect}") end - token = Token.new(name: name, id: @tokens.size, line_number: @line_number) + token = Token.new(name, @line_number) @tokens << token end end @@ -117,8 +117,7 @@ class Propane raise Error.new("Invalid rule name #{name.inspect}") end components = components.strip.split(/\s+/) - # Reserve rule ID 0 for the "real" start rule. - @rules << Rule.new(rule_name, components, code, @line_number, @rules.size + 1) + @rules << Rule.new(rule_name, components, code, @line_number) end end diff --git a/lib/propane/parser.rb b/lib/propane/parser.rb index 79a8851..5bd1e20 100644 --- a/lib/propane/parser.rb +++ b/lib/propane/parser.rb @@ -2,15 +2,13 @@ class Propane class Parser - def initialize(grammar, rule_sets, start_rule_set, log) + def initialize(grammar, rule_sets, log) @grammar = grammar @rule_sets = rule_sets @log = log - @eof_token = Token.new(name: "$", id: TOKEN_EOF) - @start_rule = Rule.new("$$", [start_rule_set, @eof_token], nil, nil, 0) @item_sets = [] @item_sets_set = {} - start_item = Item.new(@start_rule, 0) + start_item = Item.new(grammar.rules.first, 0) eval_item_sets = Set[ItemSet.new([start_item])] while eval_item_sets.size > 0 @@ -21,7 +19,7 @@ class Propane @item_sets << item_set @item_sets_set[item_set] = item_set item_set.following_symbols.each do |following_symbol| - unless following_symbol == @eof_token + unless following_symbol.name == "$EOF" following_set = item_set.build_following_item_set(following_symbol) eval_item_sets << following_set end @@ -44,7 +42,7 @@ class Propane @item_sets.each do |item_set| shift_entries = item_set.following_symbols.map do |following_symbol| state_id = - if following_symbol == @eof_token + if following_symbol.name == "$EOF" 0 else item_set.following_item_set[following_symbol].id @@ -83,7 +81,7 @@ class Propane def process_item_set(item_set) item_set.following_symbols.each do |following_symbol| - unless following_symbol == @eof_token + unless following_symbol.name == "$EOF" following_set = @item_sets_set[item_set.build_following_item_set(following_symbol)] item_set.following_item_set[following_symbol] = following_set following_set.in_sets << item_set @@ -206,7 +204,7 @@ class Propane def write_log! @log.puts Util.banner("Parser Rules") - ([@start_rule] + @grammar.rules).each do |rule| + @grammar.rules.each do |rule| @log.puts @log.puts "Rule #{rule.id}:" @log.puts " #{rule}" diff --git a/lib/propane/rule.rb b/lib/propane/rule.rb index f3e7d95..62c6758 100644 --- a/lib/propane/rule.rb +++ b/lib/propane/rule.rb @@ -12,7 +12,7 @@ class Propane # @return [Integer] # Rule ID. - attr_reader :id + attr_accessor :id # @return [Integer] # Line number where the rule was defined in the input grammar. @@ -36,13 +36,10 @@ class Propane # User code associated with the rule. # @param line_number [Integer] # Line number where the rule was defined in the input grammar. - # @param id [Integer] - # Rule ID. - def initialize(name, components, code, line_number, id) + def initialize(name, components, code, line_number) @name = name @components = components @code = code - @id = id @line_number = line_number end diff --git a/lib/propane/token.rb b/lib/propane/token.rb index 8cd7920..6c21554 100644 --- a/lib/propane/token.rb +++ b/lib/propane/token.rb @@ -2,13 +2,25 @@ class Propane class Token + class << self + + # Name of the token to use in code (special characters replaced). + # + # @return [String] + # Name of the token to use in code (special characters replaced). + def code_name(name) + name.sub(/^\$/, "0") + end + + end + # @return [String, nil] # Token name. attr_reader :name # @return [Integer, nil] # Token ID. - attr_reader :id + attr_accessor :id # @return [Integer, nil] # Line number where the token was defined in the input grammar. @@ -20,14 +32,19 @@ class Propane # Optional parameters. # @option options [String, nil] :name # Token name. - # @option options [Integer, nil] :id - # Token ID. # @option options [Integer, nil] :line_number # Line number where the token was defined in the input grammar. - def initialize(options) - @name = options[:name] - @id = options[:id] - @line_number = options[:line_number] + def initialize(name, line_number) + @name = name + @line_number = line_number + end + + # Name of the token to use in code (special characters replaced). + # + # @return [String] + # Name of the token to use in code (special characters replaced). + def code_name + self.class.code_name(@name) end def to_s diff --git a/spec/propane/grammar_spec.rb b/spec/propane/grammar_spec.rb index 8f159ce..9b8a50c 100644 --- a/spec/propane/grammar_spec.rb +++ b/spec/propane/grammar_spec.rb @@ -35,7 +35,6 @@ EOF o = grammar.tokens.find {|token| token.name == "while"} expect(o).to_not be_nil expect(o.line_number).to eq 6 - expect(o.id).to eq 0 o = grammar.patterns.find {|pattern| pattern.token == o} expect(o).to_not be_nil @@ -46,7 +45,6 @@ EOF o = grammar.tokens.find {|token| token.name == "id"} expect(o).to_not be_nil expect(o.line_number).to eq 9 - expect(o.id).to eq 1 o = grammar.patterns.find {|pattern| pattern.token == o} expect(o).to_not be_nil @@ -57,7 +55,6 @@ EOF o = grammar.tokens.find {|token| token.name == "token_with_code"} expect(o).to_not be_nil expect(o.line_number).to eq 11 - expect(o.id).to eq 2 o = grammar.patterns.find {|pattern| pattern.token == o} expect(o).to_not be_nil @@ -83,21 +80,18 @@ EOF o = grammar.rules[0] expect(o.name).to eq "A" expect(o.components).to eq %w[B] - expect(o.id).to eq 1 expect(o.line_number).to eq 19 expect(o.code).to eq " a = 42;\n" o = grammar.rules[1] expect(o.name).to eq "B" expect(o.components).to eq %w[C while id] - expect(o.id).to eq 2 expect(o.line_number).to eq 22 expect(o.code).to be_nil o = grammar.rules[2] expect(o.name).to eq "B" expect(o.components).to eq [] - expect(o.id).to eq 3 expect(o.line_number).to eq 23 expect(o.code).to eq " b = 0;\n" end @@ -119,7 +113,6 @@ EOF o = grammar.tokens.find {|token| token.name == "code1"} expect(o).to_not be_nil - expect(o.id).to eq 0 expect(o.line_number).to eq 1 o = grammar.patterns.find {|pattern| pattern.token == o} @@ -128,7 +121,6 @@ EOF o = grammar.tokens.find {|token| token.name == "code2"} expect(o).to_not be_nil - expect(o.id).to eq 1 expect(o.line_number).to eq 6 o = grammar.patterns.find {|pattern| pattern.token == o} diff --git a/spec/test_d_lexer.d b/spec/test_d_lexer.d index 6f08a29..5af56d8 100644 --- a/spec/test_d_lexer.d +++ b/spec/test_d_lexer.d @@ -77,8 +77,8 @@ unittest assert(lexer.lex_token() == LT(1, 0, 3, Testparser.TOKEN_int)); assert(lexer.lex_token() == LT(1, 4, 1, Testparser.TOKEN_plus)); assert(lexer.lex_token() == LT(1, 6, 3, Testparser.TOKEN_int)); - assert(lexer.lex_token() == LT(1, 9, 0, Testparser._TOKEN_EOF)); + assert(lexer.lex_token() == LT(1, 9, 0, Testparser.TOKEN_0EOF)); lexer = new Testparser.Lexer(null, 0u); - assert(lexer.lex_token() == LT(0, 0, 0, Testparser._TOKEN_EOF)); + assert(lexer.lex_token() == LT(0, 0, 0, Testparser.TOKEN_0EOF)); }