Determine the reduce actions for each parser state

2022-06-17 01:45:48 -04:00 · 2022-06-17 01:45:48 -04:00 · 60e2818075
commit 60e2818075
parent 5af3179ff2
5 changed files with 169 additions and 4 deletions
--- a/lib/propane/generator.rb
+++ b/lib/propane/generator.rb
@ -34,6 +34,7 @@ class Propane
          raise Error.new("Rule name collides with token name #{rule.name.inspect}")
        end
        rule_sets[rule.name] ||= RuleSet.new(rule.name)
+        rule.rule_set = rule_sets[rule.name]
        rule_sets[rule.name] << rule
      end
      unless rule_sets["Start"]
@ -51,6 +52,11 @@ class Propane
        end
      end
      determine_possibly_empty_rulesets!(rule_sets)
+      puts "Start token set"
+      rule_sets.each do |rule_set_name, rule_set|
+        puts "RuleSet #{rule_set_name}:"
+        puts "  " + rule_set.start_token_set.map(&:name).join(", ")
+      end
      @lexer = Lexer.new(@grammar.tokens, @grammar.drop_tokens)
      @parser = Parser.new(rule_sets["Start"])
    end
--- a/lib/propane/parser.rb
+++ b/lib/propane/parser.rb
@ -28,15 +28,31 @@ class Propane

      @item_sets.each do |item_set|
        process_item_set(item_set)
+      end
+
+      build_reduce_actions!
+
+      @item_sets.each do |item_set|
        puts "Item set #{item_set.id}:"
        ids = item_set.in_sets.map(&:id)
-        if ids.size > 0
-          puts "    (in from #{ids.join(", ")})"
+        puts "  In sets: #{ids.join(", ")}"
+        puts "  Out sets:"
+        item_set.out_sets.each do |symbol, out_set|
+          puts "    #{symbol.name} => #{out_set.id}"
        end
        puts item_set
        item_set.following_item_set.each do |following_symbol, following_item_set|
          puts " #{following_symbol.name} => #{following_item_set.id}"
        end
+        puts "  Reduce actions:"
+        case item_set.reduce_actions
+        when Rule
+          puts "    * => #{item_set.reduce_actions.id} (#{item_set.reduce_actions.name})"
+        when Hash
+          item_set.reduce_actions.each do |token, rule|
+            puts "    #{token.name} => #{rule.id} (#{rule.name})"
+          end
+        end
        puts
      end
    end
@ -70,10 +86,124 @@ class Propane
          following_set = @item_sets_set[item_set.build_following_item_set(following_symbol)]
          item_set.following_item_set[following_symbol] = following_set
          following_set.in_sets << item_set
+          item_set.out_sets[following_symbol] = following_set
        end
      end
    end

+    # Build the reduce actions for each ItemSet.
+    #
+    # @return [void]
+    def build_reduce_actions!
+      # Every parser state gets its own result stored on it: nil, a single
+      # Rule, or a Hash of lookahead Token => Rule.
+      @item_sets.each do |state|
+        actions = build_reduce_actions_for_item_set(state)
+        state.reduce_actions = actions
+      end
+    end
+
+    # Build the reduce actions for a single item set (parser state).
+    #
+    # @param item_set [ItemSet]
+    #   ItemSet (parser state)
+    #
+    # @return [nil, Rule, Hash]
+    #   If no reduce actions are possible for the given item set, nil.
+    #   If only one reduce action is possible for the given item set, the Rule
+    #   to reduce.
+    #   Otherwise, a mapping of lookahead Tokens to the Rules to reduce.
+    def build_reduce_actions_for_item_set(item_set)
+      # Only "complete" items (parse position at the end of the rule) can be
+      # reduced in this state, so their rules are the reduction candidates.
+      complete_items = item_set.items.select(&:complete?)
+      candidates = Set.new(complete_items.map(&:rule))
+
+      case candidates.size
+      when 0
+        # Nothing can be reduced in this state.
+        return nil
+      when 1
+        # A single candidate is reduced without computing any lookaheads.
+        return candidates.first
+      end
+
+      # Several candidates: tell them apart by lookahead token. The search
+      # for following tokens is limited to the item sets leading up to this
+      # one rather than the whole grammar, which yields a more precise
+      # lookahead set.
+      context = item_set.leading_item_sets
+      candidates.each_with_object({}) do |candidate, actions|
+        build_lookahead_tokens_to_reduce(candidate, context).each do |token|
+          # Two different rules claiming the same lookahead token cannot be
+          # resolved, so this is reported as a grammar error.
+          previous = actions[token]
+          if previous
+            raise Error.new("Error: reduce/reduce conflict between rule #{previous.id} (#{previous.name}) and rule #{candidate.id} (#{candidate.name})")
+          end
+          actions[token] = candidate
+        end
+      end
+    end
+
+    # Build the set of lookahead Tokens that should cause the given Rule to be
+    # reduced in the given context of ItemSets.
+    #
+    # @param rule [Rule]
+    #   Rule to reduce.
+    # @param item_sets [Set<ItemSet>]
+    #   ItemSets to consider for the context in which to reduce this Rule.
+    #
+    # @return [Set<Token>]
+    #   Possible lookahead Tokens for the given Rule within the context of the
+    #   given ItemSets.
+    def build_lookahead_tokens_to_reduce(rule, item_sets)
+      # We need to look for possible following tokens for this reduce rule. We
+      # do this by looking for tokens that follow the reduce rule, or the
+      # start token set for any other rule that follows the reduce rule.
+      # While doing this, the following situations could arise:
+      # 1. We may come across a following rule that could be empty. In this
+      #    case, in addition to the start token set for that rule, we must also
+      #    continue to the next following symbol after the potentially empty
+      #    rule and continue the search for potential following tokens.
+      # 2. We may reach the end of a rule that was not one of the original
+      #    reduce rules. In this case, we must also search for all potential
+      #    following tokens for this rule as well.
+      lookahead_tokens = Set.new
+      rule_sets_to_check_after = [rule.rule_set]
+      checked_rule_sets = Set.new
+      until rule_sets_to_check_after.empty?
+        rule_set = rule_sets_to_check_after.shift
+        checked_rule_sets << rule_set
+        # For each RuleSet we're checking, we're going to look through all
+        # items in the item sets of interest and gather all possible following
+        # tokens to form the lookahead token set.
+        item_sets.each do |item_set|
+          item_set.items.each do |item|
+            next unless item.following_symbol == rule_set
+
+            # Walk the symbols after the matched RuleSet until one of them
+            # terminates the search for this item.
+            (1..).each do |offset|
+              case symbol = item.following_symbol(offset)
+              when nil
+                # The item's rule ends here, so whatever can follow that
+                # rule's own RuleSet can also follow ours. This must be a
+                # separate local: blocks share the enclosing method's locals,
+                # so assigning to rule_set here would change the RuleSet the
+                # remaining items of this pass are compared against.
+                enclosing_rule_set = item.rule.rule_set
+                already_handled =
+                  checked_rule_sets.include?(enclosing_rule_set) ||
+                  rule_sets_to_check_after.include?(enclosing_rule_set)
+                rule_sets_to_check_after << enclosing_rule_set unless already_handled
+                break
+              when Token
+                lookahead_tokens << symbol
+                break
+              when RuleSet
+                lookahead_tokens.merge(symbol.start_token_set)
+                # A possibly-empty RuleSet lets the symbol after it show
+                # through as well, so keep walking in that case.
+                break unless symbol.could_be_empty?
+              end
+            end
+          end
+        end
+      end
+      lookahead_tokens
+    end
+
  end

 end
--- a/lib/propane/parser/item.rb
+++ b/lib/propane/parser/item.rb
@ -86,10 +86,26 @@ class Propane
      # That is, the symbol which follows the parse position marker in the
      # current Item.
      #
+      # @param offset [Integer]
+      #   Offset from current parse position to examine.
+      #
      # @return [Token, RuleSet, nil]
      #   Following symbol for the Item.
-      def following_symbol
-        @rule.components[@position]
+      def following_symbol(offset = 0)
+        # With the default offset of 0 this is the component located at the
+        # parse position itself; larger offsets look further along the rule.
+        index = @position + offset
+        @rule.components[index]
+      end
+
+      # Get the previous symbol for the Item.
+      #
+      # That is, the symbol which precedes the parse position marker in the
+      # current Item.
+      #
+      # @return [Token, RuleSet, nil]
+      #   Previous symbol for this Item.
+      def previous_symbol
+        # When the parse position is at the start of the rule there is no
+        # preceding component, and nil is returned.
+        return nil unless @position > 0
+        @rule.components[@position - 1]
       end

      # Get whether this Item is followed by the provided symbol.
--- a/lib/propane/parser/item_set.rb
+++ b/lib/propane/parser/item_set.rb
@ -21,6 +21,14 @@ class Propane
      #   ItemSets leading to this item set.
      attr_reader :in_sets

+      # @return [Hash]
+      #   ItemSets reached from this item set. Key is the following symbol (a
+      #   Token or RuleSet) that leads to the reached ItemSet.
+      attr_reader :out_sets
+
+      # @return [nil, Rule, Hash]
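+      #   Reduce actions for this item set: nil if no reduction is possible, a
+      #   single Rule if only one reduction is possible, otherwise a Hash
+      #   mapping lookahead Tokens to the Rules to reduce.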
+      attr_accessor :reduce_actions
+
      # Build an ItemSet.
      #
      # @param items [Array<Item>]
@ -29,6 +37,7 @@ class Propane
        @items = Set.new(items)
        @following_item_set = {}
        @in_sets = Set.new
+        @out_sets = {}
        close!
      end

--- a/lib/propane/rule.rb
+++ b/lib/propane/rule.rb
@ -22,6 +22,10 @@ class Propane
    #   Rule name.
    attr_reader :name

+    # @return [RuleSet]
+    #   The RuleSet that this Rule is a part of.
+    attr_accessor :rule_set
+
    # Construct a Rule.
    #
    # @param name [String]