From 60e281807535194a23ed8a4110134d1366f1c691 Mon Sep 17 00:00:00 2001
From: Josh Holtrop
Date: Fri, 17 Jun 2022 01:45:48 -0400
Subject: [PATCH] Determine the reduce actions for each parser state

---
 lib/propane/generator.rb       |   6 ++
 lib/propane/parser.rb          | 134 ++++++++++++++++++++++++++++++++-
 lib/propane/parser/item.rb     |  20 ++++-
 lib/propane/parser/item_set.rb |   9 +++
 lib/propane/rule.rb            |   4 +
 5 files changed, 169 insertions(+), 4 deletions(-)

diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb
index 2950b2a..06b9450 100644
--- a/lib/propane/generator.rb
+++ b/lib/propane/generator.rb
@@ -34,6 +34,7 @@ class Propane
           raise Error.new("Rule name collides with token name #{rule.name.inspect}")
         end
         rule_sets[rule.name] ||= RuleSet.new(rule.name)
+        rule.rule_set = rule_sets[rule.name]
         rule_sets[rule.name] << rule
       end
       unless rule_sets["Start"]
@@ -51,6 +52,11 @@ class Propane
         end
       end
       determine_possibly_empty_rulesets!(rule_sets)
+      puts "Start token set"
+      rule_sets.each do |rule_set_name, rule_set|
+        puts "RuleSet #{rule_set_name}:"
+        puts "  " + rule_set.start_token_set.map(&:name).join(", ")
+      end
       @lexer = Lexer.new(@grammar.tokens, @grammar.drop_tokens)
       @parser = Parser.new(rule_sets["Start"])
     end
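
Note (illustration, not part of the patch): the generator now records a back-reference from each Rule to the RuleSet it belongs to. The parser changes below rely on that hop (`rule.rule_set` and `item.rule.rule_set`) to get from a completed Rule to the set of alternatives it can stand for. A minimal standalone sketch of the relationship, using stand-in Struct classes rather than the real Rule/RuleSet:

    # Stand-ins for illustration only; the real classes live in lib/propane/.
    RuleSet = Struct.new(:name, :rules)
    Rule = Struct.new(:name, :components) do
      attr_accessor :rule_set  # back-reference added by this patch
    end

    rule_sets = Hash.new { |h, name| h[name] = RuleSet.new(name, []) }
    rule = Rule.new("Expr", [])
    rule.rule_set = rule_sets["Expr"]  # mirrors the generator.rb hunk above
    rule_sets["Expr"].rules << rule
    rule.rule_set.name  # => "Expr"
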
diff --git a/lib/propane/parser.rb b/lib/propane/parser.rb
index 9fd4762..bec27c7 100644
--- a/lib/propane/parser.rb
+++ b/lib/propane/parser.rb
@@ -28,15 +28,31 @@ class Propane

       @item_sets.each do |item_set|
         process_item_set(item_set)
+      end
+
+      build_reduce_actions!
+
+      @item_sets.each do |item_set|
         puts "Item set #{item_set.id}:"
         ids = item_set.in_sets.map(&:id)
-        if ids.size > 0
-          puts "  (in from #{ids.join(", ")})"
+        puts "  In sets: #{ids.join(", ")}"
+        puts "  Out sets:"
+        item_set.out_sets.each do |symbol, out_set|
+          puts "    #{symbol.name} => #{out_set.id}"
         end
         puts item_set
         item_set.following_item_set.each do |following_symbol, following_item_set|
           puts "  #{following_symbol.name} => #{following_item_set.id}"
         end
+        puts "  Reduce actions:"
+        case item_set.reduce_actions
+        when Rule
+          puts "    * => #{item_set.reduce_actions.id} (#{item_set.reduce_actions.name})"
+        when Hash
+          item_set.reduce_actions.each do |token, rule|
+            puts "    #{token.name} => #{rule.id} (#{rule.name})"
+          end
+        end
         puts
       end
     end
@@ -70,10 +86,124 @@ class Propane
           following_set = @item_sets_set[item_set.build_following_item_set(following_symbol)]
           item_set.following_item_set[following_symbol] = following_set
           following_set.in_sets << item_set
+          item_set.out_sets[following_symbol] = following_set
         end
       end
     end
 
+    # Build the reduce actions for each ItemSet.
+    #
+    # @return [void]
+    def build_reduce_actions!
+      @item_sets.each do |item_set|
+        item_set.reduce_actions = build_reduce_actions_for_item_set(item_set)
+      end
+    end
+
+    # Build the reduce actions for a single item set (parser state).
+    #
+    # @param item_set [ItemSet]
+    #   ItemSet (parser state)
+    #
+    # @return [nil, Rule, Hash]
+    #   If no reduce actions are possible for the given item set, nil.
+    #   If only one reduce action is possible for the given item set, the Rule
+    #   to reduce.
+    #   Otherwise, a mapping of lookahead Tokens to the Rules to reduce.
+    def build_reduce_actions_for_item_set(item_set)
+      # To build the reduce actions, we start by looking at any
+      # "complete" items, i.e., items where the parse position is at the
+      # end of a rule. These are the only rules that are candidates for
+      # reduction in the current ItemSet.
+      reduce_rules = Set.new(item_set.items.select(&:complete?).map(&:rule))
+
+      # If there are no rules to reduce for this ItemSet, we're done here.
+      return nil if reduce_rules.size == 0
+
+      # If there is exactly one rule to reduce for this ItemSet, then do not
+      # figure out the lookaheads; just reduce it.
+      return reduce_rules.first if reduce_rules.size == 1
+
+      # Otherwise, we have more than one possible rule to reduce.
+
+      # We will be looking for all possible tokens that can follow instances of
+      # these rules. Rather than looking through the entire grammar for the
+      # possible following tokens, we will only look in the item sets leading
+      # up to this one. This restriction gives us a more precise lookahead set,
+      # and allows us to parse LALR grammars.
+      item_sets = item_set.leading_item_sets
+      reduce_rules.reduce({}) do |reduce_actions, reduce_rule|
+        lookahead_tokens_for_rule = build_lookahead_tokens_to_reduce(reduce_rule, item_sets)
+        lookahead_tokens_for_rule.each do |lookahead_token|
+          if existing_reduce_rule = reduce_actions[lookahead_token]
+            raise Error.new("Error: reduce/reduce conflict between rule #{existing_reduce_rule.id} (#{existing_reduce_rule.name}) and rule #{reduce_rule.id} (#{reduce_rule.name})")
+          end
+          reduce_actions[lookahead_token] = reduce_rule
+        end
+        reduce_actions
+      end
+    end
+
+    # Build the set of lookahead Tokens that should cause the given Rule to be
+    # reduced in the given context of ItemSets.
+    #
+    # @param rule [Rule]
+    #   Rule to reduce.
+    # @param item_sets [Set]
+    #   ItemSets to consider for the context in which to reduce this Rule.
+    #
+    # @return [Set]
+    #   Possible lookahead Tokens for the given Rule within the context of the
+    #   given ItemSets.
+    def build_lookahead_tokens_to_reduce(rule, item_sets)
+      # We need to look for possible following tokens for this reduce rule. We
+      # do this by looking for tokens that follow the reduce rule, or the
+      # start token set for any other rule that follows the reduce rule.
+      # While doing this, the following situations could arise:
+      #   1. We may come across a following rule that could be empty. In this
+      #      case, in addition to the start token set for that rule, we must also
+      #      continue to the next following symbol after the potentially empty
+      #      rule and continue the search for potential following tokens.
+      #   2. We may reach the end of a rule that was not one of the original
+      #      reduce rules. In this case, we must also search for all potential
+      #      following tokens for this rule as well.
+      lookahead_tokens = Set.new
+      rule_sets_to_check_after = [rule.rule_set]
+      checked_rule_sets = Set.new
+      while !rule_sets_to_check_after.empty?
+        rule_set = rule_sets_to_check_after.slice!(0)
+        checked_rule_sets << rule_set
+        # For each RuleSet we're checking, we're going to look through all
+        # items in the item sets of interest and gather all possible following
+        # tokens to form the lookahead token set.
+        item_sets.each do |item_set|
+          item_set.items.each do |item|
+            if item.following_symbol == rule_set
+              (1..).each do |offset|
+                case symbol = item.following_symbol(offset)
+                when nil
+                  rule_set = item.rule.rule_set
+                  unless checked_rule_sets.include?(rule_set)
+                    rule_sets_to_check_after << rule_set
+                  end
+                  break
+                when Token
+                  lookahead_tokens << symbol
+                  break
+                when RuleSet
+                  lookahead_tokens += symbol.start_token_set
+                  unless symbol.could_be_empty?
+                    break
+                  end
+                end
+              end
+            end
+          end
+        end
+      end
+      lookahead_tokens
+    end
   end
 end
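
Note (illustration, not part of the patch): as documented above, `reduce_actions` for a state is nil (no reduction possible), a single Rule (reduce unconditionally), or a Hash mapping lookahead Tokens to Rules. This patch only computes those tables; the sketch below shows one way a driver might dispatch on that shape, where `next_action`, the shift-over-reduce preference, and the `[:shift, ...]`/`[:reduce, ...]` tuples are assumptions, not propane's generated parser.

    # Hypothetical driver fragment. `state` is assumed to respond to
    # #out_sets and #reduce_actions as defined in parser/item_set.rb below.
    def next_action(state, lookahead_token)
      # Shift if the lookahead leads directly to another item set.
      if target = state.out_sets[lookahead_token]
        return [:shift, target]
      end
      case actions = state.reduce_actions
      when nil  then [:error]                  # nothing can be reduced here
      when Hash then (rule = actions[lookahead_token]) ? [:reduce, rule] : [:error]
      else           [:reduce, actions]        # single Rule: reduce regardless of lookahead
      end
    end
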
diff --git a/lib/propane/parser/item.rb b/lib/propane/parser/item.rb
index d694691..e49d279 100644
--- a/lib/propane/parser/item.rb
+++ b/lib/propane/parser/item.rb
@@ -86,10 +86,26 @@ class Propane
       # That is, the symbol which follows the parse position marker in the
       # current Item.
       #
+      # @param offset [Integer]
+      #   Offset from current parse position to examine.
+      #
       # @return [Token, RuleSet, nil]
       #   Following symbol for the Item.
-      def following_symbol
-        @rule.components[@position]
+      def following_symbol(offset = 0)
+        @rule.components[@position + offset]
+      end
+
+      # Get the previous symbol for the Item.
+      #
+      # That is, the symbol which precedes the parse position marker in the
+      # current Item.
+      #
+      # @return [Token, RuleSet, nil]
+      #   Previous symbol for this Item.
+      def previous_symbol
+        if @position > 0
+          @rule.components[@position - 1]
+        end
       end
 
       # Get whether this Item is followed by the provided symbol.
diff --git a/lib/propane/parser/item_set.rb b/lib/propane/parser/item_set.rb
index 6c2df31..3cb4b1f 100644
--- a/lib/propane/parser/item_set.rb
+++ b/lib/propane/parser/item_set.rb
@@ -21,6 +21,14 @@ class Propane
       #   ItemSets leading to this item set.
       attr_reader :in_sets
 
+      # @return [Hash]
+      #   ItemSets reached from this item set. Key is a Token or Rule.
+      attr_reader :out_sets
+
+      # @return [nil, Rule, Hash]
+      #   Reduce actions, mapping lookahead tokens to rules.
+      attr_accessor :reduce_actions
+
       # Build an ItemSet.
       #
       # @param items [Array]
@@ -29,6 +37,7 @@ class Propane
         @items = Set.new(items)
         @following_item_set = {}
         @in_sets = Set.new
+        @out_sets = {}
         close!
       end
 
diff --git a/lib/propane/rule.rb b/lib/propane/rule.rb
index f486b7a..c2cc759 100644
--- a/lib/propane/rule.rb
+++ b/lib/propane/rule.rb
@@ -22,6 +22,10 @@ class Propane
     #   Rule name.
     attr_reader :name
 
+    # @return [RuleSet]
+    #   The RuleSet that this Rule is a part of.
+    attr_accessor :rule_set
+
     # Construct a Rule.
     #
     # @param name [String]
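
Note (illustration, not the real Item class): `following_symbol` now takes an offset measured from the parse position marker, which is what the `(1..).each do |offset|` scan in build_lookahead_tokens_to_reduce uses to walk past a potentially empty rule, and `previous_symbol` looks one symbol to the left of the marker. For an item "A -> x . y z" with position 1 over a components array:

    components = ["x", "y", "z"]  # Tokens/RuleSets in the real code
    position = 1                  # the parse position marker ("dot")

    following_symbol = ->(offset = 0) { components[position + offset] }
    previous_symbol = -> { components[position - 1] if position > 0 }

    following_symbol.call     # => "y"  (symbol just after the dot)
    following_symbol.call(1)  # => "z"
    following_symbol.call(2)  # => nil  (ran off the end of the rule)
    previous_symbol.call      # => "x"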