propane/lib/propane/parser.rb

391 lines
14 KiB
Ruby

class Propane
class Parser
attr_reader :state_table
attr_reader :shift_table
attr_reader :reduce_table
attr_reader :rule_sets
def initialize(grammar, rule_sets, log, options)
@grammar = grammar
@rule_sets = rule_sets
@log = log
@item_sets = []
@item_sets_set = {}
@warnings = Set.new
@errors = Set.new
@options = options
start_item = Item.new(grammar.rules.first, 0)
eval_item_sets = Set[ItemSet.new([start_item])]
while eval_item_sets.size > 0
item_set = eval_item_sets.first
eval_item_sets.delete(item_set)
unless @item_sets_set.include?(item_set)
item_set.id = @item_sets.size
@item_sets << item_set
@item_sets_set[item_set] = item_set
item_set.next_symbols.each do |next_symbol|
unless next_symbol.name == "$EOF"
next_item_set = item_set.build_next_item_set(next_symbol)
eval_item_sets << next_item_set
end
end
end
end
@item_sets.each do |item_set|
process_item_set(item_set)
end
build_reduce_actions!
build_tables!
write_log!
errormessage = ""
if @errors.size > 0
errormessage += @errors.join("\n")
end
if @warnings.size > 0 && @options[:warnings_as_errors]
if errormessage != ""
errormessage += "\n"
end
errormessage += "Fatal errors (-w):\n" + @warnings.join("\n")
end
if errormessage != ""
raise Error.new(errormessage)
end
end
private
def build_tables!
@state_table = []
@shift_table = []
@reduce_table = []
@item_sets.each do |item_set|
unless item_set.reduce_rules.empty?
item_set.shift_entries.each do |shift_entry|
token = shift_entry[:symbol]
if item_set.reduce_actions
if rule = item_set.reduce_actions[token]
@warnings << "Shift/Reduce conflict (state #{item_set.id}) between token #{token.name} and rule #{rule.name} (defined on line #{rule.line_number})"
end
end
end
end
reduce_entries =
if rule = item_set.reduce_rule
[{token_id: @grammar.invalid_token_id, rule_id: rule.id, rule: rule,
rule_set_id: rule.rule_set.id, n_states: rule.components.size,
propagate_optional_target: rule.optional? && rule.components.size == 1}]
elsif reduce_actions = item_set.reduce_actions
reduce_actions.map do |token, rule|
{token: token, token_id: token.id, rule_id: rule.id, rule: rule,
rule_set_id: rule.rule_set.id, n_states: rule.components.size,
propagate_optional_target: rule.optional? && rule.components.size == 1}
end
else
[]
end
@state_table << {
shift_index: @shift_table.size,
n_shifts: item_set.shift_entries.size,
reduce_index: @reduce_table.size,
n_reduces: reduce_entries.size,
}
@shift_table += item_set.shift_entries
@reduce_table += reduce_entries
end
end
def process_item_set(item_set)
item_set.next_symbols.each do |next_symbol|
unless next_symbol.name == "$EOF"
next_item_set = @item_sets_set[item_set.build_next_item_set(next_symbol)]
item_set.next_item_set[next_symbol] = next_item_set
next_item_set.in_sets << item_set
end
end
end
# Build the reduce actions for each ItemSet.
#
# @return [void]
def build_reduce_actions!
@item_sets.each do |item_set|
build_shift_entries(item_set)
build_reduce_actions_for_item_set(item_set)
end
item_sets_to_process = @item_sets.select do |item_set|
# We need lookahead reduce actions if:
# 1) There is more than one possible rule to reduce. In this case the
# lookahead token can help choose which rule to reduce.
# 2) There is at least one shift action and one reduce action for
# this item set. In this case the lookahead reduce actions are
# needed to test for a Shift/Reduce conflict.
item_set.reduce_rules.size > 1 ||
(item_set.reduce_rules.size > 0 && item_set.shift_entries.size > 0)
end
if RbConfig::CONFIG["host_os"] =~ /linux/
item_sets_by_id = {}
item_sets_to_process.each do |item_set|
item_sets_by_id[item_set.object_id] = item_set
end
tokens_by_id = {}
@grammar.tokens.each do |token|
tokens_by_id[token.object_id] = token
end
rules_by_id = {}
@grammar.rules.each do |rule|
rules_by_id[rule.object_id] = rule
end
n_threads = Util.determine_n_threads
semaphore = Mutex.new
queue = Queue.new
threads = {}
n_threads.times do
piper, pipew = IO.pipe
thread = Thread.new do
loop do
item_set = nil
semaphore.synchronize do
item_set = item_sets_to_process.slice!(0)
end
break if item_set.nil?
fork do
piper.close
build_lookahead_reduce_actions_for_item_set(item_set, pipew)
end
end
queue.push(Thread.current)
end
threads[thread] = [piper, pipew]
end
until threads.empty?
thread = queue.pop
piper, pipew = threads[thread]
pipew.close
thread_txt = piper.read
thread_txt.each_line do |line|
if line.start_with?("RA,")
parts = line.split(",")
item_set_id, token_id, rule_id = parts[1..3].map(&:to_i)
item_set = item_sets_by_id[item_set_id]
unless item_set
raise "Internal error: could not find item set from thread"
end
token = tokens_by_id[token_id]
unless item_set
raise "Internal error: could not find token from thread"
end
rule = rules_by_id[rule_id]
unless item_set
raise "Internal error: could not find rule from thread"
end
item_set.reduce_actions ||= {}
item_set.reduce_actions[token] = rule
elsif line.start_with?("Error: ")
@errors << line.chomp
else
raise "Internal error: unhandled thread line #{line}"
end
end
thread.join
threads.delete(thread)
end
else
# Fall back to single threaded algorithm.
item_sets_to_process.each do |item_set|
item_set.reduce_actions = build_lookahead_reduce_actions_for_item_set(item_set)
end
end
end
# Build the shift entries for a single item set.
#
# @return [void]
def build_shift_entries(item_set)
item_set.shift_entries = item_set.next_symbols.map do |next_symbol|
state_id =
if next_symbol.name == "$EOF"
0
else
item_set.next_item_set[next_symbol].id
end
{
symbol: next_symbol,
state_id: state_id,
}
end
end
# Build the reduce actions for a single item set (parser state).
#
# @param item_set [ItemSet]
# ItemSet (parser state)
#
# @return [void]
def build_reduce_actions_for_item_set(item_set)
# To build the reduce actions, we start by looking at any
# "complete" items, i.e., items where the parse position is at the
# end of a rule. These are the only rules that are candidates for
# reduction in the current ItemSet.
item_set.reduce_rules = Set.new(item_set.items.select(&:complete?).map(&:rule))
if item_set.reduce_rules.size == 1
item_set.reduce_rule = item_set.reduce_rules.first
end
end
# Build the reduce actions for a single item set (parser state).
#
# @param item_set [ItemSet]
# ItemSet (parser state)
# @param fh [File]
# Output file handle for multiprocessing mode.
#
# @return [Hash]
# Mapping of lookahead Tokens to the Rules to reduce.
def build_lookahead_reduce_actions_for_item_set(item_set, fh = nil)
# We will be looking for all possible tokens that can follow instances of
# these rules. Rather than looking through the entire grammar for the
# possible following tokens, we will only look in the item sets leading
# up to this one. This restriction gives us a more precise lookahead set,
# and allows us to parse LALR grammars.
item_sets = Set[item_set] + item_set.leading_item_sets
item_set.reduce_rules.reduce({}) do |reduce_actions, reduce_rule|
lookahead_tokens_for_rule = build_lookahead_tokens_to_reduce(reduce_rule, item_sets)
lookahead_tokens_for_rule.each do |lookahead_token|
if existing_reduce_rule = reduce_actions[lookahead_token]
error = "Error: reduce/reduce conflict (state #{item_set.id}) between rule #{existing_reduce_rule.name}##{existing_reduce_rule.id} (defined on line #{existing_reduce_rule.line_number}) and rule #{reduce_rule.name}##{reduce_rule.id} (defined on line #{reduce_rule.line_number}) for lookahead token #{lookahead_token}"
@errors << error
fh.puts(error) if fh
end
reduce_actions[lookahead_token] = reduce_rule
fh.puts "RA,#{item_set.object_id},#{lookahead_token.object_id},#{reduce_rule.object_id}" if fh
end
reduce_actions
end
end
# Build the set of lookahead Tokens that should cause the given Rule to be
# reduced in the given context of ItemSets.
#
# @param rule [Rule]
# Rule to reduce.
# @param item_sets [Set<ItemSet>]
# ItemSets to consider for the context in which to reduce this Rule.
#
# @return [Set<Token>]
# Possible lookahead Tokens for the given Rule within the context of the
# given ItemSets.
def build_lookahead_tokens_to_reduce(rule, item_sets)
# We need to look for possible following tokens for this reduce rule. We
# do this by looking for tokens that follow the reduce rule, or the
# start token set for any other rule that follows the reduce rule.
# While doing this, the following situations could arise:
# 1. We may come across a following rule that could be empty. In this
# case, in addition to the start token set for that rule, we must also
# continue to the next following symbol after the potentially empty
# rule and continue the search for potential following tokens.
# 2. We may reach the end of a rule that was not one of the original
# reduce rules. In this case, we must also search for all potential
# following tokens for this rule as well.
lookahead_tokens = Set.new
rule_sets_to_check_after = [rule.rule_set]
checked_rule_sets = Set.new
while !rule_sets_to_check_after.empty?
rule_set = rule_sets_to_check_after.slice!(0)
checked_rule_sets << rule_set
# For each RuleSet we're checking, we're going to look through all
# items in the item sets of interest and gather all possible following
# tokens to form the lookahead token set.
item_sets.each do |item_set|
item_set.items.each do |item|
if item.next_symbol == rule_set
(1..).each do |offset|
case symbol = item.next_symbol(offset)
when nil
rule_set = item.rule.rule_set
unless checked_rule_sets.include?(rule_set)
rule_sets_to_check_after << rule_set
checked_rule_sets << rule_set
end
break
when Token
lookahead_tokens << symbol
break
when RuleSet
lookahead_tokens += symbol.start_token_set
unless symbol.could_be_empty?
break
end
end
end
end
end
end
end
lookahead_tokens
end
def write_log!
@log.puts Util.banner("Parser Rules")
@grammar.rules.each do |rule|
@log.puts
@log.puts "Rule #{rule.id}:"
@log.puts " #{rule}"
end
@log.puts
@log.puts Util.banner("Parser Tokens")
@log.puts
@grammar.tokens.each do |token|
@log.puts "Token #{token.id}: #{token.name}"
end
@log.puts
@log.puts Util.banner("Parser Rule Sets")
@rule_sets.each do |rule_set_name, rule_set|
@log.puts
@log.puts "Rule Set #{rule_set.id}: #{rule_set_name}"
@log.puts " Start token set: #{rule_set.start_token_set.map(&:name).join(", ")}"
end
@log.puts
@log.puts Util.banner("Parser States")
@item_sets.each do |item_set|
@log.puts
@log.puts "State #{item_set.id}:"
@log.puts item_set.to_s.gsub(/^/, " ")
incoming_ids = item_set.in_sets.map(&:id)
@log.puts
@log.puts " Incoming states: #{incoming_ids.join(", ")}"
@log.puts " Outgoing states:"
item_set.next_item_set.each do |next_symbol, next_item_set|
@log.puts " #{next_symbol.name} => #{next_item_set.id}"
end
@log.puts
@log.puts " Reduce actions:"
if item_set.reduce_rule
@log.puts " * => rule #{item_set.reduce_rule.id}, rule set #{@rule_sets[item_set.reduce_rule.name].id} (#{item_set.reduce_rule.name})"
elsif item_set.reduce_actions
item_set.reduce_actions.each do |token, rule|
@log.puts " lookahead #{token.name} => #{rule.name} (#{rule.id}), rule set ##{rule.rule_set.id}"
end
end
end
if @warnings.size > 0
@log.puts
@log.puts "Warnings:"
@warnings.each do |warning|
@log.puts " #{warning}"
end
end
end
end
end