From b6e3a5c15179260b0c2eae301dc005d6196f22ea Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Tue, 25 May 2021 16:00:25 -0400 Subject: [PATCH] Record accepting token in DFA state --- lib/imbecile/grammar.rb | 4 +++- lib/imbecile/regex/dfa.rb | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/lib/imbecile/grammar.rb b/lib/imbecile/grammar.rb index 0b92174..7c7c9cc 100644 --- a/lib/imbecile/grammar.rb +++ b/lib/imbecile/grammar.rb @@ -39,9 +39,11 @@ module Imbecile end # Build NFA from each token expression. + i = 0 @tokens.each do |token_name, token_def| token_def[:regex] = Regex.new(token_def[:pattern]) - token_def[:regex].nfa.end_state.accepts = token_name + token_def[:regex].nfa.end_state.accepts = "#{i}:#{token_name}" + i += 1 end dfa = Regex::DFA.new(@tokens.map {|token_name, token_def| token_def[:regex].nfa}) puts dfa diff --git a/lib/imbecile/regex/dfa.rb b/lib/imbecile/regex/dfa.rb index 2dc006d..65a93cf 100644 --- a/lib/imbecile/regex/dfa.rb +++ b/lib/imbecile/regex/dfa.rb @@ -60,13 +60,15 @@ module Imbecile end rv = "" @states.each_with_index do |state, state_id| - rv += "#{state_id}:\n" + accepts_s = state.accepts ? " (#{state.accepts})" : "" + rv += "#{state_id}#{accepts_s}:\n" state.transitions.each do |transition| range_s = chr[transition.code_point_range.first] if transition.code_point_range.size > 1 range_s += "-" + chr[transition.code_point_range.last] end - rv += " #{range_s} => #{transition.destination.id}\n" + accepts_s = transition.destination.accepts ? " (#{transition.destination.accepts})" : "" + rv += " #{range_s} => #{transition.destination.id}#{accepts_s}\n" end end rv @@ -85,6 +87,17 @@ module Imbecile def process_nfa_state_set(nfa_state_set) state = @states[@nfa_state_sets[nfa_state_set]] + nfa_state_set.each do |nfa_state| + if nfa_state.accepts + if state.accepts + if nfa_state.accepts.to_i < state.accepts.to_i + state.accepts = nfa_state.accepts + end + else + state.accepts = nfa_state.accepts + end + end + end transitions = transitions_for(nfa_state_set) while transitions.size > 0 subrange = CodePointRange.first_subrange(transitions.map(&:code_point_range))