From 7f54778ba8aaaf4feec459050eed198c5c8fd20f Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Sun, 6 Jun 2021 15:18:21 -0400 Subject: [PATCH] Rename Regex::DFA to TokenDFA --- lib/imbecile.rb | 8 +++- lib/imbecile/grammar.rb | 8 ---- lib/imbecile/regex/dfa.rb | 86 --------------------------------------- lib/imbecile/token_dfa.rb | 84 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 95 deletions(-) delete mode 100644 lib/imbecile/regex/dfa.rb create mode 100644 lib/imbecile/token_dfa.rb diff --git a/lib/imbecile.rb b/lib/imbecile.rb index 6ec739f..1d9c977 100644 --- a/lib/imbecile.rb +++ b/lib/imbecile.rb @@ -8,9 +8,9 @@ require_relative "imbecile/regex" require_relative "imbecile/regex/fa" require_relative "imbecile/regex/fa/state" require_relative "imbecile/regex/fa/state/transition" -require_relative "imbecile/regex/dfa" require_relative "imbecile/regex/nfa" require_relative "imbecile/regex/unit" +require_relative "imbecile/token_dfa" require_relative "imbecile/version" module Imbecile @@ -23,6 +23,12 @@ module Imbecile def run(input_file, output_file) begin grammar = Grammar.new(File.read(input_file)) + # Build NFA from each token expression. + grammar.tokens.each do |token| + puts token.nfa + end + token_dfa = TokenDFA.new(grammar.tokens) + puts token_dfa rescue Error => e $stderr.puts e.message return 2 diff --git a/lib/imbecile/grammar.rb b/lib/imbecile/grammar.rb index 68cbe2b..80b3dfe 100644 --- a/lib/imbecile/grammar.rb +++ b/lib/imbecile/grammar.rb @@ -44,14 +44,6 @@ module Imbecile raise Error.new("Unexpected input on line #{line_number}: #{line}") end end - - # Build NFA from each token expression. - nfas = @tokens.map do |token| - puts token.nfa - token.nfa - end - dfa = Regex::DFA.new(nfas) - puts dfa end end diff --git a/lib/imbecile/regex/dfa.rb b/lib/imbecile/regex/dfa.rb deleted file mode 100644 index 707079d..0000000 --- a/lib/imbecile/regex/dfa.rb +++ /dev/null @@ -1,86 +0,0 @@ -module Imbecile - class Regex - - class DFA < FA - - def initialize(nfas) - super() - start_nfa = NFA.new - nfas.each do |nfa| - start_nfa.start_state.add_transition(nil, nfa.start_state) - end - @nfa_state_sets = {} - @states = [] - @to_process = Set.new - nil_transition_states = start_nfa.start_state.nil_transition_states - register_nfa_state_set(nil_transition_states) - while @to_process.size > 0 - state_set = @to_process.first - @to_process.delete(state_set) - process_nfa_state_set(state_set) - end - @start_state = @states[0] - end - - private - - def register_nfa_state_set(nfa_state_set) - unless @nfa_state_sets.include?(nfa_state_set) - state_id = @states.size - @nfa_state_sets[nfa_state_set] = state_id - @states << State.new - @to_process << nfa_state_set - end - end - - def process_nfa_state_set(nfa_state_set) - state = @states[@nfa_state_sets[nfa_state_set]] - nfa_state_set.each do |nfa_state| - if nfa_state.accepts - if state.accepts - if nfa_state.accepts.id < state.accepts.id - state.accepts = nfa_state.accepts - end - else - state.accepts = nfa_state.accepts - end - end - end - transitions = transitions_for(nfa_state_set) - while transitions.size > 0 - subrange = CodePointRange.first_subrange(transitions.map(&:code_point_range)) - dest_nfa_states = transitions.reduce(Set.new) do |result, transition| - if transition.code_point_range.include?(subrange) - result << transition.destination - end - result - end - dest_nfa_states = dest_nfa_states.reduce(Set.new) do |result, dest_nfa_state| - result + dest_nfa_state.nil_transition_states - end - register_nfa_state_set(dest_nfa_states) - dest_state = @states[@nfa_state_sets[dest_nfa_states]] - state.add_transition(subrange, dest_state) - transitions.delete_if do |transition| - transition.code_point_range.last <= subrange.last - end - transitions.map! do |transition| - if transition.code_point_range.first <= subrange.last - NFA::State::Transition.new(CodePointRange.new(subrange.last + 1, transition.code_point_range.last), transition.destination) - else - transition - end - end - end - end - - def transitions_for(nfa_state_set) - nfa_state_set.reduce([]) do |result, state| - result + state.cp_transitions - end - end - - end - - end -end diff --git a/lib/imbecile/token_dfa.rb b/lib/imbecile/token_dfa.rb new file mode 100644 index 0000000..46de03b --- /dev/null +++ b/lib/imbecile/token_dfa.rb @@ -0,0 +1,84 @@ +module Imbecile + + class TokenDFA < Regex::FA + + def initialize(tokens) + super() + start_nfa = Regex::NFA.new + tokens.each do |token| + start_nfa.start_state.add_transition(nil, token.nfa.start_state) + end + @nfa_state_sets = {} + @states = [] + @to_process = Set.new + nil_transition_states = start_nfa.start_state.nil_transition_states + register_nfa_state_set(nil_transition_states) + while @to_process.size > 0 + state_set = @to_process.first + @to_process.delete(state_set) + process_nfa_state_set(state_set) + end + @start_state = @states[0] + end + + private + + def register_nfa_state_set(nfa_state_set) + unless @nfa_state_sets.include?(nfa_state_set) + state_id = @states.size + @nfa_state_sets[nfa_state_set] = state_id + @states << State.new + @to_process << nfa_state_set + end + end + + def process_nfa_state_set(nfa_state_set) + state = @states[@nfa_state_sets[nfa_state_set]] + nfa_state_set.each do |nfa_state| + if nfa_state.accepts + if state.accepts + if nfa_state.accepts.id < state.accepts.id + state.accepts = nfa_state.accepts + end + else + state.accepts = nfa_state.accepts + end + end + end + transitions = transitions_for(nfa_state_set) + while transitions.size > 0 + subrange = CodePointRange.first_subrange(transitions.map(&:code_point_range)) + dest_nfa_states = transitions.reduce(Set.new) do |result, transition| + if transition.code_point_range.include?(subrange) + result << transition.destination + end + result + end + dest_nfa_states = dest_nfa_states.reduce(Set.new) do |result, dest_nfa_state| + result + dest_nfa_state.nil_transition_states + end + register_nfa_state_set(dest_nfa_states) + dest_state = @states[@nfa_state_sets[dest_nfa_states]] + state.add_transition(subrange, dest_state) + transitions.delete_if do |transition| + transition.code_point_range.last <= subrange.last + end + transitions.map! do |transition| + if transition.code_point_range.first <= subrange.last + Regex::NFA::State::Transition.new(CodePointRange.new(subrange.last + 1, transition.code_point_range.last), transition.destination) + else + transition + end + end + end + end + + def transitions_for(nfa_state_set) + nfa_state_set.reduce([]) do |result, state| + result + state.cp_transitions + end + end + + end + +end