# Patch summary (review notes; everything below the "diff --git" header is unchanged).
#
# lib/imbecile/regex/dfa.rb
#   * Adds a body to DFA::State: a nested Transition class (readers
#     code_point_range and destination), an `accepts` accessor, a `transitions`
#     reader, an initializer that starts with an empty transitions array, and
#     add_transition, which appends a new Transition.
#   * DFA#initialize now keeps two structures: @nfa_state_sets (a hash from an
#     NFA state set to an integer id) and @states (an array of State objects
#     indexed by that id). It registers the start state's nil_transition_states
#     and then drains @to_process in a while loop, processing one set at a time.
#   * New private register_nfa_state_set: when the set is not yet a key of
#     @nfa_state_sets, it assigns the next id (@states.size), appends State.new,
#     and queues the set in @to_process.
#   * process_nfa_state_set now looks up the State for the set being processed,
#     reads transition.destination (previously transition.last), registers the
#     destination set, and records a transition on the State for each subrange.
#     Split ranges are rebuilt with the fully qualified NFA::State::Transition.
#   * transitions_for: parameter renamed from states to nfa_state_set.
#
# lib/imbecile/regex/nfa.rb
#   * The Transition initializer now stores @destination (presumably this is
#     NFA::State::Transition; the class header is outside the hunk - confirm).
#
# NOTE(review): the DFA::State::Transition#initialize added by this patch
#   assigns only @code_point_range; its `destination` argument is never stored,
#   so the `destination` reader on DFA transitions would return nil. It looks
#   like it needs `@destination = destination`, the same fix this patch applies
#   in nfa.rb.
# NOTE(review): State#accepts is declared via attr_accessor but nothing in this
#   patch assigns it - confirm whether that is set elsewhere or still pending.
# NOTE(review): the patch text below has lost its original line breaks, so it
#   presumably will not apply as stored; re-export from the repository if needed.
diff --git a/lib/imbecile/regex/dfa.rb b/lib/imbecile/regex/dfa.rb index 28e5d33..137f025 100644 --- a/lib/imbecile/regex/dfa.rb +++ b/lib/imbecile/regex/dfa.rb @@ -4,6 +4,29 @@ module Imbecile class DFA class State + + class Transition + + attr_reader :code_point_range + attr_reader :destination + + def initialize(code_point_range, destination) + @code_point_range = code_point_range + end + + end + + attr_accessor :accepts + attr_reader :transitions + + def initialize + @transitions = [] + end + + def add_transition(code_point_range, destination) + @transitions << Transition.new(code_point_range, destination) + end + end def initialize(nfas) @@ -11,34 +34,49 @@ module Imbecile nfas.each do |nfa| start_nfa.start_state.add_transition(nil, nfa.start_state) end - @states = {} + @nfa_state_sets = {} + @states = [] @to_process = Set.new nil_transition_states = start_nfa.start_state.nil_transition_states - @states[nil_transition_states] = 0 - process_nfa_state_set(nil_transition_states) + register_nfa_state_set(nil_transition_states) + while @to_process.size > 0 + state_set = @to_process.first + @to_process.delete(state_set) + process_nfa_state_set(state_set) + end end private + def register_nfa_state_set(nfa_state_set) + unless @nfa_state_sets.include?(nfa_state_set) + state_id = @states.size + @nfa_state_sets[nfa_state_set] = state_id + @states << State.new + @to_process << nfa_state_set + end + end + def process_nfa_state_set(nfa_state_set) + state = @states[@nfa_state_sets[nfa_state_set]] transitions = transitions_for(nfa_state_set) while transitions.size > 0 subrange = CodePointRange.first_subrange(transitions.map(&:code_point_range)) dest_nfa_states = transitions.reduce(Set.new) do |result, transition| if transition.code_point_range.include?(subrange) - result << transition.last + result << transition.destination end result end - unless @states.include?(dest_nfa_states) - @to_process << dest_nfa_states - end + register_nfa_state_set(dest_nfa_states) + dest_state = 
@states[@nfa_state_sets[dest_nfa_states]] + state.add_transition(subrange, dest_state) transitions.delete_if do |transition| transition.code_point_range.last <= subrange.last end transitions.map! do |transition| if transition.code_point_range.first <= subrange.last - Transition.new(CodePointRange.new(subrange.last + 1, transition.code_point_range.last), transition.destination) + NFA::State::Transition.new(CodePointRange.new(subrange.last + 1, transition.code_point_range.last), transition.destination) else transition end @@ -46,8 +84,8 @@ module Imbecile end end - def transitions_for(states) - states.reduce([]) do |result, state| + def transitions_for(nfa_state_set) + nfa_state_set.reduce([]) do |result, state| result + state.cp_transitions end end diff --git a/lib/imbecile/regex/nfa.rb b/lib/imbecile/regex/nfa.rb index 6863a6d..cc2dca5 100644 --- a/lib/imbecile/regex/nfa.rb +++ b/lib/imbecile/regex/nfa.rb @@ -12,6 +12,7 @@ module Imbecile def initialize(code_point_range, destination) @code_point_range = code_point_range + @destination = destination end def nil?