Review notes (patch tools skip everything before the first "diff --git" line):
* Line breaks, blank context lines and indentation were reconstructed from a
  whitespace-collapsed copy of this patch; if context fails to match, apply
  with whitespace-insensitive matching.
* Fix: State::Transition#initialize now stores @destination (it was accepted
  but dropped, so Transition#destination always returned nil).
* Fix: DFA#process_nfa_state_set collects transition.destination instead of
  transition.last (Transition defines no #last).
* Hunk line counts and offsets are updated for the added lines; the index
  hashes are those of the original commit.
* NOTE(review): the final context line of the last nfa.rb hunk still reads
  `dest_state.accepts` although the block parameter is now `transition`;
  it needs a follow-up change outside the visible hunk.
* NOTE(review): dfa.rb refers to a bare `Transition` constant while nfa.rb
  declares Transition inside State; confirm the constant resolves from DFA.

diff --git a/lib/imbecile/regex/dfa.rb b/lib/imbecile/regex/dfa.rb
index b1ca0b7..28e5d33 100644
--- a/lib/imbecile/regex/dfa.rb
+++ b/lib/imbecile/regex/dfa.rb
@@ -3,12 +3,57 @@ module Imbecile
 
     class DFA
 
+      class State
+      end
+
       def initialize(nfas)
         start_nfa = NFA.new
         nfas.each do |nfa|
           start_nfa.start_state.add_transition(nil, nfa.start_state)
         end
+        @states = {}
+        @to_process = Set.new
         nil_transition_states = start_nfa.start_state.nil_transition_states
+        @states[nil_transition_states] = 0
+        process_nfa_state_set(nil_transition_states)
+      end
+
+      private
+
+      # Split the code point transitions leaving +nfa_state_set+ into
+      # successive subranges, collect the NFA states reachable on each
+      # subrange, and queue any state set not yet recorded in @states.
+      def process_nfa_state_set(nfa_state_set)
+        transitions = transitions_for(nfa_state_set)
+        while transitions.size > 0
+          subrange = CodePointRange.first_subrange(transitions.map(&:code_point_range))
+          dest_nfa_states = transitions.reduce(Set.new) do |result, transition|
+            if transition.code_point_range.include?(subrange)
+              result << transition.destination
+            end
+            result
+          end
+          unless @states.include?(dest_nfa_states)
+            @to_process << dest_nfa_states
+          end
+          transitions.delete_if do |transition|
+            transition.code_point_range.last <= subrange.last
+          end
+          transitions.map! do |transition|
+            if transition.code_point_range.first <= subrange.last
+              Transition.new(CodePointRange.new(subrange.last + 1, transition.code_point_range.last), transition.destination)
+            else
+              transition
+            end
+          end
+        end
+      end
+
+      # Collect the non-nil (code point) transitions of every state in +states+.
+      def transitions_for(states)
+        states.reduce([]) do |result, state|
+          result + state.cp_transitions
+        end
       end
 
     end
diff --git a/lib/imbecile/regex/nfa.rb b/lib/imbecile/regex/nfa.rb
index b7a13ff..6863a6d 100644
--- a/lib/imbecile/regex/nfa.rb
+++ b/lib/imbecile/regex/nfa.rb
@@ -5,6 +5,24 @@ module Imbecile
 
       class State
 
+        # A transition to +destination+ taken on +code_point_range+; a nil
+        # range marks a nil transition.
+        class Transition
+
+          attr_reader :code_point_range
+          attr_reader :destination
+
+          def initialize(code_point_range, destination)
+            @code_point_range = code_point_range
+            @destination = destination
+          end
+
+          def nil?
+            @code_point_range.nil?
+          end
+
+        end
+
         attr_accessor :accepts
         attr_reader :transitions
 
@@ -12,8 +30,8 @@ module Imbecile
           @transitions = []
         end
 
-        def add_transition(code_point, destination_state)
-          @transitions << [code_point, destination_state]
+        def add_transition(code_point_range, destination)
+          @transitions << Transition.new(code_point_range, destination)
         end
 
         # Determine the set of states that can be reached by nil transitions.
@@ -24,10 +42,10 @@ module Imbecile
         def nil_transition_states
           states = Set[self]
           analyze_state = lambda do |state|
-            state.nil_transitions.each do |range, dest_state|
-              unless states.include?(dest_state)
-                states << dest_state
-                analyze_state[dest_state]
+            state.nil_transitions.each do |transition|
+              unless states.include?(transition.destination)
+                states << transition.destination
+                analyze_state[transition.destination]
               end
             end
           end
@@ -36,14 +54,14 @@ module Imbecile
         end
 
         def nil_transitions
-          @transitions.select do |code_point, dest_state|
-            code_point.nil?
+          @transitions.select do |transition|
+            transition.nil?
           end
         end
 
         def cp_transitions
-          @transitions.select do |code_point, dest_state|
-            code_point
+          @transitions.reject do |transition|
+            transition.nil?
           end
         end
 
@@ -79,13 +97,13 @@ module Imbecile
         visit = lambda do |state|
           accepts_s = state.accepts ? " *" : ""
           rv += "#{state_id[state]}#{accepts_s}:\n"
-          state.transitions.each do |code_point_range, dest_state|
-            if code_point_range.nil?
+          state.transitions.each do |transition|
+            if transition.nil?
               range_s = "nil"
             else
-              range_s = chr[code_point_range.first]
-              if code_point_range.size > 1
-                range_s += "-" + chr[code_point_range.last]
+              range_s = chr[transition.code_point_range.first]
+              if transition.code_point_range.size > 1
+                range_s += "-" + chr[transition.code_point_range.last]
               end
             end
             accepts_s = dest_state.accepts ? " *" : ""