module Imbecile
  class Regex

    # Parse +pattern+ and keep the resulting parser object in @unit.
    #
    # @param pattern [String] regular expression source text
    def initialize(pattern)
      @unit = Unit.new(pattern)
    end

    # Recursive-descent parser for the regular expression syntax, and the
    # common base class of the node types it produces.
    #
    # Unit.new(pattern) consumes a private copy of the pattern one character
    # at a time and stores the parsed tree (an AlternatesUnit) in @unit.
    class Unit

      # An ordered run of units that must match one after another.
      # Array operations (<<, [], []=, size, each, ...) are forwarded to the
      # underlying +units+ array.
      class SequenceUnit < Unit
        attr_accessor :units

        def initialize
          @units = []
        end

        # Forward Array operations to +units+, including any block given.
        def method_missing(name, *args, &block)
          return super unless @units.respond_to?(name)
          @units.__send__(name, *args, &block)
        end

        def respond_to_missing?(name, include_private = false)
          @units.respond_to?(name) || super
        end
      end

      # A set of alternatives. For "a|b" and groups each alternate is a
      # SequenceUnit; for a character class each alternate is a single
      # CharacterUnit or CharacterRangeUnit and +negate+ records a leading "^".
      class AlternatesUnit < Unit
        attr_accessor :alternates
        attr_accessor :negate

        def initialize
          @alternates = []
          @negate = false
        end

        # Start a new, empty sequence alternate.
        def new_alternate!
          @alternates << SequenceUnit.new
        end

        # Add +unit+ as an alternate of its own.
        def append_alternate(unit)
          @alternates << unit
        end

        # Append +unit+ to the current (last) sequence alternate, creating
        # the first alternate on demand.
        def <<(unit)
          new_alternate! if @alternates.empty?
          @alternates[-1] << unit
        end

        # Last unit of the current sequence alternate, or nil when that
        # sequence is empty.
        def last_unit
          new_alternate! if @alternates.empty?
          @alternates[-1][-1]
        end

        # Replace the last unit of the current sequence alternate.
        def replace_last!(new_unit)
          @alternates[-1][-1] = new_unit
        end
      end

      # A single literal character, stored as its code point.
      class CharacterUnit < Unit
        attr_accessor :code_point

        # @param c [String, Integer] a character or a code point
        def initialize(c)
          @code_point = c.ord
        end
      end

      # An inclusive range of code points.
      class CharacterRangeUnit < Unit
        attr_accessor :start_code_point
        attr_accessor :end_code_point

        # @param c1 [String, Integer] first character or code point
        # @param c2 [String, Integer] last character or code point
        def initialize(c1, c2)
          @start_code_point = c1.ord
          @end_code_point = c2.ord
        end
      end

      # A unit repeated between +min_count+ and +max_count+ times;
      # a nil +max_count+ means there is no upper bound.
      class MultiplicityUnit < Unit
        attr_accessor :unit
        attr_accessor :min_count
        attr_accessor :max_count

        def initialize(unit, min_count, max_count)
          @unit = unit
          @min_count = min_count
          @max_count = max_count
        end
      end

      # Parse +pattern+; the caller's string is not modified.
      #
      # @param pattern [String] regular expression source text
      # @raise [Error] on malformed syntax
      # @raise [RuntimeError] when input is left over after parsing, which
      #   happens for an unmatched ")"
      def initialize(pattern)
        @pattern = pattern.dup
        @unit = parse_alternates
        # NOTE(review): this is a plain RuntimeError rather than Error; the
        # exception class is left as it was so existing callers are unaffected.
        raise "Invalid pattern: #{@pattern}" unless @pattern.empty?
      end

      private

      # Parse alternates up to the end of the pattern or an unconsumed ")".
      #
      # @return [AlternatesUnit]
      def parse_alternates
        au = AlternatesUnit.new
        until @pattern.empty?
          c = @pattern[0]
          # Leave ")" in place so parse_group can verify and consume it.
          return au if c == ")"
          @pattern.slice!(0)
          case c
          when "["
            au << parse_character_class
          when "("
            au << parse_group
          when "*", "+", "?", "{"
            last_unit = au.last_unit
            raise Error, "#{c} follows nothing" unless last_unit
            min_count, max_count = repetition_bounds(c)
            au.replace_last!(MultiplicityUnit.new(last_unit, min_count, max_count))
          when "|"
            au.new_alternate!
          when "\\"
            au << parse_backslash
          else
            au << CharacterUnit.new(c)
          end
        end
        au
      end

      # Translate a repetition operator into [min_count, max_count].
      def repetition_bounds(operator)
        case operator
        when "*" then [0, nil]
        when "+" then [1, nil]
        when "?" then [0, 1]
        when "{" then parse_curly_count
        end
      end

      # Parse the inside of a group; the opening "(" is already consumed.
      #
      # @return [AlternatesUnit]
      def parse_group
        au = parse_alternates
        raise Error, "Unterminated group in pattern" if @pattern[0] != ")"
        @pattern.slice!(0)
        au
      end

      # Parse a character class; the opening "[" is already consumed.
      # Each member becomes its own alternate of the returned unit.
      #
      # @return [AlternatesUnit]
      def parse_character_class
        au = AlternatesUnit.new
        first = true
        loop do
          raise Error, "Unterminated character class" if @pattern.empty?
          c = @pattern.slice!(0)
          break if c == "]"
          if c == "^" && first
            au.negate = true
          elsif c == "\\"
            au.append_alternate(parse_backslash)
          elsif c == "-" && !au.alternates.empty? && @pattern[0] != "]"
            # Class members are stored directly as alternates, so the range
            # start is the last alternate itself, not the tail of a sequence.
            au.alternates[-1] = parse_character_range(au.alternates.last)
          else
            # Includes a literal "-" that is the first member or sits
            # immediately before the closing "]".
            au.append_alternate(CharacterUnit.new(c))
          end
          first = false
        end
        au
      end

      # Parse the end of a range whose start unit is +begin_cu+; the "-" is
      # already consumed.
      #
      # @return [CharacterRangeUnit]
      def parse_character_range(begin_cu)
        unless begin_cu.is_a?(CharacterUnit)
          raise Error, "Character range must be between single characters"
        end
        raise Error, "Unterminated character class" if @pattern.empty?
        c = @pattern.slice!(0)
        if c == "\\"
          end_cu = parse_backslash
          unless end_cu.is_a?(CharacterUnit)
            raise Error, "Character range must be between single characters"
          end
          end_code_point = end_cu.code_point
        else
          end_code_point = c.ord
        end
        if end_code_point < begin_cu.code_point
          raise Error, "Character range end cannot be less than start"
        end
        CharacterRangeUnit.new(begin_cu.code_point, end_code_point)
      end

      # Parse a repetition count; the opening "{" is already consumed.
      #   {n}   -> [n, n]
      #   {n,}  -> [n, nil]
      #   {n,m} -> [n, m]
      #
      # @return [Array(Integer, Integer), Array(Integer, nil)]
      def parse_curly_count
        match = /\A(\d+)(?:(,)(\d*))?\}/.match(@pattern)
        raise Error, "Unexpected match count at #{@pattern}" unless match
        min_count = match[1].to_i
        max_count =
          if match[2].nil?
            min_count
          elsif match[3].empty?
            nil
          else
            match[3].to_i
          end
        if max_count && max_count < min_count
          raise Error, "Maximum repetition count cannot be less than minimum repetition count"
        end
        # Consume only the count itself so the rest of the pattern, newlines
        # included, is still there for the caller.
        @pattern.slice!(0, match[0].length)
        [min_count, max_count]
      end

      # Parse the character after a backslash: "\d" is the digit range,
      # any other character stands for itself.
      #
      # @return [CharacterRangeUnit, CharacterUnit]
      def parse_backslash
        raise Error, "Error: unfollowed \\" if @pattern.empty?
        c = @pattern.slice!(0)
        case c
        when "d"
          CharacterRangeUnit.new("0", "9")
        else
          CharacterUnit.new(c)
        end
      end

    end

  end
end