From 24054461a23699afcb917529b9c4570432bb1223 Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Tue, 18 May 2021 16:14:42 -0400 Subject: [PATCH] Merge Regex::Parser into Regex, move Unit to its own file --- lib/imbecile.rb | 2 +- lib/imbecile/grammar.rb | 1 - lib/imbecile/regex.rb | 146 ++++++++++++- lib/imbecile/regex/parser.rb | 325 ---------------------------- lib/imbecile/regex/unit.rb | 173 +++++++++++++++ spec/imbecile/regex/parser_spec.rb | 335 ----------------------------- spec/imbecile/regex_spec.rb | 333 ++++++++++++++++++++++++++++ 7 files changed, 651 insertions(+), 664 deletions(-) delete mode 100644 lib/imbecile/regex/parser.rb create mode 100644 lib/imbecile/regex/unit.rb delete mode 100644 spec/imbecile/regex/parser_spec.rb create mode 100644 spec/imbecile/regex_spec.rb diff --git a/lib/imbecile.rb b/lib/imbecile.rb index 5e32394..cfd22a5 100644 --- a/lib/imbecile.rb +++ b/lib/imbecile.rb @@ -2,7 +2,7 @@ require_relative "imbecile/cli" require_relative "imbecile/grammar" require_relative "imbecile/regex" require_relative "imbecile/regex/nfa" -require_relative "imbecile/regex/parser" +require_relative "imbecile/regex/unit" require_relative "imbecile/version" require "erb" diff --git a/lib/imbecile/grammar.rb b/lib/imbecile/grammar.rb index b131e94..24e9e6d 100644 --- a/lib/imbecile/grammar.rb +++ b/lib/imbecile/grammar.rb @@ -41,7 +41,6 @@ module Imbecile # Build NFA from each token expression. @tokens.each do |token_name, token_def| token_def[:regex] = Regex.new(token_def[:pattern]) - token_def[:nfa] = token_def[:regex].parser.unit.to_nfa end end diff --git a/lib/imbecile/regex.rb b/lib/imbecile/regex.rb index 2871c19..f526b3a 100644 --- a/lib/imbecile/regex.rb +++ b/lib/imbecile/regex.rb @@ -1,10 +1,152 @@ module Imbecile class Regex - attr_accessor :parser + attr_reader :unit + attr_reader :nfa def initialize(pattern) - @parser = Parser.new(pattern) + @pattern = pattern.dup + @unit = parse_alternates + @nfa = @unit.to_nfa + if @pattern != "" + raise Error.new(%[Unexpected "#{@pattern}" in pattern]) + end + end + + private + + def parse_alternates + au = AlternatesUnit.new + while @pattern != "" + c = @pattern[0] + return au if c == ")" + @pattern.slice!(0) + case c + when "[" + au << parse_character_class + when "(" + au << parse_group + when "*", "+", "?", "{" + if last_unit = au.last_unit + case c + when "*" + min_count, max_count = 0, nil + when "+" + min_count, max_count = 1, nil + when "?" + min_count, max_count = 0, 1 + when "{" + min_count, max_count = parse_curly_count + end + mu = MultiplicityUnit.new(last_unit, min_count, max_count) + au.replace_last!(mu) + else + raise Error.new("#{c} follows nothing") + end + when "|" + au.new_alternate! + when "\\" + au << parse_backslash + when "." + au << period_character_class + else + au << CharacterRangeUnit.new(c) + end + end + au + end + + def parse_group + au = parse_alternates + if @pattern[0] != ")" + raise Error.new("Unterminated group in pattern") + end + @pattern.slice!(0) + au + end + + def parse_character_class + ccu = CharacterClassUnit.new + index = 0 + loop do + if @pattern == "" + raise Error.new("Unterminated character class") + end + c = @pattern.slice!(0) + if c == "]" + break + elsif c == "^" && index == 0 + ccu.negate = true + elsif c == "-" && (ccu.size == 0 || @pattern[0] == "]") + ccu << CharacterRangeUnit.new(c) + elsif c == "\\" + ccu << parse_backslash + elsif c == "-" && @pattern[0] != "]" + begin_cu = ccu.last_unit + unless begin_cu.is_a?(CharacterRangeUnit) && begin_cu.range.size == 1 + raise Error.new("Character range must be between single characters") + end + if @pattern[0] == "\\" + @pattern.slice!(0) + end_cu = parse_backslash + unless end_cu.is_a?(CharacterRangeUnit) && end_cu.range.size == 1 + raise Error.new("Character range must be between single characters") + end + max_code_point = end_cu.code_point + else + max_code_point = @pattern[0].ord + @pattern.slice!(0) + end + cru = CharacterRangeUnit.new(begin_cu.min_code_point, max_code_point) + ccu.replace_last!(cru) + else + ccu << CharacterRangeUnit.new(c) + end + index += 1 + end + ccu + end + + def parse_curly_count + if @pattern =~ /^(\d+)(?:(,)(\d*))?\}(.*)$/ + min_count, comma, max_count, pattern = $1, $2, $3, $4 + min_count = min_count.to_i + if comma.to_s == "" + max_count = min_count + elsif max_count.to_s != "" + max_count = max_count.to_i + if max_count < min_count + raise Error.new("Maximum repetition count cannot be less than minimum repetition count") + end + else + max_count = nil + end + @pattern = pattern + [min_count, max_count] + else + raise Error.new("Unexpected match count at #{@pattern}") + end + end + + def parse_backslash + if @pattern == "" + raise Error.new("Error: unfollowed \\") + else + c = @pattern.slice!(0) + case c + when "d" + CharacterRangeUnit.new("0", "9") + else + CharacterRangeUnit.new(c) + end + end + end + + def period_character_class + ccu = CharacterClassUnit.new + ccu << CharacterRangeUnit.new(0, "\n".ord - 1) + ccu << CharacterRangeUnit.new("\n".ord + 1, 0xFFFFFFFF) + ccu end end diff --git a/lib/imbecile/regex/parser.rb b/lib/imbecile/regex/parser.rb deleted file mode 100644 index 768ad2d..0000000 --- a/lib/imbecile/regex/parser.rb +++ /dev/null @@ -1,325 +0,0 @@ -module Imbecile - class Regex - - class Parser - - class Unit - end - - class SequenceUnit < Unit - attr_accessor :units - def initialize - @units = [] - end - def method_missing(*args) - @units.__send__(*args) - end - def to_nfa - if @units.empty? - NFA.empty - else - @units.map do |unit| - unit.to_nfa - end.reduce do |result, nfa| - result.end_state.add_transition(nil, nfa.start_state) - result - end - end - end - end - - class AlternatesUnit < Unit - attr_accessor :alternates - def initialize - @alternates = [] - new_alternate! - end - def new_alternate! - @alternates << SequenceUnit.new - end - def <<(unit) - @alternates[-1] << unit - end - def last_unit - @alternates[-1][-1] - end - def replace_last!(new_unit) - @alternates[-1][-1] = new_unit - end - def to_nfa - if @alternates.size == 0 - NFA.empty - elsif @alternates.size == 1 - @alternates[0].to_nfa - else - nfa = NFA.new - alternate_nfas = @alternates.map do |alternate| - alternate.to_nfa - end - alternate_nfas.each do |alternate_nfa| - nfa.start_state.add_transition(nil, alternate_nfa.start_state) - alternate_nfa.end_state.add_transition(nil, nfa.end_state) - end - nfa - end - end - end - - class CharacterRangeUnit < Unit - attr_accessor :min_code_point - attr_accessor :max_code_point - def initialize(c1, c2 = nil) - @min_code_point = c1.ord - @max_code_point = c2 ? c2.ord : @min_code_point - end - def range - @min_code_point..@max_code_point - end - def to_nfa - nfa = NFA.new - nfa.start_state.add_transition(range, nfa.end_state) - nfa - end - end - - class CharacterClassUnit < Unit - attr_accessor :units - attr_accessor :negate - def initialize - @units = [] - @negate = false - end - def initialize - @units = [] - end - def method_missing(*args) - @units.__send__(*args) - end - def last_unit - @units[-1] - end - def replace_last!(new_unit) - @units[-1] = new_unit - end - def to_nfa - nfa = NFA.new - if @units.empty? - nfa.start_state.add_transition(nil, nfa.end_state) - else - ranges = @units.map(&:range) - if @negate - ranges = negate_ranges(ranges) - end - ranges.each do |range| - nfa.start_state.add_transition(range, nfa.end_state) - end - end - nfa - end - private - def negate_ranges(ranges) - ranges = ranges.sort_by(&:first) - new_ranges = [] - last_cp = -1 - ranges.each do |range| - if range.first > (last_cp + 1) - new_ranges << ((last_cp + 1)..(range.first - 1)) - last_cp = range.last - end - end - if last_cp < 0xFFFFFFFF - new_ranges << ((last_cp + 1)..0xFFFFFFFF) - end - new_ranges - end - end - - class MultiplicityUnit < Unit - attr_accessor :unit - attr_accessor :min_count - attr_accessor :max_count - def initialize(unit, min_count, max_count) - @unit = unit - @min_count = min_count - @max_count = max_count - end - def to_nfa - nfa = NFA.new - unit_nfa = @unit.to_nfa - nfa.start_state.add_transition(nil, unit_nfa.start_state) - if @min_count == 0 - nfa.start_state.add_transition(nil, nfa.end_state) - else - (@min_count - 1).times do - prev_nfa = unit_nfa - unit_nfa = @unit.to_nfa - prev_nfa.end_state.add_transition(nil, unit_nfa.start_state) - end - end - unit_nfa.end_state.add_transition(nil, nfa.end_state) - if @max_count.nil? - unit_nfa.end_state.add_transition(nil, nfa.start_state) - else - (@max_count - @min_count).times do - prev_nfa = unit_nfa - unit_nfa = @unit.to_nfa - prev_nfa.end_state.add_transition(nil, unit_nfa.start_state) - unit_nfa.end_state.add_transition(nil, nfa.end_state) - end - end - nfa - end - end - - attr_reader :unit - attr_reader :nfa - - def initialize(pattern) - @pattern = pattern.dup - @unit = parse_alternates - @nfa = @unit.to_nfa - if @pattern != "" - raise Error.new(%[Unexpected "#{@pattern}" in pattern]) - end - end - - private - - def parse_alternates - au = AlternatesUnit.new - while @pattern != "" - c = @pattern[0] - return au if c == ")" - @pattern.slice!(0) - case c - when "[" - au << parse_character_class - when "(" - au << parse_group - when "*", "+", "?", "{" - if last_unit = au.last_unit - case c - when "*" - min_count, max_count = 0, nil - when "+" - min_count, max_count = 1, nil - when "?" - min_count, max_count = 0, 1 - when "{" - min_count, max_count = parse_curly_count - end - mu = MultiplicityUnit.new(last_unit, min_count, max_count) - au.replace_last!(mu) - else - raise Error.new("#{c} follows nothing") - end - when "|" - au.new_alternate! - when "\\" - au << parse_backslash - when "." - au << period_character_class - else - au << CharacterRangeUnit.new(c) - end - end - au - end - - def parse_group - au = parse_alternates - if @pattern[0] != ")" - raise Error.new("Unterminated group in pattern") - end - @pattern.slice!(0) - au - end - - def parse_character_class - ccu = CharacterClassUnit.new - index = 0 - loop do - if @pattern == "" - raise Error.new("Unterminated character class") - end - c = @pattern.slice!(0) - if c == "]" - break - elsif c == "^" && index == 0 - ccu.negate = true - elsif c == "-" && (ccu.size == 0 || @pattern[0] == "]") - ccu << CharacterRangeUnit.new(c) - elsif c == "\\" - ccu << parse_backslash - elsif c == "-" && @pattern[0] != "]" - begin_cu = ccu.last_unit - unless begin_cu.is_a?(CharacterRangeUnit) && begin_cu.range.size == 1 - raise Error.new("Character range must be between single characters") - end - if @pattern[0] == "\\" - @pattern.slice!(0) - end_cu = parse_backslash - unless end_cu.is_a?(CharacterRangeUnit) && end_cu.range.size == 1 - raise Error.new("Character range must be between single characters") - end - max_code_point = end_cu.code_point - else - max_code_point = @pattern[0].ord - @pattern.slice!(0) - end - cru = CharacterRangeUnit.new(begin_cu.min_code_point, max_code_point) - ccu.replace_last!(cru) - else - ccu << CharacterRangeUnit.new(c) - end - index += 1 - end - ccu - end - - def parse_curly_count - if @pattern =~ /^(\d+)(?:(,)(\d*))?\}(.*)$/ - min_count, comma, max_count, pattern = $1, $2, $3, $4 - min_count = min_count.to_i - if comma.to_s == "" - max_count = min_count - elsif max_count.to_s != "" - max_count = max_count.to_i - if max_count < min_count - raise Error.new("Maximum repetition count cannot be less than minimum repetition count") - end - else - max_count = nil - end - @pattern = pattern - [min_count, max_count] - else - raise Error.new("Unexpected match count at #{@pattern}") - end - end - - def parse_backslash - if @pattern == "" - raise Error.new("Error: unfollowed \\") - else - c = @pattern.slice!(0) - case c - when "d" - CharacterRangeUnit.new("0", "9") - else - CharacterRangeUnit.new(c) - end - end - end - - def period_character_class - ccu = CharacterClassUnit.new - ccu << CharacterRangeUnit.new(0, "\n".ord - 1) - ccu << CharacterRangeUnit.new("\n".ord + 1, 0xFFFFFFFF) - ccu - end - - end - - end -end diff --git a/lib/imbecile/regex/unit.rb b/lib/imbecile/regex/unit.rb new file mode 100644 index 0000000..e94156b --- /dev/null +++ b/lib/imbecile/regex/unit.rb @@ -0,0 +1,173 @@ +module Imbecile + class Regex + + class Unit + end + + class SequenceUnit < Unit + attr_accessor :units + def initialize + @units = [] + end + def method_missing(*args) + @units.__send__(*args) + end + def to_nfa + if @units.empty? + NFA.empty + else + @units.map do |unit| + unit.to_nfa + end.reduce do |result, nfa| + result.end_state.add_transition(nil, nfa.start_state) + result + end + end + end + end + + class AlternatesUnit < Unit + attr_accessor :alternates + def initialize + @alternates = [] + new_alternate! + end + def new_alternate! + @alternates << SequenceUnit.new + end + def <<(unit) + @alternates[-1] << unit + end + def last_unit + @alternates[-1][-1] + end + def replace_last!(new_unit) + @alternates[-1][-1] = new_unit + end + def to_nfa + if @alternates.size == 0 + NFA.empty + elsif @alternates.size == 1 + @alternates[0].to_nfa + else + nfa = NFA.new + alternate_nfas = @alternates.map do |alternate| + alternate.to_nfa + end + alternate_nfas.each do |alternate_nfa| + nfa.start_state.add_transition(nil, alternate_nfa.start_state) + alternate_nfa.end_state.add_transition(nil, nfa.end_state) + end + nfa + end + end + end + + class CharacterRangeUnit < Unit + attr_accessor :min_code_point + attr_accessor :max_code_point + def initialize(c1, c2 = nil) + @min_code_point = c1.ord + @max_code_point = c2 ? c2.ord : @min_code_point + end + def range + @min_code_point..@max_code_point + end + def to_nfa + nfa = NFA.new + nfa.start_state.add_transition(range, nfa.end_state) + nfa + end + end + + class CharacterClassUnit < Unit + attr_accessor :units + attr_accessor :negate + def initialize + @units = [] + @negate = false + end + def initialize + @units = [] + end + def method_missing(*args) + @units.__send__(*args) + end + def last_unit + @units[-1] + end + def replace_last!(new_unit) + @units[-1] = new_unit + end + def to_nfa + nfa = NFA.new + if @units.empty? + nfa.start_state.add_transition(nil, nfa.end_state) + else + ranges = @units.map(&:range) + if @negate + ranges = negate_ranges(ranges) + end + ranges.each do |range| + nfa.start_state.add_transition(range, nfa.end_state) + end + end + nfa + end + private + def negate_ranges(ranges) + ranges = ranges.sort_by(&:first) + new_ranges = [] + last_cp = -1 + ranges.each do |range| + if range.first > (last_cp + 1) + new_ranges << ((last_cp + 1)..(range.first - 1)) + last_cp = range.last + end + end + if last_cp < 0xFFFFFFFF + new_ranges << ((last_cp + 1)..0xFFFFFFFF) + end + new_ranges + end + end + + class MultiplicityUnit < Unit + attr_accessor :unit + attr_accessor :min_count + attr_accessor :max_count + def initialize(unit, min_count, max_count) + @unit = unit + @min_count = min_count + @max_count = max_count + end + def to_nfa + nfa = NFA.new + unit_nfa = @unit.to_nfa + nfa.start_state.add_transition(nil, unit_nfa.start_state) + if @min_count == 0 + nfa.start_state.add_transition(nil, nfa.end_state) + else + (@min_count - 1).times do + prev_nfa = unit_nfa + unit_nfa = @unit.to_nfa + prev_nfa.end_state.add_transition(nil, unit_nfa.start_state) + end + end + unit_nfa.end_state.add_transition(nil, nfa.end_state) + if @max_count.nil? + unit_nfa.end_state.add_transition(nil, nfa.start_state) + else + (@max_count - @min_count).times do + prev_nfa = unit_nfa + unit_nfa = @unit.to_nfa + prev_nfa.end_state.add_transition(nil, unit_nfa.start_state) + unit_nfa.end_state.add_transition(nil, nfa.end_state) + end + end + nfa + end + end + + end +end diff --git a/spec/imbecile/regex/parser_spec.rb b/spec/imbecile/regex/parser_spec.rb deleted file mode 100644 index 4d96e07..0000000 --- a/spec/imbecile/regex/parser_spec.rb +++ /dev/null @@ -1,335 +0,0 @@ -module Imbecile - class Regex - RSpec.describe Parser do - - it "parses an empty expression" do - parser = Parser.new("") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0].size).to eq 0 - end - - it "parses a single character unit expression" do - parser = Parser.new("a") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::CharacterRangeUnit - end - - it "parses a group with a single character unit expression" do - parser = Parser.new("(a)") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::AlternatesUnit - alt_unit = seq_unit[0] - expect(alt_unit.alternates.size).to eq 1 - expect(alt_unit.alternates[0]).to be_a Parser::SequenceUnit - expect(alt_unit.alternates[0][0]).to be_a Parser::CharacterRangeUnit - end - - it "parses a *" do - parser = Parser.new("a*") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::MultiplicityUnit - m_unit = seq_unit[0] - expect(m_unit.min_count).to eq 0 - expect(m_unit.max_count).to be_nil - expect(m_unit.unit).to be_a Parser::CharacterRangeUnit - end - - it "parses a +" do - parser = Parser.new("a+") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::MultiplicityUnit - m_unit = seq_unit[0] - expect(m_unit.min_count).to eq 1 - expect(m_unit.max_count).to be_nil - expect(m_unit.unit).to be_a Parser::CharacterRangeUnit - end - - it "parses a ?" do - parser = Parser.new("a?") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::MultiplicityUnit - m_unit = seq_unit[0] - expect(m_unit.min_count).to eq 0 - expect(m_unit.max_count).to eq 1 - expect(m_unit.unit).to be_a Parser::CharacterRangeUnit - end - - it "parses a multiplicity count" do - parser = Parser.new("a{5}") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::MultiplicityUnit - m_unit = seq_unit[0] - expect(m_unit.min_count).to eq 5 - expect(m_unit.max_count).to eq 5 - expect(m_unit.unit).to be_a Parser::CharacterRangeUnit - end - - it "parses a minimum-only multiplicity count" do - parser = Parser.new("a{5,}") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::MultiplicityUnit - m_unit = seq_unit[0] - expect(m_unit.min_count).to eq 5 - expect(m_unit.max_count).to be_nil - expect(m_unit.unit).to be_a Parser::CharacterRangeUnit - end - - it "parses a minimum and maximum multiplicity count" do - parser = Parser.new("a{5,8}") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::MultiplicityUnit - m_unit = seq_unit[0] - expect(m_unit.min_count).to eq 5 - expect(m_unit.max_count).to eq 8 - expect(m_unit.unit).to be_a Parser::CharacterRangeUnit - expect(m_unit.unit.range.first).to eq "a".ord - end - - it "parses an escaped *" do - parser = Parser.new("a\\*") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 2 - expect(seq_unit[0]).to be_a Parser::CharacterRangeUnit - expect(seq_unit[0].min_code_point).to eq "a".ord - expect(seq_unit[1]).to be_a Parser::CharacterRangeUnit - expect(seq_unit[1].min_code_point).to eq "*".ord - end - - it "parses an escaped +" do - parser = Parser.new("a\\+") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 2 - expect(seq_unit[0]).to be_a Parser::CharacterRangeUnit - expect(seq_unit[0].min_code_point).to eq "a".ord - expect(seq_unit[1]).to be_a Parser::CharacterRangeUnit - expect(seq_unit[1].min_code_point).to eq "+".ord - end - - it "parses an escaped \\" do - parser = Parser.new("\\\\d") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 2 - expect(seq_unit[0]).to be_a Parser::CharacterRangeUnit - expect(seq_unit[0].min_code_point).to eq "\\".ord - expect(seq_unit[1]).to be_a Parser::CharacterRangeUnit - expect(seq_unit[1].min_code_point).to eq "d".ord - end - - it "parses a character class" do - parser = Parser.new("[a-z_]") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::CharacterClassUnit - ccu = seq_unit[0] - expect(ccu.negate).to be_falsey - expect(ccu.size).to eq 2 - expect(ccu[0]).to be_a Parser::CharacterRangeUnit - expect(ccu[0].min_code_point).to eq "a".ord - expect(ccu[0].max_code_point).to eq "z".ord - expect(ccu[1]).to be_a Parser::CharacterRangeUnit - expect(ccu[1].min_code_point).to eq "_".ord - end - - it "parses a negated character class" do - parser = Parser.new("[^xyz]") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::CharacterClassUnit - ccu = seq_unit[0] - expect(ccu.negate).to be_truthy - expect(ccu.size).to eq 3 - expect(ccu[0]).to be_a Parser::CharacterRangeUnit - expect(ccu[0].min_code_point).to eq "x".ord - end - - it "parses - as a plain character at beginning of a character class" do - parser = Parser.new("[-9]") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::CharacterClassUnit - ccu = seq_unit[0] - expect(ccu.size).to eq 2 - expect(ccu[0]).to be_a Parser::CharacterRangeUnit - expect(ccu[0].min_code_point).to eq "-".ord - end - - it "parses - as a plain character at end of a character class" do - parser = Parser.new("[0-]") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::CharacterClassUnit - ccu = seq_unit[0] - expect(ccu.size).to eq 2 - expect(ccu[0]).to be_a Parser::CharacterRangeUnit - expect(ccu[0].min_code_point).to eq "0".ord - expect(ccu[1]).to be_a Parser::CharacterRangeUnit - expect(ccu[1].min_code_point).to eq "-".ord - end - - it "parses - as a plain character at beginning of a negated character class" do - parser = Parser.new("[^-9]") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::CharacterClassUnit - ccu = seq_unit[0] - expect(ccu.negate).to be_truthy - expect(ccu.size).to eq 2 - expect(ccu[0]).to be_a Parser::CharacterRangeUnit - expect(ccu[0].min_code_point).to eq "-".ord - end - - it "parses . as a plain character in a character class" do - parser = Parser.new("[.]") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::CharacterClassUnit - ccu = seq_unit[0] - expect(ccu.negate).to be_falsey - expect(ccu.size).to eq 1 - expect(ccu[0]).to be_a Parser::CharacterRangeUnit - expect(ccu[0].min_code_point).to eq ".".ord - end - - it "parses - as a plain character when escaped in middle of character class" do - parser = Parser.new("[0\\-9]") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - seq_unit = parser.unit.alternates[0] - expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::CharacterClassUnit - ccu = seq_unit[0] - expect(ccu.negate).to be_falsey - expect(ccu.size).to eq 3 - expect(ccu[0]).to be_a Parser::CharacterRangeUnit - expect(ccu[0].min_code_point).to eq "0".ord - expect(ccu[1]).to be_a Parser::CharacterRangeUnit - expect(ccu[1].min_code_point).to eq "-".ord - expect(ccu[2]).to be_a Parser::CharacterRangeUnit - expect(ccu[2].min_code_point).to eq "9".ord - end - - it "parses alternates" do - parser = Parser.new("ab|c") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 2 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - expect(parser.unit.alternates[1]).to be_a Parser::SequenceUnit - expect(parser.unit.alternates[0].size).to eq 2 - expect(parser.unit.alternates[1].size).to eq 1 - end - - it "parses a ." do - parser = Parser.new("a.b") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 1 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - expect(parser.unit.alternates[0][0]).to be_a Parser::CharacterRangeUnit - expect(parser.unit.alternates[0][1]).to be_a Parser::CharacterClassUnit - expect(parser.unit.alternates[0][1].units.size).to eq 2 - expect(parser.unit.alternates[0][2]).to be_a Parser::CharacterRangeUnit - end - - it "parses something complex" do - parser = Parser.new("(a|)*|[^^]|\\|v|[x-y]+") - expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 4 - expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit - expect(parser.unit.alternates[0].size).to eq 1 - expect(parser.unit.alternates[0][0]).to be_a Parser::MultiplicityUnit - expect(parser.unit.alternates[0][0].min_count).to eq 0 - expect(parser.unit.alternates[0][0].max_count).to be_nil - expect(parser.unit.alternates[0][0].unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates[0][0].unit.alternates.size).to eq 2 - expect(parser.unit.alternates[0][0].unit.alternates[0]).to be_a Parser::SequenceUnit - expect(parser.unit.alternates[0][0].unit.alternates[0].size).to eq 1 - expect(parser.unit.alternates[0][0].unit.alternates[0][0]).to be_a Parser::CharacterRangeUnit - expect(parser.unit.alternates[0][0].unit.alternates[1]).to be_a Parser::SequenceUnit - expect(parser.unit.alternates[0][0].unit.alternates[1].size).to eq 0 - expect(parser.unit.alternates[1]).to be_a Parser::SequenceUnit - expect(parser.unit.alternates[1].size).to eq 1 - expect(parser.unit.alternates[1][0]).to be_a Parser::CharacterClassUnit - expect(parser.unit.alternates[1][0].negate).to be_truthy - expect(parser.unit.alternates[1][0].size).to eq 1 - expect(parser.unit.alternates[1][0][0]).to be_a Parser::CharacterRangeUnit - expect(parser.unit.alternates[2]).to be_a Parser::SequenceUnit - expect(parser.unit.alternates[2].size).to eq 2 - expect(parser.unit.alternates[2][0]).to be_a Parser::CharacterRangeUnit - expect(parser.unit.alternates[2][0].min_code_point).to eq "|".ord - expect(parser.unit.alternates[2][1]).to be_a Parser::CharacterRangeUnit - expect(parser.unit.alternates[2][1].min_code_point).to eq "v".ord - expect(parser.unit.alternates[3]).to be_a Parser::SequenceUnit - expect(parser.unit.alternates[3].size).to eq 1 - expect(parser.unit.alternates[3][0]).to be_a Parser::MultiplicityUnit - expect(parser.unit.alternates[3][0].min_count).to eq 1 - expect(parser.unit.alternates[3][0].max_count).to be_nil - expect(parser.unit.alternates[3][0].unit).to be_a Parser::CharacterClassUnit - expect(parser.unit.alternates[3][0].unit.size).to eq 1 - expect(parser.unit.alternates[3][0].unit[0]).to be_a Parser::CharacterRangeUnit - expect(parser.unit.alternates[3][0].unit[0].min_code_point).to eq "x".ord - expect(parser.unit.alternates[3][0].unit[0].max_code_point).to eq "y".ord - end - - end - end -end diff --git a/spec/imbecile/regex_spec.rb b/spec/imbecile/regex_spec.rb new file mode 100644 index 0000000..3be355e --- /dev/null +++ b/spec/imbecile/regex_spec.rb @@ -0,0 +1,333 @@ +module Imbecile + RSpec.describe Regex do + + it "parses an empty expression" do + regex = Regex.new("") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0].size).to eq 0 + end + + it "parses a single character unit expression" do + regex = Regex.new("a") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 1 + expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit + end + + it "parses a group with a single character unit expression" do + regex = Regex.new("(a)") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 1 + expect(seq_unit[0]).to be_a Regex::AlternatesUnit + alt_unit = seq_unit[0] + expect(alt_unit.alternates.size).to eq 1 + expect(alt_unit.alternates[0]).to be_a Regex::SequenceUnit + expect(alt_unit.alternates[0][0]).to be_a Regex::CharacterRangeUnit + end + + it "parses a *" do + regex = Regex.new("a*") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 1 + expect(seq_unit[0]).to be_a Regex::MultiplicityUnit + m_unit = seq_unit[0] + expect(m_unit.min_count).to eq 0 + expect(m_unit.max_count).to be_nil + expect(m_unit.unit).to be_a Regex::CharacterRangeUnit + end + + it "parses a +" do + regex = Regex.new("a+") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 1 + expect(seq_unit[0]).to be_a Regex::MultiplicityUnit + m_unit = seq_unit[0] + expect(m_unit.min_count).to eq 1 + expect(m_unit.max_count).to be_nil + expect(m_unit.unit).to be_a Regex::CharacterRangeUnit + end + + it "parses a ?" do + regex = Regex.new("a?") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 1 + expect(seq_unit[0]).to be_a Regex::MultiplicityUnit + m_unit = seq_unit[0] + expect(m_unit.min_count).to eq 0 + expect(m_unit.max_count).to eq 1 + expect(m_unit.unit).to be_a Regex::CharacterRangeUnit + end + + it "parses a multiplicity count" do + regex = Regex.new("a{5}") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 1 + expect(seq_unit[0]).to be_a Regex::MultiplicityUnit + m_unit = seq_unit[0] + expect(m_unit.min_count).to eq 5 + expect(m_unit.max_count).to eq 5 + expect(m_unit.unit).to be_a Regex::CharacterRangeUnit + end + + it "parses a minimum-only multiplicity count" do + regex = Regex.new("a{5,}") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 1 + expect(seq_unit[0]).to be_a Regex::MultiplicityUnit + m_unit = seq_unit[0] + expect(m_unit.min_count).to eq 5 + expect(m_unit.max_count).to be_nil + expect(m_unit.unit).to be_a Regex::CharacterRangeUnit + end + + it "parses a minimum and maximum multiplicity count" do + regex = Regex.new("a{5,8}") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 1 + expect(seq_unit[0]).to be_a Regex::MultiplicityUnit + m_unit = seq_unit[0] + expect(m_unit.min_count).to eq 5 + expect(m_unit.max_count).to eq 8 + expect(m_unit.unit).to be_a Regex::CharacterRangeUnit + expect(m_unit.unit.range.first).to eq "a".ord + end + + it "parses an escaped *" do + regex = Regex.new("a\\*") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 2 + expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit + expect(seq_unit[0].min_code_point).to eq "a".ord + expect(seq_unit[1]).to be_a Regex::CharacterRangeUnit + expect(seq_unit[1].min_code_point).to eq "*".ord + end + + it "parses an escaped +" do + regex = Regex.new("a\\+") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 2 + expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit + expect(seq_unit[0].min_code_point).to eq "a".ord + expect(seq_unit[1]).to be_a Regex::CharacterRangeUnit + expect(seq_unit[1].min_code_point).to eq "+".ord + end + + it "parses an escaped \\" do + regex = Regex.new("\\\\d") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 2 + expect(seq_unit[0]).to be_a Regex::CharacterRangeUnit + expect(seq_unit[0].min_code_point).to eq "\\".ord + expect(seq_unit[1]).to be_a Regex::CharacterRangeUnit + expect(seq_unit[1].min_code_point).to eq "d".ord + end + + it "parses a character class" do + regex = Regex.new("[a-z_]") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 1 + expect(seq_unit[0]).to be_a Regex::CharacterClassUnit + ccu = seq_unit[0] + expect(ccu.negate).to be_falsey + expect(ccu.size).to eq 2 + expect(ccu[0]).to be_a Regex::CharacterRangeUnit + expect(ccu[0].min_code_point).to eq "a".ord + expect(ccu[0].max_code_point).to eq "z".ord + expect(ccu[1]).to be_a Regex::CharacterRangeUnit + expect(ccu[1].min_code_point).to eq "_".ord + end + + it "parses a negated character class" do + regex = Regex.new("[^xyz]") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 1 + expect(seq_unit[0]).to be_a Regex::CharacterClassUnit + ccu = seq_unit[0] + expect(ccu.negate).to be_truthy + expect(ccu.size).to eq 3 + expect(ccu[0]).to be_a Regex::CharacterRangeUnit + expect(ccu[0].min_code_point).to eq "x".ord + end + + it "parses - as a plain character at beginning of a character class" do + regex = Regex.new("[-9]") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 1 + expect(seq_unit[0]).to be_a Regex::CharacterClassUnit + ccu = seq_unit[0] + expect(ccu.size).to eq 2 + expect(ccu[0]).to be_a Regex::CharacterRangeUnit + expect(ccu[0].min_code_point).to eq "-".ord + end + + it "parses - as a plain character at end of a character class" do + regex = Regex.new("[0-]") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 1 + expect(seq_unit[0]).to be_a Regex::CharacterClassUnit + ccu = seq_unit[0] + expect(ccu.size).to eq 2 + expect(ccu[0]).to be_a Regex::CharacterRangeUnit + expect(ccu[0].min_code_point).to eq "0".ord + expect(ccu[1]).to be_a Regex::CharacterRangeUnit + expect(ccu[1].min_code_point).to eq "-".ord + end + + it "parses - as a plain character at beginning of a negated character class" do + regex = Regex.new("[^-9]") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 1 + expect(seq_unit[0]).to be_a Regex::CharacterClassUnit + ccu = seq_unit[0] + expect(ccu.negate).to be_truthy + expect(ccu.size).to eq 2 + expect(ccu[0]).to be_a Regex::CharacterRangeUnit + expect(ccu[0].min_code_point).to eq "-".ord + end + + it "parses . as a plain character in a character class" do + regex = Regex.new("[.]") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 1 + expect(seq_unit[0]).to be_a Regex::CharacterClassUnit + ccu = seq_unit[0] + expect(ccu.negate).to be_falsey + expect(ccu.size).to eq 1 + expect(ccu[0]).to be_a Regex::CharacterRangeUnit + expect(ccu[0].min_code_point).to eq ".".ord + end + + it "parses - as a plain character when escaped in middle of character class" do + regex = Regex.new("[0\\-9]") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + seq_unit = regex.unit.alternates[0] + expect(seq_unit.size).to eq 1 + expect(seq_unit[0]).to be_a Regex::CharacterClassUnit + ccu = seq_unit[0] + expect(ccu.negate).to be_falsey + expect(ccu.size).to eq 3 + expect(ccu[0]).to be_a Regex::CharacterRangeUnit + expect(ccu[0].min_code_point).to eq "0".ord + expect(ccu[1]).to be_a Regex::CharacterRangeUnit + expect(ccu[1].min_code_point).to eq "-".ord + expect(ccu[2]).to be_a Regex::CharacterRangeUnit + expect(ccu[2].min_code_point).to eq "9".ord + end + + it "parses alternates" do + regex = Regex.new("ab|c") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 2 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + expect(regex.unit.alternates[1]).to be_a Regex::SequenceUnit + expect(regex.unit.alternates[0].size).to eq 2 + expect(regex.unit.alternates[1].size).to eq 1 + end + + it "parses a ." do + regex = Regex.new("a.b") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 1 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + expect(regex.unit.alternates[0][0]).to be_a Regex::CharacterRangeUnit + expect(regex.unit.alternates[0][1]).to be_a Regex::CharacterClassUnit + expect(regex.unit.alternates[0][1].units.size).to eq 2 + expect(regex.unit.alternates[0][2]).to be_a Regex::CharacterRangeUnit + end + + it "parses something complex" do + regex = Regex.new("(a|)*|[^^]|\\|v|[x-y]+") + expect(regex.unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates.size).to eq 4 + expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit + expect(regex.unit.alternates[0].size).to eq 1 + expect(regex.unit.alternates[0][0]).to be_a Regex::MultiplicityUnit + expect(regex.unit.alternates[0][0].min_count).to eq 0 + expect(regex.unit.alternates[0][0].max_count).to be_nil + expect(regex.unit.alternates[0][0].unit).to be_a Regex::AlternatesUnit + expect(regex.unit.alternates[0][0].unit.alternates.size).to eq 2 + expect(regex.unit.alternates[0][0].unit.alternates[0]).to be_a Regex::SequenceUnit + expect(regex.unit.alternates[0][0].unit.alternates[0].size).to eq 1 + expect(regex.unit.alternates[0][0].unit.alternates[0][0]).to be_a Regex::CharacterRangeUnit + expect(regex.unit.alternates[0][0].unit.alternates[1]).to be_a Regex::SequenceUnit + expect(regex.unit.alternates[0][0].unit.alternates[1].size).to eq 0 + expect(regex.unit.alternates[1]).to be_a Regex::SequenceUnit + expect(regex.unit.alternates[1].size).to eq 1 + expect(regex.unit.alternates[1][0]).to be_a Regex::CharacterClassUnit + expect(regex.unit.alternates[1][0].negate).to be_truthy + expect(regex.unit.alternates[1][0].size).to eq 1 + expect(regex.unit.alternates[1][0][0]).to be_a Regex::CharacterRangeUnit + expect(regex.unit.alternates[2]).to be_a Regex::SequenceUnit + expect(regex.unit.alternates[2].size).to eq 2 + expect(regex.unit.alternates[2][0]).to be_a Regex::CharacterRangeUnit + expect(regex.unit.alternates[2][0].min_code_point).to eq "|".ord + expect(regex.unit.alternates[2][1]).to be_a Regex::CharacterRangeUnit + expect(regex.unit.alternates[2][1].min_code_point).to eq "v".ord + expect(regex.unit.alternates[3]).to be_a Regex::SequenceUnit + expect(regex.unit.alternates[3].size).to eq 1 + expect(regex.unit.alternates[3][0]).to be_a Regex::MultiplicityUnit + expect(regex.unit.alternates[3][0].min_count).to eq 1 + expect(regex.unit.alternates[3][0].max_count).to be_nil + expect(regex.unit.alternates[3][0].unit).to be_a Regex::CharacterClassUnit + expect(regex.unit.alternates[3][0].unit.size).to eq 1 + expect(regex.unit.alternates[3][0].unit[0]).to be_a Regex::CharacterRangeUnit + expect(regex.unit.alternates[3][0].unit[0].min_code_point).to eq "x".ord + expect(regex.unit.alternates[3][0].unit[0].max_code_point).to eq "y".ord + end + + end +end