From 2e8e72a1e8f5d62e6137485d40a46485334b9bec Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Fri, 14 May 2021 12:32:53 -0400 Subject: [PATCH] Add CharacterClassUnit and use it instead of AlternatesUnit --- lib/imbecile/regex/parser.rb | 58 ++++++++------ spec/imbecile/regex/parser_spec.rb | 117 +++++++++++++++-------------- 2 files changed, 92 insertions(+), 83 deletions(-) diff --git a/lib/imbecile/regex/parser.rb b/lib/imbecile/regex/parser.rb index 6d7d605..03b0d24 100644 --- a/lib/imbecile/regex/parser.rb +++ b/lib/imbecile/regex/parser.rb @@ -30,34 +30,21 @@ module Imbecile class AlternatesUnit < Unit attr_accessor :alternates - attr_accessor :negate def initialize @alternates = [] - @negate = false + new_alternate! end def new_alternate! @alternates << SequenceUnit.new end - def append_alternate(unit) - @alternates << unit - end def <<(unit) - new_alternate! if @alternates.empty? @alternates[-1] << unit end def last_unit - if @alternates.last.is_a?(SequenceUnit) - @alternates[-1][-1] - else - @alternates[-1] - end + @alternates[-1][-1] end def replace_last!(new_unit) - if @alternates.last.is_a?(SequenceUnit) - @alternates[-1][-1] = new_unit - else - @alternates[-1] = new_unit - end + @alternates[-1][-1] = new_unit end def to_nfa if @alternates.size == 0 @@ -104,6 +91,27 @@ module Imbecile end end + class CharacterClassUnit < Unit + attr_accessor :units + attr_accessor :negate + def initialize + @units = [] + @negate = false + end + def initialize + @units = [] + end + def method_missing(*args) + @units.__send__(*args) + end + def last_unit + @units[-1] + end + def replace_last!(new_unit) + @units[-1] = new_unit + end + end + class MultiplicityUnit < Unit attr_accessor :unit attr_accessor :min_count @@ -202,7 +210,7 @@ module Imbecile end def parse_character_class - au = AlternatesUnit.new + ccu = CharacterClassUnit.new index = 0 loop do if @pattern == "" @@ -212,13 +220,13 @@ module Imbecile if c == "]" break elsif c == "^" && index == 0 - au.negate = true - elsif c == "-" && (au.alternates.size == 0 || @pattern[0] == "]") - au.append_alternate(CharacterUnit.new(c)) + ccu.negate = true + elsif c == "-" && (ccu.size == 0 || @pattern[0] == "]") + ccu << CharacterUnit.new(c) elsif c == "\\" - au.append_alternate(parse_backslash) + ccu << parse_backslash elsif c == "-" && @pattern[0] != "]" - begin_cu = au.last_unit + begin_cu = ccu.last_unit unless begin_cu.is_a?(CharacterUnit) raise Error.new("Character range must be between single characters") end @@ -234,13 +242,13 @@ module Imbecile @pattern.slice!(0) end cru = CharacterRangeUnit.new(begin_cu.code_point, max_code_point) - au.replace_last!(cru) + ccu.replace_last!(cru) else - au.append_alternate(CharacterUnit.new(c)) + ccu << CharacterUnit.new(c) end index += 1 end - au + ccu end def parse_curly_count diff --git a/spec/imbecile/regex/parser_spec.rb b/spec/imbecile/regex/parser_spec.rb index 2092386..cc5febb 100644 --- a/spec/imbecile/regex/parser_spec.rb +++ b/spec/imbecile/regex/parser_spec.rb @@ -5,7 +5,8 @@ module Imbecile it "parses an empty expression" do parser = Parser.new("") expect(parser.unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates.size).to eq 0 + expect(parser.unit.alternates.size).to eq 1 + expect(parser.unit.alternates[0].size).to eq 0 end it "parses a single character unit expression" do @@ -163,15 +164,15 @@ module Imbecile expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit seq_unit = parser.unit.alternates[0] expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::AlternatesUnit - alt_unit = seq_unit[0] - expect(alt_unit.negate).to be_falsey - expect(alt_unit.alternates.size).to eq 2 - expect(alt_unit.alternates[0]).to be_a Parser::CharacterRangeUnit - expect(alt_unit.alternates[0].min_code_point).to eq "a".ord - expect(alt_unit.alternates[0].max_code_point).to eq "z".ord - expect(alt_unit.alternates[1]).to be_a Parser::CharacterUnit - expect(alt_unit.alternates[1].code_point).to eq "_".ord + expect(seq_unit[0]).to be_a Parser::CharacterClassUnit + ccu = seq_unit[0] + expect(ccu.negate).to be_falsey + expect(ccu.size).to eq 2 + expect(ccu[0]).to be_a Parser::CharacterRangeUnit + expect(ccu[0].min_code_point).to eq "a".ord + expect(ccu[0].max_code_point).to eq "z".ord + expect(ccu[1]).to be_a Parser::CharacterUnit + expect(ccu[1].code_point).to eq "_".ord end it "parses a negated character class" do @@ -181,12 +182,12 @@ module Imbecile expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit seq_unit = parser.unit.alternates[0] expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::AlternatesUnit - alt_unit = seq_unit[0] - expect(alt_unit.negate).to be_truthy - expect(alt_unit.alternates.size).to eq 3 - expect(alt_unit.alternates[0]).to be_a Parser::CharacterUnit - expect(alt_unit.alternates[0].code_point).to eq "x".ord + expect(seq_unit[0]).to be_a Parser::CharacterClassUnit + ccu = seq_unit[0] + expect(ccu.negate).to be_truthy + expect(ccu.size).to eq 3 + expect(ccu[0]).to be_a Parser::CharacterUnit + expect(ccu[0].code_point).to eq "x".ord end it "parses - as a plain character at beginning of a character class" do @@ -196,11 +197,11 @@ module Imbecile expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit seq_unit = parser.unit.alternates[0] expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::AlternatesUnit - alt_unit = seq_unit[0] - expect(alt_unit.alternates.size).to eq 2 - expect(alt_unit.alternates[0]).to be_a Parser::CharacterUnit - expect(alt_unit.alternates[0].code_point).to eq "-".ord + expect(seq_unit[0]).to be_a Parser::CharacterClassUnit + ccu = seq_unit[0] + expect(ccu.size).to eq 2 + expect(ccu[0]).to be_a Parser::CharacterUnit + expect(ccu[0].code_point).to eq "-".ord end it "parses - as a plain character at end of a character class" do @@ -210,13 +211,13 @@ module Imbecile expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit seq_unit = parser.unit.alternates[0] expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::AlternatesUnit - alt_unit = seq_unit[0] - expect(alt_unit.alternates.size).to eq 2 - expect(alt_unit.alternates[0]).to be_a Parser::CharacterUnit - expect(alt_unit.alternates[0].code_point).to eq "0".ord - expect(alt_unit.alternates[1]).to be_a Parser::CharacterUnit - expect(alt_unit.alternates[1].code_point).to eq "-".ord + expect(seq_unit[0]).to be_a Parser::CharacterClassUnit + ccu = seq_unit[0] + expect(ccu.size).to eq 2 + expect(ccu[0]).to be_a Parser::CharacterUnit + expect(ccu[0].code_point).to eq "0".ord + expect(ccu[1]).to be_a Parser::CharacterUnit + expect(ccu[1].code_point).to eq "-".ord end it "parses - as a plain character at beginning of a negated character class" do @@ -226,12 +227,12 @@ module Imbecile expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit seq_unit = parser.unit.alternates[0] expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::AlternatesUnit - alt_unit = seq_unit[0] - expect(alt_unit.negate).to be_truthy - expect(alt_unit.alternates.size).to eq 2 - expect(alt_unit.alternates[0]).to be_a Parser::CharacterUnit - expect(alt_unit.alternates[0].code_point).to eq "-".ord + expect(seq_unit[0]).to be_a Parser::CharacterClassUnit + ccu = seq_unit[0] + expect(ccu.negate).to be_truthy + expect(ccu.size).to eq 2 + expect(ccu[0]).to be_a Parser::CharacterUnit + expect(ccu[0].code_point).to eq "-".ord end it "parses . as a plain character in a negated character class" do @@ -241,12 +242,12 @@ module Imbecile expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit seq_unit = parser.unit.alternates[0] expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::AlternatesUnit - alt_unit = seq_unit[0] - expect(alt_unit.negate).to be_falsey - expect(alt_unit.alternates.size).to eq 1 - expect(alt_unit.alternates[0]).to be_a Parser::CharacterUnit - expect(alt_unit.alternates[0].code_point).to eq ".".ord + expect(seq_unit[0]).to be_a Parser::CharacterClassUnit + ccu = seq_unit[0] + expect(ccu.negate).to be_falsey + expect(ccu.size).to eq 1 + expect(ccu[0]).to be_a Parser::CharacterUnit + expect(ccu[0].code_point).to eq ".".ord end it "parses - as a plain character when escaped in middle of character class" do @@ -256,16 +257,16 @@ module Imbecile expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit seq_unit = parser.unit.alternates[0] expect(seq_unit.size).to eq 1 - expect(seq_unit[0]).to be_a Parser::AlternatesUnit - alt_unit = seq_unit[0] - expect(alt_unit.negate).to be_falsey - expect(alt_unit.alternates.size).to eq 3 - expect(alt_unit.alternates[0]).to be_a Parser::CharacterUnit - expect(alt_unit.alternates[0].code_point).to eq "0".ord - expect(alt_unit.alternates[1]).to be_a Parser::CharacterUnit - expect(alt_unit.alternates[1].code_point).to eq "-".ord - expect(alt_unit.alternates[2]).to be_a Parser::CharacterUnit - expect(alt_unit.alternates[2].code_point).to eq "9".ord + expect(seq_unit[0]).to be_a Parser::CharacterClassUnit + ccu = seq_unit[0] + expect(ccu.negate).to be_falsey + expect(ccu.size).to eq 3 + expect(ccu[0]).to be_a Parser::CharacterUnit + expect(ccu[0].code_point).to eq "0".ord + expect(ccu[1]).to be_a Parser::CharacterUnit + expect(ccu[1].code_point).to eq "-".ord + expect(ccu[2]).to be_a Parser::CharacterUnit + expect(ccu[2].code_point).to eq "9".ord end it "parses alternates" do @@ -296,10 +297,10 @@ module Imbecile expect(parser.unit.alternates[0][0].unit.alternates[1].size).to eq 0 expect(parser.unit.alternates[1]).to be_a Parser::SequenceUnit expect(parser.unit.alternates[1].size).to eq 1 - expect(parser.unit.alternates[1][0]).to be_a Parser::AlternatesUnit + expect(parser.unit.alternates[1][0]).to be_a Parser::CharacterClassUnit expect(parser.unit.alternates[1][0].negate).to be_truthy - expect(parser.unit.alternates[1][0].alternates.size).to eq 1 - expect(parser.unit.alternates[1][0].alternates[0]).to be_a Parser::CharacterUnit + expect(parser.unit.alternates[1][0].size).to eq 1 + expect(parser.unit.alternates[1][0][0]).to be_a Parser::CharacterUnit expect(parser.unit.alternates[2]).to be_a Parser::SequenceUnit expect(parser.unit.alternates[2].size).to eq 2 expect(parser.unit.alternates[2][0]).to be_a Parser::CharacterUnit @@ -311,11 +312,11 @@ module Imbecile expect(parser.unit.alternates[3][0]).to be_a Parser::MultiplicityUnit expect(parser.unit.alternates[3][0].min_count).to eq 1 expect(parser.unit.alternates[3][0].max_count).to be_nil - expect(parser.unit.alternates[3][0].unit).to be_a Parser::AlternatesUnit - expect(parser.unit.alternates[3][0].unit.alternates.size).to eq 1 - expect(parser.unit.alternates[3][0].unit.alternates[0]).to be_a Parser::CharacterRangeUnit - expect(parser.unit.alternates[3][0].unit.alternates[0].min_code_point).to eq "x".ord - expect(parser.unit.alternates[3][0].unit.alternates[0].max_code_point).to eq "y".ord + expect(parser.unit.alternates[3][0].unit).to be_a Parser::CharacterClassUnit + expect(parser.unit.alternates[3][0].unit.size).to eq 1 + expect(parser.unit.alternates[3][0].unit[0]).to be_a Parser::CharacterRangeUnit + expect(parser.unit.alternates[3][0].unit[0].min_code_point).to eq "x".ord + expect(parser.unit.alternates[3][0].unit[0].max_code_point).to eq "y".ord end end