Add CharacterClassUnit and use it instead of AlternatesUnit
This commit is contained in:
parent
ea27baa630
commit
2e8e72a1e8
@ -30,34 +30,21 @@ module Imbecile
|
||||
|
||||
class AlternatesUnit < Unit
|
||||
attr_accessor :alternates
|
||||
attr_accessor :negate
|
||||
def initialize
|
||||
@alternates = []
|
||||
@negate = false
|
||||
new_alternate!
|
||||
end
|
||||
def new_alternate!
|
||||
@alternates << SequenceUnit.new
|
||||
end
|
||||
def append_alternate(unit)
|
||||
@alternates << unit
|
||||
end
|
||||
def <<(unit)
|
||||
new_alternate! if @alternates.empty?
|
||||
@alternates[-1] << unit
|
||||
end
|
||||
def last_unit
|
||||
if @alternates.last.is_a?(SequenceUnit)
|
||||
@alternates[-1][-1]
|
||||
else
|
||||
@alternates[-1]
|
||||
end
|
||||
end
|
||||
def replace_last!(new_unit)
|
||||
if @alternates.last.is_a?(SequenceUnit)
|
||||
@alternates[-1][-1] = new_unit
|
||||
else
|
||||
@alternates[-1] = new_unit
|
||||
end
|
||||
end
|
||||
def to_nfa
|
||||
if @alternates.size == 0
|
||||
@ -104,6 +91,27 @@ module Imbecile
|
||||
end
|
||||
end
|
||||
|
||||
# A bracketed character class such as [a-z_] or [^x].
#
# The parser appends one unit per member while reading the class (single
# characters, character ranges, and whatever parse_backslash returns) and
# sets #negate when "^" is the first character inside the brackets.
#
# Messages this class does not define itself (<<, size, [], each, ...) are
# forwarded to the underlying Array of units.
class CharacterClassUnit < Unit
  # Array of member units, in the order they were appended.
  attr_accessor :units
  # true when the class is negated, false otherwise.
  attr_accessor :negate

  # Starts as an empty, non-negated class.
  #
  # There used to be two definitions of this method; the later one replaced
  # the earlier and left @negate unset (nil). A single definition keeps the
  # flag a real boolean from the start.
  def initialize
    @units = []
    @negate = false
  end

  # Delegates any message the units Array publicly responds to, passing
  # arguments and the caller's block through. Anything else falls back to
  # the default handling (NoMethodError) via super.
  def method_missing(name, *args, &block)
    if @units.respond_to?(name)
      @units.__send__(name, *args, &block)
    else
      super
    end
  end

  # Keeps respond_to? and method lookups consistent with the delegation
  # performed by method_missing.
  def respond_to_missing?(name, include_private = false)
    @units.respond_to?(name, include_private) || super
  end

  # Returns the most recently appended unit, or nil when the class is empty.
  def last_unit
    @units[-1]
  end

  # Replaces the most recently appended unit with new_unit (the parser uses
  # this to turn a trailing single character into a character range).
  def replace_last!(new_unit)
    @units[-1] = new_unit
  end
end
|
||||
|
||||
class MultiplicityUnit < Unit
|
||||
attr_accessor :unit
|
||||
attr_accessor :min_count
|
||||
@ -202,7 +210,7 @@ module Imbecile
|
||||
end
|
||||
|
||||
def parse_character_class
|
||||
au = AlternatesUnit.new
|
||||
ccu = CharacterClassUnit.new
|
||||
index = 0
|
||||
loop do
|
||||
if @pattern == ""
|
||||
@ -212,13 +220,13 @@ module Imbecile
|
||||
if c == "]"
|
||||
break
|
||||
elsif c == "^" && index == 0
|
||||
au.negate = true
|
||||
elsif c == "-" && (au.alternates.size == 0 || @pattern[0] == "]")
|
||||
au.append_alternate(CharacterUnit.new(c))
|
||||
ccu.negate = true
|
||||
elsif c == "-" && (ccu.size == 0 || @pattern[0] == "]")
|
||||
ccu << CharacterUnit.new(c)
|
||||
elsif c == "\\"
|
||||
au.append_alternate(parse_backslash)
|
||||
ccu << parse_backslash
|
||||
elsif c == "-" && @pattern[0] != "]"
|
||||
begin_cu = au.last_unit
|
||||
begin_cu = ccu.last_unit
|
||||
unless begin_cu.is_a?(CharacterUnit)
|
||||
raise Error.new("Character range must be between single characters")
|
||||
end
|
||||
@ -234,13 +242,13 @@ module Imbecile
|
||||
@pattern.slice!(0)
|
||||
end
|
||||
cru = CharacterRangeUnit.new(begin_cu.code_point, max_code_point)
|
||||
au.replace_last!(cru)
|
||||
ccu.replace_last!(cru)
|
||||
else
|
||||
au.append_alternate(CharacterUnit.new(c))
|
||||
ccu << CharacterUnit.new(c)
|
||||
end
|
||||
index += 1
|
||||
end
|
||||
au
|
||||
ccu
|
||||
end
|
||||
|
||||
def parse_curly_count
|
||||
|
@ -5,7 +5,8 @@ module Imbecile
|
||||
it "parses an empty expression" do
|
||||
parser = Parser.new("")
|
||||
expect(parser.unit).to be_a Parser::AlternatesUnit
|
||||
expect(parser.unit.alternates.size).to eq 0
|
||||
expect(parser.unit.alternates.size).to eq 1
|
||||
expect(parser.unit.alternates[0].size).to eq 0
|
||||
end
|
||||
|
||||
it "parses a single character unit expression" do
|
||||
@ -163,15 +164,15 @@ module Imbecile
|
||||
expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit
|
||||
seq_unit = parser.unit.alternates[0]
|
||||
expect(seq_unit.size).to eq 1
|
||||
expect(seq_unit[0]).to be_a Parser::AlternatesUnit
|
||||
alt_unit = seq_unit[0]
|
||||
expect(alt_unit.negate).to be_falsey
|
||||
expect(alt_unit.alternates.size).to eq 2
|
||||
expect(alt_unit.alternates[0]).to be_a Parser::CharacterRangeUnit
|
||||
expect(alt_unit.alternates[0].min_code_point).to eq "a".ord
|
||||
expect(alt_unit.alternates[0].max_code_point).to eq "z".ord
|
||||
expect(alt_unit.alternates[1]).to be_a Parser::CharacterUnit
|
||||
expect(alt_unit.alternates[1].code_point).to eq "_".ord
|
||||
expect(seq_unit[0]).to be_a Parser::CharacterClassUnit
|
||||
ccu = seq_unit[0]
|
||||
expect(ccu.negate).to be_falsey
|
||||
expect(ccu.size).to eq 2
|
||||
expect(ccu[0]).to be_a Parser::CharacterRangeUnit
|
||||
expect(ccu[0].min_code_point).to eq "a".ord
|
||||
expect(ccu[0].max_code_point).to eq "z".ord
|
||||
expect(ccu[1]).to be_a Parser::CharacterUnit
|
||||
expect(ccu[1].code_point).to eq "_".ord
|
||||
end
|
||||
|
||||
it "parses a negated character class" do
|
||||
@ -181,12 +182,12 @@ module Imbecile
|
||||
expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit
|
||||
seq_unit = parser.unit.alternates[0]
|
||||
expect(seq_unit.size).to eq 1
|
||||
expect(seq_unit[0]).to be_a Parser::AlternatesUnit
|
||||
alt_unit = seq_unit[0]
|
||||
expect(alt_unit.negate).to be_truthy
|
||||
expect(alt_unit.alternates.size).to eq 3
|
||||
expect(alt_unit.alternates[0]).to be_a Parser::CharacterUnit
|
||||
expect(alt_unit.alternates[0].code_point).to eq "x".ord
|
||||
expect(seq_unit[0]).to be_a Parser::CharacterClassUnit
|
||||
ccu = seq_unit[0]
|
||||
expect(ccu.negate).to be_truthy
|
||||
expect(ccu.size).to eq 3
|
||||
expect(ccu[0]).to be_a Parser::CharacterUnit
|
||||
expect(ccu[0].code_point).to eq "x".ord
|
||||
end
|
||||
|
||||
it "parses - as a plain character at beginning of a character class" do
|
||||
@ -196,11 +197,11 @@ module Imbecile
|
||||
expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit
|
||||
seq_unit = parser.unit.alternates[0]
|
||||
expect(seq_unit.size).to eq 1
|
||||
expect(seq_unit[0]).to be_a Parser::AlternatesUnit
|
||||
alt_unit = seq_unit[0]
|
||||
expect(alt_unit.alternates.size).to eq 2
|
||||
expect(alt_unit.alternates[0]).to be_a Parser::CharacterUnit
|
||||
expect(alt_unit.alternates[0].code_point).to eq "-".ord
|
||||
expect(seq_unit[0]).to be_a Parser::CharacterClassUnit
|
||||
ccu = seq_unit[0]
|
||||
expect(ccu.size).to eq 2
|
||||
expect(ccu[0]).to be_a Parser::CharacterUnit
|
||||
expect(ccu[0].code_point).to eq "-".ord
|
||||
end
|
||||
|
||||
it "parses - as a plain character at end of a character class" do
|
||||
@ -210,13 +211,13 @@ module Imbecile
|
||||
expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit
|
||||
seq_unit = parser.unit.alternates[0]
|
||||
expect(seq_unit.size).to eq 1
|
||||
expect(seq_unit[0]).to be_a Parser::AlternatesUnit
|
||||
alt_unit = seq_unit[0]
|
||||
expect(alt_unit.alternates.size).to eq 2
|
||||
expect(alt_unit.alternates[0]).to be_a Parser::CharacterUnit
|
||||
expect(alt_unit.alternates[0].code_point).to eq "0".ord
|
||||
expect(alt_unit.alternates[1]).to be_a Parser::CharacterUnit
|
||||
expect(alt_unit.alternates[1].code_point).to eq "-".ord
|
||||
expect(seq_unit[0]).to be_a Parser::CharacterClassUnit
|
||||
ccu = seq_unit[0]
|
||||
expect(ccu.size).to eq 2
|
||||
expect(ccu[0]).to be_a Parser::CharacterUnit
|
||||
expect(ccu[0].code_point).to eq "0".ord
|
||||
expect(ccu[1]).to be_a Parser::CharacterUnit
|
||||
expect(ccu[1].code_point).to eq "-".ord
|
||||
end
|
||||
|
||||
it "parses - as a plain character at beginning of a negated character class" do
|
||||
@ -226,12 +227,12 @@ module Imbecile
|
||||
expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit
|
||||
seq_unit = parser.unit.alternates[0]
|
||||
expect(seq_unit.size).to eq 1
|
||||
expect(seq_unit[0]).to be_a Parser::AlternatesUnit
|
||||
alt_unit = seq_unit[0]
|
||||
expect(alt_unit.negate).to be_truthy
|
||||
expect(alt_unit.alternates.size).to eq 2
|
||||
expect(alt_unit.alternates[0]).to be_a Parser::CharacterUnit
|
||||
expect(alt_unit.alternates[0].code_point).to eq "-".ord
|
||||
expect(seq_unit[0]).to be_a Parser::CharacterClassUnit
|
||||
ccu = seq_unit[0]
|
||||
expect(ccu.negate).to be_truthy
|
||||
expect(ccu.size).to eq 2
|
||||
expect(ccu[0]).to be_a Parser::CharacterUnit
|
||||
expect(ccu[0].code_point).to eq "-".ord
|
||||
end
|
||||
|
||||
it "parses . as a plain character in a negated character class" do
|
||||
@ -241,12 +242,12 @@ module Imbecile
|
||||
expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit
|
||||
seq_unit = parser.unit.alternates[0]
|
||||
expect(seq_unit.size).to eq 1
|
||||
expect(seq_unit[0]).to be_a Parser::AlternatesUnit
|
||||
alt_unit = seq_unit[0]
|
||||
expect(alt_unit.negate).to be_falsey
|
||||
expect(alt_unit.alternates.size).to eq 1
|
||||
expect(alt_unit.alternates[0]).to be_a Parser::CharacterUnit
|
||||
expect(alt_unit.alternates[0].code_point).to eq ".".ord
|
||||
expect(seq_unit[0]).to be_a Parser::CharacterClassUnit
|
||||
ccu = seq_unit[0]
|
||||
expect(ccu.negate).to be_falsey
|
||||
expect(ccu.size).to eq 1
|
||||
expect(ccu[0]).to be_a Parser::CharacterUnit
|
||||
expect(ccu[0].code_point).to eq ".".ord
|
||||
end
|
||||
|
||||
it "parses - as a plain character when escaped in middle of character class" do
|
||||
@ -256,16 +257,16 @@ module Imbecile
|
||||
expect(parser.unit.alternates[0]).to be_a Parser::SequenceUnit
|
||||
seq_unit = parser.unit.alternates[0]
|
||||
expect(seq_unit.size).to eq 1
|
||||
expect(seq_unit[0]).to be_a Parser::AlternatesUnit
|
||||
alt_unit = seq_unit[0]
|
||||
expect(alt_unit.negate).to be_falsey
|
||||
expect(alt_unit.alternates.size).to eq 3
|
||||
expect(alt_unit.alternates[0]).to be_a Parser::CharacterUnit
|
||||
expect(alt_unit.alternates[0].code_point).to eq "0".ord
|
||||
expect(alt_unit.alternates[1]).to be_a Parser::CharacterUnit
|
||||
expect(alt_unit.alternates[1].code_point).to eq "-".ord
|
||||
expect(alt_unit.alternates[2]).to be_a Parser::CharacterUnit
|
||||
expect(alt_unit.alternates[2].code_point).to eq "9".ord
|
||||
expect(seq_unit[0]).to be_a Parser::CharacterClassUnit
|
||||
ccu = seq_unit[0]
|
||||
expect(ccu.negate).to be_falsey
|
||||
expect(ccu.size).to eq 3
|
||||
expect(ccu[0]).to be_a Parser::CharacterUnit
|
||||
expect(ccu[0].code_point).to eq "0".ord
|
||||
expect(ccu[1]).to be_a Parser::CharacterUnit
|
||||
expect(ccu[1].code_point).to eq "-".ord
|
||||
expect(ccu[2]).to be_a Parser::CharacterUnit
|
||||
expect(ccu[2].code_point).to eq "9".ord
|
||||
end
|
||||
|
||||
it "parses alternates" do
|
||||
@ -296,10 +297,10 @@ module Imbecile
|
||||
expect(parser.unit.alternates[0][0].unit.alternates[1].size).to eq 0
|
||||
expect(parser.unit.alternates[1]).to be_a Parser::SequenceUnit
|
||||
expect(parser.unit.alternates[1].size).to eq 1
|
||||
expect(parser.unit.alternates[1][0]).to be_a Parser::AlternatesUnit
|
||||
expect(parser.unit.alternates[1][0]).to be_a Parser::CharacterClassUnit
|
||||
expect(parser.unit.alternates[1][0].negate).to be_truthy
|
||||
expect(parser.unit.alternates[1][0].alternates.size).to eq 1
|
||||
expect(parser.unit.alternates[1][0].alternates[0]).to be_a Parser::CharacterUnit
|
||||
expect(parser.unit.alternates[1][0].size).to eq 1
|
||||
expect(parser.unit.alternates[1][0][0]).to be_a Parser::CharacterUnit
|
||||
expect(parser.unit.alternates[2]).to be_a Parser::SequenceUnit
|
||||
expect(parser.unit.alternates[2].size).to eq 2
|
||||
expect(parser.unit.alternates[2][0]).to be_a Parser::CharacterUnit
|
||||
@ -311,11 +312,11 @@ module Imbecile
|
||||
expect(parser.unit.alternates[3][0]).to be_a Parser::MultiplicityUnit
|
||||
expect(parser.unit.alternates[3][0].min_count).to eq 1
|
||||
expect(parser.unit.alternates[3][0].max_count).to be_nil
|
||||
expect(parser.unit.alternates[3][0].unit).to be_a Parser::AlternatesUnit
|
||||
expect(parser.unit.alternates[3][0].unit.alternates.size).to eq 1
|
||||
expect(parser.unit.alternates[3][0].unit.alternates[0]).to be_a Parser::CharacterRangeUnit
|
||||
expect(parser.unit.alternates[3][0].unit.alternates[0].min_code_point).to eq "x".ord
|
||||
expect(parser.unit.alternates[3][0].unit.alternates[0].max_code_point).to eq "y".ord
|
||||
expect(parser.unit.alternates[3][0].unit).to be_a Parser::CharacterClassUnit
|
||||
expect(parser.unit.alternates[3][0].unit.size).to eq 1
|
||||
expect(parser.unit.alternates[3][0].unit[0]).to be_a Parser::CharacterRangeUnit
|
||||
expect(parser.unit.alternates[3][0].unit[0].min_code_point).to eq "x".ord
|
||||
expect(parser.unit.alternates[3][0].unit[0].max_code_point).to eq "y".ord
|
||||
end
|
||||
|
||||
end
|
||||
|
Loading…
x
Reference in New Issue
Block a user