Add \D, \S, \w, \W special character classes

This commit is contained in:
Josh Holtrop 2025-07-26 21:21:29 -04:00
parent 5b243507cf
commit 5486e5f138
4 changed files with 91 additions and 3 deletions

View File

@ -141,6 +141,11 @@ class Propane
CharacterRangeUnit.new("\b")
when "d"
CharacterRangeUnit.new("0", "9")
when "D"
ccu = CharacterClassUnit.new
ccu << CharacterRangeUnit.new("0", "9")
ccu.negate = true
ccu
when "f"
CharacterRangeUnit.new("\f")
when "n"
@ -156,10 +161,35 @@ class Propane
ccu << CharacterRangeUnit.new("\f")
ccu << CharacterRangeUnit.new("\v")
ccu
when "S"
ccu = CharacterClassUnit.new
ccu << CharacterRangeUnit.new(" ")
ccu << CharacterRangeUnit.new("\t")
ccu << CharacterRangeUnit.new("\r")
ccu << CharacterRangeUnit.new("\n")
ccu << CharacterRangeUnit.new("\f")
ccu << CharacterRangeUnit.new("\v")
ccu.negate = true
ccu
when "t"
CharacterRangeUnit.new("\t")
when "v"
CharacterRangeUnit.new("\v")
when "w"
ccu = CharacterClassUnit.new
ccu << CharacterRangeUnit.new("_")
ccu << CharacterRangeUnit.new("0", "9")
ccu << CharacterRangeUnit.new("a", "z")
ccu << CharacterRangeUnit.new("A", "Z")
ccu
when "W"
ccu = CharacterClassUnit.new
ccu << CharacterRangeUnit.new("_")
ccu << CharacterRangeUnit.new("0", "9")
ccu << CharacterRangeUnit.new("a", "z")
ccu << CharacterRangeUnit.new("A", "Z")
ccu.negate = true
ccu
else
CharacterRangeUnit.new(c)
end

View File

@ -97,8 +97,14 @@ class Propane
end
def <<(thing)
if thing.is_a?(CharacterClassUnit)
thing.each do |ccu_unit|
@units << ccu_unit
if thing.negate
  # A negated inner class contributes the complement of its ranges.
  # `each` is used purely for its side effect, so every unit built from an
  # inverted range must be appended to @units explicitly; constructing it
  # without `<<` would discard it and silently drop the inner class.
  inverted = CodePointRange.invert_ranges(thing.map(&:code_point_range))
  inverted.each do |cpr|
    @units << CharacterRangeUnit.new(cpr.first, cpr.last)
  end
else
  # A non-negated inner class is flattened: its units are adopted as-is.
  thing.each do |ccu_unit|
    @units << ccu_unit
  end
end
else
@units << thing

View File

@ -135,6 +135,58 @@ EOF
]
expect(run(<<EOF, "/abc/")).to eq expected
token pattern /\\/[^\\s]*\\//;
EOF
end
it "matches special character classes " do
  # Each grammar is first exercised on its own, then all four are combined
  # with a whitespace `drop` rule to check they coexist in one lexer.

  # \w+ : the whole run of letters, digits and underscore is one token.
  word_grammar = <<EOF
token a /\\w+/;
EOF
  word_token = ["a", "abc123_FOO"]
  expect(run(word_grammar, "abc123_FOO")).to eq [word_token]

  # \D{1,4} : up to four non-digit characters following the literal "FROG".
  non_digit_grammar = <<EOF
token b /FROG\\D{1,4}/;
EOF
  non_digit_token = ["b", "FROG*%$#"]
  expect(run(non_digit_grammar, "FROG*%$#")).to eq [non_digit_token]

  # \d+ : digits after a "$", which the expected token shows is matched
  # literally by this pattern.
  digit_grammar = <<EOF
token c /$\\d+/;
EOF
  digit_token = ["c", "$883366"]
  expect(run(digit_grammar, "$883366")).to eq [digit_token]

  # \W+ : non-word characters after a "^", which the expected token shows is
  # matched literally by this pattern.
  non_word_grammar = <<EOF
token d /^\\W+/;
EOF
  non_word_token = ["d", "^&$@"]
  expect(run(non_word_grammar, "^&$@")).to eq [non_word_token]

  # Combined grammar: the separating spaces are reported as [nil, " "]
  # entries between the named tokens.
  combined_grammar = <<EOF
token a /\\w+/;
token b /FROG\\D{1,4}/;
token c /$\\d+/;
token d /^\\W+/;
drop /\\s+/;
EOF
  gap = [nil, " "]
  combined_expected = [
    word_token,
    gap,
    non_digit_token,
    gap,
    digit_token,
    gap,
    non_word_token,
  ]
  expect(run(combined_grammar, "abc123_FOO FROG*%$# $883366 ^&$@")).to eq combined_expected
end
it "matches a negated character class with a nested inner negated character class" do
  # [^%\W] excludes "%" and every non-word character, so the only characters
  # it can match are word characters (letters, digits, underscore).
  # Punctuation such as "$&*" is excluded by the inner \W and therefore must
  # not be used as the matching input here.
  expected = [
    ["t", "abc_123"],
  ]
  expect(run(<<EOF, "abc_123")).to eq expected
token t /[^%\\W]+/;
EOF
end
end

View File

@ -190,7 +190,7 @@ class Propane
end
it "parses a negated character class with inner character classes" do
regex = Regex.new("[^x\\sz]")
regex = Regex.new("[^x\\sz]", 1)
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit