Fix matching lexer patterns containing a negated character class which includes a backslash-escaped sequence that matches more than one character

This commit is contained in:
Josh Holtrop 2025-07-25 16:14:35 -04:00
parent 125c149750
commit 035bb2fc60
3 changed files with 28 additions and 5 deletions

View File

@ -92,11 +92,8 @@ class Propane
@units = []
@negate = false
end
def initialize
@units = []
end
def method_missing(*args)
@units.__send__(*args)
# Delegates any call that reaches method_missing to @units: args (method name
# first, then its arguments) and the caller's block are both passed through
# __send__, so block-taking calls made on this object also work.
# NOTE(review): no respond_to_missing? is visible in this hunk - confirm whether one is needed.
def method_missing(*args, &block)
@units.__send__(*args, &block)
end
def <<(thing)
if thing.is_a?(CharacterClassUnit)

View File

@ -126,6 +126,15 @@ EOF
]
expect(run(<<EOF, ";")).to eq expected
token semicolon /;/;
EOF
end
# Spec: a token pattern containing a negated character class with a
# backslash escape inside it should match the whole input "/abc/".
it "matches a negated character class" do
# Expected result of run: one entry pairing "pattern" with "/abc/"
# (presumably token name and matched text - confirm against run's definition).
expected = [
["pattern", "/abc/"],
]
# The heredoc is the first argument to run and "/abc/" is the second.
# In this interpolating heredoc each `\\` yields a single backslash, so the
# text actually passed is: token pattern /\/[^\s]*\//;
expect(run(<<EOF, "/abc/")).to eq expected
token pattern /\\/[^\\s]*\\//;
EOF
end
end

View File

@ -189,6 +189,23 @@ class Propane
expect(ccu[0].first).to eq "x".ord
end
# Spec: the regex text [^x\sz] (written "[^x\\sz]" as a Ruby string) parses to
# a single negated character class whose units include those contributed by \s.
it "parses a negated character class with inner character classes" do
regex = Regex.new("[^x\\sz]")
# Walk down the parsed structure: one alternate, holding a sequence of one unit.
expect(regex.unit).to be_a Regex::AlternatesUnit
expect(regex.unit.alternates.size).to eq 1
expect(regex.unit.alternates[0]).to be_a Regex::SequenceUnit
seq_unit = regex.unit.alternates[0]
expect(seq_unit.size).to eq 1
expect(seq_unit[0]).to be_a Regex::CharacterClassUnit
ccu = seq_unit[0]
# The leading ^ must be recorded as negation on the class itself.
expect(ccu.negate).to be_truthy
# 8 units in total: index 0 is "x" and index 7 is "z", which leaves six
# units in between - presumably the expansion of \s, starting with the space.
expect(ccu.size).to eq 8
expect(ccu[0]).to be_a Regex::CharacterRangeUnit
expect(ccu[0].first).to eq "x".ord
expect(ccu[1].first).to eq " ".ord
expect(ccu[7].first).to eq "z".ord
end
it "parses - as a plain character at beginning of a character class" do
regex = Regex.new("[-9]")
expect(regex.unit).to be_a Regex::AlternatesUnit