From 5486e5f138f75c951aabc726f398c3e430888c80 Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Sat, 26 Jul 2025 21:21:29 -0400 Subject: [PATCH] Add \D, \S, \w, \W special character classes --- lib/propane/regex.rb | 30 ++++++++++++++++++++ lib/propane/regex/unit.rb | 10 +++++-- spec/propane/lexer/dfa_spec.rb | 52 ++++++++++++++++++++++++++++++++++ spec/propane/regex_spec.rb | 2 +- 4 files changed, 91 insertions(+), 3 deletions(-) diff --git a/lib/propane/regex.rb b/lib/propane/regex.rb index 0564584..098b8c9 100644 --- a/lib/propane/regex.rb +++ b/lib/propane/regex.rb @@ -141,6 +141,11 @@ class Propane CharacterRangeUnit.new("\b") when "d" CharacterRangeUnit.new("0", "9") + when "D" + ccu = CharacterClassUnit.new + ccu << CharacterRangeUnit.new("0", "9") + ccu.negate = true + ccu when "f" CharacterRangeUnit.new("\f") when "n" @@ -156,10 +161,35 @@ class Propane ccu << CharacterRangeUnit.new("\f") ccu << CharacterRangeUnit.new("\v") ccu + when "S" + ccu = CharacterClassUnit.new + ccu << CharacterRangeUnit.new(" ") + ccu << CharacterRangeUnit.new("\t") + ccu << CharacterRangeUnit.new("\r") + ccu << CharacterRangeUnit.new("\n") + ccu << CharacterRangeUnit.new("\f") + ccu << CharacterRangeUnit.new("\v") + ccu.negate = true + ccu when "t" CharacterRangeUnit.new("\t") when "v" CharacterRangeUnit.new("\v") + when "w" + ccu = CharacterClassUnit.new + ccu << CharacterRangeUnit.new("_") + ccu << CharacterRangeUnit.new("0", "9") + ccu << CharacterRangeUnit.new("a", "z") + ccu << CharacterRangeUnit.new("A", "Z") + ccu + when "W" + ccu = CharacterClassUnit.new + ccu << CharacterRangeUnit.new("_") + ccu << CharacterRangeUnit.new("0", "9") + ccu << CharacterRangeUnit.new("a", "z") + ccu << CharacterRangeUnit.new("A", "Z") + ccu.negate = true + ccu else CharacterRangeUnit.new(c) end diff --git a/lib/propane/regex/unit.rb b/lib/propane/regex/unit.rb index e68658f..d0a1d22 100644 --- a/lib/propane/regex/unit.rb +++ b/lib/propane/regex/unit.rb @@ -97,8 +97,14 @@ class Propane end 
def <<(thing) if thing.is_a?(CharacterClassUnit) - thing.each do |ccu_unit| - @units << ccu_unit + if thing.negate + CodePointRange.invert_ranges(thing.map(&:code_point_range)).each do |cpr| + @units << CharacterRangeUnit.new(cpr.first, cpr.last) + end + else + thing.each do |ccu_unit| + @units << ccu_unit + end end else @units << thing diff --git a/spec/propane/lexer/dfa_spec.rb b/spec/propane/lexer/dfa_spec.rb index 555a271..2e13ac2 100644 --- a/spec/propane/lexer/dfa_spec.rb +++ b/spec/propane/lexer/dfa_spec.rb @@ -135,6 +135,58 @@ EOF ] expect(run(<