Merge Regex::Parser into Regex, move Unit to its own file
This commit is contained in:
parent
89a5976064
commit
24054461a2
@ -2,7 +2,7 @@ require_relative "imbecile/cli"
|
||||
require_relative "imbecile/grammar"
|
||||
require_relative "imbecile/regex"
|
||||
require_relative "imbecile/regex/nfa"
|
||||
require_relative "imbecile/regex/parser"
|
||||
require_relative "imbecile/regex/unit"
|
||||
require_relative "imbecile/version"
|
||||
require "erb"
|
||||
|
||||
|
@ -41,7 +41,6 @@ module Imbecile
|
||||
# Build NFA from each token expression.
|
||||
@tokens.each do |token_name, token_def|
|
||||
token_def[:regex] = Regex.new(token_def[:pattern])
|
||||
token_def[:nfa] = token_def[:regex].parser.unit.to_nfa
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -1,10 +1,152 @@
|
||||
module Imbecile
|
||||
class Regex
|
||||
|
||||
attr_accessor :parser
|
||||
attr_reader :unit
|
||||
attr_reader :nfa
|
||||
|
||||
# Parses +pattern+ into a unit tree and builds the NFA for it.
#
# @param pattern [String] regular expression source. It is duplicated first
#   because the parse_* helpers consume @pattern destructively.
# @raise [Error] if text is left over once parsing stops (parse_alternates
#   returns early when it meets a ")" that no group is waiting for).
def initialize(pattern)
  # NOTE(review): the commit title says Regex::Parser is merged into Regex, so
  # this assignment looks like a leftover. It is kept because the :parser
  # accessor is still declared on this class -- confirm before removing.
  @parser = Parser.new(pattern)
  @pattern = pattern.dup
  @unit = parse_alternates
  # Reject trailing input before spending any work on NFA construction.
  raise Error.new(%[Unexpected "#{@pattern}" in pattern]) unless @pattern.empty?
  @nfa = @unit.to_nfa
end
|
||||
|
||||
private
|
||||
|
||||
# Consumes @pattern up to (but not including) an unmatched ")" or the end of
# input, collecting what it reads into an AlternatesUnit.
#
# @return [AlternatesUnit] the alternates parsed so far
# @raise [Error] when a repetition operator has no unit to apply to
def parse_alternates
  alternates = AlternatesUnit.new
  until @pattern.empty?
    # A ")" belongs to the caller, so it is left unconsumed.
    break if @pattern[0] == ")"
    char = @pattern.slice!(0)
    case char
    when "[" then alternates << parse_character_class
    when "(" then alternates << parse_group
    when "|" then alternates.new_alternate!
    when "\\" then alternates << parse_backslash
    when "." then alternates << period_character_class
    when "*", "+", "?", "{"
      repeated = alternates.last_unit
      raise Error.new("#{char} follows nothing") unless repeated
      bounds =
        case char
        when "*" then [0, nil]
        when "+" then [1, nil]
        when "?" then [0, 1]
        else parse_curly_count
        end
      # The repeated unit is wrapped in place rather than appended.
      alternates.replace_last!(MultiplicityUnit.new(repeated, *bounds))
    else
      alternates << CharacterRangeUnit.new(char)
    end
  end
  alternates
end
|
||||
|
||||
# Parses the inside of a parenthesised group; the opening "(" has already
# been consumed by the caller.
#
# @return the unit returned by parse_alternates for the group body
# @raise [Error] if the group is not closed by ")"
def parse_group
  group = parse_alternates
  raise Error.new("Unterminated group in pattern") unless @pattern[0] == ")"
  @pattern.slice!(0) # drop the closing ")"
  group
end
|
||||
|
||||
# Parses a bracketed character class; the opening "[" has already been
# consumed by the caller.
#
# A "^" in first position negates the class. A "-" is a literal when it is the
# first member or directly precedes "]"; otherwise it joins the previous
# single character and the next one into a range.
#
# @return [CharacterClassUnit]
# @raise [Error] if the class is never closed, or if either end of a range is
#   not a single character
def parse_character_class
  ccu = CharacterClassUnit.new
  index = 0
  loop do
    raise Error.new("Unterminated character class") if @pattern.empty?
    c = @pattern.slice!(0)
    break if c == "]"
    if c == "^" && index == 0
      ccu.negate = true
    elsif c == "\\"
      ccu << parse_backslash
    elsif c == "-" && ccu.size > 0 && @pattern[0] != "]"
      begin_cu = ccu.last_unit
      unless begin_cu.is_a?(CharacterRangeUnit) && begin_cu.range.size == 1
        raise Error.new("Character range must be between single characters")
      end
      # "[a-" with nothing after the dash used to crash on nil.ord.
      raise Error.new("Unterminated character class") if @pattern.empty?
      if @pattern[0] == "\\"
        @pattern.slice!(0)
        end_cu = parse_backslash
        unless end_cu.is_a?(CharacterRangeUnit) && end_cu.range.size == 1
          raise Error.new("Character range must be between single characters")
        end
        # CharacterRangeUnit exposes min_code_point/max_code_point only; the
        # former code_point call raised NoMethodError.
        max_code_point = end_cu.min_code_point
      else
        max_code_point = @pattern.slice!(0).ord
      end
      ccu.replace_last!(CharacterRangeUnit.new(begin_cu.min_code_point, max_code_point))
    else
      ccu << CharacterRangeUnit.new(c)
    end
    index += 1
  end
  ccu
end
|
||||
|
||||
# Parses the body of a "{n}", "{n,}" or "{n,m}" repetition; the opening "{"
# has already been consumed by the caller. On success everything up to and
# including "}" is removed from @pattern.
#
# The expression is anchored with \A / \z and uses the m flag: with ^ / $ a
# count on a later line was accepted, and any text after a newline was
# silently dropped from the remaining pattern.
#
# @return [Array(Integer, Integer), Array(Integer, nil)] minimum and maximum
#   count; the maximum is nil for the open-ended "{n,}" form
# @raise [Error] if @pattern does not start with a well-formed count, or if
#   the maximum is smaller than the minimum
def parse_curly_count
  match = /\A(\d+)(?:(,)(\d*))?\}(.*)\z/m.match(@pattern)
  raise Error.new("Unexpected match count at #{@pattern}") unless match
  min_count = match[1].to_i
  max_count =
    if match[2].nil?
      min_count # "{n}": exactly n
    elsif match[3].empty?
      nil # "{n,}": no upper bound
    else
      match[3].to_i
    end
  if max_count && max_count < min_count
    raise Error.new("Maximum repetition count cannot be less than minimum repetition count")
  end
  @pattern = match[4]
  [min_count, max_count]
end
|
||||
|
||||
# Parses the character after a backslash; the backslash itself has already
# been consumed by the caller.
#
# @return [CharacterRangeUnit] "0".."9" for \d, otherwise the escaped
#   character taken literally
# @raise [Error] if the backslash is the last character of the pattern
def parse_backslash
  raise Error.new("Error: unfollowed \\") if @pattern.empty?
  escaped = @pattern.slice!(0)
  return CharacterRangeUnit.new("0", "9") if escaped == "d"
  CharacterRangeUnit.new(escaped)
end
|
||||
|
||||
# Builds the character class used for ".": every code point from 0 through
# 0xFFFFFFFF except the newline character.
#
# @return [CharacterClassUnit]
def period_character_class
  newline = "\n".ord
  bounds = [[0, newline - 1], [newline + 1, 0xFFFFFFFF]]
  bounds.each_with_object(CharacterClassUnit.new) do |(low, high), any_char|
    any_char << CharacterRangeUnit.new(low, high)
  end
end
|
||||
|
||||
end
|
||||
|
@ -1,325 +0,0 @@
|
||||
module Imbecile
  class Regex

    # Recursive-descent parser that turns a pattern string into a tree of
    # units and builds an NFA from that tree.
    class Parser

      # Base class for every node of a parsed regular expression.
      class Unit
      end

      # A run of units matched one after another.
      class SequenceUnit < Unit
        attr_accessor :units

        def initialize
          @units = []
        end

        # Delegates unknown messages (<<, [], size, ...) to the @units array.
        # The block is forwarded too, so iterators such as each work.
        def method_missing(name, *args, &block)
          @units.__send__(name, *args, &block)
        end

        # Reports the delegated Array methods through respond_to?.
        def respond_to_missing?(name, include_private = false)
          @units.respond_to?(name, include_private) || super
        end

        # Builds an NFA that matches the units in order.
        #
        # The previous version linked every later unit to the end state of
        # the first unit and returned the first unit's NFA, so nothing after
        # the first unit could reach the returned end state.
        #
        # NOTE(review): assumes NFA#start_state / NFA#end_state are fixed
        # states, as the other units here use them -- confirm against nfa.rb.
        #
        # @return [NFA]
        def to_nfa
          return NFA.empty if @units.empty?
          unit_nfas = @units.map(&:to_nfa)
          return unit_nfas.first if unit_nfas.size == 1
          nfa = NFA.new
          nfa.start_state.add_transition(nil, unit_nfas.first.start_state)
          unit_nfas.each_cons(2) do |left, right|
            left.end_state.add_transition(nil, right.start_state)
          end
          unit_nfas.last.end_state.add_transition(nil, nfa.end_state)
          nfa
        end
      end

      # A choice between one or more SequenceUnits ("a|b|c"). New units are
      # always added to the most recently opened alternate.
      class AlternatesUnit < Unit
        attr_accessor :alternates

        def initialize
          @alternates = []
          new_alternate!
        end

        # Opens a new, empty alternate (called when "|" is read).
        def new_alternate!
          @alternates << SequenceUnit.new
        end

        # Appends +unit+ to the current alternate.
        def <<(unit)
          @alternates[-1] << unit
        end

        # @return the last unit of the current alternate, or nil if it is empty
        def last_unit
          @alternates[-1][-1]
        end

        # Replaces the last unit of the current alternate with +new_unit+.
        def replace_last!(new_unit)
          @alternates[-1][-1] = new_unit
        end

        # Builds an NFA that branches from a shared start state into every
        # alternate and joins them again at a shared end state. A single
        # alternate is returned without the extra wrapper.
        #
        # @return [NFA]
        def to_nfa
          return NFA.empty if @alternates.empty?
          return @alternates[0].to_nfa if @alternates.size == 1
          nfa = NFA.new
          @alternates.map(&:to_nfa).each do |alternate_nfa|
            nfa.start_state.add_transition(nil, alternate_nfa.start_state)
            alternate_nfa.end_state.add_transition(nil, nfa.end_state)
          end
          nfa
        end
      end

      # An inclusive range of code points; a single character is a range of
      # size one.
      class CharacterRangeUnit < Unit
        attr_accessor :min_code_point
        attr_accessor :max_code_point

        # @param c1 [String, Integer] lower bound (anything responding to ord)
        # @param c2 [String, Integer, nil] upper bound; defaults to +c1+
        def initialize(c1, c2 = nil)
          @min_code_point = c1.ord
          @max_code_point = c2 ? c2.ord : @min_code_point
        end

        # @return [Range] min_code_point..max_code_point
        def range
          @min_code_point..@max_code_point
        end

        # @return [NFA] two states joined by one transition on #range
        def to_nfa
          nfa = NFA.new
          nfa.start_state.add_transition(range, nfa.end_state)
          nfa
        end
      end

      # A bracketed character class: a set of CharacterRangeUnits, optionally
      # negated.
      class CharacterClassUnit < Unit
        attr_accessor :units
        attr_accessor :negate

        # The class used to define initialize twice; the second definition
        # replaced the first and left @negate unset.
        def initialize
          @units = []
          @negate = false
        end

        # Delegates unknown messages (<<, [], size, ...) to the @units array,
        # forwarding any block.
        def method_missing(name, *args, &block)
          @units.__send__(name, *args, &block)
        end

        # Reports the delegated Array methods through respond_to?.
        def respond_to_missing?(name, include_private = false)
          @units.respond_to?(name, include_private) || super
        end

        # @return the most recently added unit, or nil
        def last_unit
          @units[-1]
        end

        # Replaces the most recently added unit with +new_unit+.
        def replace_last!(new_unit)
          @units[-1] = new_unit
        end

        # Builds an NFA with one transition per (possibly negated) range. An
        # empty class yields a single nil-labelled transition.
        #
        # @return [NFA]
        def to_nfa
          nfa = NFA.new
          if @units.empty?
            nfa.start_state.add_transition(nil, nfa.end_state)
          else
            ranges = @units.map(&:range)
            ranges = negate_ranges(ranges) if @negate
            ranges.each do |range|
              nfa.start_state.add_transition(range, nfa.end_state)
            end
          end
          nfa
        end

        private

        # Returns the gaps between +ranges+ within 0..0xFFFFFFFF.
        #
        # The covered upper bound advances for every range, not only for
        # those that open a gap; otherwise adjacent or overlapping ranges
        # (0..5 followed by 6..10) were partly handed back as "not in the
        # class".
        def negate_ranges(ranges)
          gaps = []
          covered_up_to = -1
          ranges.sort_by(&:first).each do |range|
            if range.first > covered_up_to + 1
              gaps << ((covered_up_to + 1)..(range.first - 1))
            end
            covered_up_to = [covered_up_to, range.last].max
          end
          gaps << ((covered_up_to + 1)..0xFFFFFFFF) if covered_up_to < 0xFFFFFFFF
          gaps
        end
      end

      # A unit repeated between min_count and max_count times; a nil
      # max_count means no upper bound.
      class MultiplicityUnit < Unit
        attr_accessor :unit
        attr_accessor :min_count
        attr_accessor :max_count

        def initialize(unit, min_count, max_count)
          @unit = unit
          @min_count = min_count
          @max_count = max_count
        end

        # Builds an NFA from chained copies of the wrapped unit's NFA.
        #
        # Fixes over the previous construction:
        # * with min_count 0 the first copy already counts as one optional
        #   repetition, so "a?" no longer accepts "aa";
        # * an unbounded repetition loops on the last copy instead of jumping
        #   back to the start, so "a{2,}" accepts "aaa";
        # * a max_count of 0 matches only the empty string.
        #
        # @return [NFA]
        def to_nfa
          nfa = NFA.new
          if @max_count == 0
            nfa.start_state.add_transition(nil, nfa.end_state)
            return nfa
          end
          unit_nfa = @unit.to_nfa
          nfa.start_state.add_transition(nil, unit_nfa.start_state)
          nfa.start_state.add_transition(nil, nfa.end_state) if @min_count == 0
          # Copies that lie on every path (at least the one created above).
          chained = [@min_count, 1].max
          (chained - 1).times do
            prev_nfa = unit_nfa
            unit_nfa = @unit.to_nfa
            prev_nfa.end_state.add_transition(nil, unit_nfa.start_state)
          end
          unit_nfa.end_state.add_transition(nil, nfa.end_state)
          if @max_count.nil?
            unit_nfa.end_state.add_transition(nil, unit_nfa.start_state)
          else
            # Each extra copy is optional: it can be entered from the previous
            # copy and can finish the match.
            (@max_count - chained).times do
              prev_nfa = unit_nfa
              unit_nfa = @unit.to_nfa
              prev_nfa.end_state.add_transition(nil, unit_nfa.start_state)
              unit_nfa.end_state.add_transition(nil, nfa.end_state)
            end
          end
          nfa
        end
      end

      attr_reader :unit
      attr_reader :nfa

      # Parses +pattern+ and builds its NFA.
      #
      # @param pattern [String] regular expression source (duplicated, because
      #   parsing consumes @pattern destructively)
      # @raise [Error] if input is left over once parsing stops
      def initialize(pattern)
        @pattern = pattern.dup
        @unit = parse_alternates
        # Reject trailing input before building the NFA.
        raise Error.new(%[Unexpected "#{@pattern}" in pattern]) unless @pattern.empty?
        @nfa = @unit.to_nfa
      end

      private

      # Consumes @pattern up to an unmatched ")" or the end of input.
      #
      # @return [AlternatesUnit]
      # @raise [Error] when a repetition operator has nothing to repeat
      def parse_alternates
        au = AlternatesUnit.new
        until @pattern.empty?
          # A ")" belongs to the caller, so it is left unconsumed.
          break if @pattern[0] == ")"
          c = @pattern.slice!(0)
          case c
          when "[" then au << parse_character_class
          when "(" then au << parse_group
          when "|" then au.new_alternate!
          when "\\" then au << parse_backslash
          when "." then au << period_character_class
          when "*", "+", "?", "{"
            last_unit = au.last_unit
            raise Error.new("#{c} follows nothing") unless last_unit
            min_count, max_count =
              case c
              when "*" then [0, nil]
              when "+" then [1, nil]
              when "?" then [0, 1]
              else parse_curly_count
              end
            au.replace_last!(MultiplicityUnit.new(last_unit, min_count, max_count))
          else
            au << CharacterRangeUnit.new(c)
          end
        end
        au
      end

      # Parses a group body; the "(" has already been consumed.
      #
      # @raise [Error] if the group is not closed by ")"
      def parse_group
        au = parse_alternates
        raise Error.new("Unterminated group in pattern") unless @pattern[0] == ")"
        @pattern.slice!(0)
        au
      end

      # Parses a character class; the "[" has already been consumed.
      #
      # @return [CharacterClassUnit]
      # @raise [Error] if the class is never closed or a range end is not a
      #   single character
      def parse_character_class
        ccu = CharacterClassUnit.new
        index = 0
        loop do
          raise Error.new("Unterminated character class") if @pattern.empty?
          c = @pattern.slice!(0)
          break if c == "]"
          if c == "^" && index == 0
            ccu.negate = true
          elsif c == "\\"
            ccu << parse_backslash
          elsif c == "-" && ccu.size > 0 && @pattern[0] != "]"
            begin_cu = ccu.last_unit
            unless begin_cu.is_a?(CharacterRangeUnit) && begin_cu.range.size == 1
              raise Error.new("Character range must be between single characters")
            end
            # "[a-" with nothing after the dash used to crash on nil.ord.
            raise Error.new("Unterminated character class") if @pattern.empty?
            if @pattern[0] == "\\"
              @pattern.slice!(0)
              end_cu = parse_backslash
              unless end_cu.is_a?(CharacterRangeUnit) && end_cu.range.size == 1
                raise Error.new("Character range must be between single characters")
              end
              # code_point is not defined on CharacterRangeUnit.
              max_code_point = end_cu.min_code_point
            else
              max_code_point = @pattern.slice!(0).ord
            end
            ccu.replace_last!(CharacterRangeUnit.new(begin_cu.min_code_point, max_code_point))
          else
            ccu << CharacterRangeUnit.new(c)
          end
          index += 1
        end
        ccu
      end

      # Parses "n}", "n,}" or "n,m}"; the "{" has already been consumed.
      # Anchored with \A / \z and the m flag so that text after a newline is
      # neither matched as a count nor dropped from the remaining pattern.
      #
      # @return [Array] [min_count, max_count]; max_count is nil for "n,}"
      # @raise [Error] on a malformed count or when max < min
      def parse_curly_count
        match = /\A(\d+)(?:(,)(\d*))?\}(.*)\z/m.match(@pattern)
        raise Error.new("Unexpected match count at #{@pattern}") unless match
        min_count = match[1].to_i
        max_count =
          if match[2].nil?
            min_count
          elsif match[3].empty?
            nil
          else
            match[3].to_i
          end
        if max_count && max_count < min_count
          raise Error.new("Maximum repetition count cannot be less than minimum repetition count")
        end
        @pattern = match[4]
        [min_count, max_count]
      end

      # Parses the character after a backslash (already consumed).
      #
      # @return [CharacterRangeUnit] "0".."9" for \d, else the literal character
      # @raise [Error] if nothing follows the backslash
      def parse_backslash
        raise Error.new("Error: unfollowed \\") if @pattern.empty?
        c = @pattern.slice!(0)
        case c
        when "d"
          CharacterRangeUnit.new("0", "9")
        else
          CharacterRangeUnit.new(c)
        end
      end

      # @return [CharacterClassUnit] every code point up to 0xFFFFFFFF except
      #   newline, used for "."
      def period_character_class
        ccu = CharacterClassUnit.new
        ccu << CharacterRangeUnit.new(0, "\n".ord - 1)
        ccu << CharacterRangeUnit.new("\n".ord + 1, 0xFFFFFFFF)
        ccu
      end

    end

  end
end
|
173
lib/imbecile/regex/unit.rb
Normal file
173
lib/imbecile/regex/unit.rb
Normal file
@ -0,0 +1,173 @@
|
||||
module Imbecile
  class Regex

    # Base class for every node of a parsed regular expression.
    class Unit
    end

    # A run of units matched one after another.
    class SequenceUnit < Unit
      attr_accessor :units

      def initialize
        @units = []
      end

      # Delegates unknown messages (<<, [], size, ...) to the @units array.
      # The block is forwarded too, so iterators such as each work.
      def method_missing(name, *args, &block)
        @units.__send__(name, *args, &block)
      end

      # Reports the delegated Array methods through respond_to?.
      def respond_to_missing?(name, include_private = false)
        @units.respond_to?(name, include_private) || super
      end

      # Builds an NFA that matches the units in order.
      #
      # The previous version linked every later unit to the end state of the
      # first unit and returned the first unit's NFA, so nothing after the
      # first unit could reach the returned end state.
      #
      # NOTE(review): assumes NFA#start_state / NFA#end_state are fixed
      # states, as the other units here use them -- confirm against nfa.rb.
      #
      # @return [NFA]
      def to_nfa
        return NFA.empty if @units.empty?
        unit_nfas = @units.map(&:to_nfa)
        return unit_nfas.first if unit_nfas.size == 1
        nfa = NFA.new
        nfa.start_state.add_transition(nil, unit_nfas.first.start_state)
        unit_nfas.each_cons(2) do |left, right|
          left.end_state.add_transition(nil, right.start_state)
        end
        unit_nfas.last.end_state.add_transition(nil, nfa.end_state)
        nfa
      end
    end

    # A choice between one or more SequenceUnits ("a|b|c"). New units are
    # always added to the most recently opened alternate.
    class AlternatesUnit < Unit
      attr_accessor :alternates

      def initialize
        @alternates = []
        new_alternate!
      end

      # Opens a new, empty alternate (called when "|" is read).
      def new_alternate!
        @alternates << SequenceUnit.new
      end

      # Appends +unit+ to the current alternate.
      def <<(unit)
        @alternates[-1] << unit
      end

      # @return the last unit of the current alternate, or nil if it is empty
      def last_unit
        @alternates[-1][-1]
      end

      # Replaces the last unit of the current alternate with +new_unit+.
      def replace_last!(new_unit)
        @alternates[-1][-1] = new_unit
      end

      # Builds an NFA that branches from a shared start state into every
      # alternate and joins them again at a shared end state. A single
      # alternate is returned without the extra wrapper.
      #
      # @return [NFA]
      def to_nfa
        return NFA.empty if @alternates.empty?
        return @alternates[0].to_nfa if @alternates.size == 1
        nfa = NFA.new
        @alternates.map(&:to_nfa).each do |alternate_nfa|
          nfa.start_state.add_transition(nil, alternate_nfa.start_state)
          alternate_nfa.end_state.add_transition(nil, nfa.end_state)
        end
        nfa
      end
    end

    # An inclusive range of code points; a single character is a range of
    # size one.
    class CharacterRangeUnit < Unit
      attr_accessor :min_code_point
      attr_accessor :max_code_point

      # @param c1 [String, Integer] lower bound (anything responding to ord)
      # @param c2 [String, Integer, nil] upper bound; defaults to +c1+
      def initialize(c1, c2 = nil)
        @min_code_point = c1.ord
        @max_code_point = c2 ? c2.ord : @min_code_point
      end

      # @return [Range] min_code_point..max_code_point
      def range
        @min_code_point..@max_code_point
      end

      # @return [NFA] two states joined by one transition on #range
      def to_nfa
        nfa = NFA.new
        nfa.start_state.add_transition(range, nfa.end_state)
        nfa
      end
    end

    # A bracketed character class: a set of CharacterRangeUnits, optionally
    # negated.
    class CharacterClassUnit < Unit
      attr_accessor :units
      attr_accessor :negate

      # The class used to define initialize twice; the second definition
      # replaced the first and left @negate unset.
      def initialize
        @units = []
        @negate = false
      end

      # Delegates unknown messages (<<, [], size, ...) to the @units array,
      # forwarding any block.
      def method_missing(name, *args, &block)
        @units.__send__(name, *args, &block)
      end

      # Reports the delegated Array methods through respond_to?.
      def respond_to_missing?(name, include_private = false)
        @units.respond_to?(name, include_private) || super
      end

      # @return the most recently added unit, or nil
      def last_unit
        @units[-1]
      end

      # Replaces the most recently added unit with +new_unit+.
      def replace_last!(new_unit)
        @units[-1] = new_unit
      end

      # Builds an NFA with one transition per (possibly negated) range. An
      # empty class yields a single nil-labelled transition.
      #
      # @return [NFA]
      def to_nfa
        nfa = NFA.new
        if @units.empty?
          nfa.start_state.add_transition(nil, nfa.end_state)
        else
          ranges = @units.map(&:range)
          ranges = negate_ranges(ranges) if @negate
          ranges.each do |range|
            nfa.start_state.add_transition(range, nfa.end_state)
          end
        end
        nfa
      end

      private

      # Returns the gaps between +ranges+ within 0..0xFFFFFFFF.
      #
      # The covered upper bound advances for every range, not only for those
      # that open a gap; otherwise adjacent or overlapping ranges (0..5
      # followed by 6..10) were partly handed back as "not in the class".
      def negate_ranges(ranges)
        gaps = []
        covered_up_to = -1
        ranges.sort_by(&:first).each do |range|
          if range.first > covered_up_to + 1
            gaps << ((covered_up_to + 1)..(range.first - 1))
          end
          covered_up_to = [covered_up_to, range.last].max
        end
        gaps << ((covered_up_to + 1)..0xFFFFFFFF) if covered_up_to < 0xFFFFFFFF
        gaps
      end
    end

    # A unit repeated between min_count and max_count times; a nil max_count
    # means no upper bound.
    class MultiplicityUnit < Unit
      attr_accessor :unit
      attr_accessor :min_count
      attr_accessor :max_count

      def initialize(unit, min_count, max_count)
        @unit = unit
        @min_count = min_count
        @max_count = max_count
      end

      # Builds an NFA from chained copies of the wrapped unit's NFA.
      #
      # Fixes over the previous construction:
      # * with min_count 0 the first copy already counts as one optional
      #   repetition, so "a?" no longer accepts "aa";
      # * an unbounded repetition loops on the last copy instead of jumping
      #   back to the start, so "a{2,}" accepts "aaa";
      # * a max_count of 0 matches only the empty string.
      #
      # @return [NFA]
      def to_nfa
        nfa = NFA.new
        if @max_count == 0
          nfa.start_state.add_transition(nil, nfa.end_state)
          return nfa
        end
        unit_nfa = @unit.to_nfa
        nfa.start_state.add_transition(nil, unit_nfa.start_state)
        nfa.start_state.add_transition(nil, nfa.end_state) if @min_count == 0
        # Copies that lie on every path (at least the one created above).
        chained = [@min_count, 1].max
        (chained - 1).times do
          prev_nfa = unit_nfa
          unit_nfa = @unit.to_nfa
          prev_nfa.end_state.add_transition(nil, unit_nfa.start_state)
        end
        unit_nfa.end_state.add_transition(nil, nfa.end_state)
        if @max_count.nil?
          unit_nfa.end_state.add_transition(nil, unit_nfa.start_state)
        else
          # Each extra copy is optional: it can be entered from the previous
          # copy and can finish the match.
          (@max_count - chained).times do
            prev_nfa = unit_nfa
            unit_nfa = @unit.to_nfa
            prev_nfa.end_state.add_transition(nil, unit_nfa.start_state)
            unit_nfa.end_state.add_transition(nil, nfa.end_state)
          end
        end
        nfa
      end
    end

  end
end
|
@ -1,335 +0,0 @@
|
||||
module Imbecile
  class Regex
    RSpec.describe Parser do

      # Parses +pattern+, checks that the root is an AlternatesUnit holding
      # exactly one SequenceUnit, and returns that sequence.
      def sole_sequence(pattern)
        root = Parser.new(pattern).unit
        expect(root).to be_a Parser::AlternatesUnit
        expect(root.alternates.size).to eq 1
        expect(root.alternates[0]).to be_a Parser::SequenceUnit
        root.alternates[0]
      end

      # Like sole_sequence, but also checks that the sequence holds exactly
      # one unit and returns that unit.
      def sole_unit(pattern)
        sequence = sole_sequence(pattern)
        expect(sequence.size).to eq 1
        sequence[0]
      end

      it "parses an empty expression" do
        root = Parser.new("").unit
        expect(root).to be_a Parser::AlternatesUnit
        expect(root.alternates.size).to eq 1
        expect(root.alternates[0].size).to eq 0
      end

      it "parses a single character unit expression" do
        expect(sole_unit("a")).to be_a Parser::CharacterRangeUnit
      end

      it "parses a group with a single character unit expression" do
        group = sole_unit("(a)")
        expect(group).to be_a Parser::AlternatesUnit
        expect(group.alternates.size).to eq 1
        expect(group.alternates[0]).to be_a Parser::SequenceUnit
        expect(group.alternates[0][0]).to be_a Parser::CharacterRangeUnit
      end

      it "parses a *" do
        repeat = sole_unit("a*")
        expect(repeat).to be_a Parser::MultiplicityUnit
        expect(repeat.min_count).to eq 0
        expect(repeat.max_count).to be_nil
        expect(repeat.unit).to be_a Parser::CharacterRangeUnit
      end

      it "parses a +" do
        repeat = sole_unit("a+")
        expect(repeat).to be_a Parser::MultiplicityUnit
        expect(repeat.min_count).to eq 1
        expect(repeat.max_count).to be_nil
        expect(repeat.unit).to be_a Parser::CharacterRangeUnit
      end

      it "parses a ?" do
        repeat = sole_unit("a?")
        expect(repeat).to be_a Parser::MultiplicityUnit
        expect(repeat.min_count).to eq 0
        expect(repeat.max_count).to eq 1
        expect(repeat.unit).to be_a Parser::CharacterRangeUnit
      end

      it "parses a multiplicity count" do
        repeat = sole_unit("a{5}")
        expect(repeat).to be_a Parser::MultiplicityUnit
        expect(repeat.min_count).to eq 5
        expect(repeat.max_count).to eq 5
        expect(repeat.unit).to be_a Parser::CharacterRangeUnit
      end

      it "parses a minimum-only multiplicity count" do
        repeat = sole_unit("a{5,}")
        expect(repeat).to be_a Parser::MultiplicityUnit
        expect(repeat.min_count).to eq 5
        expect(repeat.max_count).to be_nil
        expect(repeat.unit).to be_a Parser::CharacterRangeUnit
      end

      it "parses a minimum and maximum multiplicity count" do
        repeat = sole_unit("a{5,8}")
        expect(repeat).to be_a Parser::MultiplicityUnit
        expect(repeat.min_count).to eq 5
        expect(repeat.max_count).to eq 8
        expect(repeat.unit).to be_a Parser::CharacterRangeUnit
        expect(repeat.unit.range.first).to eq "a".ord
      end

      it "parses an escaped *" do
        sequence = sole_sequence("a\\*")
        expect(sequence.size).to eq 2
        expect(sequence[0]).to be_a Parser::CharacterRangeUnit
        expect(sequence[0].min_code_point).to eq "a".ord
        expect(sequence[1]).to be_a Parser::CharacterRangeUnit
        expect(sequence[1].min_code_point).to eq "*".ord
      end

      it "parses an escaped +" do
        sequence = sole_sequence("a\\+")
        expect(sequence.size).to eq 2
        expect(sequence[0]).to be_a Parser::CharacterRangeUnit
        expect(sequence[0].min_code_point).to eq "a".ord
        expect(sequence[1]).to be_a Parser::CharacterRangeUnit
        expect(sequence[1].min_code_point).to eq "+".ord
      end

      it "parses an escaped \\" do
        sequence = sole_sequence("\\\\d")
        expect(sequence.size).to eq 2
        expect(sequence[0]).to be_a Parser::CharacterRangeUnit
        expect(sequence[0].min_code_point).to eq "\\".ord
        expect(sequence[1]).to be_a Parser::CharacterRangeUnit
        expect(sequence[1].min_code_point).to eq "d".ord
      end

      it "parses a character class" do
        char_class = sole_unit("[a-z_]")
        expect(char_class).to be_a Parser::CharacterClassUnit
        expect(char_class.negate).to be_falsey
        expect(char_class.size).to eq 2
        expect(char_class[0]).to be_a Parser::CharacterRangeUnit
        expect(char_class[0].min_code_point).to eq "a".ord
        expect(char_class[0].max_code_point).to eq "z".ord
        expect(char_class[1]).to be_a Parser::CharacterRangeUnit
        expect(char_class[1].min_code_point).to eq "_".ord
      end

      it "parses a negated character class" do
        char_class = sole_unit("[^xyz]")
        expect(char_class).to be_a Parser::CharacterClassUnit
        expect(char_class.negate).to be_truthy
        expect(char_class.size).to eq 3
        expect(char_class[0]).to be_a Parser::CharacterRangeUnit
        expect(char_class[0].min_code_point).to eq "x".ord
      end

      it "parses - as a plain character at beginning of a character class" do
        char_class = sole_unit("[-9]")
        expect(char_class).to be_a Parser::CharacterClassUnit
        expect(char_class.size).to eq 2
        expect(char_class[0]).to be_a Parser::CharacterRangeUnit
        expect(char_class[0].min_code_point).to eq "-".ord
      end

      it "parses - as a plain character at end of a character class" do
        char_class = sole_unit("[0-]")
        expect(char_class).to be_a Parser::CharacterClassUnit
        expect(char_class.size).to eq 2
        expect(char_class[0]).to be_a Parser::CharacterRangeUnit
        expect(char_class[0].min_code_point).to eq "0".ord
        expect(char_class[1]).to be_a Parser::CharacterRangeUnit
        expect(char_class[1].min_code_point).to eq "-".ord
      end

      it "parses - as a plain character at beginning of a negated character class" do
        char_class = sole_unit("[^-9]")
        expect(char_class).to be_a Parser::CharacterClassUnit
        expect(char_class.negate).to be_truthy
        expect(char_class.size).to eq 2
        expect(char_class[0]).to be_a Parser::CharacterRangeUnit
        expect(char_class[0].min_code_point).to eq "-".ord
      end

      it "parses . as a plain character in a character class" do
        char_class = sole_unit("[.]")
        expect(char_class).to be_a Parser::CharacterClassUnit
        expect(char_class.negate).to be_falsey
        expect(char_class.size).to eq 1
        expect(char_class[0]).to be_a Parser::CharacterRangeUnit
        expect(char_class[0].min_code_point).to eq ".".ord
      end

      it "parses - as a plain character when escaped in middle of character class" do
        char_class = sole_unit("[0\\-9]")
        expect(char_class).to be_a Parser::CharacterClassUnit
        expect(char_class.negate).to be_falsey
        expect(char_class.size).to eq 3
        expect(char_class[0]).to be_a Parser::CharacterRangeUnit
        expect(char_class[0].min_code_point).to eq "0".ord
        expect(char_class[1]).to be_a Parser::CharacterRangeUnit
        expect(char_class[1].min_code_point).to eq "-".ord
        expect(char_class[2]).to be_a Parser::CharacterRangeUnit
        expect(char_class[2].min_code_point).to eq "9".ord
      end

      it "parses alternates" do
        root = Parser.new("ab|c").unit
        expect(root).to be_a Parser::AlternatesUnit
        expect(root.alternates.size).to eq 2
        expect(root.alternates[0]).to be_a Parser::SequenceUnit
        expect(root.alternates[1]).to be_a Parser::SequenceUnit
        expect(root.alternates[0].size).to eq 2
        expect(root.alternates[1].size).to eq 1
      end

      it "parses a ." do
        sequence = sole_sequence("a.b")
        expect(sequence[0]).to be_a Parser::CharacterRangeUnit
        expect(sequence[1]).to be_a Parser::CharacterClassUnit
        expect(sequence[1].units.size).to eq 2
        expect(sequence[2]).to be_a Parser::CharacterRangeUnit
      end

      it "parses something complex" do
        root = Parser.new("(a|)*|[^^]|\\|v|[x-y]+").unit
        expect(root).to be_a Parser::AlternatesUnit
        expect(root.alternates.size).to eq 4
        starred, negated, escaped, plussed = root.alternates

        # First alternate: (a|)*
        expect(starred).to be_a Parser::SequenceUnit
        expect(starred.size).to eq 1
        star = starred[0]
        expect(star).to be_a Parser::MultiplicityUnit
        expect(star.min_count).to eq 0
        expect(star.max_count).to be_nil
        group = star.unit
        expect(group).to be_a Parser::AlternatesUnit
        expect(group.alternates.size).to eq 2
        expect(group.alternates[0]).to be_a Parser::SequenceUnit
        expect(group.alternates[0].size).to eq 1
        expect(group.alternates[0][0]).to be_a Parser::CharacterRangeUnit
        expect(group.alternates[1]).to be_a Parser::SequenceUnit
        expect(group.alternates[1].size).to eq 0

        # Second alternate: [^^]
        expect(negated).to be_a Parser::SequenceUnit
        expect(negated.size).to eq 1
        expect(negated[0]).to be_a Parser::CharacterClassUnit
        expect(negated[0].negate).to be_truthy
        expect(negated[0].size).to eq 1
        expect(negated[0][0]).to be_a Parser::CharacterRangeUnit

        # Third alternate: \|v
        expect(escaped).to be_a Parser::SequenceUnit
        expect(escaped.size).to eq 2
        expect(escaped[0]).to be_a Parser::CharacterRangeUnit
        expect(escaped[0].min_code_point).to eq "|".ord
        expect(escaped[1]).to be_a Parser::CharacterRangeUnit
        expect(escaped[1].min_code_point).to eq "v".ord

        # Fourth alternate: [x-y]+
        expect(plussed).to be_a Parser::SequenceUnit
        expect(plussed.size).to eq 1
        plus = plussed[0]
        expect(plus).to be_a Parser::MultiplicityUnit
        expect(plus.min_count).to eq 1
        expect(plus.max_count).to be_nil
        expect(plus.unit).to be_a Parser::CharacterClassUnit
        expect(plus.unit.size).to eq 1
        expect(plus.unit[0]).to be_a Parser::CharacterRangeUnit
        expect(plus.unit[0].min_code_point).to eq "x".ord
        expect(plus.unit[0].max_code_point).to eq "y".ord
      end

    end
  end
end
|
333
spec/imbecile/regex_spec.rb
Normal file
333
spec/imbecile/regex_spec.rb
Normal file
@ -0,0 +1,333 @@
|
||||
module Imbecile
|
||||
RSpec.describe Regex do
|
||||
|
||||
# An empty pattern is expected to produce an AlternatesUnit holding exactly
# one alternate, and that alternate is expected to contain no units.
it "parses an empty expression" do
  root = Regex.new("").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  expect(branches[0].size).to eq(0)
end
|
||||
|
||||
# A one-character pattern is expected to become a single alternate whose
# sequence holds one CharacterRangeUnit.
it "parses a single character unit expression" do
  root = Regex.new("a").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(1)
  expect(sequence[0]).to be_a(Regex::CharacterRangeUnit)
end
|
||||
|
||||
# A parenthesized group is expected to appear as a nested AlternatesUnit
# inside the outer sequence, with its own single-unit sequence.
it "parses a group with a single character unit expression" do
  root = Regex.new("(a)").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(1)

  # The group itself.
  group = sequence[0]
  expect(group).to be_an(Regex::AlternatesUnit)
  group_branches = group.alternates
  expect(group_branches.size).to eq(1)
  expect(group_branches[0]).to be_a(Regex::SequenceUnit)
  expect(group_branches[0][0]).to be_a(Regex::CharacterRangeUnit)
end
|
||||
|
||||
# "a*" is expected to yield one MultiplicityUnit (min_count 0, max_count nil)
# that wraps a CharacterRangeUnit.
it "parses a *" do
  root = Regex.new("a*").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(1)
  repeated = sequence[0]
  expect(repeated).to be_a(Regex::MultiplicityUnit)
  expect(repeated.min_count).to eq(0)
  expect(repeated.max_count).to be_nil
  expect(repeated.unit).to be_a(Regex::CharacterRangeUnit)
end
|
||||
|
||||
# "a+" is expected to yield one MultiplicityUnit (min_count 1, max_count nil)
# that wraps a CharacterRangeUnit.
it "parses a +" do
  root = Regex.new("a+").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(1)
  repeated = sequence[0]
  expect(repeated).to be_a(Regex::MultiplicityUnit)
  expect(repeated.min_count).to eq(1)
  expect(repeated.max_count).to be_nil
  expect(repeated.unit).to be_a(Regex::CharacterRangeUnit)
end
|
||||
|
||||
# "a?" is expected to yield one MultiplicityUnit (min_count 0, max_count 1)
# that wraps a CharacterRangeUnit.
it "parses a ?" do
  root = Regex.new("a?").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(1)
  repeated = sequence[0]
  expect(repeated).to be_a(Regex::MultiplicityUnit)
  expect(repeated.min_count).to eq(0)
  expect(repeated.max_count).to eq(1)
  expect(repeated.unit).to be_a(Regex::CharacterRangeUnit)
end
|
||||
|
||||
# An exact curly count "{5}" is expected to set min_count and max_count to
# the same value on the resulting MultiplicityUnit.
it "parses a multiplicity count" do
  root = Regex.new("a{5}").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(1)
  repeated = sequence[0]
  expect(repeated).to be_a(Regex::MultiplicityUnit)
  expect(repeated.min_count).to eq(5)
  expect(repeated.max_count).to eq(5)
  expect(repeated.unit).to be_a(Regex::CharacterRangeUnit)
end
|
||||
|
||||
# An open-ended curly count "{5,}" is expected to set min_count to 5 and
# leave max_count nil.
it "parses a minimum-only multiplicity count" do
  root = Regex.new("a{5,}").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(1)
  repeated = sequence[0]
  expect(repeated).to be_a(Regex::MultiplicityUnit)
  expect(repeated.min_count).to eq(5)
  expect(repeated.max_count).to be_nil
  expect(repeated.unit).to be_a(Regex::CharacterRangeUnit)
end
|
||||
|
||||
# A bounded curly count "{5,8}" is expected to set min_count 5 and
# max_count 8, with the repeated unit's range starting at "a".
it "parses a minimum and maximum multiplicity count" do
  root = Regex.new("a{5,8}").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(1)
  repeated = sequence[0]
  expect(repeated).to be_a(Regex::MultiplicityUnit)
  expect(repeated.min_count).to eq(5)
  expect(repeated.max_count).to eq(8)
  inner = repeated.unit
  expect(inner).to be_a(Regex::CharacterRangeUnit)
  expect(inner.range.first).to eq("a".ord)
end
|
||||
|
||||
# A backslash-escaped "*" is expected to be a plain character unit, so the
# sequence holds two CharacterRangeUnits: "a" then "*".
it "parses an escaped *" do
  root = Regex.new("a\\*").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(2)
  expect(sequence[0]).to be_a(Regex::CharacterRangeUnit)
  expect(sequence[0].min_code_point).to eq("a".ord)
  expect(sequence[1]).to be_a(Regex::CharacterRangeUnit)
  expect(sequence[1].min_code_point).to eq("*".ord)
end
|
||||
|
||||
# A backslash-escaped "+" is expected to be a plain character unit, so the
# sequence holds two CharacterRangeUnits: "a" then "+".
it "parses an escaped +" do
  root = Regex.new("a\\+").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(2)
  expect(sequence[0]).to be_a(Regex::CharacterRangeUnit)
  expect(sequence[0].min_code_point).to eq("a".ord)
  expect(sequence[1]).to be_a(Regex::CharacterRangeUnit)
  expect(sequence[1].min_code_point).to eq("+".ord)
end
|
||||
|
||||
# The pattern here is two backslashes followed by "d"; it is expected to
# parse as a literal backslash unit followed by a literal "d" unit.
it "parses an escaped \\" do
  root = Regex.new("\\\\d").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(2)
  expect(sequence[0]).to be_a(Regex::CharacterRangeUnit)
  expect(sequence[0].min_code_point).to eq("\\".ord)
  expect(sequence[1]).to be_a(Regex::CharacterRangeUnit)
  expect(sequence[1].min_code_point).to eq("d".ord)
end
|
||||
|
||||
# "[a-z_]" is expected to give a non-negated CharacterClassUnit with two
# members: the range a..z and the single character "_".
it "parses a character class" do
  root = Regex.new("[a-z_]").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(1)
  char_class = sequence[0]
  expect(char_class).to be_a(Regex::CharacterClassUnit)
  expect(char_class.negate).to be_falsey
  expect(char_class.size).to eq(2)

  letters = char_class[0]
  expect(letters).to be_a(Regex::CharacterRangeUnit)
  expect(letters.min_code_point).to eq("a".ord)
  expect(letters.max_code_point).to eq("z".ord)

  underscore = char_class[1]
  expect(underscore).to be_a(Regex::CharacterRangeUnit)
  expect(underscore.min_code_point).to eq("_".ord)
end
|
||||
|
||||
# A leading "^" inside brackets is expected to set negate on the
# CharacterClassUnit; the three listed characters remain its members.
it "parses a negated character class" do
  root = Regex.new("[^xyz]").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(1)
  char_class = sequence[0]
  expect(char_class).to be_a(Regex::CharacterClassUnit)
  expect(char_class.negate).to be_truthy
  expect(char_class.size).to eq(3)
  expect(char_class[0]).to be_a(Regex::CharacterRangeUnit)
  expect(char_class[0].min_code_point).to eq("x".ord)
end
|
||||
|
||||
# A "-" directly after "[" is expected to be an ordinary member of the
# class rather than a range operator.
it "parses - as a plain character at beginning of a character class" do
  root = Regex.new("[-9]").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(1)
  char_class = sequence[0]
  expect(char_class).to be_a(Regex::CharacterClassUnit)
  expect(char_class.size).to eq(2)
  expect(char_class[0]).to be_a(Regex::CharacterRangeUnit)
  expect(char_class[0].min_code_point).to eq("-".ord)
end
|
||||
|
||||
# A "-" directly before "]" is expected to be an ordinary member of the
# class, giving two single-character members: "0" and "-".
it "parses - as a plain character at end of a character class" do
  root = Regex.new("[0-]").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(1)
  char_class = sequence[0]
  expect(char_class).to be_a(Regex::CharacterClassUnit)
  expect(char_class.size).to eq(2)
  expect(char_class[0]).to be_a(Regex::CharacterRangeUnit)
  expect(char_class[0].min_code_point).to eq("0".ord)
  expect(char_class[1]).to be_a(Regex::CharacterRangeUnit)
  expect(char_class[1].min_code_point).to eq("-".ord)
end
|
||||
|
||||
# In "[^-9]" the class is expected to be negated and the "-" following the
# "^" is expected to be an ordinary member.
it "parses - as a plain character at beginning of a negated character class" do
  root = Regex.new("[^-9]").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(1)
  char_class = sequence[0]
  expect(char_class).to be_a(Regex::CharacterClassUnit)
  expect(char_class.negate).to be_truthy
  expect(char_class.size).to eq(2)
  expect(char_class[0]).to be_a(Regex::CharacterRangeUnit)
  expect(char_class[0].min_code_point).to eq("-".ord)
end
|
||||
|
||||
# Inside brackets a "." is expected to be a literal member of the class,
# giving a non-negated class with exactly one member.
it "parses . as a plain character in a character class" do
  root = Regex.new("[.]").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(1)
  char_class = sequence[0]
  expect(char_class).to be_a(Regex::CharacterClassUnit)
  expect(char_class.negate).to be_falsey
  expect(char_class.size).to eq(1)
  expect(char_class[0]).to be_a(Regex::CharacterRangeUnit)
  expect(char_class[0].min_code_point).to eq(".".ord)
end
|
||||
|
||||
# An escaped "-" between two characters is expected to be a member in its
# own right, so the class holds three members: "0", "-" and "9".
it "parses - as a plain character when escaped in middle of character class" do
  root = Regex.new("[0\\-9]").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence.size).to eq(1)
  char_class = sequence[0]
  expect(char_class).to be_a(Regex::CharacterClassUnit)
  expect(char_class.negate).to be_falsey
  expect(char_class.size).to eq(3)
  expect(char_class[0]).to be_a(Regex::CharacterRangeUnit)
  expect(char_class[0].min_code_point).to eq("0".ord)
  expect(char_class[1]).to be_a(Regex::CharacterRangeUnit)
  expect(char_class[1].min_code_point).to eq("-".ord)
  expect(char_class[2]).to be_a(Regex::CharacterRangeUnit)
  expect(char_class[2].min_code_point).to eq("9".ord)
end
|
||||
|
||||
# "ab|c" is expected to split into two alternates: a two-unit sequence and
# a one-unit sequence.
it "parses alternates" do
  root = Regex.new("ab|c").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(2)
  expect(branches[0]).to be_a(Regex::SequenceUnit)
  expect(branches[1]).to be_a(Regex::SequenceUnit)
  expect(branches[0].size).to eq(2)
  expect(branches[1].size).to eq(1)
end
|
||||
|
||||
# An unescaped "." between two literals is expected to become a
# CharacterClassUnit whose `units` collection has two entries.
it "parses a ." do
  root = Regex.new("a.b").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(1)
  sequence = branches[0]
  expect(sequence).to be_a(Regex::SequenceUnit)
  expect(sequence[0]).to be_a(Regex::CharacterRangeUnit)
  expect(sequence[1]).to be_a(Regex::CharacterClassUnit)
  expect(sequence[1].units.size).to eq(2)
  expect(sequence[2]).to be_a(Regex::CharacterRangeUnit)
end
|
||||
|
||||
# Exercises a pattern with four top-level alternates:
#   1. "(a|)*"  - a starred group containing "a" and an empty alternate
#   2. "[^^]"   - a negated class whose only member is "^"
#   3. "\|v"    - an escaped "|" followed by "v"
#   4. "[x-y]+" - a one-or-more repetition of the class x..y
it "parses something complex" do
  root = Regex.new("(a|)*|[^^]|\\|v|[x-y]+").unit
  expect(root).to be_an(Regex::AlternatesUnit)
  branches = root.alternates
  expect(branches.size).to eq(4)

  # First alternate: (a|)*
  expect(branches[0]).to be_a(Regex::SequenceUnit)
  expect(branches[0].size).to eq(1)
  star = branches[0][0]
  expect(star).to be_a(Regex::MultiplicityUnit)
  expect(star.min_count).to eq(0)
  expect(star.max_count).to be_nil
  expect(star.unit).to be_an(Regex::AlternatesUnit)
  group_branches = star.unit.alternates
  expect(group_branches.size).to eq(2)
  expect(group_branches[0]).to be_a(Regex::SequenceUnit)
  expect(group_branches[0].size).to eq(1)
  expect(group_branches[0][0]).to be_a(Regex::CharacterRangeUnit)
  expect(group_branches[1]).to be_a(Regex::SequenceUnit)
  expect(group_branches[1].size).to eq(0)

  # Second alternate: [^^]
  expect(branches[1]).to be_a(Regex::SequenceUnit)
  expect(branches[1].size).to eq(1)
  negated = branches[1][0]
  expect(negated).to be_a(Regex::CharacterClassUnit)
  expect(negated.negate).to be_truthy
  expect(negated.size).to eq(1)
  expect(negated[0]).to be_a(Regex::CharacterRangeUnit)

  # Third alternate: \|v
  expect(branches[2]).to be_a(Regex::SequenceUnit)
  expect(branches[2].size).to eq(2)
  expect(branches[2][0]).to be_a(Regex::CharacterRangeUnit)
  expect(branches[2][0].min_code_point).to eq("|".ord)
  expect(branches[2][1]).to be_a(Regex::CharacterRangeUnit)
  expect(branches[2][1].min_code_point).to eq("v".ord)

  # Fourth alternate: [x-y]+
  expect(branches[3]).to be_a(Regex::SequenceUnit)
  expect(branches[3].size).to eq(1)
  plus = branches[3][0]
  expect(plus).to be_a(Regex::MultiplicityUnit)
  expect(plus.min_count).to eq(1)
  expect(plus.max_count).to be_nil
  expect(plus.unit).to be_a(Regex::CharacterClassUnit)
  expect(plus.unit.size).to eq(1)
  expect(plus.unit[0]).to be_a(Regex::CharacterRangeUnit)
  expect(plus.unit[0].min_code_point).to eq("x".ord)
  expect(plus.unit[0].max_code_point).to eq("y".ord)
end
|
||||
|
||||
end
|
||||
end
|
Loading…
x
Reference in New Issue
Block a user