Parse grammar input by multiline regex

This commit is contained in:
Josh Holtrop 2021-06-12 22:57:32 -04:00
parent 03035a25a5
commit 9d05861819

View File

@@ -15,37 +15,45 @@ module Imbecile
     def initialize(input)
       @tokens = []
-      token_names = Set.new
-      input.each_line.each_with_index do |line, line_index|
-        line = line.chomp
-        line_number = line_index + 1
-        if line =~ /^\s*#/
-          # Skip comment lines.
-        elsif line =~ /^\s*$/
-          # Skip blank lines.
-        elsif line =~ /^\s*module\s+(\S+)$/
-          @modulename = $1
-        elsif line =~ /^\s*class\s+(\S+)$/
-          @classname = $1
-        elsif line =~ /^\s*token\s+(\S+)(?:\s+(\S+))?$/
-          name, pattern = $1, $2
-          if pattern.to_s == ""
-            pattern = name
-          end
-          unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
-            raise Error.new("Invalid token name #{name} on line #{line_number}")
-          end
-          if token_names.include?(name)
-            raise Error.new("Duplicate token name #{name} on line #{line_number}")
-          end
-          @tokens << Token.new(name, pattern, @tokens.size)
-          token_names << name
-        elsif line =~ /^\s*drop\s+(\S+)$/
-          pattern = $1
-          @tokens << Token.new(nil, pattern, @tokens.size)
-        else
-          raise Error.new("Unexpected input on line #{line_number}: #{line}")
+      @token_names = Set.new
+      input = input.gsub("\r\n", "\n")
+      while !input.empty?
+        consume(input)
+      end
+    end
+
+    private
+
+    def consume(input)
+      if input.slice!(/\A\s+/)
+        # Skip white space.
+      elsif input.slice!(/\A#.*\n/)
+        # Skip comment lines.
+      elsif input.slice!(/\Amodule\s+(\S+)\n/)
+        @modulename = $1
+      elsif input.slice!(/\Aclass\s+(\S+)\n/)
+        @classname = $1
+      elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
+        name, pattern = $1, $2
+        if pattern.nil?
+          pattern = name
         end
+        unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
+          raise Error.new("Invalid token name #{name}")
+        end
+        if @token_names.include?(name)
+          raise Error.new("Duplicate token name #{name}")
+        end
+        @tokens << Token.new(name, pattern, @tokens.size)
+        @token_names << name
+      elsif input.slice!(/\Adrop\s+(\S+)\n/)
+        pattern = $1
+        @tokens << Token.new(nil, pattern, @tokens.size)
+      else
+        if input.size > 25
+          input = input.slice(0..20) + "..."
+        end
+        raise Error.new("Unexpected grammar input: #{input}")
       end
     end