Parse grammar input by multiline regex

This commit is contained in:
Josh Holtrop 2021-06-12 22:57:32 -04:00
parent 03035a25a5
commit 9d05861819

View File

@@ -15,37 +15,45 @@ module Imbecile
def initialize(input) def initialize(input)
@tokens = [] @tokens = []
token_names = Set.new @token_names = Set.new
input.each_line.each_with_index do |line, line_index| input = input.gsub("\r\n", "\n")
line = line.chomp while !input.empty?
line_number = line_index + 1 consume(input)
if line =~ /^\s*#/ end
end
private
def consume(input)
if input.slice!(/\A\s+/)
# Skip white space.
elsif input.slice!(/\A#.*\n/)
# Skip comment lines. # Skip comment lines.
elsif line =~ /^\s*$/ elsif input.slice!(/\Amodule\s+(\S+)\n/)
# Skip blank lines.
elsif line =~ /^\s*module\s+(\S+)$/
@modulename = $1 @modulename = $1
elsif line =~ /^\s*class\s+(\S+)$/ elsif input.slice!(/\Aclass\s+(\S+)\n/)
@classname = $1 @classname = $1
elsif line =~ /^\s*token\s+(\S+)(?:\s+(\S+))?$/ elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
name, pattern = $1, $2 name, pattern = $1, $2
if pattern.to_s == "" if pattern.nil?
pattern = name pattern = name
end end
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/ unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
raise Error.new("Invalid token name #{name} on line #{line_number}") raise Error.new("Invalid token name #{name}")
end end
if token_names.include?(name) if @token_names.include?(name)
raise Error.new("Duplicate token name #{name} on line #{line_number}") raise Error.new("Duplicate token name #{name}")
end end
@tokens << Token.new(name, pattern, @tokens.size) @tokens << Token.new(name, pattern, @tokens.size)
token_names << name @token_names << name
elsif line =~ /^\s*drop\s+(\S+)$/ elsif input.slice!(/\Adrop\s+(\S+)\n/)
pattern = $1 pattern = $1
@tokens << Token.new(nil, pattern, @tokens.size) @tokens << Token.new(nil, pattern, @tokens.size)
else else
raise Error.new("Unexpected input on line #{line_number}: #{line}") if input.size > 25
input = input.slice(0..20) + "..."
end end
raise Error.new("Unexpected grammar input: #{input}")
end end
end end