# propane/lib/imbecile.rb
#
# 138 lines
# 3.6 KiB
# Ruby

require "erb"
require "set"
require_relative "imbecile/cli"
require_relative "imbecile/code_point_range"
require_relative "imbecile/fa"
require_relative "imbecile/fa/state"
require_relative "imbecile/fa/state/transition"
require_relative "imbecile/lexer"
require_relative "imbecile/lexer/dfa"
require_relative "imbecile/parser"
require_relative "imbecile/parser/item"
require_relative "imbecile/parser/item_set"
require_relative "imbecile/regex"
require_relative "imbecile/regex/nfa"
require_relative "imbecile/regex/unit"
require_relative "imbecile/rule"
require_relative "imbecile/token"
require_relative "imbecile/version"
# Front end of the generator: parses a grammar description into tokens and
# rules, then renders the output source file from an ERB template.
class Imbecile
  # EOF.
  TOKEN_EOF = 0xFFFFFFFC
  # Decoding error.
  TOKEN_DECODE_ERROR = 0xFFFFFFFD
  # Token ID for a "dropped" token.
  TOKEN_DROP = 0xFFFFFFFE
  # Invalid token ID.
  TOKEN_NONE = 0xFFFFFFFF

  # Raised for every problem detected in the grammar input.
  class Error < RuntimeError
  end

  # Parse a complete grammar description.
  #
  # @param input [String] grammar text; "\r\n" line endings are converted to
  #   "\n" before parsing. The caller's string is not modified.
  # @raise [Error] on unrecognized input, an invalid token name or a
  #   duplicate token name.
  def initialize(input)
    @tokens = {}
    @rules = {}
    input = input.gsub("\r\n", "\n")
    # Every directive pattern requires a terminating newline; supply one so
    # that a grammar whose last line is unterminated is still accepted.
    input << "\n" unless input.end_with?("\n")
    parse_grammar(input) until input.empty?
  end

  # Resolve the grammar, render the ERB template and write the result.
  #
  # @param output_file [String] path of the file to write (binary mode).
  # @param log_file [String] accepted for interface compatibility; not used
  #   by this method.
  # @raise [Error] if the grammar fails the checks in #expand_rules.
  def generate(output_file, log_file)
    expand_rules
    # These locals are not referenced again in this method; they are exposed
    # to the template through the binding below (presumably read there --
    # confirm against assets/parser.d.erb before renaming any of them).
    lexer = Lexer.new(@tokens)
    parser = Parser.new(@tokens, @rules)
    classname = @classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
    template_path = File.expand_path("../assets/parser.d.erb", File.dirname(__FILE__))
    # Keyword form (Ruby 2.6+): the positional safe_level/trim_mode
    # arguments are deprecated.
    erb = ERB.new(File.read(template_path), trim_mode: "<>")
    result = erb.result(binding.clone)
    File.open(output_file, "wb") { |fh| fh.write(result) }
  end

  private

  # Consume exactly one construct from the front of +input+ (which is
  # mutated in place) and record its effect.
  #
  # Recognized constructs, in matching order:
  #   white space
  #   # comment line
  #   module NAME
  #   class NAME
  #   token NAME [PATTERN]
  #   drop PATTERN
  #   NAME: [COMPONENT ...] <<
  #   ...code...
  #   >>
  #
  # @param input [String] remaining grammar text.
  # @raise [Error] if the front of +input+ matches none of the above.
  def parse_grammar(input)
    if input.slice!(/\A\s+/)
      # Skip white space.
    elsif input.slice!(/\A#.*\n/)
      # Skip comment lines.
    elsif input.slice!(/\Amodule\s+(\S+)\n/)
      @modulename = $1
    elsif input.slice!(/\Aclass\s+(\S+)\n/)
      @classname = $1
    elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
      add_token($1, $2)
    elsif input.slice!(/\Adrop\s+(\S+)\n/)
      add_drop($1)
    elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
      add_rule($1, $2, $3)
    else
      # Keep the error message short for long inputs.
      preview = input.size > 25 ? "#{input.slice(0..20)}..." : input
      raise Error, "Unexpected grammar input: #{preview}"
    end
  end

  # Register a named token; the pattern defaults to the token name.
  #
  # @raise [Error] if the name is not an identifier or is already taken.
  def add_token(name, pattern)
    pattern ||= name
    unless name.match?(/\A[a-zA-Z_][a-zA-Z_0-9]*\z/)
      raise Error, "Invalid token name #{name}"
    end
    raise Error, "Duplicate token name #{name}" if @tokens[name]

    @tokens[name] = Token.new(name, pattern, @tokens.size)
  end

  # Register a nameless "drop" token for +pattern+.
  def add_drop(pattern)
    id = @tokens.size
    # Each drop token needs its own key: storing them all under one key
    # would let a later drop overwrite an earlier one and hand out
    # duplicate ids. A Symbol key can never equal the String names used to
    # look up tokens and rule components.
    @tokens[:"drop_#{id}"] = Token.new(nil, pattern, id)
  end

  # Add one pattern (component list plus code) to the rule +rule_name+,
  # creating the rule on first use.
  def add_rule(rule_name, components, code)
    @rules[rule_name] ||= Rule.new(rule_name, @rules.size)
    @rules[rule_name].add_pattern(components.strip.split(/\s+/), code)
  end

  # Validate the grammar and replace every component name in every rule
  # pattern with the token or rule object it names (tokens take precedence).
  #
  # @raise [Error] if a rule shares its name with a token, if there is no
  #   "Start" rule, or if a component names neither a token nor a rule.
  def expand_rules
    @rules.each_key do |rule_name|
      if @tokens.key?(rule_name)
        raise Error, "Rule name collides with token name #{rule_name}"
      end
    end
    raise Error, "Start rule not found" unless @rules["Start"]

    @rules.each_value do |rule|
      rule.patterns.each do |pattern|
        pattern.components.map! do |component|
          @tokens[component] || @rules[component] ||
            raise(Error, "Symbol #{component} not found")
        end
      end
    end
  end

  class << self
    # Command-line entry point: read the grammar from +input_file+ and
    # generate +output_file+.
    #
    # @return [Integer] 0 on success; 2 if a grammar Error occurred (its
    #   message is printed to standard error).
    def run(input_file, output_file, log_file)
      new(File.read(input_file)).generate(output_file, log_file)
      0
    rescue Error => e
      $stderr.puts e.message
      2
    end
  end
end