Combine Grammar and Generator into top-level Imbecile class

This commit is contained in:
Josh Holtrop 2021-08-22 21:04:46 -04:00
parent 9273bfccf6
commit 00016f16b3
20 changed files with 96 additions and 133 deletions

View File

@ -1,12 +1,12 @@
<% if @grammar.modulename %> <% if @modulename %>
module <%= @grammar.modulename %>; module <%= @modulename %>;
<% end %> <% end %>
class <%= classname %> class <%= classname %>
{ {
enum enum
{ {
<% @grammar.tokens.each_with_index do |token, index| %> <% @tokens.each_with_index do |token, index| %>
<% if token.name %> <% if token.name %>
TOKEN_<%= token.c_name %> = <%= index %>, TOKEN_<%= token.c_name %> = <%= index %>,
<% end %> <% end %>
@ -18,7 +18,7 @@ class <%= classname %>
} }
static immutable string TokenNames[] = [ static immutable string TokenNames[] = [
<% @grammar.tokens.each_with_index do |token, index| %> <% @tokens.each_with_index do |token, index| %>
<% if token.name %> <% if token.name %>
"<%= token.name %>", "<%= token.name %>",
<% else %> <% else %>

View File

@ -5,8 +5,6 @@ require_relative "imbecile/code_point_range"
require_relative "imbecile/fa" require_relative "imbecile/fa"
require_relative "imbecile/fa/state" require_relative "imbecile/fa/state"
require_relative "imbecile/fa/state/transition" require_relative "imbecile/fa/state/transition"
require_relative "imbecile/generator"
require_relative "imbecile/grammar"
require_relative "imbecile/lexer" require_relative "imbecile/lexer"
require_relative "imbecile/lexer/dfa" require_relative "imbecile/lexer/dfa"
require_relative "imbecile/regex" require_relative "imbecile/regex"
@ -16,7 +14,7 @@ require_relative "imbecile/rule"
require_relative "imbecile/token" require_relative "imbecile/token"
require_relative "imbecile/version" require_relative "imbecile/version"
module Imbecile class Imbecile
# EOF. # EOF.
TOKEN_EOF = 0xFFFFFFFC TOKEN_EOF = 0xFFFFFFFC
@ -33,13 +31,82 @@ module Imbecile
class Error < RuntimeError class Error < RuntimeError
end end
# Parse a grammar given as a single input string.
#
# @param input [String]
#   Full grammar source text.
def initialize(input)
@tokens = []
@rules = []
# Normalize Windows line endings so the parser only ever sees "\n".
source = input.gsub("\r\n", "\n")
# parse_grammar destructively consumes one item from the front of the
# string per call; repeat until nothing remains.
parse_grammar(source) until source.empty?
end
# Validate the parsed grammar and write the generated parser source to
# output_file, rendered from the ERB template in assets/parser.d.erb.
#
# @param output_file [String]
#   Path the generated parser source is written to.
# @param log_file [String]
#   Log file path (currently unused here; kept for interface compatibility).
#
# @raise [Error]
#   If a named token is duplicated, a rule name collides with a token name,
#   or no "Start" rule is defined.
#
# @return [void]
def generate(output_file, log_file)
# Map token name => Token while checking for duplicates. Anonymous "drop"
# tokens have a nil name and are excluded so that multiple drop directives
# are not falsely reported as duplicates.
token_names = @tokens.each_with_object({}) do |token, token_names|
if token.name
if token_names.include?(token.name)
raise Error.new("Duplicate token name #{token.name}")
end
token_names[token.name] = token
end
end
# Group rules by name; multiple rules may share a name (alternatives), but
# a rule name must not collide with a token name.
rule_names = @rules.each_with_object({}) do |rule, rule_names|
if token_names.include?(rule.name)
raise Error.new("Rule name collides with token name #{rule.name}")
end
rule_names[rule.name] ||= []
rule_names[rule.name] << rule
end
unless rule_names["Start"]
raise Error.new("Start rule not found")
end
# NOTE: lexer, classname, token_names, and rule_names are referenced by the
# ERB template through the cloned binding below — do not rename them.
lexer = Lexer.new(@tokens)
# Default the class name to the output file's base name, truncated at the
# first non-alphanumeric character and capitalized, when the grammar does
# not specify one.
classname = @classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../assets/parser.d.erb")), nil, "<>")
result = erb.result(binding.clone)
File.open(output_file, "wb") do |fh|
fh.write(result)
end
end
private
# Consume a single item (directive, token definition, or rule) from the
# front of the grammar input string.
#
# The input string is destructively modified: each branch uses String#slice!
# to remove the text it matched, and the $1/$2/$3 capture globals refer to
# the most recent successful match. Callers invoke this repeatedly until the
# string is empty.
#
# @param input [String]
#   Remaining grammar text; mutated in place.
#
# @raise [Error]
#   On an invalid token name or unrecognized input.
#
# @return [void]
def parse_grammar(input)
if input.slice!(/\A\s+/)
# Skip white space.
elsif input.slice!(/\A#.*\n/)
# Skip comment lines.
elsif input.slice!(/\Amodule\s+(\S+)\n/)
# "module NAME" names the module wrapping the generated parser.
@modulename = $1
elsif input.slice!(/\Aclass\s+(\S+)\n/)
# "class NAME" names the generated parser class.
@classname = $1
elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
# "token NAME [PATTERN]" defines a named token; the pattern defaults to
# the token name itself when omitted.
name, pattern = $1, $2
if pattern.nil?
pattern = name
end
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
raise Error.new("Invalid token name #{name}")
end
# Third argument is the token's position in @tokens — presumably its
# numeric ID in the generated lexer (TODO confirm against Token).
@tokens << Token.new(name, pattern, @tokens.size)
elsif input.slice!(/\Adrop\s+(\S+)\n/)
# "drop PATTERN" defines an anonymous token (nil name); its matches are
# dropped rather than emitted.
pattern = $1
@tokens << Token.new(nil, pattern, @tokens.size)
elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
# "Name: [components] <<\n code \n>>" defines a rule: the bracketed list
# is split on whitespace into component names and the <<...>> body is the
# user code associated with the rule.
rule_name, rule, code = $1, $2, $3
rule = rule.strip.split(/\s+/)
@rules << Rule.new(rule_name, rule, code)
else
# Nothing matched: report the offending input, truncated for readability.
if input.size > 25
input = input.slice(0..20) + "..."
end
raise Error.new("Unexpected grammar input: #{input}")
end
end
class << self class << self
def run(input_file, output_file, log_file) def run(input_file, output_file, log_file)
begin begin
grammar = Grammar.new(File.read(input_file)) imbecile = Imbecile.new(File.read(input_file))
generator = Generator.new(grammar, log_file) imbecile.generate(output_file, log_file)
generator.generate(output_file)
rescue Error => e rescue Error => e
$stderr.puts e.message $stderr.puts e.message
return 2 return 2

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
module CLI module CLI
USAGE = <<EOF USAGE = <<EOF

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class CodePointRange class CodePointRange
MAX_CODE_POINT = 0xFFFFFFFF MAX_CODE_POINT = 0xFFFFFFFF

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class FA class FA

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class FA class FA
class State class State

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class FA class FA
class State class State

View File

@ -1,39 +0,0 @@
module Imbecile
# Class to generate the parser generator source.
#
# Takes a parsed Grammar and renders the parser source from the ERB
# template in assets/parser.d.erb.
class Generator
# @param grammar [Grammar]
#   Parsed grammar providing tokens, rules, and name settings.
# @param log_file [String]
#   Log file path; stored but not read in this class.
def initialize(grammar, log_file)
@grammar = grammar
@log_file = log_file
end
# Validate the grammar and write the generated parser source to
# output_file.
#
# @param output_file [String]
#   Path the generated parser source is written to.
#
# @raise [Error]
#   On duplicate token names, a rule/token name collision, or a missing
#   "Start" rule.
#
# @return [void]
def generate(output_file)
# Map token name => Token while checking for duplicates.
# NOTE(review): anonymous "drop" tokens have a nil name; a second nil
# entry would be reported here as "Duplicate token name " — looks
# unintended, confirm.
token_names = @grammar.tokens.each_with_object({}) do |token, token_names|
if token_names.include?(token.name)
raise Error.new("Duplicate token name #{token.name}")
end
token_names[token.name] = token
end
# Group rules by name; multiple rules may share a name, but a rule name
# must not collide with a token name.
rule_names = @grammar.rules.each_with_object({}) do |rule, rule_names|
if token_names.include?(rule.name)
raise Error.new("Rule name collides with token name #{rule.name}")
end
rule_names[rule.name] ||= []
rule_names[rule.name] << rule
end
unless rule_names["Start"]
raise Error.new("Start rule not found")
end
# Locals below (lexer, classname, and the hashes above) are consumed by
# the ERB template through the cloned binding passed to erb.result.
lexer = Lexer.new(@grammar)
# Class name defaults to the output file's base name, truncated at the
# first non-alphanumeric character and capitalized.
classname = @grammar.classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../../assets/parser.d.erb")), nil, "<>")
result = erb.result(binding.clone)
File.open(output_file, "wb") do |fh|
fh.write(result)
end
end
end
end

View File

@ -1,65 +0,0 @@
module Imbecile
# Parsed representation of a grammar input file: token definitions, rules,
# and optional module/class name directives.
class Grammar
# @return [String, nil]
# Module name.
attr_reader :modulename
# @return [String, nil]
# Class name.
attr_reader :classname
# @return [Array<Token>]
# Tokens.
attr_reader :tokens
# @return [Array<Rule>]
# Rules.
attr_reader :rules
# Parse a grammar given as a single input string.
#
# @param input [String]
#   Full grammar source text; "\r\n" sequences are normalized to "\n"
#   before parsing.
def initialize(input)
@tokens = []
@rules = []
input = input.gsub("\r\n", "\n")
# consume destructively removes one item from the front of the string
# per call.
while !input.empty?
consume(input)
end
end
private
# Consume a single item (directive, token definition, or rule) from the
# front of the grammar input string. The string is mutated in place via
# String#slice!; $1/$2/$3 refer to the most recent successful match.
#
# @param input [String]
#   Remaining grammar text; mutated in place.
#
# @raise [Error]
#   On an invalid token name or unrecognized input.
#
# @return [void]
def consume(input)
if input.slice!(/\A\s+/)
# Skip white space.
elsif input.slice!(/\A#.*\n/)
# Skip comment lines.
elsif input.slice!(/\Amodule\s+(\S+)\n/)
# "module NAME" names the module wrapping the generated parser.
@modulename = $1
elsif input.slice!(/\Aclass\s+(\S+)\n/)
# "class NAME" names the generated parser class.
@classname = $1
elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
# "token NAME [PATTERN]" defines a named token; the pattern defaults to
# the token name itself when omitted.
name, pattern = $1, $2
if pattern.nil?
pattern = name
end
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
raise Error.new("Invalid token name #{name}")
end
# Third argument is the token's position in @tokens — presumably its
# numeric ID in the generated lexer (TODO confirm against Token).
@tokens << Token.new(name, pattern, @tokens.size)
elsif input.slice!(/\Adrop\s+(\S+)\n/)
# "drop PATTERN" defines an anonymous token (nil name); its matches are
# dropped rather than emitted.
pattern = $1
@tokens << Token.new(nil, pattern, @tokens.size)
elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
# "Name: [components] <<\n code \n>>" defines a rule: the bracketed
# list is split on whitespace into component names and the <<...>> body
# is the user code associated with the rule.
rule_name, rule, code = $1, $2, $3
rule = rule.strip.split(/\s+/)
@rules << Rule.new(rule_name, rule, code)
else
# Nothing matched: report the offending input, truncated for
# readability.
if input.size > 25
input = input.slice(0..20) + "..."
end
raise Error.new("Unexpected grammar input: #{input}")
end
end
end
end

View File

@ -1,12 +1,12 @@
module Imbecile class Imbecile
class Lexer class Lexer
# @return [DFA] # @return [DFA]
# Lexer DFA. # Lexer DFA.
attr_accessor :dfa attr_accessor :dfa
def initialize(grammar) def initialize(tokens)
@dfa = DFA.new(grammar.tokens) @dfa = DFA.new(tokens)
end end
end end

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class Lexer class Lexer
class DFA < FA class DFA < FA

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class Regex class Regex
attr_reader :unit attr_reader :unit

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class Regex class Regex
class NFA < FA class NFA < FA

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class Regex class Regex
class Unit class Unit

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class Rule class Rule

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class Token class Token

View File

@ -1,3 +1,3 @@
module Imbecile class Imbecile
VERSION = "0.1.0" VERSION = "0.1.0"
end end

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
describe CodePointRange do describe CodePointRange do
describe "#<=>" do describe "#<=>" do

View File

@ -50,8 +50,8 @@ class TestLexer
end end
def run(grammar, input) def run(grammar, input)
g = Imbecile::Grammar.new(grammar) imbecile = Imbecile.new(grammar)
token_dfa = Imbecile::Lexer::DFA.new(g.tokens) token_dfa = Imbecile::Lexer::DFA.new(imbecile.instance_variable_get(:@tokens))
test_lexer = TestLexer.new(token_dfa) test_lexer = TestLexer.new(token_dfa)
test_lexer.lex(input) test_lexer.lex(input)
end end

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
RSpec.describe Regex do RSpec.describe Regex do
it "parses an empty expression" do it "parses an empty expression" do