From 00016f16b36d15a182d32841d43f8252da9905a4 Mon Sep 17 00:00:00 2001
From: Josh Holtrop <jholtrop@gmail.com>
Date: Sun, 22 Aug 2021 21:04:46 -0400
Subject: [PATCH] Combine Grammar and Generator into top-level Imbecile class

---
 assets/parser.d.erb                    |  8 +--
 lib/imbecile.rb                        | 79 ++++++++++++++++++++++++--
 lib/imbecile/cli.rb                    |  2 +-
 lib/imbecile/code_point_range.rb       |  2 +-
 lib/imbecile/fa.rb                     |  2 +-
 lib/imbecile/fa/state.rb               |  2 +-
 lib/imbecile/fa/state/transition.rb    |  2 +-
 lib/imbecile/generator.rb              | 39 -------------
 lib/imbecile/grammar.rb                | 65 ---------------------
 lib/imbecile/lexer.rb                  |  6 +-
 lib/imbecile/lexer/dfa.rb              |  2 +-
 lib/imbecile/regex.rb                  |  2 +-
 lib/imbecile/regex/nfa.rb              |  2 +-
 lib/imbecile/regex/unit.rb             |  2 +-
 lib/imbecile/rule.rb                   |  2 +-
 lib/imbecile/token.rb                  |  2 +-
 lib/imbecile/version.rb                |  2 +-
 spec/imbecile/code_point_range_spec.rb |  2 +-
 spec/imbecile/lexer/dfa_spec.rb        |  4 +-
 spec/imbecile/regex_spec.rb            |  2 +-
 20 files changed, 96 insertions(+), 133 deletions(-)
 delete mode 100644 lib/imbecile/generator.rb
 delete mode 100644 lib/imbecile/grammar.rb

diff --git a/assets/parser.d.erb b/assets/parser.d.erb
index d258713..fd04ec5 100644
--- a/assets/parser.d.erb
+++ b/assets/parser.d.erb
@@ -1,12 +1,12 @@
-<% if @grammar.modulename %>
-module <%= @grammar.modulename %>;
+<% if @modulename %>
+module <%= @modulename %>;
 
 <% end %>
 class <%= classname %>
 {
     enum
     {
-<% @grammar.tokens.each_with_index do |token, index| %>
+<% @tokens.each_with_index do |token, index| %>
 <%   if token.name %>
         TOKEN_<%= token.c_name %> = <%= index %>,
 <%   end %>
@@ -18,7 +18,7 @@ class <%= classname %>
     }
 
     static immutable string TokenNames[] = [
-<% @grammar.tokens.each_with_index do |token, index| %>
+<% @tokens.each_with_index do |token, index| %>
 <%   if token.name %>
         "<%= token.name %>",
 <%   else %>
diff --git a/lib/imbecile.rb b/lib/imbecile.rb
index 1cf254d..4672670 100644
--- a/lib/imbecile.rb
+++ b/lib/imbecile.rb
@@ -5,8 +5,6 @@ require_relative "imbecile/code_point_range"
 require_relative "imbecile/fa"
 require_relative "imbecile/fa/state"
 require_relative "imbecile/fa/state/transition"
-require_relative "imbecile/generator"
-require_relative "imbecile/grammar"
 require_relative "imbecile/lexer"
 require_relative "imbecile/lexer/dfa"
 require_relative "imbecile/regex"
@@ -16,7 +14,7 @@ require_relative "imbecile/rule"
 require_relative "imbecile/token"
 require_relative "imbecile/version"
 
-module Imbecile
+class Imbecile
 
   # EOF.
   TOKEN_EOF = 0xFFFFFFFC
@@ -33,13 +31,82 @@ module Imbecile
   class Error < RuntimeError
   end
 
+  # Parse grammar source text +input+ into @tokens and @rules.
+  def initialize(input)
+    @tokens = []
+    @rules = []
+    text = input.gsub("\r\n", "\n") # gsub copies, so the caller's string is untouched
+    text << "\n" unless text.end_with?("\n") # directives are newline-terminated
+    parse_grammar(text) until text.empty?
+  end
+
+  # Validate the grammar and render assets/parser.d.erb into output_file.
+  # log_file is accepted for interface compatibility; this method does not use it.
+  # Raises Error on duplicate token names, rule/token name collisions, or a
+  # missing "Start" rule.
+  def generate(output_file, log_file)
+    # NOTE: the locals below are visible to the template through `binding`.
+    token_names = @tokens.each_with_object({}) do |token, names|
+      next if token.name.nil? # `drop` tokens are unnamed and may repeat
+      raise Error.new("Duplicate token name #{token.name}") if names.include?(token.name)
+      names[token.name] = token
+    end
+    rule_names = @rules.each_with_object({}) do |rule, names|
+      if token_names.include?(rule.name)
+        raise Error.new("Rule name collides with token name #{rule.name}")
+      end
+      (names[rule.name] ||= []) << rule
+    end
+    raise Error.new("Start rule not found") unless rule_names["Start"]
+    lexer = Lexer.new(@tokens)
+    classname = @classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
+    template = File.join(File.dirname(File.expand_path(__FILE__)), "../assets/parser.d.erb")
+    erb = ERB.new(File.read(template), trim_mode: "<>")
+    result = erb.result(binding.clone)
+    File.binwrite(output_file, result)
+  end
+
+  private
+
+  # Consume one construct from the front of +input+ (mutated in place via slice!):
+  # whitespace, a # comment, or a module/class/token/drop/rule declaration.
+  # Raises Error on an invalid token name or unrecognized input.
+  def parse_grammar(input)
+    if input.slice!(/\A\s+/)
+      # Skip white space.
+    elsif input.slice!(/\A#.*\n/)
+      # Skip comment lines.
+    elsif input.slice!(/\Amodule\s+(\S+)\n/)
+      @modulename = Regexp.last_match(1)
+    elsif input.slice!(/\Aclass\s+(\S+)\n/)
+      @classname = Regexp.last_match(1)
+    elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
+      token_name = Regexp.last_match(1)
+      # The pattern defaults to the token name when omitted.
+      token_pattern = Regexp.last_match(2) || token_name
+      if token_name !~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
+        raise Error.new("Invalid token name #{token_name}")
+      end
+      @tokens << Token.new(token_name, token_pattern, @tokens.size)
+    elsif input.slice!(/\Adrop\s+(\S+)\n/)
+      # Dropped patterns are recorded as tokens with a nil name.
+      @tokens << Token.new(nil, Regexp.last_match(1), @tokens.size)
+    elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
+      match = Regexp.last_match
+      @rules << Rule.new(match[1], match[2].strip.split(/\s+/), match[3])
+    else
+      # Keep the error message short for long inputs.
+      excerpt = input.size > 25 ? input.slice(0..20) + "..." : input
+      raise Error.new("Unexpected grammar input: #{excerpt}")
+    end
+  end
+
   class << self
 
     def run(input_file, output_file, log_file)
       begin
-        grammar = Grammar.new(File.read(input_file))
-        generator = Generator.new(grammar, log_file)
-        generator.generate(output_file)
+        imbecile = Imbecile.new(File.read(input_file))
+        imbecile.generate(output_file, log_file)
       rescue Error => e
         $stderr.puts e.message
         return 2
diff --git a/lib/imbecile/cli.rb b/lib/imbecile/cli.rb
index 46ecaee..66b0679 100644
--- a/lib/imbecile/cli.rb
+++ b/lib/imbecile/cli.rb
@@ -1,4 +1,4 @@
-module Imbecile
+class Imbecile
   module CLI
 
     USAGE = <<EOF
diff --git a/lib/imbecile/code_point_range.rb b/lib/imbecile/code_point_range.rb
index b1d9a96..a83a4aa 100644
--- a/lib/imbecile/code_point_range.rb
+++ b/lib/imbecile/code_point_range.rb
@@ -1,4 +1,4 @@
-module Imbecile
+class Imbecile
   class CodePointRange
 
     MAX_CODE_POINT = 0xFFFFFFFF
diff --git a/lib/imbecile/fa.rb b/lib/imbecile/fa.rb
index 0909358..63ce8d7 100644
--- a/lib/imbecile/fa.rb
+++ b/lib/imbecile/fa.rb
@@ -1,4 +1,4 @@
-module Imbecile
+class Imbecile
 
   class FA
 
diff --git a/lib/imbecile/fa/state.rb b/lib/imbecile/fa/state.rb
index 1373cbc..e1bb44b 100644
--- a/lib/imbecile/fa/state.rb
+++ b/lib/imbecile/fa/state.rb
@@ -1,4 +1,4 @@
-module Imbecile
+class Imbecile
   class FA
 
     class State
diff --git a/lib/imbecile/fa/state/transition.rb b/lib/imbecile/fa/state/transition.rb
index bd07671..8a6a1f5 100644
--- a/lib/imbecile/fa/state/transition.rb
+++ b/lib/imbecile/fa/state/transition.rb
@@ -1,4 +1,4 @@
-module Imbecile
+class Imbecile
   class FA
     class State
 
diff --git a/lib/imbecile/generator.rb b/lib/imbecile/generator.rb
deleted file mode 100644
index 1390a6a..0000000
--- a/lib/imbecile/generator.rb
+++ /dev/null
@@ -1,39 +0,0 @@
-module Imbecile
-
-  # Class to generate the parser generator source.
-  class Generator
-
-    def initialize(grammar, log_file)
-      @grammar = grammar
-      @log_file = log_file
-    end
-
-    def generate(output_file)
-      token_names = @grammar.tokens.each_with_object({}) do |token, token_names|
-        if token_names.include?(token.name)
-          raise Error.new("Duplicate token name #{token.name}")
-        end
-        token_names[token.name] = token
-      end
-      rule_names = @grammar.rules.each_with_object({}) do |rule, rule_names|
-        if token_names.include?(rule.name)
-          raise Error.new("Rule name collides with token name #{rule.name}")
-        end
-        rule_names[rule.name] ||= []
-        rule_names[rule.name] << rule
-      end
-      unless rule_names["Start"]
-        raise Error.new("Start rule not found")
-      end
-      lexer = Lexer.new(@grammar)
-      classname = @grammar.classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
-      erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../../assets/parser.d.erb")), nil, "<>")
-      result = erb.result(binding.clone)
-      File.open(output_file, "wb") do |fh|
-        fh.write(result)
-      end
-    end
-
-  end
-
-end
diff --git a/lib/imbecile/grammar.rb b/lib/imbecile/grammar.rb
deleted file mode 100644
index 3581204..0000000
--- a/lib/imbecile/grammar.rb
+++ /dev/null
@@ -1,65 +0,0 @@
-module Imbecile
-  class Grammar
-
-    # @return [String, nil]
-    #   Module name.
-    attr_reader :modulename
-
-    # @return [String, nil]
-    #   Class name.
-    attr_reader :classname
-
-    # @return [Array<Token>]
-    #   Tokens.
-    attr_reader :tokens
-
-    # @return [Array<Rule>]
-    #   Rules.
-    attr_reader :rules
-
-    def initialize(input)
-      @tokens = []
-      @rules = []
-      input = input.gsub("\r\n", "\n")
-      while !input.empty?
-        consume(input)
-      end
-    end
-
-    private
-
-    def consume(input)
-      if input.slice!(/\A\s+/)
-        # Skip white space.
-      elsif input.slice!(/\A#.*\n/)
-        # Skip comment lines.
-      elsif input.slice!(/\Amodule\s+(\S+)\n/)
-        @modulename = $1
-      elsif input.slice!(/\Aclass\s+(\S+)\n/)
-        @classname = $1
-      elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
-        name, pattern = $1, $2
-        if pattern.nil?
-          pattern = name
-        end
-        unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
-          raise Error.new("Invalid token name #{name}")
-        end
-        @tokens << Token.new(name, pattern, @tokens.size)
-      elsif input.slice!(/\Adrop\s+(\S+)\n/)
-        pattern = $1
-        @tokens << Token.new(nil, pattern, @tokens.size)
-      elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
-        rule_name, rule, code = $1, $2, $3
-        rule = rule.strip.split(/\s+/)
-        @rules << Rule.new(rule_name, rule, code)
-      else
-        if input.size > 25
-          input = input.slice(0..20) + "..."
-        end
-        raise Error.new("Unexpected grammar input: #{input}")
-      end
-    end
-
-  end
-end
diff --git a/lib/imbecile/lexer.rb b/lib/imbecile/lexer.rb
index c139a16..0115ccc 100644
--- a/lib/imbecile/lexer.rb
+++ b/lib/imbecile/lexer.rb
@@ -1,12 +1,12 @@
-module Imbecile
+class Imbecile
   class Lexer
 
     # @return [DFA]
     #   Lexer DFA.
     attr_accessor :dfa
 
-    def initialize(grammar)
-      @dfa = DFA.new(grammar.tokens)
+    def initialize(tokens) # tokens: the token list used to build the lexer DFA
+      @dfa = DFA.new(tokens)
+    end
 
   end
diff --git a/lib/imbecile/lexer/dfa.rb b/lib/imbecile/lexer/dfa.rb
index 74903f4..0930fb5 100644
--- a/lib/imbecile/lexer/dfa.rb
+++ b/lib/imbecile/lexer/dfa.rb
@@ -1,4 +1,4 @@
-module Imbecile
+class Imbecile
   class Lexer
 
     class DFA < FA
diff --git a/lib/imbecile/regex.rb b/lib/imbecile/regex.rb
index 59b22b7..c62d45c 100644
--- a/lib/imbecile/regex.rb
+++ b/lib/imbecile/regex.rb
@@ -1,4 +1,4 @@
-module Imbecile
+class Imbecile
   class Regex
 
     attr_reader :unit
diff --git a/lib/imbecile/regex/nfa.rb b/lib/imbecile/regex/nfa.rb
index 824ed04..4b89c8e 100644
--- a/lib/imbecile/regex/nfa.rb
+++ b/lib/imbecile/regex/nfa.rb
@@ -1,4 +1,4 @@
-module Imbecile
+class Imbecile
   class Regex
 
     class NFA < FA
diff --git a/lib/imbecile/regex/unit.rb b/lib/imbecile/regex/unit.rb
index f1d4ff6..db12c2a 100644
--- a/lib/imbecile/regex/unit.rb
+++ b/lib/imbecile/regex/unit.rb
@@ -1,4 +1,4 @@
-module Imbecile
+class Imbecile
   class Regex
 
     class Unit
diff --git a/lib/imbecile/rule.rb b/lib/imbecile/rule.rb
index df3c251..c89543e 100644
--- a/lib/imbecile/rule.rb
+++ b/lib/imbecile/rule.rb
@@ -1,4 +1,4 @@
-module Imbecile
+class Imbecile
 
   class Rule
 
diff --git a/lib/imbecile/token.rb b/lib/imbecile/token.rb
index 266ef25..e4283e7 100644
--- a/lib/imbecile/token.rb
+++ b/lib/imbecile/token.rb
@@ -1,4 +1,4 @@
-module Imbecile
+class Imbecile
 
   class Token
 
diff --git a/lib/imbecile/version.rb b/lib/imbecile/version.rb
index 04dbd28..38d1097 100644
--- a/lib/imbecile/version.rb
+++ b/lib/imbecile/version.rb
@@ -1,3 +1,3 @@
-module Imbecile
+class Imbecile
   VERSION = "0.1.0"
 end
diff --git a/spec/imbecile/code_point_range_spec.rb b/spec/imbecile/code_point_range_spec.rb
index e62edad..ec8e2e0 100644
--- a/spec/imbecile/code_point_range_spec.rb
+++ b/spec/imbecile/code_point_range_spec.rb
@@ -1,4 +1,4 @@
-module Imbecile
+class Imbecile
   describe CodePointRange do
 
     describe "#<=>" do
diff --git a/spec/imbecile/lexer/dfa_spec.rb b/spec/imbecile/lexer/dfa_spec.rb
index e8b2557..d692e0e 100644
--- a/spec/imbecile/lexer/dfa_spec.rb
+++ b/spec/imbecile/lexer/dfa_spec.rb
@@ -50,8 +50,8 @@ class TestLexer
 end
 
 def run(grammar, input)
-  g = Imbecile::Grammar.new(grammar)
-  token_dfa = Imbecile::Lexer::DFA.new(g.tokens)
+  imbecile = Imbecile.new(grammar)
+  token_dfa = Imbecile::Lexer::DFA.new(imbecile.instance_variable_get(:@tokens))
   test_lexer = TestLexer.new(token_dfa)
   test_lexer.lex(input)
 end
diff --git a/spec/imbecile/regex_spec.rb b/spec/imbecile/regex_spec.rb
index caeb533..cecf2c4 100644
--- a/spec/imbecile/regex_spec.rb
+++ b/spec/imbecile/regex_spec.rb
@@ -1,4 +1,4 @@
-module Imbecile
+class Imbecile
   RSpec.describe Regex do
 
     it "parses an empty expression" do