Combine Grammar and Generator into top-level Imbecile class

This commit is contained in:
Josh Holtrop 2021-08-22 21:04:46 -04:00
parent 9273bfccf6
commit 00016f16b3
20 changed files with 96 additions and 133 deletions

View File

@ -1,12 +1,12 @@
<% if @grammar.modulename %>
module <%= @grammar.modulename %>;
<% if @modulename %>
module <%= @modulename %>;
<% end %>
class <%= classname %>
{
enum
{
<% @grammar.tokens.each_with_index do |token, index| %>
<% @tokens.each_with_index do |token, index| %>
<% if token.name %>
TOKEN_<%= token.c_name %> = <%= index %>,
<% end %>
@ -18,7 +18,7 @@ class <%= classname %>
}
static immutable string TokenNames[] = [
<% @grammar.tokens.each_with_index do |token, index| %>
<% @tokens.each_with_index do |token, index| %>
<% if token.name %>
"<%= token.name %>",
<% else %>

View File

@ -5,8 +5,6 @@ require_relative "imbecile/code_point_range"
require_relative "imbecile/fa"
require_relative "imbecile/fa/state"
require_relative "imbecile/fa/state/transition"
require_relative "imbecile/generator"
require_relative "imbecile/grammar"
require_relative "imbecile/lexer"
require_relative "imbecile/lexer/dfa"
require_relative "imbecile/regex"
@ -16,7 +14,7 @@ require_relative "imbecile/rule"
require_relative "imbecile/token"
require_relative "imbecile/version"
module Imbecile
class Imbecile
# EOF.
TOKEN_EOF = 0xFFFFFFFC
@ -33,13 +31,82 @@ module Imbecile
class Error < RuntimeError
end
# Parse a grammar specification into tokens and rules.
#
# Line endings are normalized to "\n" before parsing. Parsing consumes
# the input string destructively (see #parse_grammar) until it is empty.
#
# @param input [String]
#   Grammar specification text.
def initialize(input)
  @tokens = []
  @rules = []
  remaining = input.gsub("\r\n", "\n")
  parse_grammar(remaining) until remaining.empty?
end
# Generate the parser source file from the parsed grammar.
#
# Validates that token and rule names are unique and that a "Start"
# rule exists, builds the lexer DFA, and renders the D parser template
# to the output file.
#
# @param output_file [String]
#   Path to write the generated parser source to.
# @param log_file [String]
#   Path to the log file (currently unused here; reserved for diagnostics).
#
# @return [void]
# @raise [Error]
#   On duplicate token names, rule/token name collisions, or a missing
#   Start rule.
def generate(output_file, log_file)
  token_names = @tokens.each_with_object({}) do |token, token_names|
    # Anonymous "drop" tokens are created with a nil name; skip them so
    # that a grammar with more than one drop pattern is not rejected
    # with a spurious "Duplicate token name" error.
    next if token.name.nil?
    if token_names.include?(token.name)
      raise Error.new("Duplicate token name #{token.name}")
    end
    token_names[token.name] = token
  end
  rule_names = @rules.each_with_object({}) do |rule, rule_names|
    if token_names.include?(rule.name)
      raise Error.new("Rule name collides with token name #{rule.name}")
    end
    # Multiple productions may share one rule name.
    rule_names[rule.name] ||= []
    rule_names[rule.name] << rule
  end
  unless rule_names["Start"]
    raise Error.new("Start rule not found")
  end
  lexer = Lexer.new(@tokens)
  # Fall back to deriving the class name from the output file name when
  # the grammar did not declare one (e.g. "my_parser.d" => "My").
  classname = @classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
  erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../assets/parser.d.erb")), nil, "<>")
  # binding.clone exposes lexer/classname/token_names/rule_names to the
  # template without letting the template mutate this scope's binding.
  result = erb.result(binding.clone)
  File.open(output_file, "wb") do |fh|
    fh.write(result)
  end
end
private
# Consume one grammar construct from the front of the input string.
#
# The input is consumed destructively via String#slice!, so the caller
# loops until the string is empty. Branch order matters: each regex is
# anchored with \A, and the $1/$2/$3 match globals set by a successful
# slice! are read immediately in that branch's body.
#
# @param input [String]
#   Remaining grammar text; mutated in place.
#
# @return [void]
# @raise [Error]
#   On an invalid token name or unrecognized grammar input.
def parse_grammar(input)
if input.slice!(/\A\s+/)
# Skip white space.
elsif input.slice!(/\A#.*\n/)
# Skip comment lines.
elsif input.slice!(/\Amodule\s+(\S+)\n/)
@modulename = $1
elsif input.slice!(/\Aclass\s+(\S+)\n/)
@classname = $1
elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
name, pattern = $1, $2
# A token with no explicit pattern matches its own name literally.
if pattern.nil?
pattern = name
end
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
raise Error.new("Invalid token name #{name}")
end
@tokens << Token.new(name, pattern, @tokens.size)
elsif input.slice!(/\Adrop\s+(\S+)\n/)
pattern = $1
# Drop tokens are anonymous (nil name): matched text is discarded.
@tokens << Token.new(nil, pattern, @tokens.size)
elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
# Rule form: Name: [component list] << ...code... >>
rule_name, rule, code = $1, $2, $3
rule = rule.strip.split(/\s+/)
@rules << Rule.new(rule_name, rule, code)
else
# Truncate long unparseable input in the error message.
if input.size > 25
input = input.slice(0..20) + "..."
end
raise Error.new("Unexpected grammar input: #{input}")
end
end
class << self
def run(input_file, output_file, log_file)
begin
grammar = Grammar.new(File.read(input_file))
generator = Generator.new(grammar, log_file)
generator.generate(output_file)
imbecile = Imbecile.new(File.read(input_file))
imbecile.generate(output_file, log_file)
rescue Error => e
$stderr.puts e.message
return 2

View File

@ -1,4 +1,4 @@
module Imbecile
class Imbecile
module CLI
USAGE = <<EOF

View File

@ -1,4 +1,4 @@
module Imbecile
class Imbecile
class CodePointRange
MAX_CODE_POINT = 0xFFFFFFFF

View File

@ -1,4 +1,4 @@
module Imbecile
class Imbecile
class FA

View File

@ -1,4 +1,4 @@
module Imbecile
class Imbecile
class FA
class State

View File

@ -1,4 +1,4 @@
module Imbecile
class Imbecile
class FA
class State

View File

@ -1,39 +0,0 @@
module Imbecile
  # Generates the parser source file from a parsed Grammar.
  class Generator
    # @param grammar [Grammar]
    #   Parsed grammar supplying tokens, rules, and naming options.
    # @param log_file [String]
    #   Path to the log file (stored; reserved for diagnostics).
    def initialize(grammar, log_file)
      @grammar = grammar
      @log_file = log_file
    end

    # Validate the grammar, build the lexer, and render the D parser
    # template to the output file.
    #
    # @param output_file [String]
    #   Path to write the generated parser source to.
    #
    # @return [void]
    # @raise [Error]
    #   On duplicate token names, rule/token name collisions, or a
    #   missing Start rule.
    def generate(output_file)
      token_names = @grammar.tokens.each_with_object({}) do |token, token_names|
        # Anonymous "drop" tokens have a nil name; skip them so that a
        # grammar with more than one drop pattern is not rejected with
        # a spurious "Duplicate token name" error.
        next if token.name.nil?
        if token_names.include?(token.name)
          raise Error.new("Duplicate token name #{token.name}")
        end
        token_names[token.name] = token
      end
      rule_names = @grammar.rules.each_with_object({}) do |rule, rule_names|
        if token_names.include?(rule.name)
          raise Error.new("Rule name collides with token name #{rule.name}")
        end
        # Multiple productions may share one rule name.
        rule_names[rule.name] ||= []
        rule_names[rule.name] << rule
      end
      unless rule_names["Start"]
        raise Error.new("Start rule not found")
      end
      lexer = Lexer.new(@grammar)
      # Fall back to deriving the class name from the output file name
      # when the grammar did not declare one.
      classname = @grammar.classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
      erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../../assets/parser.d.erb")), nil, "<>")
      result = erb.result(binding.clone)
      File.open(output_file, "wb") do |fh|
        fh.write(result)
      end
    end
  end
end

View File

@ -1,65 +0,0 @@
module Imbecile
# Parses a textual grammar specification into tokens and rules.
#
# The specification supports: "module NAME", "class NAME",
# "token NAME [PATTERN]", "drop PATTERN", and rule definitions of the
# form "Name: [components] << code >>".
class Grammar
# @return [String, nil]
# Module name.
attr_reader :modulename
# @return [String, nil]
# Class name.
attr_reader :classname
# @return [Array<Token>]
# Tokens.
attr_reader :tokens
# @return [Array<Rule>]
# Rules.
attr_reader :rules
# Parse a grammar specification.
#
# Line endings are normalized to "\n"; #consume destructively eats the
# input one construct at a time until it is empty.
#
# @param input [String]
#   Grammar specification text.
def initialize(input)
@tokens = []
@rules = []
input = input.gsub("\r\n", "\n")
while !input.empty?
consume(input)
end
end
private
# Consume one grammar construct from the front of the input string.
#
# Branch order matters: each regex is anchored with \A, and the
# $1/$2/$3 match globals set by a successful slice! are read
# immediately in that branch's body.
#
# @param input [String]
#   Remaining grammar text; mutated in place via String#slice!.
#
# @return [void]
# @raise [Error]
#   On an invalid token name or unrecognized grammar input.
def consume(input)
if input.slice!(/\A\s+/)
# Skip white space.
elsif input.slice!(/\A#.*\n/)
# Skip comment lines.
elsif input.slice!(/\Amodule\s+(\S+)\n/)
@modulename = $1
elsif input.slice!(/\Aclass\s+(\S+)\n/)
@classname = $1
elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
name, pattern = $1, $2
# A token with no explicit pattern matches its own name literally.
if pattern.nil?
pattern = name
end
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
raise Error.new("Invalid token name #{name}")
end
@tokens << Token.new(name, pattern, @tokens.size)
elsif input.slice!(/\Adrop\s+(\S+)\n/)
pattern = $1
# Drop tokens are anonymous (nil name): matched text is discarded.
@tokens << Token.new(nil, pattern, @tokens.size)
elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
# Rule form: Name: [component list] << ...code... >>
rule_name, rule, code = $1, $2, $3
rule = rule.strip.split(/\s+/)
@rules << Rule.new(rule_name, rule, code)
else
# Truncate long unparseable input in the error message.
if input.size > 25
input = input.slice(0..20) + "..."
end
raise Error.new("Unexpected grammar input: #{input}")
end
end
end
end

View File

@ -1,12 +1,12 @@
module Imbecile
class Imbecile
class Lexer
# @return [DFA]
# Lexer DFA.
attr_accessor :dfa
def initialize(grammar)
@dfa = DFA.new(grammar.tokens)
def initialize(tokens)
@dfa = DFA.new(tokens)
end
end

View File

@ -1,4 +1,4 @@
module Imbecile
class Imbecile
class Lexer
class DFA < FA

View File

@ -1,4 +1,4 @@
module Imbecile
class Imbecile
class Regex
attr_reader :unit

View File

@ -1,4 +1,4 @@
module Imbecile
class Imbecile
class Regex
class NFA < FA

View File

@ -1,4 +1,4 @@
module Imbecile
class Imbecile
class Regex
class Unit

View File

@ -1,4 +1,4 @@
module Imbecile
class Imbecile
class Rule

View File

@ -1,4 +1,4 @@
module Imbecile
class Imbecile
class Token

View File

@ -1,3 +1,3 @@
module Imbecile
class Imbecile
VERSION = "0.1.0"
end

View File

@ -1,4 +1,4 @@
module Imbecile
class Imbecile
describe CodePointRange do
describe "#<=>" do

View File

@ -50,8 +50,8 @@ class TestLexer
end
def run(grammar, input)
g = Imbecile::Grammar.new(grammar)
token_dfa = Imbecile::Lexer::DFA.new(g.tokens)
imbecile = Imbecile.new(grammar)
token_dfa = Imbecile::Lexer::DFA.new(imbecile.instance_variable_get(:@tokens))
test_lexer = TestLexer.new(token_dfa)
test_lexer.lex(input)
end

View File

@ -1,4 +1,4 @@
module Imbecile
class Imbecile
RSpec.describe Regex do
it "parses an empty expression" do