Combine Grammar and Generator into top-level Imbecile class

This commit is contained in:
Josh Holtrop 2021-08-22 21:04:46 -04:00
parent 9273bfccf6
commit 00016f16b3
20 changed files with 96 additions and 133 deletions

View File

@ -1,12 +1,12 @@
<% if @grammar.modulename %> <% if @modulename %>
module <%= @grammar.modulename %>; module <%= @modulename %>;
<% end %> <% end %>
class <%= classname %> class <%= classname %>
{ {
enum enum
{ {
<% @grammar.tokens.each_with_index do |token, index| %> <% @tokens.each_with_index do |token, index| %>
<% if token.name %> <% if token.name %>
TOKEN_<%= token.c_name %> = <%= index %>, TOKEN_<%= token.c_name %> = <%= index %>,
<% end %> <% end %>
@ -18,7 +18,7 @@ class <%= classname %>
} }
static immutable string TokenNames[] = [ static immutable string TokenNames[] = [
<% @grammar.tokens.each_with_index do |token, index| %> <% @tokens.each_with_index do |token, index| %>
<% if token.name %> <% if token.name %>
"<%= token.name %>", "<%= token.name %>",
<% else %> <% else %>

View File

@ -5,8 +5,6 @@ require_relative "imbecile/code_point_range"
require_relative "imbecile/fa" require_relative "imbecile/fa"
require_relative "imbecile/fa/state" require_relative "imbecile/fa/state"
require_relative "imbecile/fa/state/transition" require_relative "imbecile/fa/state/transition"
require_relative "imbecile/generator"
require_relative "imbecile/grammar"
require_relative "imbecile/lexer" require_relative "imbecile/lexer"
require_relative "imbecile/lexer/dfa" require_relative "imbecile/lexer/dfa"
require_relative "imbecile/regex" require_relative "imbecile/regex"
@ -16,7 +14,7 @@ require_relative "imbecile/rule"
require_relative "imbecile/token" require_relative "imbecile/token"
require_relative "imbecile/version" require_relative "imbecile/version"
module Imbecile class Imbecile
# EOF. # EOF.
TOKEN_EOF = 0xFFFFFFFC TOKEN_EOF = 0xFFFFFFFC
@ -33,13 +31,82 @@ module Imbecile
class Error < RuntimeError class Error < RuntimeError
end end
# Parse a grammar given as a single input string.
#
# @param input [String]
#   Full grammar source text.
def initialize(input)
@tokens = []
@rules = []
# Normalize Windows line endings so the parser only ever sees "\n".
source = input.gsub("\r\n", "\n")
# parse_grammar destructively consumes one item from the front of the
# string per call; repeat until nothing remains.
parse_grammar(source) until source.empty?
end
# Validate the parsed grammar and write the generated parser source to
# output_file, rendered from the ERB template in assets/parser.d.erb.
#
# @param output_file [String]
#   Path the generated parser source is written to.
# @param log_file [String]
#   Log file path (currently unused here; kept for interface compatibility).
#
# @raise [Error]
#   If a named token is duplicated, a rule name collides with a token name,
#   or no "Start" rule is defined.
#
# @return [void]
def generate(output_file, log_file)
# Map token name => Token while checking for duplicates. Anonymous "drop"
# tokens have a nil name and are excluded so that multiple drop directives
# are not falsely reported as duplicates.
token_names = @tokens.each_with_object({}) do |token, token_names|
if token.name
if token_names.include?(token.name)
raise Error.new("Duplicate token name #{token.name}")
end
token_names[token.name] = token
end
end
# Group rules by name; multiple rules may share a name (alternatives), but
# a rule name must not collide with a token name.
rule_names = @rules.each_with_object({}) do |rule, rule_names|
if token_names.include?(rule.name)
raise Error.new("Rule name collides with token name #{rule.name}")
end
rule_names[rule.name] ||= []
rule_names[rule.name] << rule
end
unless rule_names["Start"]
raise Error.new("Start rule not found")
end
# NOTE: lexer, classname, token_names, and rule_names are referenced by the
# ERB template through the cloned binding below — do not rename them.
lexer = Lexer.new(@tokens)
# Default the class name to the output file's base name, truncated at the
# first non-alphanumeric character and capitalized, when the grammar does
# not specify one.
classname = @classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../assets/parser.d.erb")), nil, "<>")
result = erb.result(binding.clone)
File.open(output_file, "wb") do |fh|
fh.write(result)
end
end
private
# Consume a single item (directive, token definition, or rule) from the
# front of the grammar input string.
#
# The input string is destructively modified: each branch uses String#slice!
# to remove the text it matched, and the $1/$2/$3 capture globals refer to
# the most recent successful match. Callers invoke this repeatedly until the
# string is empty.
#
# @param input [String]
#   Remaining grammar text; mutated in place.
#
# @raise [Error]
#   On an invalid token name or unrecognized input.
#
# @return [void]
def parse_grammar(input)
if input.slice!(/\A\s+/)
# Skip white space.
elsif input.slice!(/\A#.*\n/)
# Skip comment lines.
elsif input.slice!(/\Amodule\s+(\S+)\n/)
# "module NAME" names the module wrapping the generated parser.
@modulename = $1
elsif input.slice!(/\Aclass\s+(\S+)\n/)
# "class NAME" names the generated parser class.
@classname = $1
elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
# "token NAME [PATTERN]" defines a named token; the pattern defaults to
# the token name itself when omitted.
name, pattern = $1, $2
if pattern.nil?
pattern = name
end
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
raise Error.new("Invalid token name #{name}")
end
# Third argument is the token's position in @tokens — presumably its
# numeric ID in the generated lexer (TODO confirm against Token).
@tokens << Token.new(name, pattern, @tokens.size)
elsif input.slice!(/\Adrop\s+(\S+)\n/)
# "drop PATTERN" defines an anonymous token (nil name); its matches are
# dropped rather than emitted.
pattern = $1
@tokens << Token.new(nil, pattern, @tokens.size)
elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
# "Name: [components] <<\n code \n>>" defines a rule: the bracketed list
# is split on whitespace into component names and the <<...>> body is the
# user code associated with the rule.
rule_name, rule, code = $1, $2, $3
rule = rule.strip.split(/\s+/)
@rules << Rule.new(rule_name, rule, code)
else
# Nothing matched: report the offending input, truncated for readability.
if input.size > 25
input = input.slice(0..20) + "..."
end
raise Error.new("Unexpected grammar input: #{input}")
end
end
class << self class << self
def run(input_file, output_file, log_file) def run(input_file, output_file, log_file)
begin begin
grammar = Grammar.new(File.read(input_file)) imbecile = Imbecile.new(File.read(input_file))
generator = Generator.new(grammar, log_file) imbecile.generate(output_file, log_file)
generator.generate(output_file)
rescue Error => e rescue Error => e
$stderr.puts e.message $stderr.puts e.message
return 2 return 2

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
module CLI module CLI
USAGE = <<EOF USAGE = <<EOF

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class CodePointRange class CodePointRange
MAX_CODE_POINT = 0xFFFFFFFF MAX_CODE_POINT = 0xFFFFFFFF

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class FA class FA

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class FA class FA
class State class State

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class FA class FA
class State class State

View File

@ -1,39 +0,0 @@
module Imbecile
# Class to generate the parser generator source.
#
# Takes a parsed Grammar and renders the parser source from the ERB
# template in assets/parser.d.erb.
class Generator
# @param grammar [Grammar]
#   Parsed grammar providing tokens, rules, and name settings.
# @param log_file [String]
#   Log file path; stored but not read in this class.
def initialize(grammar, log_file)
@grammar = grammar
@log_file = log_file
end
# Validate the grammar and write the generated parser source to
# output_file.
#
# @param output_file [String]
#   Path the generated parser source is written to.
#
# @raise [Error]
#   On duplicate token names, a rule/token name collision, or a missing
#   "Start" rule.
#
# @return [void]
def generate(output_file)
# Map token name => Token while checking for duplicates.
# NOTE(review): anonymous "drop" tokens have a nil name; a second nil
# entry would be reported here as "Duplicate token name " — looks
# unintended, confirm.
token_names = @grammar.tokens.each_with_object({}) do |token, token_names|
if token_names.include?(token.name)
raise Error.new("Duplicate token name #{token.name}")
end
token_names[token.name] = token
end
# Group rules by name; multiple rules may share a name, but a rule name
# must not collide with a token name.
rule_names = @grammar.rules.each_with_object({}) do |rule, rule_names|
if token_names.include?(rule.name)
raise Error.new("Rule name collides with token name #{rule.name}")
end
rule_names[rule.name] ||= []
rule_names[rule.name] << rule
end
unless rule_names["Start"]
raise Error.new("Start rule not found")
end
# Locals below (lexer, classname, and the hashes above) are consumed by
# the ERB template through the cloned binding passed to erb.result.
lexer = Lexer.new(@grammar)
# Class name defaults to the output file's base name, truncated at the
# first non-alphanumeric character and capitalized.
classname = @grammar.classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../../assets/parser.d.erb")), nil, "<>")
result = erb.result(binding.clone)
File.open(output_file, "wb") do |fh|
fh.write(result)
end
end
end
end

View File

@ -1,65 +0,0 @@
module Imbecile
# Parsed representation of a grammar input file: token definitions, rules,
# and optional module/class name directives.
class Grammar
# @return [String, nil]
# Module name.
attr_reader :modulename
# @return [String, nil]
# Class name.
attr_reader :classname
# @return [Array<Token>]
# Tokens.
attr_reader :tokens
# @return [Array<Rule>]
# Rules.
attr_reader :rules
# Parse a grammar given as a single input string.
#
# @param input [String]
#   Full grammar source text; "\r\n" sequences are normalized to "\n"
#   before parsing.
def initialize(input)
@tokens = []
@rules = []
input = input.gsub("\r\n", "\n")
# consume destructively removes one item from the front of the string
# per call.
while !input.empty?
consume(input)
end
end
private
# Consume a single item (directive, token definition, or rule) from the
# front of the grammar input string. The string is mutated in place via
# String#slice!; $1/$2/$3 refer to the most recent successful match.
#
# @param input [String]
#   Remaining grammar text; mutated in place.
#
# @raise [Error]
#   On an invalid token name or unrecognized input.
#
# @return [void]
def consume(input)
if input.slice!(/\A\s+/)
# Skip white space.
elsif input.slice!(/\A#.*\n/)
# Skip comment lines.
elsif input.slice!(/\Amodule\s+(\S+)\n/)
# "module NAME" names the module wrapping the generated parser.
@modulename = $1
elsif input.slice!(/\Aclass\s+(\S+)\n/)
# "class NAME" names the generated parser class.
@classname = $1
elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
# "token NAME [PATTERN]" defines a named token; the pattern defaults to
# the token name itself when omitted.
name, pattern = $1, $2
if pattern.nil?
pattern = name
end
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
raise Error.new("Invalid token name #{name}")
end
# Third argument is the token's position in @tokens — presumably its
# numeric ID in the generated lexer (TODO confirm against Token).
@tokens << Token.new(name, pattern, @tokens.size)
elsif input.slice!(/\Adrop\s+(\S+)\n/)
# "drop PATTERN" defines an anonymous token (nil name); its matches are
# dropped rather than emitted.
pattern = $1
@tokens << Token.new(nil, pattern, @tokens.size)
elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
# "Name: [components] <<\n code \n>>" defines a rule: the bracketed
# list is split on whitespace into component names and the <<...>> body
# is the user code associated with the rule.
rule_name, rule, code = $1, $2, $3
rule = rule.strip.split(/\s+/)
@rules << Rule.new(rule_name, rule, code)
else
# Nothing matched: report the offending input, truncated for
# readability.
if input.size > 25
input = input.slice(0..20) + "..."
end
raise Error.new("Unexpected grammar input: #{input}")
end
end
end
end

View File

@ -1,12 +1,12 @@
module Imbecile class Imbecile
class Lexer class Lexer
# @return [DFA] # @return [DFA]
# Lexer DFA. # Lexer DFA.
attr_accessor :dfa attr_accessor :dfa
def initialize(grammar) def initialize(tokens)
@dfa = DFA.new(grammar.tokens) @dfa = DFA.new(tokens)
end end
end end

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class Lexer class Lexer
class DFA < FA class DFA < FA

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class Regex class Regex
attr_reader :unit attr_reader :unit

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class Regex class Regex
class NFA < FA class NFA < FA

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class Regex class Regex
class Unit class Unit

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class Rule class Rule

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
class Token class Token

View File

@ -1,3 +1,3 @@
module Imbecile class Imbecile
VERSION = "0.1.0" VERSION = "0.1.0"
end end

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
describe CodePointRange do describe CodePointRange do
describe "#<=>" do describe "#<=>" do

View File

@ -50,8 +50,8 @@ class TestLexer
end end
def run(grammar, input) def run(grammar, input)
g = Imbecile::Grammar.new(grammar) imbecile = Imbecile.new(grammar)
token_dfa = Imbecile::Lexer::DFA.new(g.tokens) token_dfa = Imbecile::Lexer::DFA.new(imbecile.instance_variable_get(:@tokens))
test_lexer = TestLexer.new(token_dfa) test_lexer = TestLexer.new(token_dfa)
test_lexer.lex(input) test_lexer.lex(input)
end end

View File

@ -1,4 +1,4 @@
module Imbecile class Imbecile
RSpec.describe Regex do RSpec.describe Regex do
it "parses an empty expression" do it "parses an empty expression" do