From 00016f16b36d15a182d32841d43f8252da9905a4 Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Sun, 22 Aug 2021 21:04:46 -0400 Subject: [PATCH] Combine Grammar and Generator into top-level Imbecile class --- assets/parser.d.erb | 8 +-- lib/imbecile.rb | 79 ++++++++++++++++++++++++-- lib/imbecile/cli.rb | 2 +- lib/imbecile/code_point_range.rb | 2 +- lib/imbecile/fa.rb | 2 +- lib/imbecile/fa/state.rb | 2 +- lib/imbecile/fa/state/transition.rb | 2 +- lib/imbecile/generator.rb | 39 ------------- lib/imbecile/grammar.rb | 65 --------------------- lib/imbecile/lexer.rb | 6 +- lib/imbecile/lexer/dfa.rb | 2 +- lib/imbecile/regex.rb | 2 +- lib/imbecile/regex/nfa.rb | 2 +- lib/imbecile/regex/unit.rb | 2 +- lib/imbecile/rule.rb | 2 +- lib/imbecile/token.rb | 2 +- lib/imbecile/version.rb | 2 +- spec/imbecile/code_point_range_spec.rb | 2 +- spec/imbecile/lexer/dfa_spec.rb | 4 +- spec/imbecile/regex_spec.rb | 2 +- 20 files changed, 96 insertions(+), 133 deletions(-) delete mode 100644 lib/imbecile/generator.rb delete mode 100644 lib/imbecile/grammar.rb diff --git a/assets/parser.d.erb b/assets/parser.d.erb index d258713..fd04ec5 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -1,12 +1,12 @@ -<% if @grammar.modulename %> -module <%= @grammar.modulename %>; +<% if @modulename %> +module <%= @modulename %>; <% end %> class <%= classname %> { enum { -<% @grammar.tokens.each_with_index do |token, index| %> +<% @tokens.each_with_index do |token, index| %> <% if token.name %> TOKEN_<%= token.c_name %> = <%= index %>, <% end %> @@ -18,7 +18,7 @@ class <%= classname %> } static immutable string TokenNames[] = [ -<% @grammar.tokens.each_with_index do |token, index| %> +<% @tokens.each_with_index do |token, index| %> <% if token.name %> "<%= token.name %>", <% else %> diff --git a/lib/imbecile.rb b/lib/imbecile.rb index 1cf254d..4672670 100644 --- a/lib/imbecile.rb +++ b/lib/imbecile.rb @@ -5,8 +5,6 @@ require_relative "imbecile/code_point_range" require_relative "imbecile/fa" require_relative "imbecile/fa/state" require_relative "imbecile/fa/state/transition" -require_relative "imbecile/generator" -require_relative "imbecile/grammar" require_relative "imbecile/lexer" require_relative "imbecile/lexer/dfa" require_relative "imbecile/regex" @@ -16,7 +14,7 @@ require_relative "imbecile/rule" require_relative "imbecile/token" require_relative "imbecile/version" -module Imbecile +class Imbecile # EOF. TOKEN_EOF = 0xFFFFFFFC @@ -33,13 +31,82 @@ module Imbecile class Error < RuntimeError end + def initialize(input) + @tokens = [] + @rules = [] + input = input.gsub("\r\n", "\n") + while !input.empty? + parse_grammar(input) + end + end + + def generate(output_file, log_file) + token_names = @tokens.each_with_object({}) do |token, token_names| + if token_names.include?(token.name) + raise Error.new("Duplicate token name #{token.name}") + end + token_names[token.name] = token + end + rule_names = @rules.each_with_object({}) do |rule, rule_names| + if token_names.include?(rule.name) + raise Error.new("Rule name collides with token name #{rule.name}") + end + rule_names[rule.name] ||= [] + rule_names[rule.name] << rule + end + unless rule_names["Start"] + raise Error.new("Start rule not found") + end + lexer = Lexer.new(@tokens) + classname = @classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize + erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../assets/parser.d.erb")), nil, "<>") + result = erb.result(binding.clone) + File.open(output_file, "wb") do |fh| + fh.write(result) + end + end + + private + + def parse_grammar(input) + if input.slice!(/\A\s+/) + # Skip white space. + elsif input.slice!(/\A#.*\n/) + # Skip comment lines. + elsif input.slice!(/\Amodule\s+(\S+)\n/) + @modulename = $1 + elsif input.slice!(/\Aclass\s+(\S+)\n/) + @classname = $1 + elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/) + name, pattern = $1, $2 + if pattern.nil? + pattern = name + end + unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/ + raise Error.new("Invalid token name #{name}") + end + @tokens << Token.new(name, pattern, @tokens.size) + elsif input.slice!(/\Adrop\s+(\S+)\n/) + pattern = $1 + @tokens << Token.new(nil, pattern, @tokens.size) + elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m) + rule_name, rule, code = $1, $2, $3 + rule = rule.strip.split(/\s+/) + @rules << Rule.new(rule_name, rule, code) + else + if input.size > 25 + input = input.slice(0..20) + "..." + end + raise Error.new("Unexpected grammar input: #{input}") + end + end + class << self def run(input_file, output_file, log_file) begin - grammar = Grammar.new(File.read(input_file)) - generator = Generator.new(grammar, log_file) - generator.generate(output_file) + imbecile = Imbecile.new(File.read(input_file)) + imbecile.generate(output_file, log_file) rescue Error => e $stderr.puts e.message return 2 diff --git a/lib/imbecile/cli.rb b/lib/imbecile/cli.rb index 46ecaee..66b0679 100644 --- a/lib/imbecile/cli.rb +++ b/lib/imbecile/cli.rb @@ -1,4 +1,4 @@ -module Imbecile +class Imbecile module CLI USAGE = <") - result = erb.result(binding.clone) - File.open(output_file, "wb") do |fh| - fh.write(result) - end - end - - end - -end diff --git a/lib/imbecile/grammar.rb b/lib/imbecile/grammar.rb deleted file mode 100644 index 3581204..0000000 --- a/lib/imbecile/grammar.rb +++ /dev/null @@ -1,65 +0,0 @@ -module Imbecile - class Grammar - - # @return [String, nil] - # Module name. - attr_reader :modulename - - # @return [String, nil] - # Class name. - attr_reader :classname - - # @return [Array] - # Tokens. - attr_reader :tokens - - # @return [Array] - # Rules. - attr_reader :rules - - def initialize(input) - @tokens = [] - @rules = [] - input = input.gsub("\r\n", "\n") - while !input.empty? - consume(input) - end - end - - private - - def consume(input) - if input.slice!(/\A\s+/) - # Skip white space. - elsif input.slice!(/\A#.*\n/) - # Skip comment lines. - elsif input.slice!(/\Amodule\s+(\S+)\n/) - @modulename = $1 - elsif input.slice!(/\Aclass\s+(\S+)\n/) - @classname = $1 - elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/) - name, pattern = $1, $2 - if pattern.nil? - pattern = name - end - unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/ - raise Error.new("Invalid token name #{name}") - end - @tokens << Token.new(name, pattern, @tokens.size) - elsif input.slice!(/\Adrop\s+(\S+)\n/) - pattern = $1 - @tokens << Token.new(nil, pattern, @tokens.size) - elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m) - rule_name, rule, code = $1, $2, $3 - rule = rule.strip.split(/\s+/) - @rules << Rule.new(rule_name, rule, code) - else - if input.size > 25 - input = input.slice(0..20) + "..." - end - raise Error.new("Unexpected grammar input: #{input}") - end - end - - end -end diff --git a/lib/imbecile/lexer.rb b/lib/imbecile/lexer.rb index c139a16..0115ccc 100644 --- a/lib/imbecile/lexer.rb +++ b/lib/imbecile/lexer.rb @@ -1,12 +1,12 @@ -module Imbecile +class Imbecile class Lexer # @return [DFA] # Lexer DFA. attr_accessor :dfa - def initialize(grammar) - @dfa = DFA.new(grammar.tokens) + def initialize(tokens) + @dfa = DFA.new(tokens) end end diff --git a/lib/imbecile/lexer/dfa.rb b/lib/imbecile/lexer/dfa.rb index 74903f4..0930fb5 100644 --- a/lib/imbecile/lexer/dfa.rb +++ b/lib/imbecile/lexer/dfa.rb @@ -1,4 +1,4 @@ -module Imbecile +class Imbecile class Lexer class DFA < FA diff --git a/lib/imbecile/regex.rb b/lib/imbecile/regex.rb index 59b22b7..c62d45c 100644 --- a/lib/imbecile/regex.rb +++ b/lib/imbecile/regex.rb @@ -1,4 +1,4 @@ -module Imbecile +class Imbecile class Regex attr_reader :unit diff --git a/lib/imbecile/regex/nfa.rb b/lib/imbecile/regex/nfa.rb index 824ed04..4b89c8e 100644 --- a/lib/imbecile/regex/nfa.rb +++ b/lib/imbecile/regex/nfa.rb @@ -1,4 +1,4 @@ -module Imbecile +class Imbecile class Regex class NFA < FA diff --git a/lib/imbecile/regex/unit.rb b/lib/imbecile/regex/unit.rb index f1d4ff6..db12c2a 100644 --- a/lib/imbecile/regex/unit.rb +++ b/lib/imbecile/regex/unit.rb @@ -1,4 +1,4 @@ -module Imbecile +class Imbecile class Regex class Unit diff --git a/lib/imbecile/rule.rb b/lib/imbecile/rule.rb index df3c251..c89543e 100644 --- a/lib/imbecile/rule.rb +++ b/lib/imbecile/rule.rb @@ -1,4 +1,4 @@ -module Imbecile +class Imbecile class Rule diff --git a/lib/imbecile/token.rb b/lib/imbecile/token.rb index 266ef25..e4283e7 100644 --- a/lib/imbecile/token.rb +++ b/lib/imbecile/token.rb @@ -1,4 +1,4 @@ -module Imbecile +class Imbecile class Token diff --git a/lib/imbecile/version.rb b/lib/imbecile/version.rb index 04dbd28..38d1097 100644 --- a/lib/imbecile/version.rb +++ b/lib/imbecile/version.rb @@ -1,3 +1,3 @@ -module Imbecile +class Imbecile VERSION = "0.1.0" end diff --git a/spec/imbecile/code_point_range_spec.rb b/spec/imbecile/code_point_range_spec.rb index e62edad..ec8e2e0 100644 --- a/spec/imbecile/code_point_range_spec.rb +++ b/spec/imbecile/code_point_range_spec.rb @@ -1,4 +1,4 @@ -module Imbecile +class Imbecile describe CodePointRange do describe "#<=>" do diff --git a/spec/imbecile/lexer/dfa_spec.rb b/spec/imbecile/lexer/dfa_spec.rb index e8b2557..d692e0e 100644 --- a/spec/imbecile/lexer/dfa_spec.rb +++ b/spec/imbecile/lexer/dfa_spec.rb @@ -50,8 +50,8 @@ class TestLexer end def run(grammar, input) - g = Imbecile::Grammar.new(grammar) - token_dfa = Imbecile::Lexer::DFA.new(g.tokens) + imbecile = Imbecile.new(grammar) + token_dfa = Imbecile::Lexer::DFA.new(imbecile.instance_variable_get(:@tokens)) test_lexer = TestLexer.new(token_dfa) test_lexer.lex(input) end diff --git a/spec/imbecile/regex_spec.rb b/spec/imbecile/regex_spec.rb index caeb533..cecf2c4 100644 --- a/spec/imbecile/regex_spec.rb +++ b/spec/imbecile/regex_spec.rb @@ -1,4 +1,4 @@ -module Imbecile +class Imbecile RSpec.describe Regex do it "parses an empty expression" do