Combine Grammar and Generator into top-level Imbecile class
This commit is contained in:
parent
9273bfccf6
commit
00016f16b3
@ -1,12 +1,12 @@
|
||||
<% if @grammar.modulename %>
|
||||
module <%= @grammar.modulename %>;
|
||||
<% if @modulename %>
|
||||
module <%= @modulename %>;
|
||||
|
||||
<% end %>
|
||||
class <%= classname %>
|
||||
{
|
||||
enum
|
||||
{
|
||||
<% @grammar.tokens.each_with_index do |token, index| %>
|
||||
<% @tokens.each_with_index do |token, index| %>
|
||||
<% if token.name %>
|
||||
TOKEN_<%= token.c_name %> = <%= index %>,
|
||||
<% end %>
|
||||
@ -18,7 +18,7 @@ class <%= classname %>
|
||||
}
|
||||
|
||||
static immutable string TokenNames[] = [
|
||||
<% @grammar.tokens.each_with_index do |token, index| %>
|
||||
<% @tokens.each_with_index do |token, index| %>
|
||||
<% if token.name %>
|
||||
"<%= token.name %>",
|
||||
<% else %>
|
||||
|
@ -5,8 +5,6 @@ require_relative "imbecile/code_point_range"
|
||||
require_relative "imbecile/fa"
|
||||
require_relative "imbecile/fa/state"
|
||||
require_relative "imbecile/fa/state/transition"
|
||||
require_relative "imbecile/generator"
|
||||
require_relative "imbecile/grammar"
|
||||
require_relative "imbecile/lexer"
|
||||
require_relative "imbecile/lexer/dfa"
|
||||
require_relative "imbecile/regex"
|
||||
@ -16,7 +14,7 @@ require_relative "imbecile/rule"
|
||||
require_relative "imbecile/token"
|
||||
require_relative "imbecile/version"
|
||||
|
||||
module Imbecile
|
||||
class Imbecile
|
||||
|
||||
# EOF.
|
||||
TOKEN_EOF = 0xFFFFFFFC
|
||||
@ -33,13 +31,82 @@ module Imbecile
|
||||
class Error < RuntimeError
|
||||
end
|
||||
|
||||
# Parse grammar source text, collecting its tokens and rules.
#
# @param input [String]
#   Grammar source text. Line endings are normalized on a copy, so the
#   caller's string is left untouched.
def initialize(input)
  @tokens = []
  @rules = []
  remaining = input.gsub("\r\n", "\n")
  # Each parse_grammar call strips one recognized element off the front of
  # the string (or raises), so keep going until nothing is left.
  parse_grammar(remaining) until remaining.empty?
end
|
||||
|
||||
# Validate the parsed tokens and rules, then render the parser template
# and write the result to +output_file+.
#
# @param output_file [String]
#   Path the rendered parser is written to. When the grammar did not set a
#   class name, one is derived from this file's base name.
# @param log_file [String, nil]
#   Accepted as part of the interface; not used by this method.
#
# @raise [Error]
#   If two named tokens share a name, a rule name matches a token name, or
#   no rule named "Start" exists.
def generate(output_file, log_file)
  token_names = @tokens.each_with_object({}) do |token, names|
    # Tokens declared with "drop" carry a nil name. They must not take part
    # in the duplicate check, otherwise a second "drop" line would be
    # reported as a duplicate of the first.
    next unless token.name
    if names.include?(token.name)
      raise Error, "Duplicate token name #{token.name}"
    end
    names[token.name] = token
  end
  rule_names = @rules.each_with_object({}) do |rule, names|
    if token_names.include?(rule.name)
      raise Error, "Rule name collides with token name #{rule.name}"
    end
    # Several rules may share one name; group them under it.
    (names[rule.name] ||= []) << rule
  end
  unless rule_names["Start"]
    raise Error, "Start rule not found"
  end
  # The locals below keep their names on purpose: the template is evaluated
  # against this method's binding (it reads `classname`; `lexer` is
  # presumably consumed there as well -- confirm against the template).
  lexer = Lexer.new(@tokens)
  classname = @classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
  template_path = File.join(File.dirname(File.expand_path(__FILE__)), "../assets/parser.d.erb")
  # Trim mode "<>" drops the newline after lines that start with <% and end
  # with %>.
  erb = ERB.new(File.read(template_path), nil, "<>")
  result = erb.result(binding.clone)
  File.open(output_file, "wb") do |fh|
    fh.write(result)
  end
end
|
||||
|
||||
private
|
||||
|
||||
def parse_grammar(input)
|
||||
if input.slice!(/\A\s+/)
|
||||
# Skip white space.
|
||||
elsif input.slice!(/\A#.*\n/)
|
||||
# Skip comment lines.
|
||||
elsif input.slice!(/\Amodule\s+(\S+)\n/)
|
||||
@modulename = $1
|
||||
elsif input.slice!(/\Aclass\s+(\S+)\n/)
|
||||
@classname = $1
|
||||
elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
|
||||
name, pattern = $1, $2
|
||||
if pattern.nil?
|
||||
pattern = name
|
||||
end
|
||||
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
|
||||
raise Error.new("Invalid token name #{name}")
|
||||
end
|
||||
@tokens << Token.new(name, pattern, @tokens.size)
|
||||
elsif input.slice!(/\Adrop\s+(\S+)\n/)
|
||||
pattern = $1
|
||||
@tokens << Token.new(nil, pattern, @tokens.size)
|
||||
elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
|
||||
rule_name, rule, code = $1, $2, $3
|
||||
rule = rule.strip.split(/\s+/)
|
||||
@rules << Rule.new(rule_name, rule, code)
|
||||
else
|
||||
if input.size > 25
|
||||
input = input.slice(0..20) + "..."
|
||||
end
|
||||
raise Error.new("Unexpected grammar input: #{input}")
|
||||
end
|
||||
end
|
||||
|
||||
class << self
|
||||
|
||||
def run(input_file, output_file, log_file)
|
||||
begin
|
||||
grammar = Grammar.new(File.read(input_file))
|
||||
generator = Generator.new(grammar, log_file)
|
||||
generator.generate(output_file)
|
||||
imbecile = Imbecile.new(File.read(input_file))
|
||||
imbecile.generate(output_file, log_file)
|
||||
rescue Error => e
|
||||
$stderr.puts e.message
|
||||
return 2
|
||||
|
@ -1,4 +1,4 @@
|
||||
module Imbecile
|
||||
class Imbecile
|
||||
module CLI
|
||||
|
||||
USAGE = <<EOF
|
||||
|
@ -1,4 +1,4 @@
|
||||
module Imbecile
|
||||
class Imbecile
|
||||
class CodePointRange
|
||||
|
||||
MAX_CODE_POINT = 0xFFFFFFFF
|
||||
|
@ -1,4 +1,4 @@
|
||||
module Imbecile
|
||||
class Imbecile
|
||||
|
||||
class FA
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
module Imbecile
|
||||
class Imbecile
|
||||
class FA
|
||||
|
||||
class State
|
||||
|
@ -1,4 +1,4 @@
|
||||
module Imbecile
|
||||
class Imbecile
|
||||
class FA
|
||||
class State
|
||||
|
||||
|
@ -1,39 +0,0 @@
|
||||
module Imbecile
|
||||
|
||||
# Class to generate the parser source from a parsed grammar.
class Generator

  # @param grammar [Grammar]
  #   Parsed grammar; its tokens, rules and classname are read by #generate.
  # @param log_file [String, nil]
  #   Stored in @log_file; not otherwise used within this class.
  def initialize(grammar, log_file)
    @grammar = grammar
    @log_file = log_file
  end

  # Validate the grammar's tokens and rules, then render the parser
  # template and write the result to +output_file+.
  #
  # @param output_file [String]
  #   Path the rendered parser is written to. When the grammar did not set
  #   a class name, one is derived from this file's base name.
  #
  # @raise [Error]
  #   If two named tokens share a name, a rule name matches a token name,
  #   or no rule named "Start" exists.
  def generate(output_file)
    token_names = @grammar.tokens.each_with_object({}) do |token, names|
      # Tokens without a name must not take part in the duplicate check,
      # otherwise two of them would be reported as duplicates of each other.
      next unless token.name
      if names.include?(token.name)
        raise Error, "Duplicate token name #{token.name}"
      end
      names[token.name] = token
    end
    rule_names = @grammar.rules.each_with_object({}) do |rule, names|
      if token_names.include?(rule.name)
        raise Error, "Rule name collides with token name #{rule.name}"
      end
      # Several rules may share one name; group them under it.
      (names[rule.name] ||= []) << rule
    end
    unless rule_names["Start"]
      raise Error, "Start rule not found"
    end
    # These locals keep their names on purpose: the template is evaluated
    # against this method's binding.
    lexer = Lexer.new(@grammar)
    classname = @grammar.classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
    template_path = File.join(File.dirname(File.expand_path(__FILE__)), "../../assets/parser.d.erb")
    # Trim mode "<>" drops the newline after lines that start with <% and
    # end with %>.
    erb = ERB.new(File.read(template_path), nil, "<>")
    result = erb.result(binding.clone)
    File.open(output_file, "wb") do |fh|
      fh.write(result)
    end
  end

end
|
||||
|
||||
end
|
@ -1,65 +0,0 @@
|
||||
module Imbecile
|
||||
class Grammar
|
||||
|
||||
# @return [String, nil]
|
||||
# Module name.
|
||||
attr_reader :modulename
|
||||
|
||||
# @return [String, nil]
|
||||
# Class name.
|
||||
attr_reader :classname
|
||||
|
||||
# @return [Array<Token>]
|
||||
# Tokens.
|
||||
attr_reader :tokens
|
||||
|
||||
# @return [Array<Rule>]
|
||||
# Rules.
|
||||
attr_reader :rules
|
||||
|
||||
def initialize(input)
|
||||
@tokens = []
|
||||
@rules = []
|
||||
input = input.gsub("\r\n", "\n")
|
||||
while !input.empty?
|
||||
consume(input)
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def consume(input)
|
||||
if input.slice!(/\A\s+/)
|
||||
# Skip white space.
|
||||
elsif input.slice!(/\A#.*\n/)
|
||||
# Skip comment lines.
|
||||
elsif input.slice!(/\Amodule\s+(\S+)\n/)
|
||||
@modulename = $1
|
||||
elsif input.slice!(/\Aclass\s+(\S+)\n/)
|
||||
@classname = $1
|
||||
elsif input.slice!(/\Atoken\s+(\S+)(?:\s+(\S+))?\n/)
|
||||
name, pattern = $1, $2
|
||||
if pattern.nil?
|
||||
pattern = name
|
||||
end
|
||||
unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
|
||||
raise Error.new("Invalid token name #{name}")
|
||||
end
|
||||
@tokens << Token.new(name, pattern, @tokens.size)
|
||||
elsif input.slice!(/\Adrop\s+(\S+)\n/)
|
||||
pattern = $1
|
||||
@tokens << Token.new(nil, pattern, @tokens.size)
|
||||
elsif input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
|
||||
rule_name, rule, code = $1, $2, $3
|
||||
rule = rule.strip.split(/\s+/)
|
||||
@rules << Rule.new(rule_name, rule, code)
|
||||
else
|
||||
if input.size > 25
|
||||
input = input.slice(0..20) + "..."
|
||||
end
|
||||
raise Error.new("Unexpected grammar input: #{input}")
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
end
|
@ -1,12 +1,12 @@
|
||||
module Imbecile
|
||||
class Imbecile
|
||||
class Lexer
|
||||
|
||||
# @return [DFA]
|
||||
# Lexer DFA.
|
||||
attr_accessor :dfa
|
||||
|
||||
def initialize(grammar)
|
||||
@dfa = DFA.new(grammar.tokens)
|
||||
def initialize(tokens)
|
||||
@dfa = DFA.new(tokens)
|
||||
end
|
||||
|
||||
end
|
||||
|
@ -1,4 +1,4 @@
|
||||
module Imbecile
|
||||
class Imbecile
|
||||
class Lexer
|
||||
|
||||
class DFA < FA
|
||||
|
@ -1,4 +1,4 @@
|
||||
module Imbecile
|
||||
class Imbecile
|
||||
class Regex
|
||||
|
||||
attr_reader :unit
|
||||
|
@ -1,4 +1,4 @@
|
||||
module Imbecile
|
||||
class Imbecile
|
||||
class Regex
|
||||
|
||||
class NFA < FA
|
||||
|
@ -1,4 +1,4 @@
|
||||
module Imbecile
|
||||
class Imbecile
|
||||
class Regex
|
||||
|
||||
class Unit
|
||||
|
@ -1,4 +1,4 @@
|
||||
module Imbecile
|
||||
class Imbecile
|
||||
|
||||
class Rule
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
module Imbecile
|
||||
class Imbecile
|
||||
|
||||
class Token
|
||||
|
||||
|
@ -1,3 +1,3 @@
|
||||
module Imbecile
|
||||
class Imbecile
|
||||
VERSION = "0.1.0"
|
||||
end
|
||||
|
@ -1,4 +1,4 @@
|
||||
module Imbecile
|
||||
class Imbecile
|
||||
describe CodePointRange do
|
||||
|
||||
describe "#<=>" do
|
||||
|
@ -50,8 +50,8 @@ class TestLexer
|
||||
end
|
||||
|
||||
# Build a token DFA from the grammar source text and lex +input+ with it.
#
# @param grammar [String]
#   Grammar source text handed to Imbecile.new.
# @param input [String]
#   Text handed to TestLexer#lex.
#
# @return
#   Whatever TestLexer#lex returns.
def run(grammar, input)
  imbecile = Imbecile.new(grammar)
  # The parsed token list is read straight from the instance variable; no
  # public reader for it is visible from here.
  token_dfa = Imbecile::Lexer::DFA.new(imbecile.instance_variable_get(:@tokens))
  test_lexer = TestLexer.new(token_dfa)
  test_lexer.lex(input)
end
|
||||
|
@ -1,4 +1,4 @@
|
||||
module Imbecile
|
||||
class Imbecile
|
||||
RSpec.describe Regex do
|
||||
|
||||
it "parses an empty expression" do
|
||||
|
Loading…
x
Reference in New Issue
Block a user