Store tokens and drop tokens separately

Josh Holtrop 2022-06-05 14:36:19 -04:00
parent 6f1ce32775
commit f37801ec9e
8 changed files with 42 additions and 36 deletions

View File

@@ -6,10 +6,8 @@ class <%= classname %>
 {
     enum
     {
-<% @tokens.each_with_index do |(name, token), index| %>
-<% if token.name %>
+<% @grammar.tokens.each_with_index do |token, index| %>
         TOKEN_<%= token.c_name %> = <%= index %>,
 <% end %>
-<% end %>
         TOKEN_EOF = <%= TOKEN_EOF %>,
         TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>,
@@ -18,12 +16,8 @@ class <%= classname %>
     }

     static immutable string TokenNames[] = [
-<% @tokens.each_with_index do |(name, token), index| %>
-<% if token.name %>
+<% @grammar.tokens.each_with_index do |token, index| %>
         "<%= token.name %>",
-<% else %>
-        null,
-<% end %>
 <% end %>
     ];
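Since drop tokens no longer occupy slots in the token list, the template can emit TokenNames without null placeholders. A minimal sketch of the new iteration, using a stubbed Token and hypothetical token names:

    require "erb"

    # Stand-in for Propane::Token; the names here are hypothetical.
    Token = Struct.new(:name, :c_name)
    tokens = [Token.new("int", "INT"), Token.new("plus", "PLUS")]

    template = ERB.new(<<~TEMPLATE, trim_mode: "<>")
      static immutable string TokenNames[] = [
      <% tokens.each do |token| %>
          "<%= token.name %>",
      <% end %>
      ];
    TEMPLATE
    puts template.result(binding)
    # static immutable string TokenNames[] = [
    #     "int",
    #     "plus",
    # ];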

View File

@@ -37,17 +37,16 @@ class Propane
     end

     def initialize(input)
-      grammar = Grammar.new(input)
-      @classname = grammar.classname
-      @modulename = grammar.modulename
-      @tokens = grammar.tokens
-      @rule_sets = grammar.rule_sets
+      @grammar = Grammar.new(input)
+      @classname = @grammar.classname
+      @modulename = @grammar.modulename
+      @rule_sets = @grammar.rule_sets
     end

     def generate(output_file, log_file)
       expand_rules
-      lexer = Lexer.new(@tokens)
-      parser = Parser.new(@tokens, @rule_sets)
+      lexer = Lexer.new(@grammar.tokens, @grammar.drop_tokens)
+      parser = Parser.new(@rule_sets)
       classname = @classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
       erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../assets/parser.d.erb")), trim_mode: "<>")
       result = erb.result(binding.clone)
@@ -59,9 +58,16 @@ class Propane
     private

     def expand_rules
+      tokens_by_name = {}
+      @grammar.tokens.each do |token|
+        if tokens_by_name.include?(token.name)
+          raise Error.new("Duplicate token name #{token.name.inspect}")
+        end
+        tokens_by_name[token.name] = token
+      end
       @rule_sets.each do |rule_name, rule_set|
-        if @tokens.include?(rule_name)
-          raise Error.new("Rule name collides with token name #{rule_name}")
+        if tokens_by_name.include?(rule_name)
+          raise Error.new("Rule name collides with token name #{rule_name.inspect}")
         end
       end
       unless @rule_sets["Start"]
@@ -70,8 +76,8 @@ class Propane
       @rule_sets.each do |rule_name, rule_set|
         rule_set.rules.each do |rule|
          rule.components.map! do |component|
-            if @tokens[component]
-              @tokens[component]
+            if tokens_by_name[component]
+              tokens_by_name[component]
            elsif @rule_sets[component]
              @rule_sets[component]
            else
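With @tokens no longer a Hash keyed by name, name lookups go through a locally built index, and the duplicate check the Hash provided implicitly is now explicit. A standalone sketch of that validation (Token is a stub and the error strings are simplified):

    Token = Struct.new(:name)

    def index_tokens(tokens, rule_names)
      tokens_by_name = {}
      tokens.each do |token|
        # An Array happily holds two tokens with the same name, so the
        # duplicate check must now be performed by hand.
        raise "Duplicate token name #{token.name.inspect}" if tokens_by_name.include?(token.name)
        tokens_by_name[token.name] = token
      end
      rule_names.each do |rule_name|
        raise "Rule name collides with token name #{rule_name.inspect}" if tokens_by_name.include?(rule_name)
      end
      tokens_by_name
    end

    index_tokens([Token.new("int")], ["Start"])  # => {"int" => #<struct Token ...>}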

View File

@@ -3,12 +3,14 @@ class Propane
   class Grammar

     attr_reader :classname
+    attr_reader :drop_tokens
     attr_reader :modulename
     attr_reader :rule_sets
     attr_reader :tokens

     def initialize(input)
-      @tokens = {}
+      @tokens = []
+      @drop_tokens = []
       @rule_sets = {}
       input = input.gsub("\r\n", "\n")
       parse_grammar(input)
@@ -35,14 +37,10 @@ class Propane
         unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
           raise Error.new("Invalid token name #{name}")
         end
-        if @tokens[name]
-          raise Error.new("Duplicate token name #{name}")
-        else
-          @tokens[name] = Token.new(name, pattern, @tokens.size, line_number)
-        end
+        @tokens << Token.new(name, pattern, @tokens.size, line_number)
       elsif sliced = input.slice!(/\Adrop\s+(\S+)\n/)
         pattern = $1
-        @tokens[@tokens.size] = Token.new(nil, pattern, @tokens.size, line_number)
+        @drop_tokens << Token.new(nil, pattern, nil, line_number)
       elsif sliced = input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
         rule_name, components, code = $1, $2, $3
         components = components.strip.split(/\s+/)
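The two directive arms now construct different tokens: named ones carry a sequential id equal to their index in @tokens, drop ones carry neither name nor id and can never appear as a rule component. A sketch using the Token.new(name, pattern, id, line_number) signature visible in this diff:

    named   = Token.new("int", '\d+', 0, 1)   # id 0 == its index in @tokens
    dropped = Token.new(nil, '\s+', nil, 2)   # no name, no id

    named.drop?    # => false
    dropped.drop?  # => true -- see the drop? predicate added below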

View File

@@ -5,8 +5,8 @@ class Propane
       # Lexer DFA.
       attr_accessor :dfa

-      def initialize(tokens)
-        @dfa = DFA.new(tokens)
+      def initialize(tokens, drop_tokens)
+        @dfa = DFA.new(tokens, drop_tokens)
       end

       def build_tables
@@ -17,10 +17,10 @@ class Propane
           accepts =
             if state.accepts.nil?
               TOKEN_NONE
-            elsif state.accepts.name
-              state.accepts.id
-            else
+            elsif state.accepts.drop?
               TOKEN_DROP
+            else
+              state.accepts.id
             end
           state_table << {
             transition_table_index: transition_table.size,
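With the TOKEN_DROP sentinel in the state table, a generated lexer can discard dropped matches inside its scan loop. A hedged sketch of that consumption; the sentinel values and the longest_match helper are hypothetical, not Propane's actual API:

    TOKEN_NONE = 0xFFFF_FFFE  # hypothetical sentinel values
    TOKEN_DROP = 0xFFFF_FFFD

    def next_token(input)
      loop do
        accepts, length = longest_match(input)  # hypothetical DFA walk
        raise "lex error" if accepts == TOKEN_NONE
        text = input.slice!(0, length)
        # Drop-accepting states discard the matched text and keep
        # scanning rather than emitting a token.
        return [accepts, text] unless accepts == TOKEN_DROP
      end
    end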

View File

@@ -3,10 +3,10 @@ class Propane
     class DFA < FA

-      def initialize(tokens)
+      def initialize(tokens, drop_tokens)
         super()
         start_nfa = Regex::NFA.new
-        tokens.each do |name, token|
+        (tokens + drop_tokens).each do |token|
           start_nfa.start_state.add_transition(nil, token.nfa.start_state)
         end
         @nfa_state_sets = {}
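Matching behavior is unchanged: both lists still contribute their NFAs to a single shared start state through epsilon transitions, so a drop token competes for the longest match like any other token. A reduced model of that alternation (State here is a stub):

    State = Struct.new(:transitions) do
      def add_transition(symbol, dest)
        transitions << [symbol, dest]  # symbol nil == epsilon edge
      end
    end

    start = State.new([])
    token_nfas = [State.new([]), State.new([])]  # e.g. one kept, one drop token
    token_nfas.each { |s| start.add_transition(nil, s) }
    start.transitions.size  # => 2 -- one DFA is then built over all of them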

View File

@@ -2,7 +2,7 @@ class Propane
   class Parser

-    def initialize(tokens, rule_sets)
+    def initialize(rule_sets)
       @token_eof = Token.new("$", nil, TOKEN_EOF, nil)
       @item_sets = []
       @item_sets_set = {}

View File

@@ -48,6 +48,14 @@ class Propane
       @name.upcase
     end

+    # Whether the token is a drop token.
+    #
+    # @return [Boolean]
+    #   Whether the token is a drop token.
+    def drop?
+      @name.nil?
+    end
+
     def to_s
       @name
     end
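drop? keys off the absent name, which is exactly how grammar.rb constructs drop tokens above. Quick usage, mirroring the constructor calls seen in this commit:

    Token.new("plus", '\+', 1, 3).drop?        # => false
    Token.new(nil, '\s+', nil, 4).drop?        # => true (unnamed == drop)
    Token.new("$", nil, TOKEN_EOF, nil).drop?  # => false (EOF pseudo-token has a name)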

View File

@@ -50,8 +50,8 @@ class TestLexer
   end

   def run(grammar, input)
-    propane = Propane.new(grammar)
-    token_dfa = Propane::Lexer::DFA.new(propane.instance_variable_get(:@tokens))
+    grammar = Propane::Grammar.new(grammar)
+    token_dfa = Propane::Lexer::DFA.new(grammar.tokens, grammar.drop_tokens)
     test_lexer = TestLexer.new(token_dfa)
     test_lexer.lex(input)
   end