Store tokens and drop tokens separately
commit f37801ec9e
parent 6f1ce32775
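Previously Grammar stored all tokens, named and drop alike, in a single @tokens hash, with drop tokens keyed by index and given a nil name. This change splits the two: @tokens becomes an array of named tokens, @drop_tokens an array of anonymous drop tokens, Token gains a drop? predicate, and the Lexer and its DFA take both collections. The first two hunks below simplify the D parser template (assets/parser.d.erb); the rest thread the split through Propane, Grammar, Lexer, Parser, Token, and the lexer spec.

A minimal usage sketch of the resulting API (the grammar file name is hypothetical; the constructor signatures are as in the diff):

    grammar = Propane::Grammar.new(File.read("calc.propane"))
    grammar.tokens       # array of named tokens; each id is its array index
    grammar.drop_tokens  # array of anonymous tokens (name and id are nil)
    lexer = Propane::Lexer.new(grammar.tokens, grammar.drop_tokens)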
@@ -6,10 +6,8 @@ class <%= classname %>
 {
     enum
     {
-<% @tokens.each_with_index do |(name, token), index| %>
-<% if token.name %>
+<% @grammar.tokens.each_with_index do |token, index| %>
         TOKEN_<%= token.c_name %> = <%= index %>,
-<% end %>
 <% end %>
         TOKEN_EOF = <%= TOKEN_EOF %>,
         TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>,
@@ -18,12 +16,8 @@ class <%= classname %>
     }
 
     static immutable string TokenNames[] = [
-<% @tokens.each_with_index do |(name, token), index| %>
-<% if token.name %>
+<% @grammar.tokens.each_with_index do |token, index| %>
         "<%= token.name %>",
-<% else %>
-        null,
-<% end %>
 <% end %>
     ];
 
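With drop tokens gone from @grammar.tokens, every token the template iterates over has a name: the <% if token.name %> guards become dead weight and the null placeholders in TokenNames disappear, while the enum values stay dense because named-token ids are consecutive array indices.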
@@ -37,17 +37,16 @@ class Propane
   end
 
   def initialize(input)
-    grammar = Grammar.new(input)
-    @classname = grammar.classname
-    @modulename = grammar.modulename
-    @tokens = grammar.tokens
-    @rule_sets = grammar.rule_sets
+    @grammar = Grammar.new(input)
+    @classname = @grammar.classname
+    @modulename = @grammar.modulename
+    @rule_sets = @grammar.rule_sets
   end
 
   def generate(output_file, log_file)
     expand_rules
-    lexer = Lexer.new(@tokens)
-    parser = Parser.new(@tokens, @rule_sets)
+    lexer = Lexer.new(@grammar.tokens, @grammar.drop_tokens)
+    parser = Parser.new(@rule_sets)
     classname = @classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize
     erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../assets/parser.d.erb")), trim_mode: "<>")
     result = erb.result(binding.clone)
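Propane now keeps the Grammar object itself rather than copying its fields into instance variables. The lexer is built from both token lists, and Parser drops its tokens argument: by the time it runs, expand_rules has already resolved every rule component to a Token or RuleSet object.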
@@ -59,9 +58,16 @@ class Propane
   private
 
   def expand_rules
+    tokens_by_name = {}
+    @grammar.tokens.each do |token|
+      if tokens_by_name.include?(token.name)
+        raise Error.new("Duplicate token name #{token.name.inspect}")
+      end
+      tokens_by_name[token.name] = token
+    end
     @rule_sets.each do |rule_name, rule_set|
-      if @tokens.include?(rule_name)
-        raise Error.new("Rule name collides with token name #{rule_name}")
+      if tokens_by_name.include?(rule_name)
+        raise Error.new("Rule name collides with token name #{rule_name.inspect}")
       end
     end
     unless @rule_sets["Start"]
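Since @tokens is now an array rather than a name-keyed hash, name lookup and duplicate detection no longer fall out of the container, so expand_rules rebuilds a local hash first. A standalone sketch of that check, using stand-in token objects rather than the real Propane::Token:

    Tok = Struct.new(:name, :pattern, :id, :line_number)
    tokens = [Tok.new("num", "[0-9]+", 0, 1),
              Tok.new("num", "0[xX][0-9a-fA-F]+", 1, 2)]  # duplicate name

    tokens_by_name = {}
    tokens.each do |t|
      raise "Duplicate token name #{t.name.inspect}" if tokens_by_name.include?(t.name)
      tokens_by_name[t.name] = t
    end
    # => RuntimeError: Duplicate token name "num"

One consequence of the move: the duplicate-name error now surfaces in expand_rules at generate time rather than in parse_grammar at parse time.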
@@ -70,8 +76,8 @@ class Propane
     @rule_sets.each do |rule_name, rule_set|
       rule_set.rules.each do |rule|
         rule.components.map! do |component|
-          if @tokens[component]
-            @tokens[component]
+          if tokens_by_name[component]
+            tokens_by_name[component]
           elsif @rule_sets[component]
             @rule_sets[component]
           else
@@ -3,12 +3,14 @@ class Propane
   class Grammar
 
     attr_reader :classname
+    attr_reader :drop_tokens
     attr_reader :modulename
     attr_reader :rule_sets
     attr_reader :tokens
 
     def initialize(input)
-      @tokens = {}
+      @tokens = []
+      @drop_tokens = []
       @rule_sets = {}
       input = input.gsub("\r\n", "\n")
       parse_grammar(input)
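A hedged example of the new storage, assuming a grammar consisting of only a drop directive parses cleanly (the directive syntax comes from the /\Adrop\s+(\S+)\n/ regex in the next hunk):

    g = Propane::Grammar.new("drop \\s+\n")
    g.tokens                  # => []  (named tokens only)
    g.drop_tokens.size        # => 1
    g.drop_tokens.first.name  # => nil, so drop? will be true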
@@ -35,14 +37,10 @@ class Propane
         unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
           raise Error.new("Invalid token name #{name}")
         end
-        if @tokens[name]
-          raise Error.new("Duplicate token name #{name}")
-        else
-          @tokens[name] = Token.new(name, pattern, @tokens.size, line_number)
-        end
+        @tokens << Token.new(name, pattern, @tokens.size, line_number)
       elsif sliced = input.slice!(/\Adrop\s+(\S+)\n/)
         pattern = $1
-        @tokens[@tokens.size] = Token.new(nil, pattern, @tokens.size, line_number)
+        @drop_tokens << Token.new(nil, pattern, nil, line_number)
       elsif sliced = input.slice!(/\A(\S+)\s*:\s*\[(.*?)\] <<\n(.*?)^>>\n/m)
         rule_name, components, code = $1, $2, $3
         components = components.strip.split(/\s+/)
@@ -5,8 +5,8 @@ class Propane
     # Lexer DFA.
     attr_accessor :dfa
 
-    def initialize(tokens)
-      @dfa = DFA.new(tokens)
+    def initialize(tokens, drop_tokens)
+      @dfa = DFA.new(tokens, drop_tokens)
     end
 
     def build_tables
@@ -17,10 +17,10 @@ class Propane
         accepts =
           if state.accepts.nil?
             TOKEN_NONE
-          elsif state.accepts.name
-            state.accepts.id
-          else
+          elsif state.accepts.drop?
             TOKEN_DROP
+          else
+            state.accepts.id
           end
         state_table << {
           transition_table_index: transition_table.size,
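The branch reorder is forced by the new representation: drop tokens no longer carry an id, so the drop? test must run before falling back to state.accepts.id. The old code identified named tokens by checking name and treated everything else as a drop.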
@@ -3,10 +3,10 @@ class Propane
 
     class DFA < FA
 
-      def initialize(tokens)
+      def initialize(tokens, drop_tokens)
         super()
         start_nfa = Regex::NFA.new
-        tokens.each do |name, token|
+        (tokens + drop_tokens).each do |token|
           start_nfa.start_state.add_transition(nil, token.nfa.start_state)
         end
         @nfa_state_sets = {}
@@ -2,7 +2,7 @@ class Propane
 
   class Parser
 
-    def initialize(tokens, rule_sets)
+    def initialize(rule_sets)
       @token_eof = Token.new("$", nil, TOKEN_EOF, nil)
       @item_sets = []
       @item_sets_set = {}
@@ -48,6 +48,14 @@ class Propane
       @name.upcase
     end
 
+    # Whether the token is a drop token.
+    #
+    # @return [Boolean]
+    #   Whether the token is a drop token.
+    def drop?
+      @name.nil?
+    end
+
     def to_s
       @name
     end
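A short sketch of the new predicate; Token.new(name, pattern, id, line_number) follows parse_grammar, and the patterns here are illustrative:

    named = Propane::Token.new("ident", "[a-zA-Z_][a-zA-Z_0-9]*", 0, 1)
    blank = Propane::Token.new(nil, "\\s+", nil, 2)
    named.drop?  # => false
    blank.drop?  # => true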
@@ -50,8 +50,8 @@ class TestLexer
   end
 
   def run(grammar, input)
-    propane = Propane.new(grammar)
-    token_dfa = Propane::Lexer::DFA.new(propane.instance_variable_get(:@tokens))
+    grammar = Propane::Grammar.new(grammar)
+    token_dfa = Propane::Lexer::DFA.new(grammar.tokens, grammar.drop_tokens)
     test_lexer = TestLexer.new(token_dfa)
     test_lexer.lex(input)
   end
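The spec now goes through Grammar's public tokens and drop_tokens readers rather than pulling @tokens out of a Propane instance with instance_variable_get, matching how generate constructs the lexer DFA.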