Remove TOKEN_EOF; define EOF token and start rule in Generator

Josh Holtrop 2022-10-02 10:07:44 -04:00
parent 150be33826
commit f46b5b3f4d
9 changed files with 59 additions and 60 deletions
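In short: EOF stops being an out-of-band sentinel value (_TOKEN_EOF = 0xFFFFFFFC) and becomes an ordinary token named $EOF, which the Generator appends to the token list together with an augmented start rule $Start -> Start $EOF. Since user token names must match [a-zA-Z_][a-zA-Z_0-9]*, a leading $ marks generator-internal symbols, and Token.code_name rewrites that $ to 0 to form a valid identifier for the generated enum, which is why TOKEN_0EOF appears throughout the diff. A minimal, standalone sketch of that mapping (the method itself is added in token.rb below):

  # Sketch of the code-name mapping introduced by this commit.
  def code_name(name)
    name.sub(/^\$/, "0")
  end

  code_name("$EOF")   # => "0EOF" -- emitted as TOKEN_0EOF in the generated enum
  code_name("int")    # => "int"  -- user-defined token names pass through unchanged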

View File

@@ -10,10 +10,9 @@ class <%= @classname %>
     enum
     {
 <% @grammar.tokens.each_with_index do |token, index| %>
-        TOKEN_<%= token.name %> = <%= index %>,
+        TOKEN_<%= token.code_name %> = <%= index %>,
 <% end %>
         _TOKEN_COUNT = <%= @grammar.tokens.size %>,
-        _TOKEN_EOF = <%= TOKEN_EOF %>,
         _TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>,
         _TOKEN_DROP = <%= TOKEN_DROP %>,
         _TOKEN_NONE = <%= TOKEN_NONE %>,
@@ -247,7 +246,7 @@ class <%= @classname %>
             }
             else if (attempt_match_info.length == 0u)
             {
-                lt.token = _TOKEN_EOF;
+                lt.token = TOKEN_0EOF;
                 break;
             }
             if (!lex_continue)
@@ -378,7 +377,7 @@ class <%= @classname %>
         }
         if (shift_state != 0xFFFFFFFFu)
         {
-            if (token == _TOKEN_EOF)
+            if (token == TOKEN_0EOF)
             {
                 /* Successful parse. */
                 return true;
@@ -405,11 +404,7 @@ class <%= @classname %>
         /* Error, unexpected token. */
         write("Unexpected token ");
-        if (token == _TOKEN_EOF)
-        {
-            writeln("{EOF}");
-        }
-        else if (token < _TOKEN_COUNT)
+        if (token < _TOKEN_COUNT)
         {
             writeln(token_names[token]);
         }
@@ -457,10 +452,6 @@ class <%= @classname %>
         // {
         //     writeln(token_names[token]);
         // }
-        // else if (token == _TOKEN_EOF)
-        // {
-        //     writeln("{EOF}");
-        // }
         // else
         // {
         //     writeln("{other}");

View File

@@ -25,9 +25,6 @@ require_relative "propane/version"
 class Propane
-  # EOF.
-  TOKEN_EOF = 0xFFFFFFFC
   # Decoding error.
   TOKEN_DECODE_ERROR = 0xFFFFFFFD

View File

@@ -26,33 +26,41 @@ class Propane
   private
   def process_grammar!
+    # Add EOF token.
+    @grammar.tokens << Token.new("$EOF", nil)
     tokens_by_name = {}
-    @grammar.tokens.each do |token|
+    @grammar.tokens.each_with_index do |token, token_id|
+      # Assign token ID.
+      token.id = token_id
+      # Check for token name conflicts.
       if tokens_by_name.include?(token.name)
         raise Error.new("Duplicate token name #{token.name.inspect}")
       end
       tokens_by_name[token.name] = token
     end
+    # Check for user start rule.
+    unless @grammar.rules.find {|rule| rule.name == "Start"}
+      raise Error.new("Start rule not found")
+    end
+    # Add "real" start rule.
+    @grammar.rules.unshift(Rule.new("$Start", ["Start", "$EOF"], nil, nil))
     rule_sets = {}
-    @grammar.rules.each do |rule|
+    rule_set_id = @grammar.tokens.size
+    @grammar.rules.each_with_index do |rule, rule_id|
+      # Assign rule ID.
+      rule.id = rule_id
+      # Check for token/rule name conflict.
       if tokens_by_name.include?(rule.name)
         raise Error.new("Rule name collides with token name #{rule.name.inspect}")
       end
       # Build rule sets of all rules with the same name.
-      @_rule_set_id ||= @grammar.tokens.size
       unless rule_sets[rule.name]
-        rule_sets[rule.name] = RuleSet.new(rule.name, @_rule_set_id)
-        @_rule_set_id += 1
+        rule_sets[rule.name] = RuleSet.new(rule.name, rule_set_id)
+        rule_set_id += 1
       end
       rule.rule_set = rule_sets[rule.name]
       rule_sets[rule.name] << rule
     end
-    # Check for start rule.
-    unless rule_sets["Start"]
-      raise Error.new("Start rule not found")
-    end
     # Generate lexer user code IDs for lexer patterns with user code blocks.
     @grammar.patterns.select do |pattern|
       pattern.code
@@ -75,7 +83,7 @@ class Propane
     # Generate the lexer.
     @lexer = Lexer.new(@grammar.patterns)
     # Generate the parser.
-    @parser = Parser.new(@grammar, rule_sets, rule_sets["Start"], @log)
+    @parser = Parser.new(@grammar, rule_sets, @log)
   end
   # Determine which grammar rules could expand to empty sequences.
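With ID assignment moved into process_grammar!, the numbering scheme becomes: token IDs count up from 0 in declaration order (the appended $EOF token comes last), and rule set IDs continue from @grammar.tokens.size, with the unshifted $Start rule receiving rule ID 0. A runnable illustration of the resulting layout, using a hypothetical grammar (token and rule names invented):

  tokens = %w[int plus $EOF]            # "$EOF" is appended by process_grammar!
  tokens.each_with_index do |name, id|
    puts "token #{name}: id #{id}"      # int: 0, plus: 1, $EOF: 2
  end

  rule_set_id = tokens.size             # rule set IDs start after the token IDs
  %w[$Start Start Expr].each do |name|  # "$Start" was unshifted to the front
    puts "rule set #{name}: id #{rule_set_id}"
    rule_set_id += 1
  end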

View File

@@ -80,7 +80,7 @@ class Propane
       unless code = parse_code_block!
         consume!(/;/, "expected pattern or `;' or code block")
       end
-      token = Token.new(name: name, id: @tokens.size, line_number: @line_number)
+      token = Token.new(name, @line_number)
       @tokens << token
       pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code)
       @patterns << pattern
@@ -93,7 +93,7 @@ class Propane
       unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
         raise Error.new("Invalid token name #{name.inspect}")
       end
-      token = Token.new(name: name, id: @tokens.size, line_number: @line_number)
+      token = Token.new(name, @line_number)
       @tokens << token
     end
   end
@@ -117,8 +117,7 @@ class Propane
       raise Error.new("Invalid rule name #{name.inspect}")
     end
     components = components.strip.split(/\s+/)
-    # Reserve rule ID 0 for the "real" start rule.
-    @rules << Rule.new(rule_name, components, code, @line_number, @rules.size + 1)
+    @rules << Rule.new(rule_name, components, code, @line_number)
   end
 end

View File

@@ -2,15 +2,13 @@ class Propane
   class Parser
-    def initialize(grammar, rule_sets, start_rule_set, log)
+    def initialize(grammar, rule_sets, log)
       @grammar = grammar
       @rule_sets = rule_sets
       @log = log
-      @eof_token = Token.new(name: "$", id: TOKEN_EOF)
-      @start_rule = Rule.new("$$", [start_rule_set, @eof_token], nil, nil, 0)
       @item_sets = []
       @item_sets_set = {}
-      start_item = Item.new(@start_rule, 0)
+      start_item = Item.new(grammar.rules.first, 0)
       eval_item_sets = Set[ItemSet.new([start_item])]
       while eval_item_sets.size > 0
@@ -21,7 +19,7 @@ class Propane
         @item_sets << item_set
         @item_sets_set[item_set] = item_set
         item_set.following_symbols.each do |following_symbol|
-          unless following_symbol == @eof_token
+          unless following_symbol.name == "$EOF"
            following_set = item_set.build_following_item_set(following_symbol)
            eval_item_sets << following_set
          end
@@ -44,7 +42,7 @@ class Propane
       @item_sets.each do |item_set|
         shift_entries = item_set.following_symbols.map do |following_symbol|
           state_id =
-            if following_symbol == @eof_token
+            if following_symbol.name == "$EOF"
               0
             else
               item_set.following_item_set[following_symbol].id
@@ -83,7 +81,7 @@ class Propane
     def process_item_set(item_set)
       item_set.following_symbols.each do |following_symbol|
-        unless following_symbol == @eof_token
+        unless following_symbol.name == "$EOF"
           following_set = @item_sets_set[item_set.build_following_item_set(following_symbol)]
           item_set.following_item_set[following_symbol] = following_set
           following_set.in_sets << item_set
@@ -206,7 +204,7 @@ class Propane
     def write_log!
       @log.puts Util.banner("Parser Rules")
-      ([@start_rule] + @grammar.rules).each do |rule|
+      @grammar.rules.each do |rule|
         @log.puts
         @log.puts "Rule #{rule.id}:"
         @log.puts " #{rule}"

View File

@@ -12,7 +12,7 @@ class Propane
     # @return [Integer]
     #   Rule ID.
-    attr_reader :id
+    attr_accessor :id
     # @return [Integer]
     #   Line number where the rule was defined in the input grammar.
@@ -36,13 +36,10 @@ class Propane
     #   User code associated with the rule.
     # @param line_number [Integer]
     #   Line number where the rule was defined in the input grammar.
-    # @param id [Integer]
-    #   Rule ID.
-    def initialize(name, components, code, line_number, id)
+    def initialize(name, components, code, line_number)
       @name = name
       @components = components
       @code = code
-      @id = id
       @line_number = line_number
     end

View File

@@ -2,13 +2,25 @@ class Propane
   class Token
+    class << self
+      # Name of the token to use in code (special characters replaced).
+      #
+      # @return [String]
+      #   Name of the token to use in code (special characters replaced).
+      def code_name(name)
+        name.sub(/^\$/, "0")
+      end
+    end
     # @return [String, nil]
     #   Token name.
     attr_reader :name
     # @return [Integer, nil]
     #   Token ID.
-    attr_reader :id
+    attr_accessor :id
     # @return [Integer, nil]
     #   Line number where the token was defined in the input grammar.
@@ -20,14 +32,19 @@ class Propane
     #   Optional parameters.
     # @option options [String, nil] :name
     #   Token name.
-    # @option options [Integer, nil] :id
-    #   Token ID.
     # @option options [Integer, nil] :line_number
     #   Line number where the token was defined in the input grammar.
-    def initialize(options)
-      @name = options[:name]
-      @id = options[:id]
-      @line_number = options[:line_number]
+    def initialize(name, line_number)
+      @name = name
+      @line_number = line_number
     end
+    # Name of the token to use in code (special characters replaced).
+    #
+    # @return [String]
+    #   Name of the token to use in code (special characters replaced).
+    def code_name
+      self.class.code_name(@name)
+    end
     def to_s
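Taken together, the revised Token interface is positional construction plus late ID assignment. An illustrative use, assuming the class as revised above:

  token = Token.new("$EOF", nil)  # nil line number: synthesized, not from the grammar file
  token.id = 2                    # assigned later by the Generator via attr_accessor
  token.code_name                 # => "0EOF"
  Token.code_name("$EOF")         # => "0EOF" (class-level form, same mapping)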

View File

@@ -35,7 +35,6 @@ EOF
     o = grammar.tokens.find {|token| token.name == "while"}
     expect(o).to_not be_nil
     expect(o.line_number).to eq 6
-    expect(o.id).to eq 0
     o = grammar.patterns.find {|pattern| pattern.token == o}
     expect(o).to_not be_nil
@@ -46,7 +45,6 @@ EOF
     o = grammar.tokens.find {|token| token.name == "id"}
     expect(o).to_not be_nil
     expect(o.line_number).to eq 9
-    expect(o.id).to eq 1
     o = grammar.patterns.find {|pattern| pattern.token == o}
     expect(o).to_not be_nil
@@ -57,7 +55,6 @@ EOF
     o = grammar.tokens.find {|token| token.name == "token_with_code"}
     expect(o).to_not be_nil
     expect(o.line_number).to eq 11
-    expect(o.id).to eq 2
     o = grammar.patterns.find {|pattern| pattern.token == o}
     expect(o).to_not be_nil
@@ -83,21 +80,18 @@ EOF
     o = grammar.rules[0]
     expect(o.name).to eq "A"
     expect(o.components).to eq %w[B]
-    expect(o.id).to eq 1
     expect(o.line_number).to eq 19
     expect(o.code).to eq " a = 42;\n"
     o = grammar.rules[1]
     expect(o.name).to eq "B"
     expect(o.components).to eq %w[C while id]
-    expect(o.id).to eq 2
     expect(o.line_number).to eq 22
     expect(o.code).to be_nil
     o = grammar.rules[2]
     expect(o.name).to eq "B"
     expect(o.components).to eq []
-    expect(o.id).to eq 3
     expect(o.line_number).to eq 23
     expect(o.code).to eq " b = 0;\n"
   end
@@ -119,7 +113,6 @@ EOF
     o = grammar.tokens.find {|token| token.name == "code1"}
     expect(o).to_not be_nil
-    expect(o.id).to eq 0
     expect(o.line_number).to eq 1
     o = grammar.patterns.find {|pattern| pattern.token == o}
@@ -128,7 +121,6 @@ EOF
     o = grammar.tokens.find {|token| token.name == "code2"}
     expect(o).to_not be_nil
-    expect(o.id).to eq 1
     expect(o.line_number).to eq 6
     o = grammar.patterns.find {|pattern| pattern.token == o}

View File

@@ -77,8 +77,8 @@ unittest
     assert(lexer.lex_token() == LT(1, 0, 3, Testparser.TOKEN_int));
     assert(lexer.lex_token() == LT(1, 4, 1, Testparser.TOKEN_plus));
     assert(lexer.lex_token() == LT(1, 6, 3, Testparser.TOKEN_int));
-    assert(lexer.lex_token() == LT(1, 9, 0, Testparser._TOKEN_EOF));
+    assert(lexer.lex_token() == LT(1, 9, 0, Testparser.TOKEN_0EOF));
     lexer = new Testparser.Lexer(null, 0u);
-    assert(lexer.lex_token() == LT(0, 0, 0, Testparser._TOKEN_EOF));
+    assert(lexer.lex_token() == LT(0, 0, 0, Testparser.TOKEN_0EOF));
 }