Remove TOKEN_EOF; define EOF token and start rule in Generator
commit f46b5b3f4d
parent 150be33826
@@ -10,10 +10,9 @@ class <%= @classname %>
     enum
     {
 <% @grammar.tokens.each_with_index do |token, index| %>
-        TOKEN_<%= token.name %> = <%= index %>,
+        TOKEN_<%= token.code_name %> = <%= index %>,
 <% end %>
         _TOKEN_COUNT = <%= @grammar.tokens.size %>,
-        _TOKEN_EOF = <%= TOKEN_EOF %>,
         _TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>,
         _TOKEN_DROP = <%= TOKEN_DROP %>,
         _TOKEN_NONE = <%= TOKEN_NONE %>,
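Note: the template switches from `token.name` to `token.code_name` because the generator now appends an internal `$EOF` token, and `$` is not a valid identifier character in generated code. A minimal sketch of the mapping (the real helper appears in the `Token` hunk further down):

```ruby
# Sketch: "$"-prefixed internal token names are rewritten so they form
# valid identifiers in generated code (see Token.code_name below).
def code_name(name)
  name.sub(/^\$/, "0")
end

code_name("$EOF")  # => "0EOF" -- emitted as TOKEN_0EOF
code_name("int")   # => "int"  -- emitted as TOKEN_int
```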
@@ -247,7 +246,7 @@ class <%= @classname %>
             }
             else if (attempt_match_info.length == 0u)
             {
-                lt.token = _TOKEN_EOF;
+                lt.token = TOKEN_0EOF;
                 break;
             }
             if (!lex_continue)
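With this change EOF is no longer the out-of-band value `0xFFFFFFFC`; `TOKEN_0EOF` is an ordinary enum member whose value is the `$EOF` token's index. A rough illustration of the difference (token list is hypothetical):

```ruby
# Before: EOF was a magic constant outside the token ID space.
TOKEN_EOF = 0xFFFFFFFC

# After: "$EOF" is appended as a regular token, so its ID is just the
# last index, and _TOKEN_COUNT now includes it.
tokens = ["int", "plus", "$EOF"]   # illustrative token list
eof_id = tokens.index("$EOF")      # => 2, i.e. tokens.size - 1
```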
@@ -378,7 +377,7 @@ class <%= @classname %>
             }
             if (shift_state != 0xFFFFFFFFu)
             {
-                if (token == _TOKEN_EOF)
+                if (token == TOKEN_0EOF)
                 {
                     /* Successful parse. */
                     return true;
@@ -405,11 +404,7 @@ class <%= @classname %>

             /* Error, unexpected token. */
             write("Unexpected token ");
-            if (token == _TOKEN_EOF)
-            {
-                writeln("{EOF}");
-            }
-            else if (token < _TOKEN_COUNT)
+            if (token < _TOKEN_COUNT)
             {
                 writeln(token_names[token]);
             }
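The dedicated `{EOF}` branch can be dropped because `$EOF` now occupies a slot in `token_names` like any other token, so the `token < _TOKEN_COUNT` test covers it. Illustrative only (the array contents here are placeholders):

```ruby
token_names = ["int", "plus", "$EOF"]  # $EOF included like any other token
token = 2                              # TOKEN_0EOF
token_names[token] if token < token_names.size  # => "$EOF"
```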
@@ -457,10 +452,6 @@ class <%= @classname %>
             // {
             //     writeln(token_names[token]);
             // }
-            // else if (token == _TOKEN_EOF)
-            // {
-            //     writeln("{EOF}");
-            // }
             // else
             // {
             //     writeln("{other}");
@@ -25,9 +25,6 @@ require_relative "propane/version"

 class Propane

-  # EOF.
-  TOKEN_EOF = 0xFFFFFFFC
-
   # Decoding error.
   TOKEN_DECODE_ERROR = 0xFFFFFFFD

@@ -26,33 +26,41 @@ class Propane
   private

   def process_grammar!
+    # Add EOF token.
+    @grammar.tokens << Token.new("$EOF", nil)
     tokens_by_name = {}
-    @grammar.tokens.each do |token|
+    @grammar.tokens.each_with_index do |token, token_id|
+      # Assign token ID.
+      token.id = token_id
       # Check for token name conflicts.
       if tokens_by_name.include?(token.name)
         raise Error.new("Duplicate token name #{token.name.inspect}")
       end
       tokens_by_name[token.name] = token
     end
+    # Check for user start rule.
+    unless @grammar.rules.find {|rule| rule.name == "Start"}
+      raise Error.new("Start rule not found")
+    end
+    # Add "real" start rule.
+    @grammar.rules.unshift(Rule.new("$Start", ["Start", "$EOF"], nil, nil))
     rule_sets = {}
-    @grammar.rules.each do |rule|
+    rule_set_id = @grammar.tokens.size
+    @grammar.rules.each_with_index do |rule, rule_id|
+      # Assign rule ID.
+      rule.id = rule_id
       # Check for token/rule name conflict.
       if tokens_by_name.include?(rule.name)
         raise Error.new("Rule name collides with token name #{rule.name.inspect}")
       end
       # Build rule sets of all rules with the same name.
-      @_rule_set_id ||= @grammar.tokens.size
       unless rule_sets[rule.name]
-        rule_sets[rule.name] = RuleSet.new(rule.name, @_rule_set_id)
-        @_rule_set_id += 1
+        rule_sets[rule.name] = RuleSet.new(rule.name, rule_set_id)
+        rule_set_id += 1
       end
       rule.rule_set = rule_sets[rule.name]
       rule_sets[rule.name] << rule
     end
-    # Check for start rule.
-    unless rule_sets["Start"]
-      raise Error.new("Start rule not found")
-    end
     # Generate lexer user code IDs for lexer patterns with user code blocks.
     @grammar.patterns.select do |pattern|
       pattern.code
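After `process_grammar!`, the numbering is deterministic: token IDs follow declaration order with `$EOF` last, rule set IDs continue where token IDs leave off, and rule 0 is always the synthesized `$Start` rule. A sketch for a hypothetical grammar with tokens `int`, `plus` and rules `Start`, `E`:

```ruby
tokens = %w[int plus] + ["$EOF"]         # process_grammar! appends $EOF
rules  = ["$Start", "Start", "E"]        # $Start is unshifted to the front

token_ids = tokens.each_with_index.to_h  # {"int"=>0, "plus"=>1, "$EOF"=>2}
rule_ids  = rules.each_with_index.to_h   # {"$Start"=>0, "Start"=>1, "E"=>2}
rule_set_id = tokens.size                # rule set numbering starts at 3
```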
@@ -75,7 +83,7 @@ class Propane
     # Generate the lexer.
     @lexer = Lexer.new(@grammar.patterns)
     # Generate the parser.
-    @parser = Parser.new(@grammar, rule_sets, rule_sets["Start"], @log)
+    @parser = Parser.new(@grammar, rule_sets, @log)
   end

   # Determine which grammar rules could expand to empty sequences.
@@ -80,7 +80,7 @@ class Propane
       unless code = parse_code_block!
         consume!(/;/, "expected pattern or `;' or code block")
       end
-      token = Token.new(name: name, id: @tokens.size, line_number: @line_number)
+      token = Token.new(name, @line_number)
       @tokens << token
       pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code)
       @patterns << pattern
@@ -93,7 +93,7 @@ class Propane
       unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
         raise Error.new("Invalid token name #{name.inspect}")
       end
-      token = Token.new(name: name, id: @tokens.size, line_number: @line_number)
+      token = Token.new(name, @line_number)
       @tokens << token
     end
   end
@@ -117,8 +117,7 @@ class Propane
         raise Error.new("Invalid rule name #{name.inspect}")
       end
       components = components.strip.split(/\s+/)
-      # Reserve rule ID 0 for the "real" start rule.
-      @rules << Rule.new(rule_name, components, code, @line_number, @rules.size + 1)
+      @rules << Rule.new(rule_name, components, code, @line_number)
     end
   end

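Both constructor calls shrink because IDs are no longer assigned while parsing the grammar file; the Generator numbers everything afterwards. Roughly (line numbers here are illustrative):

```ruby
# New construction style at grammar-parse time (IDs deliberately absent):
token = Token.new("while", 6)                   # (name, line_number)
rule  = Rule.new("E", %w[E plus int], nil, 12)  # (name, components, code, line_number)
token.id  # => nil until process_grammar! assigns it
```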
@@ -2,15 +2,13 @@ class Propane

   class Parser

-    def initialize(grammar, rule_sets, start_rule_set, log)
+    def initialize(grammar, rule_sets, log)
       @grammar = grammar
       @rule_sets = rule_sets
       @log = log
-      @eof_token = Token.new(name: "$", id: TOKEN_EOF)
-      @start_rule = Rule.new("$$", [start_rule_set, @eof_token], nil, nil, 0)
       @item_sets = []
       @item_sets_set = {}
-      start_item = Item.new(@start_rule, 0)
+      start_item = Item.new(grammar.rules.first, 0)
       eval_item_sets = Set[ItemSet.new([start_item])]

       while eval_item_sets.size > 0
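The parser no longer synthesizes its own `$$`/`$` start rule and EOF token; it assumes the Generator has already placed the real start rule first in `grammar.rules`. The shape it relies on, roughly:

```ruby
# What the Generator prepared (see process_grammar! above):
start_rule = Rule.new("$Start", ["Start", "$EOF"], nil, nil)
start_rule.id = 0
# The parser then seeds its item sets from it:
start_item = Item.new(start_rule, 0)  # dot at position 0: . Start $EOF
```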
@@ -21,7 +19,7 @@ class Propane
        @item_sets << item_set
        @item_sets_set[item_set] = item_set
        item_set.following_symbols.each do |following_symbol|
-         unless following_symbol == @eof_token
+         unless following_symbol.name == "$EOF"
            following_set = item_set.build_following_item_set(following_symbol)
            eval_item_sets << following_set
          end
@@ -44,7 +42,7 @@ class Propane
      @item_sets.each do |item_set|
        shift_entries = item_set.following_symbols.map do |following_symbol|
          state_id =
-           if following_symbol == @eof_token
+           if following_symbol.name == "$EOF"
              0
            else
              item_set.following_item_set[following_symbol].id
@@ -83,7 +81,7 @@ class Propane

    def process_item_set(item_set)
      item_set.following_symbols.each do |following_symbol|
-       unless following_symbol == @eof_token
+       unless following_symbol.name == "$EOF"
          following_set = @item_sets_set[item_set.build_following_item_set(following_symbol)]
          item_set.following_item_set[following_symbol] = following_set
          following_set.in_sets << item_set
@@ -206,7 +204,7 @@ class Propane

    def write_log!
      @log.puts Util.banner("Parser Rules")
-     ([@start_rule] + @grammar.rules).each do |rule|
+     @grammar.rules.each do |rule|
        @log.puts
        @log.puts "Rule #{rule.id}:"
        @log.puts "  #{rule}"
@@ -12,7 +12,7 @@ class Propane

    # @return [Integer]
    #   Rule ID.
-   attr_reader :id
+   attr_accessor :id

    # @return [Integer]
    #   Line number where the rule was defined in the input grammar.
@@ -36,13 +36,10 @@ class Propane
    #   User code associated with the rule.
    # @param line_number [Integer]
    #   Line number where the rule was defined in the input grammar.
-   # @param id [Integer]
-   #   Rule ID.
-   def initialize(name, components, code, line_number, id)
+   def initialize(name, components, code, line_number)
      @name = name
      @components = components
      @code = code
-     @id = id
      @line_number = line_number
    end

@@ -2,13 +2,25 @@ class Propane

   class Token

+    class << self
+
+      # Name of the token to use in code (special characters replaced).
+      #
+      # @return [String]
+      #   Name of the token to use in code (special characters replaced).
+      def code_name(name)
+        name.sub(/^\$/, "0")
+      end
+
+    end
+
     # @return [String, nil]
     #   Token name.
     attr_reader :name

     # @return [Integer, nil]
     #   Token ID.
-    attr_reader :id
+    attr_accessor :id

     # @return [Integer, nil]
     #   Line number where the token was defined in the input grammar.
@@ -20,14 +32,19 @@ class Propane
     #   Optional parameters.
     # @option options [String, nil] :name
     #   Token name.
-    # @option options [Integer, nil] :id
-    #   Token ID.
     # @option options [Integer, nil] :line_number
     #   Line number where the token was defined in the input grammar.
-    def initialize(options)
-      @name = options[:name]
-      @id = options[:id]
-      @line_number = options[:line_number]
+    def initialize(name, line_number)
+      @name = name
+      @line_number = line_number
     end
+
+    # Name of the token to use in code (special characters replaced).
+    #
+    # @return [String]
+    #   Name of the token to use in code (special characters replaced).
+    def code_name
+      self.class.code_name(@name)
+    end

     def to_s
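Token construction is now positional, the ID is writable after the fact, and `code_name` is available both on the class and on instances. Expected usage, as a quick sketch:

```ruby
token = Propane::Token.new("$EOF", nil)  # (name, line_number)
token.id = 2                             # assigned later by the Generator
token.code_name                          # => "0EOF"
Propane::Token.code_name("$EOF")         # => "0EOF" (class-level helper)
```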
@@ -35,7 +35,6 @@ EOF
      o = grammar.tokens.find {|token| token.name == "while"}
      expect(o).to_not be_nil
      expect(o.line_number).to eq 6
-     expect(o.id).to eq 0

      o = grammar.patterns.find {|pattern| pattern.token == o}
      expect(o).to_not be_nil
@@ -46,7 +45,6 @@ EOF
      o = grammar.tokens.find {|token| token.name == "id"}
      expect(o).to_not be_nil
      expect(o.line_number).to eq 9
-     expect(o.id).to eq 1

      o = grammar.patterns.find {|pattern| pattern.token == o}
      expect(o).to_not be_nil
@@ -57,7 +55,6 @@ EOF
      o = grammar.tokens.find {|token| token.name == "token_with_code"}
      expect(o).to_not be_nil
      expect(o.line_number).to eq 11
-     expect(o.id).to eq 2

      o = grammar.patterns.find {|pattern| pattern.token == o}
      expect(o).to_not be_nil
@@ -83,21 +80,18 @@ EOF
      o = grammar.rules[0]
      expect(o.name).to eq "A"
      expect(o.components).to eq %w[B]
-     expect(o.id).to eq 1
      expect(o.line_number).to eq 19
      expect(o.code).to eq " a = 42;\n"

      o = grammar.rules[1]
      expect(o.name).to eq "B"
      expect(o.components).to eq %w[C while id]
-     expect(o.id).to eq 2
      expect(o.line_number).to eq 22
      expect(o.code).to be_nil

      o = grammar.rules[2]
      expect(o.name).to eq "B"
      expect(o.components).to eq []
-     expect(o.id).to eq 3
      expect(o.line_number).to eq 23
      expect(o.code).to eq " b = 0;\n"
    end
@@ -119,7 +113,6 @@ EOF

      o = grammar.tokens.find {|token| token.name == "code1"}
      expect(o).to_not be_nil
-     expect(o.id).to eq 0
      expect(o.line_number).to eq 1

      o = grammar.patterns.find {|pattern| pattern.token == o}
@@ -128,7 +121,6 @@ EOF

      o = grammar.tokens.find {|token| token.name == "code2"}
      expect(o).to_not be_nil
-     expect(o.id).to eq 1
      expect(o.line_number).to eq 6

      o = grammar.patterns.find {|pattern| pattern.token == o}
@@ -77,8 +77,8 @@ unittest
    assert(lexer.lex_token() == LT(1, 0, 3, Testparser.TOKEN_int));
    assert(lexer.lex_token() == LT(1, 4, 1, Testparser.TOKEN_plus));
    assert(lexer.lex_token() == LT(1, 6, 3, Testparser.TOKEN_int));
-   assert(lexer.lex_token() == LT(1, 9, 0, Testparser._TOKEN_EOF));
+   assert(lexer.lex_token() == LT(1, 9, 0, Testparser.TOKEN_0EOF));

    lexer = new Testparser.Lexer(null, 0u);
-   assert(lexer.lex_token() == LT(0, 0, 0, Testparser._TOKEN_EOF));
+   assert(lexer.lex_token() == LT(0, 0, 0, Testparser.TOKEN_0EOF));
 }