Remove TOKEN_EOF; define EOF token and start rule in Generator
parent 150be33826
commit f46b5b3f4d
@@ -10,10 +10,9 @@ class <%= @classname %>
     enum
     {
 <% @grammar.tokens.each_with_index do |token, index| %>
-        TOKEN_<%= token.name %> = <%= index %>,
+        TOKEN_<%= token.code_name %> = <%= index %>,
 <% end %>
         _TOKEN_COUNT = <%= @grammar.tokens.size %>,
-        _TOKEN_EOF = <%= TOKEN_EOF %>,
         _TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>,
         _TOKEN_DROP = <%= TOKEN_DROP %>,
         _TOKEN_NONE = <%= TOKEN_NONE %>,
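The template now takes enum member names from token.code_name instead of token.name, so the generator-defined $EOF token (added later in this commit) still produces a valid D identifier. A minimal sketch of the substitution, matching the Token.code_name helper introduced below:

    # "$"-prefixed internal token names become identifier-safe:
    "$EOF".sub(/^\$/, "0")   # => "0EOF", emitted as enum member TOKEN_0EOF
    "plus".sub(/^\$/, "0")   # => "plus", user token names are unchanged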
@@ -247,7 +246,7 @@ class <%= @classname %>
                 }
                 else if (attempt_match_info.length == 0u)
                 {
-                    lt.token = _TOKEN_EOF;
+                    lt.token = TOKEN_0EOF;
                     break;
                 }
                 if (!lex_continue)
@@ -378,7 +377,7 @@ class <%= @classname %>
             }
             if (shift_state != 0xFFFFFFFFu)
             {
-                if (token == _TOKEN_EOF)
+                if (token == TOKEN_0EOF)
                 {
                     /* Successful parse. */
                     return true;
@@ -405,11 +404,7 @@ class <%= @classname %>
 
                 /* Error, unexpected token. */
                 write("Unexpected token ");
-                if (token == _TOKEN_EOF)
-                {
-                    writeln("{EOF}");
-                }
-                else if (token < _TOKEN_COUNT)
+                if (token < _TOKEN_COUNT)
                 {
                     writeln(token_names[token]);
                 }
@@ -457,10 +452,6 @@ class <%= @classname %>
             // {
             //     writeln(token_names[token]);
             // }
-            // else if (token == _TOKEN_EOF)
-            // {
-            //     writeln("{EOF}");
-            // }
             // else
             // {
             //     writeln("{other}");
@@ -25,9 +25,6 @@ require_relative "propane/version"
 
 class Propane
 
-  # EOF.
-  TOKEN_EOF = 0xFFFFFFFC
-
   # Decoding error.
   TOKEN_DECODE_ERROR = 0xFFFFFFFD
 
@@ -26,33 +26,41 @@ class Propane
     private
 
     def process_grammar!
+      # Add EOF token.
+      @grammar.tokens << Token.new("$EOF", nil)
       tokens_by_name = {}
-      @grammar.tokens.each do |token|
+      @grammar.tokens.each_with_index do |token, token_id|
+        # Assign token ID.
+        token.id = token_id
         # Check for token name conflicts.
         if tokens_by_name.include?(token.name)
          raise Error.new("Duplicate token name #{token.name.inspect}")
        end
        tokens_by_name[token.name] = token
      end
+      # Check for user start rule.
+      unless @grammar.rules.find {|rule| rule.name == "Start"}
+        raise Error.new("Start rule not found")
+      end
+      # Add "real" start rule.
+      @grammar.rules.unshift(Rule.new("$Start", ["Start", "$EOF"], nil, nil))
       rule_sets = {}
-      @grammar.rules.each do |rule|
+      rule_set_id = @grammar.tokens.size
+      @grammar.rules.each_with_index do |rule, rule_id|
+        # Assign rule ID.
+        rule.id = rule_id
         # Check for token/rule name conflict.
         if tokens_by_name.include?(rule.name)
           raise Error.new("Rule name collides with token name #{rule.name.inspect}")
         end
         # Build rule sets of all rules with the same name.
-        @_rule_set_id ||= @grammar.tokens.size
         unless rule_sets[rule.name]
-          rule_sets[rule.name] = RuleSet.new(rule.name, @_rule_set_id)
-          @_rule_set_id += 1
+          rule_sets[rule.name] = RuleSet.new(rule.name, rule_set_id)
+          rule_set_id += 1
         end
         rule.rule_set = rule_sets[rule.name]
         rule_sets[rule.name] << rule
       end
-      # Check for start rule.
-      unless rule_sets["Start"]
-        raise Error.new("Start rule not found")
-      end
       # Generate lexer user code IDs for lexer patterns with user code blocks.
       @grammar.patterns.select do |pattern|
         pattern.code
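process_grammar! now performs the bookkeeping that the Parser previously improvised: it appends a $EOF token, assigns sequential token and rule IDs, and unshifts an augmented start rule $Start -> Start $EOF ahead of the user's rules. A simplified, self-contained sketch of the effect (plain arrays stand in for the real Token/Rule objects):

    tokens = %w[int plus]                    # user-declared tokens
    tokens << "$EOF"                         # EOF becomes an ordinary token
    token_ids = tokens.each_with_index.to_h  # {"int"=>0, "plus"=>1, "$EOF"=>2}

    rules = [["Start", %w[E]], ["E", %w[int plus int]]]
    rules.unshift(["$Start", %w[Start $EOF]])    # augmented rule takes ID 0
    rules.each_with_index.map {|(name, _), id| [name, id] }
    # => [["$Start", 0], ["Start", 1], ["E", 2]]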
@@ -75,7 +83,7 @@ class Propane
       # Generate the lexer.
       @lexer = Lexer.new(@grammar.patterns)
       # Generate the parser.
-      @parser = Parser.new(@grammar, rule_sets, rule_sets["Start"], @log)
+      @parser = Parser.new(@grammar, rule_sets, @log)
     end
 
     # Determine which grammar rules could expand to empty sequences.
@@ -80,7 +80,7 @@ class Propane
       unless code = parse_code_block!
         consume!(/;/, "expected pattern or `;' or code block")
       end
-      token = Token.new(name: name, id: @tokens.size, line_number: @line_number)
+      token = Token.new(name, @line_number)
       @tokens << token
       pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code)
       @patterns << pattern
@@ -93,7 +93,7 @@ class Propane
         unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
           raise Error.new("Invalid token name #{name.inspect}")
         end
-        token = Token.new(name: name, id: @tokens.size, line_number: @line_number)
+        token = Token.new(name, @line_number)
         @tokens << token
       end
     end
@@ -117,8 +117,7 @@ class Propane
         raise Error.new("Invalid rule name #{name.inspect}")
       end
       components = components.strip.split(/\s+/)
-      # Reserve rule ID 0 for the "real" start rule.
-      @rules << Rule.new(rule_name, components, code, @line_number, @rules.size + 1)
+      @rules << Rule.new(rule_name, components, code, @line_number)
     end
   end
 
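With IDs centralized in the Generator, the grammar parser constructs Tokens and Rules positionally and without IDs; declaration order no longer has to leave room for the start rule. A stand-in sketch of the two-phase pattern (the real classes gain attr_accessor :id in this commit):

    Token = Struct.new(:name, :line_number) do
      attr_accessor :id                       # assigned later by the Generator
    end

    tokens = [Token.new("int", 3), Token.new("plus", 4), Token.new("$EOF", nil)]
    tokens.each_with_index {|token, id| token.id = id }
    tokens.map(&:id)                          # => [0, 1, 2]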
@@ -2,15 +2,13 @@ class Propane
 
   class Parser
 
-    def initialize(grammar, rule_sets, start_rule_set, log)
+    def initialize(grammar, rule_sets, log)
       @grammar = grammar
       @rule_sets = rule_sets
       @log = log
-      @eof_token = Token.new(name: "$", id: TOKEN_EOF)
-      @start_rule = Rule.new("$$", [start_rule_set, @eof_token], nil, nil, 0)
       @item_sets = []
       @item_sets_set = {}
-      start_item = Item.new(@start_rule, 0)
+      start_item = Item.new(grammar.rules.first, 0)
       eval_item_sets = Set[ItemSet.new([start_item])]
 
       while eval_item_sets.size > 0
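Because the Generator unshifts $Start to index 0, the Parser can seed the LR item-set construction directly from grammar.rules.first instead of fabricating a synthetic "$$" rule and "$" token. A sketch with stand-in structs:

    Rule = Struct.new(:name, :components)
    Item = Struct.new(:rule, :position)        # position is the dot index
    rules = [Rule.new("$Start", %w[Start $EOF]), Rule.new("Start", %w[E])]
    start_item = Item.new(rules.first, 0)      # ($Start -> . Start $EOF)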
@@ -21,7 +19,7 @@ class Propane
         @item_sets << item_set
         @item_sets_set[item_set] = item_set
         item_set.following_symbols.each do |following_symbol|
-          unless following_symbol == @eof_token
+          unless following_symbol.name == "$EOF"
             following_set = item_set.build_following_item_set(following_symbol)
             eval_item_sets << following_set
           end
@@ -44,7 +42,7 @@ class Propane
       @item_sets.each do |item_set|
         shift_entries = item_set.following_symbols.map do |following_symbol|
           state_id =
-            if following_symbol == @eof_token
+            if following_symbol.name == "$EOF"
               0
             else
               item_set.following_item_set[following_symbol].id
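EOF is recognized by name now that no shared @eof_token instance exists, and a shift on $EOF is encoded as state 0, the sentinel the generated parser treats as accept (the token == TOKEN_0EOF check above). A sketch of the row built here, with hypothetical data:

    following = { "Start" => 5, "$EOF" => nil }     # nil: no real successor state
    shift_row = following.map do |name, next_state|
      [name, name == "$EOF" ? 0 : next_state]
    end.to_h
    # => {"Start"=>5, "$EOF"=>0}   # state 0 on $EOF means accept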
@@ -83,7 +81,7 @@ class Propane
 
     def process_item_set(item_set)
       item_set.following_symbols.each do |following_symbol|
-        unless following_symbol == @eof_token
+        unless following_symbol.name == "$EOF"
           following_set = @item_sets_set[item_set.build_following_item_set(following_symbol)]
           item_set.following_item_set[following_symbol] = following_set
           following_set.in_sets << item_set
@@ -206,7 +204,7 @@ class Propane
 
     def write_log!
       @log.puts Util.banner("Parser Rules")
-      ([@start_rule] + @grammar.rules).each do |rule|
+      @grammar.rules.each do |rule|
         @log.puts
         @log.puts "Rule #{rule.id}:"
         @log.puts "  #{rule}"
@@ -12,7 +12,7 @@ class Propane
 
     # @return [Integer]
     #   Rule ID.
-    attr_reader :id
+    attr_accessor :id
 
     # @return [Integer]
     #   Line number where the rule was defined in the input grammar.
@@ -36,13 +36,10 @@ class Propane
     #   User code associated with the rule.
     # @param line_number [Integer]
     #   Line number where the rule was defined in the input grammar.
-    # @param id [Integer]
-    #   Rule ID.
-    def initialize(name, components, code, line_number, id)
+    def initialize(name, components, code, line_number)
       @name = name
       @components = components
       @code = code
-      @id = id
       @line_number = line_number
     end
 
@ -2,13 +2,25 @@ class Propane
|
|||||||
|
|
||||||
class Token
|
class Token
|
||||||
|
|
||||||
|
class << self
|
||||||
|
|
||||||
|
# Name of the token to use in code (special characters replaced).
|
||||||
|
#
|
||||||
|
# @return [String]
|
||||||
|
# Name of the token to use in code (special characters replaced).
|
||||||
|
def code_name(name)
|
||||||
|
name.sub(/^\$/, "0")
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
# @return [String, nil]
|
# @return [String, nil]
|
||||||
# Token name.
|
# Token name.
|
||||||
attr_reader :name
|
attr_reader :name
|
||||||
|
|
||||||
# @return [Integer, nil]
|
# @return [Integer, nil]
|
||||||
# Token ID.
|
# Token ID.
|
||||||
attr_reader :id
|
attr_accessor :id
|
||||||
|
|
||||||
# @return [Integer, nil]
|
# @return [Integer, nil]
|
||||||
# Line number where the token was defined in the input grammar.
|
# Line number where the token was defined in the input grammar.
|
||||||
@@ -20,14 +32,19 @@ class Propane
     #   Optional parameters.
     # @option options [String, nil] :name
     #   Token name.
-    # @option options [Integer, nil] :id
-    #   Token ID.
     # @option options [Integer, nil] :line_number
     #   Line number where the token was defined in the input grammar.
-    def initialize(options)
-      @name = options[:name]
-      @id = options[:id]
-      @line_number = options[:line_number]
+    def initialize(name, line_number)
+      @name = name
+      @line_number = line_number
     end
 
+    # Name of the token to use in code (special characters replaced).
+    #
+    # @return [String]
+    #   Name of the token to use in code (special characters replaced).
+    def code_name
+      self.class.code_name(@name)
+    end
+
     def to_s
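The helper is exposed both as a class method and an instance method, so the ERB template and other call sites agree on the mapping. Usage sketch:

    Propane::Token.code_name("$EOF")   # => "0EOF"
    Propane::Token.code_name("plus")   # => "plus"

    token = Propane::Token.new("$EOF", nil)
    token.code_name                    # => "0EOF"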
@@ -35,7 +35,6 @@ EOF
       o = grammar.tokens.find {|token| token.name == "while"}
       expect(o).to_not be_nil
       expect(o.line_number).to eq 6
-      expect(o.id).to eq 0
 
       o = grammar.patterns.find {|pattern| pattern.token == o}
       expect(o).to_not be_nil
@@ -46,7 +45,6 @@ EOF
       o = grammar.tokens.find {|token| token.name == "id"}
       expect(o).to_not be_nil
       expect(o.line_number).to eq 9
-      expect(o.id).to eq 1
 
       o = grammar.patterns.find {|pattern| pattern.token == o}
       expect(o).to_not be_nil
@@ -57,7 +55,6 @@ EOF
       o = grammar.tokens.find {|token| token.name == "token_with_code"}
       expect(o).to_not be_nil
       expect(o.line_number).to eq 11
-      expect(o.id).to eq 2
 
       o = grammar.patterns.find {|pattern| pattern.token == o}
       expect(o).to_not be_nil
@@ -83,21 +80,18 @@ EOF
       o = grammar.rules[0]
       expect(o.name).to eq "A"
       expect(o.components).to eq %w[B]
-      expect(o.id).to eq 1
       expect(o.line_number).to eq 19
       expect(o.code).to eq " a = 42;\n"
 
       o = grammar.rules[1]
       expect(o.name).to eq "B"
       expect(o.components).to eq %w[C while id]
-      expect(o.id).to eq 2
       expect(o.line_number).to eq 22
       expect(o.code).to be_nil
 
       o = grammar.rules[2]
       expect(o.name).to eq "B"
       expect(o.components).to eq []
-      expect(o.id).to eq 3
       expect(o.line_number).to eq 23
       expect(o.code).to eq " b = 0;\n"
     end
@@ -119,7 +113,6 @@ EOF
 
       o = grammar.tokens.find {|token| token.name == "code1"}
       expect(o).to_not be_nil
-      expect(o.id).to eq 0
       expect(o.line_number).to eq 1
 
       o = grammar.patterns.find {|pattern| pattern.token == o}
@@ -128,7 +121,6 @@ EOF
 
       o = grammar.tokens.find {|token| token.name == "code2"}
       expect(o).to_not be_nil
-      expect(o.id).to eq 1
       expect(o.line_number).to eq 6
 
       o = grammar.patterns.find {|pattern| pattern.token == o}
@@ -77,8 +77,8 @@ unittest
     assert(lexer.lex_token() == LT(1, 0, 3, Testparser.TOKEN_int));
     assert(lexer.lex_token() == LT(1, 4, 1, Testparser.TOKEN_plus));
     assert(lexer.lex_token() == LT(1, 6, 3, Testparser.TOKEN_int));
-    assert(lexer.lex_token() == LT(1, 9, 0, Testparser._TOKEN_EOF));
+    assert(lexer.lex_token() == LT(1, 9, 0, Testparser.TOKEN_0EOF));
 
     lexer = new Testparser.Lexer(null, 0u);
-    assert(lexer.lex_token() == LT(0, 0, 0, Testparser._TOKEN_EOF));
+    assert(lexer.lex_token() == LT(0, 0, 0, Testparser.TOKEN_0EOF));
 }