Store parser values according to the rule/pattern type

Josh Holtrop 2022-11-13 22:20:30 -05:00
parent e4a160f918
commit c6ea4f83c2
8 changed files with 87 additions and 24 deletions

View File

@@ -27,6 +27,13 @@ class <%= @classname %>
<% end %>
];
static union ParserValue
{
<% @grammar.ptypes.each do |name, typestring| %>
<%= typestring %> v_<%= name %>;
<% end %>
}
static class Decoder
{
enum
@@ -156,7 +163,7 @@ class <%= @classname %>
size_t col;
size_t length;
uint token;
<%= @grammar.ptype %> pvalue;
ParserValue pvalue;
}
private string m_input;
@@ -200,7 +207,7 @@ class <%= @classname %>
<% @grammar.patterns.each do |pattern| %>
<% if pattern.code_id %>
case <%= pattern.code_id %>u: {
<%= expand_code(pattern.code, false) %>
<%= expand_code(pattern.code, false, nil, pattern) %>
} break;
<% end %>
<% end %>
@@ -349,7 +356,7 @@ class <%= @classname %>
private struct StateValue
{
uint state;
<%= @grammar.ptype %> pvalue;
ParserValue pvalue;
this(uint state)
{
@@ -378,7 +385,7 @@ class <%= @classname %>
private Lexer m_lexer;
private <%= @grammar.ptype %> parse_result;
private ParserValue parse_result;
this(string input)
{
@@ -391,7 +398,7 @@ class <%= @classname %>
uint token = _TOKEN_COUNT;
StateValue[] statevalues = new StateValue[](1);
uint reduced_rule_set = 0xFFFFFFFFu;
<%= @grammar.ptype %> reduced_parse_result;
ParserValue reduced_parser_value;
for (;;)
{
if (token == _TOKEN_COUNT)
@@ -427,9 +434,9 @@ class <%= @classname %>
else
{
/* We shifted a RuleSet. */
statevalues[$-1].pvalue = reduced_parse_result;
<%= @grammar.ptype %> new_parse_result;
reduced_parse_result = new_parse_result;
statevalues[$-1].pvalue = reduced_parser_value;
ParserValue new_parse_result;
reduced_parser_value = new_parse_result;
reduced_rule_set = 0xFFFFFFFFu;
}
continue;
@@ -439,7 +446,7 @@ class <%= @classname %>
if (reduce_index != 0xFFFFFFFFu)
{
/* We have something to reduce. */
reduced_parse_result = user_code(reduces[reduce_index].rule, statevalues, reduces[reduce_index].n_states);
reduced_parser_value = user_code(reduces[reduce_index].rule, statevalues, reduces[reduce_index].n_states);
reduced_rule_set = reduces[reduce_index].rule_set;
statevalues.length -= reduces[reduce_index].n_states;
continue;
@@ -459,9 +466,9 @@ class <%= @classname %>
}
}
@property <%= @grammar.ptype %> result()
@property <%= start_rule_type[1] %> result()
{
return parse_result;
return parse_result.v_<%= start_rule_type[0] %>;
}
private uint check_shift(uint state, uint symbol)
@@ -517,16 +524,16 @@ class <%= @classname %>
*
* @return Parse value.
*/
private <%= @grammar.ptype %> user_code(uint rule, StateValue[] statevalues, uint n_states)
private ParserValue user_code(uint rule, StateValue[] statevalues, uint n_states)
{
<%= @grammar.ptype %> _pvalue;
ParserValue _pvalue;
switch (rule)
{
<% @grammar.rules.each do |rule| %>
<% if rule.code %>
case <%= rule.id %>u: {
<%= expand_code(rule.code, true) %>
<%= expand_code(rule.code, true, rule, nil) %>
} break;
<% end %>
<% end %>
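To make the template changes above concrete: for a grammar declaring, say, two parser value types, the generated parser would now keep every semantic value in a ParserValue union with one member per declared type, and the result property would unwrap the member named by the Start rule's ptypename. The ptype names and D types below ("str"/string, "count"/ulong) are purely illustrative, not taken from this commit.

// Sketch of generated output for two hypothetical ptypes.
static union ParserValue
{
    string v_str;  // values produced by rules/patterns typed "str"
    ulong v_count; // values produced by rules/patterns typed "count"
}

// If the Start rule's ptypename were "count", result() would unwrap it:
@property ulong result()
{
    return parse_result.v_count;
}

Every pvalue touched by this diff (the lexed token's pvalue, StateValue.pvalue, and parse_result) now carries this union type, so each rule or pattern only reads and writes its own member.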

View File

@@ -33,6 +33,7 @@ class Propane
pattern.mode = "default"
found_default = true
end
pattern.ptypename ||= "default"
end
unless found_default
raise Error.new("No patterns found for default mode")
@@ -43,6 +44,8 @@ class Propane
@grammar.tokens.each_with_index do |token, token_id|
# Assign token ID.
token.id = token_id
# Set default ptypename if none given.
token.ptypename ||= "default"
# Check for token name conflicts.
if tokens_by_name.include?(token.name)
raise Error.new("Duplicate token name #{token.name.inspect}")
@@ -69,8 +72,20 @@ class Propane
rule_sets[rule.name] = RuleSet.new(rule.name, rule_set_id)
rule_set_id += 1
end
rule.rule_set = rule_sets[rule.name]
rule_sets[rule.name] << rule
rule_set = rule_sets[rule.name]
if rule_set.ptypename && rule.ptypename && rule_set.ptypename != rule.ptypename
raise Error.new("Conflicting ptypes for rule #{rule.name}")
end
rule_set.ptypename ||= rule.ptypename
rule.rule_set = rule_set
rule_set << rule
end
rule_sets.each do |name, rule_set|
rule_set.ptypename ||= "default"
# Assign rule set ptypenames back to rules.
rule_set.rules.each do |rule|
rule.ptypename = rule_set.ptypename
end
end
# Generate lexer user code IDs for lexer patterns with user code blocks.
@grammar.patterns.select do |pattern|
@@ -159,24 +174,28 @@ class Propane
# User code block.
# @param parser [Boolean]
# Whether the user code is for the parser or lexer.
# @param rule [Rule, nil]
# The Rule associated with the user code if user code is for the parser.
# @param pattern [Pattern, nil]
# The Pattern associated with the user code if user code is for the lexer.
#
# @return [String]
# Expanded user code block.
def expand_code(code, parser)
def expand_code(code, parser, rule, pattern)
code = code.gsub(/\$token\(([$\w]+)\)/) do |match|
"TOKEN_#{Token.code_name($1)}"
end
if parser
code = code.gsub(/\$\$/) do |match|
"_pvalue"
"_pvalue.v_#{rule.ptypename}"
end
code = code.gsub(/\$(\d+)/) do |match|
index = $1.to_i
"statevalues[$-1-n_states+#{index}].pvalue"
"statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.ptypename}"
end
else
code = code.gsub(/\$\$/) do |match|
"lt.pvalue"
"lt.pvalue.v_#{pattern.ptypename}"
end
code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match|
mode_name = $1
@@ -190,6 +209,17 @@ class Propane
code
end
# Get the parser value type for the start rule.
#
# @return [Array<String>]
# Start rule parser value type name and type string.
def start_rule_type
start_rule = @grammar.rules.find do |rule|
rule.name == "Start"
end
[start_rule.ptypename, @grammar.ptypes[start_rule.ptypename]]
end
end
end
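To illustrate the extended expand_code (the rule, its ptypename, and the code block here are hypothetical): a parser rule whose ptypename resolves to "count" and whose user code block is "$$ = $1 + $3;" would now expand with each value reference routed through the matching union member, roughly:

// Hypothetical expansion of "$$ = $1 + $3;" for a rule typed "count".
_pvalue.v_count = statevalues[$-1-n_states+1].pvalue.v_count
                + statevalues[$-1-n_states+3].pvalue.v_count;

On the lexer side, "$$" in a pattern's code block is now rewritten to lt.pvalue.v_<ptypename> for that pattern rather than to the bare lt.pvalue.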

View File

@@ -117,7 +117,7 @@ class Propane
end
token = Token.new(name, ptypename, @line_number)
@tokens << token
pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, mode: @mode)
pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, mode: @mode, ptypename: ptypename)
@patterns << pattern
@mode = nil
true
@@ -170,10 +170,13 @@ class Propane
def parse_pattern_statement!
if pattern = parse_pattern!
consume!(/\s+/)
if md = consume!(/\((#{IDENTIFIER_REGEX})\)\s*/)
ptypename = md[1]
end
unless code = parse_code_block!
raise Error.new("Line #{@line_number}: expected code block to follow pattern")
end
@patterns << Pattern.new(pattern: pattern, line_number: @line_number, code: code, mode: @mode)
@patterns << Pattern.new(pattern: pattern, line_number: @line_number, code: code, mode: @mode, ptypename: ptypename)
@mode = nil
true
end

View File

@@ -30,6 +30,10 @@ class Propane
# Lexer mode for this pattern.
attr_accessor :mode
# @return [String, nil]
# Parser value type name.
attr_accessor :ptypename
# Construct a Pattern.
#
# @param options [Hash]
@@ -53,6 +57,7 @@ class Propane
@token = options[:token]
@line_number = options[:line_number]
@mode = options[:mode]
@ptypename = options[:ptypename]
regex = Regex.new(@pattern)
regex.nfa.end_state.accepts = self
@nfa = regex.nfa

View File

@@ -16,7 +16,7 @@ class Propane
# @return [String, nil]
# Parser type name.
attr_reader :ptypename
attr_accessor :ptypename
# @return [Integer]
# Line number where the rule was defined in the input grammar.

View File

@@ -10,6 +10,10 @@ class Propane
# Name of the RuleSet.
attr_reader :name
# @return [String, nil]
# Parser type name.
attr_accessor :ptypename
# @return [Array<Rule>]
# Rules in the RuleSet.
attr_reader :rules

View File

@@ -20,7 +20,7 @@ class Propane
# @return [String, nil]
# Parser value type name.
attr_reader :ptypename
attr_accessor :ptypename
# @return [Integer, nil]
# Token ID.

View File

@@ -186,6 +186,12 @@ token abc(string);
token bar;
tokenid int(integer);
/xyz/ (string) <<
>>
/z28/ <<
>>
Start (node) -> R;
R -> abc int;
EOF
@@ -210,6 +216,14 @@ EOF
o = grammar.rules.find {|rule| rule.name == "R"}
expect(o).to_not be_nil
expect(o.ptypename).to be_nil
o = grammar.patterns.find {|pattern| pattern.pattern == "xyz"}
expect(o).to_not be_nil
expect(o.ptypename).to eq "string"
o = grammar.patterns.find {|pattern| pattern.pattern == "z28"}
expect(o).to_not be_nil
expect(o.ptypename).to be_nil
end
end
end