Allow rule terms to be marked as optional

This commit is contained in:
Josh Holtrop 2024-05-09 11:56:13 -04:00
parent 494afb7307
commit f3e4941ad8
14 changed files with 346 additions and 5 deletions

View File

@ -622,6 +622,13 @@ typedef struct
* Number of rule set AST node fields.
*/
uint16_t rule_set_node_field_array_size;
/**
* Whether this rule was a generated optional rule that matched the
* optional target. In this case, propagate the matched target node up
* instead of making a new node for this rule.
*/
bool propagate_optional_target;
<% end %>
} reduce_t;
@ -686,6 +693,7 @@ static const reduce_t parser_reduce_table[] = {
, &r_<%= reduce[:rule].name.gsub("$", "_") %><%= reduce[:rule].id %>_node_field_index_map[0]
<% end %>
, <%= reduce[:rule].rule_set.ast_fields.size %>
, <%= reduce[:propagate_optional_target] %>
<% end %>
},
<% end %>
@ -970,7 +978,11 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
{
/* We have something to reduce. */
<% if @grammar.ast %>
if (parser_reduce_table[reduce_index].n_states > 0)
if (parser_reduce_table[reduce_index].propagate_optional_target)
{
reduced_parser_node = state_values_stack_index(&statevalues, -1)->ast_node;
}
else if (parser_reduce_table[reduce_index].n_states > 0)
{
void ** node_fields = calloc(parser_reduce_table[reduce_index].rule_set_node_field_array_size, sizeof(void *));
if (parser_reduce_table[reduce_index].rule_set_node_field_index_map == NULL)

View File

@ -72,6 +72,7 @@ public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
<% @parser.rule_sets.each do |name, rule_set| %>
<% next if name.start_with?("$") %>
<% next if rule_set.optional? %>
public struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
{
<% rule_set.ast_fields.each do |fields| %>
@ -763,6 +764,13 @@ private struct reduce_t
* Number of rule set AST node fields.
*/
ushort rule_set_node_field_array_size;
/**
* Whether this rule was a generated optional rule that matched the
* optional target. In this case, propagate the matched target node up
* instead of making a new node for this rule.
*/
bool propagate_optional_target;
<% end %>
}
@ -832,6 +840,7 @@ private immutable reduce_t[] parser_reduce_table = [
, &r_<%= reduce[:rule].name.gsub("$", "_") %><%= reduce[:rule].id %>_node_field_index_map[0]
<% end %>
, <%= reduce[:rule].rule_set.ast_fields.size %>
, <%= reduce[:propagate_optional_target] %>
<% end %>
),
<% end %>
@ -1015,7 +1024,11 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
{
/* We have something to reduce. */
<% if @grammar.ast %>
if (parser_reduce_table[reduce_index].n_states > 0)
if (parser_reduce_table[reduce_index].propagate_optional_target)
{
reduced_parser_node = statevalues[$ - 1].ast_node;
}
else if (parser_reduce_table[reduce_index].n_states > 0)
{
void *[] node_fields = new void *[parser_reduce_table[reduce_index].rule_set_node_field_array_size];
foreach (i; 0..parser_reduce_table[reduce_index].rule_set_node_field_array_size)

View File

@ -64,11 +64,13 @@ typedef struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
<% @parser.rule_sets.each do |name, rule_set| %>
<% next if name.start_with?("$") %>
<% next if rule_set.optional? %>
struct <%= name %>;
<% end %>
<% @parser.rule_sets.each do |name, rule_set| %>
<% next if name.start_with?("$") %>
<% next if rule_set.optional? %>
typedef struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
{
<% rule_set.ast_fields.each do |fields| %>

View File

@ -631,6 +631,20 @@ This example uses the default start rule name of `Start`.
A parser rule has zero or more terms on the right side of its definition.
Each of these terms is either a token name or a rule name.
A term can be immediately followed by a `?` character to signify that it is
optional.
Another example:
```
token public;
token private;
token int;
token ident /[a-zA-Z_][a-zA-Z_0-9]*/;
token semicolon /;/;
IntegerDeclaration -> Visibility? int ident semicolon;
Visibility -> public;
Visibility -> private;
```
In a parser rule code block, parser values for the right side terms are
accessible as `$1` for the first term's parser value, `$2` for the second

View File

@ -71,6 +71,9 @@ class Propane
end
# Add "real" start rule.
@grammar.rules.unshift(Rule.new("$Start", [@grammar.start_rule, "$EOF"], nil, nil, nil))
# Generate and add rules for optional components.
generate_optional_component_rules!(tokens_by_name)
# Build rule sets.
rule_sets = {}
rule_set_id = @grammar.tokens.size
@grammar.rules.each_with_index do |rule, rule_id|
@ -128,6 +131,37 @@ class Propane
@parser = Parser.new(@grammar, rule_sets, @log)
end
# Generate and add rules for any optional components.
#
# For each rule component marked optional with a trailing `?`, add one
# pair of generated rules named after the optional component: an empty
# rule (the component was omitted) and a rule matching the bare
# component that passes its parse value through (`$$ = $1`).
#
# @param tokens_by_name [Hash]
#   Mapping of token name to Token, used to look up a token ptypename.
#
# @return [void]
def generate_optional_component_rules!(tokens_by_name)
  optional_rules_added = Set.new
  generated_rules = []
  @grammar.rules.each do |rule|
    rule.components.each do |component|
      # Optional components are written as "<name>?".
      next unless component =~ /^(.*)\?$/
      c = $1
      next if optional_rules_added.include?(component)
      # Create two rules for the optional component: one empty and
      # one just matching the component.
      # We need to find the ptypename for the optional component in
      # order to copy it to the generated rules.
      if tokens_by_name[c]
        # The optional component is a token.
        ptypename = tokens_by_name[c].ptypename
      else
        # The optional component must be a rule, so find the last
        # instance of that rule that specifies a ptypename.
        # (Inner variable named `r` to avoid shadowing `rule` above.)
        ptypename = @grammar.rules.reduce(nil) do |result, r|
          r.name == c && r.ptypename ? r.ptypename : result
        end
      end
      generated_rules << Rule.new(component, [], nil, ptypename, rule.line_number)
      generated_rules << Rule.new(component, [c], "$$ = $1;\n", ptypename, rule.line_number)
      optional_rules_added << component
    end
  end
  # Append after the walk so we never mutate the Array being iterated.
  # Generated rules contain no "?" components, so a second pass over
  # them could never have produced additional rules anyway.
  @grammar.rules.concat(generated_rules)
end
# Determine which grammar rules could expand to empty sequences.
#
# @param rule_sets [Hash]

View File

@ -198,7 +198,7 @@ class Propane
if @ast && ptypename
raise Error.new("Multiple ptypes are unsupported in AST mode")
end
md = consume!(/((?:#{IDENTIFIER_REGEX}\s*)*)\s*/, "expected rule component list")
md = consume!(/((?:#{IDENTIFIER_REGEX}\??\s*)*)\s*/, "expected rule component list")
components = md[1].strip.split(/\s+/)
if @ast
consume!(/;/, "expected `;'")

View File

@ -64,11 +64,13 @@ class Propane
case ra = item_set.reduce_actions
when Rule
[{token_id: @grammar.invalid_token_id, rule_id: ra.id, rule: ra,
rule_set_id: ra.rule_set.id, n_states: ra.components.size}]
rule_set_id: ra.rule_set.id, n_states: ra.components.size,
propagate_optional_target: ra.optional? && ra.components.size == 1}]
when Hash
ra.map do |token, rule|
{token_id: token.id, rule_id: rule.id, rule: rule,
rule_set_id: rule.rule_set.id, n_states: rule.components.size}
rule_set_id: rule.rule_set.id, n_states: rule.components.size,
propagate_optional_target: rule.optional? && rule.components.size == 1}
end
else
[]

View File

@ -66,6 +66,14 @@ class Propane
@components.empty?
end
# Return whether this is an optional Rule.
#
# Generated optional rules carry a trailing "?" in their name.
#
# @return [Boolean]
#   Whether this is an optional Rule.
def optional?
  @name.match?(/\?\z/)
end
# Represent the Rule as a String.
#
# @return [String]

View File

@ -56,6 +56,24 @@ class Propane
@could_be_empty
end
# Return whether this is an optional RuleSet.
#
# Generated optional rule sets carry a trailing "?" in their name.
#
# @return [Boolean]
#   Whether this is an optional RuleSet.
def optional?
  @name[-1] == "?"
end
# For optional rule sets, return the underlying component that is optional.
#
# An optional rule set has exactly one non-empty rule whose single
# component is the optional target; locate that rule and return its
# first component.
def option_target
  target_rule = @rules.find { |r| !r.components.empty? }
  raise "Optional rule target not found" unless target_rule
  target_rule.components.first
end
# Build the start token set for the RuleSet.
#
# @return [Set<Token>]
@ -102,6 +120,9 @@ class Propane
@ast_fields = []
@rules.each do |rule|
rule.components.each_with_index do |component, i|
if component.is_a?(RuleSet) && component.optional?
component = component.option_target
end
if component.is_a?(Token)
node_name = "Token"
else

View File

@ -910,6 +910,111 @@ EOF
run_propane(language: language)
compile("spec/test_start_rule_ast.#{language}", language: language)
end
# Exercise the `?` optional-rule-component marker in non-AST mode for
# both target languages, with the optional components absent and present.
it "allows marking a rule component as optional" do
  if language == "d"
    write_grammar <<EOF
<<
import std.stdio;
>>
ptype int;
ptype float = float;
ptype string = string;
token a (float) << $$ = 1.5; >>
token b << $$ = 2; >>
token c << $$ = 3; >>
token d << $$ = 4; >>
Start -> a? b R? <<
writeln("a: ", $1);
writeln("b: ", $2);
writeln("R: ", $3);
>>
R -> c d << $$ = "cd"; >>
R (string) -> d c << $$ = "dc"; >>
EOF
  else
    write_grammar <<EOF
<<
#include <stdio.h>
>>
ptype int;
ptype float = float;
ptype string = char *;
token a (float) << $$ = 1.5; >>
token b << $$ = 2; >>
token c << $$ = 3; >>
token d << $$ = 4; >>
Start -> a? b R? <<
printf("a: %.1f\\n", $1);
printf("b: %d\\n", $2);
printf("R: %s\\n", $3 == NULL ? "" : $3);
>>
R -> c d << $$ = "cd"; >>
R (string) -> d c << $$ = "dc"; >>
EOF
  end
  # Generate the parser, compile the matching test driver, and run it.
  run_propane(language: language)
  compile("spec/test_optional_rule_component.#{language}", language: language)
  results = run_test
  expect(results.stderr).to eq ""
  expect(results.status).to eq 0
  # Three parses are expected: "b" (both optionals absent, so $1/$3 hold
  # default values), then "abcd" and "abdc" exercising both R alternatives.
  # NOTE(review): "a: 0" vs "a: 0.0" reflects D/C float formatting of the
  # default value — confirm against the test drivers.
  verify_lines(results.stdout, [
    "a: 0#{language == "d" ? "" : ".0"}",
    "b: 2",
    "R: ",
    "a: 1.5",
    "b: 2",
    "R: cd",
    "a: 1.5",
    "b: 2",
    "R: dc",
  ])
end
# Exercise the `?` optional-rule-component marker in AST generation mode:
# the grammars are action-free and the per-language test drivers assert
# on the generated AST node fields.
it "allows marking a rule component as optional in AST generation mode" do
  if language == "d"
    write_grammar <<EOF
ast;
<<
import std.stdio;
>>
token a;
token b;
token c;
token d;
Start -> a? b R?;
R -> c d;
R -> d c;
EOF
  else
    write_grammar <<EOF
ast;
<<
#include <stdio.h>
>>
token a;
token b;
token c;
token d;
Start -> a? b R?;
R -> c d;
R -> d c;
EOF
  end
  # Generate, compile, and run; the driver's asserts are the real check,
  # so only the exit status and empty stderr are verified here.
  run_propane(language: language)
  compile("spec/test_optional_rule_component_ast.#{language}", language: language)
  results = run_test
  expect(results.stderr).to eq ""
  expect(results.status).to eq 0
end
end
end
end

View File

@ -0,0 +1,22 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
int main()
{
    /* Each input must parse successfully: "b" leaves both optional
     * components ("a?" and "R?") absent; "abcd" and "abdc" exercise
     * both alternatives of the optional rule R. */
    static char const * const inputs[] = {"b", "abcd", "abdc"};
    size_t i;
    for (i = 0u; i < sizeof(inputs) / sizeof(inputs[0]); i++)
    {
        p_context_t context;
        p_context_init(&context, (uint8_t const *)inputs[i], strlen(inputs[i]));
        assert(p_parse(&context) == P_SUCCESS);
    }
    return 0;
}

View File

@ -0,0 +1,23 @@
import testparser;
import std.stdio;
int main()
{
    return 0;
}

unittest
{
    // Each input must parse successfully: "b" leaves both optional
    // components ("a?" and "R?") absent; "abcd" and "abdc" exercise
    // both alternatives of the optional rule R.
    foreach (input; ["b", "abcd", "abdc"])
    {
        p_context_t context;
        p_context_init(&context, input);
        assert(p_parse(&context) == P_SUCCESS);
    }
}

View File

@ -0,0 +1,42 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"
int main()
{
    /* "b": both optional components absent -> their AST fields are NULL. */
    char const * input = "b";
    p_context_t context;
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    assert(p_parse(&context) == P_SUCCESS);
    Start * root = p_result(&context);
    assert(root->pToken1 == NULL);
    assert(root->pToken2 != NULL);
    assert_eq(TOKEN_b, root->pToken2->token);
    assert(root->pR3 == NULL);
    assert(root->pR == NULL);

    /* "abcd": both optional components present; R matched its "c d"
     * alternative, and the positional field aliases the named one. */
    input = "abcd";
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    assert(p_parse(&context) == P_SUCCESS);
    root = p_result(&context);
    assert(root->pToken1 != NULL);
    assert_eq(TOKEN_a, root->pToken1->token);
    assert(root->pToken2 != NULL);
    assert(root->pR3 != NULL);
    assert(root->pR != NULL);
    assert(root->pR == root->pR3);
    assert_eq(TOKEN_c, root->pR->pToken1->token);

    /* "bdc": optional "a?" absent; R matched its "d c" alternative. */
    input = "bdc";
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    assert(p_parse(&context) == P_SUCCESS);
    root = p_result(&context);
    assert(root->pToken1 == NULL);
    assert(root->pToken2 != NULL);
    assert(root->pR != NULL);
    assert_eq(TOKEN_d, root->pR->pToken1->token);
    return 0;
}

View File

@ -0,0 +1,43 @@
import testparser;
import std.stdio;
import testutils;
int main()
{
    return 0;
}

unittest
{
    // "b": both optional components absent -> their AST fields are null.
    string input = "b";
    p_context_t context;
    p_context_init(&context, input);
    assert(p_parse(&context) == P_SUCCESS);
    Start * start = p_result(&context);
    assert(start.pToken1 is null);
    assert(start.pToken2 !is null);
    assert_eq(TOKEN_b, start.pToken2.token);
    assert(start.pR3 is null);
    assert(start.pR is null);

    // "abcd": both optional components present; R matched its "c d"
    // alternative, and the positional field aliases the named one.
    // Null checks normalized to idiomatic `is`/`!is` (the original block
    // mixed `!= null` and `!is null`); identical behavior for pointers.
    input = "abcd";
    p_context_init(&context, input);
    assert(p_parse(&context) == P_SUCCESS);
    start = p_result(&context);
    assert(start.pToken1 !is null);
    assert_eq(TOKEN_a, start.pToken1.token);
    assert(start.pToken2 !is null);
    assert(start.pR3 !is null);
    assert(start.pR !is null);
    assert(start.pR is start.pR3);
    assert_eq(TOKEN_c, start.pR.pToken1.token);

    // "bdc": optional "a?" absent; R matched its "d c" alternative.
    input = "bdc";
    p_context_init(&context, input);
    assert(p_parse(&context) == P_SUCCESS);
    start = p_result(&context);
    assert(start.pToken1 is null);
    assert(start.pToken2 !is null);
    assert(start.pR !is null);
    assert_eq(TOKEN_d, start.pR.pToken1.token);
}