Allow rule terms to be marked as optional
This commit is contained in: parent 494afb7307, commit f3e4941ad8
@@ -622,6 +622,13 @@ typedef struct
   * Number of rule set AST node fields.
   */
  uint16_t rule_set_node_field_array_size;

  /**
   * Whether this rule was a generated optional rule that matched the
   * optional target. In this case, propagate the matched target node up
   * instead of making a new node for this rule.
   */
  bool propagate_optional_target;
<% end %>
} reduce_t;

@@ -686,6 +693,7 @@ static const reduce_t parser_reduce_table[] = {
  , &r_<%= reduce[:rule].name.gsub("$", "_") %><%= reduce[:rule].id %>_node_field_index_map[0]
<% end %>
  , <%= reduce[:rule].rule_set.ast_fields.size %>
  , <%= reduce[:propagate_optional_target] %>
<% end %>
  },
<% end %>

@@ -970,7 +978,11 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
      {
        /* We have something to reduce. */
<% if @grammar.ast %>
        if (parser_reduce_table[reduce_index].n_states > 0)
        if (parser_reduce_table[reduce_index].propagate_optional_target)
        {
          reduced_parser_node = state_values_stack_index(&statevalues, -1)->ast_node;
        }
        else if (parser_reduce_table[reduce_index].n_states > 0)
        {
          void ** node_fields = calloc(parser_reduce_table[reduce_index].rule_set_node_field_array_size, sizeof(void *));
          if (parser_reduce_table[reduce_index].rule_set_node_field_index_map == NULL)

@@ -72,6 +72,7 @@ public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>

<% @parser.rule_sets.each do |name, rule_set| %>
<% next if name.start_with?("$") %>
<% next if rule_set.optional? %>
public struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
{
<% rule_set.ast_fields.each do |fields| %>

@@ -763,6 +764,13 @@ private struct reduce_t
   * Number of rule set AST node fields.
   */
  ushort rule_set_node_field_array_size;

  /**
   * Whether this rule was a generated optional rule that matched the
   * optional target. In this case, propagate the matched target node up
   * instead of making a new node for this rule.
   */
  bool propagate_optional_target;
<% end %>
}

@@ -832,6 +840,7 @@ private immutable reduce_t[] parser_reduce_table = [
  , &r_<%= reduce[:rule].name.gsub("$", "_") %><%= reduce[:rule].id %>_node_field_index_map[0]
<% end %>
  , <%= reduce[:rule].rule_set.ast_fields.size %>
  , <%= reduce[:propagate_optional_target] %>
<% end %>
  ),
<% end %>

@@ -1015,7 +1024,11 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
      {
        /* We have something to reduce. */
<% if @grammar.ast %>
        if (parser_reduce_table[reduce_index].n_states > 0)
        if (parser_reduce_table[reduce_index].propagate_optional_target)
        {
          reduced_parser_node = statevalues[$ - 1].ast_node;
        }
        else if (parser_reduce_table[reduce_index].n_states > 0)
        {
          void *[] node_fields = new void *[parser_reduce_table[reduce_index].rule_set_node_field_array_size];
          foreach (i; 0..parser_reduce_table[reduce_index].rule_set_node_field_array_size)

@@ -64,11 +64,13 @@ typedef struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>

<% @parser.rule_sets.each do |name, rule_set| %>
<% next if name.start_with?("$") %>
<% next if rule_set.optional? %>
struct <%= name %>;
<% end %>

<% @parser.rule_sets.each do |name, rule_set| %>
<% next if name.start_with?("$") %>
<% next if rule_set.optional? %>
typedef struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
{
<% rule_set.ast_fields.each do |fields| %>

@@ -631,6 +631,20 @@ This example uses the default start rule name of `Start`.

A parser rule has zero or more terms on the right side of its definition.
Each of these terms is either a token name or a rule name.
A term can be immediately followed by a `?` character to signify that it is
optional.
Another example:

```
token public;
token private;
token int;
token ident /[a-zA-Z_][a-zA-Z_0-9]*/;
token semicolon /;/;
IntegerDeclaration -> Visibility? int ident semicolon;
Visibility -> public;
Visibility -> private;
```

In a parser rule code block, parser values for the right side terms are
accessible as `$1` for the first term's parser value, `$2` for the second
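
Internally, the generator expands each optional term into a generated rule set with two alternatives: one that is empty and one that matches the underlying target and forwards its parser value (see `generate_optional_component_rules!` later in this commit). The following is a minimal Ruby sketch of that expansion for the example grammar above; plain hashes stand in for the real Propane `Rule` objects, so this is an illustration of the idea rather than the project's actual API.

```ruby
# Simplified model of the optional-term expansion performed by the
# generator; hashes stand in for Propane's Rule objects.
rules = [
  { name: "IntegerDeclaration", components: %w[Visibility? int ident semicolon] },
  { name: "Visibility",         components: %w[public] },
  { name: "Visibility",         components: %w[private] },
]

added = {}
rules.each do |rule|
  rule[:components].each do |component|
    next unless component.end_with?("?") && !added[component]
    target = component.chomp("?")
    # One empty alternative, and one that just matches the target and
    # forwards its value ($$ = $1).
    rules << { name: component, components: [] }
    rules << { name: component, components: [target], code: "$$ = $1;\n" }
    added[component] = true
  end
end

rules.each { |r| puts "#{r[:name]} -> #{r[:components].join(' ')};" }
# Output ends with the two generated rules:
#   Visibility? -> ;
#   Visibility? -> Visibility;
```
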
@@ -71,6 +71,9 @@ class Propane
      end
      # Add "real" start rule.
      @grammar.rules.unshift(Rule.new("$Start", [@grammar.start_rule, "$EOF"], nil, nil, nil))
      # Generate and add rules for optional components.
      generate_optional_component_rules!(tokens_by_name)
      # Build rule sets.
      rule_sets = {}
      rule_set_id = @grammar.tokens.size
      @grammar.rules.each_with_index do |rule, rule_id|

@@ -128,6 +131,37 @@ class Propane
      @parser = Parser.new(@grammar, rule_sets, @log)
    end

    # Generate and add rules for any optional components.
    def generate_optional_component_rules!(tokens_by_name)
      optional_rules_added = Set.new
      @grammar.rules.each do |rule|
        rule.components.each do |component|
          if component =~ /^(.*)\?$/
            c = $1
            unless optional_rules_added.include?(component)
              # Create two rules for the optional component: one empty and
              # one just matching the component.
              # We need to find the ptypename for the optional component in
              # order to copy it to the generated rules.
              if tokens_by_name[c]
                # The optional component is a token.
                ptypename = tokens_by_name[c].ptypename
              else
                # The optional component must be a rule, so find any instance
                # of that rule that specifies a ptypename.
                ptypename = @grammar.rules.reduce(nil) do |result, rule|
                  rule.name == c && rule.ptypename ? rule.ptypename : result
                end
              end
              @grammar.rules << Rule.new(component, [], nil, ptypename, rule.line_number)
              @grammar.rules << Rule.new(component, [c], "$$ = $1;\n", ptypename, rule.line_number)
              optional_rules_added << component
            end
          end
        end
      end
    end
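
The ptypename lookup above has to handle a rule defined by several alternatives where only some of them declare an explicit ptype (as in the spec below, where only `R (string) -> d c` names one); scanning all rules and keeping the last explicit ptypename found covers that case. Here is a small standalone sketch of that reduce, with hashes standing in for `Rule` objects (an illustration, not the real classes):

```ruby
# Standalone sketch of the ptypename lookup used when an optional
# component refers to a rule rather than a token.
rules = [
  { name: "R", ptypename: nil },       # R -> c d          (no explicit ptype)
  { name: "R", ptypename: "string" },  # R (string) -> d c (explicit ptype)
  { name: "S", ptypename: nil },
]

c = "R"
ptypename = rules.reduce(nil) do |result, rule|
  rule[:name] == c && rule[:ptypename] ? rule[:ptypename] : result
end

p ptypename  # => "string"; copied onto both generated "R?" rules
```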

    # Determine which grammar rules could expand to empty sequences.
    #
    # @param rule_sets [Hash]

@@ -198,7 +198,7 @@ class Propane
        if @ast && ptypename
          raise Error.new("Multiple ptypes are unsupported in AST mode")
        end
        md = consume!(/((?:#{IDENTIFIER_REGEX}\s*)*)\s*/, "expected rule component list")
        md = consume!(/((?:#{IDENTIFIER_REGEX}\??\s*)*)\s*/, "expected rule component list")
        components = md[1].strip.split(/\s+/)
        if @ast
          consume!(/;/, "expected `;'")

@@ -64,11 +64,13 @@ class Propane
      case ra = item_set.reduce_actions
      when Rule
        [{token_id: @grammar.invalid_token_id, rule_id: ra.id, rule: ra,
          rule_set_id: ra.rule_set.id, n_states: ra.components.size}]
          rule_set_id: ra.rule_set.id, n_states: ra.components.size,
          propagate_optional_target: ra.optional? && ra.components.size == 1}]
      when Hash
        ra.map do |token, rule|
          {token_id: token.id, rule_id: rule.id, rule: rule,
           rule_set_id: rule.rule_set.id, n_states: rule.components.size}
           rule_set_id: rule.rule_set.id, n_states: rule.components.size,
           propagate_optional_target: rule.optional? && rule.components.size == 1}
        end
      else
        []
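
The new `propagate_optional_target` flag is set only for a generated optional rule that actually matched its target: the rule's name ends in `?` (so it is one of the generated alternatives) and it has exactly one component, which excludes both ordinary rules and the empty alternative. A small sketch of the condition, with a rule reduced to its name and component list (`Rule#optional?` is just `name.end_with?("?")`, added later in this commit):

```ruby
# Sketch of when a reduce entry propagates the matched child node
# instead of building a new node for the generated optional rule.
def propagate_optional_target?(name, components)
  name.end_with?("?") && components.size == 1
end

puts propagate_optional_target?("R?", ["R"])          # => true  (optional matched its target)
puts propagate_optional_target?("R?", [])             # => false (optional matched empty)
puts propagate_optional_target?("Start", %w[a? b R?]) # => false (ordinary rule)
```
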
@@ -66,6 +66,14 @@ class Propane
      @components.empty?
    end

    # Return whether this is an optional Rule.
    #
    # @return [Boolean]
    #   Whether this is an optional Rule.
    def optional?
      @name.end_with?("?")
    end

    # Represent the Rule as a String.
    #
    # @return [String]

@@ -56,6 +56,24 @@ class Propane
      @could_be_empty
    end

    # Return whether this is an optional RuleSet.
    #
    # @return [Boolean]
    #   Whether this is an optional RuleSet.
    def optional?
      @name.end_with?("?")
    end

    # For optional rule sets, return the underlying component that is optional.
    def option_target
      @rules.each do |rule|
        if rule.components.size > 0
          return rule.components[0]
        end
      end
      raise "Optional rule target not found"
    end
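
A generated optional rule set always contains exactly one empty rule and one single-component rule, so `option_target` can return the first component of whichever rule is non-empty. A minimal sketch of that shape (hashes again stand in for `Rule` objects):

```ruby
# Minimal model of option_target: in the generated "R?" rule set, the
# non-empty rule's only component is the optional target.
def option_target(rules)
  rules.each do |rule|
    return rule[:components][0] if rule[:components].size > 0
  end
  raise "Optional rule target not found"
end

optional_rule_set = [{ components: [] }, { components: ["R"] }]  # rules for "R?"
puts option_target(optional_rule_set)  # => R
```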

    # Build the start token set for the RuleSet.
    #
    # @return [Set<Token>]

@@ -102,6 +120,9 @@ class Propane
      @ast_fields = []
      @rules.each do |rule|
        rule.components.each_with_index do |component, i|
          if component.is_a?(RuleSet) && component.optional?
            component = component.option_target
          end
          if component.is_a?(Token)
            node_name = "Token"
          else
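
In AST mode the optional wrapper rule set is replaced by its target before field names are computed, so the generated field is named after the real node type (Token for tokens, the rule set name otherwise), exactly as if the term had not been marked optional; this is why the AST specs below can reference `pToken1`, `pToken2`, and `pR3` for `Start -> a? b R?`. A simplified sketch of that naming, assuming plain string component names instead of Token/RuleSet objects:

```ruby
# Simplified field naming for "Start -> a? b R?": strip the optional
# marker (optional? / option_target in the real code), map token names
# to "Token", and append the 1-based position.
token_names = %w[a b c d]
components  = %w[a? b R?]

fields = components.each_with_index.map do |component, i|
  target = component.chomp("?")
  node_name = token_names.include?(target) ? "Token" : target
  "p#{node_name}#{i + 1}"
end

p fields  # => ["pToken1", "pToken2", "pR3"]
```
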
@@ -910,6 +910,111 @@ EOF
      run_propane(language: language)
      compile("spec/test_start_rule_ast.#{language}", language: language)
    end

    it "allows marking a rule component as optional" do
      if language == "d"
        write_grammar <<EOF
<<
import std.stdio;
>>

ptype int;
ptype float = float;
ptype string = string;

token a (float) << $$ = 1.5; >>
token b << $$ = 2; >>
token c << $$ = 3; >>
token d << $$ = 4; >>
Start -> a? b R? <<
writeln("a: ", $1);
writeln("b: ", $2);
writeln("R: ", $3);
>>
R -> c d << $$ = "cd"; >>
R (string) -> d c << $$ = "dc"; >>
EOF
      else
        write_grammar <<EOF
<<
#include <stdio.h>
>>

ptype int;
ptype float = float;
ptype string = char *;

token a (float) << $$ = 1.5; >>
token b << $$ = 2; >>
token c << $$ = 3; >>
token d << $$ = 4; >>
Start -> a? b R? <<
printf("a: %.1f\\n", $1);
printf("b: %d\\n", $2);
printf("R: %s\\n", $3 == NULL ? "" : $3);
>>
R -> c d << $$ = "cd"; >>
R (string) -> d c << $$ = "dc"; >>
EOF
      end
      run_propane(language: language)
      compile("spec/test_optional_rule_component.#{language}", language: language)
      results = run_test
      expect(results.stderr).to eq ""
      expect(results.status).to eq 0
      verify_lines(results.stdout, [
        "a: 0#{language == "d" ? "" : ".0"}",
        "b: 2",
        "R: ",
        "a: 1.5",
        "b: 2",
        "R: cd",
        "a: 1.5",
        "b: 2",
        "R: dc",
      ])
    end

it "allows marking a rule component as optional in AST generation mode" do
|
||||
if language == "d"
|
||||
write_grammar <<EOF
|
||||
ast;
|
||||
|
||||
<<
|
||||
import std.stdio;
|
||||
>>
|
||||
|
||||
token a;
|
||||
token b;
|
||||
token c;
|
||||
token d;
|
||||
Start -> a? b R?;
|
||||
R -> c d;
|
||||
R -> d c;
|
||||
EOF
|
||||
else
|
||||
write_grammar <<EOF
|
||||
ast;
|
||||
|
||||
<<
|
||||
#include <stdio.h>
|
||||
>>
|
||||
|
||||
token a;
|
||||
token b;
|
||||
token c;
|
||||
token d;
|
||||
Start -> a? b R?;
|
||||
R -> c d;
|
||||
R -> d c;
|
||||
EOF
|
||||
end
|
||||
run_propane(language: language)
|
||||
compile("spec/test_optional_rule_component_ast.#{language}", language: language)
|
||||
results = run_test
|
||||
expect(results.stderr).to eq ""
|
||||
expect(results.status).to eq 0
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
New file: spec/test_optional_rule_component.c (22 lines)
@@ -0,0 +1,22 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>

int main()
{
    char const * input = "b";
    p_context_t context;
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    assert(p_parse(&context) == P_SUCCESS);

    input = "abcd";
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    assert(p_parse(&context) == P_SUCCESS);

    input = "abdc";
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    assert(p_parse(&context) == P_SUCCESS);

    return 0;
}

New file: spec/test_optional_rule_component.d (23 lines)
@@ -0,0 +1,23 @@
import testparser;
import std.stdio;

int main()
{
    return 0;
}

unittest
{
    string input = "b";
    p_context_t context;
    p_context_init(&context, input);
    assert(p_parse(&context) == P_SUCCESS);

    input = "abcd";
    p_context_init(&context, input);
    assert(p_parse(&context) == P_SUCCESS);

    input = "abdc";
    p_context_init(&context, input);
    assert(p_parse(&context) == P_SUCCESS);
}

New file: spec/test_optional_rule_component_ast.c (42 lines)
@@ -0,0 +1,42 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"

int main()
{
    char const * input = "b";
    p_context_t context;
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    assert(p_parse(&context) == P_SUCCESS);
    Start * start = p_result(&context);
    assert(start->pToken1 == NULL);
    assert(start->pToken2 != NULL);
    assert_eq(TOKEN_b, start->pToken2->token);
    assert(start->pR3 == NULL);
    assert(start->pR == NULL);

    input = "abcd";
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    assert(p_parse(&context) == P_SUCCESS);
    start = p_result(&context);
    assert(start->pToken1 != NULL);
    assert_eq(TOKEN_a, start->pToken1->token);
    assert(start->pToken2 != NULL);
    assert(start->pR3 != NULL);
    assert(start->pR != NULL);
    assert(start->pR == start->pR3);
    assert_eq(TOKEN_c, start->pR->pToken1->token);

    input = "bdc";
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    assert(p_parse(&context) == P_SUCCESS);
    start = p_result(&context);
    assert(start->pToken1 == NULL);
    assert(start->pToken2 != NULL);
    assert(start->pR != NULL);
    assert_eq(TOKEN_d, start->pR->pToken1->token);

    return 0;
}

New file: spec/test_optional_rule_component_ast.d (43 lines)
@@ -0,0 +1,43 @@
import testparser;
import std.stdio;
import testutils;

int main()
{
    return 0;
}

unittest
{
    string input = "b";
    p_context_t context;
    p_context_init(&context, input);
    assert(p_parse(&context) == P_SUCCESS);
    Start * start = p_result(&context);
    assert(start.pToken1 is null);
    assert(start.pToken2 !is null);
    assert_eq(TOKEN_b, start.pToken2.token);
    assert(start.pR3 is null);
    assert(start.pR is null);

    input = "abcd";
    p_context_init(&context, input);
    assert(p_parse(&context) == P_SUCCESS);
    start = p_result(&context);
    assert(start.pToken1 != null);
    assert_eq(TOKEN_a, start.pToken1.token);
    assert(start.pToken2 != null);
    assert(start.pR3 != null);
    assert(start.pR != null);
    assert(start.pR == start.pR3);
    assert_eq(TOKEN_c, start.pR.pToken1.token);

    input = "bdc";
    p_context_init(&context, input);
    assert(p_parse(&context) == P_SUCCESS);
    start = p_result(&context);
    assert(start.pToken1 is null);
    assert(start.pToken2 !is null);
    assert(start.pR !is null);
    assert_eq(TOKEN_d, start.pR.pToken1.token);
}