diff --git a/assets/parser.c.erb b/assets/parser.c.erb index f8d5e55..588e236 100644 --- a/assets/parser.c.erb +++ b/assets/parser.c.erb @@ -622,6 +622,13 @@ typedef struct * Number of rule set AST node fields. */ uint16_t rule_set_node_field_array_size; + + /** + * Whether this rule was a generated optional rule that matched the + * optional target. In this case, propagate the matched target node up + * instead of making a new node for this rule. + */ + bool propagate_optional_target; <% end %> } reduce_t; @@ -686,6 +693,7 @@ static const reduce_t parser_reduce_table[] = { , &r_<%= reduce[:rule].name.gsub("$", "_") %><%= reduce[:rule].id %>_node_field_index_map[0] <% end %> , <%= reduce[:rule].rule_set.ast_fields.size %> + , <%= reduce[:propagate_optional_target] %> <% end %> }, <% end %> @@ -970,7 +978,11 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) { /* We have something to reduce. */ <% if @grammar.ast %> - if (parser_reduce_table[reduce_index].n_states > 0) + if (parser_reduce_table[reduce_index].propagate_optional_target) + { + reduced_parser_node = state_values_stack_index(&statevalues, -1)->ast_node; + } + else if (parser_reduce_table[reduce_index].n_states > 0) { void ** node_fields = calloc(parser_reduce_table[reduce_index].rule_set_node_field_array_size, sizeof(void *)); if (parser_reduce_table[reduce_index].rule_set_node_field_index_map == NULL) diff --git a/assets/parser.d.erb b/assets/parser.d.erb index fe1ccfc..ff286be 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -72,6 +72,7 @@ public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> <% @parser.rule_sets.each do |name, rule_set| %> <% next if name.start_with?("$") %> +<% next if rule_set.optional? %> public struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %> { <% rule_set.ast_fields.each do |fields| %> @@ -763,6 +764,13 @@ private struct reduce_t * Number of rule set AST node fields. 
*/ ushort rule_set_node_field_array_size; + + /** + * Whether this rule was a generated optional rule that matched the + * optional target. In this case, propagate the matched target node up + * instead of making a new node for this rule. + */ + bool propagate_optional_target; <% end %> } @@ -832,6 +840,7 @@ private immutable reduce_t[] parser_reduce_table = [ , &r_<%= reduce[:rule].name.gsub("$", "_") %><%= reduce[:rule].id %>_node_field_index_map[0] <% end %> , <%= reduce[:rule].rule_set.ast_fields.size %> + , <%= reduce[:propagate_optional_target] %> <% end %> ), <% end %> @@ -1015,7 +1024,11 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont { /* We have something to reduce. */ <% if @grammar.ast %> - if (parser_reduce_table[reduce_index].n_states > 0) + if (parser_reduce_table[reduce_index].propagate_optional_target) + { + reduced_parser_node = statevalues[$ - 1].ast_node; + } + else if (parser_reduce_table[reduce_index].n_states > 0) { void *[] node_fields = new void *[parser_reduce_table[reduce_index].rule_set_node_field_array_size]; foreach (i; 0..parser_reduce_table[reduce_index].rule_set_node_field_array_size) diff --git a/assets/parser.h.erb b/assets/parser.h.erb index a2a3dcb..a701d39 100644 --- a/assets/parser.h.erb +++ b/assets/parser.h.erb @@ -64,11 +64,13 @@ typedef struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> <% @parser.rule_sets.each do |name, rule_set| %> <% next if name.start_with?("$") %> +<% next if rule_set.optional? %> struct <%= name %>; <% end %> <% @parser.rule_sets.each do |name, rule_set| %> <% next if name.start_with?("$") %> +<% next if rule_set.optional? 
%> typedef struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %> { <% rule_set.ast_fields.each do |fields| %> diff --git a/doc/user_guide.md b/doc/user_guide.md index 01606ea..a20887d 100644 --- a/doc/user_guide.md +++ b/doc/user_guide.md @@ -631,6 +631,20 @@ This example uses the default start rule name of `Start`. A parser rule has zero or more terms on the right side of its definition. Each of these terms is either a token name or a rule name. +A term can be immediately followed by a `?` character to signify that it is +optional. +Another example: + +``` +token public; +token private; +token int; +token ident /[a-zA-Z_][a-zA-Z_0-9]*/; +token semicolon /;/; +IntegerDeclaration -> Visibility? int ident semicolon; +Visibility -> public; +Visibility -> private; +``` In a parser rule code block, parser values for the right side terms are accessible as `$1` for the first term's parser value, `$2` for the second diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb index f113591..7c0444b 100644 --- a/lib/propane/generator.rb +++ b/lib/propane/generator.rb @@ -71,6 +71,9 @@ class Propane end # Add "real" start rule. @grammar.rules.unshift(Rule.new("$Start", [@grammar.start_rule, "$EOF"], nil, nil, nil)) + # Generate and add rules for optional components. + generate_optional_component_rules!(tokens_by_name) + # Build rule sets. rule_sets = {} rule_set_id = @grammar.tokens.size @grammar.rules.each_with_index do |rule, rule_id| @@ -128,6 +131,37 @@ class Propane @parser = Parser.new(@grammar, rule_sets, @log) end + # Generate and add rules for any optional components. + def generate_optional_component_rules!(tokens_by_name) + optional_rules_added = Set.new + @grammar.rules.each do |rule| + rule.components.each do |component| + if component =~ /^(.*)\?$/ + c = $1 + unless optional_rules_added.include?(component) + # Create two rules for the optional component: one empty and + # one just matching the component. 
+ # We need to find the ptypename for the optional component in + # order to copy it to the generated rules. + if tokens_by_name[c] + # The optional component is a token. + ptypename = tokens_by_name[c].ptypename + else + # The optional component must be a rule, so find any instance + # of that rule that specifies a ptypename. + ptypename = @grammar.rules.reduce(nil) do |result, candidate| + candidate.name == c && candidate.ptypename ? candidate.ptypename : result + end + end + @grammar.rules << Rule.new(component, [], nil, ptypename, rule.line_number) + @grammar.rules << Rule.new(component, [c], "$$ = $1;\n", ptypename, rule.line_number) + optional_rules_added << component + end + end + end + end + end + # Determine which grammar rules could expand to empty sequences. # # @param rule_sets [Hash] diff --git a/lib/propane/grammar.rb b/lib/propane/grammar.rb index c75b825..de5d93a 100644 --- a/lib/propane/grammar.rb +++ b/lib/propane/grammar.rb @@ -198,7 +198,7 @@ class Propane if @ast && ptypename raise Error.new("Multiple ptypes are unsupported in AST mode") end - md = consume!(/((?:#{IDENTIFIER_REGEX}\s*)*)\s*/, "expected rule component list") + md = consume!(/((?:#{IDENTIFIER_REGEX}\??\s*)*)\s*/, "expected rule component list") components = md[1].strip.split(/\s+/) if @ast consume!(/;/, "expected `;'") diff --git a/lib/propane/parser.rb b/lib/propane/parser.rb index da5eec4..599c1c9 100644 --- a/lib/propane/parser.rb +++ b/lib/propane/parser.rb @@ -64,11 +64,13 @@ class Propane case ra = item_set.reduce_actions when Rule [{token_id: @grammar.invalid_token_id, rule_id: ra.id, rule: ra, - rule_set_id: ra.rule_set.id, n_states: ra.components.size}] + rule_set_id: ra.rule_set.id, n_states: ra.components.size, + propagate_optional_target: ra.optional?
&& ra.components.size == 1}] when Hash ra.map do |token, rule| {token_id: token.id, rule_id: rule.id, rule: rule, - rule_set_id: rule.rule_set.id, n_states: rule.components.size} + rule_set_id: rule.rule_set.id, n_states: rule.components.size, + propagate_optional_target: rule.optional? && rule.components.size == 1} end else [] diff --git a/lib/propane/rule.rb b/lib/propane/rule.rb index ba97ee3..20047b9 100644 --- a/lib/propane/rule.rb +++ b/lib/propane/rule.rb @@ -66,6 +66,14 @@ class Propane @components.empty? end + # Return whether this is an optional Rule. + # + # @return [Boolean] + # Whether this is an optional Rule. + def optional? + @name.end_with?("?") + end + # Represent the Rule as a String. # # @return [String] diff --git a/lib/propane/rule_set.rb b/lib/propane/rule_set.rb index 361012c..dd20b3c 100644 --- a/lib/propane/rule_set.rb +++ b/lib/propane/rule_set.rb @@ -56,6 +56,24 @@ class Propane @could_be_empty end + # Return whether this is an optional RuleSet. + # + # @return [Boolean] + # Whether this is an optional RuleSet. + def optional? + @name.end_with?("?") + end + + # For optional rule sets, return the underlying component that is optional. + def option_target + @rules.each do |rule| + if rule.components.size > 0 + return rule.components[0] + end + end + raise "Optional rule target not found" + end + # Build the start token set for the RuleSet. # # @return [Set] @@ -102,6 +120,9 @@ class Propane @ast_fields = [] @rules.each do |rule| rule.components.each_with_index do |component, i| + if component.is_a?(RuleSet) && component.optional? 
+ component = component.option_target + end if component.is_a?(Token) node_name = "Token" else diff --git a/spec/propane_spec.rb b/spec/propane_spec.rb index 3473be9..ba09416 100644 --- a/spec/propane_spec.rb +++ b/spec/propane_spec.rb @@ -910,6 +910,111 @@ EOF run_propane(language: language) compile("spec/test_start_rule_ast.#{language}", language: language) end + + it "allows marking a rule component as optional" do + if language == "d" + write_grammar <<EOF +<< +import std.stdio; +>> + +ptype int; +ptype float = float; +ptype string = string; + +token a (float) << $$ = 1.5; >> +token b << $$ = 2; >> +token c << $$ = 3; >> +token d << $$ = 4; >> +Start -> a? b R? << + writeln("a: ", $1); + writeln("b: ", $2); + writeln("R: ", $3); +>> +R -> c d << $$ = "cd"; >> +R (string) -> d c << $$ = "dc"; >> +EOF + else + write_grammar <<EOF +<< +#include <stdio.h> +>> + +ptype int; +ptype float = float; +ptype string = char *; + +token a (float) << $$ = 1.5; >> +token b << $$ = 2; >> +token c << $$ = 3; >> +token d << $$ = 4; >> +Start -> a? b R? << + printf("a: %.1f\\n", $1); + printf("b: %d\\n", $2); + printf("R: %s\\n", $3 == NULL ? "" : $3); +>> +R -> c d << $$ = "cd"; >> +R (string) -> d c << $$ = "dc"; >> +EOF + end + run_propane(language: language) + compile("spec/test_optional_rule_component.#{language}", language: language) + results = run_test + expect(results.stderr).to eq "" + expect(results.status).to eq 0 + verify_lines(results.stdout, [ + "a: 0#{language == "d" ? "" : ".0"}", + "b: 2", + "R: ", + "a: 1.5", + "b: 2", + "R: cd", + "a: 1.5", + "b: 2", + "R: dc", + ]) + end + + it "allows marking a rule component as optional in AST generation mode" do + if language == "d" + write_grammar <<EOF +ast; + +<< +import std.stdio; +>> + +token a; +token b; +token c; +token d; +Start -> a? b R?; +R -> c d; +R -> d c; +EOF + else + write_grammar <<EOF +ast; + +<< +#include <stdio.h> +>> + +token a; +token b; +token c; +token d; +Start -> a?
b R?; +R -> c d; +R -> d c; +EOF + end + run_propane(language: language) + compile("spec/test_optional_rule_component_ast.#{language}", language: language) + results = run_test + expect(results.stderr).to eq "" + expect(results.status).to eq 0 + end end end end diff --git a/spec/test_optional_rule_component.c b/spec/test_optional_rule_component.c new file mode 100644 index 0000000..584432c --- /dev/null +++ b/spec/test_optional_rule_component.c @@ -0,0 +1,22 @@ +#include "testparser.h" +#include <assert.h> +#include <string.h> + +int main() +{ + char const * input = "b"; + p_context_t context; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + + input = "abcd"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + + input = "abdc"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + + return 0; +} + diff --git a/spec/test_optional_rule_component.d b/spec/test_optional_rule_component.d new file mode 100644 index 0000000..dbfcd3c --- /dev/null +++ b/spec/test_optional_rule_component.d @@ -0,0 +1,23 @@ +import testparser; +import std.stdio; + +int main() +{ + return 0; +} + +unittest +{ + string input = "b"; + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + + input = "abcd"; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + + input = "abdc"; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); +} diff --git a/spec/test_optional_rule_component_ast.c b/spec/test_optional_rule_component_ast.c new file mode 100644 index 0000000..1885983 --- /dev/null +++ b/spec/test_optional_rule_component_ast.c @@ -0,0 +1,42 @@ +#include "testparser.h" +#include <assert.h> +#include <string.h> +#include "testutils.h" + +int main() +{ + char const * input = "b"; + p_context_t context; + p_context_init(&context, (uint8_t const *)input, strlen(input)); +
assert(p_parse(&context) == P_SUCCESS); + Start * start = p_result(&context); + assert(start->pToken1 == NULL); + assert(start->pToken2 != NULL); + assert_eq(TOKEN_b, start->pToken2->token); + assert(start->pR3 == NULL); + assert(start->pR == NULL); + + input = "abcd"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + start = p_result(&context); + assert(start->pToken1 != NULL); + assert_eq(TOKEN_a, start->pToken1->token); + assert(start->pToken2 != NULL); + assert(start->pR3 != NULL); + assert(start->pR != NULL); + assert(start->pR == start->pR3); + assert_eq(TOKEN_c, start->pR->pToken1->token); + + input = "bdc"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + start = p_result(&context); + assert(start->pToken1 == NULL); + assert(start->pToken2 != NULL); + assert(start->pR != NULL); + assert_eq(TOKEN_d, start->pR->pToken1->token); + + return 0; +} + diff --git a/spec/test_optional_rule_component_ast.d b/spec/test_optional_rule_component_ast.d new file mode 100644 index 0000000..1cc83c8 --- /dev/null +++ b/spec/test_optional_rule_component_ast.d @@ -0,0 +1,43 @@ +import testparser; +import std.stdio; +import testutils; + +int main() +{ + return 0; +} + +unittest +{ + string input = "b"; + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + Start * start = p_result(&context); + assert(start.pToken1 is null); + assert(start.pToken2 !is null); + assert_eq(TOKEN_b, start.pToken2.token); + assert(start.pR3 is null); + assert(start.pR is null); + + input = "abcd"; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + start = p_result(&context); + assert(start.pToken1 != null); + assert_eq(TOKEN_a, start.pToken1.token); + assert(start.pToken2 != null); + assert(start.pR3 != null); + assert(start.pR != null); + assert(start.pR == start.pR3); + assert_eq(TOKEN_c, 
start.pR.pToken1.token); + + input = "bdc"; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + start = p_result(&context); + assert(start.pToken1 is null); + assert(start.pToken2 !is null); + assert(start.pR !is null); + assert_eq(TOKEN_d, start.pR.pToken1.token); +}