diff --git a/assets/parser.c.erb b/assets/parser.c.erb index 65ce0ee..bbfa475 100644 --- a/assets/parser.c.erb +++ b/assets/parser.c.erb @@ -924,7 +924,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) { /* Successful parse. */ <% if @grammar.ast %> - context->parse_result = (Start *)state_values_stack_index(&statevalues, -1)->ast_node; + context->parse_result = (<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> *)state_values_stack_index(&statevalues, -1)->ast_node; <% else %> context->parse_result = state_values_stack_index(&statevalues, -1)->pvalue; <% end %> @@ -941,7 +941,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) { /* We shifted a token, mark it consumed. */ <% if @grammar.ast %> - Token * token_ast_node = malloc(sizeof(Token)); + <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>)); token_ast_node->token = token; token_ast_node->pvalue = token_info.pvalue; state_values_stack_index(&statevalues, -1)->ast_node = token_ast_node; @@ -1029,7 +1029,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) * @return Parse result value. */ <% if @grammar.ast %> -Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context) +<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context) <% else %> <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context) <% end %> diff --git a/assets/parser.d.erb b/assets/parser.d.erb index 052c9ec..4330d14 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -64,7 +64,7 @@ public union <%= @grammar.prefix %>value_t <% if @grammar.ast %> /** AST node types. 
@{ */ -public struct Token +public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> { <%= @grammar.prefix %>token_t token; <%= @grammar.prefix %>value_t pvalue; @@ -72,7 +72,7 @@ public struct Token <% @parser.rule_sets.each do |name, rule_set| %> <% next if name.start_with?("$") %> -public struct <%= name %> +public struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %> { <% rule_set.ast_fields.each do |fields| %> union @@ -144,7 +144,7 @@ public struct <%= @grammar.prefix %>context_t /** Parse result value. */ <% if @grammar.ast %> - Start * parse_result; + <%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * parse_result; <% else %> <%= @grammar.prefix %>value_t parse_result; <% end %> @@ -973,7 +973,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont { /* Successful parse. */ <% if @grammar.ast %> - context.parse_result = cast(Start *)statevalues[$-1].ast_node; + context.parse_result = cast(<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> *)statevalues[$-1].ast_node; <% else %> context.parse_result = statevalues[$-1].pvalue; <% end %> @@ -988,7 +988,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont { /* We shifted a token, mark it consumed. */ <% if @grammar.ast %> - Token * token_ast_node = new Token(token, token_info.pvalue); + <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token, token_info.pvalue); statevalues[$-1].ast_node = token_ast_node; <% else %> statevalues[$-1].pvalue = token_info.pvalue; @@ -1075,7 +1075,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont * @return Parse result value. 
*/ <% if @grammar.ast %> -public Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context) +public <%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context) <% else %> public <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context) <% end %> diff --git a/assets/parser.h.erb b/assets/parser.h.erb index 644e961..967efe6 100644 --- a/assets/parser.h.erb +++ b/assets/parser.h.erb @@ -56,11 +56,11 @@ typedef union <% if @grammar.ast %> /** AST node types. @{ */ -typedef struct Token +typedef struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> { <%= @grammar.prefix %>token_t token; <%= @grammar.prefix %>value_t pvalue; -} Token; +} <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>; <% @parser.rule_sets.each do |name, rule_set| %> <% next if name.start_with?("$") %> @@ -69,7 +69,7 @@ struct <%= name %>; <% @parser.rule_sets.each do |name, rule_set| %> <% next if name.start_with?("$") %> -typedef struct <%= name %> +typedef struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %> { <% rule_set.ast_fields.each do |fields| %> union @@ -79,7 +79,7 @@ typedef struct <%= name %> <% end %> }; <% end %> -} <%= name %>; +} <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>; <% end %> /** @} */ @@ -144,7 +144,7 @@ typedef struct /** Parse result value. 
*/ <% if @grammar.ast %> - Start * parse_result; + <%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * parse_result; <% else %> <%= @grammar.prefix %>value_t parse_result; <% end %> @@ -173,7 +173,7 @@ size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context); <% if @grammar.ast %> -Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context); +<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context); <% else %> <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context); <% end %> diff --git a/doc/user_guide.md b/doc/user_guide.md index d9c8a4b..33a3f62 100644 --- a/doc/user_guide.md +++ b/doc/user_guide.md @@ -276,6 +276,48 @@ assert_eq(22, itemsmore.pItem.pToken1.pvalue); assert(itemsmore.pItemsMore is null); ``` +## `ast_prefix` and `ast_suffix` statements + +In AST generation mode, structure types are defined and named based on the +rules in the grammar. +Additionally, a structure type called `Token` is generated to hold parsed +token information. + +These structure names can be modified by using the `ast_prefix` or `ast_suffix` +statements in the grammar file. +The field names that point to instances of the structures are not affected by +the `ast_prefix` or `ast_suffix` values. 
+ +For example, if the following two lines were added to the example above: + +``` +ast_prefix ABC; +ast_suffix XYZ; +``` + +Then the types would be used as such instead: + +``` +string input = "a, ((b)), b"; +p_context_t context; +p_context_init(&context, input); +assert_eq(P_SUCCESS, p_parse(&context)); +ABCStartXYZ * start = p_result(&context); +assert(start.pItems1 !is null); +assert(start.pItems !is null); +ABCItemsXYZ * items = start.pItems; +assert(items.pItem !is null); +assert(items.pItem.pToken1 !is null); +assert_eq(TOKEN_a, items.pItem.pToken1.token); +assert_eq(11, items.pItem.pToken1.pvalue); +assert(items.pItemsMore !is null); +ABCItemsMoreXYZ * itemsmore = items.pItemsMore; +assert(itemsmore.pItem !is null); +assert(itemsmore.pItem.pItem !is null); +assert(itemsmore.pItem.pItem.pItem !is null); +assert(itemsmore.pItem.pItem.pItem.pToken1 !is null); +``` + ##> Specifying tokens - the `token` statement The `token` statement allows defining a lexer token and a pattern to match that diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb index a690072..c260163 100644 --- a/lib/propane/generator.rb +++ b/lib/propane/generator.rb @@ -120,7 +120,7 @@ class Propane end determine_possibly_empty_rulesets!(rule_sets) rule_sets.each do |name, rule_set| - rule_set.finalize + rule_set.finalize(@grammar) end # Generate the lexer. @lexer = Lexer.new(@grammar) diff --git a/lib/propane/grammar.rb b/lib/propane/grammar.rb index 181abc5..850ef08 100644 --- a/lib/propane/grammar.rb +++ b/lib/propane/grammar.rb @@ -6,6 +6,8 @@ class Propane IDENTIFIER_REGEX = /(?:[a-zA-Z]|_[a-zA-Z0-9])[a-zA-Z_0-9]*/ attr_reader :ast + attr_reader :ast_prefix + attr_reader :ast_suffix attr_reader :modulename attr_reader :patterns attr_reader :rules @@ -26,6 +28,8 @@ class Propane @ptypes = {"default" => "void *"} @prefix = "p_" @ast = false + @ast_prefix = "" + @ast_suffix = "" parse_grammar! end @@ -54,6 +58,8 @@ class Propane elsif parse_comment_line! elsif @mode.nil? 
&& parse_mode_label! elsif parse_ast_statement! + elsif parse_ast_prefix_statement! + elsif parse_ast_suffix_statement! elsif parse_module_statement! elsif parse_ptype_statement! elsif parse_pattern_statement! @@ -91,6 +97,18 @@ class Propane end end + def parse_ast_prefix_statement! + if md = consume!(/ast_prefix\s+(\w+)\s*;/) + @ast_prefix = md[1] + end + end + + def parse_ast_suffix_statement! + if md = consume!(/ast_suffix\s+(\w+)\s*;/) + @ast_suffix = md[1] + end + end + def parse_module_statement! if consume!(/module\s+/) md = consume!(/([\w.]+)\s*/, "expected module name") diff --git a/lib/propane/rule_set.rb b/lib/propane/rule_set.rb index dbc09f3..361012c 100644 --- a/lib/propane/rule_set.rb +++ b/lib/propane/rule_set.rb @@ -3,6 +3,10 @@ class Propane # A RuleSet collects all grammar rules of the same name. class RuleSet + # @return [Array] + # AST fields. + attr_reader :ast_fields + # @return [Integer] # ID of the RuleSet. attr_reader :id @@ -76,6 +80,13 @@ class Propane @_start_token_set end + # Finalize a RuleSet after adding all Rules to it. + def finalize(grammar) + build_ast_fields(grammar) + end + + private + # Build the set of AST fields for this RuleSet. # # This is an Array of Hashes. Each entry in the Array corresponds to a @@ -84,46 +95,38 @@ class Propane # a key. It may also have the field name without the positional suffix if # that field only exists in one position across all Rules in the RuleSet. # - # @return [Array] - # AST fields. 
- def ast_fields - @_ast_fields ||= - begin - field_ast_node_indexes = {} - field_indexes_across_all_rules = {} - ast_node_fields = [] - @rules.each do |rule| - rule.components.each_with_index do |component, i| - if component.is_a?(Token) - node_name = "Token" - else - node_name = component.name - end - field_name = "p#{node_name}#{i + 1}" - unless field_ast_node_indexes[field_name] - field_ast_node_indexes[field_name] = ast_node_fields.size - ast_node_fields << {field_name => node_name} - end - field_indexes_across_all_rules[node_name] ||= Set.new - field_indexes_across_all_rules[node_name] << field_ast_node_indexes[field_name] - rule.rule_set_node_field_index_map[i] = field_ast_node_indexes[field_name] - end + # @return [void] + def build_ast_fields(grammar) + field_ast_node_indexes = {} + field_indexes_across_all_rules = {} + @ast_fields = [] + @rules.each do |rule| + rule.components.each_with_index do |component, i| + if component.is_a?(Token) + node_name = "Token" + else + node_name = component.name end - field_indexes_across_all_rules.each do |node_name, indexes_across_all_rules| - if indexes_across_all_rules.size == 1 - # If this field was only seen in one position across all rules, - # then add an alias to the positional field name that does not - # include the position. - ast_node_fields[indexes_across_all_rules.first]["p#{node_name}"] = node_name - end + struct_name = "#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}" + field_name = "p#{node_name}#{i + 1}" + unless field_ast_node_indexes[field_name] + field_ast_node_indexes[field_name] = @ast_fields.size + @ast_fields << {field_name => struct_name} end - ast_node_fields + field_indexes_across_all_rules[node_name] ||= Set.new + field_indexes_across_all_rules[node_name] << field_ast_node_indexes[field_name] + rule.rule_set_node_field_index_map[i] = field_ast_node_indexes[field_name] end - end - - # Finalize a RuleSet after adding all Rules to it. 
-    def finalize
-      ast_fields
+      end
+      field_indexes_across_all_rules.each do |node_name, indexes_across_all_rules|
+        if indexes_across_all_rules.size == 1
+          # If this field was only seen in one position across all rules,
+          # then add an alias to the positional field name that does not
+          # include the position.
+          @ast_fields[indexes_across_all_rules.first]["p#{node_name}"] =
+            "#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}"
+        end
+      end
     end
 
   end
diff --git a/spec/propane_spec.rb b/spec/propane_spec.rb
index b31e8d9..1a8f130 100644
--- a/spec/propane_spec.rb
+++ b/spec/propane_spec.rb
@@ -845,6 +845,50 @@ EOF
         expect(results.stderr).to eq ""
         expect(results.status).to eq 0
       end
+
+      it "supports AST node prefix and suffix" do
+        write_grammar <<EOF
+ast;
+
+ast_prefix P;
+
+ast_suffix S;
+
+token a << $$ = 11; >>
+token b << $$ = 22; >>
+token one /1/;
+token two /2/;
+token comma /,/ <<
+  $$ = 42;
+>>
+token lparen /\\(/;
+token rparen /\\)/;
+drop /\\s+/;
+
+Start -> Items;
+
+Items -> Item ItemsMore;
+Items -> ;
+
+ItemsMore -> comma Item ItemsMore;
+ItemsMore -> ;
+
+Item -> a;
+Item -> b;
+Item -> lparen Item rparen;
+Item -> Dual;
+
+Dual -> One Two;
+Dual -> Two One;
+One -> one;
+Two -> two;
+EOF
+        run_propane(language: language)
+        compile("spec/test_ast_ps.#{language}", language: language)
+        results = run_test
+        expect(results.stderr).to eq ""
+        expect(results.status).to eq 0
+      end
     end
   end
 end
diff --git a/spec/test_ast_ps.c b/spec/test_ast_ps.c
new file mode 100644
index 0000000..69ebcae
--- /dev/null
+++ b/spec/test_ast_ps.c
@@ -0,0 +1,55 @@
+#include "testparser.h"
+#include <assert.h>
+#include <string.h>
+#include "testutils.h"
+
+int main()
+{
+    char const * input = "a, ((b)), b";
+    p_context_t context;
+    p_context_init(&context, (uint8_t const *)input, strlen(input));
+    assert_eq(P_SUCCESS, p_parse(&context));
+    PStartS * start = p_result(&context);
+    assert(start->pItems1 != NULL);
+    assert(start->pItems != NULL);
+    PItemsS * items = start->pItems;
+    assert(items->pItem != NULL);
+    assert(items->pItem->pToken1 != NULL);
+    
assert_eq(TOKEN_a, items->pItem->pToken1->token); + assert_eq(11, items->pItem->pToken1->pvalue); + assert(items->pItemsMore != NULL); + PItemsMoreS * itemsmore = items->pItemsMore; + assert(itemsmore->pItem != NULL); + assert(itemsmore->pItem->pItem != NULL); + assert(itemsmore->pItem->pItem->pItem != NULL); + assert(itemsmore->pItem->pItem->pItem->pToken1 != NULL); + assert_eq(TOKEN_b, itemsmore->pItem->pItem->pItem->pToken1->token); + assert_eq(22, itemsmore->pItem->pItem->pItem->pToken1->pvalue); + assert(itemsmore->pItemsMore != NULL); + itemsmore = itemsmore->pItemsMore; + assert(itemsmore->pItem != NULL); + assert(itemsmore->pItem->pToken1 != NULL); + assert_eq(TOKEN_b, itemsmore->pItem->pToken1->token); + assert_eq(22, itemsmore->pItem->pToken1->pvalue); + assert(itemsmore->pItemsMore == NULL); + + input = ""; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert_eq(P_SUCCESS, p_parse(&context)); + start = p_result(&context); + assert(start->pItems == NULL); + + input = "2 1"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert_eq(P_SUCCESS, p_parse(&context)); + start = p_result(&context); + assert(start->pItems != NULL); + assert(start->pItems->pItem != NULL); + assert(start->pItems->pItem->pDual != NULL); + assert(start->pItems->pItem->pDual->pTwo1 != NULL); + assert(start->pItems->pItem->pDual->pOne2 != NULL); + assert(start->pItems->pItem->pDual->pTwo2 == NULL); + assert(start->pItems->pItem->pDual->pOne1 == NULL); + + return 0; +} diff --git a/spec/test_ast_ps.d b/spec/test_ast_ps.d new file mode 100644 index 0000000..8d01e6f --- /dev/null +++ b/spec/test_ast_ps.d @@ -0,0 +1,57 @@ +import testparser; +import std.stdio; +import testutils; + +int main() +{ + return 0; +} + +unittest +{ + string input = "a, ((b)), b"; + p_context_t context; + p_context_init(&context, input); + assert_eq(P_SUCCESS, p_parse(&context)); + PStartS * start = p_result(&context); + assert(start.pItems1 !is null); + 
assert(start.pItems !is null); + PItemsS * items = start.pItems; + assert(items.pItem !is null); + assert(items.pItem.pToken1 !is null); + assert_eq(TOKEN_a, items.pItem.pToken1.token); + assert_eq(11, items.pItem.pToken1.pvalue); + assert(items.pItemsMore !is null); + PItemsMoreS * itemsmore = items.pItemsMore; + assert(itemsmore.pItem !is null); + assert(itemsmore.pItem.pItem !is null); + assert(itemsmore.pItem.pItem.pItem !is null); + assert(itemsmore.pItem.pItem.pItem.pToken1 !is null); + assert_eq(TOKEN_b, itemsmore.pItem.pItem.pItem.pToken1.token); + assert_eq(22, itemsmore.pItem.pItem.pItem.pToken1.pvalue); + assert(itemsmore.pItemsMore !is null); + itemsmore = itemsmore.pItemsMore; + assert(itemsmore.pItem !is null); + assert(itemsmore.pItem.pToken1 !is null); + assert_eq(TOKEN_b, itemsmore.pItem.pToken1.token); + assert_eq(22, itemsmore.pItem.pToken1.pvalue); + assert(itemsmore.pItemsMore is null); + + input = ""; + p_context_init(&context, input); + assert_eq(P_SUCCESS, p_parse(&context)); + start = p_result(&context); + assert(start.pItems is null); + + input = "2 1"; + p_context_init(&context, input); + assert_eq(P_SUCCESS, p_parse(&context)); + start = p_result(&context); + assert(start.pItems !is null); + assert(start.pItems.pItem !is null); + assert(start.pItems.pItem.pDual !is null); + assert(start.pItems.pItem.pDual.pTwo1 !is null); + assert(start.pItems.pItem.pDual.pOne2 !is null); + assert(start.pItems.pItem.pDual.pTwo2 is null); + assert(start.pItems.pItem.pDual.pOne1 is null); +}