Allow user to specify AST node prefix or suffix

Add ast_prefix and ast_suffix grammar statements.
Josh Holtrop 2024-05-04 11:57:28 -04:00
parent d0f542cbd7
commit 153f9d28f8
10 changed files with 272 additions and 53 deletions
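For orientation, here is a brief illustration of the new statements, drawn from the documentation and spec changes in this commit (the names `P` and `S` are the ones used in the new spec):

```
ast;
ast_prefix P;
ast_suffix S;
```

With these statements, generated AST node types such as `Start` and `Token` are emitted as `PStartS` and `PTokenS` (as exercised by the new `spec/test_ast_ps.c` and `spec/test_ast_ps.d` tests below), while field names such as `pItems` and `pToken1` are unchanged.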

View File

@@ -924,7 +924,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 {
 /* Successful parse. */
 <% if @grammar.ast %>
-context->parse_result = (Start *)state_values_stack_index(&statevalues, -1)->ast_node;
+context->parse_result = (<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> *)state_values_stack_index(&statevalues, -1)->ast_node;
 <% else %>
 context->parse_result = state_values_stack_index(&statevalues, -1)->pvalue;
 <% end %>
@@ -941,7 +941,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 {
 /* We shifted a token, mark it consumed. */
 <% if @grammar.ast %>
-Token * token_ast_node = malloc(sizeof(Token));
+<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>));
 token_ast_node->token = token;
 token_ast_node->pvalue = token_info.pvalue;
 state_values_stack_index(&statevalues, -1)->ast_node = token_ast_node;
@@ -1029,7 +1029,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 * @return Parse result value.
 */
 <% if @grammar.ast %>
-Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
+<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% else %>
 <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% end %>

View File

@@ -64,7 +64,7 @@ public union <%= @grammar.prefix %>value_t
 <% if @grammar.ast %>
 /** AST node types. @{ */
-public struct Token
+public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
 {
 <%= @grammar.prefix %>token_t token;
 <%= @grammar.prefix %>value_t pvalue;
@@ -72,7 +72,7 @@ public struct Token
 <% @parser.rule_sets.each do |name, rule_set| %>
 <% next if name.start_with?("$") %>
-public struct <%= name %>
+public struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
 {
 <% rule_set.ast_fields.each do |fields| %>
 union
@@ -144,7 +144,7 @@ public struct <%= @grammar.prefix %>context_t
 /** Parse result value. */
 <% if @grammar.ast %>
-Start * parse_result;
+<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * parse_result;
 <% else %>
 <%= @grammar.prefix %>value_t parse_result;
 <% end %>
@@ -973,7 +973,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
 {
 /* Successful parse. */
 <% if @grammar.ast %>
-context.parse_result = cast(Start *)statevalues[$-1].ast_node;
+context.parse_result = cast(<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> *)statevalues[$-1].ast_node;
 <% else %>
 context.parse_result = statevalues[$-1].pvalue;
 <% end %>
@@ -988,7 +988,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
 {
 /* We shifted a token, mark it consumed. */
 <% if @grammar.ast %>
-Token * token_ast_node = new Token(token, token_info.pvalue);
+<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token, token_info.pvalue);
 statevalues[$-1].ast_node = token_ast_node;
 <% else %>
 statevalues[$-1].pvalue = token_info.pvalue;
@@ -1075,7 +1075,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
 * @return Parse result value.
 */
 <% if @grammar.ast %>
-public Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
+public <%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% else %>
 public <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% end %>

View File

@@ -56,11 +56,11 @@ typedef union
 <% if @grammar.ast %>
 /** AST node types. @{ */
-typedef struct Token
+typedef struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
 {
 <%= @grammar.prefix %>token_t token;
 <%= @grammar.prefix %>value_t pvalue;
-} Token;
+} <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>;
 <% @parser.rule_sets.each do |name, rule_set| %>
 <% next if name.start_with?("$") %>
@@ -69,7 +69,7 @@ struct <%= name %>;
 <% @parser.rule_sets.each do |name, rule_set| %>
 <% next if name.start_with?("$") %>
-typedef struct <%= name %>
+typedef struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
 {
 <% rule_set.ast_fields.each do |fields| %>
 union
@@ -79,7 +79,7 @@ typedef struct <%= name %>
 <% end %>
 };
 <% end %>
-} <%= name %>;
+} <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>;
 <% end %>
 /** @} */
@@ -144,7 +144,7 @@ typedef struct
 /** Parse result value. */
 <% if @grammar.ast %>
-Start * parse_result;
+<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * parse_result;
 <% else %>
 <%= @grammar.prefix %>value_t parse_result;
 <% end %>
@@ -173,7 +173,7 @@ size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%=
 size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context);
 <% if @grammar.ast %>
-Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
+<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
 <% else %>
 <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
 <% end %>

View File

@@ -276,6 +276,48 @@ assert_eq(22, itemsmore.pItem.pToken1.pvalue);
 assert(itemsmore.pItemsMore is null);
 ```
+## `ast_prefix` and `ast_suffix` statements
+In AST generation mode, structure types are defined and named based on the
+rules in the grammar.
+Additionally, a structure type called `Token` is generated to hold parsed
+token information.
+These structure names can be modified by using the `ast_prefix` or `ast_suffix`
+statements in the grammar file.
+The field names that point to instances of the structures are not affected by
+the `ast_prefix` or `ast_suffix` values.
+For example, if the following two lines were added to the example above:
+```
+ast_prefix ABC;
+ast_suffix XYZ;
+```
+Then the types would be used as such instead:
+```
+string input = "a, ((b)), b";
+p_context_t context;
+p_context_init(&context, input);
+assert_eq(P_SUCCESS, p_parse(&context));
+ABCStartXYZ * start = p_result(&context);
+assert(start.pItems1 !is null);
+assert(start.pItems !is null);
+ABCItemsXYZ * items = start.pItems;
+assert(items.pItem !is null);
+assert(items.pItem.pToken1 !is null);
+assert_eq(TOKEN_a, items.pItem.pToken1.token);
+assert_eq(11, items.pItem.pToken1.pvalue);
+assert(items.pItemsMore !is null);
+ABCItemsMoreXYZ * itemsmore = items.pItemsMore;
+assert(itemsmore.pItem !is null);
+assert(itemsmore.pItem.pItem !is null);
+assert(itemsmore.pItem.pItem.pItem !is null);
+assert(itemsmore.pItem.pItem.pItem.pToken1 !is null);
+```
 ##> Specifying tokens - the `token` statement
 The `token` statement allows defining a lexer token and a pattern to match that

View File

@@ -120,7 +120,7 @@ class Propane
 end
 determine_possibly_empty_rulesets!(rule_sets)
 rule_sets.each do |name, rule_set|
-rule_set.finalize
+rule_set.finalize(@grammar)
 end
 # Generate the lexer.
 @lexer = Lexer.new(@grammar)

View File

@@ -6,6 +6,8 @@ class Propane
 IDENTIFIER_REGEX = /(?:[a-zA-Z]|_[a-zA-Z0-9])[a-zA-Z_0-9]*/
 attr_reader :ast
+attr_reader :ast_prefix
+attr_reader :ast_suffix
 attr_reader :modulename
 attr_reader :patterns
 attr_reader :rules
@@ -26,6 +28,8 @@ class Propane
 @ptypes = {"default" => "void *"}
 @prefix = "p_"
 @ast = false
+@ast_prefix = ""
+@ast_suffix = ""
 parse_grammar!
 end
@@ -54,6 +58,8 @@ class Propane
 elsif parse_comment_line!
 elsif @mode.nil? && parse_mode_label!
 elsif parse_ast_statement!
+elsif parse_ast_prefix_statement!
+elsif parse_ast_suffix_statement!
 elsif parse_module_statement!
 elsif parse_ptype_statement!
 elsif parse_pattern_statement!
@@ -91,6 +97,18 @@ class Propane
 end
 end
+def parse_ast_prefix_statement!
+if md = consume!(/ast_prefix\s+(\w+)\s*;/)
+@ast_prefix = md[1]
+end
+end
+def parse_ast_suffix_statement!
+if md = consume!(/ast_suffix\s+(\w+)\s*;/)
+@ast_suffix = md[1]
+end
+end
 def parse_module_statement!
 if consume!(/module\s+/)
 md = consume!(/([\w.]+)\s*/, "expected module name")

View File

@@ -3,6 +3,10 @@ class Propane
 # A RuleSet collects all grammar rules of the same name.
 class RuleSet
+# @return [Array<Hash>]
+# AST fields.
+attr_reader :ast_fields
 # @return [Integer]
 # ID of the RuleSet.
 attr_reader :id
@@ -76,6 +80,13 @@ class Propane
 @_start_token_set
 end
+# Finalize a RuleSet after adding all Rules to it.
+def finalize(grammar)
+build_ast_fields(grammar)
+end
+private
 # Build the set of AST fields for this RuleSet.
 #
 # This is an Array of Hashes. Each entry in the Array corresponds to a
@@ -84,14 +95,11 @@
 # a key. It may also have the field name without the positional suffix if
 # that field only exists in one position across all Rules in the RuleSet.
 #
-# @return [Array<Hash>]
-# AST fields.
-def ast_fields
-@_ast_fields ||=
-begin
+# @return [void]
+def build_ast_fields(grammar)
 field_ast_node_indexes = {}
 field_indexes_across_all_rules = {}
-ast_node_fields = []
+@ast_fields = []
 @rules.each do |rule|
 rule.components.each_with_index do |component, i|
 if component.is_a?(Token)
@@ -99,10 +107,11 @@
 else
 node_name = component.name
 end
+struct_name = "#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}"
 field_name = "p#{node_name}#{i + 1}"
 unless field_ast_node_indexes[field_name]
-field_ast_node_indexes[field_name] = ast_node_fields.size
-ast_node_fields << {field_name => node_name}
+field_ast_node_indexes[field_name] = @ast_fields.size
+@ast_fields << {field_name => struct_name}
 end
 field_indexes_across_all_rules[node_name] ||= Set.new
 field_indexes_across_all_rules[node_name] << field_ast_node_indexes[field_name]
@@ -114,16 +123,10 @@
 # If this field was only seen in one position across all rules,
 # then add an alias to the positional field name that does not
 # include the position.
-ast_node_fields[indexes_across_all_rules.first]["p#{node_name}"] = node_name
+@ast_fields[indexes_across_all_rules.first]["p#{node_name}"] =
+"#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}"
 end
 end
-ast_node_fields
-end
-end
-# Finalize a RuleSet after adding all Rules to it.
-def finalize
-ast_fields
-end
 end

View File

@@ -845,6 +845,50 @@ EOF
 expect(results.stderr).to eq ""
 expect(results.status).to eq 0
 end
+it "supports AST node prefix and suffix" do
+write_grammar <<EOF
+ast;
+ast_prefix P ;
+ast_suffix S;
+ptype int;
+token a << $$ = 11; >>
+token b << $$ = 22; >>
+token one /1/;
+token two /2/;
+token comma /,/ <<
+$$ = 42;
+>>
+token lparen /\\(/;
+token rparen /\\)/;
+drop /\\s+/;
+Start -> Items;
+Items -> Item ItemsMore;
+Items -> ;
+ItemsMore -> comma Item ItemsMore;
+ItemsMore -> ;
+Item -> a;
+Item -> b;
+Item -> lparen Item rparen;
+Item -> Dual;
+Dual -> One Two;
+Dual -> Two One;
+One -> one;
+Two -> two;
+EOF
+run_propane(language: language)
+compile("spec/test_ast_ps.#{language}", language: language)
+results = run_test
+expect(results.stderr).to eq ""
+expect(results.status).to eq 0
+end
 end
 end
 end

spec/test_ast_ps.c Normal file
View File

@@ -0,0 +1,55 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"
int main()
{
char const * input = "a, ((b)), b";
p_context_t context;
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert_eq(P_SUCCESS, p_parse(&context));
PStartS * start = p_result(&context);
assert(start->pItems1 != NULL);
assert(start->pItems != NULL);
PItemsS * items = start->pItems;
assert(items->pItem != NULL);
assert(items->pItem->pToken1 != NULL);
assert_eq(TOKEN_a, items->pItem->pToken1->token);
assert_eq(11, items->pItem->pToken1->pvalue);
assert(items->pItemsMore != NULL);
PItemsMoreS * itemsmore = items->pItemsMore;
assert(itemsmore->pItem != NULL);
assert(itemsmore->pItem->pItem != NULL);
assert(itemsmore->pItem->pItem->pItem != NULL);
assert(itemsmore->pItem->pItem->pItem->pToken1 != NULL);
assert_eq(TOKEN_b, itemsmore->pItem->pItem->pItem->pToken1->token);
assert_eq(22, itemsmore->pItem->pItem->pItem->pToken1->pvalue);
assert(itemsmore->pItemsMore != NULL);
itemsmore = itemsmore->pItemsMore;
assert(itemsmore->pItem != NULL);
assert(itemsmore->pItem->pToken1 != NULL);
assert_eq(TOKEN_b, itemsmore->pItem->pToken1->token);
assert_eq(22, itemsmore->pItem->pToken1->pvalue);
assert(itemsmore->pItemsMore == NULL);
input = "";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert_eq(P_SUCCESS, p_parse(&context));
start = p_result(&context);
assert(start->pItems == NULL);
input = "2 1";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert_eq(P_SUCCESS, p_parse(&context));
start = p_result(&context);
assert(start->pItems != NULL);
assert(start->pItems->pItem != NULL);
assert(start->pItems->pItem->pDual != NULL);
assert(start->pItems->pItem->pDual->pTwo1 != NULL);
assert(start->pItems->pItem->pDual->pOne2 != NULL);
assert(start->pItems->pItem->pDual->pTwo2 == NULL);
assert(start->pItems->pItem->pDual->pOne1 == NULL);
return 0;
}

spec/test_ast_ps.d Normal file
View File

@@ -0,0 +1,57 @@
import testparser;
import std.stdio;
import testutils;
int main()
{
return 0;
}
unittest
{
string input = "a, ((b)), b";
p_context_t context;
p_context_init(&context, input);
assert_eq(P_SUCCESS, p_parse(&context));
PStartS * start = p_result(&context);
assert(start.pItems1 !is null);
assert(start.pItems !is null);
PItemsS * items = start.pItems;
assert(items.pItem !is null);
assert(items.pItem.pToken1 !is null);
assert_eq(TOKEN_a, items.pItem.pToken1.token);
assert_eq(11, items.pItem.pToken1.pvalue);
assert(items.pItemsMore !is null);
PItemsMoreS * itemsmore = items.pItemsMore;
assert(itemsmore.pItem !is null);
assert(itemsmore.pItem.pItem !is null);
assert(itemsmore.pItem.pItem.pItem !is null);
assert(itemsmore.pItem.pItem.pItem.pToken1 !is null);
assert_eq(TOKEN_b, itemsmore.pItem.pItem.pItem.pToken1.token);
assert_eq(22, itemsmore.pItem.pItem.pItem.pToken1.pvalue);
assert(itemsmore.pItemsMore !is null);
itemsmore = itemsmore.pItemsMore;
assert(itemsmore.pItem !is null);
assert(itemsmore.pItem.pToken1 !is null);
assert_eq(TOKEN_b, itemsmore.pItem.pToken1.token);
assert_eq(22, itemsmore.pItem.pToken1.pvalue);
assert(itemsmore.pItemsMore is null);
input = "";
p_context_init(&context, input);
assert_eq(P_SUCCESS, p_parse(&context));
start = p_result(&context);
assert(start.pItems is null);
input = "2 1";
p_context_init(&context, input);
assert_eq(P_SUCCESS, p_parse(&context));
start = p_result(&context);
assert(start.pItems !is null);
assert(start.pItems.pItem !is null);
assert(start.pItems.pItem.pDual !is null);
assert(start.pItems.pItem.pDual.pTwo1 !is null);
assert(start.pItems.pItem.pDual.pOne2 !is null);
assert(start.pItems.pItem.pDual.pTwo2 is null);
assert(start.pItems.pItem.pDual.pOne1 is null);
}