Allow user to specify AST node prefix or suffix
Add ast_prefix and ast_suffix grammar statements.
This commit is contained in: parent d0f542cbd7, commit 153f9d28f8
@@ -924,7 +924,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 {
 /* Successful parse. */
 <% if @grammar.ast %>
-context->parse_result = (Start *)state_values_stack_index(&statevalues, -1)->ast_node;
+context->parse_result = (<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> *)state_values_stack_index(&statevalues, -1)->ast_node;
 <% else %>
 context->parse_result = state_values_stack_index(&statevalues, -1)->pvalue;
 <% end %>

@@ -941,7 +941,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 {
 /* We shifted a token, mark it consumed. */
 <% if @grammar.ast %>
-Token * token_ast_node = malloc(sizeof(Token));
+<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>));
 token_ast_node->token = token;
 token_ast_node->pvalue = token_info.pvalue;
 state_values_stack_index(&statevalues, -1)->ast_node = token_ast_node;

@@ -1029,7 +1029,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 * @return Parse result value.
 */
 <% if @grammar.ast %>
-Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
+<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% else %>
 <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% end %>

@@ -64,7 +64,7 @@ public union <%= @grammar.prefix %>value_t

 <% if @grammar.ast %>
 /** AST node types. @{ */
-public struct Token
+public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
 {
 <%= @grammar.prefix %>token_t token;
 <%= @grammar.prefix %>value_t pvalue;

@@ -72,7 +72,7 @@ public struct Token

 <% @parser.rule_sets.each do |name, rule_set| %>
 <% next if name.start_with?("$") %>
-public struct <%= name %>
+public struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
 {
 <% rule_set.ast_fields.each do |fields| %>
 union

@@ -144,7 +144,7 @@ public struct <%= @grammar.prefix %>context_t

 /** Parse result value. */
 <% if @grammar.ast %>
-Start * parse_result;
+<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * parse_result;
 <% else %>
 <%= @grammar.prefix %>value_t parse_result;
 <% end %>

@@ -973,7 +973,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 {
 /* Successful parse. */
 <% if @grammar.ast %>
-context.parse_result = cast(Start *)statevalues[$-1].ast_node;
+context.parse_result = cast(<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> *)statevalues[$-1].ast_node;
 <% else %>
 context.parse_result = statevalues[$-1].pvalue;
 <% end %>

@@ -988,7 +988,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 {
 /* We shifted a token, mark it consumed. */
 <% if @grammar.ast %>
-Token * token_ast_node = new Token(token, token_info.pvalue);
+<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token, token_info.pvalue);
 statevalues[$-1].ast_node = token_ast_node;
 <% else %>
 statevalues[$-1].pvalue = token_info.pvalue;

@@ -1075,7 +1075,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 * @return Parse result value.
 */
 <% if @grammar.ast %>
-public Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
+public <%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% else %>
 public <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% end %>

@@ -56,11 +56,11 @@ typedef union

 <% if @grammar.ast %>
 /** AST node types. @{ */
-typedef struct Token
+typedef struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
 {
 <%= @grammar.prefix %>token_t token;
 <%= @grammar.prefix %>value_t pvalue;
-} Token;
+} <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>;

 <% @parser.rule_sets.each do |name, rule_set| %>
 <% next if name.start_with?("$") %>

@@ -69,7 +69,7 @@ struct <%= name %>;

 <% @parser.rule_sets.each do |name, rule_set| %>
 <% next if name.start_with?("$") %>
-typedef struct <%= name %>
+typedef struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
 {
 <% rule_set.ast_fields.each do |fields| %>
 union

@@ -79,7 +79,7 @@ typedef struct <%= name %>
 <% end %>
 };
 <% end %>
-} <%= name %>;
+} <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>;

 <% end %>
 /** @} */

@@ -144,7 +144,7 @@ typedef struct

 /** Parse result value. */
 <% if @grammar.ast %>
-Start * parse_result;
+<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * parse_result;
 <% else %>
 <%= @grammar.prefix %>value_t parse_result;
 <% end %>

@@ -173,7 +173,7 @@ size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%=
 size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context);

 <% if @grammar.ast %>
-Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
+<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
 <% else %>
 <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
 <% end %>

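To make the header templating above concrete: with, for example, `ast_prefix ABC;` and `ast_suffix XYZ;` in the grammar (the same values used in the documentation example below) and the default `p_` parser prefix, the generated C header would contain declarations roughly like the following. This is an illustrative sketch only, not literal generator output; the actual structs and fields depend on the grammar's rule sets.

```
/* Illustrative sketch of generated declarations -- not literal output. */
typedef struct ABCTokenXYZ
{
    p_token_t token;   /* matched token ID */
    p_value_t pvalue;  /* token parse value */
} ABCTokenXYZ;

/* One struct per rule set, renamed the same way; field names are unchanged. */
typedef struct ABCStartXYZ ABCStartXYZ;
typedef struct ABCItemsXYZ ABCItemsXYZ;

/* The result accessor returns a pointer to the renamed start node type. */
ABCStartXYZ * p_result(p_context_t * context);
```
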
@@ -276,6 +276,48 @@ assert_eq(22, itemsmore.pItem.pToken1.pvalue);
 assert(itemsmore.pItemsMore is null);
 ```

+## `ast_prefix` and `ast_suffix` statements
+
+In AST generation mode, structure types are defined and named based on the
+rules in the grammar.
+Additionally, a structure type called `Token` is generated to hold parsed
+token information.
+
+These structure names can be modified by using the `ast_prefix` or `ast_suffix`
+statements in the grammar file.
+The field names that point to instances of the structures are not affected by
+the `ast_prefix` or `ast_suffix` values.
+
+For example, if the following two lines were added to the example above:
+
+```
+ast_prefix ABC;
+ast_suffix XYZ;
+```
+
+Then the types would instead be used as follows:
+
+```
+string input = "a, ((b)), b";
+p_context_t context;
+p_context_init(&context, input);
+assert_eq(P_SUCCESS, p_parse(&context));
+ABCStartXYZ * start = p_result(&context);
+assert(start.pItems1 !is null);
+assert(start.pItems !is null);
+ABCItemsXYZ * items = start.pItems;
+assert(items.pItem !is null);
+assert(items.pItem.pToken1 !is null);
+assert_eq(TOKEN_a, items.pItem.pToken1.token);
+assert_eq(11, items.pItem.pToken1.pvalue);
+assert(items.pItemsMore !is null);
+ABCItemsMoreXYZ * itemsmore = items.pItemsMore;
+assert(itemsmore.pItem !is null);
+assert(itemsmore.pItem.pItem !is null);
+assert(itemsmore.pItem.pItem.pItem !is null);
+assert(itemsmore.pItem.pItem.pItem.pToken1 !is null);
+```
+
 ##> Specifying tokens - the `token` statement

 The `token` statement allows defining a lexer token and a pattern to match that

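The documentation example above targets the D backend. The same renaming applies to the C backend; here is a hedged sketch of equivalent C usage, modeled on the new spec/test_ast_ps.c added later in this commit but using the `ABC`/`XYZ` names from the documentation example. The `testparser.h` header and the grammar it is generated from are assumptions for illustration.

```
#include "testparser.h"   /* generated parser header (illustrative name) */
#include <assert.h>
#include <string.h>

int main(void)
{
    char const * input = "a, ((b)), b";
    p_context_t context;
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    assert(p_parse(&context) == P_SUCCESS);

    /* Struct names carry the prefix/suffix; field names do not. */
    ABCStartXYZ * start = p_result(&context);
    assert(start->pItems != NULL);
    ABCItemsXYZ * items = start->pItems;
    assert(items->pItem != NULL);
    assert(items->pItem->pToken1 != NULL);
    assert(items->pItem->pToken1->token == TOKEN_a);
    assert(items->pItem->pToken1->pvalue == 11);
    return 0;
}
```
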
@@ -120,7 +120,7 @@ class Propane
 end
 determine_possibly_empty_rulesets!(rule_sets)
 rule_sets.each do |name, rule_set|
-rule_set.finalize
+rule_set.finalize(@grammar)
 end
 # Generate the lexer.
 @lexer = Lexer.new(@grammar)

@@ -6,6 +6,8 @@ class Propane
 IDENTIFIER_REGEX = /(?:[a-zA-Z]|_[a-zA-Z0-9])[a-zA-Z_0-9]*/

 attr_reader :ast
+attr_reader :ast_prefix
+attr_reader :ast_suffix
 attr_reader :modulename
 attr_reader :patterns
 attr_reader :rules

@@ -26,6 +28,8 @@ class Propane
 @ptypes = {"default" => "void *"}
 @prefix = "p_"
 @ast = false
+@ast_prefix = ""
+@ast_suffix = ""
 parse_grammar!
 end

@@ -54,6 +58,8 @@ class Propane
 elsif parse_comment_line!
 elsif @mode.nil? && parse_mode_label!
 elsif parse_ast_statement!
+elsif parse_ast_prefix_statement!
+elsif parse_ast_suffix_statement!
 elsif parse_module_statement!
 elsif parse_ptype_statement!
 elsif parse_pattern_statement!

@@ -91,6 +97,18 @@ class Propane
 end
 end

+def parse_ast_prefix_statement!
+if md = consume!(/ast_prefix\s+(\w+)\s*;/)
+@ast_prefix = md[1]
+end
+end
+
+def parse_ast_suffix_statement!
+if md = consume!(/ast_suffix\s+(\w+)\s*;/)
+@ast_suffix = md[1]
+end
+end
+
 def parse_module_statement!
 if consume!(/module\s+/)
 md = consume!(/([\w.]+)\s*/, "expected module name")

@@ -3,6 +3,10 @@ class Propane
 # A RuleSet collects all grammar rules of the same name.
 class RuleSet

+# @return [Array<Hash>]
+#   AST fields.
+attr_reader :ast_fields
+
 # @return [Integer]
 #   ID of the RuleSet.
 attr_reader :id

@@ -76,6 +80,13 @@ class Propane
 @_start_token_set
 end

+# Finalize a RuleSet after adding all Rules to it.
+def finalize(grammar)
+build_ast_fields(grammar)
+end
+
+private
+
 # Build the set of AST fields for this RuleSet.
 #
 # This is an Array of Hashes. Each entry in the Array corresponds to a

@@ -84,14 +95,11 @@ class Propane
 # a key. It may also have the field name without the positional suffix if
 # that field only exists in one position across all Rules in the RuleSet.
 #
-# @return [Array<Hash>]
-#   AST fields.
-def ast_fields
-@_ast_fields ||=
-begin
+# @return [void]
+def build_ast_fields(grammar)
 field_ast_node_indexes = {}
 field_indexes_across_all_rules = {}
-ast_node_fields = []
+@ast_fields = []
 @rules.each do |rule|
 rule.components.each_with_index do |component, i|
 if component.is_a?(Token)

@@ -99,10 +107,11 @@ class Propane
 else
 node_name = component.name
 end
+struct_name = "#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}"
 field_name = "p#{node_name}#{i + 1}"
 unless field_ast_node_indexes[field_name]
-field_ast_node_indexes[field_name] = ast_node_fields.size
-ast_node_fields << {field_name => node_name}
+field_ast_node_indexes[field_name] = @ast_fields.size
+@ast_fields << {field_name => struct_name}
 end
 field_indexes_across_all_rules[node_name] ||= Set.new
 field_indexes_across_all_rules[node_name] << field_ast_node_indexes[field_name]

@@ -114,16 +123,10 @@ class Propane
 # If this field was only seen in one position across all rules,
 # then add an alias to the positional field name that does not
 # include the position.
-ast_node_fields[indexes_across_all_rules.first]["p#{node_name}"] = node_name
+@ast_fields[indexes_across_all_rules.first]["p#{node_name}"] =
+"#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}"
 end
 end
-ast_node_fields
-end
 end
-
-# Finalize a RuleSet after adding all Rules to it.
-def finalize
-ast_fields
-end

 end

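The alias logic above, combined with the `union` wrappers emitted by the header templates earlier in this commit, means a child that appears in only one position gets both a positional field name and an unsuffixed alias backed by the same storage. A hedged C illustration follows, using the `P` prefix and `S` suffix from the new spec below; the pointer-equality assert is an inference from that union layout, not something the new tests themselves check.

```
#include "testparser.h"   /* parser generated from the spec grammar below (illustrative) */
#include <assert.h>
#include <string.h>

int main(void)
{
    char const * input = "a";
    p_context_t context;
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    assert(p_parse(&context) == P_SUCCESS);

    /* "Items" appears only at position 1 in Start's rule, so the generated
     * PStartS struct exposes both the positional field pItems1 and its alias
     * pItems as members of the same union; they refer to the same child. */
    PStartS * start = p_result(&context);
    assert(start->pItems1 != NULL);
    assert(start->pItems1 == start->pItems);
    return 0;
}
```
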
@@ -845,6 +845,50 @@ EOF
 expect(results.stderr).to eq ""
 expect(results.status).to eq 0
 end
+
+it "supports AST node prefix and suffix" do
+write_grammar <<EOF
+ast;
+ast_prefix P ;
+ast_suffix S;
+
+ptype int;
+
+token a << $$ = 11; >>
+token b << $$ = 22; >>
+token one /1/;
+token two /2/;
+token comma /,/ <<
+$$ = 42;
+>>
+token lparen /\\(/;
+token rparen /\\)/;
+drop /\\s+/;
+
+Start -> Items;
+
+Items -> Item ItemsMore;
+Items -> ;
+
+ItemsMore -> comma Item ItemsMore;
+ItemsMore -> ;
+
+Item -> a;
+Item -> b;
+Item -> lparen Item rparen;
+Item -> Dual;
+
+Dual -> One Two;
+Dual -> Two One;
+One -> one;
+Two -> two;
+EOF
+run_propane(language: language)
+compile("spec/test_ast_ps.#{language}", language: language)
+results = run_test
+expect(results.stderr).to eq ""
+expect(results.status).to eq 0
+end
 end
 end
 end

spec/test_ast_ps.c (new file, 55 lines)
@@ -0,0 +1,55 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"

int main()
{
char const * input = "a, ((b)), b";
p_context_t context;
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert_eq(P_SUCCESS, p_parse(&context));
PStartS * start = p_result(&context);
assert(start->pItems1 != NULL);
assert(start->pItems != NULL);
PItemsS * items = start->pItems;
assert(items->pItem != NULL);
assert(items->pItem->pToken1 != NULL);
assert_eq(TOKEN_a, items->pItem->pToken1->token);
assert_eq(11, items->pItem->pToken1->pvalue);
assert(items->pItemsMore != NULL);
PItemsMoreS * itemsmore = items->pItemsMore;
assert(itemsmore->pItem != NULL);
assert(itemsmore->pItem->pItem != NULL);
assert(itemsmore->pItem->pItem->pItem != NULL);
assert(itemsmore->pItem->pItem->pItem->pToken1 != NULL);
assert_eq(TOKEN_b, itemsmore->pItem->pItem->pItem->pToken1->token);
assert_eq(22, itemsmore->pItem->pItem->pItem->pToken1->pvalue);
assert(itemsmore->pItemsMore != NULL);
itemsmore = itemsmore->pItemsMore;
assert(itemsmore->pItem != NULL);
assert(itemsmore->pItem->pToken1 != NULL);
assert_eq(TOKEN_b, itemsmore->pItem->pToken1->token);
assert_eq(22, itemsmore->pItem->pToken1->pvalue);
assert(itemsmore->pItemsMore == NULL);

input = "";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert_eq(P_SUCCESS, p_parse(&context));
start = p_result(&context);
assert(start->pItems == NULL);

input = "2 1";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert_eq(P_SUCCESS, p_parse(&context));
start = p_result(&context);
assert(start->pItems != NULL);
assert(start->pItems->pItem != NULL);
assert(start->pItems->pItem->pDual != NULL);
assert(start->pItems->pItem->pDual->pTwo1 != NULL);
assert(start->pItems->pItem->pDual->pOne2 != NULL);
assert(start->pItems->pItem->pDual->pTwo2 == NULL);
assert(start->pItems->pItem->pDual->pOne1 == NULL);

return 0;
}

spec/test_ast_ps.d (new file, 57 lines)
@@ -0,0 +1,57 @@
import testparser;
import std.stdio;
import testutils;

int main()
{
return 0;
}

unittest
{
string input = "a, ((b)), b";
p_context_t context;
p_context_init(&context, input);
assert_eq(P_SUCCESS, p_parse(&context));
PStartS * start = p_result(&context);
assert(start.pItems1 !is null);
assert(start.pItems !is null);
PItemsS * items = start.pItems;
assert(items.pItem !is null);
assert(items.pItem.pToken1 !is null);
assert_eq(TOKEN_a, items.pItem.pToken1.token);
assert_eq(11, items.pItem.pToken1.pvalue);
assert(items.pItemsMore !is null);
PItemsMoreS * itemsmore = items.pItemsMore;
assert(itemsmore.pItem !is null);
assert(itemsmore.pItem.pItem !is null);
assert(itemsmore.pItem.pItem.pItem !is null);
assert(itemsmore.pItem.pItem.pItem.pToken1 !is null);
assert_eq(TOKEN_b, itemsmore.pItem.pItem.pItem.pToken1.token);
assert_eq(22, itemsmore.pItem.pItem.pItem.pToken1.pvalue);
assert(itemsmore.pItemsMore !is null);
itemsmore = itemsmore.pItemsMore;
assert(itemsmore.pItem !is null);
assert(itemsmore.pItem.pToken1 !is null);
assert_eq(TOKEN_b, itemsmore.pItem.pToken1.token);
assert_eq(22, itemsmore.pItem.pToken1.pvalue);
assert(itemsmore.pItemsMore is null);

input = "";
p_context_init(&context, input);
assert_eq(P_SUCCESS, p_parse(&context));
start = p_result(&context);
assert(start.pItems is null);

input = "2 1";
p_context_init(&context, input);
assert_eq(P_SUCCESS, p_parse(&context));
start = p_result(&context);
assert(start.pItems !is null);
assert(start.pItems.pItem !is null);
assert(start.pItems.pItem.pDual !is null);
assert(start.pItems.pItem.pDual.pTwo1 !is null);
assert(start.pItems.pItem.pDual.pOne2 !is null);
assert(start.pItems.pItem.pDual.pTwo2 is null);
assert(start.pItems.pItem.pDual.pOne1 is null);
}