From a7348be95dd7c0ae503d46d5af701fda10f3c06f Mon Sep 17 00:00:00 2001
From: Josh Holtrop <jholtrop@gmail.com>
Date: Mon, 22 Jul 2024 17:29:54 -0400
Subject: [PATCH] Add rule field aliases - #24

---
 doc/user_guide.md             |  71 ++++++++++++++++--------
 lib/propane/generator.rb      |  13 +++++
 lib/propane/grammar.rb        |   2 +-
 lib/propane/rule.rb           |  18 +++++-
 lib/propane/rule_set.rb       |  16 +++++-
 spec/propane_spec.rb          | 100 ++++++++++++++++++++++++++++++++++
 spec/test_ast_field_aliases.c |  19 +++++++
 spec/test_ast_field_aliases.d |  21 +++++++
 spec/test_field_aliases.c     |  13 +++++
 spec/test_field_aliases.d     |  15 +++++
 10 files changed, 261 insertions(+), 27 deletions(-)
 create mode 100644 spec/test_ast_field_aliases.c
 create mode 100644 spec/test_ast_field_aliases.d
 create mode 100644 spec/test_field_aliases.c
 create mode 100644 spec/test_field_aliases.d

diff --git a/doc/user_guide.md b/doc/user_guide.md
index 6ac3fd3..dba5050 100644
--- a/doc/user_guide.md
+++ b/doc/user_guide.md
@@ -234,15 +234,15 @@ drop /\\s+/;
 
 Start -> Items;
 
-Items -> Item ItemsMore;
+Items -> Item:item ItemsMore;
 Items -> ;
 
-ItemsMore -> comma Item ItemsMore;
+ItemsMore -> comma Item:item ItemsMore;
 ItemsMore -> ;
 
 Item -> a;
 Item -> b;
-Item -> lparen Item rparen;
+Item -> lparen Item:item rparen;
 Item -> Dual;
 
 Dual -> One Two;
@@ -263,24 +263,24 @@ Start * start = p_result(&context);
 assert(start.pItems1 !is null);
 assert(start.pItems !is null);
 Items * items = start.pItems;
-assert(items.pItem !is null);
-assert(items.pItem.pToken1 !is null);
-assert_eq(TOKEN_a, items.pItem.pToken1.token);
-assert_eq(11, items.pItem.pToken1.pvalue);
+assert(items.item !is null);
+assert(items.item.pToken1 !is null);
+assert_eq(TOKEN_a, items.item.pToken1.token);
+assert_eq(11, items.item.pToken1.pvalue);
 assert(items.pItemsMore !is null);
 ItemsMore * itemsmore = items.pItemsMore;
-assert(itemsmore.pItem !is null);
-assert(itemsmore.pItem.pItem !is null);
-assert(itemsmore.pItem.pItem.pItem !is null);
-assert(itemsmore.pItem.pItem.pItem.pToken1 !is null);
-assert_eq(TOKEN_b, itemsmore.pItem.pItem.pItem.pToken1.token);
-assert_eq(22, itemsmore.pItem.pItem.pItem.pToken1.pvalue);
+assert(itemsmore.item !is null);
+assert(itemsmore.item.item !is null);
+assert(itemsmore.item.item.item !is null);
+assert(itemsmore.item.item.item.pToken1 !is null);
+assert_eq(TOKEN_b, itemsmore.item.item.item.pToken1.token);
+assert_eq(22, itemsmore.item.item.item.pToken1.pvalue);
 assert(itemsmore.pItemsMore !is null);
 itemsmore = itemsmore.pItemsMore;
-assert(itemsmore.pItem !is null);
-assert(itemsmore.pItem.pToken1 !is null);
-assert_eq(TOKEN_b, itemsmore.pItem.pToken1.token);
-assert_eq(22, itemsmore.pItem.pToken1.pvalue);
+assert(itemsmore.item !is null);
+assert(itemsmore.item.pToken1 !is null);
+assert_eq(TOKEN_b, itemsmore.item.pToken1.token);
+assert_eq(22, itemsmore.item.pToken1.pvalue);
 assert(itemsmore.pItemsMore is null);
 ```
 
@@ -607,6 +607,10 @@ This can be changed with the `start` statement.
 The grammar file must define a rule with the name of the start rule name which
 will be used as the top-level starting rule that the parser attempts to reduce.
 
+Rule statements are composed of the name of the rule, a `->` token, the fields
+defining the rule pattern that must be matched, and a terminating semicolon or
+user code block.
+
 Example:
 
 ```
@@ -635,9 +639,13 @@ E4 -> lparen E1 rparen << $$ = $2; >>
 
 This example uses the default start rule name of `Start`.
 
-A parser rule has zero or more terms on the right side of its definition.
-Each of these terms is either a token name or a rule name.
-A term can be immediately followed by a `?` character to signify that it is
+A parser rule has zero or more fields on the right side of its definition.
+Each of these fields is either a token name or a rule name.
+A field can optionally be followed by a `:` and then a field alias name.
+If present, the field alias name is used to refer to the field value in user
+code blocks, or if AST mode is active, the field alias name is used as the
+field name in the generated AST node structure.
+A field can be immediately followed by a `?` character to signify that it is
 optional.
 Another example:
 
@@ -647,14 +655,16 @@ token private;
 token int;
 token ident /[a-zA-Z_][a-zA-Z_0-9]*/;
 token semicolon /;/;
-IntegerDeclaration -> Visibility? int ident semicolon;
+IntegerDeclaration -> Visibility? int ident:name semicolon;
 Visibility -> public;
 Visibility -> private;
 ```
 
-In a parser rule code block, parser values for the right side terms are
-accessible as `$1` for the first term's parser value, `$2` for the second
-term's parser value, etc...
+In a parser rule code block, parser values for the right side fields are
+accessible as `$1` for the first field's parser value, `$2` for the second
+field's parser value, etc...
+For the `IntegerDeclaration` rule, the third field value can also be referred
+to as `${name}`.
 The `$$` symbol accesses the output parser value for this rule.
 The above examples demonstrate how the parser values for the rule components
 can be used to produce the parser value for the accepted rule.
@@ -849,6 +859,19 @@ If the first rule is matched, then `pOne1` and `pTwo2` will be non-null while
 `pTwo1` and `pOne2` will be null.
 If the second rule is matched instead, then the opposite would be the case.
 
+If a field alias is present in a rule definition, an additional field will be
+generated in the AST node with the field alias name.
+For example:
+
+```
+Exp -> Exp:left plus ExpB:right;
+```
+
+In the generated `Exp` structure, the fields `pExp`, `pExp1`, and `left` will
+all point to the same child node (an instance of the `Exp` structure), and the
+fields `pExpB`, `pExpB3`, and `right` will all point to the same child node
+(an instance of the `ExpB` structure).
+
 ##> Functions
 
 ### `p_context_init`
diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb
index 21c5923..0a4bce5 100644
--- a/lib/propane/generator.rb
+++ b/lib/propane/generator.rb
@@ -276,6 +276,19 @@ class Propane
             "statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}"
           end
         end
+        code = code.gsub(/\$\{(\w+)\}/) do |match|
+          aliasname = $1
+          if index = rule.aliases[aliasname]
+            case @language
+            when "c"
+              "state_values_stack_index(statevalues, -(int)n_states + #{index})->pvalue.v_#{rule.components[index].ptypename}"
+            when "d"
+              "statevalues[$-n_states+#{index}].pvalue.v_#{rule.components[index].ptypename}"
+            end
+          else
+            raise Error.new("Field alias '#{aliasname}' not found")
+          end
+        end
       else
         code = code.gsub(/\$\$/) do |match|
           if @grammar.ast
diff --git a/lib/propane/grammar.rb b/lib/propane/grammar.rb
index de5d93a..0e8f090 100644
--- a/lib/propane/grammar.rb
+++ b/lib/propane/grammar.rb
@@ -198,7 +198,7 @@ class Propane
         if @ast && ptypename
           raise Error.new("Multiple ptypes are unsupported in AST mode")
         end
-        md = consume!(/((?:#{IDENTIFIER_REGEX}\??\s*)*)\s*/, "expected rule component list")
+        md = consume!(/((?:#{IDENTIFIER_REGEX}(?::#{IDENTIFIER_REGEX})?\??\s*)*)\s*/, "expected rule component list")
         components = md[1].strip.split(/\s+/)
         if @ast
           consume!(/;/, "expected `;'")
diff --git a/lib/propane/rule.rb b/lib/propane/rule.rb
index 20047b9..82bf860 100644
--- a/lib/propane/rule.rb
+++ b/lib/propane/rule.rb
@@ -6,6 +6,10 @@ class Propane
     #   Rule components.
     attr_reader :components
 
+    # @return [Hash]
+    #   Field aliases.
+    attr_reader :aliases
+
     # @return [String]
     #   User code associated with the rule.
     attr_reader :code
@@ -49,7 +53,19 @@ class Propane
     #   Line number where the rule was defined in the input grammar.
     def initialize(name, components, code, ptypename, line_number)
       @name = name
-      @components = components
+      @aliases = {}
+      @components = components.each_with_index.map do |component, i|
+        if component =~ /\A([^:\s]+):([^:?\s]+)(\??)\z/
+          c, aliasname = "#{$1}#{$3}", $2 # `Name:alias?` -> component `Name?`, alias `alias`
+          if @aliases[aliasname]
+            raise Error.new("Error: duplicate field alias `#{aliasname}` for rule #{name} defined on line #{line_number}")
+          end
+          @aliases[aliasname] = i
+          c
+        else
+          component
+        end
+      end
       @rule_set_node_field_index_map = components.map {0}
       @code = code
       @ptypename = ptypename
diff --git a/lib/propane/rule_set.rb b/lib/propane/rule_set.rb
index dd20b3c..d8ef0f6 100644
--- a/lib/propane/rule_set.rb
+++ b/lib/propane/rule_set.rb
@@ -100,7 +100,9 @@ class Propane
 
     # Finalize a RuleSet after adding all Rules to it.
     def finalize(grammar)
-      build_ast_fields(grammar)
+      if grammar.ast
+        build_ast_fields(grammar)
+      end
     end
 
     private
@@ -148,6 +150,18 @@ class Propane
             "#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}"
         end
       end
+      # Now merge in the field aliases as given by the user in the
+      # grammar.
+      field_aliases = {}
+      @rules.each do |rule|
+        rule.aliases.each do |alias_name, index|
+          if field_aliases[alias_name] && field_aliases[alias_name] != index
+            raise Error.new("Error: conflicting AST node field positions for alias `#{alias_name}`")
+          end
+          field_aliases[alias_name] = index
+          @ast_fields[index][alias_name] = @ast_fields[index].first[1]
+        end
+      end
     end
 
   end
diff --git a/spec/propane_spec.rb b/spec/propane_spec.rb
index f5fd48d..9e961b1 100644
--- a/spec/propane_spec.rb
+++ b/spec/propane_spec.rb
@@ -213,6 +213,42 @@ EOF
     expect(File.binread("spec/run/testparser.log")).to match %r{Shift/Reduce conflict \(state \d+\) between token b and rule As2\? \(defined on line 4\)}
   end
 
+  it "errors on duplicate field aliases in a rule" do
+    write_grammar <<EOF
+token a;
+token b;
+Start -> a:foo b:foo;
+EOF
+    results = run_propane(extra_args: %w[-w], capture: true)
+    expect(results.stderr).to match %r{Error: duplicate field alias `foo` for rule Start defined on line 3}
+    expect(results.status).to_not eq 0
+  end
+
+  it "errors when an alias is in different positions for different rules in a rule set when AST mode is enabled" do
+    write_grammar <<EOF
+ast;
+token a;
+token b;
+Start -> a:foo b;
+Start -> b b:foo;
+EOF
+    results = run_propane(extra_args: %w[-w], capture: true)
+    expect(results.stderr).to match %r{Error: conflicting AST node field positions for alias `foo`}
+    expect(results.status).to_not eq 0
+  end
+
+  it "does not error when an alias is in different positions for different rules in a rule set when AST mode is not enabled" do
+    write_grammar <<EOF
+token a;
+token b;
+Start -> a:foo b;
+Start -> b b:foo;
+EOF
+    results = run_propane(extra_args: %w[-w], capture: true)
+    expect(results.stderr).to eq ""
+    expect(results.status).to eq 0
+  end
+
   %w[d c].each do |language|
 
     context "#{language.upcase} language" do
@@ -1120,6 +1156,70 @@ EOF
         expect(results.stderr).to eq ""
         expect(results.status).to eq 0
       end
+
+      it "allows specifying field aliases in AST mode" do
+        write_grammar <<EOF
+ast;
+
+token a;
+token b;
+token c;
+drop /\\s+/;
+Start -> T:first T:second T:third;
+T -> a;
+T -> b;
+T -> c;
+EOF
+        run_propane(language: language)
+        compile("spec/test_ast_field_aliases.#{language}", language: language)
+        results = run_test
+        expect(results.stderr).to eq ""
+        expect(results.status).to eq 0
+      end
+
+      it "allows specifying field aliases when AST mode is not enabled" do
+        if language == "d"
+          write_grammar <<EOF
+<<
+import std.stdio;
+>>
+ptype string;
+token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
+  $$ = match;
+>>
+drop /\\s+/;
+Start -> id:first id:second <<
+  writeln("first is ", ${first});
+  writeln("second is ", ${second});
+>>
+EOF
+        else
+          write_grammar <<EOF
+<<
+#include <stdio.h>
+#include <string.h>
+>>
+ptype char const *;
+token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
+  char * s = malloc(match_length + 1);
+  strncpy(s, (char const *)match, match_length);
+  s[match_length] = 0;
+  $$ = s;
+>>
+drop /\\s+/;
+Start -> id:first id:second <<
+  printf("first is %s\\n", ${first});
+  printf("second is %s\\n", ${second});
+>>
+EOF
+        end
+        run_propane(language: language)
+        compile("spec/test_field_aliases.#{language}", language: language)
+        results = run_test
+        expect(results.stderr).to eq ""
+        expect(results.status).to eq 0
+        expect(results.stdout).to match /first is foo1.*second is bar2/m
+      end
     end
   end
 end
diff --git a/spec/test_ast_field_aliases.c b/spec/test_ast_field_aliases.c
new file mode 100644
index 0000000..5b3716a
--- /dev/null
+++ b/spec/test_ast_field_aliases.c
@@ -0,0 +1,19 @@
+#include "testparser.h"
+#include <assert.h>
+#include <string.h>
+#include "testutils.h"
+
+int main()
+{
+    char const * input = "\na\nb\nc";
+    p_context_t context;
+    p_context_init(&context, (uint8_t const *)input, strlen(input));
+    assert(p_parse(&context) == P_SUCCESS);
+    Start * start = p_result(&context);
+
+    assert_eq(TOKEN_a, start->first->pToken->token);
+    assert_eq(TOKEN_b, start->second->pToken->token);
+    assert_eq(TOKEN_c, start->third->pToken->token);
+
+    return 0;
+}
diff --git a/spec/test_ast_field_aliases.d b/spec/test_ast_field_aliases.d
new file mode 100644
index 0000000..907946e
--- /dev/null
+++ b/spec/test_ast_field_aliases.d
@@ -0,0 +1,21 @@
+import testparser;
+import std.stdio;
+import testutils;
+
+int main()
+{
+    return 0;
+}
+
+unittest
+{
+    string input = "\na\nb\nc";
+    p_context_t context;
+    p_context_init(&context, input);
+    assert(p_parse(&context) == P_SUCCESS);
+    Start * start = p_result(&context);
+
+    assert_eq(TOKEN_a, start.first.pToken.token);
+    assert_eq(TOKEN_b, start.second.pToken.token);
+    assert_eq(TOKEN_c, start.third.pToken.token);
+}
diff --git a/spec/test_field_aliases.c b/spec/test_field_aliases.c
new file mode 100644
index 0000000..d02d7b6
--- /dev/null
+++ b/spec/test_field_aliases.c
@@ -0,0 +1,13 @@
+#include "testparser.h"
+#include <assert.h>
+#include <string.h>
+#include "testutils.h"
+
+int main()
+{
+    char const * input = "foo1\nbar2";
+    p_context_t context;
+    p_context_init(&context, (uint8_t const *)input, strlen(input));
+    assert(p_parse(&context) == P_SUCCESS);
+    return 0;
+}
diff --git a/spec/test_field_aliases.d b/spec/test_field_aliases.d
new file mode 100644
index 0000000..61f3a2f
--- /dev/null
+++ b/spec/test_field_aliases.d
@@ -0,0 +1,15 @@
+import testparser;
+import std.stdio;
+
+int main()
+{
+    return 0;
+}
+
+unittest
+{
+    string input = "foo1\nbar2";
+    p_context_t context;
+    p_context_init(&context, input);
+    assert(p_parse(&context) == P_SUCCESS);
+}