Add rule field aliases - #24

2024-07-22 17:29:54 -04:00 · 2024-07-22 17:29:54 -04:00 · a7348be95d
commit a7348be95d
parent 9746b3f2bf
10 changed files with 261 additions and 27 deletions
--- a/doc/user_guide.md
+++ b/doc/user_guide.md
@ -234,15 +234,15 @@ drop /\\s+/;

 Start -> Items;

-Items -> Item ItemsMore;
+Items -> Item:item ItemsMore;
 Items -> ;

-ItemsMore -> comma Item ItemsMore;
+ItemsMore -> comma Item:item ItemsMore;
 ItemsMore -> ;

 Item -> a;
 Item -> b;
-Item -> lparen Item rparen;
+Item -> lparen Item:item rparen;
 Item -> Dual;

 Dual -> One Two;
@ -263,24 +263,24 @@ Start * start = p_result(&context);
 assert(start.pItems1 !is null);
 assert(start.pItems !is null);
 Items * items = start.pItems;
-assert(items.pItem !is null);
-assert(items.pItem.pToken1 !is null);
-assert_eq(TOKEN_a, items.pItem.pToken1.token);
-assert_eq(11, items.pItem.pToken1.pvalue);
+assert(items.item !is null);
+assert(items.item.pToken1 !is null);
+assert_eq(TOKEN_a, items.item.pToken1.token);
+assert_eq(11, items.item.pToken1.pvalue);
 assert(items.pItemsMore !is null);
 ItemsMore * itemsmore = items.pItemsMore;
-assert(itemsmore.pItem !is null);
-assert(itemsmore.pItem.pItem !is null);
-assert(itemsmore.pItem.pItem.pItem !is null);
-assert(itemsmore.pItem.pItem.pItem.pToken1 !is null);
-assert_eq(TOKEN_b, itemsmore.pItem.pItem.pItem.pToken1.token);
-assert_eq(22, itemsmore.pItem.pItem.pItem.pToken1.pvalue);
+assert(itemsmore.item !is null);
+assert(itemsmore.item.item !is null);
+assert(itemsmore.item.item.item !is null);
+assert(itemsmore.item.item.item.pToken1 !is null);
+assert_eq(TOKEN_b, itemsmore.item.item.item.pToken1.token);
+assert_eq(22, itemsmore.item.item.item.pToken1.pvalue);
 assert(itemsmore.pItemsMore !is null);
 itemsmore = itemsmore.pItemsMore;
-assert(itemsmore.pItem !is null);
-assert(itemsmore.pItem.pToken1 !is null);
-assert_eq(TOKEN_b, itemsmore.pItem.pToken1.token);
-assert_eq(22, itemsmore.pItem.pToken1.pvalue);
+assert(itemsmore.item !is null);
+assert(itemsmore.item.pToken1 !is null);
+assert_eq(TOKEN_b, itemsmore.item.pToken1.token);
+assert_eq(22, itemsmore.item.pToken1.pvalue);
 assert(itemsmore.pItemsMore is null);
 ```

@ -607,6 +607,10 @@ This can be changed with the `start` statement.
 The grammar file must define a rule with the name of the start rule name which
 will be used as the top-level starting rule that the parser attempts to reduce.

+Rule statements are composed of the name of the rule, a `->` token, the fields
+defining the rule pattern that must be matched, and a terminating semicolon or
+user code block.
+
 Example:

 ```
@ -635,9 +639,13 @@ E4 -> lparen E1 rparen << $$ = $2; >>

 This example uses the default start rule name of `Start`.

-A parser rule has zero or more terms on the right side of its definition.
-Each of these terms is either a token name or a rule name.
-A term can be immediately followed by a `?` character to signify that it is
+A parser rule has zero or more fields on the right side of its definition.
+Each of these fields is either a token name or a rule name.
+A field can optionally be followed by a `:` and then a field alias name.
+If present, the field alias name is used to refer to the field value in user
+code blocks, or if AST mode is active, the field alias name is used as the
+field name in the generated AST node structure.
+A field can be immediately followed by a `?` character to signify that it is
 optional.
 Another example:

@ -647,14 +655,16 @@ token private;
 token int;
 token ident /[a-zA-Z_][a-zA-Z_0-9]*/;
 token semicolon /;/;
-IntegerDeclaration -> Visibility? int ident semicolon;
+IntegerDeclaration -> Visibility? int ident:name semicolon;
 Visibility -> public;
 Visibility -> private;
 ```

-In a parser rule code block, parser values for the right side terms are
-accessible as `$1` for the first term's parser value, `$2` for the second
-term's parser value, etc...
+In a parser rule code block, parser values for the right side fields are
+accessible as `$1` for the first field's parser value, `$2` for the second
+field's parser value, etc...
+For the `IntegerDeclaration` rule, the third field value can also be referred
+to as `${name}`.
 The `$$` symbol accesses the output parser value for this rule.
 The above examples demonstrate how the parser values for the rule components
 can be used to produce the parser value for the accepted rule.
@ -849,6 +859,19 @@ If the first rule is matched, then `pOne1` and `pTwo2` will be non-null while
 `pTwo1` and `pOne2` will be null.
 If the second rule is matched instead, then the opposite would be the case.

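+If a field alias is present in a rule definition, an additional field will be
+generated in the AST node with the field alias name.
+When several rules with the same name use the same alias name, the alias must
+refer to the same field position in each of those rules; otherwise Propane
+reports a conflicting AST node field position error.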
+For example:
+
+```
+Exp -> Exp:left plus ExpB:right;
+```
+
+In the generated `Exp` structure, the fields `pExp`, `pExp1`, and `left` will
+all point to the same child node (an instance of the `Exp` structure), and the
+fields `pExpB`, `pExpB3`, and `right` will all point to the same child node
+(an instance of the `ExpB` structure).
+
 ##> Functions

 ### `p_context_init`
--- a/lib/propane/generator.rb
+++ b/lib/propane/generator.rb
@ -276,6 +276,19 @@ class Propane
            "statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}"
          end
        end
+        code = code.gsub(/\$\{(\w+)\}/) do |match|
+          aliasname = $1
+          if index = rule.aliases[aliasname]
+            case @language
+            when "c"
+              "state_values_stack_index(statevalues, -(int)n_states + #{index})->pvalue.v_#{rule.components[index].ptypename}"
+            when "d"
+              "statevalues[$-n_states+#{index}].pvalue.v_#{rule.components[index].ptypename}"
+            end
+          else
+            raise Error.new("Field alias '#{aliasname}' not found")
+          end
+        end
      else
        code = code.gsub(/\$\$/) do |match|
          if @grammar.ast
--- a/lib/propane/grammar.rb
+++ b/lib/propane/grammar.rb
@ -198,7 +198,7 @@ class Propane
        if @ast && ptypename
          raise Error.new("Multiple ptypes are unsupported in AST mode")
        end
-        md = consume!(/((?:#{IDENTIFIER_REGEX}\??\s*)*)\s*/, "expected rule component list")
+        md = consume!(/((?:#{IDENTIFIER_REGEX}(?::#{IDENTIFIER_REGEX})?\??\s*)*)\s*/, "expected rule component list")
        components = md[1].strip.split(/\s+/)
        if @ast
          consume!(/;/, "expected `;'")
--- a/lib/propane/rule.rb
+++ b/lib/propane/rule.rb
@ -6,6 +6,10 @@ class Propane
    #   Rule components.
    attr_reader :components

+    # @return [Hash]
+    #   Field aliases.
+    attr_reader :aliases
+
    # @return [String]
    #   User code associated with the rule.
    attr_reader :code
@ -49,7 +53,19 @@ class Propane
    #   Line number where the rule was defined in the input grammar.
    def initialize(name, components, code, ptypename, line_number)
      @name = name
-      @components = components
+      @aliases = {}
+      @components = components.each_with_index.map do |component, i|
+        if component =~ /(\S+):(\S+)/
+          c, aliasname = $1, $2
+          if @aliases[aliasname]
+            raise Error.new("Error: duplicate field alias `#{aliasname}` for rule #{name} defined on line #{line_number}")
+          end
+          @aliases[aliasname] = i
+          c
+        else
+          component
+        end
+      end
      @rule_set_node_field_index_map = components.map {0}
      @code = code
      @ptypename = ptypename
--- a/lib/propane/rule_set.rb
+++ b/lib/propane/rule_set.rb
@ -100,7 +100,9 @@ class Propane

    # Finalize a RuleSet after adding all Rules to it.
    def finalize(grammar)
-      build_ast_fields(grammar)
+      # The AST fields are only built when the grammar has AST mode enabled.
+      build_ast_fields(grammar) if grammar.ast
    end

    private
@ -148,6 +150,18 @@ class Propane
            "#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}"
        end
      end
+      # Now merge in the field aliases as given by the user in the
+      # grammar.
+      field_aliases = {}
+      @rules.each do |rule|
+        rule.aliases.each do |alias_name, index|
+          if field_aliases[alias_name] && field_aliases[alias_name] != index
+            raise Error.new("Error: conflicting AST node field positions for alias `#{alias_name}`")
+          end
+          field_aliases[alias_name] = index
+          @ast_fields[index][alias_name] = @ast_fields[index].first[1]
+        end
+      end
    end

  end
--- a/spec/propane_spec.rb
+++ b/spec/propane_spec.rb
@ -213,6 +213,42 @@ EOF
    expect(File.binread("spec/run/testparser.log")).to match %r{Shift/Reduce conflict \(state \d+\) between token b and rule As2\? \(defined on line 4\)}
  end

+  it "errors on duplicate field aliases in a rule" do
+    write_grammar <<EOF
+token a;
+token b;
+Start -> a:foo b:foo;
+EOF
+    results = run_propane(extra_args: %w[-w], capture: true)
+    expect(results.stderr).to match %r{Error: duplicate field alias `foo` for rule Start defined on line 3}
+    expect(results.status).to_not eq 0
+  end
+
+  it "errors when an alias is in different positions for different rules in a rule set when AST mode is enabled" do
+    write_grammar <<EOF
+ast;
+token a;
+token b;
+Start -> a:foo b;
+Start -> b b:foo;
+EOF
+    results = run_propane(extra_args: %w[-w], capture: true)
+    expect(results.stderr).to match %r{Error: conflicting AST node field positions for alias `foo`}
+    expect(results.status).to_not eq 0
+  end
+
+  it "does not error when an alias is in different positions for different rules in a rule set when AST mode is not enabled" do
+    # Same rules as the AST-mode conflict example, but without the `ast;`
+    # statement.
+    write_grammar <<EOF
+token a;
+token b;
+Start -> a:foo b;
+Start -> b b:foo;
+EOF
+    outcome = run_propane(capture: true, extra_args: ["-w"])
+    expect(outcome.stderr).to eq("")
+    expect(outcome.status).to eq(0)
+  end
+
  %w[d c].each do |language|

    context "#{language.upcase} language" do
@ -1120,6 +1156,70 @@ EOF
        expect(results.stderr).to eq ""
        expect(results.status).to eq 0
      end
+
+      it "allows specifying field aliases in AST mode" do
+        write_grammar <<EOF
+ast;
+
+token a;
+token b;
+token c;
+drop /\\s+/;
+Start -> T:first T:second T:third;
+T -> a;
+T -> b;
+T -> c;
+EOF
+        run_propane(language: language)
+        compile("spec/test_ast_field_aliases.#{language}", language: language)
+        results = run_test
+        expect(results.stderr).to eq ""
+        expect(results.status).to eq 0
+      end
+
+      it "allows specifying field aliases when AST mode is not enabled" do
+        if language == "d"
+          write_grammar <<EOF
+<<
+import std.stdio;
+>>
+ptype string;
+token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
+  $$ = match;
+>>
+drop /\\s+/;
+Start -> id:first id:second <<
+  writeln("first is ", ${first});
+  writeln("second is ", ${second});
+>>
+EOF
+        else
+          write_grammar <<EOF
+<<
+#include <stdio.h>
+#include <string.h>
+>>
+ptype char const *;
+token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
+  char * s = malloc(match_length + 1);
+  strncpy(s, (char const *)match, match_length);
+  s[match_length] = 0;
+  $$ = s;
+>>
+drop /\\s+/;
+Start -> id:first id:second <<
+  printf("first is %s\\n", ${first});
+  printf("second is %s\\n", ${second});
+>>
+EOF
+        end
+        run_propane(language: language)
+        compile("spec/test_field_aliases.#{language}", language: language)
+        results = run_test
+        expect(results.stderr).to eq ""
+        expect(results.status).to eq 0
+        expect(results.stdout).to match /first is foo1.*second is bar2/m
+      end
    end
  end
 end
--- a/spec/test_ast_field_aliases.c
+++ b/spec/test_ast_field_aliases.c
@ -0,0 +1,19 @@
+#include "testparser.h"
+#include <assert.h>
+#include <string.h>
+#include "testutils.h"
+
+/* Parse the tokens a, b and c and check that the aliased fields of the Start
+ * node (first, second, third) lead to the tokens in the matching positions.
+ * Returns 0 on success, 1 if the parse fails. */
+int main(void)
+{
+    char const * input = "\na\nb\nc";
+    p_context_t context;
+    p_context_init(&context, (uint8_t const *)input, strlen(input));
+    /* Call p_parse() outside of assert() so that the parse still runs, and a
+     * failure is still reported, when assertions are compiled out (NDEBUG). */
+    int const parsed = (p_parse(&context) == P_SUCCESS);
+    assert(parsed);
+    if (!parsed)
+    {
+        return 1;
+    }
+    Start * start = p_result(&context);
+
+    assert_eq(TOKEN_a, start->first->pToken->token);
+    assert_eq(TOKEN_b, start->second->pToken->token);
+    assert_eq(TOKEN_c, start->third->pToken->token);
+
+    return 0;
+}
--- a/spec/test_ast_field_aliases.d
+++ b/spec/test_ast_field_aliases.d
@ -0,0 +1,21 @@
+import testparser;
+import std.stdio;
+import testutils;
+
+// Entry point: performs no work and returns 0. The parser checks in this file
+// are in the unittest block.
+int main()
+{
+    return 0;
+}
+
+unittest
+{
+    // The input holds the tokens a, b and c, each on its own line.
+    string source = "\na\nb\nc";
+    p_context_t ctx;
+    p_context_init(&ctx, source);
+    assert(p_parse(&ctx) == P_SUCCESS);
+    Start * root = p_result(&ctx);
+
+    // Each alias must lead to the token at its position in the Start rule.
+    assert_eq(TOKEN_a, root.first.pToken.token);
+    assert_eq(TOKEN_b, root.second.pToken.token);
+    assert_eq(TOKEN_c, root.third.pToken.token);
+}
--- a/spec/test_field_aliases.c
+++ b/spec/test_field_aliases.c
@ -0,0 +1,13 @@
+#include "testparser.h"
+#include <assert.h>
+#include <string.h>
+#include "testutils.h"
+
+/* Parse the two identifiers "foo1" and "bar2".
+ * Returns 0 on success, 1 if the parse fails. */
+int main(void)
+{
+    char const * input = "foo1\nbar2";
+    p_context_t context;
+    p_context_init(&context, (uint8_t const *)input, strlen(input));
+    /* Call p_parse() outside of assert() so that the parse still runs, and a
+     * failure is still reported, when assertions are compiled out (NDEBUG). */
+    int const parsed = (p_parse(&context) == P_SUCCESS);
+    assert(parsed);
+    if (!parsed)
+    {
+        return 1;
+    }
+    return 0;
+}
--- a/spec/test_field_aliases.d
+++ b/spec/test_field_aliases.d
@ -0,0 +1,15 @@
+import testparser;
+import std.stdio;
+
+// Entry point: performs no work and returns 0. The parser check in this file
+// is in the unittest block.
+int main()
+{
+    return 0;
+}
+
+unittest
+{
+    // Two identifiers separated by a newline.
+    string source = "foo1\nbar2";
+    p_context_t ctx;
+    p_context_init(&ctx, source);
+    assert(p_parse(&ctx) == P_SUCCESS);
+}