Add rule field aliases - #24

2024-07-22 17:29:54 -04:00 · 2024-07-22 17:29:54 -04:00 · a7348be95d
commit a7348be95d
parent 9746b3f2bf
10 changed files with 261 additions and 27 deletions
--- a/doc/user_guide.md
+++ b/doc/user_guide.md
@ -234,15 +234,15 @@ drop /\\s+/;
 Start -> Items;
-Items -> Item ItemsMore;
+Items -> Item:item ItemsMore;
 Items -> ;
-ItemsMore -> comma Item ItemsMore;
+ItemsMore -> comma Item:item ItemsMore;
 ItemsMore -> ;
 Item -> a;
 Item -> b;
-Item -> lparen Item rparen;
+Item -> lparen Item:item rparen;
 Item -> Dual;
 Dual -> One Two;
@ -263,24 +263,24 @@ Start * start = p_result(&context);
 assert(start.pItems1 !is null);
 assert(start.pItems !is null);
 Items * items = start.pItems;
-assert(items.pItem !is null);
+assert(items.item !is null);
-assert(items.pItem.pToken1 !is null);
+assert(items.item.pToken1 !is null);
-assert_eq(TOKEN_a, items.pItem.pToken1.token);
+assert_eq(TOKEN_a, items.item.pToken1.token);
-assert_eq(11, items.pItem.pToken1.pvalue);
+assert_eq(11, items.item.pToken1.pvalue);
 assert(items.pItemsMore !is null);
 ItemsMore * itemsmore = items.pItemsMore;
-assert(itemsmore.pItem !is null);
+assert(itemsmore.item !is null);
-assert(itemsmore.pItem.pItem !is null);
+assert(itemsmore.item.item !is null);
-assert(itemsmore.pItem.pItem.pItem !is null);
+assert(itemsmore.item.item.item !is null);
-assert(itemsmore.pItem.pItem.pItem.pToken1 !is null);
+assert(itemsmore.item.item.item.pToken1 !is null);
-assert_eq(TOKEN_b, itemsmore.pItem.pItem.pItem.pToken1.token);
+assert_eq(TOKEN_b, itemsmore.item.item.item.pToken1.token);
-assert_eq(22, itemsmore.pItem.pItem.pItem.pToken1.pvalue);
+assert_eq(22, itemsmore.item.item.item.pToken1.pvalue);
 assert(itemsmore.pItemsMore !is null);
 itemsmore = itemsmore.pItemsMore;
-assert(itemsmore.pItem !is null);
+assert(itemsmore.item !is null);
-assert(itemsmore.pItem.pToken1 !is null);
+assert(itemsmore.item.pToken1 !is null);
-assert_eq(TOKEN_b, itemsmore.pItem.pToken1.token);
+assert_eq(TOKEN_b, itemsmore.item.pToken1.token);
-assert_eq(22, itemsmore.pItem.pToken1.pvalue);
+assert_eq(22, itemsmore.item.pToken1.pvalue);
 assert(itemsmore.pItemsMore is null);
 ```
@ -607,6 +607,10 @@ This can be changed with the `start` statement.
 The grammar file must define a rule with the name of the start rule name which
 will be used as the top-level starting rule that the parser attempts to reduce.
 Rule statements are composed of the name of the rule, a `->` token, the fields
 defining the rule pattern that must be matched, and a terminating semicolon or
 user code block.
 Example:
 ```
@ -635,9 +639,13 @@ E4 -> lparen E1 rparen << $$ = $2; >>
 This example uses the default start rule name of `Start`.
-A parser rule has zero or more terms on the right side of its definition.
+A parser rule has zero or more fields on the right side of its definition.
-Each of these terms is either a token name or a rule name.
+Each of these fields is either a token name or a rule name.
-A term can be immediately followed by a `?` character to signify that it is
+A field can optionally be followed by a `:` and then a field alias name.
 If present, the field alias name is used to refer to the field value in user
 code blocks, or if AST mode is active, the field alias name is used as the
 field name in the generated AST node structure.
 A field can be immediately followed by a `?` character to signify that it is
 optional.
 Another example:
@ -647,14 +655,16 @@ token private;
 token int;
 token ident /[a-zA-Z_][a-zA-Z_0-9]*/;
 token semicolon /;/;
-IntegerDeclaration -> Visibility? int ident semicolon;
+IntegerDeclaration -> Visibility? int ident:name semicolon;
 Visibility -> public;
 Visibility -> private;
 ```
-In a parser rule code block, parser values for the right side terms are
+In a parser rule code block, parser values for the right side fields are
-accessible as `$1` for the first term's parser value, `$2` for the second
+accessible as `$1` for the first field's parser value, `$2` for the second
-term's parser value, etc...
+field's parser value, and so on.
 For the `IntegerDeclaration` rule, the third field value can also be referred
 to as `${name}`.
 The `$$` symbol accesses the output parser value for this rule.
 The above examples demonstrate how the parser values for the rule components
 can be used to produce the parser value for the accepted rule.
@ -849,6 +859,19 @@ If the first rule is matched, then `pOne1` and `pTwo2` will be non-null while
 `pTwo1` and `pOne2` will be null.
 If the second rule is matched instead, then the opposite would be the case.
 If a field alias is present in a rule definition, an additional field will be
 generated in the AST node with the field alias name.
 For example:
 ```
 Exp -> Exp:left plus ExpB:right;
 ```
 In the generated `Exp` structure, the fields `pExp`, `pExp1`, and `left` will
 all point to the same child node (an instance of the `Exp` structure), and the
 fields `pExpB`, `pExpB3`, and `right` will all point to the same child node
 (an instance of the `ExpB` structure).
 ##> Functions
 ### `p_context_init`
--- a/lib/propane/generator.rb
+++ b/lib/propane/generator.rb
@ -276,6 +276,19 @@ class Propane
            "statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}"
          end
        end
        code = code.gsub(/\$\{(\w+)\}/) do |match|
          aliasname = $1
          if index = rule.aliases[aliasname]
            case @language
            when "c"
              "state_values_stack_index(statevalues, -(int)n_states + #{index})->pvalue.v_#{rule.components[index].ptypename}"
            when "d"
              "statevalues[$-n_states+#{index}].pvalue.v_#{rule.components[index].ptypename}"
            end
          else
            raise Error.new("Field alias '#{aliasname}' not found")
          end
        end
      else
        code = code.gsub(/\$\$/) do |match|
          if @grammar.ast
--- a/lib/propane/grammar.rb
+++ b/lib/propane/grammar.rb
@ -198,7 +198,7 @@ class Propane
        if @ast && ptypename
          raise Error.new("Multiple ptypes are unsupported in AST mode")
        end
-        md = consume!(/((?:#{IDENTIFIER_REGEX}\??\s*)*)\s*/, "expected rule component list")
+        md = consume!(/((?:#{IDENTIFIER_REGEX}(?::#{IDENTIFIER_REGEX})?\??\s*)*)\s*/, "expected rule component list")
        components = md[1].strip.split(/\s+/)
        if @ast
          consume!(/;/, "expected `;'")
--- a/lib/propane/rule.rb
+++ b/lib/propane/rule.rb
@ -6,6 +6,10 @@ class Propane
    #   Rule components.
    attr_reader :components
    # @return [Hash]
    #   Field aliases.
    attr_reader :aliases
    # @return [String]
    #   User code associated with the rule.
    attr_reader :code
@ -49,7 +53,19 @@ class Propane
    #   Line number where the rule was defined in the input grammar.
    def initialize(name, components, code, ptypename, line_number)
      @name = name
-      @components = components
+      @aliases = {}
      @components = components.each_with_index.map do |component, i|
        if component =~ /(\S+):(\S+)/
          c, aliasname = $1, $2
          if @aliases[aliasname]
            raise Error.new("Error: duplicate field alias `#{aliasname}` for rule #{name} defined on line #{line_number}")
          end
          @aliases[aliasname] = i
          c
        else
          component
        end
      end
      @rule_set_node_field_index_map = components.map {0}
      @code = code
      @ptypename = ptypename
--- a/lib/propane/rule_set.rb
+++ b/lib/propane/rule_set.rb
@ -100,8 +100,10 @@ class Propane
    # Complete this RuleSet once every Rule has been added to it.
    #
    # The AST field information is only built when the grammar has AST mode
    # enabled.
    #
    # @param grammar
    #   Grammar object; its +ast+ attribute decides whether AST fields are
    #   built.
    def finalize(grammar)
      build_ast_fields(grammar) if grammar.ast
    end
    private
@ -148,6 +150,18 @@ class Propane
            "#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}"
        end
      end
      # Now merge in the field aliases as given by the user in the
      # grammar.
      field_aliases = {}
      @rules.each do |rule|
        rule.aliases.each do |alias_name, index|
          if field_aliases[alias_name] && field_aliases[alias_name] != index
            raise Error.new("Error: conflicting AST node field positions for alias `#{alias_name}`")
          end
          field_aliases[alias_name] = index
          @ast_fields[index][alias_name] = @ast_fields[index].first[1]
        end
      end
    end
  end
--- a/spec/propane_spec.rb
+++ b/spec/propane_spec.rb
@ -213,6 +213,42 @@ EOF
    expect(File.binread("spec/run/testparser.log")).to match %r{Shift/Reduce conflict \(state \d+\) between token b and rule As2\? \(defined on line 4\)}
  end
  it "errors on duplicate field aliases in a rule" do
    # Both fields of the single Start rule carry the alias `foo`.
    write_grammar <<EOF
 token a;
 token b;
 Start -> a:foo b:foo;
 EOF
    outcome = run_propane(capture: true, extra_args: ["-w"])
    expect(outcome.stderr).to match(%r{Error: duplicate field alias `foo` for rule Start defined on line 3})
    expect(outcome.status).not_to eq(0)
  end
  it "errors when an alias is in different positions for different rules in a rule set when AST mode is enabled" do
    # `foo` names field 0 in the first Start rule but field 1 in the second.
    write_grammar <<EOF
 ast;
 token a;
 token b;
 Start -> a:foo b;
 Start -> b b:foo;
 EOF
    outcome = run_propane(capture: true, extra_args: ["-w"])
    expect(outcome.stderr).to match(%r{Error: conflicting AST node field positions for alias `foo`})
    expect(outcome.status).not_to eq(0)
  end
  it "does not error when an alias is in different positions for different rules in a rule set when AST mode is not enabled" do
    # Same alias layout as the AST-mode conflict case, but without `ast;`.
    write_grammar <<EOF
 token a;
 token b;
 Start -> a:foo b;
 Start -> b b:foo;
 EOF
    outcome = run_propane(capture: true, extra_args: ["-w"])
    expect(outcome.stderr).to eq("")
    expect(outcome.status).to eq(0)
  end
  %w[d c].each do |language|
    context "#{language.upcase} language" do
@ -1120,6 +1156,70 @@ EOF
        expect(results.stderr).to eq ""
        expect(results.status).to eq 0
      end
      it "allows specifying field aliases in AST mode" do
        # Three T fields in one rule, each given its own alias.
        write_grammar <<EOF
 ast;
 token a;
 token b;
 token c;
 drop /\\s+/;
 Start -> T:first T:second T:third;
 T -> a;
 T -> b;
 T -> c;
 EOF
        test_source = "spec/test_ast_field_aliases.#{language}"
        run_propane(language: language)
        compile(test_source, language: language)
        outcome = run_test
        expect(outcome.stderr).to eq("")
        expect(outcome.status).to eq(0)
      end
      it "allows specifying field aliases when AST mode is not enabled" do
        # The rule action prints the values reached through the ${first} and
        # ${second} aliases; the output is checked at the end of the example.
        if language == "d"
          write_grammar <<EOF
 <<
 import std.stdio;
 >>
 ptype string;
 token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
  $$ = match;
 >>
 drop /\\s+/;
 Start -> id:first id:second <<
  writeln("first is ", ${first});
  writeln("second is ", ${second});
 >>
 EOF
        else
          # <stdlib.h> is included explicitly because the token action calls
          # malloc(); the grammar should not depend on another file pulling
          # that declaration in.
          write_grammar <<EOF
 <<
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 >>
 ptype char const *;
 token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
  char * s = malloc(match_length + 1);
  strncpy(s, (char const *)match, match_length);
  s[match_length] = 0;
  $$ = s;
 >>
 drop /\\s+/;
 Start -> id:first id:second <<
  printf("first is %s\\n", ${first});
  printf("second is %s\\n", ${second});
 >>
 EOF
        end
        run_propane(language: language)
        compile("spec/test_field_aliases.#{language}", language: language)
        results = run_test
        expect(results.stderr).to eq ""
        expect(results.status).to eq 0
        # Parenthesized so the regex literal is not parsed ambiguously.
        expect(results.stdout).to match(/first is foo1.*second is bar2/m)
      end
    end
  end
 end
--- a/spec/test_ast_field_aliases.c
+++ b/spec/test_ast_field_aliases.c
@ -0,0 +1,19 @@
 #include "testparser.h"
 #include <assert.h>
 #include <string.h>
 #include "testutils.h"
 /*
  * Parse the tokens a, b and c (separated by newlines) and check that the
  * `first`, `second` and `third` alias fields of the Start node refer to
  * them in that order.
  */
 int main(void)
 {
    char const * input = "\na\nb\nc";
    p_context_t context;
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    /* p_parse() is called outside of assert() so that the parse still runs
     * when assertions are compiled out with NDEBUG. */
    if (p_parse(&context) != P_SUCCESS)
    {
        assert(0 && "p_parse() failed");
        return 1;
    }
    Start * start = p_result(&context);
    assert_eq(TOKEN_a, start->first->pToken->token);
    assert_eq(TOKEN_b, start->second->pToken->token);
    assert_eq(TOKEN_c, start->third->pToken->token);
    return 0;
 }
--- a/spec/test_ast_field_aliases.d
+++ b/spec/test_ast_field_aliases.d
@ -0,0 +1,21 @@
 import testparser;
 import std.stdio;
 import testutils;
 // Entry point does no work of its own; this file's checks are in the
 // unittest block.
 int main()
 {
    return 0;
 }
 // Parse the tokens a, b and c and check that each alias field of the Start
 // node refers to the expected token.
 unittest
 {
    string source = "\na\nb\nc";
    p_context_t ctx;
    p_context_init(&ctx, source);
    assert(p_parse(&ctx) == P_SUCCESS);
    Start * root = p_result(&ctx);
    // Aliases are checked in the order they appear in the rule.
    assert_eq(TOKEN_a, root.first.pToken.token);
    assert_eq(TOKEN_b, root.second.pToken.token);
    assert_eq(TOKEN_c, root.third.pToken.token);
 }
--- a/spec/test_field_aliases.c
+++ b/spec/test_field_aliases.c
@ -0,0 +1,13 @@
 #include "testparser.h"
 #include <assert.h>
 #include <string.h>
 #include "testutils.h"
 /*
  * Parse two identifiers. This function only checks that the parse
  * succeeds; it does not inspect any parser values itself.
  */
 int main(void)
 {
    char const * input = "foo1\nbar2";
    p_context_t context;
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    /* p_parse() is called outside of assert() so that the parse still runs
     * when assertions are compiled out with NDEBUG. */
    if (p_parse(&context) != P_SUCCESS)
    {
        assert(0 && "p_parse() failed");
        return 1;
    }
    return 0;
 }
--- a/spec/test_field_aliases.d
+++ b/spec/test_field_aliases.d
@ -0,0 +1,15 @@
 import testparser;
 import std.stdio;
 // Entry point does no work of its own; this file's checks are in the
 // unittest block.
 int main()
 {
    return 0;
 }
 // Parse two identifiers; only the success of the parse is asserted here.
 unittest
 {
    string source = "foo1\nbar2";
    p_context_t ctx;
    p_context_init(&ctx, source);
    assert(p_parse(&ctx) == P_SUCCESS);
 }