Allow specifying the start rule name

2024-05-05 12:39:00 -04:00 · 2024-05-05 12:39:00 -04:00 · 494afb7307
commit 494afb7307
parent 508dabe760
12 changed files with 119 additions and 18 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -3,6 +3,7 @@
 ### New Features

 - Allow user to specify AST node name prefix or suffix
+- Allow specifying the start rule name

 ## v1.3.0

--- a/assets/parser.c.erb
+++ b/assets/parser.c.erb
@ -924,7 +924,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
            {
                /* Successful parse. */
 <% if @grammar.ast %>
-                context->parse_result = (<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> *)state_values_stack_index(&statevalues, -1)->ast_node;
+                context->parse_result = (<%= @grammar.ast_prefix %><%= @grammar.start_rule %><%= @grammar.ast_suffix %> *)state_values_stack_index(&statevalues, -1)->ast_node;
 <% else %>
                context->parse_result = state_values_stack_index(&statevalues, -1)->pvalue;
 <% end %>
@ -1029,7 +1029,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 * @return Parse result value.
 */
 <% if @grammar.ast %>
-<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
+<%= @grammar.ast_prefix %><%= @grammar.start_rule %><%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% else %>
 <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% end %>
--- a/assets/parser.d.erb
+++ b/assets/parser.d.erb
@ -144,7 +144,7 @@ public struct <%= @grammar.prefix %>context_t

    /** Parse result value. */
 <% if @grammar.ast %>
-    <%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * parse_result;
+    <%= @grammar.ast_prefix %><%= @grammar.start_rule %><%= @grammar.ast_suffix %> * parse_result;
 <% else %>
    <%= @grammar.prefix %>value_t parse_result;
 <% end %>
@ -973,7 +973,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
            {
                /* Successful parse. */
 <% if @grammar.ast %>
-                context.parse_result = cast(<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> *)statevalues[$-1].ast_node;
+                context.parse_result = cast(<%= @grammar.ast_prefix %><%= @grammar.start_rule %><%= @grammar.ast_suffix %> *)statevalues[$-1].ast_node;
 <% else %>
                context.parse_result = statevalues[$-1].pvalue;
 <% end %>
@ -1075,7 +1075,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
 * @return Parse result value.
 */
 <% if @grammar.ast %>
-public <%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
+public <%= @grammar.ast_prefix %><%= @grammar.start_rule %><%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% else %>
 public <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% end %>
--- a/assets/parser.h.erb
+++ b/assets/parser.h.erb
@ -144,7 +144,7 @@ typedef struct

    /** Parse result value. */
 <% if @grammar.ast %>
-    <%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * parse_result;
+    <%= @grammar.ast_prefix %><%= @grammar.start_rule %><%= @grammar.ast_suffix %> * parse_result;
 <% else %>
    <%= @grammar.prefix %>value_t parse_result;
 <% end %>
@ -173,7 +173,7 @@ size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%=
 size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context);

 <% if @grammar.ast %>
-<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
+<%= @grammar.ast_prefix %><%= @grammar.start_rule %><%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
 <% else %>
 <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
 <% end %>
--- a/doc/user_guide.md
+++ b/doc/user_guide.md
@ -203,8 +203,10 @@ In AST generation mode various aspects of propane's behavior are changed:
  * Parser user code blocks are not supported.
  * Structure types are generated to represent the parsed tokens and rules as
  defined in the grammar.
-  * The parse result from `p_result()` points to a `Start` structure containing
-  the entire parse tree for the input.
+  * The parse result from `p_result()` points to a `Start` struct containing
+  the entire parse tree for the input. If the user has changed the start rule
+  with the `start` grammar statement, the struct is named after the
+  user-specified start rule instead of `Start`.

 Example AST generation grammar:

@ -594,21 +596,24 @@ Rules with the same name define a rule set for that name and act as
 alternatives that the parser can accept when attempting to match a reference to
 that rule.

-The grammar file must define a rule with the name `Start` which will be used as
-the top-level starting rule that the parser attempts to reduce.
+The default start rule name is `Start`.
+This can be changed with the `start` statement.
+The grammar file must define a rule with the start rule name; this rule is
+used as the top-level starting rule that the parser attempts to reduce.

 Example:

 ```
 ptype ulong;
+start Top;
 token word /[a-z]+/ << $$ = match.length; >>
-Start -> word << $$ = $1; >>
+Top -> word << $$ = $1; >>
 ```

-In the above example the `Start` rule is defined to match a single `word`
+In the above example the `Top` rule is defined to match a single `word`
 token.

-Example:
+Another example:

 ```
 Start -> E1 << $$ = $1; >>
@ -622,6 +627,8 @@ E4 -> integer << $$ = $1; >>
 E4 -> lparen E1 rparen << $$ = $2; >>
 ```

+This example uses the default start rule name of `Start`.
+
 A parser rule has zero or more terms on the right side of its definition.
 Each of these terms is either a token name or a rule name.

@ -635,6 +642,16 @@ can be used to produce the parser value for the accepted rule.
 Parser rule code blocks are not allowed and not used when AST generation mode
 is active.

+##> Specifying the parser start rule name - the `start` statement
+
+The start rule can be changed from the default of `Start` by using the `start`
+statement. The grammar must define a rule with the given name.
+Example:
+
+```
+start MyStartRule;
+```
+
 ##> Specifying the parser module name - the `module` statement

 The `module` statement can be used to specify the module name for a generated
--- a/lib/propane/generator.rb
+++ b/lib/propane/generator.rb
@ -66,11 +66,11 @@ class Propane
        tokens_by_name[token.name] = token
      end
      # Check for user start rule.
-      unless @grammar.rules.find {|rule| rule.name == "Start"}
-        raise Error.new("Start rule not found")
+      unless @grammar.rules.find {|rule| rule.name == @grammar.start_rule}
+        raise Error.new("Start rule `#{@grammar.start_rule}` not found")
      end
      # Add "real" start rule.
-      @grammar.rules.unshift(Rule.new("$Start", ["Start", "$EOF"], nil, nil, nil))
+      @grammar.rules.unshift(Rule.new("$Start", [@grammar.start_rule, "$EOF"], nil, nil, nil))
      rule_sets = {}
      rule_set_id = @grammar.tokens.size
      @grammar.rules.each_with_index do |rule, rule_id|
@ -270,7 +270,7 @@ class Propane
    #   Start rule parser value type name and type string.
    def start_rule_type
      start_rule = @grammar.rules.find do |rule|
-        rule.name == "Start"
+        rule.name == @grammar.start_rule
      end
      [start_rule.ptypename, @grammar.ptypes[start_rule.ptypename]]
    end
--- a/lib/propane/grammar.rb
+++ b/lib/propane/grammar.rb
@ -11,6 +11,7 @@ class Propane
    attr_reader :modulename
    attr_reader :patterns
    attr_reader :rules
+    attr_reader :start_rule
    attr_reader :tokens
    attr_reader :code_blocks
    attr_reader :ptypes
@ -18,6 +19,7 @@ class Propane

    def initialize(input)
      @patterns = []
+      @start_rule = "Start"
      @tokens = []
      @rules = []
      @code_blocks = {}
@ -63,6 +65,7 @@ class Propane
      elsif parse_module_statement!
      elsif parse_ptype_statement!
      elsif parse_pattern_statement!
+      elsif parse_start_statement!
      elsif parse_token_statement!
      elsif parse_tokenid_statement!
      elsif parse_drop_statement!
@ -228,6 +231,12 @@ class Propane
      end
    end

+    def parse_start_statement!
+      if md = consume!(/start\s+(\w+)\s*;/)
+        @start_rule = md[1]
+      end
+    end
+
    def parse_code_block_statement!
      if md = consume!(/<<([a-z]*)(.*?)>>\n/m)
        name, code = md[1..2]
--- a/spec/propane_spec.rb
+++ b/spec/propane_spec.rb
@ -889,6 +889,27 @@ EOF
        expect(results.stderr).to eq ""
        expect(results.status).to eq 0
      end
+
+      it "allows specifying a different start rule" do
+        write_grammar <<EOF
+token hi;
+start Top;
+Top -> hi;
+EOF
+        run_propane(language: language)
+        compile("spec/test_start_rule.#{language}", language: language)
+      end
+
+      it "allows specifying a different start rule with AST generation" do
+        write_grammar <<EOF
+ast;
+token hi;
+start Top;
+Top -> hi;
+EOF
+        run_propane(language: language)
+        compile("spec/test_start_rule_ast.#{language}", language: language)
+      end
    end
  end
 end
--- a/spec/test_start_rule.c
+++ b/spec/test_start_rule.c
@ -0,0 +1,9 @@
+#include "testparser.h"
+#include <assert.h>
+#include <string.h>
+#include "testutils.h"
+
+int main() /* No runtime checks; this file only needs to compile against the generated parser. */
+{
+    return 0;
+}
--- a/spec/test_start_rule.d
+++ b/spec/test_start_rule.d
@ -0,0 +1,8 @@
+import testparser;
+import std.stdio;
+import testutils;
+
+int main() // No runtime checks; this file only needs to compile against the generated parser.
+{
+    return 0;
+}
--- a/spec/test_start_rule_ast.c
+++ b/spec/test_start_rule_ast.c
@ -0,0 +1,17 @@
+#include "testparser.h"
+#include <assert.h>
+#include <string.h>
+#include "testutils.h"
+
+int main() /* Parses "hi" and checks the AST root produced for the custom start rule. */
+{
+    char const * input = "hi";
+    p_context_t context;
+    p_context_init(&context, (uint8_t const *)input, strlen(input));
+    assert_eq(P_SUCCESS, p_parse(&context));
+    Top * top = p_result(&context); /* Result type follows the `start Top;` rule name, not `Start`. */
+    assert(top->pToken != NULL);
+    assert_eq(TOKEN_hi, top->pToken->token); /* The only token in the input is `hi`. */
+
+    return 0;
+}
--- a/spec/test_start_rule_ast.d
+++ b/spec/test_start_rule_ast.d
@ -0,0 +1,19 @@
+import testparser;
+import std.stdio;
+import testutils;
+
+int main() // Nothing to do here; the checks are in the unittest block.
+{
+    return 0;
+}
+
+unittest // Parses "hi" and checks the AST root produced for the custom start rule.
+{
+    string input = "hi";
+    p_context_t context;
+    p_context_init(&context, input);
+    assert_eq(P_SUCCESS, p_parse(&context));
+    Top * top = p_result(&context); // Result type follows the `start Top;` rule name, not `Start`.
+    assert(top.pToken !is null);
+    assert_eq(TOKEN_hi, top.pToken.token); // The only token in the input is `hi`.
+}