Add support for multiple starting rules - close #38

This commit is contained in:
Josh Holtrop 2026-02-09 21:21:49 -05:00
parent cb426b4be1
commit 6a87bb2d56
14 changed files with 383 additions and 37 deletions

View File

@ -924,6 +924,8 @@ static size_t check_reduce(size_t state_id, <%= @grammar.prefix %>token_t token)
*
* @param context
* Lexer/parser context structure.
* @param start_state_id
* ID of the state in which to start.
*
* @retval P_SUCCESS
* The parser successfully matched the input text. The parse result value
@ -936,7 +938,7 @@ static size_t check_reduce(size_t state_id, <%= @grammar.prefix %>token_t token)
* @retval P_UNEXPECTED_INPUT
* Input text does not match any lexer pattern.
*/
size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
static size_t parse_from(<%= @grammar.prefix %>context_t * context, size_t start_state_id)
{
<%= @grammar.prefix %>token_info_t token_info;
<%= @grammar.prefix %>token_t token = INVALID_TOKEN_ID;
@ -949,6 +951,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
<% end %>
state_values_stack_init(&statevalues);
state_values_stack_push(&statevalues);
state_values_stack_index(&statevalues, -1)->state_id = start_state_id;
size_t result;
for (;;)
{
@ -974,7 +977,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
{
/* Successful parse. */
<% if @grammar.ast %>
context->parse_result = (<%= @grammar.ast_prefix %><%= @grammar.start_rule %><%= @grammar.ast_suffix %> *)state_values_stack_index(&statevalues, -1)->ast_node;
context->parse_result = state_values_stack_index(&statevalues, -1)->ast_node;
<% else %>
context->parse_result = state_values_stack_index(&statevalues, -1)->pvalue;
<% end %>
@ -1101,6 +1104,19 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
return result;
}
/**
 * Public parse entry point using the default start state.
 *
 * Delegates to parse_from() with a start state ID of 0.
 *
 * @param context
 *   Lexer/parser context structure.
 *
 * @return The result code returned by parse_from().
 */
size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
{
return parse_from(context, 0u);
}
<% @grammar.start_rules.each_with_index do |start_rule, i| %>
/*
 * One parse function is generated per start rule. Each delegates to
 * parse_from(), passing the start rule's index in the grammar's list of
 * start rules as the start state ID.
 */
size_t <%= @grammar.prefix %>parse_<%= start_rule %>(<%= @grammar.prefix %>context_t * context)
{
return parse_from(context, <%= i %>u);
}
<% end %>
/**
* Get the parse result value.
*
@ -1110,17 +1126,28 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
* @return Parse result value.
*/
<% if @grammar.ast %>
<%= @grammar.ast_prefix %><%= @grammar.start_rule %><%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
<%= @grammar.ast_prefix %><%= @grammar.start_rules[0] %><%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
{
return (<%= @grammar.ast_prefix %><%= @grammar.start_rules[0] %><%= @grammar.ast_suffix %> *) context->parse_result;
}
<% @grammar.start_rules.each_with_index do |start_rule, i| %>
<%= @grammar.ast_prefix %><%= start_rule %><%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result_<%= start_rule %>(<%= @grammar.prefix %>context_t * context)
{
return (<%= @grammar.ast_prefix %><%= start_rule %><%= @grammar.ast_suffix %> *) context->parse_result;
}
<% end %>
<% else %>
<%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
<% end %>
{
<% if @grammar.ast %>
return context->parse_result;
<% else %>
return context->parse_result.v_<%= start_rule_type[0] %>;
<% end %>
}
<% @grammar.start_rules.each_with_index do |start_rule, i| %>
<%= start_rule_type(i)[1] %> <%= @grammar.prefix %>result_<%= start_rule %>(<%= @grammar.prefix %>context_t * context)
{
return context->parse_result.v_<%= start_rule_type(i)[0] %>;
}
<% end %>
<% end %>
/**
* Get the current text input position.
@ -1184,8 +1211,18 @@ static void free_ast_node(ASTNode * node)
/**
* Free all AST node memory.
*/
void <%= @grammar.prefix %>free_ast(<%= @grammar.ast_prefix %><%= @grammar.start_rule %><%= @grammar.ast_suffix %> * ast)
void <%= @grammar.prefix %>free_ast(<%= @grammar.ast_prefix %><%= @grammar.start_rules[0] %><%= @grammar.ast_suffix %> * ast)
{
free_ast_node((ASTNode *)ast);
}
<% @grammar.start_rules.each_with_index do |start_rule, i| %>
/**
* Free all AST node memory.
*/
void <%= @grammar.prefix %>free_ast_<%= start_rule %>(<%= @grammar.ast_prefix %><%= start_rule %><%= @grammar.ast_suffix %> * ast)
{
free_ast_node((ASTNode *)ast);
}
<% end %>
<% end %>

View File

@ -173,7 +173,7 @@ public struct <%= @grammar.prefix %>context_t
/** Parse result value. */
<% if @grammar.ast %>
<%= @grammar.ast_prefix %><%= @grammar.start_rule %><%= @grammar.ast_suffix %> * parse_result;
void * parse_result;
<% else %>
<%= @grammar.prefix %>value_t parse_result;
<% end %>
@ -985,6 +985,8 @@ private size_t check_reduce(size_t state_id, <%= @grammar.prefix %>token_t token
*
* @param context
* Lexer/parser context structure.
* @param start_state_id
* ID of the state in which to start.
*
* @retval P_SUCCESS
* The parser successfully matched the input text. The parse result value
@ -997,11 +999,12 @@ private size_t check_reduce(size_t state_id, <%= @grammar.prefix %>token_t token
* @retval P_UNEXPECTED_INPUT
* Input text does not match any lexer pattern.
*/
public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
private size_t parse_from(<%= @grammar.prefix %>context_t * context, size_t start_state_id)
{
<%= @grammar.prefix %>token_info_t token_info;
<%= @grammar.prefix %>token_t token = INVALID_TOKEN_ID;
state_value_t[] statevalues = new state_value_t[](1);
statevalues[0].state_id = start_state_id;
size_t reduced_rule_set = INVALID_ID;
<% if @grammar.ast %>
void * reduced_parser_node;
@ -1031,7 +1034,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
{
/* Successful parse. */
<% if @grammar.ast %>
context.parse_result = cast(<%= @grammar.ast_prefix %><%= @grammar.start_rule %><%= @grammar.ast_suffix %> *)statevalues[$-1].ast_node;
context.parse_result = statevalues[$-1].ast_node;
<% else %>
context.parse_result = statevalues[$-1].pvalue;
<% end %>
@ -1147,6 +1150,19 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
}
}
/**
 * Public parse entry point using the default start state.
 *
 * Delegates to parse_from() with a start state ID of 0.
 *
 * @param context
 *   Lexer/parser context structure.
 *
 * @return The result code returned by parse_from().
 */
public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
{
return parse_from(context, 0u);
}
<% @grammar.start_rules.each_with_index do |start_rule, i| %>
/*
 * One parse function is generated per start rule. Each delegates to
 * parse_from(), passing the start rule's index in the grammar's list of
 * start rules as the start state ID.
 */
public size_t <%= @grammar.prefix %>parse_<%= start_rule %>(<%= @grammar.prefix %>context_t * context)
{
return parse_from(context, <%= i %>u);
}
<% end %>
/**
* Get the parse result value.
*
@ -1156,17 +1172,28 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
* @return Parse result value.
*/
<% if @grammar.ast %>
public <%= @grammar.ast_prefix %><%= @grammar.start_rule %><%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
public <%= @grammar.ast_prefix %><%= @grammar.start_rules[0] %><%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
{
return cast(<%= @grammar.ast_prefix %><%= @grammar.start_rules[0] %><%= @grammar.ast_suffix %> *)context.parse_result;
}
<% @grammar.start_rules.each_with_index do |start_rule, i| %>
public <%= @grammar.ast_prefix %><%= start_rule %><%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result_<%= start_rule %>(<%= @grammar.prefix %>context_t * context)
{
return cast(<%= @grammar.ast_prefix %><%= start_rule %><%= @grammar.ast_suffix %> *)context.parse_result;
}
<% end %>
<% else %>
public <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
<% end %>
{
<% if @grammar.ast %>
return context.parse_result;
<% else %>
return context.parse_result.v_<%= start_rule_type[0] %>;
<% end %>
}
<% @grammar.start_rules.each_with_index do |start_rule, i| %>
public <%= start_rule_type(i)[1] %> <%= @grammar.prefix %>result_<%= start_rule %>(<%= @grammar.prefix %>context_t * context)
{
return context.parse_result.v_<%= start_rule_type(i)[0] %>;
}
<% end %>
<% end %>
/**
* Get the current text input position.

View File

@ -162,7 +162,7 @@ typedef struct
/** Parse result value. */
<% if @grammar.ast %>
<%= @grammar.ast_prefix %><%= @grammar.start_rule %><%= @grammar.ast_suffix %> * parse_result;
void * parse_result;
<% else %>
<%= @grammar.prefix %>value_t parse_result;
<% end %>
@ -189,13 +189,27 @@ size_t <%= @grammar.prefix %>decode_code_point(uint8_t const * input, size_t inp
size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info);
size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context);
<% @grammar.start_rules.each_with_index do |start_rule, i| %>
size_t <%= @grammar.prefix %>parse_<%= start_rule %>(<%= @grammar.prefix %>context_t * context);
<% end %>
<% if @grammar.ast %>
<%= @grammar.ast_prefix %><%= @grammar.start_rule %><%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
void <%= @grammar.prefix %>free_ast(<%= @grammar.ast_prefix %><%= @grammar.start_rule %><%= @grammar.ast_suffix %> * ast);
<%= @grammar.ast_prefix %><%= @grammar.start_rules[0] %><%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
<% @grammar.start_rules.each_with_index do |start_rule, i| %>
<%= @grammar.ast_prefix %><%= start_rule %><%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result_<%= start_rule %>(<%= @grammar.prefix %>context_t * context);
<% end %>
<% else %>
<%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
<% @grammar.start_rules.each_with_index do |start_rule, i| %>
<%= start_rule_type(i)[1] %> <%= @grammar.prefix %>result_<%= start_rule %>(<%= @grammar.prefix %>context_t * context);
<% end %>
<% end %>
<% if @grammar.ast %>
void <%= @grammar.prefix %>free_ast(<%= @grammar.ast_prefix %><%= @grammar.start_rules[0] %><%= @grammar.ast_suffix %> * ast);
<% @grammar.start_rules.each_with_index do |start_rule, i| %>
void <%= @grammar.prefix %>free_ast_<%= start_rule %>(<%= @grammar.ast_prefix %><%= start_rule %><%= @grammar.ast_suffix %> * ast);
<% end %>
<% end %>
<%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @grammar.prefix %>context_t * context);

View File

@ -738,6 +738,22 @@ Example:
start MyStartRule;
```
Multiple start rules can be specified, either with multiple `start` statements
or one `start` statement listing multiple start rules.
Example:
```
start Module ModuleItem Statement Expression;
```
When multiple start rules are specified, multiple `p_parse_*()` functions,
`p_result_*()`, and `p_free_ast_*()` functions (in AST mode) are generated.
Default `p_parse()`, `p_result()`, and (in AST mode) `p_free_ast()` functions
are also generated; they correspond to the first start rule.
Additionally, each start rule causes the generation of another version of each
of these functions, for example `p_parse_Statement()`, `p_result_Statement()`,
and `p_free_ast_Statement()`.
##> Specifying the parser module name - the `module` statement
The `module` statement can be used to specify the module name for a generated
@ -1018,6 +1034,16 @@ p_context_init(&context, input, input_length);
size_t result = p_parse(&context);
```
When multiple start rules are specified, a separate parse function is generated
for each which starts parsing at the given rule.
For example, if `Statement` is specified as a start rule:
```
size_t result = p_parse_Statement(&context);
```
In this case, the parser will start parsing with the `Statement` rule.
### `p_position_valid`
The `p_position_valid()` function is only generated for C targets.
@ -1056,6 +1082,23 @@ if (p_parse(&context) == P_SUCCESS)
If AST generation mode is active, then the `p_result()` function returns a
`Start *` pointing to the `Start` AST structure.
When multiple start rules are specified, a separate result function is generated
for each which returns the parse result for the corresponding rule.
For example, if `Statement` is specified as a start rule:
```
p_context_t context;
p_context_init(&context, input, input_length);
/* `result` must be declared with the result type of the `Statement` rule. */
if (p_parse_Statement(&context) == P_SUCCESS)
{
result = p_result_Statement(&context);
}
```
In this case, the parser will start parsing with the `Statement` rule and the
parse result from the `Statement` rule will be returned.
### `p_position`
The `p_position()` function can be used to retrieve the parser position where
@ -1142,6 +1185,17 @@ If specified, the `free_token_node` function will be called during the
`p_free_ast()` process to allow user code to free any memory associated with
a token node's `pvalue`.
When multiple start rules are specified, a separate `p_free_ast` function is
generated for each which frees the AST resulting from parsing the given rule.
For example, if `Statement` is specified as a start rule:
```
p_free_ast_Statement(statement_ast);
```
In this case, Propane will free a `Statement` AST structure returned by the
`p_result_Statement(&context)` function.
##> Data
### `p_token_names`

View File

@ -71,12 +71,15 @@ class Propane
end
tokens_by_name[token.name] = token
end
# Check for user start rule.
unless @grammar.rules.find {|rule| rule.name == @grammar.start_rule}
raise Error.new("Start rule `#{@grammar.start_rule}` not found")
# Create real start rule(s).
real_start_rules = @grammar.start_rules.map do |start_rule|
unless @grammar.rules.find {|rule| rule.name == start_rule}
raise Error.new("Start rule `#{start_rule}` not found")
end
# Add "real" start rule.
@grammar.rules.unshift(Rule.new("$Start", [@grammar.start_rule, "$EOF"], nil, nil, nil))
Rule.new("$#{start_rule}", [start_rule, "$EOF"], nil, nil, nil)
end
# Add real start rules before user-given rules.
@grammar.rules = real_start_rules + @grammar.rules
# Generate and add rules for optional components.
generate_optional_component_rules!(tokens_by_name)
# Build rule sets.
@ -332,9 +335,9 @@ class Propane
#
# @return [Array<String>]
# Start rule parser value type name and type string.
def start_rule_type
def start_rule_type(start_rule_index = 0)
start_rule = @grammar.rules.find do |rule|
rule.name == @grammar.start_rule
rule.name == @grammar.start_rules[start_rule_index]
end
[start_rule.ptypename, @grammar.ptypes[start_rule.ptypename]]
end

View File

@ -11,8 +11,8 @@ class Propane
attr_reader :free_token_node
attr_reader :modulename
attr_reader :patterns
attr_reader :rules
attr_reader :start_rule
attr_accessor :rules
attr_reader :start_rules
attr_reader :tokens
attr_reader :code_blocks
attr_reader :ptypes
@ -20,7 +20,7 @@ class Propane
def initialize(input)
@patterns = []
@start_rule = "Start"
@start_rules = []
@tokens = []
@rules = []
@code_blocks = {}
@ -35,6 +35,7 @@ class Propane
@ast_suffix = ""
@free_token_node = nil
parse_grammar!
@start_rules << "Start" if @start_rules.empty?
end
def ptype
@ -241,8 +242,11 @@ class Propane
end
def parse_start_statement!
if md = consume!(/start\s+(\w+)\s*;/)
@start_rule = md[1]
if md = consume!(/start\s+([\w\s]*);/)
start_rules = md[1].split(/\s+/).map(&:strip)
start_rules.each do |start_rule|
@start_rules << start_rule unless @start_rules.include?(start_rule)
end
end
end

View File

@ -16,11 +16,20 @@ class Propane
@warnings = Set.new
@errors = Set.new
@options = options
start_item = Item.new(grammar.rules.first, 0)
eval_item_sets = Set[ItemSet.new([start_item])]
start_items = grammar.rules[0...grammar.start_rules.length].map do |start_rule|
Item.new(start_rule, 0)
end
start_item_sets = start_items.map {|item| ItemSet.new([item])}
eval_item_sets = Set[*start_item_sets]
while eval_item_sets.size > 0
item_set = eval_item_sets.first
item_set =
if start_item_sets.size > 0
# Ensure we evaluate start_item_sets first in order
start_item_sets.slice!(0)
else
eval_item_sets.first
end
eval_item_sets.delete(item_set)
unless @item_sets_set.include?(item_set)
item_set.id = @item_sets.size

View File

@ -1440,6 +1440,47 @@ EOF
expect(results.status).to eq 0
end
it "allows multiple starting rules" do
write_grammar <<EOF
ptype int;
token a << $$ = 1; >>
token b << $$ = 2; >>
token c << $$ = 3; >>
Start -> a b R;
Start -> Bs:bs << $$ = $1; >>
R -> c:c << $$ = $1; >>
Bs -> << $$ = 0; >>
Bs -> b:b Bs:bs << $$ = $1 + $2; >>
start Start R Bs;
EOF
run_propane(language: language)
compile("spec/test_starting_rules.#{language}", language: language)
results = run_test(language: language)
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
it "allows multiple starting rules in AST mode" do
write_grammar <<EOF
ast;
ptype int;
token a << $$ = 1; >>
token b << $$ = 2; >>
token c << $$ = 3; >>
Start -> a b R;
Start -> Bs:bs;
R -> c:c;
Bs -> ;
Bs -> b:b Bs:bs;
start Start R Bs;
EOF
run_propane(language: language)
compile("spec/test_starting_rules_ast.#{language}", language: language)
results = run_test(language: language)
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
if %w[c cpp].include?(language)
it "allows a user function to free token node memory in AST mode" do
write_grammar <<EOF

View File

@ -0,0 +1,27 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"
/*
 * Exercise the default parse function and the per-start-rule parse/result
 * functions (Bs and R) in value (non-AST) mode.
 */
int main()
{
    p_context_t context;

    /* "bbbb" through the default entry point. */
    char const * bs_input = "bbbb";
    p_context_init(&context, (uint8_t const *)bs_input, strlen(bs_input));
    assert(p_parse(&context) == P_SUCCESS);
    int start_value = p_result(&context);
    assert_eq(8, start_value);

    /* The same text, this time starting from the Bs rule. */
    p_context_init(&context, (uint8_t const *)bs_input, strlen(bs_input));
    assert(p_parse_Bs(&context) == P_SUCCESS);
    int bs_value = p_result_Bs(&context);
    assert_eq(8, bs_value);

    /* A lone "c", starting from the R rule. */
    char const * r_input = "c";
    p_context_init(&context, (uint8_t const *)r_input, strlen(r_input));
    assert(p_parse_R(&context) == P_SUCCESS);
    int r_value = p_result_R(&context);
    assert_eq(3, r_value);

    return 0;
}

View File

@ -0,0 +1,29 @@
import testparser;
import std.stdio;
import testutils;
/* Empty program entry point; the checks in this file are in its unittest block. */
int main()
{
return 0;
}
/*
 * Exercise the default parse function and the per-start-rule parse/result
 * functions (Bs and R) in value (non-AST) mode.
 */
unittest
{
    p_context_t context;

    /* "bbbb" through the default entry point. */
    string bs_input = "bbbb";
    p_context_init(&context, bs_input);
    assert(p_parse(&context) == P_SUCCESS);
    int start_value = p_result(&context);
    assert(start_value == 8);

    /* The same text, this time starting from the Bs rule. */
    p_context_init(&context, bs_input);
    assert(p_parse_Bs(&context) == P_SUCCESS);
    int bs_value = p_result_Bs(&context);
    assert(bs_value == 8);

    /* A lone "c", starting from the R rule. */
    string r_input = "c";
    p_context_init(&context, r_input);
    assert(p_parse_R(&context) == P_SUCCESS);
    int r_value = p_result_R(&context);
    assert(r_value == 3);
}

View File

@ -0,0 +1,37 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"
/*
 * Exercise the default and per-start-rule parse/result/free functions
 * (Bs and R) in AST mode.
 */
int main()
{
    p_context_t context;

    /* "bbbb" through the default entry point: the result is a Start node. */
    char const * bs_input = "bbbb";
    p_context_init(&context, (uint8_t const *)bs_input, strlen(bs_input));
    assert(p_parse(&context) == P_SUCCESS);
    Start * start_ast = p_result(&context);
    assert_not_null(start_ast->bs);
    assert_not_null(start_ast->bs->b);
    assert_not_null(start_ast->bs->bs->b);
    assert_not_null(start_ast->bs->bs->bs->b);
    assert_not_null(start_ast->bs->bs->bs->bs->b);
    p_free_ast(start_ast);

    /* The same text starting from the Bs rule: the result is a Bs node. */
    p_context_init(&context, (uint8_t const *)bs_input, strlen(bs_input));
    assert(p_parse_Bs(&context) == P_SUCCESS);
    Bs * bs_ast = p_result_Bs(&context);
    assert_not_null(bs_ast->b);
    assert_not_null(bs_ast->bs->b);
    assert_not_null(bs_ast->bs->bs->b);
    assert_not_null(bs_ast->bs->bs->bs->b);
    p_free_ast_Bs(bs_ast);

    /* A lone "c" starting from the R rule: the result is an R node. */
    char const * r_input = "c";
    p_context_init(&context, (uint8_t const *)r_input, strlen(r_input));
    assert(p_parse_R(&context) == P_SUCCESS);
    R * r_ast = p_result_R(&context);
    assert_not_null(r_ast->c);
    p_free_ast_R(r_ast);

    return 0;
}

View File

@ -0,0 +1,36 @@
import testparser;
import std.stdio;
import testutils;
/* Empty program entry point; the checks in this file are in its unittest block. */
int main()
{
return 0;
}
/*
 * Exercise the default and per-start-rule parse/result functions
 * (Bs and R) in AST mode.
 */
unittest
{
    p_context_t context;

    /* "bbbb" through the default entry point: the result is a Start node. */
    string bs_input = "bbbb";
    p_context_init(&context, bs_input);
    assert(p_parse(&context) == P_SUCCESS);
    Start * start_ast = p_result(&context);
    assert(start_ast.bs);
    assert(start_ast.bs.b);
    assert(start_ast.bs.bs.b);
    assert(start_ast.bs.bs.bs.b);
    assert(start_ast.bs.bs.bs.bs.b);

    /* The same text starting from the Bs rule: the result is a Bs node. */
    p_context_init(&context, bs_input);
    assert(p_parse_Bs(&context) == P_SUCCESS);
    Bs * bs_ast = p_result_Bs(&context);
    assert(bs_ast.b);
    assert(bs_ast.bs.b);
    assert(bs_ast.bs.bs.b);
    assert(bs_ast.bs.bs.bs.b);

    /* A lone "c" starting from the R rule: the result is an R node. */
    string r_input = "c";
    p_context_init(&context, r_input);
    assert(p_parse_R(&context) == P_SUCCESS);
    R * r_ast = p_result_R(&context);
    assert(r_ast.c);
}

View File

@ -14,6 +14,24 @@ void assert_eq_size_t_i(size_t expected, size_t actual, char const * file, size_
}
}
/**
 * Fail the test when two size_t values are equal.
 *
 * On failure, a diagnostic naming the call site is written to stderr and
 * assert(false) is triggered.
 *
 * @param expected
 *   Value that actual must differ from.
 * @param actual
 *   Value under test.
 * @param file
 *   Source file name of the call site.
 * @param line
 *   Source line number of the call site.
 */
void assert_ne_size_t_i(size_t expected, size_t actual, char const * file, size_t line)
{
    if (expected == actual)
    {
        /* %zu is the conversion specifier for size_t; %lu only matches where
         * size_t happens to be unsigned long. */
        fprintf(stderr, "%s:%zu: expected not %zu, got %zu\n", file, line, expected, actual);
        assert(false);
    }
}
/**
 * Fail the test when a pointer is NULL.
 *
 * On failure, a diagnostic naming the call site is written to stderr and
 * assert(false) is triggered.
 *
 * @param ptr
 *   Pointer under test.
 * @param file
 *   Source file name of the call site.
 * @param line
 *   Source line number of the call site.
 */
void assert_not_null_i(void * ptr, char const * file, size_t line)
{
    if (ptr == NULL)
    {
        /* %zu is the conversion specifier for size_t; %lu only matches where
         * size_t happens to be unsigned long. */
        fprintf(stderr, "%s:%zu: expected not NULL\n", file, line);
        assert(false);
    }
}
void str_init(str_t * str, char const * cs)
{
size_t length = strlen(cs);

View File

@ -5,6 +5,16 @@ void assert_eq_size_t_i(size_t expected, size_t actual, char const * file, size_
#define assert_eq(expected, actual) \
assert_eq_size_t_i(expected, actual, __FILE__, __LINE__)
void assert_ne_size_t_i(size_t expected, size_t actual, char const * file, size_t line);
#define assert_ne(expected, actual) \
assert_ne_size_t_i(expected, actual, __FILE__, __LINE__)
void assert_not_null_i(void * ptr, char const * file, size_t line);
#define assert_not_null(ptr) \
assert_not_null_i(ptr, __FILE__, __LINE__)
typedef struct
{
char * cs;