From 5187cff24d255d524a14f4c45335ce2847d609ab Mon Sep 17 00:00:00 2001
From: Josh Holtrop
Date: Mon, 9 Feb 2026 17:58:47 -0500
Subject: [PATCH] Add p_parse_*() API for each start rule

---
 assets/parser.c.erb      | 18 +++++++++++++++++-
 assets/parser.d.erb      | 18 +++++++++++++++++-
 assets/parser.h.erb      |  3 +++
 lib/propane/generator.rb |  9 +++++----
 lib/propane/grammar.rb   |  2 +-
 lib/propane/parser.rb    | 15 ++++++++++++---
 6 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/assets/parser.c.erb b/assets/parser.c.erb
index 220d398..f70532a 100644
--- a/assets/parser.c.erb
+++ b/assets/parser.c.erb
@@ -924,6 +924,8 @@ static size_t check_reduce(size_t state_id, <%= @grammar.prefix %>token_t token)
  *
  * @param context
  *        Lexer/parser context structure.
+ * @param start_state_id
+ *        ID of the state in which to start.
  *
  * @retval P_SUCCESS
  *         The parser successfully matched the input text. The parse result value
@@ -936,7 +938,7 @@ static size_t check_reduce(size_t state_id, <%= @grammar.prefix %>token_t token)
  * @reval P_UNEXPECTED_INPUT
  *        Input text does not match any lexer pattern.
  */
-size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
+static size_t parse_from(<%= @grammar.prefix %>context_t * context, size_t start_state_id)
 {
     <%= @grammar.prefix %>token_info_t token_info;
     <%= @grammar.prefix %>token_t token = INVALID_TOKEN_ID;
@@ -949,6 +951,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 <% end %>
     state_values_stack_init(&statevalues);
     state_values_stack_push(&statevalues);
+    state_values_stack_index(&statevalues, -1)->state_id = start_state_id;
     size_t result;
     for (;;)
     {
@@ -1101,6 +1104,19 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
     return result;
 }
+size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
+{
+    return parse_from(context, 0u);
+}
+
+<% @grammar.start_rules.each_with_index do |start_rule, i| %>
+
+size_t <%= @grammar.prefix %>parse_<%= start_rule %>(<%= @grammar.prefix %>context_t * context)
+{
+    return parse_from(context, <%= i %>u);
+}
+<% end %>
+
 
 /**
  * Get the parse result value.
  *
diff --git a/assets/parser.d.erb b/assets/parser.d.erb
index 213c3be..99eac22 100644
--- a/assets/parser.d.erb
+++ b/assets/parser.d.erb
@@ -985,6 +985,8 @@ private size_t check_reduce(size_t state_id, <%= @grammar.prefix %>token_t token
  *
  * @param context
  *        Lexer/parser context structure.
+ * @param start_state_id
+ *        ID of the state in which to start.
  *
  * @retval P_SUCCESS
  *         The parser successfully matched the input text. The parse result value
@@ -997,11 +999,12 @@ private size_t check_reduce(size_t state_id, <%= @grammar.prefix %>token_t token
  * @reval P_UNEXPECTED_INPUT
  *        Input text does not match any lexer pattern.
  */
-public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
+private size_t parse_from(<%= @grammar.prefix %>context_t * context, size_t start_state_id)
 {
     <%= @grammar.prefix %>token_info_t token_info;
     <%= @grammar.prefix %>token_t token = INVALID_TOKEN_ID;
     state_value_t[] statevalues = new state_value_t[](1);
+    statevalues[0].state_id = start_state_id;
     size_t reduced_rule_set = INVALID_ID;
 <% if @grammar.ast %>
     void * reduced_parser_node;
@@ -1147,6 +1150,19 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>cont
     }
 }
+public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
+{
+    return parse_from(context, 0u);
+}
+
+<% @grammar.start_rules.each_with_index do |start_rule, i| %>
+
+public size_t <%= @grammar.prefix %>parse_<%= start_rule %>(<%= @grammar.prefix %>context_t * context)
+{
+    return parse_from(context, <%= i %>u);
+}
+<% end %>
+
 
 /**
  * Get the parse result value.
  *
diff --git a/assets/parser.h.erb b/assets/parser.h.erb
index 3d36efe..2b8fc43 100644
--- a/assets/parser.h.erb
+++ b/assets/parser.h.erb
@@ -189,6 +189,9 @@ size_t <%= @grammar.prefix %>decode_code_point(uint8_t const * input, size_t inp
 size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info);
 size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context);
+<% @grammar.start_rules.each_with_index do |start_rule, i| %>
+size_t <%= @grammar.prefix %>parse_<%= start_rule %>(<%= @grammar.prefix %>context_t * context);
+<% end %>
 
 <% if @grammar.ast %>
 <%= @grammar.ast_prefix %><%= @grammar.start_rules[0] %><%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
 
diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb
index 9c47a90..2fb3b26 100644
--- a/lib/propane/generator.rb
+++ b/lib/propane/generator.rb
@@ -71,14 +71,15 @@ class Propane
         end
         tokens_by_name[token.name] = token
       end
-      # Check for user start rules.
-      @grammar.start_rules.each_with_index do |start_rule, i|
+      # Create real start rule(s).
+      real_start_rules = @grammar.start_rules.map do |start_rule|
         unless @grammar.rules.find {|rule| rule.name == start_rule}
           raise Error.new("Start rule `#{start_rule}` not found")
         end
-        # Add "real" start rule.
-        @grammar.rules.unshift(Rule.new("$#{start_rule}", [start_rule, "$EOF"], nil, nil, nil))
+        Rule.new("$#{start_rule}", [start_rule, "$EOF"], nil, nil, nil)
       end
+      # Add real start rules before user-given rules.
+      @grammar.rules = real_start_rules + @grammar.rules
       # Generate and add rules for optional components.
       generate_optional_component_rules!(tokens_by_name)
       # Build rule sets.
diff --git a/lib/propane/grammar.rb b/lib/propane/grammar.rb
index cef26eb..4679eb8 100644
--- a/lib/propane/grammar.rb
+++ b/lib/propane/grammar.rb
@@ -11,7 +11,7 @@ class Propane
     attr_reader :free_token_node
     attr_reader :modulename
     attr_reader :patterns
-    attr_reader :rules
+    attr_accessor :rules
     attr_reader :start_rules
     attr_reader :tokens
     attr_reader :code_blocks
diff --git a/lib/propane/parser.rb b/lib/propane/parser.rb
index c980142..56432b9 100644
--- a/lib/propane/parser.rb
+++ b/lib/propane/parser.rb
@@ -16,11 +16,20 @@ class Propane
       @warnings = Set.new
       @errors = Set.new
       @options = options
-      start_item = Item.new(grammar.rules.first, 0)
-      eval_item_sets = Set[ItemSet.new([start_item])]
+      start_items = grammar.rules[0...grammar.start_rules.length].map do |start_rule|
+        Item.new(start_rule, 0)
+      end
+      start_item_sets = start_items.map {|item| ItemSet.new([item])}
+      eval_item_sets = Set[*start_item_sets]
       while eval_item_sets.size > 0
-        item_set = eval_item_sets.first
+        item_set =
+          if start_item_sets.size > 0
+            # Ensure we evaluate start_item_sets first in order
+            start_item_sets.slice!(0)
+          else
+            eval_item_sets.first
+          end
         eval_item_sets.delete(item_set)
         unless @item_sets_set.include?(item_set)
           item_set.id = @item_sets.size
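
Usage sketch (not part of the patch): a minimal C example of how the generated
per-start-rule entry points could be called. The "p_" prefix, the start rule
name Expr, the generated header name "myparser.h", the input string, and the
p_context_init() call are illustrative assumptions, not taken from this change.

/* Hypothetical usage sketch -- assumes a grammar generated with the "p_"
 * prefix, a start rule named Expr, a generated header "myparser.h", and the
 * generated p_context_init() entry point for supplying the input text. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "myparser.h"

int main(void)
{
    char const * input = "1 + 2 * 3";
    p_context_t context;

    /* Existing behavior: p_parse() parses starting from the first start rule. */
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    if (p_parse(&context) != P_SUCCESS)
    {
        fprintf(stderr, "full parse failed\n");
        return 1;
    }

    /* New API from this patch: parse the same text, starting at the Expr rule. */
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    if (p_parse_Expr(&context) != P_SUCCESS)
    {
        fprintf(stderr, "Expr parse failed\n");
        return 1;
    }

    return 0;
}

Design note: each generated wrapper delegates to parse_from() with the start
rule's index as the initial parser state. p_parse() passes 0u so existing
callers keep the old behavior, and the lib/propane/parser.rb change evaluates
the start item sets first, in order, so that start rule i receives item set
(and therefore parser state) ID i.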