Allow user to terminate the parser - close #13

2024-01-03 22:32:10 -05:00 · 2024-01-03 22:32:10 -05:00 · 24af3590d1
commit 24af3590d1
parent 92c76b74c8
8 changed files with 162 additions and 14 deletions
--- a/assets/parser.c.erb
+++ b/assets/parser.c.erb
@ -21,6 +21,7 @@
 #define P_UNEXPECTED_TOKEN 3u
 #define P_DROP 4u
 #define P_EOF 5u
 #define P_USER_TERMINATED 6u
 <% end %>
 /* An invalid ID value. */
@ -737,12 +738,13 @@ static void state_values_stack_free(state_values_stack_t * stack)
 *
 * @param rule The ID of the rule.
 *
- * @return Parse value.
+ * @retval P_SUCCESS
 *   Continue parsing.
 * @retval P_USER_TERMINATED
 *   User requested to terminate parsing.
 */
-static <%= @grammar.prefix %>value_t parser_user_code(uint32_t rule, state_values_stack_t * statevalues, uint32_t n_states)
+static size_t parser_user_code(<%= @grammar.prefix %>value_t * _pvalue, uint32_t rule, state_values_stack_t * statevalues, uint32_t n_states, <%= @grammar.prefix %>context_t * context)
 {
    <%= @grammar.prefix %>value_t _pvalue = {0};
    switch (rule)
    {
 <% @grammar.rules.each do |rule| %>
@ -755,7 +757,7 @@ static <%= @grammar.prefix %>value_t parser_user_code(uint32_t rule, state_value
    default: break;
    }
-    return _pvalue;
+    return P_SUCCESS;
 }
 /**
@ -888,7 +890,12 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
        if (reduce_index != INVALID_ID)
        {
            /* We have something to reduce. */
-            reduced_parser_value = parser_user_code(parser_reduce_table[reduce_index].rule, &statevalues, parser_reduce_table[reduce_index].n_states);
+            <%= @grammar.prefix %>value_t reduced_parser_value2 = {0};
            if (parser_user_code(&reduced_parser_value2, parser_reduce_table[reduce_index].rule, &statevalues, parser_reduce_table[reduce_index].n_states, context) == P_USER_TERMINATED)
            {
                return P_USER_TERMINATED;
            }
            reduced_parser_value = reduced_parser_value2;
            reduced_rule_set = parser_reduce_table[reduce_index].rule_set;
            state_values_stack_pop(&statevalues, parser_reduce_table[reduce_index].n_states);
            continue;
@ -933,3 +940,16 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 {
    return context->text_position;
 }
 /**
 * Get the user terminate code.
 *
 * This reads back the user_terminate_code field of the context. That field
 * is assigned by the code generated for a `$terminate(code)` statement in a
 * parser user code block, just before the parser returns P_USER_TERMINATED,
 * so the value is intended to be read after the parse function has returned
 * that result code.
 *
 * @param context
 *   Lexer/parser context structure.
 *
 * @return User terminate code.
 */
 size_t <%= @grammar.prefix %>user_terminate_code(<%= @grammar.prefix %>context_t * context)
 {
    return context->user_terminate_code;
 }
--- a/assets/parser.d.erb
+++ b/assets/parser.d.erb
@ -27,6 +27,7 @@ public enum : size_t
    <%= @grammar.prefix.upcase %>UNEXPECTED_TOKEN,
    <%= @grammar.prefix.upcase %>DROP,
    <%= @grammar.prefix.upcase %>EOF,
    <%= @grammar.prefix.upcase %>USER_TERMINATED,
 }
 /** Token type. */
@ -114,6 +115,9 @@ public struct <%= @grammar.prefix %>context_t
    /** Unexpected token received. */
    <%= @grammar.prefix %>token_t token;
    /** User terminate code. */
    size_t user_terminate_code;
 }
 /**************************************************************************
@ -141,6 +145,7 @@ private enum : size_t
    P_UNEXPECTED_TOKEN,
    P_DROP,
    P_EOF,
    P_USER_TERMINATED,
 }
 <% end %>
@ -761,12 +766,13 @@ private immutable parser_state_t[] parser_state_table = [
 *
 * @param rule The ID of the rule.
 *
- * @return Parse value.
+ * @retval P_SUCCESS
 *   Continue parsing.
 * @retval P_USER_TERMINATED
 *   User requested to terminate parsing.
 */
-private <%= @grammar.prefix %>value_t parser_user_code(uint rule, state_value_t[] statevalues, uint n_states)
+private size_t parser_user_code(<%= @grammar.prefix %>value_t * _pvalue, uint rule, state_value_t[] statevalues, uint n_states, <%= @grammar.prefix %>context_t * context)
 {
    <%= @grammar.prefix %>value_t _pvalue;
    switch (rule)
    {
 <% @grammar.rules.each do |rule| %>
@ -779,7 +785,7 @@ private <%= @grammar.prefix %>value_t parser_user_code(uint rule, state_value_t[
    default: break;
    }
-    return _pvalue;
+    return P_SUCCESS;
 }
 /**
@ -906,7 +912,12 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
        if (reduce_index != INVALID_ID)
        {
            /* We have something to reduce. */
-            reduced_parser_value = parser_user_code(parser_reduce_table[reduce_index].rule, statevalues, parser_reduce_table[reduce_index].n_states);
+            <%= @grammar.prefix %>value_t reduced_parser_value2;
            if (parser_user_code(&reduced_parser_value2, parser_reduce_table[reduce_index].rule, statevalues, parser_reduce_table[reduce_index].n_states, context) == P_USER_TERMINATED)
            {
                return P_USER_TERMINATED;
            }
            reduced_parser_value = reduced_parser_value2;
            reduced_rule_set = parser_reduce_table[reduce_index].rule_set;
            statevalues.length -= parser_reduce_table[reduce_index].n_states;
            continue;
@ -948,3 +959,16 @@ public <%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @gram
 {
    return context.text_position;
 }
 /**
 * Get the user terminate code.
 *
 * This reads back the user_terminate_code field of the context. That field
 * is assigned by the code generated for a `$terminate(code)` statement in a
 * parser user code block, just before the parser returns P_USER_TERMINATED,
 * so the value is intended to be read after the parse function has returned
 * that result code.
 *
 * @param context
 *   Lexer/parser context structure.
 *
 * @return User terminate code.
 */
 public size_t <%= @grammar.prefix %>user_terminate_code(<%= @grammar.prefix %>context_t * context)
 {
    return context.user_terminate_code;
 }
--- a/assets/parser.h.erb
+++ b/assets/parser.h.erb
@ -20,6 +20,7 @@
 #define <%= @grammar.prefix.upcase %>UNEXPECTED_TOKEN 3u
 #define <%= @grammar.prefix.upcase %>DROP 4u
 #define <%= @grammar.prefix.upcase %>EOF 5u
 #define <%= @grammar.prefix.upcase %>USER_TERMINATED 6u
 /** Token type. */
 typedef <%= get_type_for(@grammar.invalid_token_id) %> <%= @grammar.prefix %>token_t;
@ -109,6 +110,9 @@ typedef struct
    /** Unexpected token received. */
    <%= @grammar.prefix %>token_t token;
    /** User terminate code. */
    size_t user_terminate_code;
 } <%= @grammar.prefix %>context_t;
 void <%= @grammar.prefix %>context_init(<%= @grammar.prefix %>context_t * context, uint8_t const * input, size_t input_length);
@ -123,3 +127,5 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context);
 <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
 <%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @grammar.prefix %>context_t * context);
 size_t <%= @grammar.prefix %>user_terminate_code(<%= @grammar.prefix %>context_t * context);
--- a/doc/user_guide.md
+++ b/doc/user_guide.md
@ -13,7 +13,7 @@ Propane is a LALR Parser Generator (LPG) which:
  * generates a built-in lexer to tokenize input
  * supports UTF-8 lexer inputs
  * generates a table-driven shift/reduce parser to parse input in linear time
-  * target C or D language outputs
+  * targets C or D language outputs
  * is MIT-licensed
  * is distributable as a standalone Ruby script
@ -574,6 +574,31 @@ default.
 It can also be used when generating multiple lexers/parsers to be used in the
 same program to avoid symbol collisions.
 ##> User termination of the parser
 Propane allows parser user code blocks to terminate execution of the
 parser.
 One example use of this functionality is to detect and report an error before
 the parser continues parsing the remainder of the input.
 Another use of this feature is to begin parsing input and determine whether a
 different parser should be used instead.
 To terminate parsing from a parser user code block, use the `$terminate(code)`
 function, passing an integer expression argument.
 For example:
 ```
 NewExpression -> new Expression <<
  $terminate(42);
 >>
 ```
 The value passed to the `$terminate()` function is known as the "user terminate
 code".
 If the parser returns a `P_USER_TERMINATED` result code, then the user
 terminate code can be accessed using the `p_user_terminate_code()` API
 function.
 #> License
 Propane is licensed under the terms of the MIT License:
--- a/lib/propane/generator.rb
+++ b/lib/propane/generator.rb
@ -200,8 +200,13 @@ class Propane
      end
      if parser
        code = code.gsub(/\$\$/) do |match|
          case @language
          when "c"
            "_pvalue->v_#{rule.ptypename}"
          when "d"
            "_pvalue.v_#{rule.ptypename}"
          end
        end
        code = code.gsub(/\$(\d+)/) do |match|
          index = $1.to_i
          case @language
@ -211,6 +216,15 @@ class Propane
            "statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}"
          end
        end
        # Expand `$terminate(expr);` into target-language code that stores
        # the user terminate code in the context and returns
        # P_USER_TERMINATED from the generated user code function.
        #
        # The argument is matched non-greedily so that another statement
        # ending in ");" later on the same line is not swallowed into the
        # expression. The expansion is wrapped in braces so that it stays a
        # single statement when used as the body of an unbraced `if`, `else`
        # or loop (otherwise the `return` would execute unconditionally).
        code = code.gsub(/\$terminate\((.*?)\);/) do |match|
          user_terminate_code = $1
          case @language
          when "c"
            "{ context->user_terminate_code = (#{user_terminate_code}); return P_USER_TERMINATED; }"
          when "d"
            "{ context.user_terminate_code = (#{user_terminate_code}); return P_USER_TERMINATED; }"
          end
        end
      else
        code = code.gsub(/\$\$/) do |match|
          case @language
--- a/spec/propane_spec.rb
+++ b/spec/propane_spec.rb
@ -729,6 +729,26 @@ EOF
        expect(results.stderr).to eq ""
        expect(results.status).to eq 0
      end
      # Builds a parser from a grammar that uses `$terminate`, compiles the
      # matching test program (spec/test_user_terminate.<language>) against
      # it, and expects that program to exit with status 0 and an empty
      # stderr.
      it "allows the user to terminate the parser" do
        # Only the `Any -> b Any` rule has a user code block; it requests
        # termination with user terminate code 4200.
        write_grammar <<EOF
 token a;
 token b;
 token c;
 Start -> Any;
 Any -> a Any;
 Any -> b Any <<
  $terminate(4200);
 >>
 Any -> c Any;
 Any -> ;
 EOF
        build_parser(language: language)
        compile("spec/test_user_terminate.#{language}", language: language)
        results = run
        expect(results.stderr).to eq ""
        expect(results.status).to eq 0
      end
    end
  end
 end
--- a/spec/test_user_terminate.c
+++ b/spec/test_user_terminate.c
@ -0,0 +1,19 @@
 #include "testparser.h"
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
 /**
  * Check normal completion and user termination of the generated parser.
  *
  * Each parser call result is stored in a local before it is asserted, so
  * the calls are still executed when assert() is compiled out via NDEBUG.
  *
  * @return 0 when all checks pass (a failed assert aborts the program).
  */
 int main(void)
 {
    p_context_t context;
    size_t result;

    /* Input without a 'b' token: the parse is expected to succeed. */
    char const * input = "aacc";
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    result = p_parse(&context);
    assert(result == P_SUCCESS);

    /* Input with a 'b' token: user code is expected to terminate the parse. */
    input = "abc";
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    result = p_parse(&context);
    assert(result == P_USER_TERMINATED);

    /* The code passed to $terminate() must be retrievable afterwards. */
    result = p_user_terminate_code(&context);
    assert(result == 4200u);

    (void)result; /* Silence set-but-unused warnings when NDEBUG is defined. */
    return 0;
 }
--- a/spec/test_user_terminate.d
+++ b/spec/test_user_terminate.d
@ -0,0 +1,20 @@
 import testparser;
 import std.stdio;
 /* The entry point does no work itself; the checks are in the unittest block below. */
 int main()
 {
    return 0;
 }
 /* Check normal completion and user termination of the generated parser. */
 unittest
 {
    /* Input without a 'b' token: the parse is expected to succeed. */
    string input = "aacc";
    p_context_t context;
    p_context_init(&context, input);
    assert(p_parse(&context) == P_SUCCESS);
    /* Input with a 'b' token: user code is expected to terminate the parse. */
    input = "abc";
    p_context_init(&context, input);
    assert(p_parse(&context) == P_USER_TERMINATED);
    /* The code passed to $terminate() must be retrievable afterwards. */
    assert(p_user_terminate_code(&context) == 4200);
 }