Allow user to terminate the parser - close #13

This commit is contained in:
Josh Holtrop 2024-01-03 22:32:10 -05:00
parent 92c76b74c8
commit 24af3590d1
8 changed files with 162 additions and 14 deletions

View File

@ -21,6 +21,7 @@
#define P_UNEXPECTED_TOKEN 3u #define P_UNEXPECTED_TOKEN 3u
#define P_DROP 4u #define P_DROP 4u
#define P_EOF 5u #define P_EOF 5u
#define P_USER_TERMINATED 6u
<% end %> <% end %>
/* An invalid ID value. */ /* An invalid ID value. */
@ -737,12 +738,13 @@ static void state_values_stack_free(state_values_stack_t * stack)
* *
* @param rule The ID of the rule. * @param rule The ID of the rule.
* *
* @return Parse value. * @retval P_SUCCESS
* Continue parsing.
* @retval P_USER_TERMINATED
* User requested to terminate parsing.
*/ */
static <%= @grammar.prefix %>value_t parser_user_code(uint32_t rule, state_values_stack_t * statevalues, uint32_t n_states) static size_t parser_user_code(<%= @grammar.prefix %>value_t * _pvalue, uint32_t rule, state_values_stack_t * statevalues, uint32_t n_states, <%= @grammar.prefix %>context_t * context)
{ {
<%= @grammar.prefix %>value_t _pvalue = {0};
switch (rule) switch (rule)
{ {
<% @grammar.rules.each do |rule| %> <% @grammar.rules.each do |rule| %>
@ -755,7 +757,7 @@ static <%= @grammar.prefix %>value_t parser_user_code(uint32_t rule, state_value
default: break; default: break;
} }
return _pvalue; return P_SUCCESS;
} }
/** /**
@ -888,7 +890,12 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
if (reduce_index != INVALID_ID) if (reduce_index != INVALID_ID)
{ {
/* We have something to reduce. */ /* We have something to reduce. */
reduced_parser_value = parser_user_code(parser_reduce_table[reduce_index].rule, &statevalues, parser_reduce_table[reduce_index].n_states); <%= @grammar.prefix %>value_t reduced_parser_value2 = {0};
if (parser_user_code(&reduced_parser_value2, parser_reduce_table[reduce_index].rule, &statevalues, parser_reduce_table[reduce_index].n_states, context) == P_USER_TERMINATED)
{
return P_USER_TERMINATED;
}
reduced_parser_value = reduced_parser_value2;
reduced_rule_set = parser_reduce_table[reduce_index].rule_set; reduced_rule_set = parser_reduce_table[reduce_index].rule_set;
state_values_stack_pop(&statevalues, parser_reduce_table[reduce_index].n_states); state_values_stack_pop(&statevalues, parser_reduce_table[reduce_index].n_states);
continue; continue;
@ -933,3 +940,16 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
{ {
return context->text_position; return context->text_position;
} }
/**
* Get the user terminate code.
*
* The user terminate code is the value that a parser user code block passed
* to $terminate(); the generated code stores it in the context before
* returning the USER_TERMINATED result code from the parse function.
* This accessor simply reads that field back out of the context.
*
* @param context
* Lexer/parser context structure.
*
* @return User terminate code.
*/
size_t <%= @grammar.prefix %>user_terminate_code(<%= @grammar.prefix %>context_t * context)
{
return context->user_terminate_code;
}

View File

@ -27,6 +27,7 @@ public enum : size_t
<%= @grammar.prefix.upcase %>UNEXPECTED_TOKEN, <%= @grammar.prefix.upcase %>UNEXPECTED_TOKEN,
<%= @grammar.prefix.upcase %>DROP, <%= @grammar.prefix.upcase %>DROP,
<%= @grammar.prefix.upcase %>EOF, <%= @grammar.prefix.upcase %>EOF,
<%= @grammar.prefix.upcase %>USER_TERMINATED,
} }
/** Token type. */ /** Token type. */
@ -114,6 +115,9 @@ public struct <%= @grammar.prefix %>context_t
/** Unexpected token received. */ /** Unexpected token received. */
<%= @grammar.prefix %>token_t token; <%= @grammar.prefix %>token_t token;
/** User terminate code. */
size_t user_terminate_code;
} }
/************************************************************************** /**************************************************************************
@ -141,6 +145,7 @@ private enum : size_t
P_UNEXPECTED_TOKEN, P_UNEXPECTED_TOKEN,
P_DROP, P_DROP,
P_EOF, P_EOF,
P_USER_TERMINATED,
} }
<% end %> <% end %>
@ -761,12 +766,13 @@ private immutable parser_state_t[] parser_state_table = [
* *
* @param rule The ID of the rule. * @param rule The ID of the rule.
* *
* @return Parse value. * @retval P_SUCCESS
* Continue parsing.
* @retval P_USER_TERMINATED
* User requested to terminate parsing.
*/ */
private <%= @grammar.prefix %>value_t parser_user_code(uint rule, state_value_t[] statevalues, uint n_states) private size_t parser_user_code(<%= @grammar.prefix %>value_t * _pvalue, uint rule, state_value_t[] statevalues, uint n_states, <%= @grammar.prefix %>context_t * context)
{ {
<%= @grammar.prefix %>value_t _pvalue;
switch (rule) switch (rule)
{ {
<% @grammar.rules.each do |rule| %> <% @grammar.rules.each do |rule| %>
@ -779,7 +785,7 @@ private <%= @grammar.prefix %>value_t parser_user_code(uint rule, state_value_t[
default: break; default: break;
} }
return _pvalue; return P_SUCCESS;
} }
/** /**
@ -906,7 +912,12 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
if (reduce_index != INVALID_ID) if (reduce_index != INVALID_ID)
{ {
/* We have something to reduce. */ /* We have something to reduce. */
reduced_parser_value = parser_user_code(parser_reduce_table[reduce_index].rule, statevalues, parser_reduce_table[reduce_index].n_states); <%= @grammar.prefix %>value_t reduced_parser_value2;
if (parser_user_code(&reduced_parser_value2, parser_reduce_table[reduce_index].rule, statevalues, parser_reduce_table[reduce_index].n_states, context) == P_USER_TERMINATED)
{
return P_USER_TERMINATED;
}
reduced_parser_value = reduced_parser_value2;
reduced_rule_set = parser_reduce_table[reduce_index].rule_set; reduced_rule_set = parser_reduce_table[reduce_index].rule_set;
statevalues.length -= parser_reduce_table[reduce_index].n_states; statevalues.length -= parser_reduce_table[reduce_index].n_states;
continue; continue;
@ -948,3 +959,16 @@ public <%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @gram
{ {
return context.text_position; return context.text_position;
} }
/**
* Get the user terminate code.
*
* The user terminate code is the value that a parser user code block passed
* to $terminate(); the generated code stores it in the context before
* returning the USER_TERMINATED result code from the parse function.
* This accessor simply reads that field back out of the context.
*
* @param context
* Lexer/parser context structure.
*
* @return User terminate code.
*/
public size_t <%= @grammar.prefix %>user_terminate_code(<%= @grammar.prefix %>context_t * context)
{
return context.user_terminate_code;
}

View File

@ -20,6 +20,7 @@
#define <%= @grammar.prefix.upcase %>UNEXPECTED_TOKEN 3u #define <%= @grammar.prefix.upcase %>UNEXPECTED_TOKEN 3u
#define <%= @grammar.prefix.upcase %>DROP 4u #define <%= @grammar.prefix.upcase %>DROP 4u
#define <%= @grammar.prefix.upcase %>EOF 5u #define <%= @grammar.prefix.upcase %>EOF 5u
#define <%= @grammar.prefix.upcase %>USER_TERMINATED 6u
/** Token type. */ /** Token type. */
typedef <%= get_type_for(@grammar.invalid_token_id) %> <%= @grammar.prefix %>token_t; typedef <%= get_type_for(@grammar.invalid_token_id) %> <%= @grammar.prefix %>token_t;
@ -109,6 +110,9 @@ typedef struct
/** Unexpected token received. */ /** Unexpected token received. */
<%= @grammar.prefix %>token_t token; <%= @grammar.prefix %>token_t token;
/** User terminate code. */
size_t user_terminate_code;
} <%= @grammar.prefix %>context_t; } <%= @grammar.prefix %>context_t;
void <%= @grammar.prefix %>context_init(<%= @grammar.prefix %>context_t * context, uint8_t const * input, size_t input_length); void <%= @grammar.prefix %>context_init(<%= @grammar.prefix %>context_t * context, uint8_t const * input, size_t input_length);
@ -123,3 +127,5 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context);
<%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context); <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
<%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @grammar.prefix %>context_t * context); <%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @grammar.prefix %>context_t * context);
size_t <%= @grammar.prefix %>user_terminate_code(<%= @grammar.prefix %>context_t * context);

View File

@ -13,7 +13,7 @@ Propane is a LALR Parser Generator (LPG) which:
* generates a built-in lexer to tokenize input * generates a built-in lexer to tokenize input
* supports UTF-8 lexer inputs * supports UTF-8 lexer inputs
* generates a table-driven shift/reduce parser to parse input in linear time * generates a table-driven shift/reduce parser to parse input in linear time
* target C or D language outputs * targets C or D language outputs
* is MIT-licensed * is MIT-licensed
* is distributable as a standalone Ruby script * is distributable as a standalone Ruby script
@ -574,6 +574,31 @@ default.
It can also be used when generating multiple lexers/parsers to be used in the It can also be used when generating multiple lexers/parsers to be used in the
same program to avoid symbol collisions. same program to avoid symbol collisions.
##> User termination of the parser
Propane supports allowing parser user code blocks to terminate execution of the
parser.
One example use of this functionality is to detect and report an error before
the parser continues parsing the remainder of the input.
Another use of this feature is to begin parsing input and determine whether a
different parser should be used instead.
To terminate parsing from a parser user code block, use the `$terminate(code)`
function, passing an integer expression argument.
For example:
```
NewExpression -> new Expression <<
$terminate(42);
>>
```
The value passed to the `$terminate()` function is known as the "user terminate
code".
If the parser returns a `P_USER_TERMINATED` result code, then the user
terminate code can be accessed using the `p_user_terminate_code()` API
function.
#> License #> License
Propane is licensed under the terms of the MIT License: Propane is licensed under the terms of the MIT License:

View File

@ -200,8 +200,13 @@ class Propane
end end
if parser if parser
code = code.gsub(/\$\$/) do |match| code = code.gsub(/\$\$/) do |match|
case @language
when "c"
"_pvalue->v_#{rule.ptypename}"
when "d"
"_pvalue.v_#{rule.ptypename}" "_pvalue.v_#{rule.ptypename}"
end end
end
code = code.gsub(/\$(\d+)/) do |match| code = code.gsub(/\$(\d+)/) do |match|
index = $1.to_i index = $1.to_i
case @language case @language
@ -211,6 +216,15 @@ class Propane
"statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}" "statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}"
end end
end end
# Expand `$terminate(expr);` in parser user code into target-language
# statements that record the user terminate code in the context and return
# P_USER_TERMINATED from the generated user code function.
#
# The argument is matched non-greedily so that only the text up to the first
# `);` is captured. A greedy match would swallow any later statement ending
# in `);` on the same line (e.g. `$terminate(1); foo(2);`) and emit broken
# code. Nested calls such as `$terminate(f(1));` still match in full because
# the pattern requires `)` to be immediately followed by `;`.
code = code.gsub(/\$terminate\((.*?)\);/) do |match|
  user_terminate_code = $1
  case @language
  when "c"
    # C user code receives the context as a pointer.
    "context->user_terminate_code = (#{user_terminate_code}); return P_USER_TERMINATED;"
  when "d"
    # D dereferences pointers to structs implicitly with `.`.
    "context.user_terminate_code = (#{user_terminate_code}); return P_USER_TERMINATED;"
  end
end
else else
code = code.gsub(/\$\$/) do |match| code = code.gsub(/\$\$/) do |match|
case @language case @language

View File

@ -729,6 +729,26 @@ EOF
expect(results.stderr).to eq "" expect(results.stderr).to eq ""
expect(results.status).to eq 0 expect(results.status).to eq 0
end end
# Builds a grammar in which reducing `Any -> b Any` calls $terminate(4200),
# then compiles and runs spec/test_user_terminate.<language>, which performs
# the actual result-code and terminate-code checks. This example only
# verifies that the compiled test program exits cleanly.
it "allows the user to terminate the parser" do
write_grammar <<EOF
token a;
token b;
token c;
Start -> Any;
Any -> a Any;
Any -> b Any <<
$terminate(4200);
>>
Any -> c Any;
Any -> ;
EOF
build_parser(language: language)
compile("spec/test_user_terminate.#{language}", language: language)
results = run
# A failed assertion in the test program would show up as stderr output
# and/or a non-zero exit status.
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
end end
end end
end end

View File

@ -0,0 +1,19 @@
#include "testparser.h"
#include <assert.h>
#include <stdio.h>
#include <string.h>
/**
 * Exercise user termination of the generated parser.
 *
 * Parses one input that is expected to succeed and one that contains a 'b'
 * token, for which the test grammar's user code calls $terminate(4200).
 *
 * The calls to p_parse() are made outside of assert() so that the parser is
 * still run when the program is compiled with NDEBUG (which removes the
 * assert() argument expression entirely).
 *
 * @return 0 when all checks pass; a failed assert() aborts the program.
 */
int main(void)
{
    char const * input = "aacc";
    p_context_t context;
    size_t result;

    /* No 'b' token in the input: parsing is expected to succeed. */
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    result = p_parse(&context);
    assert(result == P_SUCCESS);

    /* The 'b' token triggers the rule whose user code terminates the parser. */
    input = "abc";
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    result = p_parse(&context);
    assert(result == P_USER_TERMINATED);
    assert(p_user_terminate_code(&context) == 4200);

    /* Avoid an unused-variable warning when assert() is compiled out. */
    (void)result;
    return 0;
}

View File

@ -0,0 +1,20 @@
import testparser;
import std.stdio;
/* Program entry point; it performs no work itself. The checks for this test
* program are in the unittest block of this module. */
int main()
{
return 0;
}
/* Exercise user termination of the generated parser: one input that is
* expected to parse successfully and one that is expected to be terminated
* by user code with terminate code 4200. */
unittest
{
string input = "aacc";
p_context_t context;
/* First input: expected to parse to completion. */
p_context_init(&context, input);
assert(p_parse(&context) == P_SUCCESS);
/* Second input: expected to stop with P_USER_TERMINATED, after which the
* user terminate code can be read back from the context. */
input = "abc";
p_context_init(&context, input);
assert(p_parse(&context) == P_USER_TERMINATED);
assert(p_user_terminate_code(&context) == 4200);
}