diff --git a/assets/parser.c.erb b/assets/parser.c.erb index 1dc1b1f..dbdab6b 100644 --- a/assets/parser.c.erb +++ b/assets/parser.c.erb @@ -21,6 +21,7 @@ #define P_UNEXPECTED_TOKEN 3u #define P_DROP 4u #define P_EOF 5u +#define P_USER_TERMINATED 6u <% end %> /* An invalid ID value. */ @@ -737,12 +738,13 @@ static void state_values_stack_free(state_values_stack_t * stack) * * @param rule The ID of the rule. * - * @return Parse value. + * @retval P_SUCCESS + * Continue parsing. + * @retval P_USER_TERMINATED + * User requested to terminate parsing. */ -static <%= @grammar.prefix %>value_t parser_user_code(uint32_t rule, state_values_stack_t * statevalues, uint32_t n_states) +static size_t parser_user_code(<%= @grammar.prefix %>value_t * _pvalue, uint32_t rule, state_values_stack_t * statevalues, uint32_t n_states, <%= @grammar.prefix %>context_t * context) { - <%= @grammar.prefix %>value_t _pvalue = {0}; - switch (rule) { <% @grammar.rules.each do |rule| %> @@ -755,7 +757,7 @@ static <%= @grammar.prefix %>value_t parser_user_code(uint32_t rule, state_value default: break; } - return _pvalue; + return P_SUCCESS; } /** @@ -888,7 +890,12 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) if (reduce_index != INVALID_ID) { /* We have something to reduce. 
*/ - reduced_parser_value = parser_user_code(parser_reduce_table[reduce_index].rule, &statevalues, parser_reduce_table[reduce_index].n_states); + <%= @grammar.prefix %>value_t reduced_parser_value2 = {0}; + if (parser_user_code(&reduced_parser_value2, parser_reduce_table[reduce_index].rule, &statevalues, parser_reduce_table[reduce_index].n_states, context) == P_USER_TERMINATED) + { + return P_USER_TERMINATED; + } + reduced_parser_value = reduced_parser_value2; reduced_rule_set = parser_reduce_table[reduce_index].rule_set; state_values_stack_pop(&statevalues, parser_reduce_table[reduce_index].n_states); continue; @@ -933,3 +940,16 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) { return context->text_position; } + +/** + * Get the user terminate code. + * + * @param context + * Lexer/parser context structure. + * + * @return User terminate code. + */ +size_t <%= @grammar.prefix %>user_terminate_code(<%= @grammar.prefix %>context_t * context) +{ + return context->user_terminate_code; +} diff --git a/assets/parser.d.erb b/assets/parser.d.erb index 8aec2da..374ce12 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -27,6 +27,7 @@ public enum : size_t <%= @grammar.prefix.upcase %>UNEXPECTED_TOKEN, <%= @grammar.prefix.upcase %>DROP, <%= @grammar.prefix.upcase %>EOF, + <%= @grammar.prefix.upcase %>USER_TERMINATED, } /** Token type. */ @@ -114,6 +115,9 @@ public struct <%= @grammar.prefix %>context_t /** Unexpected token received. */ <%= @grammar.prefix %>token_t token; + + /** User terminate code. */ + size_t user_terminate_code; } /************************************************************************** @@ -141,6 +145,7 @@ private enum : size_t P_UNEXPECTED_TOKEN, P_DROP, P_EOF, + P_USER_TERMINATED, } <% end %> @@ -761,12 +766,13 @@ private immutable parser_state_t[] parser_state_table = [ * * @param rule The ID of the rule. * - * @return Parse value. + * @retval P_SUCCESS + * Continue parsing. 
+ * @retval P_USER_TERMINATED + * User requested to terminate parsing. */ -private <%= @grammar.prefix %>value_t parser_user_code(uint rule, state_value_t[] statevalues, uint n_states) +private size_t parser_user_code(<%= @grammar.prefix %>value_t * _pvalue, uint rule, state_value_t[] statevalues, uint n_states, <%= @grammar.prefix %>context_t * context) { - <%= @grammar.prefix %>value_t _pvalue; - switch (rule) { <% @grammar.rules.each do |rule| %> @@ -779,7 +785,7 @@ private <%= @grammar.prefix %>value_t parser_user_code(uint rule, state_value_t[ default: break; } - return _pvalue; + return P_SUCCESS; } /** @@ -906,7 +912,12 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont if (reduce_index != INVALID_ID) { /* We have something to reduce. */ - reduced_parser_value = parser_user_code(parser_reduce_table[reduce_index].rule, statevalues, parser_reduce_table[reduce_index].n_states); + <%= @grammar.prefix %>value_t reduced_parser_value2; + if (parser_user_code(&reduced_parser_value2, parser_reduce_table[reduce_index].rule, statevalues, parser_reduce_table[reduce_index].n_states, context) == P_USER_TERMINATED) + { + return P_USER_TERMINATED; + } + reduced_parser_value = reduced_parser_value2; reduced_rule_set = parser_reduce_table[reduce_index].rule_set; statevalues.length -= parser_reduce_table[reduce_index].n_states; continue; @@ -948,3 +959,16 @@ public <%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @gram { return context.text_position; } + +/** + * Get the user terminate code. + * + * @param context + * Lexer/parser context structure. + * + * @return User terminate code. 
+ */ +public size_t <%= @grammar.prefix %>user_terminate_code(<%= @grammar.prefix %>context_t * context) +{ + return context.user_terminate_code; +} diff --git a/assets/parser.h.erb b/assets/parser.h.erb index 883b7d6..fa2f66b 100644 --- a/assets/parser.h.erb +++ b/assets/parser.h.erb @@ -20,6 +20,7 @@ #define <%= @grammar.prefix.upcase %>UNEXPECTED_TOKEN 3u #define <%= @grammar.prefix.upcase %>DROP 4u #define <%= @grammar.prefix.upcase %>EOF 5u +#define <%= @grammar.prefix.upcase %>USER_TERMINATED 6u /** Token type. */ typedef <%= get_type_for(@grammar.invalid_token_id) %> <%= @grammar.prefix %>token_t; @@ -109,6 +110,9 @@ typedef struct /** Unexpected token received. */ <%= @grammar.prefix %>token_t token; + + /** User terminate code. */ + size_t user_terminate_code; } <%= @grammar.prefix %>context_t; void <%= @grammar.prefix %>context_init(<%= @grammar.prefix %>context_t * context, uint8_t const * input, size_t input_length); @@ -123,3 +127,5 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context); <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context); <%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @grammar.prefix %>context_t * context); + +size_t <%= @grammar.prefix %>user_terminate_code(<%= @grammar.prefix %>context_t * context); diff --git a/doc/user_guide.md b/doc/user_guide.md index 561d852..44bfa26 100644 --- a/doc/user_guide.md +++ b/doc/user_guide.md @@ -13,7 +13,7 @@ Propane is a LALR Parser Generator (LPG) which: * generates a built-in lexer to tokenize input * supports UTF-8 lexer inputs * generates a table-driven shift/reduce parser to parse input in linear time - * target C or D language outputs + * targets C or D language outputs * is MIT-licensed * is distributable as a standalone Ruby script @@ -574,6 +574,31 @@ default. It can also be used when generating multiple lexers/parsers to be used in the same program to avoid symbol collisions. 
+##> User termination of the parser + +Propane supports allowing parser user code blocks to terminate execution of the +parser. +One example use of this functionality is to detect and report an error before +the parser continues parsing the remainder of the input. +Another use of this feature is to begin parsing input and determine whether a +different parser should be used instead. + +To terminate parsing from a parser user code block, use the `$terminate(code)` +function, passing an integer expression argument. +For example: + +``` +NewExpression -> new Expression << + $terminate(42); +>> +``` + +The value passed to the `$terminate()` function is known as the "user terminate +code". +If the parser returns a `P_USER_TERMINATED` result code, then the user +terminate code can be accessed using the `p_user_terminate_code()` API +function. + #> License Propane is licensed under the terms of the MIT License: diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb index ff8fbca..40d7d9a 100644 --- a/lib/propane/generator.rb +++ b/lib/propane/generator.rb @@ -200,7 +200,12 @@ class Propane end if parser code = code.gsub(/\$\$/) do |match| - "_pvalue.v_#{rule.ptypename}" + case @language + when "c" + "_pvalue->v_#{rule.ptypename}" + when "d" + "_pvalue.v_#{rule.ptypename}" + end end code = code.gsub(/\$(\d+)/) do |match| index = $1.to_i @@ -211,6 +216,15 @@ class Propane "statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}" end end + code = code.gsub(/\$terminate\((.*)\);/) do |match| + user_terminate_code = $1 + case @language + when "c" + "context->user_terminate_code = (#{user_terminate_code}); return P_USER_TERMINATED;" + when "d" + "context.user_terminate_code = (#{user_terminate_code}); return P_USER_TERMINATED;" + end + end else code = code.gsub(/\$\$/) do |match| case @language diff --git a/spec/propane_spec.rb b/spec/propane_spec.rb index 06ca243..9fd19e8 100644 --- a/spec/propane_spec.rb +++ b/spec/propane_spec.rb @@
-729,6 +729,26 @@ EOF expect(results.stderr).to eq "" expect(results.status).to eq 0 end + + it "allows the user to terminate the parser" do + write_grammar < Any; +Any -> a Any; +Any -> b Any << + $terminate(4200); +>> +Any -> c Any; +Any -> ; +EOF + build_parser(language: language) + compile("spec/test_user_terminate.#{language}", language: language) + results = run + expect(results.stderr).to eq "" + expect(results.status).to eq 0 + end end end end diff --git a/spec/test_user_terminate.c b/spec/test_user_terminate.c new file mode 100644 index 0000000..87a537b --- /dev/null +++ b/spec/test_user_terminate.c @@ -0,0 +1,19 @@ +#include "testparser.h" +#include +#include +#include + +int main() +{ + char const * input = "aacc"; + p_context_t context; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + + input = "abc"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_USER_TERMINATED); + assert(p_user_terminate_code(&context) == 4200); + + return 0; +} diff --git a/spec/test_user_terminate.d b/spec/test_user_terminate.d new file mode 100644 index 0000000..a05faff --- /dev/null +++ b/spec/test_user_terminate.d @@ -0,0 +1,20 @@ +import testparser; +import std.stdio; + +int main() +{ + return 0; +} + +unittest +{ + string input = "aacc"; + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + + input = "abc"; + p_context_init(&context, input); + assert(p_parse(&context) == P_USER_TERMINATED); + assert(p_user_terminate_code(&context) == 4200); +}