Allow user termination from lexer code blocks - close #15
This commit is contained in:
parent
d55c5e0080
commit
fad7f4fb36
@ -422,6 +422,8 @@ static size_t find_longest_match(<%= @grammar.prefix %>context_t * context,
|
|||||||
* Input text does not match any lexer pattern.
|
* Input text does not match any lexer pattern.
|
||||||
* @retval P_DROP
|
* @retval P_DROP
|
||||||
* A drop pattern was matched so the lexer should continue.
|
* A drop pattern was matched so the lexer should continue.
|
||||||
|
* @retval P_USER_TERMINATED
|
||||||
|
* User code has requested to terminate the lexer.
|
||||||
*/
|
*/
|
||||||
static size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
|
static size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
|
||||||
{
|
{
|
||||||
@ -441,6 +443,12 @@ static size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%= @
|
|||||||
uint8_t const * match = &context->input[context->input_index];
|
uint8_t const * match = &context->input[context->input_index];
|
||||||
<%= @grammar.prefix %>token_t user_code_token = lexer_user_code(context,
|
<%= @grammar.prefix %>token_t user_code_token = lexer_user_code(context,
|
||||||
match_info.accepting_state->code_id, match, match_info.length, &token_info);
|
match_info.accepting_state->code_id, match, match_info.length, &token_info);
|
||||||
|
/* A TERMINATE_TOKEN_ID return code from lexer_user_code() means
|
||||||
|
* that the user code is requesting to terminate the lexer. */
|
||||||
|
if (user_code_token == TERMINATE_TOKEN_ID)
|
||||||
|
{
|
||||||
|
return P_USER_TERMINATED;
|
||||||
|
}
|
||||||
/* An invalid token returned from lexer_user_code() means that the
|
/* An invalid token returned from lexer_user_code() means that the
|
||||||
* user code did not explicitly return a token. So only override
|
* user code did not explicitly return a token. So only override
|
||||||
* the token to return if the user code does explicitly return a
|
* the token to return if the user code does explicitly return a
|
||||||
@ -511,6 +519,8 @@ static size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%= @
|
|||||||
* The decoder encountered invalid text encoding.
|
* The decoder encountered invalid text encoding.
|
||||||
* @retval P_UNEXPECTED_INPUT
|
* @retval P_UNEXPECTED_INPUT
|
||||||
* Input text does not match any lexer pattern.
|
* Input text does not match any lexer pattern.
|
||||||
|
* @retval P_USER_TERMINATED
|
||||||
|
* User code has requested to terminate the lexer.
|
||||||
*/
|
*/
|
||||||
size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
|
size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
|
||||||
{
|
{
|
||||||
|
@ -31,7 +31,7 @@ public enum : size_t
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** Token type. */
|
/** Token type. */
|
||||||
public alias <%= @grammar.prefix %>token_t = <%= get_type_for(@grammar.invalid_token_id) %>;
|
public alias <%= @grammar.prefix %>token_t = <%= get_type_for(@grammar.terminate_token_id) %>;
|
||||||
|
|
||||||
/** Token IDs. */
|
/** Token IDs. */
|
||||||
public enum : <%= @grammar.prefix %>token_t
|
public enum : <%= @grammar.prefix %>token_t
|
||||||
@ -43,6 +43,7 @@ public enum : <%= @grammar.prefix %>token_t
|
|||||||
<% end %>
|
<% end %>
|
||||||
<% end %>
|
<% end %>
|
||||||
INVALID_TOKEN_ID = <%= @grammar.invalid_token_id %>,
|
INVALID_TOKEN_ID = <%= @grammar.invalid_token_id %>,
|
||||||
|
TERMINATE_TOKEN_ID = <%= @grammar.terminate_token_id %>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Code point type. */
|
/** Code point type. */
|
||||||
@ -538,6 +539,8 @@ private size_t find_longest_match(<%= @grammar.prefix %>context_t * context,
|
|||||||
* Input text does not match any lexer pattern.
|
* Input text does not match any lexer pattern.
|
||||||
* @retval P_DROP
|
* @retval P_DROP
|
||||||
* A drop pattern was matched so the lexer should continue.
|
* A drop pattern was matched so the lexer should continue.
|
||||||
|
* @retval P_USER_TERMINATED
|
||||||
|
* User code has requested to terminate the lexer.
|
||||||
*/
|
*/
|
||||||
private size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
|
private size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
|
||||||
{
|
{
|
||||||
@ -557,6 +560,12 @@ private size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%=
|
|||||||
string match = context.input[context.input_index..(context.input_index + match_info.length)];
|
string match = context.input[context.input_index..(context.input_index + match_info.length)];
|
||||||
<%= @grammar.prefix %>token_t user_code_token = lexer_user_code(context,
|
<%= @grammar.prefix %>token_t user_code_token = lexer_user_code(context,
|
||||||
match_info.accepting_state.code_id, match, &token_info);
|
match_info.accepting_state.code_id, match, &token_info);
|
||||||
|
/* A TERMINATE_TOKEN_ID return code from lexer_user_code() means
|
||||||
|
* that the user code is requesting to terminate the lexer. */
|
||||||
|
if (user_code_token == TERMINATE_TOKEN_ID)
|
||||||
|
{
|
||||||
|
return P_USER_TERMINATED;
|
||||||
|
}
|
||||||
/* An invalid token returned from lexer_user_code() means that the
|
/* An invalid token returned from lexer_user_code() means that the
|
||||||
* user code did not explicitly return a token. So only override
|
* user code did not explicitly return a token. So only override
|
||||||
* the token to return if the user code does explicitly return a
|
* the token to return if the user code does explicitly return a
|
||||||
@ -627,6 +636,8 @@ private size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%=
|
|||||||
* The decoder encountered invalid text encoding.
|
* The decoder encountered invalid text encoding.
|
||||||
* @retval P_UNEXPECTED_INPUT
|
* @retval P_UNEXPECTED_INPUT
|
||||||
* Input text does not match any lexer pattern.
|
* Input text does not match any lexer pattern.
|
||||||
|
* @retval P_USER_TERMINATED
|
||||||
|
* User code has requested to terminate the lexer.
|
||||||
*/
|
*/
|
||||||
public size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
|
public size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
|
||||||
{
|
{
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
#define <%= @grammar.prefix.upcase %>USER_TERMINATED 6u
|
#define <%= @grammar.prefix.upcase %>USER_TERMINATED 6u
|
||||||
|
|
||||||
/** Token type. */
|
/** Token type. */
|
||||||
typedef <%= get_type_for(@grammar.invalid_token_id) %> <%= @grammar.prefix %>token_t;
|
typedef <%= get_type_for(@grammar.terminate_token_id) %> <%= @grammar.prefix %>token_t;
|
||||||
|
|
||||||
/** Token IDs. */
|
/** Token IDs. */
|
||||||
<% @grammar.tokens.each_with_index do |token, index| %>
|
<% @grammar.tokens.each_with_index do |token, index| %>
|
||||||
@ -33,6 +33,7 @@ typedef <%= get_type_for(@grammar.invalid_token_id) %> <%= @grammar.prefix %>tok
|
|||||||
<% end %>
|
<% end %>
|
||||||
<% end %>
|
<% end %>
|
||||||
#define INVALID_TOKEN_ID <%= @grammar.invalid_token_id %>u
|
#define INVALID_TOKEN_ID <%= @grammar.invalid_token_id %>u
|
||||||
|
#define TERMINATE_TOKEN_ID <%= @grammar.terminate_token_id %>u
|
||||||
|
|
||||||
/** Code point type. */
|
/** Code point type. */
|
||||||
typedef uint32_t <%= @grammar.prefix %>code_point_t;
|
typedef uint32_t <%= @grammar.prefix %>code_point_t;
|
||||||
|
@ -574,17 +574,18 @@ default.
|
|||||||
It can also be used when generating multiple lexers/parsers to be used in the
|
It can also be used when generating multiple lexers/parsers to be used in the
|
||||||
same program to avoid symbol collisions.
|
same program to avoid symbol collisions.
|
||||||
|
|
||||||
##> User termination of the parser
|
##> User termination of the lexer or parser
|
||||||
|
|
||||||
Propane supports allowing parser user code blocks to terminate execution of the
|
Propane supports allowing lexer or parser user code blocks to terminate
|
||||||
parser.
|
execution of the parser.
|
||||||
One example use of this functionality is to detect and report an error before
|
Some example uses of this functionality could be to:
|
||||||
the parser continues parsing the remainder of the input.
|
|
||||||
Another use of this features is to begin parsing input and determine whether a
|
|
||||||
different parser should be used instead.
|
|
||||||
|
|
||||||
To terminate parsing from a parser user code block, use the `$terminate(code)`
|
* Detect integer overflow when lexing an integer literal constant.
|
||||||
function, passing an integer expression argument.
|
* Detect and report an error as soon as possible during parsing before continuing to parse any more of the input.
|
||||||
|
* Determine whether parsing should stop and instead be performed using a different parser version.
|
||||||
|
|
||||||
|
To terminate parsing from a lexer or parser user code block, use the
|
||||||
|
`$terminate(code)` function, passing an integer expression argument.
|
||||||
For example:
|
For example:
|
||||||
|
|
||||||
```
|
```
|
||||||
|
@ -198,6 +198,16 @@ class Propane
|
|||||||
code = code.gsub(/\$token\(([$\w]+)\)/) do |match|
|
code = code.gsub(/\$token\(([$\w]+)\)/) do |match|
|
||||||
"TOKEN_#{Token.code_name($1)}"
|
"TOKEN_#{Token.code_name($1)}"
|
||||||
end
|
end
|
||||||
|
code = code.gsub(/\$terminate\((.*)\);/) do |match|
|
||||||
|
user_terminate_code = $1
|
||||||
|
retval = rule ? "P_USER_TERMINATED" : "TERMINATE_TOKEN_ID"
|
||||||
|
case @language
|
||||||
|
when "c"
|
||||||
|
"context->user_terminate_code = (#{user_terminate_code}); return #{retval};"
|
||||||
|
when "d"
|
||||||
|
"context.user_terminate_code = (#{user_terminate_code}); return #{retval};"
|
||||||
|
end
|
||||||
|
end
|
||||||
if parser
|
if parser
|
||||||
code = code.gsub(/\$\$/) do |match|
|
code = code.gsub(/\$\$/) do |match|
|
||||||
case @language
|
case @language
|
||||||
@ -216,15 +226,6 @@ class Propane
|
|||||||
"statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}"
|
"statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
code = code.gsub(/\$terminate\((.*)\);/) do |match|
|
|
||||||
user_terminate_code = $1
|
|
||||||
case @language
|
|
||||||
when "c"
|
|
||||||
"context->user_terminate_code = (#{user_terminate_code}); return P_USER_TERMINATED;"
|
|
||||||
when "d"
|
|
||||||
"context.user_terminate_code = (#{user_terminate_code}); return P_USER_TERMINATED;"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
else
|
else
|
||||||
code = code.gsub(/\$\$/) do |match|
|
code = code.gsub(/\$\$/) do |match|
|
||||||
case @language
|
case @language
|
||||||
|
@ -35,6 +35,10 @@ class Propane
|
|||||||
@tokens.size
|
@tokens.size
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def terminate_token_id
|
||||||
|
@tokens.size + 1
|
||||||
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def parse_grammar!
|
def parse_grammar!
|
||||||
|
@ -730,6 +730,25 @@ EOF
|
|||||||
expect(results.status).to eq 0
|
expect(results.status).to eq 0
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "allows the user to terminate the lexer" do
|
||||||
|
write_grammar <<EOF
|
||||||
|
token a;
|
||||||
|
token b <<
|
||||||
|
$terminate(8675309);
|
||||||
|
>>
|
||||||
|
token c;
|
||||||
|
Start -> Any;
|
||||||
|
Any -> a;
|
||||||
|
Any -> b;
|
||||||
|
Any -> c;
|
||||||
|
EOF
|
||||||
|
build_parser(language: language)
|
||||||
|
compile("spec/test_user_terminate_lexer.#{language}", language: language)
|
||||||
|
results = run
|
||||||
|
expect(results.stderr).to eq ""
|
||||||
|
expect(results.status).to eq 0
|
||||||
|
end
|
||||||
|
|
||||||
it "allows the user to terminate the parser" do
|
it "allows the user to terminate the parser" do
|
||||||
write_grammar <<EOF
|
write_grammar <<EOF
|
||||||
token a;
|
token a;
|
||||||
|
19
spec/test_user_terminate_lexer.c
Normal file
19
spec/test_user_terminate_lexer.c
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
#include "testparser.h"
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
char const * input = "a";
|
||||||
|
p_context_t context;
|
||||||
|
p_context_init(&context, (uint8_t const *)input, strlen(input));
|
||||||
|
assert(p_parse(&context) == P_SUCCESS);
|
||||||
|
|
||||||
|
input = "b";
|
||||||
|
p_context_init(&context, (uint8_t const *)input, strlen(input));
|
||||||
|
assert(p_parse(&context) == P_USER_TERMINATED);
|
||||||
|
assert(p_user_terminate_code(&context) == 8675309);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
20
spec/test_user_terminate_lexer.d
Normal file
20
spec/test_user_terminate_lexer.d
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
import testparser;
|
||||||
|
import std.stdio;
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
unittest
|
||||||
|
{
|
||||||
|
string input = "a";
|
||||||
|
p_context_t context;
|
||||||
|
p_context_init(&context, input);
|
||||||
|
assert(p_parse(&context) == P_SUCCESS);
|
||||||
|
|
||||||
|
input = "b";
|
||||||
|
p_context_init(&context, input);
|
||||||
|
assert(p_parse(&context) == P_USER_TERMINATED);
|
||||||
|
assert(p_user_terminate_code(&context) == 8675309);
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user