Output position info for various error return codes - close #10

This commit is contained in:
Josh Holtrop 2023-07-12 19:22:44 -04:00
parent ce22e3465b
commit 424ddfe55a
4 changed files with 99 additions and 5 deletions

View File

@ -106,7 +106,7 @@ public struct p_context_t
size_t input_index; size_t input_index;
/** Input text position (row/column). */ /** Input text position (row/column). */
p_position_t input_position; p_position_t text_position;
/** Current lexer mode. */ /** Current lexer mode. */
size_t mode; size_t mode;
@ -443,6 +443,7 @@ private size_t find_longest_match(p_context_t * context,
{ {
lexer_match_info_t longest_match; lexer_match_info_t longest_match;
lexer_match_info_t attempt_match; lexer_match_info_t attempt_match;
*out_match_info = longest_match;
uint current_state = lexer_mode_table[context.mode].state_table_offset; uint current_state = lexer_mode_table[context.mode].state_table_offset;
for (;;) for (;;)
{ {
@ -506,6 +507,13 @@ private size_t find_longest_match(p_context_t * context,
} }
break; break;
case P_DECODE_ERROR:
/* If we see a decode error, we may be partially in the middle of
* matching a pattern, so return the attempted match info so that
* the input text position can be updated. */
*out_match_info = attempt_match;
return result;
default: default:
return result; return result;
} }
@ -533,7 +541,7 @@ private size_t find_longest_match(p_context_t * context,
private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_token_info) private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_token_info)
{ {
p_token_info_t token_info; p_token_info_t token_info;
token_info.position = context.input_position; token_info.position = context.text_position;
token_info.token = INVALID_TOKEN_ID; token_info.token = INVALID_TOKEN_ID;
*out_token_info = token_info; // TODO: remove *out_token_info = token_info; // TODO: remove
lexer_match_info_t match_info; lexer_match_info_t match_info;
@ -560,15 +568,16 @@ private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_tok
/* Update the input position tracking. */ /* Update the input position tracking. */
context.input_index += match_info.length; context.input_index += match_info.length;
context.input_position.row += match_info.delta_position.row; context.text_position.row += match_info.delta_position.row;
if (match_info.delta_position.row != 0u) if (match_info.delta_position.row != 0u)
{ {
context.input_position.col = match_info.delta_position.col; context.text_position.col = match_info.delta_position.col;
} }
else else
{ {
context.input_position.col += match_info.delta_position.col; context.text_position.col += match_info.delta_position.col;
} }
if (token_to_accept == INVALID_TOKEN_ID) if (token_to_accept == INVALID_TOKEN_ID)
{ {
return P_DROP; return P_DROP;
@ -583,6 +592,20 @@ private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_tok
*out_token_info = token_info; *out_token_info = token_info;
return P_SUCCESS; return P_SUCCESS;
case P_DECODE_ERROR:
/* Update the input position tracking. */
context.input_index += match_info.length;
context.text_position.row += match_info.delta_position.row;
if (match_info.delta_position.row != 0u)
{
context.text_position.col = match_info.delta_position.col;
}
else
{
context.text_position.col += match_info.delta_position.col;
}
return result;
default: default:
return result; return result;
} }
@ -902,6 +925,12 @@ public size_t p_parse(p_context_t * context)
{ {
writeln("{other}"); writeln("{other}");
} }
/* A token was successfully lexed, so the input text position was
* advanced. However, this is an unexpected token, so we want to reset
* the context text position to point to the token rather than the text
* after it, so that if the caller wants to report the error position,
* it will point to the correct position of the unexpected token. */
context.text_position = token_info.position;
return P_UNEXPECTED_TOKEN; return P_UNEXPECTED_TOKEN;
} }
} }
@ -918,3 +947,16 @@ public <%= start_rule_type[1] %> p_result(p_context_t * context)
{ {
return context.parse_result.v_<%= start_rule_type[0] %>; return context.parse_result.v_<%= start_rule_type[0] %>;
} }
/**
 * Report the text position currently recorded in a lexer/parser context.
 *
 * @param context
 *   Lexer/parser context structure to query.
 *
 * @return Copy of the context's text_position field (row/column).
 */
public p_position_t p_position(p_context_t * context)
{
    p_position_t current_position = context.text_position;
    return current_position;
}

View File

@ -359,6 +359,21 @@ EOF
expect(results.status).to eq 0 expect(results.status).to eq 0
end end
it "tracks position of parser errors" do
  # Grammar under test: a literal `a` token, a numeric `num` token, dropped
  # whitespace, and a Start rule made of one or more `a num` pairs.
  grammar = <<EOF
token a;
token num /\\d+/;
drop /\\s+/;
Start -> a num Start;
Start -> a num;
EOF
  write_grammar(grammar)
  build_parser
  compile("spec/test_error_positions.d")
  # The compiled test program must run without writing to stderr and must
  # exit with status 0.
  outcome = run
  expect(outcome.stderr).to eq("")
  expect(outcome.status).to eq(0)
end
it "allows creating a JSON parser" do it "allows creating a JSON parser" do
write_grammar(File.read("spec/json_parser.propane")) write_grammar(File.read("spec/json_parser.propane"))
build_parser build_parser

View File

@ -12,6 +12,7 @@ unittest
p_context_t context; p_context_t context;
p_context_init(&context, input); p_context_init(&context, input);
assert(p_parse(&context) == P_UNEXPECTED_TOKEN); assert(p_parse(&context) == P_UNEXPECTED_TOKEN);
assert(p_position(&context) == p_position_t(0, 1));
input = "a b"; input = "a b";
p_context_init(&context, input); p_context_init(&context, input);

View File

@ -0,0 +1,36 @@
import testparser;
import std.stdio;
/* Program entry point: does no work of its own and returns exit status 0.
 * The checks in this file live in the unittest block below.
 * NOTE(review): presumably the program is built with unit tests enabled so
 * that block actually runs -- confirm against the build invocation. */
int main()
{
return 0;
}
/* Checks the result code returned by p_parse() and, for each error case,
 * the position reported afterwards by p_position(). */
unittest
{
    /* Well-formed input: parses successfully. */
    string input = "a 42";
    p_context_t context;
    p_context_init(&context, input);
    assert(p_parse(&context) == P_SUCCESS);

    /* Three-line input ending in two consecutive `a`s: unexpected token. */
    input = "a\n123\na a";
    p_context_init(&context, input);
    assert(p_parse(&context) == P_UNEXPECTED_TOKEN);
    assert(p_position(&context) == p_position_t(2, 3));

    /* Input that begins with a number: unexpected token at the start. */
    input = "12";
    p_context_init(&context, input);
    assert(p_parse(&context) == P_UNEXPECTED_TOKEN);
    assert(p_position(&context) == p_position_t(0, 0));

    /* Input ending in "ab": unexpected input. */
    input = "a 12\n\nab";
    p_context_init(&context, input);
    assert(p_parse(&context) == P_UNEXPECTED_INPUT);
    assert(p_position(&context) == p_position_t(2, 1));

    /* Input ending in the \xAA escape: decode error. */
    input = "a 12\n\na\n\n77\na \xAA";
    p_context_init(&context, input);
    assert(p_parse(&context) == P_DECODE_ERROR);
    assert(p_position(&context) == p_position_t(5, 4));
}