Output position info for various error return codes - close #10
parent ce22e3465b
commit 424ddfe55a
@@ -106,7 +106,7 @@ public struct p_context_t
     size_t input_index;
 
     /** Input text position (row/column). */
-    p_position_t input_position;
+    p_position_t text_position;
 
     /** Current lexer mode. */
     size_t mode;
@@ -443,6 +443,7 @@ private size_t find_longest_match(p_context_t * context,
 {
     lexer_match_info_t longest_match;
     lexer_match_info_t attempt_match;
+    *out_match_info = longest_match;
     uint current_state = lexer_mode_table[context.mode].state_table_offset;
     for (;;)
     {
@@ -506,6 +507,13 @@ private size_t find_longest_match(p_context_t * context,
             }
             break;
 
+        case P_DECODE_ERROR:
+            /* If we see a decode error, we may be partially in the middle of
+             * matching a pattern, so return the attempted match info so that
+             * the input text position can be updated. */
+            *out_match_info = attempt_match;
+            return result;
+
         default:
             return result;
         }
@@ -533,7 +541,7 @@ private size_t find_longest_match(p_context_t * context,
 private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_token_info)
 {
     p_token_info_t token_info;
-    token_info.position = context.input_position;
+    token_info.position = context.text_position;
     token_info.token = INVALID_TOKEN_ID;
     *out_token_info = token_info; // TODO: remove
     lexer_match_info_t match_info;
@@ -560,15 +568,16 @@ private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_tok
 
         /* Update the input position tracking. */
         context.input_index += match_info.length;
-        context.input_position.row += match_info.delta_position.row;
+        context.text_position.row += match_info.delta_position.row;
         if (match_info.delta_position.row != 0u)
         {
-            context.input_position.col = match_info.delta_position.col;
+            context.text_position.col = match_info.delta_position.col;
         }
         else
        {
-            context.input_position.col += match_info.delta_position.col;
+            context.text_position.col += match_info.delta_position.col;
         }
+
         if (token_to_accept == INVALID_TOKEN_ID)
         {
             return P_DROP;
@@ -583,6 +592,20 @@ private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_tok
         *out_token_info = token_info;
         return P_SUCCESS;
 
+    case P_DECODE_ERROR:
+        /* Update the input position tracking. */
+        context.input_index += match_info.length;
+        context.text_position.row += match_info.delta_position.row;
+        if (match_info.delta_position.row != 0u)
+        {
+            context.text_position.col = match_info.delta_position.col;
+        }
+        else
+        {
+            context.text_position.col += match_info.delta_position.col;
+        }
+        return result;
+
     default:
         return result;
     }
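Note: the position update duplicated in the successful-match path and the new P_DECODE_ERROR case above follows a single rule: if the matched text crossed at least one newline, the match's column delta becomes the new absolute column; otherwise it is added to the current column. A minimal standalone sketch of that rule (the struct and helper names here are illustrative, not part of the generated parser):

    struct Position { size_t row; size_t col; }

    void apply_delta(ref Position pos, Position delta)
    {
        pos.row += delta.row;
        if (delta.row != 0u)
        {
            // Crossed a newline: the delta's column is the new absolute column.
            pos.col = delta.col;
        }
        else
        {
            // Still on the same line: advance the column.
            pos.col += delta.col;
        }
    }

    unittest
    {
        auto pos = Position(0, 4);
        apply_delta(pos, Position(0, 3)); // matched 3 columns on the same line
        assert(pos == Position(0, 7));
        apply_delta(pos, Position(2, 1)); // matched text spanning two newlines
        assert(pos == Position(2, 1));
    }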
@@ -902,6 +925,12 @@ public size_t p_parse(p_context_t * context)
             {
                 writeln("{other}");
             }
+            /* A token was successfully lexed, so the input text position was
+             * advanced. However, this is an unexpected token, so we want to reset
+             * the context text position to point to the token rather than the text
+             * after it, so that if the caller wants to report the error position,
+             * it will point to the correct position of the unexpected token. */
+            context.text_position = token_info.position;
             return P_UNEXPECTED_TOKEN;
         }
     }
@@ -918,3 +947,16 @@ public <%= start_rule_type[1] %> p_result(p_context_t * context)
 {
     return context.parse_result.v_<%= start_rule_type[0] %>;
 }
+
+/**
+ * Get the current text input position.
+ *
+ * @param context
+ *   Lexer/parser context structure.
+ *
+ * @return Current text position.
+ */
+public p_position_t p_position(p_context_t * context)
+{
+    return context.text_position;
+}
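With p_position() exposed, a caller can turn a failing p_parse() return code into a human-readable location. A hedged usage sketch (testparser is the module name used by the tests added below; rows and columns are 0-based there, so 1 is added for display):

    import testparser;
    import std.stdio;

    void main()
    {
        string input = "a 12\nab";
        p_context_t context;
        p_context_init(&context, input);
        size_t result = p_parse(&context);
        if (result != P_SUCCESS)
        {
            // Report where lexing/parsing stopped.
            p_position_t pos = p_position(&context);
            writeln("error at line ", pos.row + 1u, ", column ", pos.col + 1u);
        }
    }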
@@ -359,6 +359,21 @@ EOF
     expect(results.status).to eq 0
   end
 
+  it "tracks position of parser errors" do
+    write_grammar <<EOF
+token a;
+token num /\\d+/;
+drop /\\s+/;
+Start -> a num Start;
+Start -> a num;
+EOF
+    build_parser
+    compile("spec/test_error_positions.d")
+    results = run
+    expect(results.stderr).to eq ""
+    expect(results.status).to eq 0
+  end
+
   it "allows creating a JSON parser" do
     write_grammar(File.read("spec/json_parser.propane"))
     build_parser
@@ -12,6 +12,7 @@ unittest
     p_context_t context;
     p_context_init(&context, input);
     assert(p_parse(&context) == P_UNEXPECTED_TOKEN);
+    assert(p_position(&context) == p_position_t(0, 1));
 
     input = "a b";
     p_context_init(&context, input);
spec/test_error_positions.d (new file, 36 lines)
@@ -0,0 +1,36 @@
+import testparser;
+import std.stdio;
+
+int main()
+{
+    return 0;
+}
+
+unittest
+{
+    string input = "a 42";
+    p_context_t context;
+    p_context_init(&context, input);
+    assert(p_parse(&context) == P_SUCCESS);
+
+    input = "a\n123\na a";
+    p_context_init(&context, input);
+    assert(p_parse(&context) == P_UNEXPECTED_TOKEN);
+    assert(p_position(&context) == p_position_t(2, 3));
+
+    input = "12";
+    p_context_init(&context, input);
+    assert(p_parse(&context) == P_UNEXPECTED_TOKEN);
+    assert(p_position(&context) == p_position_t(0, 0));
+
+    input = "a 12\n\nab";
+    p_context_init(&context, input);
+    assert(p_parse(&context) == P_UNEXPECTED_INPUT);
+    assert(p_position(&context) == p_position_t(2, 1));
+
+    input = "a 12\n\na\n\n77\na \xAA";
+    p_context_init(&context, input);
+    assert(p_parse(&context) == P_DECODE_ERROR);
+    writeln(p_position(&context));
+    assert(p_position(&context) == p_position_t(5, 4));
+}