From 424ddfe55ae4e72ec9d1e7f46b41c9dcc59e9c26 Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Wed, 12 Jul 2023 19:22:44 -0400 Subject: [PATCH] Output position info for various error return codes - close #10 --- assets/parser.d.erb | 52 +++++++++++++++++-- spec/propane_spec.rb | 15 ++++++ .../test_d_parser_rule_from_multiple_states.d | 1 + spec/test_error_positions.d | 36 +++++++++++++ 4 files changed, 99 insertions(+), 5 deletions(-) create mode 100644 spec/test_error_positions.d diff --git a/assets/parser.d.erb b/assets/parser.d.erb index eea5961..5fe07dd 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -106,7 +106,7 @@ public struct p_context_t size_t input_index; /** Input text position (row/column). */ - p_position_t input_position; + p_position_t text_position; /** Current lexer mode. */ size_t mode; @@ -443,6 +443,7 @@ private size_t find_longest_match(p_context_t * context, { lexer_match_info_t longest_match; lexer_match_info_t attempt_match; + *out_match_info = longest_match; uint current_state = lexer_mode_table[context.mode].state_table_offset; for (;;) { @@ -506,6 +507,13 @@ private size_t find_longest_match(p_context_t * context, } break; + case P_DECODE_ERROR: + /* If we see a decode error, we may be partially in the middle of + * matching a pattern, so return the attempted match info so that + * the input text position can be updated. */ + *out_match_info = attempt_match; + return result; + default: return result; } @@ -533,7 +541,7 @@ private size_t find_longest_match(p_context_t * context, private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_token_info) { p_token_info_t token_info; - token_info.position = context.input_position; + token_info.position = context.text_position; token_info.token = INVALID_TOKEN_ID; *out_token_info = token_info; // TODO: remove lexer_match_info_t match_info; @@ -560,15 +568,16 @@ private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_tok /* Update the input position tracking. */ context.input_index += match_info.length; - context.input_position.row += match_info.delta_position.row; + context.text_position.row += match_info.delta_position.row; if (match_info.delta_position.row != 0u) { - context.input_position.col = match_info.delta_position.col; + context.text_position.col = match_info.delta_position.col; } else { - context.input_position.col += match_info.delta_position.col; + context.text_position.col += match_info.delta_position.col; } + if (token_to_accept == INVALID_TOKEN_ID) { return P_DROP; @@ -583,6 +592,20 @@ private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_tok *out_token_info = token_info; return P_SUCCESS; + case P_DECODE_ERROR: + /* Update the input position tracking. */ + context.input_index += match_info.length; + context.text_position.row += match_info.delta_position.row; + if (match_info.delta_position.row != 0u) + { + context.text_position.col = match_info.delta_position.col; + } + else + { + context.text_position.col += match_info.delta_position.col; + } + return result; + default: return result; } @@ -902,6 +925,12 @@ public size_t p_parse(p_context_t * context) { writeln("{other}"); } + /* A token was successfully lexed, so the input text position was + * advanced. However, this is an unexpected token, so we want to reset + * the context text position to point to the token rather than the text + * after it, so that if the caller wants to report the error position, + * it will point to the correct position of the unexpected token. */ + context.text_position = token_info.position; return P_UNEXPECTED_TOKEN; } } @@ -918,3 +947,16 @@ public <%= start_rule_type[1] %> p_result(p_context_t * context) { return context.parse_result.v_<%= start_rule_type[0] %>; } + +/** + * Get the current text input position. + * + * @param context + * Lexer/parser context structure. + * + * @return Current text position. + */ +public p_position_t p_position(p_context_t * context) +{ + return context.text_position; +} diff --git a/spec/propane_spec.rb b/spec/propane_spec.rb index f31bc14..1a13360 100644 --- a/spec/propane_spec.rb +++ b/spec/propane_spec.rb @@ -359,6 +359,21 @@ EOF expect(results.status).to eq 0 end + it "tracks position of parser errors" do + write_grammar < a num Start; +Start -> a num; +EOF + build_parser + compile("spec/test_error_positions.d") + results = run + expect(results.stderr).to eq "" + expect(results.status).to eq 0 + end + it "allows creating a JSON parser" do write_grammar(File.read("spec/json_parser.propane")) build_parser diff --git a/spec/test_d_parser_rule_from_multiple_states.d b/spec/test_d_parser_rule_from_multiple_states.d index 4671381..e861610 100644 --- a/spec/test_d_parser_rule_from_multiple_states.d +++ b/spec/test_d_parser_rule_from_multiple_states.d @@ -12,6 +12,7 @@ unittest p_context_t context; p_context_init(&context, input); assert(p_parse(&context) == P_UNEXPECTED_TOKEN); + assert(p_position(&context) == p_position_t(0, 1)); input = "a b"; p_context_init(&context, input); diff --git a/spec/test_error_positions.d b/spec/test_error_positions.d new file mode 100644 index 0000000..89bfbaa --- /dev/null +++ b/spec/test_error_positions.d @@ -0,0 +1,36 @@ +import testparser; +import std.stdio; + +int main() +{ + return 0; +} + +unittest +{ + string input = "a 42"; + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + + input = "a\n123\na a"; + p_context_init(&context, input); + assert(p_parse(&context) == P_UNEXPECTED_TOKEN); + assert(p_position(&context) == p_position_t(2, 3)); + + input = "12"; + p_context_init(&context, input); + assert(p_parse(&context) == P_UNEXPECTED_TOKEN); + assert(p_position(&context) == p_position_t(0, 0)); + + input = "a 12\n\nab"; + p_context_init(&context, input); + assert(p_parse(&context) == P_UNEXPECTED_INPUT); + assert(p_position(&context) == p_position_t(2, 1)); + + input = "a 12\n\na\n\n77\na \xAA"; + p_context_init(&context, input); + assert(p_parse(&context) == P_DECODE_ERROR); + writeln(p_position(&context)); + assert(p_position(&context) == p_position_t(5, 4)); +}