Output position info for various error return codes - close #10

2023-07-12 19:22:44 -04:00 · 2023-07-12 19:22:44 -04:00 · 424ddfe55a
commit 424ddfe55a
parent ce22e3465b
4 changed files with 99 additions and 5 deletions
--- a/assets/parser.d.erb
+++ b/assets/parser.d.erb
@ -106,7 +106,7 @@ public struct p_context_t
    size_t input_index;
    /** Input text position (row/column). */
-    p_position_t input_position;
+    p_position_t text_position;
    /** Current lexer mode. */
    size_t mode;
@ -443,6 +443,7 @@ private size_t find_longest_match(p_context_t * context,
 {
    lexer_match_info_t longest_match;
    lexer_match_info_t attempt_match;
    *out_match_info = longest_match;
    uint current_state = lexer_mode_table[context.mode].state_table_offset;
    for (;;)
    {
@ -506,6 +507,13 @@ private size_t find_longest_match(p_context_t * context,
            }
            break;
        case P_DECODE_ERROR:
            /* If we see a decode error, we may be partially in the middle of
             * matching a pattern, so return the attempted match info so that
             * the input text position can be updated. */
            *out_match_info = attempt_match;
            return result;
        default:
            return result;
        }
@ -533,7 +541,7 @@ private size_t find_longest_match(p_context_t * context,
 private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_token_info)
 {
    p_token_info_t token_info;
-    token_info.position = context.input_position;
+    token_info.position = context.text_position;
    token_info.token = INVALID_TOKEN_ID;
    *out_token_info = token_info; // TODO: remove
    lexer_match_info_t match_info;
@ -560,15 +568,16 @@ private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_tok
        /* Update the input position tracking. */
        context.input_index += match_info.length;
-        context.input_position.row += match_info.delta_position.row;
+        context.text_position.row += match_info.delta_position.row;
        if (match_info.delta_position.row != 0u)
        {
-            context.input_position.col = match_info.delta_position.col;
+            context.text_position.col = match_info.delta_position.col;
        }
        else
        {
-            context.input_position.col += match_info.delta_position.col;
+            context.text_position.col += match_info.delta_position.col;
        }
        if (token_to_accept == INVALID_TOKEN_ID)
        {
            return P_DROP;
@ -583,6 +592,20 @@ private size_t attempt_lex_token(p_context_t * context, p_token_info_t * out_tok
        *out_token_info = token_info;
        return P_SUCCESS;
    case P_DECODE_ERROR:
        /* Update the input position tracking. */
        context.input_index += match_info.length;
        context.text_position.row += match_info.delta_position.row;
        if (match_info.delta_position.row != 0u)
        {
            context.text_position.col = match_info.delta_position.col;
        }
        else
        {
            context.text_position.col += match_info.delta_position.col;
        }
        return result;
    default:
        return result;
    }
@ -902,6 +925,12 @@ public size_t p_parse(p_context_t * context)
        {
            writeln("{other}");
        }
        /* A token was successfully lexed, so the input text position was
         * advanced. However, this is an unexpected token, so we want to reset
         * the context text position to point to the token rather than the text
         * after it, so that if the caller wants to report the error position,
         * it will point to the correct position of the unexpected token. */
        context.text_position = token_info.position;
        return P_UNEXPECTED_TOKEN;
    }
 }
@ -918,3 +947,16 @@ public <%= start_rule_type[1] %> p_result(p_context_t * context)
 {
    return context.parse_result.v_<%= start_rule_type[0] %>;
 }
 /**
 * Get the current text input position.
 *
 * The value returned is the context's text_position field, which the lexer
 * advances as input is consumed. After p_parse() returns
 * P_UNEXPECTED_TOKEN, the field has been reset to the position of the
 * unexpected token itself (not the text following it), so the result can be
 * used directly to report where the error occurred. When the lexer hits a
 * decode error, the field is first advanced by the attempted match's
 * position delta.
 *
 * @param context
 *   Lexer/parser context structure.
 *
 * @return Current text position (row/column).
 */
 public p_position_t p_position(p_context_t * context)
 {
    return context.text_position;
 }
--- a/spec/propane_spec.rb
+++ b/spec/propane_spec.rb
@ -359,6 +359,21 @@ EOF
    expect(results.status).to eq 0
  end
  # Builds a parser for a grammar of one or more `a num` pairs (whitespace is
  # dropped), then compiles and runs spec/test_error_positions.d, which holds
  # the actual error-position assertions. This example only checks that the
  # run writes nothing to stderr and exits with status 0.
  it "tracks position of parser errors" do
    write_grammar <<EOF
 token a;
 token num /\\d+/;
 drop /\\s+/;
 Start -> a num Start;
 Start -> a num;
 EOF
    build_parser
    compile("spec/test_error_positions.d")
    results = run
    expect(results.stderr).to eq ""
    expect(results.status).to eq 0
  end
  it "allows creating a JSON parser" do
    write_grammar(File.read("spec/json_parser.propane"))
    build_parser
--- a/spec/test_d_parser_rule_from_multiple_states.d
+++ b/spec/test_d_parser_rule_from_multiple_states.d
@ -12,6 +12,7 @@ unittest
    p_context_t context;
    p_context_init(&context, input);
    assert(p_parse(&context) == P_UNEXPECTED_TOKEN);
    assert(p_position(&context) == p_position_t(0, 1));
    input = "a b";
    p_context_init(&context, input);
--- a/spec/test_error_positions.d
+++ b/spec/test_error_positions.d
@ -0,0 +1,36 @@
 import testparser;
 import std.stdio;
 /* Entry point that does no work and returns 0; the checks in this file are
  * in the unittest block. */
 int main()
 {
    return 0;
 }
 /* Checks the position reported by p_position() after each kind of parse
  * result. Expected positions are written as p_position_t(row, col); the
  * expected values below show that both fields count from zero. */
 unittest
 {
    /* Valid input: a single `a num` pair parses successfully. */
    string input = "a 42";
    p_context_t context;
    p_context_init(&context, input);
    assert(p_parse(&context) == P_SUCCESS);

    /* Unexpected token: on row 2 the second `a` (column 3) appears directly
     * after another `a`; the reported position is that of the offending
     * token, not the text after it. */
    input = "a\n123\na  a";
    p_context_init(&context, input);
    assert(p_parse(&context) == P_UNEXPECTED_TOKEN);
    assert(p_position(&context) == p_position_t(2, 3));

    /* Unexpected token at the very start of the input: a number comes
     * first, so the error position is row 0, column 0. */
    input = "12";
    p_context_init(&context, input);
    assert(p_parse(&context) == P_UNEXPECTED_TOKEN);
    assert(p_position(&context) == p_position_t(0, 0));

    /* Unexpected input: after `a` on row 2, the `b` at column 1 is reported
     * as P_UNEXPECTED_INPUT rather than as a token. */
    input = "a 12\n\nab";
    p_context_init(&context, input);
    assert(p_parse(&context) == P_UNEXPECTED_INPUT);
    assert(p_position(&context) == p_position_t(2, 1));

    /* Decode error: the \xAA byte at row 5, column 4 (after `a` and three
     * spaces) yields P_DECODE_ERROR, and the position points at that byte. */
    input = "a 12\n\na\n\n77\na   \xAA";
    p_context_init(&context, input);
    assert(p_parse(&context) == P_DECODE_ERROR);
    /* NOTE(review): this writeln looks like leftover debug output; the
     * assert on the next line already checks the value -- consider removing. */
    writeln(p_position(&context));
    assert(p_position(&context) == p_position_t(5, 4));
 }