Track token position in AST Token node

2024-05-27 22:10:05 -04:00 · 2024-05-27 22:10:05 -04:00 · 911e9505b7
commit 911e9505b7
parent aaeb0c4db1
7 changed files with 125 additions and 29 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,3 +1,9 @@
+## v1.5.0
+
+### New Features
+
+- Track token position in AST Token node
+
 ## v1.4.0

 ### New Features
--- a/assets/parser.c.erb
+++ b/assets/parser.c.erb
@ -951,6 +951,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
                <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>));
                token_ast_node->token = token;
                token_ast_node->pvalue = token_info.pvalue;
+                token_ast_node->position = token_info.position;
                state_values_stack_index(&statevalues, -1)->ast_node = token_ast_node;
 <% else %>
                state_values_stack_index(&statevalues, -1)->pvalue = token_info.pvalue;
--- a/assets/parser.d.erb
+++ b/assets/parser.d.erb
@ -49,6 +49,20 @@ public enum : <%= @grammar.prefix %>token_t
 /** Code point type. */
 public alias <%= @grammar.prefix %>code_point_t = uint;

+/**
+ * A structure to keep track of input position.
+ *
+ * This is useful for reporting errors, recording token locations, etc.
+ */
+public struct <%= @grammar.prefix %>position_t
+{
+    /** Input text row (0-based). */
+    uint row;
+
+    /** Input text column (0-based). */
+    uint col;
+}
+
 <% if @grammar.ast %>
 /** Parser values type. */
 public alias <%= @grammar.prefix %>value_t = <%= @grammar.ptype %>;
@ -68,6 +82,7 @@ public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
 {
    <%= @grammar.prefix %>token_t token;
    <%= @grammar.prefix %>value_t pvalue;
+    <%= @grammar.prefix %>position_t position;
 }

 <%   @parser.rule_sets.each do |name, rule_set| %>
@ -89,20 +104,6 @@ public struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
 /** @} */
 <% end %>

-/**
- * A structure to keep track of parser position.
- *
- * This is useful for reporting errors, etc...
- */
-public struct <%= @grammar.prefix %>position_t
-{
-    /** Input text row (0-based). */
-    uint row;
-
-    /** Input text column (0-based). */
-    uint col;
-}
-
 /** Lexed token information. */
 public struct <%= @grammar.prefix %>token_info_t
 {
@ -996,7 +997,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
            {
                /* We shifted a token, mark it consumed. */
 <% if @grammar.ast %>
-                <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token, token_info.pvalue);
+                <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token, token_info.pvalue, token_info.position);
                statevalues[$-1].ast_node = token_ast_node;
 <% else %>
                statevalues[$-1].pvalue = token_info.pvalue;
--- a/assets/parser.h.erb
+++ b/assets/parser.h.erb
@ -38,6 +38,20 @@ typedef <%= get_type_for(@grammar.terminate_token_id) %> <%= @grammar.prefix %>t
 /** Code point type. */
 typedef uint32_t <%= @grammar.prefix %>code_point_t;

+/**
+ * A structure to keep track of input position.
+ *
+ * This is useful for reporting errors, recording token locations, etc.
+ */
+typedef struct
+{
+    /** Input text row (0-based). */
+    uint32_t row;
+
+    /** Input text column (0-based). */
+    uint32_t col;
+} <%= @grammar.prefix %>position_t;
+
 /** User header code blocks. */
 <%= @grammar.code_blocks.fetch("header", "") %>

@ -60,6 +74,7 @@ typedef struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
 {
    <%= @grammar.prefix %>token_t token;
    <%= @grammar.prefix %>value_t pvalue;
+    <%= @grammar.prefix %>position_t position;
 } <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>;

 <%   @parser.rule_sets.each do |name, rule_set| %>
@ -87,20 +102,6 @@ typedef struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
 /** @} */
 <% end %>

-/**
- * A structure to keep track of parser position.
- *
- * This is useful for reporting errors, etc...
- */
-typedef struct
-{
-    /** Input text row (0-based). */
-    uint32_t row;
-
-    /** Input text column (0-based). */
-    uint32_t col;
-} <%= @grammar.prefix %>position_t;
-
 /** Lexed token information. */
 typedef struct
 {
--- a/spec/propane_spec.rb
+++ b/spec/propane_spec.rb
@ -1051,6 +1051,26 @@ EOF
        expect(results.stderr).to eq ""
        expect(results.status).to eq 0
      end
+
+      it "stores the token position in the AST Token node" do
+        write_grammar <<EOF
+ast;
+
+token a;
+token b;
+token c;
+drop /\\s+/;
+Start -> T T T;
+T -> a;
+T -> b;
+T -> c;
+EOF
+        run_propane(language: language)
+        compile("spec/test_ast_token_positions.#{language}", language: language) # per-language test program asserting each Token node's position.row/col
+        results = run_test
+        expect(results.stderr).to eq ""
+        expect(results.status).to eq 0
+      end
    end
  end
 end
--- a/spec/test_ast_token_positions.c
+++ b/spec/test_ast_token_positions.c
@ -0,0 +1,33 @@
+#include "testparser.h"
+#include <assert.h>
+#include <string.h>
+#include "testutils.h"
+
+int main() /* Checks the 0-based row/col stored on each AST Token node. */
+{
+    char const * input = "abc"; /* one row: three tokens expected at columns 0, 1, 2 */
+    p_context_t context;
+    p_context_init(&context, (uint8_t const *)input, strlen(input));
+    assert(p_parse(&context) == P_SUCCESS); /* NOTE(review): the parse runs inside assert(); an NDEBUG build would skip it */
+    Start * start = p_result(&context);
+    assert_eq(0, start->pT1->pToken->position.row);
+    assert_eq(0, start->pT1->pToken->position.col);
+    assert_eq(0, start->pT2->pToken->position.row);
+    assert_eq(1, start->pT2->pToken->position.col);
+    assert_eq(0, start->pT3->pToken->position.row);
+    assert_eq(2, start->pT3->pToken->position.col);
+
+    input = "\n\n  a\nc\n\n     a"; /* tokens split by newlines/spaces; expected (row, col): (2, 2), (3, 0), (5, 5) */
+    p_context_init(&context, (uint8_t const *)input, strlen(input)); /* same context object re-initialized for the second input */
+    assert(p_parse(&context) == P_SUCCESS);
+    start = p_result(&context);
+    assert_eq(2, start->pT1->pToken->position.row);
+    assert_eq(2, start->pT1->pToken->position.col);
+    assert_eq(3, start->pT2->pToken->position.row);
+    assert_eq(0, start->pT2->pToken->position.col);
+    assert_eq(5, start->pT3->pToken->position.row);
+    assert_eq(5, start->pT3->pToken->position.col);
+
+    return 0;
+}
+
--- a/spec/test_ast_token_positions.d
+++ b/spec/test_ast_token_positions.d
@ -0,0 +1,34 @@
+import testparser;
+import std.stdio;
+import testutils;
+
+int main() // No work is done here; the checks live in the unittest block below.
+{
+    return 0;
+}
+
+unittest // Checks the 0-based row/col stored on each AST Token node.
+{
+    string input = "abc"; // one row: three tokens expected at columns 0, 1, 2
+    p_context_t context;
+    p_context_init(&context, input);
+    assert(p_parse(&context) == P_SUCCESS);
+    Start * start = p_result(&context);
+    assert_eq(0, start.pT1.pToken.position.row);
+    assert_eq(0, start.pT1.pToken.position.col);
+    assert_eq(0, start.pT2.pToken.position.row);
+    assert_eq(1, start.pT2.pToken.position.col);
+    assert_eq(0, start.pT3.pToken.position.row);
+    assert_eq(2, start.pT3.pToken.position.col);
+
+    input = "\n\n  a\nc\n\n     a"; // tokens split by newlines/spaces; expected (row, col): (2, 2), (3, 0), (5, 5)
+    p_context_init(&context, input); // same context object re-initialized for the second input
+    assert(p_parse(&context) == P_SUCCESS);
+    start = p_result(&context);
+    assert_eq(2, start.pT1.pToken.position.row);
+    assert_eq(2, start.pT1.pToken.position.col);
+    assert_eq(3, start.pT2.pToken.position.row);
+    assert_eq(0, start.pT2.pToken.position.col);
+    assert_eq(5, start.pT3.pToken.position.row);
+    assert_eq(5, start.pT3.pToken.position.col);
+}