Track token position in AST Token node

This commit is contained in:
Josh Holtrop 2024-05-27 22:10:05 -04:00
parent aaeb0c4db1
commit 911e9505b7
7 changed files with 125 additions and 29 deletions

View File

@ -1,3 +1,9 @@
## v1.5.0
### New Features
- Track token position in AST Token node
## v1.4.0 ## v1.4.0
### New Features ### New Features

View File

@ -951,6 +951,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>)); <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>));
token_ast_node->token = token; token_ast_node->token = token;
token_ast_node->pvalue = token_info.pvalue; token_ast_node->pvalue = token_info.pvalue;
token_ast_node->position = token_info.position;
state_values_stack_index(&statevalues, -1)->ast_node = token_ast_node; state_values_stack_index(&statevalues, -1)->ast_node = token_ast_node;
<% else %> <% else %>
state_values_stack_index(&statevalues, -1)->pvalue = token_info.pvalue; state_values_stack_index(&statevalues, -1)->pvalue = token_info.pvalue;

View File

@ -49,6 +49,20 @@ public enum : <%= @grammar.prefix %>token_t
/** Code point type. */ /** Code point type. */
public alias <%= @grammar.prefix %>code_point_t = uint; public alias <%= @grammar.prefix %>code_point_t = uint;
/**
* A structure to keep track of input position.
*
* Identifies a location in the input text by row and column, both 0-based.
* This is useful for reporting errors, etc...
*/
public struct <%= @grammar.prefix %>position_t
{
/** Input text row (0-based). */
uint row;
/** Input text column (0-based). */
uint col;
}
<% if @grammar.ast %> <% if @grammar.ast %>
/** Parser values type. */ /** Parser values type. */
public alias <%= @grammar.prefix %>value_t = <%= @grammar.ptype %>; public alias <%= @grammar.prefix %>value_t = <%= @grammar.ptype %>;
@ -68,6 +82,7 @@ public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
{ {
<%= @grammar.prefix %>token_t token; <%= @grammar.prefix %>token_t token;
<%= @grammar.prefix %>value_t pvalue; <%= @grammar.prefix %>value_t pvalue;
<%= @grammar.prefix %>position_t position;
} }
<% @parser.rule_sets.each do |name, rule_set| %> <% @parser.rule_sets.each do |name, rule_set| %>
@ -89,20 +104,6 @@ public struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
/** @} */ /** @} */
<% end %> <% end %>
/**
* A structure to keep track of parser position.
*
* This is useful for reporting errors, etc...
*/
public struct <%= @grammar.prefix %>position_t
{
/** Input text row (0-based). */
uint row;
/** Input text column (0-based). */
uint col;
}
/** Lexed token information. */ /** Lexed token information. */
public struct <%= @grammar.prefix %>token_info_t public struct <%= @grammar.prefix %>token_info_t
{ {
@ -996,7 +997,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
{ {
/* We shifted a token, mark it consumed. */ /* We shifted a token, mark it consumed. */
<% if @grammar.ast %> <% if @grammar.ast %>
<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token, token_info.pvalue); <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token, token_info.pvalue, token_info.position);
statevalues[$-1].ast_node = token_ast_node; statevalues[$-1].ast_node = token_ast_node;
<% else %> <% else %>
statevalues[$-1].pvalue = token_info.pvalue; statevalues[$-1].pvalue = token_info.pvalue;

View File

@ -38,6 +38,20 @@ typedef <%= get_type_for(@grammar.terminate_token_id) %> <%= @grammar.prefix %>t
/** Code point type. */ /** Code point type. */
typedef uint32_t <%= @grammar.prefix %>code_point_t; typedef uint32_t <%= @grammar.prefix %>code_point_t;
/**
* A structure to keep track of input position.
*
* Identifies a location in the input text by row and column, both 0-based.
* This is useful for reporting errors, etc...
*
* This type must be defined before the AST Token node structure, which
* contains a member of this type.
*/
typedef struct
{
/** Input text row (0-based). */
uint32_t row;
/** Input text column (0-based). */
uint32_t col;
} <%= @grammar.prefix %>position_t;
/** User header code blocks. */ /** User header code blocks. */
<%= @grammar.code_blocks.fetch("header", "") %> <%= @grammar.code_blocks.fetch("header", "") %>
@ -60,6 +74,7 @@ typedef struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
{ {
<%= @grammar.prefix %>token_t token; <%= @grammar.prefix %>token_t token;
<%= @grammar.prefix %>value_t pvalue; <%= @grammar.prefix %>value_t pvalue;
<%= @grammar.prefix %>position_t position;
} <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>; } <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>;
<% @parser.rule_sets.each do |name, rule_set| %> <% @parser.rule_sets.each do |name, rule_set| %>
@ -87,20 +102,6 @@ typedef struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
/** @} */ /** @} */
<% end %> <% end %>
/**
* A structure to keep track of parser position.
*
* This is useful for reporting errors, etc...
*/
typedef struct
{
/** Input text row (0-based). */
uint32_t row;
/** Input text column (0-based). */
uint32_t col;
} <%= @grammar.prefix %>position_t;
/** Lexed token information. */ /** Lexed token information. */
typedef struct typedef struct
{ {

View File

@ -1051,6 +1051,26 @@ EOF
expect(results.stderr).to eq "" expect(results.stderr).to eq ""
expect(results.status).to eq 0 expect(results.status).to eq 0
end end
# Checks that a generated parser records each token's input position in its
# AST Token node, for the language currently under test.
it "stores the token position in the AST Token node" do
# The grammar declares `ast;`, three tokens (a, b, c), a drop pattern for
# whitespace, and a Start rule made of exactly three T nodes, each of which
# is a single token.
write_grammar <<EOF
ast;
token a;
token b;
token c;
drop /\\s+/;
Start -> T T T;
T -> a;
T -> b;
T -> c;
EOF
# run_propane, compile and run_test are spec helpers defined outside this
# example.
run_propane(language: language)
# The row/column assertions themselves are in the language-specific test
# program compiled here.
compile("spec/test_ast_token_positions.#{language}", language: language)
results = run_test
# The test program must write nothing to stderr and exit with status 0.
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
end end
end end
end end

View File

@ -0,0 +1,33 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"
/**
 * Test that the parser stores each token's input position in its AST Token
 * node.
 *
 * Two inputs are parsed; for each, the 0-based row and column recorded for
 * the three tokens under the Start node are checked.
 *
 * @return 0 when every check passes.
 */
int main()
{
    /* Case 1: all three tokens on row 0, in adjacent columns 0, 1, 2. */
    char const * input = "abc";
    p_context_t context;
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    assert(p_parse(&context) == P_SUCCESS);
    Start * start = p_result(&context);
    assert_eq(0, start->pT1->pToken->position.row);
    assert_eq(0, start->pT1->pToken->position.col);
    assert_eq(0, start->pT2->pToken->position.row);
    assert_eq(1, start->pT2->pToken->position.col);
    assert_eq(0, start->pT3->pToken->position.row);
    assert_eq(2, start->pT3->pToken->position.col);

    /* Case 2: tokens separated by newlines and spaces. The number of spaces
     * before each 'a' must equal the column asserted for it below (2 and 5),
     * so the runs of spaces in this literal are significant. */
    input = "\n\n  a\nc\n\n     a";
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    assert(p_parse(&context) == P_SUCCESS);
    start = p_result(&context);
    /* First 'a': two newlines, then two spaces. */
    assert_eq(2, start->pT1->pToken->position.row);
    assert_eq(2, start->pT1->pToken->position.col);
    /* 'c': first character of the following row. */
    assert_eq(3, start->pT2->pToken->position.row);
    assert_eq(0, start->pT2->pToken->position.col);
    /* Second 'a': two more newlines, then five spaces. */
    assert_eq(5, start->pT3->pToken->position.row);
    assert_eq(5, start->pT3->pToken->position.col);

    return 0;
}

View File

@ -0,0 +1,34 @@
import testparser;
import std.stdio;
import testutils;
/** Program entry point; performs no work and returns 0. The checks are in the unittest block of this file. */
int main()
{
return 0;
}
/*
 * Test that the parser stores each token's input position in its AST Token
 * node.
 *
 * Two inputs are parsed; for each, the 0-based row and column recorded for
 * the three tokens under the Start node are checked.
 */
unittest
{
    /* Case 1: all three tokens on row 0, in adjacent columns 0, 1, 2. */
    string input = "abc";
    p_context_t context;
    p_context_init(&context, input);
    assert(p_parse(&context) == P_SUCCESS);
    Start * start = p_result(&context);
    assert_eq(0, start.pT1.pToken.position.row);
    assert_eq(0, start.pT1.pToken.position.col);
    assert_eq(0, start.pT2.pToken.position.row);
    assert_eq(1, start.pT2.pToken.position.col);
    assert_eq(0, start.pT3.pToken.position.row);
    assert_eq(2, start.pT3.pToken.position.col);

    /* Case 2: tokens separated by newlines and spaces. The number of spaces
     * before each 'a' must equal the column asserted for it below (2 and 5),
     * so the runs of spaces in this literal are significant. */
    input = "\n\n  a\nc\n\n     a";
    p_context_init(&context, input);
    assert(p_parse(&context) == P_SUCCESS);
    start = p_result(&context);
    /* First 'a': two newlines, then two spaces. */
    assert_eq(2, start.pT1.pToken.position.row);
    assert_eq(2, start.pT1.pToken.position.col);
    /* 'c': first character of the following row. */
    assert_eq(3, start.pT2.pToken.position.row);
    assert_eq(0, start.pT2.pToken.position.col);
    /* Second 'a': two more newlines, then five spaces. */
    assert_eq(5, start.pT3.pToken.position.row);
    assert_eq(5, start.pT3.pToken.position.col);
}