Track start and end position of rules in AST nodes - #27

This commit is contained in:
Josh Holtrop 2024-07-19 15:37:37 -04:00
parent f4ae1b8601
commit e647248e34
5 changed files with 143 additions and 14 deletions

View File

@ -972,10 +972,10 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
/* We shifted a token, mark it consumed. */ /* We shifted a token, mark it consumed. */
<% if @grammar.ast %> <% if @grammar.ast %>
<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>)); <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>));
token_ast_node->token = token;
token_ast_node->pvalue = token_info.pvalue;
token_ast_node->position = token_info.position; token_ast_node->position = token_info.position;
token_ast_node->end_position = token_info.end_position; token_ast_node->end_position = token_info.end_position;
token_ast_node->token = token;
token_ast_node->pvalue = token_info.pvalue;
state_values_stack_index(&statevalues, -1)->ast_node = token_ast_node; state_values_stack_index(&statevalues, -1)->ast_node = token_ast_node;
<% else %> <% else %>
state_values_stack_index(&statevalues, -1)->pvalue = token_info.pvalue; state_values_stack_index(&statevalues, -1)->pvalue = token_info.pvalue;
@ -1010,6 +1010,8 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
{ {
size_t n_fields = parser_reduce_table[reduce_index].rule_set_node_field_array_size; size_t n_fields = parser_reduce_table[reduce_index].rule_set_node_field_array_size;
ASTNode * node = (ASTNode *)malloc(sizeof(ASTNode) + n_fields * sizeof(void *)); ASTNode * node = (ASTNode *)malloc(sizeof(ASTNode) + n_fields * sizeof(void *));
node->position = INVALID_POSITION;
node->end_position = INVALID_POSITION;
for (size_t i = 0; i < n_fields; i++) for (size_t i = 0; i < n_fields; i++)
{ {
node->fields[i] = NULL; node->fields[i] = NULL;
@ -1028,6 +1030,20 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
node->fields[parser_reduce_table[reduce_index].rule_set_node_field_index_map[i]] = state_values_stack_index(&statevalues, -(int)parser_reduce_table[reduce_index].n_states + (int)i)->ast_node; node->fields[parser_reduce_table[reduce_index].rule_set_node_field_index_map[i]] = state_values_stack_index(&statevalues, -(int)parser_reduce_table[reduce_index].n_states + (int)i)->ast_node;
} }
} }
bool position_found = false;
for (size_t i = 0; i < n_fields; i++)
{
ASTNode * child = (ASTNode *)node->fields[i];
if ((child != NULL) && <%= @grammar.prefix %>position_valid(child->position))
{
if (!position_found)
{
node->position = child->position;
position_found = true;
}
node->end_position = child->end_position;
}
}
reduced_parser_node = node; reduced_parser_node = node;
} }
else else

View File

@ -63,6 +63,15 @@ public struct <%= @grammar.prefix %>position_t
/** Input text column (0-based). */ /** Input text column (0-based). */
uint col; uint col;
/** Invalid position value. */
enum INVALID = <%= @grammar.prefix %>position_t(0xFFFF_FFFF, 0xFFFF_FFFF);
/** Return whether the position is valid. */
public @property bool valid()
{
return row != 0xFFFF_FFFFu;
}
} }
<% if @grammar.ast %> <% if @grammar.ast %>
@ -79,15 +88,6 @@ public union <%= @grammar.prefix %>value_t
<% end %> <% end %>
<% if @grammar.ast %> <% if @grammar.ast %>
/** AST node types. @{ */
public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
{
<%= @grammar.prefix %>token_t token;
<%= @grammar.prefix %>value_t pvalue;
<%= @grammar.prefix %>position_t position;
<%= @grammar.prefix %>position_t end_position;
}
/** Common AST node structure. */ /** Common AST node structure. */
private struct ASTNode private struct ASTNode
{ {
@ -96,6 +96,16 @@ private struct ASTNode
void *[0] fields; void *[0] fields;
} }
/** AST node types. @{ */
public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
{
/* ASTNode fields must be present in the same order here. */
<%= @grammar.prefix %>position_t position;
<%= @grammar.prefix %>position_t end_position;
<%= @grammar.prefix %>token_t token;
<%= @grammar.prefix %>value_t pvalue;
}
<% @parser.rule_sets.each do |name, rule_set| %> <% @parser.rule_sets.each do |name, rule_set| %>
<% next if name.start_with?("$") %> <% next if name.start_with?("$") %>
<% next if rule_set.optional? %> <% next if rule_set.optional? %>
@ -1028,7 +1038,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
{ {
/* We shifted a token, mark it consumed. */ /* We shifted a token, mark it consumed. */
<% if @grammar.ast %> <% if @grammar.ast %>
<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token, token_info.pvalue, token_info.position, token_info.end_position); <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token_info.position, token_info.end_position, token, token_info.pvalue);
statevalues[$-1].ast_node = token_ast_node; statevalues[$-1].ast_node = token_ast_node;
<% else %> <% else %>
statevalues[$-1].pvalue = token_info.pvalue; statevalues[$-1].pvalue = token_info.pvalue;
@ -1063,6 +1073,8 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
{ {
size_t n_fields = parser_reduce_table[reduce_index].rule_set_node_field_array_size; size_t n_fields = parser_reduce_table[reduce_index].rule_set_node_field_array_size;
ASTNode * node = cast(ASTNode *)malloc(ASTNode.sizeof + n_fields * (void *).sizeof); ASTNode * node = cast(ASTNode *)malloc(ASTNode.sizeof + n_fields * (void *).sizeof);
node.position = <%= @grammar.prefix %>position_t.INVALID;
node.end_position = <%= @grammar.prefix %>position_t.INVALID;
foreach (i; 0..n_fields) foreach (i; 0..n_fields)
{ {
node.fields[i] = null; node.fields[i] = null;
@ -1081,6 +1093,20 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
node.fields[parser_reduce_table[reduce_index].rule_set_node_field_index_map[i]] = statevalues[$ - parser_reduce_table[reduce_index].n_states + i].ast_node; node.fields[parser_reduce_table[reduce_index].rule_set_node_field_index_map[i]] = statevalues[$ - parser_reduce_table[reduce_index].n_states + i].ast_node;
} }
} }
bool position_found = false;
foreach (i; 0..n_fields)
{
ASTNode * child = cast(ASTNode *)node.fields[i];
if (child && child.position.valid)
{
if (!position_found)
{
node.position = child.position;
position_found = true;
}
node.end_position = child.end_position;
}
}
reduced_parser_node = node; reduced_parser_node = node;
} }
else else

View File

@ -52,6 +52,12 @@ typedef struct
uint32_t col; uint32_t col;
} <%= @grammar.prefix %>position_t; } <%= @grammar.prefix %>position_t;
/** Invalid position value. */
#define INVALID_POSITION (<%= @grammar.prefix %>position_t){0xFFFFFFFFu, 0xFFFFFFFFu}
/** Return whether the position is valid. */
#define <%= @grammar.prefix %>position_valid(p) ((p).row != 0xFFFFFFFFu)
/** User header code blocks. */ /** User header code blocks. */
<%= @grammar.code_blocks.fetch("header", "") %> <%= @grammar.code_blocks.fetch("header", "") %>
@ -72,10 +78,11 @@ typedef union
/** AST node types. @{ */ /** AST node types. @{ */
typedef struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> typedef struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
{ {
<%= @grammar.prefix %>token_t token; /* ASTNode fields must be present in the same order here. */
<%= @grammar.prefix %>value_t pvalue;
<%= @grammar.prefix %>position_t position; <%= @grammar.prefix %>position_t position;
<%= @grammar.prefix %>position_t end_position; <%= @grammar.prefix %>position_t end_position;
<%= @grammar.prefix %>token_t token;
<%= @grammar.prefix %>value_t pvalue;
} <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>; } <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>;
<% @parser.rule_sets.each do |name, rule_set| %> <% @parser.rule_sets.each do |name, rule_set| %>

View File

@ -10,35 +10,75 @@ int main()
p_context_init(&context, (uint8_t const *)input, strlen(input)); p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS); assert(p_parse(&context) == P_SUCCESS);
Start * start = p_result(&context); Start * start = p_result(&context);
assert_eq(0, start->pT1->pToken->position.row); assert_eq(0, start->pT1->pToken->position.row);
assert_eq(0, start->pT1->pToken->position.col); assert_eq(0, start->pT1->pToken->position.col);
assert_eq(0, start->pT1->pToken->end_position.row); assert_eq(0, start->pT1->pToken->end_position.row);
assert_eq(0, start->pT1->pToken->end_position.col); assert_eq(0, start->pT1->pToken->end_position.col);
assert_eq(0, start->pT1->position.row);
assert_eq(0, start->pT1->position.col);
assert_eq(0, start->pT1->end_position.row);
assert_eq(0, start->pT1->end_position.col);
assert_eq(0, start->pT2->pToken->position.row); assert_eq(0, start->pT2->pToken->position.row);
assert_eq(1, start->pT2->pToken->position.col); assert_eq(1, start->pT2->pToken->position.col);
assert_eq(0, start->pT2->pToken->end_position.row); assert_eq(0, start->pT2->pToken->end_position.row);
assert_eq(2, start->pT2->pToken->end_position.col); assert_eq(2, start->pT2->pToken->end_position.col);
assert_eq(0, start->pT2->position.row);
assert_eq(1, start->pT2->position.col);
assert_eq(0, start->pT2->end_position.row);
assert_eq(2, start->pT2->end_position.col);
assert_eq(0, start->pT3->pToken->position.row); assert_eq(0, start->pT3->pToken->position.row);
assert_eq(3, start->pT3->pToken->position.col); assert_eq(3, start->pT3->pToken->position.col);
assert_eq(0, start->pT3->pToken->end_position.row); assert_eq(0, start->pT3->pToken->end_position.row);
assert_eq(5, start->pT3->pToken->end_position.col); assert_eq(5, start->pT3->pToken->end_position.col);
assert_eq(0, start->pT3->position.row);
assert_eq(3, start->pT3->position.col);
assert_eq(0, start->pT3->end_position.row);
assert_eq(5, start->pT3->end_position.col);
assert_eq(0, start->position.row);
assert_eq(0, start->position.col);
assert_eq(0, start->end_position.row);
assert_eq(5, start->end_position.col);
input = "\n\n bb\nc\ncc\n\n a"; input = "\n\n bb\nc\ncc\n\n a";
p_context_init(&context, (uint8_t const *)input, strlen(input)); p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS); assert(p_parse(&context) == P_SUCCESS);
start = p_result(&context); start = p_result(&context);
assert_eq(2, start->pT1->pToken->position.row); assert_eq(2, start->pT1->pToken->position.row);
assert_eq(2, start->pT1->pToken->position.col); assert_eq(2, start->pT1->pToken->position.col);
assert_eq(2, start->pT1->pToken->end_position.row); assert_eq(2, start->pT1->pToken->end_position.row);
assert_eq(3, start->pT1->pToken->end_position.col); assert_eq(3, start->pT1->pToken->end_position.col);
assert_eq(2, start->pT1->position.row);
assert_eq(2, start->pT1->position.col);
assert_eq(2, start->pT1->end_position.row);
assert_eq(3, start->pT1->end_position.col);
assert_eq(3, start->pT2->pToken->position.row); assert_eq(3, start->pT2->pToken->position.row);
assert_eq(0, start->pT2->pToken->position.col); assert_eq(0, start->pT2->pToken->position.col);
assert_eq(4, start->pT2->pToken->end_position.row); assert_eq(4, start->pT2->pToken->end_position.row);
assert_eq(1, start->pT2->pToken->end_position.col); assert_eq(1, start->pT2->pToken->end_position.col);
assert_eq(3, start->pT2->position.row);
assert_eq(0, start->pT2->position.col);
assert_eq(4, start->pT2->end_position.row);
assert_eq(1, start->pT2->end_position.col);
assert_eq(6, start->pT3->pToken->position.row); assert_eq(6, start->pT3->pToken->position.row);
assert_eq(5, start->pT3->pToken->position.col); assert_eq(5, start->pT3->pToken->position.col);
assert_eq(6, start->pT3->pToken->end_position.row); assert_eq(6, start->pT3->pToken->end_position.row);
assert_eq(5, start->pT3->pToken->end_position.col); assert_eq(5, start->pT3->pToken->end_position.col);
assert_eq(6, start->pT3->position.row);
assert_eq(5, start->pT3->position.col);
assert_eq(6, start->pT3->end_position.row);
assert_eq(5, start->pT3->end_position.col);
assert_eq(2, start->position.row);
assert_eq(2, start->position.col);
assert_eq(6, start->end_position.row);
assert_eq(5, start->end_position.col);
return 0; return 0;
} }

View File

@ -14,33 +14,73 @@ unittest
p_context_init(&context, input); p_context_init(&context, input);
assert(p_parse(&context) == P_SUCCESS); assert(p_parse(&context) == P_SUCCESS);
Start * start = p_result(&context); Start * start = p_result(&context);
assert_eq(0, start.pT1.pToken.position.row); assert_eq(0, start.pT1.pToken.position.row);
assert_eq(0, start.pT1.pToken.position.col); assert_eq(0, start.pT1.pToken.position.col);
assert_eq(0, start.pT1.pToken.end_position.row); assert_eq(0, start.pT1.pToken.end_position.row);
assert_eq(0, start.pT1.pToken.end_position.col); assert_eq(0, start.pT1.pToken.end_position.col);
assert_eq(0, start.pT1.position.row);
assert_eq(0, start.pT1.position.col);
assert_eq(0, start.pT1.end_position.row);
assert_eq(0, start.pT1.end_position.col);
assert_eq(0, start.pT2.pToken.position.row); assert_eq(0, start.pT2.pToken.position.row);
assert_eq(1, start.pT2.pToken.position.col); assert_eq(1, start.pT2.pToken.position.col);
assert_eq(0, start.pT2.pToken.end_position.row); assert_eq(0, start.pT2.pToken.end_position.row);
assert_eq(2, start.pT2.pToken.end_position.col); assert_eq(2, start.pT2.pToken.end_position.col);
assert_eq(0, start.pT2.position.row);
assert_eq(1, start.pT2.position.col);
assert_eq(0, start.pT2.end_position.row);
assert_eq(2, start.pT2.end_position.col);
assert_eq(0, start.pT3.pToken.position.row); assert_eq(0, start.pT3.pToken.position.row);
assert_eq(3, start.pT3.pToken.position.col); assert_eq(3, start.pT3.pToken.position.col);
assert_eq(0, start.pT3.pToken.end_position.row); assert_eq(0, start.pT3.pToken.end_position.row);
assert_eq(5, start.pT3.pToken.end_position.col); assert_eq(5, start.pT3.pToken.end_position.col);
assert_eq(0, start.pT3.position.row);
assert_eq(3, start.pT3.position.col);
assert_eq(0, start.pT3.end_position.row);
assert_eq(5, start.pT3.end_position.col);
assert_eq(0, start.position.row);
assert_eq(0, start.position.col);
assert_eq(0, start.end_position.row);
assert_eq(5, start.end_position.col);
input = "\n\n bb\nc\ncc\n\n a"; input = "\n\n bb\nc\ncc\n\n a";
p_context_init(&context, input); p_context_init(&context, input);
assert(p_parse(&context) == P_SUCCESS); assert(p_parse(&context) == P_SUCCESS);
start = p_result(&context); start = p_result(&context);
assert_eq(2, start.pT1.pToken.position.row); assert_eq(2, start.pT1.pToken.position.row);
assert_eq(2, start.pT1.pToken.position.col); assert_eq(2, start.pT1.pToken.position.col);
assert_eq(2, start.pT1.pToken.end_position.row); assert_eq(2, start.pT1.pToken.end_position.row);
assert_eq(3, start.pT1.pToken.end_position.col); assert_eq(3, start.pT1.pToken.end_position.col);
assert_eq(2, start.pT1.position.row);
assert_eq(2, start.pT1.position.col);
assert_eq(2, start.pT1.end_position.row);
assert_eq(3, start.pT1.end_position.col);
assert_eq(3, start.pT2.pToken.position.row); assert_eq(3, start.pT2.pToken.position.row);
assert_eq(0, start.pT2.pToken.position.col); assert_eq(0, start.pT2.pToken.position.col);
assert_eq(4, start.pT2.pToken.end_position.row); assert_eq(4, start.pT2.pToken.end_position.row);
assert_eq(1, start.pT2.pToken.end_position.col); assert_eq(1, start.pT2.pToken.end_position.col);
assert_eq(3, start.pT2.position.row);
assert_eq(0, start.pT2.position.col);
assert_eq(4, start.pT2.end_position.row);
assert_eq(1, start.pT2.end_position.col);
assert_eq(6, start.pT3.pToken.position.row); assert_eq(6, start.pT3.pToken.position.row);
assert_eq(5, start.pT3.pToken.position.col); assert_eq(5, start.pT3.pToken.position.col);
assert_eq(6, start.pT3.pToken.end_position.row); assert_eq(6, start.pT3.pToken.end_position.row);
assert_eq(5, start.pT3.pToken.end_position.col); assert_eq(5, start.pT3.pToken.end_position.col);
assert_eq(6, start.pT3.position.row);
assert_eq(5, start.pT3.position.col);
assert_eq(6, start.pT3.end_position.row);
assert_eq(5, start.pT3.end_position.col);
assert_eq(2, start.position.row);
assert_eq(2, start.position.col);
assert_eq(6, start.end_position.row);
assert_eq(5, start.end_position.col);
} }