diff --git a/assets/parser.c.erb b/assets/parser.c.erb index ec05d77..6bba016 100644 --- a/assets/parser.c.erb +++ b/assets/parser.c.erb @@ -972,10 +972,10 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) /* We shifted a token, mark it consumed. */ <% if @grammar.ast %> <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>)); - token_ast_node->token = token; - token_ast_node->pvalue = token_info.pvalue; token_ast_node->position = token_info.position; token_ast_node->end_position = token_info.end_position; + token_ast_node->token = token; + token_ast_node->pvalue = token_info.pvalue; state_values_stack_index(&statevalues, -1)->ast_node = token_ast_node; <% else %> state_values_stack_index(&statevalues, -1)->pvalue = token_info.pvalue; @@ -1010,6 +1010,8 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) { size_t n_fields = parser_reduce_table[reduce_index].rule_set_node_field_array_size; ASTNode * node = (ASTNode *)malloc(sizeof(ASTNode) + n_fields * sizeof(void *)); + node->position = INVALID_POSITION; + node->end_position = INVALID_POSITION; for (size_t i = 0; i < n_fields; i++) { node->fields[i] = NULL; @@ -1028,6 +1030,20 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) node->fields[parser_reduce_table[reduce_index].rule_set_node_field_index_map[i]] = state_values_stack_index(&statevalues, -(int)parser_reduce_table[reduce_index].n_states + (int)i)->ast_node; } } + bool position_found = false; + for (size_t i = 0; i < n_fields; i++) + { + ASTNode * child = (ASTNode *)node->fields[i]; + if ((child != NULL) && <%= @grammar.prefix %>position_valid(child->position)) + { + if (!position_found) + { + node->position = child->position; + position_found = true; + } + node->end_position = child->end_position; + } + } reduced_parser_node = node; } else diff --git a/assets/parser.d.erb b/assets/parser.d.erb index 9ec5cb0..4dac5bd 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -63,6 +63,15 @@ public struct <%= @grammar.prefix %>position_t /** Input text column (0-based). */ uint col; + + /** Invalid position value. */ + enum INVALID = <%= @grammar.prefix %>position_t(0xFFFF_FFFF, 0xFFFF_FFFF); + + /** Return whether the position is valid. */ + public @property bool valid() + { + return row != 0xFFFF_FFFFu; + } } <% if @grammar.ast %> @@ -79,15 +88,6 @@ public union <%= @grammar.prefix %>value_t <% end %> <% if @grammar.ast %> -/** AST node types. @{ */ -public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> -{ - <%= @grammar.prefix %>token_t token; - <%= @grammar.prefix %>value_t pvalue; - <%= @grammar.prefix %>position_t position; - <%= @grammar.prefix %>position_t end_position; -} - /** Common AST node structure. */ private struct ASTNode { @@ -96,6 +96,16 @@ private struct ASTNode void *[0] fields; } +/** AST node types. @{ */ +public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> +{ + /* ASTNode fields must be present in the same order here. */ + <%= @grammar.prefix %>position_t position; + <%= @grammar.prefix %>position_t end_position; + <%= @grammar.prefix %>token_t token; + <%= @grammar.prefix %>value_t pvalue; +} + <% @parser.rule_sets.each do |name, rule_set| %> <% next if name.start_with?("$") %> <% next if rule_set.optional? %> @@ -1028,7 +1038,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont { /* We shifted a token, mark it consumed. */ <% if @grammar.ast %> - <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token, token_info.pvalue, token_info.position, token_info.end_position); + <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token_info.position, token_info.end_position, token, token_info.pvalue); statevalues[$-1].ast_node = token_ast_node; <% else %> statevalues[$-1].pvalue = token_info.pvalue; @@ -1063,6 +1073,8 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont { size_t n_fields = parser_reduce_table[reduce_index].rule_set_node_field_array_size; ASTNode * node = cast(ASTNode *)malloc(ASTNode.sizeof + n_fields * (void *).sizeof); + node.position = <%= @grammar.prefix %>position_t.INVALID; + node.end_position = <%= @grammar.prefix %>position_t.INVALID; foreach (i; 0..n_fields) { node.fields[i] = null; @@ -1081,6 +1093,20 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont node.fields[parser_reduce_table[reduce_index].rule_set_node_field_index_map[i]] = statevalues[$ - parser_reduce_table[reduce_index].n_states + i].ast_node; } } + bool position_found = false; + foreach (i; 0..n_fields) + { + ASTNode * child = cast(ASTNode *)node.fields[i]; + if (child && child.position.valid) + { + if (!position_found) + { + node.position = child.position; + position_found = true; + } + node.end_position = child.end_position; + } + } reduced_parser_node = node; } else diff --git a/assets/parser.h.erb b/assets/parser.h.erb index a7f1744..6b14593 100644 --- a/assets/parser.h.erb +++ b/assets/parser.h.erb @@ -52,6 +52,12 @@ typedef struct uint32_t col; } <%= @grammar.prefix %>position_t; +/** Invalid position value. */ +#define INVALID_POSITION (<%= @grammar.prefix %>position_t){0xFFFFFFFFu, 0xFFFFFFFFu} + +/** Return whether the position is valid. */ +#define <%= @grammar.prefix %>position_valid(p) ((p).row != 0xFFFFFFFFu) + /** User header code blocks. */ <%= @grammar.code_blocks.fetch("header", "") %> @@ -72,10 +78,11 @@ typedef union /** AST node types. @{ */ typedef struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> { - <%= @grammar.prefix %>token_t token; - <%= @grammar.prefix %>value_t pvalue; + /* ASTNode fields must be present in the same order here. */ <%= @grammar.prefix %>position_t position; <%= @grammar.prefix %>position_t end_position; + <%= @grammar.prefix %>token_t token; + <%= @grammar.prefix %>value_t pvalue; } <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>; <% @parser.rule_sets.each do |name, rule_set| %> diff --git a/spec/test_ast_token_positions.c b/spec/test_ast_token_positions.c index 71e93e6..09c835b 100644 --- a/spec/test_ast_token_positions.c +++ b/spec/test_ast_token_positions.c @@ -10,35 +10,75 @@ int main() p_context_init(&context, (uint8_t const *)input, strlen(input)); assert(p_parse(&context) == P_SUCCESS); Start * start = p_result(&context); + assert_eq(0, start->pT1->pToken->position.row); assert_eq(0, start->pT1->pToken->position.col); assert_eq(0, start->pT1->pToken->end_position.row); assert_eq(0, start->pT1->pToken->end_position.col); + assert_eq(0, start->pT1->position.row); + assert_eq(0, start->pT1->position.col); + assert_eq(0, start->pT1->end_position.row); + assert_eq(0, start->pT1->end_position.col); + assert_eq(0, start->pT2->pToken->position.row); assert_eq(1, start->pT2->pToken->position.col); assert_eq(0, start->pT2->pToken->end_position.row); assert_eq(2, start->pT2->pToken->end_position.col); + assert_eq(0, start->pT2->position.row); + assert_eq(1, start->pT2->position.col); + assert_eq(0, start->pT2->end_position.row); + assert_eq(2, start->pT2->end_position.col); + assert_eq(0, start->pT3->pToken->position.row); assert_eq(3, start->pT3->pToken->position.col); assert_eq(0, start->pT3->pToken->end_position.row); assert_eq(5, start->pT3->pToken->end_position.col); + assert_eq(0, start->pT3->position.row); + assert_eq(3, start->pT3->position.col); + assert_eq(0, start->pT3->end_position.row); + assert_eq(5, start->pT3->end_position.col); + + assert_eq(0, start->position.row); + assert_eq(0, start->position.col); + assert_eq(0, start->end_position.row); + assert_eq(5, start->end_position.col); input = "\n\n bb\nc\ncc\n\n a"; p_context_init(&context, (uint8_t const *)input, strlen(input)); assert(p_parse(&context) == P_SUCCESS); start = p_result(&context); + assert_eq(2, start->pT1->pToken->position.row); assert_eq(2, start->pT1->pToken->position.col); assert_eq(2, start->pT1->pToken->end_position.row); assert_eq(3, start->pT1->pToken->end_position.col); + assert_eq(2, start->pT1->position.row); + assert_eq(2, start->pT1->position.col); + assert_eq(2, start->pT1->end_position.row); + assert_eq(3, start->pT1->end_position.col); + assert_eq(3, start->pT2->pToken->position.row); assert_eq(0, start->pT2->pToken->position.col); assert_eq(4, start->pT2->pToken->end_position.row); assert_eq(1, start->pT2->pToken->end_position.col); + assert_eq(3, start->pT2->position.row); + assert_eq(0, start->pT2->position.col); + assert_eq(4, start->pT2->end_position.row); + assert_eq(1, start->pT2->end_position.col); + assert_eq(6, start->pT3->pToken->position.row); assert_eq(5, start->pT3->pToken->position.col); assert_eq(6, start->pT3->pToken->end_position.row); assert_eq(5, start->pT3->pToken->end_position.col); + assert_eq(6, start->pT3->position.row); + assert_eq(5, start->pT3->position.col); + assert_eq(6, start->pT3->end_position.row); + assert_eq(5, start->pT3->end_position.col); + + assert_eq(2, start->position.row); + assert_eq(2, start->position.col); + assert_eq(6, start->end_position.row); + assert_eq(5, start->end_position.col); return 0; } diff --git a/spec/test_ast_token_positions.d b/spec/test_ast_token_positions.d index a7312d2..58fe83e 100644 --- a/spec/test_ast_token_positions.d +++ b/spec/test_ast_token_positions.d @@ -14,33 +14,73 @@ unittest p_context_init(&context, input); assert(p_parse(&context) == P_SUCCESS); Start * start = p_result(&context); + assert_eq(0, start.pT1.pToken.position.row); assert_eq(0, start.pT1.pToken.position.col); assert_eq(0, start.pT1.pToken.end_position.row); assert_eq(0, start.pT1.pToken.end_position.col); + assert_eq(0, start.pT1.position.row); + assert_eq(0, start.pT1.position.col); + assert_eq(0, start.pT1.end_position.row); + assert_eq(0, start.pT1.end_position.col); + assert_eq(0, start.pT2.pToken.position.row); assert_eq(1, start.pT2.pToken.position.col); assert_eq(0, start.pT2.pToken.end_position.row); assert_eq(2, start.pT2.pToken.end_position.col); + assert_eq(0, start.pT2.position.row); + assert_eq(1, start.pT2.position.col); + assert_eq(0, start.pT2.end_position.row); + assert_eq(2, start.pT2.end_position.col); + assert_eq(0, start.pT3.pToken.position.row); assert_eq(3, start.pT3.pToken.position.col); assert_eq(0, start.pT3.pToken.end_position.row); assert_eq(5, start.pT3.pToken.end_position.col); + assert_eq(0, start.pT3.position.row); + assert_eq(3, start.pT3.position.col); + assert_eq(0, start.pT3.end_position.row); + assert_eq(5, start.pT3.end_position.col); + + assert_eq(0, start.position.row); + assert_eq(0, start.position.col); + assert_eq(0, start.end_position.row); + assert_eq(5, start.end_position.col); input = "\n\n bb\nc\ncc\n\n a"; p_context_init(&context, input); assert(p_parse(&context) == P_SUCCESS); start = p_result(&context); + assert_eq(2, start.pT1.pToken.position.row); assert_eq(2, start.pT1.pToken.position.col); assert_eq(2, start.pT1.pToken.end_position.row); assert_eq(3, start.pT1.pToken.end_position.col); + assert_eq(2, start.pT1.position.row); + assert_eq(2, start.pT1.position.col); + assert_eq(2, start.pT1.end_position.row); + assert_eq(3, start.pT1.end_position.col); + assert_eq(3, start.pT2.pToken.position.row); assert_eq(0, start.pT2.pToken.position.col); assert_eq(4, start.pT2.pToken.end_position.row); assert_eq(1, start.pT2.pToken.end_position.col); + assert_eq(3, start.pT2.position.row); + assert_eq(0, start.pT2.position.col); + assert_eq(4, start.pT2.end_position.row); + assert_eq(1, start.pT2.end_position.col); + assert_eq(6, start.pT3.pToken.position.row); assert_eq(5, start.pT3.pToken.position.col); assert_eq(6, start.pT3.pToken.end_position.row); assert_eq(5, start.pT3.pToken.end_position.col); + assert_eq(6, start.pT3.position.row); + assert_eq(5, start.pT3.position.col); + assert_eq(6, start.pT3.end_position.row); + assert_eq(5, start.pT3.end_position.col); + + assert_eq(2, start.position.row); + assert_eq(2, start.position.col); + assert_eq(6, start.end_position.row); + assert_eq(5, start.end_position.col); }