diff --git a/CHANGELOG.md b/CHANGELOG.md index bde2b2e..77b1b78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## v1.5.0 + +### New Features + +- Track token position in AST Token node + ## v1.4.0 ### New Features diff --git a/assets/parser.c.erb b/assets/parser.c.erb index c13980d..a23ed7f 100644 --- a/assets/parser.c.erb +++ b/assets/parser.c.erb @@ -951,6 +951,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>)); token_ast_node->token = token; token_ast_node->pvalue = token_info.pvalue; + token_ast_node->position = token_info.position; state_values_stack_index(&statevalues, -1)->ast_node = token_ast_node; <% else %> state_values_stack_index(&statevalues, -1)->pvalue = token_info.pvalue; diff --git a/assets/parser.d.erb b/assets/parser.d.erb index ad488b1..7fe35c1 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -49,6 +49,20 @@ public enum : <%= @grammar.prefix %>token_t /** Code point type. */ public alias <%= @grammar.prefix %>code_point_t = uint; +/** + * A structure to keep track of input position. + * + * This is useful for reporting errors, etc... + */ +public struct <%= @grammar.prefix %>position_t +{ + /** Input text row (0-based). */ + uint row; + + /** Input text column (0-based). */ + uint col; +} + <% if @grammar.ast %> /** Parser values type. */ public alias <%= @grammar.prefix %>value_t = <%= @grammar.ptype %>; @@ -68,6 +82,7 @@ public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> { <%= @grammar.prefix %>token_t token; <%= @grammar.prefix %>value_t pvalue; + <%= @grammar.prefix %>position_t position; } <% @parser.rule_sets.each do |name, rule_set| %> @@ -89,20 +104,6 @@ public struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %> /** @} */ <% end %> -/** - * A structure to keep track of parser position. 
- * - * This is useful for reporting errors, etc... - */ -public struct <%= @grammar.prefix %>position_t -{ - /** Input text row (0-based). */ - uint row; - - /** Input text column (0-based). */ - uint col; -} - /** Lexed token information. */ public struct <%= @grammar.prefix %>token_info_t { @@ -996,7 +997,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont { /* We shifted a token, mark it consumed. */ <% if @grammar.ast %> - <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token, token_info.pvalue); + <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token, token_info.pvalue, token_info.position); statevalues[$-1].ast_node = token_ast_node; <% else %> statevalues[$-1].pvalue = token_info.pvalue; diff --git a/assets/parser.h.erb b/assets/parser.h.erb index a701d39..e5d21e8 100644 --- a/assets/parser.h.erb +++ b/assets/parser.h.erb @@ -38,6 +38,20 @@ typedef <%= get_type_for(@grammar.terminate_token_id) %> <%= @grammar.prefix %>t /** Code point type. */ typedef uint32_t <%= @grammar.prefix %>code_point_t; +/** + * A structure to keep track of input position. + * + * This is useful for reporting errors, etc... + */ +typedef struct +{ + /** Input text row (0-based). */ + uint32_t row; + + /** Input text column (0-based). */ + uint32_t col; +} <%= @grammar.prefix %>position_t; + /** User header code blocks. 
*/ <%= @grammar.code_blocks.fetch("header", "") %> @@ -60,6 +74,7 @@ typedef struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> { <%= @grammar.prefix %>token_t token; <%= @grammar.prefix %>value_t pvalue; + <%= @grammar.prefix %>position_t position; } <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>; <% @parser.rule_sets.each do |name, rule_set| %> @@ -87,20 +102,6 @@ typedef struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %> /** @} */ <% end %> -/** - * A structure to keep track of parser position. - * - * This is useful for reporting errors, etc... - */ -typedef struct -{ - /** Input text row (0-based). */ - uint32_t row; - - /** Input text column (0-based). */ - uint32_t col; -} <%= @grammar.prefix %>position_t; - /** Lexed token information. */ typedef struct { diff --git a/spec/propane_spec.rb b/spec/propane_spec.rb index 43b11d0..331b20a 100644 --- a/spec/propane_spec.rb +++ b/spec/propane_spec.rb @@ -1051,6 +1051,26 @@ EOF expect(results.stderr).to eq "" expect(results.status).to eq 0 end + + it "stores the token position in the AST Token node" do + write_grammar <<EOF +ast; + +token a; +token b; +token c; +drop /\\s+/; +Start -> T T T; +T -> a; +T -> b; +T -> c; +EOF + run_propane(language: language) + compile("spec/test_ast_token_positions.#{language}", language: language) + results = run_test + expect(results.stderr).to eq "" + expect(results.status).to eq 0 + end end end end diff --git a/spec/test_ast_token_positions.c b/spec/test_ast_token_positions.c new file mode 100644 index 0000000..1547445 --- /dev/null +++ b/spec/test_ast_token_positions.c @@ -0,0 +1,33 @@ +#include "testparser.h" +#include <assert.h> +#include <string.h> +#include "testutils.h" + +int main() +{ + char const * input = "abc"; + p_context_t context; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + Start * start = p_result(&context); + assert_eq(0, start->pT1->pToken->position.row); + assert_eq(0, start->pT1->pToken->position.col); + assert_eq(0, 
start->pT2->pToken->position.row); + assert_eq(1, start->pT2->pToken->position.col); + assert_eq(0, start->pT3->pToken->position.row); + assert_eq(2, start->pT3->pToken->position.col); + + input = "\n\n  a\nc\n\n     a"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + start = p_result(&context); + assert_eq(2, start->pT1->pToken->position.row); + assert_eq(2, start->pT1->pToken->position.col); + assert_eq(3, start->pT2->pToken->position.row); + assert_eq(0, start->pT2->pToken->position.col); + assert_eq(5, start->pT3->pToken->position.row); + assert_eq(5, start->pT3->pToken->position.col); + + return 0; +} + diff --git a/spec/test_ast_token_positions.d b/spec/test_ast_token_positions.d new file mode 100644 index 0000000..cc0768c --- /dev/null +++ b/spec/test_ast_token_positions.d @@ -0,0 +1,34 @@ +import testparser; +import std.stdio; +import testutils; + +int main() +{ + return 0; +} + +unittest +{ + string input = "abc"; + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + Start * start = p_result(&context); + assert_eq(0, start.pT1.pToken.position.row); + assert_eq(0, start.pT1.pToken.position.col); + assert_eq(0, start.pT2.pToken.position.row); + assert_eq(1, start.pT2.pToken.position.col); + assert_eq(0, start.pT3.pToken.position.row); + assert_eq(2, start.pT3.pToken.position.col); + + input = "\n\n  a\nc\n\n     a"; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + start = p_result(&context); + assert_eq(2, start.pT1.pToken.position.row); + assert_eq(2, start.pT1.pToken.position.col); + assert_eq(3, start.pT2.pToken.position.row); + assert_eq(0, start.pT2.pToken.position.col); + assert_eq(5, start.pT3.pToken.position.row); + assert_eq(5, start.pT3.pToken.position.col); +}