Allow user to specify custom token node fields

This commit is contained in:
Josh Holtrop 2026-02-21 21:06:40 -05:00
parent ff61dd05d9
commit d4ad67c23d
9 changed files with 295 additions and 13 deletions

View File

@ -75,6 +75,10 @@ const char * <%= @grammar.prefix %>token_names[] = {
/** /**
* Deinitialize and deallocate lexer/parser context structure. * Deinitialize and deallocate lexer/parser context structure.
* *
* For C++, destructors will be called for any context user fields. However, if
* pointers are used to store allocated resources, the user should free them
* before calling this function.
*
* @param context * @param context
* Lexer/parser context structure allocated with <%= @grammar.prefix %>context_new(). * Lexer/parser context structure allocated with <%= @grammar.prefix %>context_new().
*/ */
@ -1014,13 +1018,18 @@ static size_t parse_from(<%= @grammar.prefix %>context_t * context, size_t start
{ {
/* We shifted a token, mark it consumed. */ /* We shifted a token, mark it consumed. */
<% if @grammar.tree %> <% if @grammar.tree %>
<% if @cpp %>
<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> * token_tree_node = new <%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %>();
<% else %>
<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> * token_tree_node = (<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> *)malloc(sizeof(<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %>)); <%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> * token_tree_node = (<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> *)malloc(sizeof(<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %>));
<% end %>
token_tree_node->position = token_info.position; token_tree_node->position = token_info.position;
token_tree_node->end_position = token_info.end_position; token_tree_node->end_position = token_info.end_position;
token_tree_node->n_fields = 0u; token_tree_node->n_fields = 0u;
token_tree_node->is_token = 1u; token_tree_node->is_token = 1u;
token_tree_node->token = token; token_tree_node->token = token;
token_tree_node->pvalue = token_info.pvalue; token_tree_node->pvalue = token_info.pvalue;
<%= expand_code(@grammar.on_token_node, false, nil, nil) %>
state_values_stack_index(&statevalues, -1)->tree_node = token_tree_node; state_values_stack_index(&statevalues, -1)->tree_node = token_tree_node;
<% else %> <% else %>
state_values_stack_index(&statevalues, -1)->pvalue = token_info.pvalue; state_values_stack_index(&statevalues, -1)->pvalue = token_info.pvalue;
@ -1210,10 +1219,16 @@ static void tree_delete(TreeNode * node)
{ {
if (node->is_token) if (node->is_token)
{ {
<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> * token_tree_node = (<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> *) node;
<% if @grammar.free_token_node %> <% if @grammar.free_token_node %>
<%= @grammar.free_token_node %>((<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> *) node); <%= @grammar.free_token_node %>(token_tree_node);
<% end %>
<%= expand_code(@grammar.free_token_user_fields, false, nil, nil) %>
<% if @cpp %>
delete token_tree_node;
<% else %>
free(token_tree_node);
<% end %> <% end %>
/* TODO: free value_t */
} }
else if (node->n_fields > 0u) else if (node->n_fields > 0u)
{ {
@ -1224,8 +1239,8 @@ static void tree_delete(TreeNode * node)
tree_delete(node->fields[i]); tree_delete(node->fields[i]);
} }
} }
}
free(node); free(node);
}
} }
/** /**

View File

@ -103,9 +103,9 @@ public struct <%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %>
/* TreeNode fields must be present in the same order here. */ /* TreeNode fields must be present in the same order here. */
<%= @grammar.prefix %>position_t position; <%= @grammar.prefix %>position_t position;
<%= @grammar.prefix %>position_t end_position; <%= @grammar.prefix %>position_t end_position;
<%= @grammar.token_user_fields %>
<%= @grammar.prefix %>token_t token; <%= @grammar.prefix %>token_t token;
<%= @grammar.prefix %>value_t pvalue; <%= @grammar.prefix %>value_t pvalue;
<%= @grammar.token_user_fields %>
} }
<% @parser.rule_sets.each do |name, rule_set| %> <% @parser.rule_sets.each do |name, rule_set| %>
@ -1055,6 +1055,7 @@ private size_t parse_from(<%= @grammar.prefix %>context_t * context, size_t star
/* We shifted a token, mark it consumed. */ /* We shifted a token, mark it consumed. */
<% if @grammar.tree %> <% if @grammar.tree %>
<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> * token_tree_node = new <%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %>(token_info.position, token_info.end_position, token, token_info.pvalue); <%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> * token_tree_node = new <%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %>(token_info.position, token_info.end_position, token, token_info.pvalue);
<%= expand_code(@grammar.on_token_node, false, nil, nil) %>
statevalues[$-1].tree_node = token_tree_node; statevalues[$-1].tree_node = token_tree_node;
<% else %> <% else %>
statevalues[$-1].pvalue = token_info.pvalue; statevalues[$-1].pvalue = token_info.pvalue;

View File

@ -256,6 +256,95 @@ If a pointer to any allocated memory is stored in a user-defined context field,
it is up to the user to free any memory when the program is finished using the it is up to the user to free any memory when the program is finished using the
context structure. context structure.
### Custom token fields code blocks: the `token_user_fields` statement
When tree generation mode is active, Propane generates a tree node structure
and a token node structure for each matching rule and token instance in the
input string.
The user may add custom fields to token tree nodes using the `token_user_fields`
statement.
The code block supplied to the `token_user_fields` statement is inserted into
the `struct` generated by the parser to hold a token tree node.
Example (D/C++):
```
token_user_fields <<
string mytokenval;
>>
```
The `on_token_node` statement can be used to provide code that initializes
any token user fields when a token tree node instance is created.
For example (C++):
```
context_user_fields <<
std::string comments;
>>
token_user_fields <<
std::string comments;
>>
on_token_node <<
${token.comments} = ${context.comments};
${context.comments} = "";
>>
drop /#(.*)\n/ <<
/* Accumulate comments before the next parser tree node. */
${context.comments} += std::string((const char *)match, match_length);
>>
```
If a pointer to any allocated memory is stored in a user-defined token field,
the `free_token_user_fields` statement can be used to supply a code block which
will be executed immediately before the token node is freed.
For C++, the `delete` statement is used to free the token tree node, so the
destructor for any custom token user fields will be called.
### Custom initialization of a token tree node - the `on_token_node` statement
The `on_token_node` statement can be used to provide code that initializes
any token user fields when a token tree node instance is created.
For example (C++):
```
context_user_fields <<
std::string comments;
>>
token_user_fields <<
std::string comments;
>>
on_token_node <<
${token.comments} = ${context.comments};
${context.comments} = "";
>>
drop /#(.*)\n/ <<
/* Accumulate comments before the next parser tree node. */
${context.comments} += std::string((const char *)match, match_length);
>>
```
### Freeing allocated memory in a custom token user field - the `free_token_user_fields` statement
The `free_token_user_fields` statement allows the user to provide a code block
which will be executed immediately prior to freeing the token tree node.
For example (C):
```
token_user_fields <<
char * comments;
>>
on_token_node <<
${token.comments} = (char *)malloc(some_len);
>>
free_token_user_fields <<
free(${token.comments});
>>
```
##> Tree generation mode - the `tree` statement ##> Tree generation mode - the `tree` statement
To activate tree generation mode, place the `tree` statement in your grammar file: To activate tree generation mode, place the `tree` statement in your grammar file:

View File

@ -20,7 +20,7 @@ syn match propaneOperator "->"
syn match propaneFieldAlias ":[a-zA-Z0-9_]\+" contains=propaneFieldOperator syn match propaneFieldAlias ":[a-zA-Z0-9_]\+" contains=propaneFieldOperator
syn match propaneFieldOperator ":" contained syn match propaneFieldOperator ":" contained
syn match propaneOperator "?" syn match propaneOperator "?"
syn keyword propaneKeyword drop free_token_node module prefix ptype start token tokenid tree tree_prefix tree_suffix syn keyword propaneKeyword drop free_token_node free_token_user_fields module prefix ptype start token token_user_fields tokenid tree tree_prefix tree_suffix
syn region propaneRegex start="/" end="/" skip="\v\\\\|\\/" syn region propaneRegex start="/" end="/" skip="\v\\\\|\\/"

View File

@ -13,8 +13,13 @@ class Propane
@language = @language =
if output_file.end_with?(".d") if output_file.end_with?(".d")
"d" "d"
else elsif output_file.end_with?(".c")
"c" "c"
elsif output_file =~ %r{\.(cc|cpp|cxx)$}
@cpp = true
"c"
else
raise Error.new("Could not determine target language from output file name (#{output_file})")
end end
@options = options @options = options
process_grammar! process_grammar!
@ -274,6 +279,15 @@ class Propane
"context.#{fieldname}" "context.#{fieldname}"
end end
end end
# Expand every `${token.<field>}` reference in the user code into an access of
# that field on the generated `token_tree_node` variable.
code = code.gsub(/\$\{token\.(\w+)\}/) do |match|
fieldname = $1
case @language
when "c"
# C output holds the token node through a pointer.
"token_tree_node->#{fieldname}"
when "d"
# D output uses plain member access.
"token_tree_node.#{fieldname}"
end
end
if parser if parser
code = code.gsub(/\$\$/) do |match| code = code.gsub(/\$\$/) do |match|
case @language case @language

View File

@ -18,8 +18,9 @@ class Propane
attr_reader :code_blocks attr_reader :code_blocks
attr_reader :ptypes attr_reader :ptypes
attr_reader :prefix attr_reader :prefix
attr_reader :token_node attr_reader :on_token_node
attr_reader :token_user_fields attr_reader :token_user_fields
attr_reader :free_token_user_fields
def initialize(input) def initialize(input)
@patterns = [] @patterns = []
@ -38,8 +39,9 @@ class Propane
@tree_suffix = "" @tree_suffix = ""
@free_token_node = nil @free_token_node = nil
@context_user_fields = nil @context_user_fields = nil
@token_node = nil @on_token_node = ""
@token_user_fields = nil @token_user_fields = nil
@free_token_user_fields = ""
parse_grammar! parse_grammar!
@start_rules << "Start" if @start_rules.empty? @start_rules << "Start" if @start_rules.empty?
end end
@ -74,8 +76,9 @@ class Propane
elsif parse_tree_suffix_statement! elsif parse_tree_suffix_statement!
elsif parse_free_token_node_statement! elsif parse_free_token_node_statement!
elsif parse_module_statement! elsif parse_module_statement!
elsif parse_token_node_statement! elsif parse_on_token_node_statement!
elsif parse_token_user_fields_statement! elsif parse_token_user_fields_statement!
elsif parse_free_token_user_fields_statement!
elsif parse_ptype_statement! elsif parse_ptype_statement!
elsif parse_pattern_statement! elsif parse_pattern_statement!
elsif parse_start_statement! elsif parse_start_statement!
@ -151,13 +154,12 @@ class Propane
end end
end end
def parse_token_node_statement! def parse_on_token_node_statement!
if md = consume!(/token_node\b\s*/) if md = consume!(/on_token_node\b\s*/)
unless code = parse_code_block! unless code = parse_code_block!
raise Error.new("Line #{@line_number}: expected code block") raise Error.new("Line #{@line_number}: expected code block")
end end
@token_node ||= "" @on_token_node += code
@token_node += code
end end
end end
@ -171,6 +173,15 @@ class Propane
end end
end end
# Parse a `free_token_user_fields` statement at the current input position.
#
# When the keyword is present, the code block that follows it is appended to
# the accumulated @free_token_user_fields text, so several statements in one
# grammar concatenate in order.
#
# Returns a truthy value when a statement was consumed and nil otherwise, so
# the caller can use it as one arm of its statement-dispatch chain.
# Raises Error when the keyword is not followed by a code block.
def parse_free_token_user_fields_statement!
  return nil unless consume!(/free_token_user_fields\b\s*/)

  code = parse_code_block!
  raise Error.new("Line #{@line_number}: expected code block") unless code

  @free_token_user_fields += code
end
def parse_ptype_statement! def parse_ptype_statement!
if consume!(/ptype\s+/) if consume!(/ptype\s+/)
name = "default" name = "default"

View File

@ -14,6 +14,7 @@ describe Propane do
end end
def run_propane(options = {}) def run_propane(options = {})
options[:language] ||= "d"
@statics[:build_test_id] ||= 0 @statics[:build_test_id] ||= 0
@statics[:build_test_id] += 1 @statics[:build_test_id] += 1
if ENV["dist_specs"] if ENV["dist_specs"]
@ -1604,6 +1605,103 @@ EOF
expect(results.stderr).to include %r{acount: 11\n} expect(results.stderr).to include %r{acount: 11\n}
expect(results.status).to eq 0 expect(results.status).to eq 0
end end
# Writes a language-specific grammar that declares a `comments` field on both
# the context and the token node, generates the parser, compiles it together
# with spec/test_token_user_fields.<language>, and expects the resulting test
# program to exit with status 0.
it "allows custom token user fields" do
# D: comment text is appended to the context string and handed over to each
# token node in on_token_node.
if language == "d"
write_grammar <<EOF
context_user_fields <<
string comments;
>>
token_user_fields <<
string comments;
>>
on_token_node <<
${token.comments} = ${context.comments};
${context.comments} = "";
>>
tree;
drop /\\s+/;
drop /#(.*)\\n/ <<
${context.comments} ~= match;
>>
token id /\\w+/;
Start -> IDs;
IDs -> ;
IDs -> id IDs;
EOF
# C: the comment buffer is malloc'd, transferred to the token node in
# on_token_node, and released by the free_token_user_fields block.
elsif language == "c"
write_grammar <<EOF
<<
#include <string.h>
#include <stdlib.h>
>>
context_user_fields <<
char * comments;
>>
token_user_fields <<
char * comments;
>>
free_token_user_fields <<
free(${token.comments});
>>
on_token_node <<
${token.comments} = ${context.comments};
${context.comments} = (char *)malloc(1);
${context.comments}[0] = '\\0';
>>
tree;
drop /\\s+/;
drop /#(.*)\\n/ <<
size_t cur_len = 0u;
if (${context.comments} != NULL)
cur_len = strlen(${context.comments});
char * commentsnew = (char *)malloc(cur_len + match_length + 1);
if (${context.comments} != NULL)
memcpy(commentsnew, ${context.comments}, cur_len);
memcpy(&commentsnew[cur_len], match, match_length);
commentsnew[cur_len + match_length] = '\\0';
if (${context.comments} != NULL)
{
free(${context.comments});
}
${context.comments} = commentsnew;
>>
token id /\\w+/;
Start -> IDs;
IDs -> ;
IDs -> id IDs;
EOF
else # C++
write_grammar <<EOF
<<header
#include <string>
>>
context_user_fields <<
std::string comments;
>>
token_user_fields <<
std::string comments;
>>
on_token_node <<
${token.comments} = ${context.comments};
${context.comments} = "";
>>
tree;
drop /\\s+/;
drop /#(.*)\\n/ <<
${context.comments} += std::string((const char *)match, match_length);
>>
token id /\\w+/;
Start -> IDs;
IDs -> ;
IDs -> id IDs;
EOF
end
# Generate the parser, build it with the matching test driver, and run it.
run_propane(language: language)
compile("spec/test_token_user_fields.#{language}", language: language)
results = run_test(language: language)
expect(results.status).to eq 0
end
end end
end end
end end

View File

@ -0,0 +1,30 @@
#include "testparser.h"
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/**
 * Parse an input made of comment lines and identifiers with the generated
 * parser, then release the context and the resulting tree.
 *
 * @return 0 when the input parses successfully, 1 otherwise.
 */
int main(void)
{
    char const * input =
        "# c1\n"
        "# c2\n"
        "\n"
        "first\n"
        "\n \n \n"
        " # s1\n"
        " # s2\n"
        "second\n";
    p_context_t * context;
    context = p_context_new((uint8_t const *)input, strlen(input));
    /* Call the parser outside of assert(): an expression inside assert() is
     * not evaluated when NDEBUG is defined, which would skip the parse and
     * let the test pass without checking anything. */
    if (p_parse(context) != P_SUCCESS)
    {
        fprintf(stderr, "p_parse() did not return P_SUCCESS\n");
        return 1;
    }
    Start * start = p_result(context);
#ifndef __cplusplus
    /* C build only: the context's comments field is a raw pointer here, so
     * the buffer it still refers to is freed by hand. */
    free(context->comments);
#endif
    p_context_delete(context);
    p_tree_delete(start);
    return 0;
}

View File

@ -0,0 +1,24 @@
import testparser;
import std.stdio;
/* Program entry point; it only returns 0. The checks for this test are in the
 * unittest block of this module. */
int main()
{
return 0;
}
/* Parse an input made of comment lines and identifiers and require the parse
 * to succeed, then fetch the resulting tree. */
unittest
{
    string source =
        "# c1\n" ~
        "# c2\n" ~
        "\n" ~
        "first\n" ~
        "\n \n \n" ~
        " # s1\n" ~
        " # s2\n" ~
        "second\n";
    p_context_t * ctx = p_context_new(source);
    auto status = p_parse(ctx);
    assert(status == P_SUCCESS);
    Start * tree = p_result(ctx);
}