Allow user to specify custom token node fields

This commit is contained in:
Josh Holtrop 2026-02-21 21:06:40 -05:00
parent ff61dd05d9
commit d4ad67c23d
9 changed files with 295 additions and 13 deletions

View File

@ -75,6 +75,10 @@ const char * <%= @grammar.prefix %>token_names[] = {
/**
* Deinitialize and deallocate lexer/parser context structure.
*
* For C++, destructors will be called for any context user fields. However, if
* pointers are used to store allocated resources, the user should free them
* before calling this function.
*
* @param context
* Lexer/parser context structure allocated with <%= @grammar.prefix %>context_new().
*/
@ -1014,13 +1018,18 @@ static size_t parse_from(<%= @grammar.prefix %>context_t * context, size_t start
{
/* We shifted a token, mark it consumed. */
<% if @grammar.tree %>
<% if @cpp %>
<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> * token_tree_node = new <%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %>();
<% else %>
<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> * token_tree_node = (<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> *)malloc(sizeof(<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %>));
<% end %>
token_tree_node->position = token_info.position;
token_tree_node->end_position = token_info.end_position;
token_tree_node->n_fields = 0u;
token_tree_node->is_token = 1u;
token_tree_node->token = token;
token_tree_node->pvalue = token_info.pvalue;
<%= expand_code(@grammar.on_token_node, false, nil, nil) %>
state_values_stack_index(&statevalues, -1)->tree_node = token_tree_node;
<% else %>
state_values_stack_index(&statevalues, -1)->pvalue = token_info.pvalue;
@ -1210,10 +1219,16 @@ static void tree_delete(TreeNode * node)
{
if (node->is_token)
{
<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> * token_tree_node = (<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> *) node;
<% if @grammar.free_token_node %>
<%= @grammar.free_token_node %>((<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> *) node);
<%= @grammar.free_token_node %>(token_tree_node);
<% end %>
<%= expand_code(@grammar.free_token_user_fields, false, nil, nil) %>
<% if @cpp %>
delete token_tree_node;
<% else %>
free(token_tree_node);
<% end %>
/* TODO: free value_t */
}
else if (node->n_fields > 0u)
{
@ -1224,8 +1239,8 @@ static void tree_delete(TreeNode * node)
tree_delete(node->fields[i]);
}
}
free(node);
}
free(node);
}
/**

View File

@ -103,9 +103,9 @@ public struct <%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %>
/* TreeNode fields must be present in the same order here. */
<%= @grammar.prefix %>position_t position;
<%= @grammar.prefix %>position_t end_position;
<%= @grammar.token_user_fields %>
<%= @grammar.prefix %>token_t token;
<%= @grammar.prefix %>value_t pvalue;
<%= @grammar.token_user_fields %>
}
<% @parser.rule_sets.each do |name, rule_set| %>
@ -1055,6 +1055,7 @@ private size_t parse_from(<%= @grammar.prefix %>context_t * context, size_t star
/* We shifted a token, mark it consumed. */
<% if @grammar.tree %>
<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> * token_tree_node = new <%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %>(token_info.position, token_info.end_position, token, token_info.pvalue);
<%= expand_code(@grammar.on_token_node, false, nil, nil) %>
statevalues[$-1].tree_node = token_tree_node;
<% else %>
statevalues[$-1].pvalue = token_info.pvalue;

View File

@ -256,6 +256,95 @@ If a pointer to any allocated memory is stored in a user-defined context field,
it is up to the user to free any memory when the program is finished using the
context structure.
### Custom token fields code blocks: the `token_user_fields` statement
When tree generation mode is active, Propane generates a tree node structure
and a token node structure for each matching rule and token instance in the
input string.
The user may add custom fields to token tree nodes using the `token_user_fields`
statement.
The code block supplied to the `token_user_fields` statement is inserted into
the `struct` generated by the parser to hold a token tree node.
Example (D/C++):
```
token_user_fields <<
string mytokenval;
>>
```
The `on_token_node` statement can be used to provide code that initializes
any token user fields when a token tree node instance is created.
For example (C++):
```
context_user_fields <<
std::string comments;
>>
token_user_fields <<
std::string comments;
>>
on_token_node <<
${token.comments} = ${context.comments};
${context.comments} = "";
>>
drop /#(.*)\n/ <<
/* Accumulate comments before the next parser tree node. */
${context.comments} += std::string((const char *)match, match_length);
>>
```
If a pointer to any allocated memory is stored in a user-defined token field,
the `free_token_user_fields` statement can be used to supply a code block which
will be executed immediately before the token node is freed.
For C++, the `delete` statement is used to free the token tree node, so the
destructor for any custom token user fields will be called.
### Custom initialization of a token tree node - the `on_token_node` statement
The `on_token_node` statement can be used to provide code that initializes
any token user fields when a token tree node instance is created.
For example (C++):
```
context_user_fields <<
std::string comments;
>>
token_user_fields <<
std::string comments;
>>
on_token_node <<
${token.comments} = ${context.comments};
${context.comments} = "";
>>
drop /#(.*)\n/ <<
/* Accumulate comments before the next parser tree node. */
${context.comments} += std::string((const char *)match, match_length);
>>
```
### Freeing allocated memory in a custom token user field - the `free_token_user_fields` statement
The `free_token_user_fields` statement allows the user to provide a code block
which will be executed immediately prior to freeing the token tree node.
For example (C):
```
token_user_fields <<
char * comments;
>>
on_token_node <<
${token.comments} = (char *)malloc(some_len);
>>
free_token_user_fields <<
free(${token.comments});
>>
```
##> Tree generation mode - the `tree` statement
To activate tree generation mode, place the `tree` statement in your grammar file:

View File

@ -20,7 +20,7 @@ syn match propaneOperator "->"
syn match propaneFieldAlias ":[a-zA-Z0-9_]\+" contains=propaneFieldOperator
syn match propaneFieldOperator ":" contained
syn match propaneOperator "?"
syn keyword propaneKeyword drop free_token_node module prefix ptype start token tokenid tree tree_prefix tree_suffix
syn keyword propaneKeyword drop free_token_node free_token_user_fields module prefix ptype start token token_user_fields tokenid tree tree_prefix tree_suffix
syn region propaneRegex start="/" end="/" skip="\v\\\\|\\/"

View File

@ -13,8 +13,13 @@ class Propane
@language =
if output_file.end_with?(".d")
"d"
else
elsif output_file.end_with?(".c")
"c"
elsif output_file =~ %r{\.(cc|cpp|cxx)$}
@cpp = true
"c"
else
raise Error.new("Could not determine target language from output file name (#{output_file})")
end
@options = options
process_grammar!
@ -274,6 +279,15 @@ class Propane
"context.#{fieldname}"
end
end
code = code.gsub(/\$\{token\.(\w+)\}/) do |match|
fieldname = $1
case @language
when "c"
"token_tree_node->#{fieldname}"
when "d"
"token_tree_node.#{fieldname}"
end
end
if parser
code = code.gsub(/\$\$/) do |match|
case @language

View File

@ -18,8 +18,9 @@ class Propane
attr_reader :code_blocks
attr_reader :ptypes
attr_reader :prefix
attr_reader :token_node
attr_reader :on_token_node
attr_reader :token_user_fields
attr_reader :free_token_user_fields
def initialize(input)
@patterns = []
@ -38,8 +39,9 @@ class Propane
@tree_suffix = ""
@free_token_node = nil
@context_user_fields = nil
@token_node = nil
@on_token_node = ""
@token_user_fields = nil
@free_token_user_fields = ""
parse_grammar!
@start_rules << "Start" if @start_rules.empty?
end
@ -74,8 +76,9 @@ class Propane
elsif parse_tree_suffix_statement!
elsif parse_free_token_node_statement!
elsif parse_module_statement!
elsif parse_token_node_statement!
elsif parse_on_token_node_statement!
elsif parse_token_user_fields_statement!
elsif parse_free_token_user_fields_statement!
elsif parse_ptype_statement!
elsif parse_pattern_statement!
elsif parse_start_statement!
@ -151,13 +154,12 @@ class Propane
end
end
def parse_token_node_statement!
if md = consume!(/token_node\b\s*/)
def parse_on_token_node_statement!
if md = consume!(/on_token_node\b\s*/)
unless code = parse_code_block!
raise Error.new("Line #{@line_number}: expected code block")
end
@token_node ||= ""
@token_node += code
@on_token_node += code
end
end
@ -171,6 +173,15 @@ class Propane
end
end
def parse_free_token_user_fields_statement!
if md = consume!(/free_token_user_fields\b\s*/)
unless code = parse_code_block!
raise Error.new("Line #{@line_number}: expected code block")
end
@free_token_user_fields += code
end
end
def parse_ptype_statement!
if consume!(/ptype\s+/)
name = "default"

View File

@ -14,6 +14,7 @@ describe Propane do
end
def run_propane(options = {})
options[:language] ||= "d"
@statics[:build_test_id] ||= 0
@statics[:build_test_id] += 1
if ENV["dist_specs"]
@ -1604,6 +1605,103 @@ EOF
expect(results.stderr).to include %r{acount: 11\n}
expect(results.status).to eq 0
end
it "allows custom token user fields" do
if language == "d"
write_grammar <<EOF
context_user_fields <<
string comments;
>>
token_user_fields <<
string comments;
>>
on_token_node <<
${token.comments} = ${context.comments};
${context.comments} = "";
>>
tree;
drop /\\s+/;
drop /#(.*)\\n/ <<
${context.comments} ~= match;
>>
token id /\\w+/;
Start -> IDs;
IDs -> ;
IDs -> id IDs;
EOF
elsif language == "c"
write_grammar <<EOF
<<
#include <string.h>
#include <stdlib.h>
>>
context_user_fields <<
char * comments;
>>
token_user_fields <<
char * comments;
>>
free_token_user_fields <<
free(${token.comments});
>>
on_token_node <<
${token.comments} = ${context.comments};
${context.comments} = (char *)malloc(1);
${context.comments}[0] = '\\0';
>>
tree;
drop /\\s+/;
drop /#(.*)\\n/ <<
size_t cur_len = 0u;
if (${context.comments} != NULL)
cur_len = strlen(${context.comments});
char * commentsnew = (char *)malloc(cur_len + match_length + 1);
if (${context.comments} != NULL)
memcpy(commentsnew, ${context.comments}, cur_len);
memcpy(&commentsnew[cur_len], match, match_length);
commentsnew[cur_len + match_length] = '\\0';
if (${context.comments} != NULL)
{
free(${context.comments});
}
${context.comments} = commentsnew;
>>
token id /\\w+/;
Start -> IDs;
IDs -> ;
IDs -> id IDs;
EOF
else # C++
write_grammar <<EOF
<<header
#include <string>
>>
context_user_fields <<
std::string comments;
>>
token_user_fields <<
std::string comments;
>>
on_token_node <<
${token.comments} = ${context.comments};
${context.comments} = "";
>>
tree;
drop /\\s+/;
drop /#(.*)\\n/ <<
${context.comments} += std::string((const char *)match, match_length);
>>
token id /\\w+/;
Start -> IDs;
IDs -> ;
IDs -> id IDs;
EOF
end
run_propane(language: language)
compile("spec/test_token_user_fields.#{language}", language: language)
results = run_test(language: language)
expect(results.status).to eq 0
end
end
end
end

View File

@ -0,0 +1,30 @@
#include "testparser.h"
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main()
{
char const * input =
"# c1\n"
"# c2\n"
"\n"
"first\n"
"\n \n \n"
" # s1\n"
" # s2\n"
"second\n";
p_context_t * context;
context = p_context_new((uint8_t const *)input, strlen(input));
assert(p_parse(context) == P_SUCCESS);
Start * start = p_result(context);
#ifndef __cplusplus
free(context->comments);
#endif
p_context_delete(context);
p_tree_delete(start);
return 0;
}

View File

@ -0,0 +1,24 @@
import testparser;
import std.stdio;
int main()
{
return 0;
}
unittest
{
string input =
"# c1\n" ~
"# c2\n" ~
"\n" ~
"first\n" ~
"\n \n \n" ~
" # s1\n" ~
" # s2\n" ~
"second\n";
p_context_t * context;
context = p_context_new(input);
assert(p_parse(context) == P_SUCCESS);
Start * start = p_result(context);
}