Allow user to specify custom token node fields
This commit is contained in:
parent
ff61dd05d9
commit
d4ad67c23d
@ -75,6 +75,10 @@ const char * <%= @grammar.prefix %>token_names[] = {
|
||||
/**
|
||||
* Deinitialize and deallocate lexer/parser context structure.
|
||||
*
|
||||
* For C++, destructors will be called for any context user fields. However, if
|
||||
* pointers are used to store allocated resources, the user should free them
|
||||
* before calling this function.
|
||||
*
|
||||
* @param context
|
||||
* Lexer/parser context structure allocated with <%= @grammar.prefix %>context_new().
|
||||
*/
|
||||
@ -1014,13 +1018,18 @@ static size_t parse_from(<%= @grammar.prefix %>context_t * context, size_t start
|
||||
{
|
||||
/* We shifted a token, mark it consumed. */
|
||||
<% if @grammar.tree %>
|
||||
<% if @cpp %>
|
||||
<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> * token_tree_node = new <%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %>();
|
||||
<% else %>
|
||||
<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> * token_tree_node = (<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> *)malloc(sizeof(<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %>));
|
||||
<% end %>
|
||||
token_tree_node->position = token_info.position;
|
||||
token_tree_node->end_position = token_info.end_position;
|
||||
token_tree_node->n_fields = 0u;
|
||||
token_tree_node->is_token = 1u;
|
||||
token_tree_node->token = token;
|
||||
token_tree_node->pvalue = token_info.pvalue;
|
||||
<%= expand_code(@grammar.on_token_node, false, nil, nil) %>
|
||||
state_values_stack_index(&statevalues, -1)->tree_node = token_tree_node;
|
||||
<% else %>
|
||||
state_values_stack_index(&statevalues, -1)->pvalue = token_info.pvalue;
|
||||
@ -1210,10 +1219,16 @@ static void tree_delete(TreeNode * node)
|
||||
{
|
||||
if (node->is_token)
|
||||
{
|
||||
<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> * token_tree_node = (<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> *) node;
|
||||
<% if @grammar.free_token_node %>
|
||||
<%= @grammar.free_token_node %>((<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> *) node);
|
||||
<%= @grammar.free_token_node %>(token_tree_node);
|
||||
<% end %>
|
||||
<%= expand_code(@grammar.free_token_user_fields, false, nil, nil) %>
|
||||
<% if @cpp %>
|
||||
delete token_tree_node;
|
||||
<% else %>
|
||||
free(token_tree_node);
|
||||
<% end %>
|
||||
/* TODO: free value_t */
|
||||
}
|
||||
else if (node->n_fields > 0u)
|
||||
{
|
||||
@ -1224,8 +1239,8 @@ static void tree_delete(TreeNode * node)
|
||||
tree_delete(node->fields[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
free(node);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -103,9 +103,9 @@ public struct <%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %>
|
||||
/* TreeNode fields must be present in the same order here. */
|
||||
<%= @grammar.prefix %>position_t position;
|
||||
<%= @grammar.prefix %>position_t end_position;
|
||||
<%= @grammar.token_user_fields %>
|
||||
<%= @grammar.prefix %>token_t token;
|
||||
<%= @grammar.prefix %>value_t pvalue;
|
||||
<%= @grammar.token_user_fields %>
|
||||
}
|
||||
|
||||
<% @parser.rule_sets.each do |name, rule_set| %>
|
||||
@ -1055,6 +1055,7 @@ private size_t parse_from(<%= @grammar.prefix %>context_t * context, size_t star
|
||||
/* We shifted a token, mark it consumed. */
|
||||
<% if @grammar.tree %>
|
||||
<%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> * token_tree_node = new <%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %>(token_info.position, token_info.end_position, token, token_info.pvalue);
|
||||
<%= expand_code(@grammar.on_token_node, false, nil, nil) %>
|
||||
statevalues[$-1].tree_node = token_tree_node;
|
||||
<% else %>
|
||||
statevalues[$-1].pvalue = token_info.pvalue;
|
||||
|
||||
@ -256,6 +256,95 @@ If a pointer to any allocated memory is stored in a user-defined context field,
|
||||
it is up to the user to free any memory when the program is finished using the
|
||||
context structure.
|
||||
|
||||
### Custom token fields - the `token_user_fields` statement
|
||||
|
||||
When tree generation mode is active, Propane generates a tree node structure
|
||||
and a token node structure for each matching rule and token instance in the
|
||||
input string.
|
||||
The user may add custom fields to token tree nodes using the `token_user_fields`
|
||||
statement.
|
||||
The code block supplied to the `token_user_fields` statement is inserted in the `struct`
|
||||
generated by the parser to hold a token tree node.
|
||||
|
||||
Example (D/C++):
|
||||
|
||||
```
|
||||
token_user_fields <<
|
||||
string mytokenval;
|
||||
>>
|
||||
```
|
||||
|
||||
The `on_token_node` statement can be used to provide code that initializes
|
||||
any token user fields when a token tree node instance is created.
|
||||
|
||||
For example (C++):
|
||||
|
||||
```
|
||||
context_user_fields <<
|
||||
std::string comments;
|
||||
>>
|
||||
token_user_fields <<
|
||||
std::string comments;
|
||||
>>
|
||||
on_token_node <<
|
||||
${token.comments} = ${context.comments};
|
||||
${context.comments} = "";
|
||||
>>
|
||||
drop /#(.*)\n/ <<
|
||||
/* Accumulate comments before the next parser tree node. */
|
||||
${context.comments} += std::string((const char *)match, match_length);
|
||||
>>
|
||||
```
|
||||
|
||||
If a pointer to any allocated memory is stored in a user-defined token field,
|
||||
the `free_token_user_fields` statement can be used to supply a code block which
|
||||
will be executed immediately before the token node is freed.
|
||||
For C++, the `delete` statement is used to free the token tree node, so the
|
||||
destructor for any custom token user fields will be called.
|
||||
|
||||
### Custom initialization of a token tree node - the `on_token_node` statement
|
||||
|
||||
The `on_token_node` statement can be used to provide code that initializes
|
||||
any token user fields when a token tree node instance is created.
|
||||
|
||||
For example (C++):
|
||||
|
||||
```
|
||||
context_user_fields <<
|
||||
std::string comments;
|
||||
>>
|
||||
token_user_fields <<
|
||||
std::string comments;
|
||||
>>
|
||||
on_token_node <<
|
||||
${token.comments} = ${context.comments};
|
||||
${context.comments} = "";
|
||||
>>
|
||||
drop /#(.*)\n/ <<
|
||||
/* Accumulate comments before the next parser tree node. */
|
||||
${context.comments} += std::string((const char *)match, match_length);
|
||||
>>
|
||||
```
|
||||
|
||||
### Freeing allocated memory in a custom token user field - the `free_token_user_fields` statement
|
||||
|
||||
The `free_token_user_fields` statement allows the user to provide a code block
|
||||
which will be executed immediately prior to freeing the token tree node.
|
||||
|
||||
For example (C):
|
||||
|
||||
```
|
||||
token_user_fields <<
|
||||
char * comments;
|
||||
>>
|
||||
on_token_node <<
|
||||
${token.comments} = (char *)malloc(some_len);
|
||||
>>
|
||||
free_token_user_fields <<
|
||||
free(${token.comments});
|
||||
>>
|
||||
```
|
||||
|
||||
##> Tree generation mode - the `tree` statement
|
||||
|
||||
To activate tree generation mode, place the `tree` statement in your grammar file:
|
||||
|
||||
@ -20,7 +20,7 @@ syn match propaneOperator "->"
|
||||
syn match propaneFieldAlias ":[a-zA-Z0-9_]\+" contains=propaneFieldOperator
|
||||
syn match propaneFieldOperator ":" contained
|
||||
syn match propaneOperator "?"
|
||||
syn keyword propaneKeyword drop free_token_node module prefix ptype start token tokenid tree tree_prefix tree_suffix
|
||||
syn keyword propaneKeyword drop free_token_node free_token_user_fields module prefix ptype start token token_user_fields tokenid tree tree_prefix tree_suffix
|
||||
|
||||
syn region propaneRegex start="/" end="/" skip="\v\\\\|\\/"
|
||||
|
||||
|
||||
@ -13,8 +13,13 @@ class Propane
|
||||
@language =
|
||||
if output_file.end_with?(".d")
|
||||
"d"
|
||||
else
|
||||
elsif output_file.end_with?(".c")
|
||||
"c"
|
||||
elsif output_file =~ %r{\.(cc|cpp|cxx)$}
|
||||
@cpp = true
|
||||
"c"
|
||||
else
|
||||
raise Error.new("Could not determine target language from output file name (#{output_file})")
|
||||
end
|
||||
@options = options
|
||||
process_grammar!
|
||||
@ -274,6 +279,15 @@ class Propane
|
||||
"context.#{fieldname}"
|
||||
end
|
||||
end
|
||||
code = code.gsub(/\$\{token\.(\w+)\}/) do |match|
|
||||
fieldname = $1
|
||||
case @language
|
||||
when "c"
|
||||
"token_tree_node->#{fieldname}"
|
||||
when "d"
|
||||
"token_tree_node.#{fieldname}"
|
||||
end
|
||||
end
|
||||
if parser
|
||||
code = code.gsub(/\$\$/) do |match|
|
||||
case @language
|
||||
|
||||
@ -18,8 +18,9 @@ class Propane
|
||||
attr_reader :code_blocks
|
||||
attr_reader :ptypes
|
||||
attr_reader :prefix
|
||||
attr_reader :token_node
|
||||
attr_reader :on_token_node
|
||||
attr_reader :token_user_fields
|
||||
attr_reader :free_token_user_fields
|
||||
|
||||
def initialize(input)
|
||||
@patterns = []
|
||||
@ -38,8 +39,9 @@ class Propane
|
||||
@tree_suffix = ""
|
||||
@free_token_node = nil
|
||||
@context_user_fields = nil
|
||||
@token_node = nil
|
||||
@on_token_node = ""
|
||||
@token_user_fields = nil
|
||||
@free_token_user_fields = ""
|
||||
parse_grammar!
|
||||
@start_rules << "Start" if @start_rules.empty?
|
||||
end
|
||||
@ -74,8 +76,9 @@ class Propane
|
||||
elsif parse_tree_suffix_statement!
|
||||
elsif parse_free_token_node_statement!
|
||||
elsif parse_module_statement!
|
||||
elsif parse_token_node_statement!
|
||||
elsif parse_on_token_node_statement!
|
||||
elsif parse_token_user_fields_statement!
|
||||
elsif parse_free_token_user_fields_statement!
|
||||
elsif parse_ptype_statement!
|
||||
elsif parse_pattern_statement!
|
||||
elsif parse_start_statement!
|
||||
@ -151,13 +154,12 @@ class Propane
|
||||
end
|
||||
end
|
||||
|
||||
def parse_token_node_statement!
|
||||
if md = consume!(/token_node\b\s*/)
|
||||
def parse_on_token_node_statement!
|
||||
if md = consume!(/on_token_node\b\s*/)
|
||||
unless code = parse_code_block!
|
||||
raise Error.new("Line #{@line_number}: expected code block")
|
||||
end
|
||||
@token_node ||= ""
|
||||
@token_node += code
|
||||
@on_token_node += code
|
||||
end
|
||||
end
|
||||
|
||||
@ -171,6 +173,15 @@ class Propane
|
||||
end
|
||||
end
|
||||
|
||||
def parse_free_token_user_fields_statement!
|
||||
if md = consume!(/free_token_user_fields\b\s*/)
|
||||
unless code = parse_code_block!
|
||||
raise Error.new("Line #{@line_number}: expected code block")
|
||||
end
|
||||
@free_token_user_fields += code
|
||||
end
|
||||
end
|
||||
|
||||
def parse_ptype_statement!
|
||||
if consume!(/ptype\s+/)
|
||||
name = "default"
|
||||
|
||||
@ -14,6 +14,7 @@ describe Propane do
|
||||
end
|
||||
|
||||
def run_propane(options = {})
|
||||
options[:language] ||= "d"
|
||||
@statics[:build_test_id] ||= 0
|
||||
@statics[:build_test_id] += 1
|
||||
if ENV["dist_specs"]
|
||||
@ -1604,6 +1605,103 @@ EOF
|
||||
expect(results.stderr).to include %r{acount: 11\n}
|
||||
expect(results.status).to eq 0
|
||||
end
|
||||
|
||||
it "allows custom token user fields" do
|
||||
if language == "d"
|
||||
write_grammar <<EOF
|
||||
context_user_fields <<
|
||||
string comments;
|
||||
>>
|
||||
token_user_fields <<
|
||||
string comments;
|
||||
>>
|
||||
on_token_node <<
|
||||
${token.comments} = ${context.comments};
|
||||
${context.comments} = "";
|
||||
>>
|
||||
tree;
|
||||
drop /\\s+/;
|
||||
drop /#(.*)\\n/ <<
|
||||
${context.comments} ~= match;
|
||||
>>
|
||||
token id /\\w+/;
|
||||
Start -> IDs;
|
||||
IDs -> ;
|
||||
IDs -> id IDs;
|
||||
EOF
|
||||
elsif language == "c"
|
||||
write_grammar <<EOF
|
||||
<<
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
>>
|
||||
context_user_fields <<
|
||||
char * comments;
|
||||
>>
|
||||
token_user_fields <<
|
||||
char * comments;
|
||||
>>
|
||||
free_token_user_fields <<
|
||||
free(${token.comments});
|
||||
>>
|
||||
on_token_node <<
|
||||
${token.comments} = ${context.comments};
|
||||
${context.comments} = (char *)malloc(1);
|
||||
${context.comments}[0] = '\\0';
|
||||
>>
|
||||
tree;
|
||||
drop /\\s+/;
|
||||
drop /#(.*)\\n/ <<
|
||||
size_t cur_len = 0u;
|
||||
if (${context.comments} != NULL)
|
||||
cur_len = strlen(${context.comments});
|
||||
char * commentsnew = (char *)malloc(cur_len + match_length + 1);
|
||||
if (${context.comments} != NULL)
|
||||
memcpy(commentsnew, ${context.comments}, cur_len);
|
||||
memcpy(&commentsnew[cur_len], match, match_length);
|
||||
commentsnew[cur_len + match_length] = '\\0';
|
||||
if (${context.comments} != NULL)
|
||||
{
|
||||
free(${context.comments});
|
||||
}
|
||||
${context.comments} = commentsnew;
|
||||
>>
|
||||
token id /\\w+/;
|
||||
Start -> IDs;
|
||||
IDs -> ;
|
||||
IDs -> id IDs;
|
||||
EOF
|
||||
else # C++
|
||||
write_grammar <<EOF
|
||||
<<header
|
||||
#include <string>
|
||||
>>
|
||||
context_user_fields <<
|
||||
std::string comments;
|
||||
>>
|
||||
token_user_fields <<
|
||||
std::string comments;
|
||||
>>
|
||||
on_token_node <<
|
||||
${token.comments} = ${context.comments};
|
||||
${context.comments} = "";
|
||||
>>
|
||||
tree;
|
||||
drop /\\s+/;
|
||||
drop /#(.*)\\n/ <<
|
||||
${context.comments} += std::string((const char *)match, match_length);
|
||||
>>
|
||||
token id /\\w+/;
|
||||
Start -> IDs;
|
||||
IDs -> ;
|
||||
IDs -> id IDs;
|
||||
EOF
|
||||
end
|
||||
run_propane(language: language)
|
||||
compile("spec/test_token_user_fields.#{language}", language: language)
|
||||
results = run_test(language: language)
|
||||
expect(results.status).to eq 0
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
30
spec/test_token_user_fields.c
Normal file
30
spec/test_token_user_fields.c
Normal file
@ -0,0 +1,30 @@
|
||||
#include "testparser.h"
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
char const * input =
|
||||
"# c1\n"
|
||||
"# c2\n"
|
||||
"\n"
|
||||
"first\n"
|
||||
"\n \n \n"
|
||||
" # s1\n"
|
||||
" # s2\n"
|
||||
"second\n";
|
||||
p_context_t * context;
|
||||
context = p_context_new((uint8_t const *)input, strlen(input));
|
||||
assert(p_parse(context) == P_SUCCESS);
|
||||
Start * start = p_result(context);
|
||||
|
||||
#ifndef __cplusplus
|
||||
free(context->comments);
|
||||
#endif
|
||||
p_context_delete(context);
|
||||
p_tree_delete(start);
|
||||
|
||||
return 0;
|
||||
}
|
||||
24
spec/test_token_user_fields.d
Normal file
24
spec/test_token_user_fields.d
Normal file
@ -0,0 +1,24 @@
|
||||
import testparser;
|
||||
import std.stdio;
|
||||
|
||||
int main()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
unittest
|
||||
{
|
||||
string input =
|
||||
"# c1\n" ~
|
||||
"# c2\n" ~
|
||||
"\n" ~
|
||||
"first\n" ~
|
||||
"\n \n \n" ~
|
||||
" # s1\n" ~
|
||||
" # s2\n" ~
|
||||
"second\n";
|
||||
p_context_t * context;
|
||||
context = p_context_new(input);
|
||||
assert(p_parse(context) == P_SUCCESS);
|
||||
Start * start = p_result(context);
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user