Allow user to specify AST node prefix or suffix

Add ast_prefix and ast_suffix grammar statements.
Josh Holtrop 2024-05-04 11:57:28 -04:00
parent d0f542cbd7
commit 153f9d28f8
10 changed files with 272 additions and 53 deletions
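For orientation, here is a brief illustration of the new statements, drawn from the documentation and spec changes in this commit (the names `P` and `S` are the ones used in the new spec):

```
ast;
ast_prefix P;
ast_suffix S;
```

With these statements, generated AST node types such as `Start` and `Token` are emitted as `PStartS` and `PTokenS` (as exercised by the new `spec/test_ast_ps.c` and `spec/test_ast_ps.d` tests below), while field names such as `pItems` and `pToken1` are unchanged.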

View File

@@ -924,7 +924,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 {
 /* Successful parse. */
 <% if @grammar.ast %>
-context->parse_result = (Start *)state_values_stack_index(&statevalues, -1)->ast_node;
+context->parse_result = (<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> *)state_values_stack_index(&statevalues, -1)->ast_node;
 <% else %>
 context->parse_result = state_values_stack_index(&statevalues, -1)->pvalue;
 <% end %>
@@ -941,7 +941,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 {
 /* We shifted a token, mark it consumed. */
 <% if @grammar.ast %>
-Token * token_ast_node = malloc(sizeof(Token));
+<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>));
 token_ast_node->token = token;
 token_ast_node->pvalue = token_info.pvalue;
 state_values_stack_index(&statevalues, -1)->ast_node = token_ast_node;
@@ -1029,7 +1029,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 * @return Parse result value.
 */
 <% if @grammar.ast %>
-Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
+<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% else %>
 <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% end %>

View File

@@ -64,7 +64,7 @@ public union <%= @grammar.prefix %>value_t
 <% if @grammar.ast %>
 /** AST node types. @{ */
-public struct Token
+public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
 {
 <%= @grammar.prefix %>token_t token;
 <%= @grammar.prefix %>value_t pvalue;
@@ -72,7 +72,7 @@ public struct Token
 <% @parser.rule_sets.each do |name, rule_set| %>
 <% next if name.start_with?("$") %>
-public struct <%= name %>
+public struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
 {
 <% rule_set.ast_fields.each do |fields| %>
 union
@@ -144,7 +144,7 @@ public struct <%= @grammar.prefix %>context_t
 /** Parse result value. */
 <% if @grammar.ast %>
-Start * parse_result;
+<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * parse_result;
 <% else %>
 <%= @grammar.prefix %>value_t parse_result;
 <% end %>
@@ -973,7 +973,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
 {
 /* Successful parse. */
 <% if @grammar.ast %>
-context.parse_result = cast(Start *)statevalues[$-1].ast_node;
+context.parse_result = cast(<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> *)statevalues[$-1].ast_node;
 <% else %>
 context.parse_result = statevalues[$-1].pvalue;
 <% end %>
@@ -988,7 +988,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
 {
 /* We shifted a token, mark it consumed. */
 <% if @grammar.ast %>
-Token * token_ast_node = new Token(token, token_info.pvalue);
+<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token, token_info.pvalue);
 statevalues[$-1].ast_node = token_ast_node;
 <% else %>
 statevalues[$-1].pvalue = token_info.pvalue;
@@ -1075,7 +1075,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
 * @return Parse result value.
 */
 <% if @grammar.ast %>
-public Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
+public <%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% else %>
 public <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% end %>

View File

@@ -56,11 +56,11 @@ typedef union
 <% if @grammar.ast %>
 /** AST node types. @{ */
-typedef struct Token
+typedef struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
 {
 <%= @grammar.prefix %>token_t token;
 <%= @grammar.prefix %>value_t pvalue;
-} Token;
+} <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>;
 <% @parser.rule_sets.each do |name, rule_set| %>
 <% next if name.start_with?("$") %>
@@ -69,7 +69,7 @@ struct <%= name %>;
 <% @parser.rule_sets.each do |name, rule_set| %>
 <% next if name.start_with?("$") %>
-typedef struct <%= name %>
+typedef struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
 {
 <% rule_set.ast_fields.each do |fields| %>
 union
@@ -79,7 +79,7 @@ typedef struct <%= name %>
 <% end %>
 };
 <% end %>
-} <%= name %>;
+} <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>;
 <% end %>
 /** @} */
@@ -144,7 +144,7 @@ typedef struct
 /** Parse result value. */
 <% if @grammar.ast %>
-Start * parse_result;
+<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * parse_result;
 <% else %>
 <%= @grammar.prefix %>value_t parse_result;
 <% end %>
@@ -173,7 +173,7 @@ size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%=
 size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context);
 <% if @grammar.ast %>
-Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
+<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
 <% else %>
 <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
 <% end %>

View File

@@ -276,6 +276,48 @@ assert_eq(22, itemsmore.pItem.pToken1.pvalue);
 assert(itemsmore.pItemsMore is null);
 ```
+## `ast_prefix` and `ast_suffix` statements
+In AST generation mode, structure types are defined and named based on the
+rules in the grammar.
+Additionally, a structure type called `Token` is generated to hold parsed
+token information.
+These structure names can be modified by using the `ast_prefix` or `ast_suffix`
+statements in the grammar file.
+The field names that point to instances of the structures are not affected by
+the `ast_prefix` or `ast_suffix` values.
+For example, if the following two lines were added to the example above:
+```
+ast_prefix ABC;
+ast_suffix XYZ;
+```
+Then the types would be used as such instead:
+```
+string input = "a, ((b)), b";
+p_context_t context;
+p_context_init(&context, input);
+assert_eq(P_SUCCESS, p_parse(&context));
+ABCStartXYZ * start = p_result(&context);
+assert(start.pItems1 !is null);
+assert(start.pItems !is null);
+ABCItemsXYZ * items = start.pItems;
+assert(items.pItem !is null);
+assert(items.pItem.pToken1 !is null);
+assert_eq(TOKEN_a, items.pItem.pToken1.token);
+assert_eq(11, items.pItem.pToken1.pvalue);
+assert(items.pItemsMore !is null);
+ABCItemsMoreXYZ * itemsmore = items.pItemsMore;
+assert(itemsmore.pItem !is null);
+assert(itemsmore.pItem.pItem !is null);
+assert(itemsmore.pItem.pItem.pItem !is null);
+assert(itemsmore.pItem.pItem.pItem.pToken1 !is null);
+```
 ##> Specifying tokens - the `token` statement
 The `token` statement allows defining a lexer token and a pattern to match that

View File

@@ -120,7 +120,7 @@ class Propane
 end
 determine_possibly_empty_rulesets!(rule_sets)
 rule_sets.each do |name, rule_set|
-rule_set.finalize
+rule_set.finalize(@grammar)
 end
 # Generate the lexer.
 @lexer = Lexer.new(@grammar)

View File

@@ -6,6 +6,8 @@ class Propane
 IDENTIFIER_REGEX = /(?:[a-zA-Z]|_[a-zA-Z0-9])[a-zA-Z_0-9]*/
 attr_reader :ast
+attr_reader :ast_prefix
+attr_reader :ast_suffix
 attr_reader :modulename
 attr_reader :patterns
 attr_reader :rules
@@ -26,6 +28,8 @@ class Propane
 @ptypes = {"default" => "void *"}
 @prefix = "p_"
 @ast = false
+@ast_prefix = ""
+@ast_suffix = ""
 parse_grammar!
 end
@@ -54,6 +58,8 @@ class Propane
 elsif parse_comment_line!
 elsif @mode.nil? && parse_mode_label!
 elsif parse_ast_statement!
+elsif parse_ast_prefix_statement!
+elsif parse_ast_suffix_statement!
 elsif parse_module_statement!
 elsif parse_ptype_statement!
 elsif parse_pattern_statement!
@@ -91,6 +97,18 @@ class Propane
 end
 end
+def parse_ast_prefix_statement!
+if md = consume!(/ast_prefix\s+(\w+)\s*;/)
+@ast_prefix = md[1]
+end
+end
+def parse_ast_suffix_statement!
+if md = consume!(/ast_suffix\s+(\w+)\s*;/)
+@ast_suffix = md[1]
+end
+end
 def parse_module_statement!
 if consume!(/module\s+/)
 md = consume!(/([\w.]+)\s*/, "expected module name")

View File

@@ -3,6 +3,10 @@ class Propane
 # A RuleSet collects all grammar rules of the same name.
 class RuleSet
+# @return [Array<Hash>]
+# AST fields.
+attr_reader :ast_fields
 # @return [Integer]
 # ID of the RuleSet.
 attr_reader :id
@@ -76,6 +80,13 @@ class Propane
 @_start_token_set
 end
+# Finalize a RuleSet after adding all Rules to it.
+def finalize(grammar)
+build_ast_fields(grammar)
+end
+private
 # Build the set of AST fields for this RuleSet.
 #
 # This is an Array of Hashes. Each entry in the Array corresponds to a
@@ -84,14 +95,11 @@
 # a key. It may also have the field name without the positional suffix if
 # that field only exists in one position across all Rules in the RuleSet.
 #
-# @return [Array<Hash>]
-# AST fields.
-def ast_fields
-@_ast_fields ||=
-begin
+# @return [void]
+def build_ast_fields(grammar)
 field_ast_node_indexes = {}
 field_indexes_across_all_rules = {}
-ast_node_fields = []
+@ast_fields = []
 @rules.each do |rule|
 rule.components.each_with_index do |component, i|
 if component.is_a?(Token)
@@ -99,10 +107,11 @@
 else
 node_name = component.name
 end
+struct_name = "#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}"
 field_name = "p#{node_name}#{i + 1}"
 unless field_ast_node_indexes[field_name]
-field_ast_node_indexes[field_name] = ast_node_fields.size
-ast_node_fields << {field_name => node_name}
+field_ast_node_indexes[field_name] = @ast_fields.size
+@ast_fields << {field_name => struct_name}
 end
 field_indexes_across_all_rules[node_name] ||= Set.new
 field_indexes_across_all_rules[node_name] << field_ast_node_indexes[field_name]
@@ -114,16 +123,10 @@
 # If this field was only seen in one position across all rules,
 # then add an alias to the positional field name that does not
 # include the position.
-ast_node_fields[indexes_across_all_rules.first]["p#{node_name}"] = node_name
+@ast_fields[indexes_across_all_rules.first]["p#{node_name}"] =
+"#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}"
 end
 end
-ast_node_fields
-end
-end
-# Finalize a RuleSet after adding all Rules to it.
-def finalize
-ast_fields
-end
 end

View File

@@ -845,6 +845,50 @@ EOF
 expect(results.stderr).to eq ""
 expect(results.status).to eq 0
 end
+it "supports AST node prefix and suffix" do
+write_grammar <<EOF
+ast;
+ast_prefix P ;
+ast_suffix S;
+ptype int;
+token a << $$ = 11; >>
+token b << $$ = 22; >>
+token one /1/;
+token two /2/;
+token comma /,/ <<
+$$ = 42;
+>>
+token lparen /\\(/;
+token rparen /\\)/;
+drop /\\s+/;
+Start -> Items;
+Items -> Item ItemsMore;
+Items -> ;
+ItemsMore -> comma Item ItemsMore;
+ItemsMore -> ;
+Item -> a;
+Item -> b;
+Item -> lparen Item rparen;
+Item -> Dual;
+Dual -> One Two;
+Dual -> Two One;
+One -> one;
+Two -> two;
+EOF
+run_propane(language: language)
+compile("spec/test_ast_ps.#{language}", language: language)
+results = run_test
+expect(results.stderr).to eq ""
+expect(results.status).to eq 0
+end
 end
 end
 end

spec/test_ast_ps.c Normal file
View File

@@ -0,0 +1,55 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"
int main()
{
char const * input = "a, ((b)), b";
p_context_t context;
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert_eq(P_SUCCESS, p_parse(&context));
PStartS * start = p_result(&context);
assert(start->pItems1 != NULL);
assert(start->pItems != NULL);
PItemsS * items = start->pItems;
assert(items->pItem != NULL);
assert(items->pItem->pToken1 != NULL);
assert_eq(TOKEN_a, items->pItem->pToken1->token);
assert_eq(11, items->pItem->pToken1->pvalue);
assert(items->pItemsMore != NULL);
PItemsMoreS * itemsmore = items->pItemsMore;
assert(itemsmore->pItem != NULL);
assert(itemsmore->pItem->pItem != NULL);
assert(itemsmore->pItem->pItem->pItem != NULL);
assert(itemsmore->pItem->pItem->pItem->pToken1 != NULL);
assert_eq(TOKEN_b, itemsmore->pItem->pItem->pItem->pToken1->token);
assert_eq(22, itemsmore->pItem->pItem->pItem->pToken1->pvalue);
assert(itemsmore->pItemsMore != NULL);
itemsmore = itemsmore->pItemsMore;
assert(itemsmore->pItem != NULL);
assert(itemsmore->pItem->pToken1 != NULL);
assert_eq(TOKEN_b, itemsmore->pItem->pToken1->token);
assert_eq(22, itemsmore->pItem->pToken1->pvalue);
assert(itemsmore->pItemsMore == NULL);
input = "";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert_eq(P_SUCCESS, p_parse(&context));
start = p_result(&context);
assert(start->pItems == NULL);
input = "2 1";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert_eq(P_SUCCESS, p_parse(&context));
start = p_result(&context);
assert(start->pItems != NULL);
assert(start->pItems->pItem != NULL);
assert(start->pItems->pItem->pDual != NULL);
assert(start->pItems->pItem->pDual->pTwo1 != NULL);
assert(start->pItems->pItem->pDual->pOne2 != NULL);
assert(start->pItems->pItem->pDual->pTwo2 == NULL);
assert(start->pItems->pItem->pDual->pOne1 == NULL);
return 0;
}

spec/test_ast_ps.d Normal file
View File

@@ -0,0 +1,57 @@
import testparser;
import std.stdio;
import testutils;
int main()
{
return 0;
}
unittest
{
string input = "a, ((b)), b";
p_context_t context;
p_context_init(&context, input);
assert_eq(P_SUCCESS, p_parse(&context));
PStartS * start = p_result(&context);
assert(start.pItems1 !is null);
assert(start.pItems !is null);
PItemsS * items = start.pItems;
assert(items.pItem !is null);
assert(items.pItem.pToken1 !is null);
assert_eq(TOKEN_a, items.pItem.pToken1.token);
assert_eq(11, items.pItem.pToken1.pvalue);
assert(items.pItemsMore !is null);
PItemsMoreS * itemsmore = items.pItemsMore;
assert(itemsmore.pItem !is null);
assert(itemsmore.pItem.pItem !is null);
assert(itemsmore.pItem.pItem.pItem !is null);
assert(itemsmore.pItem.pItem.pItem.pToken1 !is null);
assert_eq(TOKEN_b, itemsmore.pItem.pItem.pItem.pToken1.token);
assert_eq(22, itemsmore.pItem.pItem.pItem.pToken1.pvalue);
assert(itemsmore.pItemsMore !is null);
itemsmore = itemsmore.pItemsMore;
assert(itemsmore.pItem !is null);
assert(itemsmore.pItem.pToken1 !is null);
assert_eq(TOKEN_b, itemsmore.pItem.pToken1.token);
assert_eq(22, itemsmore.pItem.pToken1.pvalue);
assert(itemsmore.pItemsMore is null);
input = "";
p_context_init(&context, input);
assert_eq(P_SUCCESS, p_parse(&context));
start = p_result(&context);
assert(start.pItems is null);
input = "2 1";
p_context_init(&context, input);
assert_eq(P_SUCCESS, p_parse(&context));
start = p_result(&context);
assert(start.pItems !is null);
assert(start.pItems.pItem !is null);
assert(start.pItems.pItem.pDual !is null);
assert(start.pItems.pItem.pDual.pTwo1 !is null);
assert(start.pItems.pItem.pDual.pOne2 !is null);
assert(start.pItems.pItem.pDual.pTwo2 is null);
assert(start.pItems.pItem.pDual.pOne1 is null);
}