Allow user to specify AST node prefix or suffix
Add ast_prefix and ast_suffix grammar statements.
This commit is contained in: parent d0f542cbd7, commit 153f9d28f8
@@ -924,7 +924,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 {
 /* Successful parse. */
 <% if @grammar.ast %>
-context->parse_result = (Start *)state_values_stack_index(&statevalues, -1)->ast_node;
+context->parse_result = (<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> *)state_values_stack_index(&statevalues, -1)->ast_node;
 <% else %>
 context->parse_result = state_values_stack_index(&statevalues, -1)->pvalue;
 <% end %>

@@ -941,7 +941,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 {
 /* We shifted a token, mark it consumed. */
 <% if @grammar.ast %>
-Token * token_ast_node = malloc(sizeof(Token));
+<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>));
 token_ast_node->token = token;
 token_ast_node->pvalue = token_info.pvalue;
 state_values_stack_index(&statevalues, -1)->ast_node = token_ast_node;

@@ -1029,7 +1029,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 * @return Parse result value.
 */
 <% if @grammar.ast %>
-Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
+<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% else %>
 <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% end %>

@@ -64,7 +64,7 @@ public union <%= @grammar.prefix %>value_t

 <% if @grammar.ast %>
 /** AST node types. @{ */
-public struct Token
+public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
 {
 <%= @grammar.prefix %>token_t token;
 <%= @grammar.prefix %>value_t pvalue;

@@ -72,7 +72,7 @@ public struct Token

 <% @parser.rule_sets.each do |name, rule_set| %>
 <% next if name.start_with?("$") %>
-public struct <%= name %>
+public struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
 {
 <% rule_set.ast_fields.each do |fields| %>
 union

@@ -144,7 +144,7 @@ public struct <%= @grammar.prefix %>context_t

 /** Parse result value. */
 <% if @grammar.ast %>
-Start * parse_result;
+<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * parse_result;
 <% else %>
 <%= @grammar.prefix %>value_t parse_result;
 <% end %>

@@ -973,7 +973,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 {
 /* Successful parse. */
 <% if @grammar.ast %>
-context.parse_result = cast(Start *)statevalues[$-1].ast_node;
+context.parse_result = cast(<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> *)statevalues[$-1].ast_node;
 <% else %>
 context.parse_result = statevalues[$-1].pvalue;
 <% end %>

@@ -988,7 +988,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 {
 /* We shifted a token, mark it consumed. */
 <% if @grammar.ast %>
-Token * token_ast_node = new Token(token, token_info.pvalue);
+<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token, token_info.pvalue);
 statevalues[$-1].ast_node = token_ast_node;
 <% else %>
 statevalues[$-1].pvalue = token_info.pvalue;

@@ -1075,7 +1075,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
 * @return Parse result value.
 */
 <% if @grammar.ast %>
-public Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
+public <%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% else %>
 public <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 <% end %>

@@ -56,11 +56,11 @@ typedef union

 <% if @grammar.ast %>
 /** AST node types. @{ */
-typedef struct Token
+typedef struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
 {
 <%= @grammar.prefix %>token_t token;
 <%= @grammar.prefix %>value_t pvalue;
-} Token;
+} <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>;

 <% @parser.rule_sets.each do |name, rule_set| %>
 <% next if name.start_with?("$") %>

@@ -69,7 +69,7 @@ struct <%= name %>;

 <% @parser.rule_sets.each do |name, rule_set| %>
 <% next if name.start_with?("$") %>
-typedef struct <%= name %>
+typedef struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
 {
 <% rule_set.ast_fields.each do |fields| %>
 union

@@ -79,7 +79,7 @@ typedef struct <%= name %>
 <% end %>
 };
 <% end %>
-} <%= name %>;
+} <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>;

 <% end %>
 /** @} */

@@ -144,7 +144,7 @@ typedef struct

 /** Parse result value. */
 <% if @grammar.ast %>
-Start * parse_result;
+<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * parse_result;
 <% else %>
 <%= @grammar.prefix %>value_t parse_result;
 <% end %>

@@ -173,7 +173,7 @@ size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%=
 size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context);

 <% if @grammar.ast %>
-Start * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
+<%= @grammar.ast_prefix %>Start<%= @grammar.ast_suffix %> * <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
 <% else %>
 <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
 <% end %>

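To make the header templating above concrete: with, for example, `ast_prefix ABC;` and `ast_suffix XYZ;` in the grammar (the same values used in the documentation example below) and the default `p_` parser prefix, the generated C header would contain declarations roughly like the following. This is an illustrative sketch only, not literal generator output; the actual structs and fields depend on the grammar's rule sets.

```
/* Illustrative sketch of generated declarations -- not literal output. */
typedef struct ABCTokenXYZ
{
    p_token_t token;   /* matched token ID */
    p_value_t pvalue;  /* token parse value */
} ABCTokenXYZ;

/* One struct per rule set, renamed the same way; field names are unchanged. */
typedef struct ABCStartXYZ ABCStartXYZ;
typedef struct ABCItemsXYZ ABCItemsXYZ;

/* The result accessor returns a pointer to the renamed start node type. */
ABCStartXYZ * p_result(p_context_t * context);
```
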
@@ -276,6 +276,48 @@ assert_eq(22, itemsmore.pItem.pToken1.pvalue);
 assert(itemsmore.pItemsMore is null);
 ```

+## `ast_prefix` and `ast_suffix` statements
+
+In AST generation mode, structure types are defined and named based on the
+rules in the grammar.
+Additionally, a structure type called `Token` is generated to hold parsed
+token information.
+
+These structure names can be modified by using the `ast_prefix` or `ast_suffix`
+statements in the grammar file.
+The field names that point to instances of the structures are not affected by
+the `ast_prefix` or `ast_suffix` values.
+
+For example, if the following two lines were added to the example above:
+
+```
+ast_prefix ABC;
+ast_suffix XYZ;
+```
+
+Then the types would instead be used as follows:
+
+```
+string input = "a, ((b)), b";
+p_context_t context;
+p_context_init(&context, input);
+assert_eq(P_SUCCESS, p_parse(&context));
+ABCStartXYZ * start = p_result(&context);
+assert(start.pItems1 !is null);
+assert(start.pItems !is null);
+ABCItemsXYZ * items = start.pItems;
+assert(items.pItem !is null);
+assert(items.pItem.pToken1 !is null);
+assert_eq(TOKEN_a, items.pItem.pToken1.token);
+assert_eq(11, items.pItem.pToken1.pvalue);
+assert(items.pItemsMore !is null);
+ABCItemsMoreXYZ * itemsmore = items.pItemsMore;
+assert(itemsmore.pItem !is null);
+assert(itemsmore.pItem.pItem !is null);
+assert(itemsmore.pItem.pItem.pItem !is null);
+assert(itemsmore.pItem.pItem.pItem.pToken1 !is null);
+```
+
 ##> Specifying tokens - the `token` statement

 The `token` statement allows defining a lexer token and a pattern to match that

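The documentation example above targets the D backend. The same renaming applies to the C backend; here is a hedged sketch of equivalent C usage, modeled on the new spec/test_ast_ps.c added later in this commit but using the `ABC`/`XYZ` names from the documentation example. The `testparser.h` header and the grammar it is generated from are assumptions for illustration.

```
#include "testparser.h"   /* generated parser header (illustrative name) */
#include <assert.h>
#include <string.h>

int main(void)
{
    char const * input = "a, ((b)), b";
    p_context_t context;
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    assert(p_parse(&context) == P_SUCCESS);

    /* Struct names carry the prefix/suffix; field names do not. */
    ABCStartXYZ * start = p_result(&context);
    assert(start->pItems != NULL);
    ABCItemsXYZ * items = start->pItems;
    assert(items->pItem != NULL);
    assert(items->pItem->pToken1 != NULL);
    assert(items->pItem->pToken1->token == TOKEN_a);
    assert(items->pItem->pToken1->pvalue == 11);
    return 0;
}
```
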
@@ -120,7 +120,7 @@ class Propane
 end
 determine_possibly_empty_rulesets!(rule_sets)
 rule_sets.each do |name, rule_set|
-rule_set.finalize
+rule_set.finalize(@grammar)
 end
 # Generate the lexer.
 @lexer = Lexer.new(@grammar)

@@ -6,6 +6,8 @@ class Propane
 IDENTIFIER_REGEX = /(?:[a-zA-Z]|_[a-zA-Z0-9])[a-zA-Z_0-9]*/

 attr_reader :ast
+attr_reader :ast_prefix
+attr_reader :ast_suffix
 attr_reader :modulename
 attr_reader :patterns
 attr_reader :rules

@@ -26,6 +28,8 @@ class Propane
 @ptypes = {"default" => "void *"}
 @prefix = "p_"
 @ast = false
+@ast_prefix = ""
+@ast_suffix = ""
 parse_grammar!
 end

@@ -54,6 +58,8 @@ class Propane
 elsif parse_comment_line!
 elsif @mode.nil? && parse_mode_label!
 elsif parse_ast_statement!
+elsif parse_ast_prefix_statement!
+elsif parse_ast_suffix_statement!
 elsif parse_module_statement!
 elsif parse_ptype_statement!
 elsif parse_pattern_statement!

@@ -91,6 +97,18 @@ class Propane
 end
 end

+def parse_ast_prefix_statement!
+if md = consume!(/ast_prefix\s+(\w+)\s*;/)
+@ast_prefix = md[1]
+end
+end
+
+def parse_ast_suffix_statement!
+if md = consume!(/ast_suffix\s+(\w+)\s*;/)
+@ast_suffix = md[1]
+end
+end
+
 def parse_module_statement!
 if consume!(/module\s+/)
 md = consume!(/([\w.]+)\s*/, "expected module name")

@@ -3,6 +3,10 @@ class Propane
 # A RuleSet collects all grammar rules of the same name.
 class RuleSet

+# @return [Array<Hash>]
+#   AST fields.
+attr_reader :ast_fields
+
 # @return [Integer]
 #   ID of the RuleSet.
 attr_reader :id

@@ -76,6 +80,13 @@ class Propane
 @_start_token_set
 end

+# Finalize a RuleSet after adding all Rules to it.
+def finalize(grammar)
+build_ast_fields(grammar)
+end
+
+private
+
 # Build the set of AST fields for this RuleSet.
 #
 # This is an Array of Hashes. Each entry in the Array corresponds to a

@@ -84,14 +95,11 @@ class Propane
 # a key. It may also have the field name without the positional suffix if
 # that field only exists in one position across all Rules in the RuleSet.
 #
-# @return [Array<Hash>]
-#   AST fields.
-def ast_fields
-@_ast_fields ||=
-begin
+# @return [void]
+def build_ast_fields(grammar)
 field_ast_node_indexes = {}
 field_indexes_across_all_rules = {}
-ast_node_fields = []
+@ast_fields = []
 @rules.each do |rule|
 rule.components.each_with_index do |component, i|
 if component.is_a?(Token)

@@ -99,10 +107,11 @@ class Propane
 else
 node_name = component.name
 end
+struct_name = "#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}"
 field_name = "p#{node_name}#{i + 1}"
 unless field_ast_node_indexes[field_name]
-field_ast_node_indexes[field_name] = ast_node_fields.size
-ast_node_fields << {field_name => node_name}
+field_ast_node_indexes[field_name] = @ast_fields.size
+@ast_fields << {field_name => struct_name}
 end
 field_indexes_across_all_rules[node_name] ||= Set.new
 field_indexes_across_all_rules[node_name] << field_ast_node_indexes[field_name]

@@ -114,16 +123,10 @@ class Propane
 # If this field was only seen in one position across all rules,
 # then add an alias to the positional field name that does not
 # include the position.
-ast_node_fields[indexes_across_all_rules.first]["p#{node_name}"] = node_name
+@ast_fields[indexes_across_all_rules.first]["p#{node_name}"] =
+"#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}"
 end
 end
-ast_node_fields
-end
 end
-
-# Finalize a RuleSet after adding all Rules to it.
-def finalize
-ast_fields
-end

 end

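The alias logic above, combined with the `union` wrappers emitted by the header templates earlier in this commit, means a child that appears in only one position gets both a positional field name and an unsuffixed alias backed by the same storage. A hedged C illustration follows, using the `P` prefix and `S` suffix from the new spec below; the pointer-equality assert is an inference from that union layout, not something the new tests themselves check.

```
#include "testparser.h"   /* parser generated from the spec grammar below (illustrative) */
#include <assert.h>
#include <string.h>

int main(void)
{
    char const * input = "a";
    p_context_t context;
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    assert(p_parse(&context) == P_SUCCESS);

    /* "Items" appears only at position 1 in Start's rule, so the generated
     * PStartS struct exposes both the positional field pItems1 and its alias
     * pItems as members of the same union; they refer to the same child. */
    PStartS * start = p_result(&context);
    assert(start->pItems1 != NULL);
    assert(start->pItems1 == start->pItems);
    return 0;
}
```
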
@@ -845,6 +845,50 @@ EOF
 expect(results.stderr).to eq ""
 expect(results.status).to eq 0
 end
+
+it "supports AST node prefix and suffix" do
+write_grammar <<EOF
+ast;
+ast_prefix P ;
+ast_suffix S;
+
+ptype int;
+
+token a << $$ = 11; >>
+token b << $$ = 22; >>
+token one /1/;
+token two /2/;
+token comma /,/ <<
+$$ = 42;
+>>
+token lparen /\\(/;
+token rparen /\\)/;
+drop /\\s+/;
+
+Start -> Items;
+
+Items -> Item ItemsMore;
+Items -> ;
+
+ItemsMore -> comma Item ItemsMore;
+ItemsMore -> ;
+
+Item -> a;
+Item -> b;
+Item -> lparen Item rparen;
+Item -> Dual;
+
+Dual -> One Two;
+Dual -> Two One;
+One -> one;
+Two -> two;
+EOF
+run_propane(language: language)
+compile("spec/test_ast_ps.#{language}", language: language)
+results = run_test
+expect(results.stderr).to eq ""
+expect(results.status).to eq 0
+end
 end
 end
 end

spec/test_ast_ps.c (new file, 55 lines)
@@ -0,0 +1,55 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"

int main()
{
char const * input = "a, ((b)), b";
p_context_t context;
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert_eq(P_SUCCESS, p_parse(&context));
PStartS * start = p_result(&context);
assert(start->pItems1 != NULL);
assert(start->pItems != NULL);
PItemsS * items = start->pItems;
assert(items->pItem != NULL);
assert(items->pItem->pToken1 != NULL);
assert_eq(TOKEN_a, items->pItem->pToken1->token);
assert_eq(11, items->pItem->pToken1->pvalue);
assert(items->pItemsMore != NULL);
PItemsMoreS * itemsmore = items->pItemsMore;
assert(itemsmore->pItem != NULL);
assert(itemsmore->pItem->pItem != NULL);
assert(itemsmore->pItem->pItem->pItem != NULL);
assert(itemsmore->pItem->pItem->pItem->pToken1 != NULL);
assert_eq(TOKEN_b, itemsmore->pItem->pItem->pItem->pToken1->token);
assert_eq(22, itemsmore->pItem->pItem->pItem->pToken1->pvalue);
assert(itemsmore->pItemsMore != NULL);
itemsmore = itemsmore->pItemsMore;
assert(itemsmore->pItem != NULL);
assert(itemsmore->pItem->pToken1 != NULL);
assert_eq(TOKEN_b, itemsmore->pItem->pToken1->token);
assert_eq(22, itemsmore->pItem->pToken1->pvalue);
assert(itemsmore->pItemsMore == NULL);

input = "";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert_eq(P_SUCCESS, p_parse(&context));
start = p_result(&context);
assert(start->pItems == NULL);

input = "2 1";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert_eq(P_SUCCESS, p_parse(&context));
start = p_result(&context);
assert(start->pItems != NULL);
assert(start->pItems->pItem != NULL);
assert(start->pItems->pItem->pDual != NULL);
assert(start->pItems->pItem->pDual->pTwo1 != NULL);
assert(start->pItems->pItem->pDual->pOne2 != NULL);
assert(start->pItems->pItem->pDual->pTwo2 == NULL);
assert(start->pItems->pItem->pDual->pOne1 == NULL);

return 0;
}

spec/test_ast_ps.d (new file, 57 lines)
@@ -0,0 +1,57 @@
import testparser;
import std.stdio;
import testutils;

int main()
{
return 0;
}

unittest
{
string input = "a, ((b)), b";
p_context_t context;
p_context_init(&context, input);
assert_eq(P_SUCCESS, p_parse(&context));
PStartS * start = p_result(&context);
assert(start.pItems1 !is null);
assert(start.pItems !is null);
PItemsS * items = start.pItems;
assert(items.pItem !is null);
assert(items.pItem.pToken1 !is null);
assert_eq(TOKEN_a, items.pItem.pToken1.token);
assert_eq(11, items.pItem.pToken1.pvalue);
assert(items.pItemsMore !is null);
PItemsMoreS * itemsmore = items.pItemsMore;
assert(itemsmore.pItem !is null);
assert(itemsmore.pItem.pItem !is null);
assert(itemsmore.pItem.pItem.pItem !is null);
assert(itemsmore.pItem.pItem.pItem.pToken1 !is null);
assert_eq(TOKEN_b, itemsmore.pItem.pItem.pItem.pToken1.token);
assert_eq(22, itemsmore.pItem.pItem.pItem.pToken1.pvalue);
assert(itemsmore.pItemsMore !is null);
itemsmore = itemsmore.pItemsMore;
assert(itemsmore.pItem !is null);
assert(itemsmore.pItem.pToken1 !is null);
assert_eq(TOKEN_b, itemsmore.pItem.pToken1.token);
assert_eq(22, itemsmore.pItem.pToken1.pvalue);
assert(itemsmore.pItemsMore is null);

input = "";
p_context_init(&context, input);
assert_eq(P_SUCCESS, p_parse(&context));
start = p_result(&context);
assert(start.pItems is null);

input = "2 1";
p_context_init(&context, input);
assert_eq(P_SUCCESS, p_parse(&context));
start = p_result(&context);
assert(start.pItems !is null);
assert(start.pItems.pItem !is null);
assert(start.pItems.pItem.pDual !is null);
assert(start.pItems.pItem.pDual.pTwo1 !is null);
assert(start.pItems.pItem.pDual.pOne2 !is null);
assert(start.pItems.pItem.pDual.pTwo2 is null);
assert(start.pItems.pItem.pDual.pOne1 is null);
}