Add rule field aliases - #24

This commit is contained in:
Josh Holtrop 2024-07-22 17:29:54 -04:00
parent 9746b3f2bf
commit a7348be95d
10 changed files with 261 additions and 27 deletions

View File

@ -234,15 +234,15 @@ drop /\\s+/;
Start -> Items; Start -> Items;
Items -> Item ItemsMore; Items -> Item:item ItemsMore;
Items -> ; Items -> ;
ItemsMore -> comma Item ItemsMore; ItemsMore -> comma Item:item ItemsMore;
ItemsMore -> ; ItemsMore -> ;
Item -> a; Item -> a;
Item -> b; Item -> b;
Item -> lparen Item rparen; Item -> lparen Item:item rparen;
Item -> Dual; Item -> Dual;
Dual -> One Two; Dual -> One Two;
@ -263,24 +263,24 @@ Start * start = p_result(&context);
assert(start.pItems1 !is null); assert(start.pItems1 !is null);
assert(start.pItems !is null); assert(start.pItems !is null);
Items * items = start.pItems; Items * items = start.pItems;
assert(items.pItem !is null); assert(items.item !is null);
assert(items.pItem.pToken1 !is null); assert(items.item.pToken1 !is null);
assert_eq(TOKEN_a, items.pItem.pToken1.token); assert_eq(TOKEN_a, items.item.pToken1.token);
assert_eq(11, items.pItem.pToken1.pvalue); assert_eq(11, items.item.pToken1.pvalue);
assert(items.pItemsMore !is null); assert(items.pItemsMore !is null);
ItemsMore * itemsmore = items.pItemsMore; ItemsMore * itemsmore = items.pItemsMore;
assert(itemsmore.pItem !is null); assert(itemsmore.item !is null);
assert(itemsmore.pItem.pItem !is null); assert(itemsmore.item.item !is null);
assert(itemsmore.pItem.pItem.pItem !is null); assert(itemsmore.item.item.item !is null);
assert(itemsmore.pItem.pItem.pItem.pToken1 !is null); assert(itemsmore.item.item.item.pToken1 !is null);
assert_eq(TOKEN_b, itemsmore.pItem.pItem.pItem.pToken1.token); assert_eq(TOKEN_b, itemsmore.item.item.item.pToken1.token);
assert_eq(22, itemsmore.pItem.pItem.pItem.pToken1.pvalue); assert_eq(22, itemsmore.item.item.item.pToken1.pvalue);
assert(itemsmore.pItemsMore !is null); assert(itemsmore.pItemsMore !is null);
itemsmore = itemsmore.pItemsMore; itemsmore = itemsmore.pItemsMore;
assert(itemsmore.pItem !is null); assert(itemsmore.item !is null);
assert(itemsmore.pItem.pToken1 !is null); assert(itemsmore.item.pToken1 !is null);
assert_eq(TOKEN_b, itemsmore.pItem.pToken1.token); assert_eq(TOKEN_b, itemsmore.item.pToken1.token);
assert_eq(22, itemsmore.pItem.pToken1.pvalue); assert_eq(22, itemsmore.item.pToken1.pvalue);
assert(itemsmore.pItemsMore is null); assert(itemsmore.pItemsMore is null);
``` ```
@ -607,6 +607,10 @@ This can be changed with the `start` statement.
The grammar file must define a rule with the name of the start rule name which The grammar file must define a rule with the name of the start rule name which
will be used as the top-level starting rule that the parser attempts to reduce. will be used as the top-level starting rule that the parser attempts to reduce.
Rule statements are composed of the name of the rule, a `->` token, the fields
defining the rule pattern that must be matched, and a terminating semicolon or
user code block.
Example: Example:
``` ```
@ -635,9 +639,13 @@ E4 -> lparen E1 rparen << $$ = $2; >>
This example uses the default start rule name of `Start`. This example uses the default start rule name of `Start`.
A parser rule has zero or more terms on the right side of its definition. A parser rule has zero or more fields on the right side of its definition.
Each of these terms is either a token name or a rule name. Each of these fields is either a token name or a rule name.
A term can be immediately followed by a `?` character to signify that it is A field can optionally be followed by a `:` and then a field alias name.
If present, the field alias name can be used to refer to the field value in
user code blocks (written as `${alias}`), or if AST mode is active, the field
alias name is used as an additional field name in the generated AST node
structure.
A field can be immediately followed by a `?` character to signify that it is
optional. optional.
Another example: Another example:
@ -647,14 +655,16 @@ token private;
token int; token int;
token ident /[a-zA-Z_][a-zA-Z_0-9]*/; token ident /[a-zA-Z_][a-zA-Z_0-9]*/;
token semicolon /;/; token semicolon /;/;
IntegerDeclaration -> Visibility? int ident semicolon; IntegerDeclaration -> Visibility? int ident:name semicolon;
Visibility -> public; Visibility -> public;
Visibility -> private; Visibility -> private;
``` ```
In a parser rule code block, parser values for the right side terms are In a parser rule code block, parser values for the right side fields are
accessible as `$1` for the first term's parser value, `$2` for the second accessible as `$1` for the first field's parser value, `$2` for the second
term's parser value, etc... field's parser value, etc...
For the `IntegerDeclaration` rule, the third field value can also be referred
to as `${name}`.
The `$$` symbol accesses the output parser value for this rule. The `$$` symbol accesses the output parser value for this rule.
The above examples demonstrate how the parser values for the rule components The above examples demonstrate how the parser values for the rule components
can be used to produce the parser value for the accepted rule. can be used to produce the parser value for the accepted rule.
@ -849,6 +859,19 @@ If the first rule is matched, then `pOne1` and `pTwo2` will be non-null while
`pTwo1` and `pOne2` will be null. `pTwo1` and `pOne2` will be null.
If the second rule is matched instead, then the opposite would be the case. If the second rule is matched instead, then the opposite would be the case.
If a field alias is present in a rule definition, an additional field will be
generated in the AST node with the field alias name.
For example:
```
Exp -> Exp:left plus ExpB:right;
```
In the generated `Exp` structure, the fields `pExp`, `pExp1`, and `left` will
all point to the same child node (an instance of the `Exp` structure), and the
fields `pExpB`, `pExpB3`, and `right` will all point to the same child node
(an instance of the `ExpB` structure).
The numeric suffixes are the 1-based positions of the fields in the rule:
`Exp` is the first field and `ExpB` is the third.
##> Functions ##> Functions
### `p_context_init` ### `p_context_init`

View File

@ -276,6 +276,19 @@ class Propane
"statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}" "statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}"
end end
end end
# Expand every `${alias}` reference in the user code block into the
# language-specific expression for the aliased component's parser value.
code = code.gsub(/\$\{(\w+)\}/) do |match|
aliasname = $1
# rule.aliases maps an alias name to a component index; an unknown alias
# yields nil and falls through to the error below.
if index = rule.aliases[aliasname]
case @language
when "c"
# The alias index is used directly as the rule.components offset here,
# whereas the `$N` expansion looks up rule.components[index - 1].
"state_values_stack_index(statevalues, -(int)n_states + #{index})->pvalue.v_#{rule.components[index].ptypename}"
when "d"
"statevalues[$-n_states+#{index}].pvalue.v_#{rule.components[index].ptypename}"
end
else
# The user code referenced an alias this rule does not define.
raise Error.new("Field alias '#{aliasname}' not found")
end
end
else else
code = code.gsub(/\$\$/) do |match| code = code.gsub(/\$\$/) do |match|
if @grammar.ast if @grammar.ast

View File

@ -198,7 +198,7 @@ class Propane
if @ast && ptypename if @ast && ptypename
raise Error.new("Multiple ptypes are unsupported in AST mode") raise Error.new("Multiple ptypes are unsupported in AST mode")
end end
md = consume!(/((?:#{IDENTIFIER_REGEX}\??\s*)*)\s*/, "expected rule component list") md = consume!(/((?:#{IDENTIFIER_REGEX}(?::#{IDENTIFIER_REGEX})?\??\s*)*)\s*/, "expected rule component list")
components = md[1].strip.split(/\s+/) components = md[1].strip.split(/\s+/)
if @ast if @ast
consume!(/;/, "expected `;'") consume!(/;/, "expected `;'")

View File

@ -6,6 +6,10 @@ class Propane
# Rule components. # Rule components.
attr_reader :components attr_reader :components
# @return [Hash]
# Field aliases.
attr_reader :aliases
# @return [String] # @return [String]
# User code associated with the rule. # User code associated with the rule.
attr_reader :code attr_reader :code
@ -49,7 +53,19 @@ class Propane
# Line number where the rule was defined in the input grammar. # Line number where the rule was defined in the input grammar.
def initialize(name, components, code, ptypename, line_number) def initialize(name, components, code, ptypename, line_number)
@name = name @name = name
@components = components @aliases = {}
# Split each `component:alias` entry into the bare component name (stored in
# @components) and an alias name -> component index entry (stored in
# @aliases). Entries without an alias are kept unchanged.
#
# The grammar's component pattern allows a trailing `?` optional marker after
# the alias (e.g. `Item:item?`). That marker belongs to the component, not to
# the alias name, so it is re-attached to the component instead of being
# captured as part of the alias.
#
# Raises Error if the same alias name is used twice within this rule.
@components = components.each_with_index.map do |component, i|
  if component =~ /\A([^\s:]+):([^\s:?]+)(\?)?\z/
    c, aliasname, optional_marker = $1, $2, $3
    if @aliases.key?(aliasname)
      raise Error.new("Error: duplicate field alias `#{aliasname}` for rule #{name} defined on line #{line_number}")
    end
    @aliases[aliasname] = i
    # optional_marker is nil when no `?` was given; interpolation turns it
    # into an empty string.
    "#{c}#{optional_marker}"
  else
    component
  end
end
@rule_set_node_field_index_map = components.map {0} @rule_set_node_field_index_map = components.map {0}
@code = code @code = code
@ptypename = ptypename @ptypename = ptypename

View File

@ -100,8 +100,10 @@ class Propane
# Finalize a RuleSet after adding all Rules to it. # Finalize a RuleSet after adding all Rules to it.
def finalize(grammar) def finalize(grammar)
if grammar.ast
build_ast_fields(grammar) build_ast_fields(grammar)
end end
end
private private
@ -148,6 +150,18 @@ class Propane
"#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}" "#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}"
end end
end end
# Now merge in the field aliases as given by the user in the
# grammar.
# Each alias becomes an extra entry in the @ast_fields slot for the component
# position the alias was attached to. field_aliases remembers the position
# recorded for each alias name across @rules so that the same alias name used
# at a different position in another rule is rejected.
field_aliases = {}
@rules.each do |rule|
rule.aliases.each do |alias_name, index|
# Index 0 is truthy in Ruby, so this check also works for the first position.
if field_aliases[alias_name] && field_aliases[alias_name] != index
raise Error.new("Error: conflicting AST node field positions for alias `#{alias_name}`")
end
field_aliases[alias_name] = index
# The alias entry reuses the value of the first existing entry in this slot
# (presumably the node type name for that position -- confirm against the
# code that fills @ast_fields).
@ast_fields[index][alias_name] = @ast_fields[index].first[1]
end
end
end end
end end

View File

@ -213,6 +213,42 @@ EOF
expect(File.binread("spec/run/testparser.log")).to match %r{Shift/Reduce conflict \(state \d+\) between token b and rule As2\? \(defined on line 4\)} expect(File.binread("spec/run/testparser.log")).to match %r{Shift/Reduce conflict \(state \d+\) between token b and rule As2\? \(defined on line 4\)}
end end
# Binding the same alias name to two fields of one rule must be rejected.
it "errors on duplicate field aliases in a rule" do
  write_grammar <<EOF
token a;
token b;
Start -> a:foo b:foo;
EOF
  outcome = run_propane(capture: true, extra_args: ["-w"])
  expect(outcome.stderr).to match(%r{Error: duplicate field alias `foo` for rule Start defined on line 3})
  expect(outcome.status).not_to eq(0)
end
# In AST mode an alias becomes a node field, so two rules of the same rule
# set may not place the same alias at different field positions.
it "errors when an alias is in different positions for different rules in a rule set when AST mode is enabled" do
  write_grammar <<EOF
ast;
token a;
token b;
Start -> a:foo b;
Start -> b b:foo;
EOF
  outcome = run_propane(capture: true, extra_args: ["-w"])
  expect(outcome.stderr).to match(%r{Error: conflicting AST node field positions for alias `foo`})
  expect(outcome.status).not_to eq(0)
end
# Without AST mode, the same alias may sit at different positions in
# different rules of a rule set.
it "does not error when an alias is in different positions for different rules in a rule set when AST mode is not enabled" do
  write_grammar <<EOF
token a;
token b;
Start -> a:foo b;
Start -> b b:foo;
EOF
  outcome = run_propane(capture: true, extra_args: ["-w"])
  expect(outcome.stderr).to eq("")
  expect(outcome.status).to eq(0)
end
%w[d c].each do |language| %w[d c].each do |language|
context "#{language.upcase} language" do context "#{language.upcase} language" do
@ -1120,6 +1156,70 @@ EOF
expect(results.stderr).to eq "" expect(results.stderr).to eq ""
expect(results.status).to eq 0 expect(results.status).to eq 0
end end
# Generates an AST-mode parser whose Start rule names its three fields, then
# builds and runs the language-specific test program that reads those fields.
it "allows specifying field aliases in AST mode" do
  write_grammar <<EOF
ast;
token a;
token b;
token c;
drop /\\s+/;
Start -> T:first T:second T:third;
T -> a;
T -> b;
T -> c;
EOF
  run_propane(language: language)
  test_source = "spec/test_ast_field_aliases.#{language}"
  compile(test_source, language: language)
  outcome = run_test
  expect(outcome.stderr).to eq("")
  expect(outcome.status).to eq(0)
end
# Generates a non-AST parser whose Start rule prints its two fields through
# the `${first}` / `${second}` alias references, then builds and runs the
# language-specific test program and checks the printed values.
#
# The C grammar header includes <stdlib.h> because the token code block calls
# malloc(); without it the call is an implicit function declaration.
it "allows specifying field aliases when AST mode is not enabled" do
  if language == "d"
    write_grammar <<EOF
<<
import std.stdio;
>>
ptype string;
token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
$$ = match;
>>
drop /\\s+/;
Start -> id:first id:second <<
writeln("first is ", ${first});
writeln("second is ", ${second});
>>
EOF
  else
    write_grammar <<EOF
<<
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
>>
ptype char const *;
token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
char * s = malloc(match_length + 1);
strncpy(s, (char const *)match, match_length);
s[match_length] = 0;
$$ = s;
>>
drop /\\s+/;
Start -> id:first id:second <<
printf("first is %s\\n", ${first});
printf("second is %s\\n", ${second});
>>
EOF
  end
  run_propane(language: language)
  compile("spec/test_field_aliases.#{language}", language: language)
  results = run_test
  expect(results.stderr).to eq ""
  expect(results.status).to eq 0
  # Parenthesised so the regex literal is not parsed as an ambiguous first argument.
  expect(results.stdout).to match(/first is foo1.*second is bar2/m)
end
end end
end end
end end

View File

@ -0,0 +1,19 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"
/*
 * Parse the input "a b c" (separated by newlines) and check that the three
 * aliased fields of the Start node (first, second, third) refer to the
 * tokens a, b and c respectively.
 *
 * Returns 0 on success and 1 if parsing fails.
 */
int main(void)
{
    char const * input = "\na\nb\nc";
    p_context_t context;
    p_context_init(&context, (uint8_t const *)input, strlen(input));

    /* Run the parser outside of assert(): when NDEBUG is defined the assert()
     * argument is not evaluated, which would skip the parse entirely. */
    int parsed_ok = (p_parse(&context) == P_SUCCESS);
    assert(parsed_ok);
    if (!parsed_ok)
    {
        return 1;
    }

    Start * start = p_result(&context);
    assert(start != NULL);
    assert_eq(TOKEN_a, start->first->pToken->token);
    assert_eq(TOKEN_b, start->second->pToken->token);
    assert_eq(TOKEN_c, start->third->pToken->token);
    return 0;
}

View File

@ -0,0 +1,21 @@
import testparser;
import std.stdio;
import testutils;
/// Nothing to do at run time; the checks are in the unittest block below.
int main()
{
    return 0;
}

/// Parses "a b c" (newline separated) and verifies that the aliased fields
/// `first`, `second` and `third` of the Start node refer to tokens a, b and c.
unittest
{
    string source = "\na\nb\nc";
    p_context_t ctx;
    p_context_init(&ctx, source);
    assert(p_parse(&ctx) == P_SUCCESS);

    Start * root = p_result(&ctx);
    assert_eq(TOKEN_a, root.first.pToken.token);
    assert_eq(TOKEN_b, root.second.pToken.token);
    assert_eq(TOKEN_c, root.third.pToken.token);
}

13
spec/test_field_aliases.c Normal file
View File

@ -0,0 +1,13 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"
/*
 * Parse the two identifiers "foo1" and "bar2" and require the parse to
 * succeed.
 *
 * Returns 0 on success and 1 if parsing fails.
 */
int main(void)
{
    char const * input = "foo1\nbar2";
    p_context_t context;
    p_context_init(&context, (uint8_t const *)input, strlen(input));

    /* Run the parser outside of assert(): when NDEBUG is defined the assert()
     * argument is not evaluated, which would skip the parse entirely. */
    int parsed_ok = (p_parse(&context) == P_SUCCESS);
    assert(parsed_ok);
    if (!parsed_ok)
    {
        return 1;
    }
    return 0;
}

15
spec/test_field_aliases.d Normal file
View File

@ -0,0 +1,15 @@
import testparser;
import std.stdio;
/// Nothing to do at run time; the checks are in the unittest block below.
int main()
{
    return 0;
}

/// Parses the two identifiers "foo1" and "bar2" and requires the parse to
/// succeed.
unittest
{
    string source = "foo1\nbar2";
    p_context_t ctx;
    p_context_init(&ctx, source);
    assert(p_parse(&ctx) == P_SUCCESS);
}