Compare commits

...

10 Commits

SHA1 Message Date
fec2c28693 Only calculate lookahead tokens when needed - #28
Lookahead tokens are only needed if either:
(1) There is more than one rule that could be reduced in a given parser
state, or
(2) There are shift actions for a state and at least one rule that could
be reduced in the same state (to warn about shift/reduce conflicts).
2024-07-26 22:08:25 -04:00
61339aeae9 Avoid recalculating reduce_rules - #28 2024-07-26 21:36:41 -04:00
95b3dc6550 Cache ItemSet#next_symbols - #28 2024-07-25 20:33:15 -04:00
74d94fef72 Do not build ItemSet follow sets - #28 2024-07-25 20:02:00 -04:00
588c5e21c7 Cache ItemSet#leading_item_sets return values - #28 2024-07-25 10:42:43 -04:00
5f1c306273 Update CLI usage in README 2024-07-22 21:35:32 -04:00
343e8a7f9e v1.5.0 2024-07-22 21:23:38 -04:00
b3a134bf8d Update vim syntax to highlight "?" and field alias names 2024-07-22 20:39:59 -04:00
4a71dc74fb Update CHANGELOG for v1.5.0 2024-07-22 20:26:04 -04:00
a7348be95d Add rule field aliases - #24 2024-07-22 20:16:52 -04:00
16 changed files with 328 additions and 113 deletions

View File

@ -2,7 +2,14 @@
### New Features
- Track token position in AST Token node
- Track start and end text positions for tokens and rules in AST node structures (#27)
- Add warnings for shift/reduce conflicts to log file (#25)
- Add -w command line switch to treat warnings as errors and output to stderr (#26)
- Add rule field aliases (#24)
### Improvements
- Show line numbers of rules on conflict (#23)
## v1.4.0

View File

@ -31,9 +31,14 @@ Propane is typically invoked from the command-line as `./propane`.
Usage: ./propane [options] <input-file> <output-file>
Options:
--log LOG      Write log file
--version      Show program version and exit
-h, --help     Show this usage and exit
-h, --help     Show this usage and exit.
--log LOG      Write log file. This will show all parser states and their
               associated shifts and reduces. It can be helpful when
               debugging a grammar.
--version      Show program version and exit.
-w             Treat warnings as errors. This option will treat shift/reduce
               conflicts as fatal errors and will print them to stderr in
               addition to the log file.
The user must specify the path to a Propane input grammar file and a path to an
output file.
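For example, a typical invocation (the file names here are hypothetical) could
look like:

```
./propane --log parser.log -w my_grammar.propane my_parser.d
```

This would generate `my_parser.d` from `my_grammar.propane`, write the parser
state log to `parser.log`, and, because of `-w`, exit with an error if any
shift/reduce conflicts are detected.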

View File

@ -234,15 +234,15 @@ drop /\\s+/;
Start -> Items;
Items -> Item ItemsMore;
Items -> Item:item ItemsMore;
Items -> ;
ItemsMore -> comma Item ItemsMore;
ItemsMore -> comma Item:item ItemsMore;
ItemsMore -> ;
Item -> a;
Item -> b;
Item -> lparen Item rparen;
Item -> lparen Item:item rparen;
Item -> Dual;
Dual -> One Two;
@ -263,24 +263,24 @@ Start * start = p_result(&context);
assert(start.pItems1 !is null);
assert(start.pItems !is null);
Items * items = start.pItems;
assert(items.pItem !is null);
assert(items.pItem.pToken1 !is null);
assert_eq(TOKEN_a, items.pItem.pToken1.token);
assert_eq(11, items.pItem.pToken1.pvalue);
assert(items.item !is null);
assert(items.item.pToken1 !is null);
assert_eq(TOKEN_a, items.item.pToken1.token);
assert_eq(11, items.item.pToken1.pvalue);
assert(items.pItemsMore !is null);
ItemsMore * itemsmore = items.pItemsMore;
assert(itemsmore.pItem !is null);
assert(itemsmore.pItem.pItem !is null);
assert(itemsmore.pItem.pItem.pItem !is null);
assert(itemsmore.pItem.pItem.pItem.pToken1 !is null);
assert_eq(TOKEN_b, itemsmore.pItem.pItem.pItem.pToken1.token);
assert_eq(22, itemsmore.pItem.pItem.pItem.pToken1.pvalue);
assert(itemsmore.item !is null);
assert(itemsmore.item.item !is null);
assert(itemsmore.item.item.item !is null);
assert(itemsmore.item.item.item.pToken1 !is null);
assert_eq(TOKEN_b, itemsmore.item.item.item.pToken1.token);
assert_eq(22, itemsmore.item.item.item.pToken1.pvalue);
assert(itemsmore.pItemsMore !is null);
itemsmore = itemsmore.pItemsMore;
assert(itemsmore.pItem !is null);
assert(itemsmore.pItem.pToken1 !is null);
assert_eq(TOKEN_b, itemsmore.pItem.pToken1.token);
assert_eq(22, itemsmore.pItem.pToken1.pvalue);
assert(itemsmore.item !is null);
assert(itemsmore.item.pToken1 !is null);
assert_eq(TOKEN_b, itemsmore.item.pToken1.token);
assert_eq(22, itemsmore.item.pToken1.pvalue);
assert(itemsmore.pItemsMore is null);
```
@ -607,6 +607,10 @@ This can be changed with the `start` statement.
The grammar file must define a rule whose name matches the start rule name;
this rule is used as the top-level rule that the parser attempts to reduce.
Rule statements are composed of the name of the rule, a `->` token, the fields
defining the rule pattern that must be matched, and a terminating semicolon or
user code block.
Example:
```
@ -635,9 +639,13 @@ E4 -> lparen E1 rparen << $$ = $2; >>
This example uses the default start rule name of `Start`.
A parser rule has zero or more terms on the right side of its definition.
Each of these terms is either a token name or a rule name.
A term can be immediately followed by a `?` character to signify that it is
A parser rule has zero or more fields on the right side of its definition.
Each of these fields is either a token name or a rule name.
A field can optionally be followed by a `:` and then a field alias name.
If present, the field alias name is used to refer to the field value in user
code blocks, or, if AST mode is active, as the field name in the generated
AST node structure.
A field can be immediately followed by a `?` character to signify that it is
optional.
Another example:
@ -647,14 +655,16 @@ token private;
token int;
token ident /[a-zA-Z_][a-zA-Z_0-9]*/;
token semicolon /;/;
IntegerDeclaration -> Visibility? int ident semicolon;
IntegerDeclaration -> Visibility? int ident:name semicolon;
Visibility -> public;
Visibility -> private;
```
In a parser rule code block, parser values for the right side terms are
accessible as `$1` for the first term's parser value, `$2` for the second
term's parser value, etc...
In a parser rule code block, parser values for the right side fields are
accessible as `$1` for the first field's parser value, `$2` for the second
field's parser value, etc...
For the `IntegerDeclaration` rule, the third field value can also be referred
to as `${name}`.
The `$$` symbol accesses the output parser value for this rule.
The above examples demonstrate how the parser values for the rule components
can be used to produce the parser value for the accepted rule.
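As an illustrative sketch (not part of the grammar above; it assumes the
tokens and rule share the default `ptype`, with D output), a rule code block
could mix positional and aliased access:

```
IntegerDeclaration -> Visibility? int ident:name semicolon <<
  /* ${name} refers to the same parser value as $3 here. */
  $$ = ${name};
>>
```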
@ -849,6 +859,19 @@ If the first rule is matched, then `pOne1` and `pTwo2` will be non-null while
`pTwo1` and `pOne2` will be null.
If the second rule is matched instead, then the opposite would be the case.
If a field alias is present in a rule definition, an additional field will be
generated in the AST node with the field alias name.
For example:
```
Exp -> Exp:left plus ExpB:right;
```
In the generated `Exp` structure, the fields `pExp`, `pExp1`, and `left` will
all point to the same child node (an instance of the `Exp` structure), and the
fields `pExpB`, `pExpB3`, and `right` will all point to the same child node
(an instance of the `ExpB` structure).
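As a hedged sketch of what user code could then do with these fields (D
syntax; `exp` stands for a parsed `Exp` node obtained elsewhere, so this
fragment is illustrative rather than complete):

```
// `exp` is assumed to be an `Exp *` produced by the parser.
// The alias fields are additional names for the same children:
assert(exp.left is exp.pExp);   // same node as pExp/pExp1
assert(exp.right is exp.pExpB); // same node as pExpB/pExpB3
```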
##> Functions
### `p_context_init`

View File

@ -17,6 +17,9 @@ syn region propaneTarget matchgroup=propaneDelimiter start="<<" end=">>$" contai
syn match propaneComment "#.*"
syn match propaneOperator "->"
syn match propaneFieldAlias ":[a-zA-Z0-9_]\+" contains=propaneFieldOperator
syn match propaneFieldOperator ":" contained
syn match propaneOperator "?"
syn keyword propaneKeyword ast ast_prefix ast_suffix drop module prefix ptype start token tokenid
syn region propaneRegex start="/" end="/" skip="\\/"
@ -25,4 +28,6 @@ hi def link propaneComment Comment
hi def link propaneKeyword Keyword
hi def link propaneRegex String
hi def link propaneOperator Operator
hi def link propaneFieldOperator Operator
hi def link propaneDelimiter Delimiter
hi def link propaneFieldAlias Identifier

View File

@ -276,6 +276,19 @@ class Propane
"statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}"
end
end
code = code.gsub(/\$\{(\w+)\}/) do |match|
aliasname = $1
if index = rule.aliases[aliasname]
case @language
when "c"
"state_values_stack_index(statevalues, -(int)n_states + #{index})->pvalue.v_#{rule.components[index].ptypename}"
when "d"
"statevalues[$-n_states+#{index}].pvalue.v_#{rule.components[index].ptypename}"
end
else
raise Error.new("Field alias '#{aliasname}' not found")
end
end
else
code = code.gsub(/\$\$/) do |match|
if @grammar.ast

View File

@ -198,7 +198,7 @@ class Propane
if @ast && ptypename
raise Error.new("Multiple ptypes are unsupported in AST mode")
end
md = consume!(/((?:#{IDENTIFIER_REGEX}\??\s*)*)\s*/, "expected rule component list")
md = consume!(/((?:#{IDENTIFIER_REGEX}(?::#{IDENTIFIER_REGEX})?\??\s*)*)\s*/, "expected rule component list")
components = md[1].strip.split(/\s+/)
if @ast
consume!(/;/, "expected `;'")

View File

@ -39,7 +39,6 @@ class Propane
end
build_reduce_actions!
build_follow_sets!
build_tables!
write_log!
if @warnings.size > 0 && @options[:warnings_as_errors]
@ -66,10 +65,10 @@ class Propane
state_id: state_id,
}
end
if item_set.reduce_actions
unless item_set.reduce_rules.empty?
shift_entries.each do |shift_entry|
token = shift_entry[:symbol]
if item_set.reduce_actions.include?(token)
if get_lookahead_reduce_actions_for_item_set(item_set).include?(token)
rule = item_set.reduce_actions[token]
@warnings << "Shift/Reduce conflict (state #{item_set.id}) between token #{token.name} and rule #{rule.name} (defined on line #{rule.line_number})"
end
@ -115,7 +114,7 @@ class Propane
# @return [void]
def build_reduce_actions!
@item_sets.each do |item_set|
item_set.reduce_actions = build_reduce_actions_for_item_set(item_set)
build_reduce_actions_for_item_set(item_set)
end
end
@ -124,27 +123,36 @@ class Propane
# @param item_set [ItemSet]
# ItemSet (parser state)
#
# @return [nil, Hash]
# If no reduce actions are possible for the given item set, nil.
# Otherwise, a mapping of lookahead Tokens to the Rules to reduce.
# @return [void]
def build_reduce_actions_for_item_set(item_set)
# To build the reduce actions, we start by looking at any
# "complete" items, i.e., items where the parse position is at the
# end of a rule. These are the only rules that are candidates for
# reduction in the current ItemSet.
reduce_rules = Set.new(item_set.items.select(&:complete?).map(&:rule))
item_set.reduce_rules = Set.new(item_set.items.select(&:complete?).map(&:rule))
if reduce_rules.size == 1
item_set.reduce_rule = reduce_rules.first
if item_set.reduce_rules.size == 1
item_set.reduce_rule = item_set.reduce_rules.first
end
if reduce_rules.size == 0
nil
else
build_lookahead_reduce_actions_for_item_set(item_set)
if item_set.reduce_rules.size > 1
# Force item_set.reduce_actions to be built to store the lookahead
# tokens for the possible reduce rules if there is more than one.
get_lookahead_reduce_actions_for_item_set(item_set)
end
end
# Get the reduce actions for a single item set (parser state).
#
# @param item_set [ItemSet]
# ItemSet (parser state)
#
# @return [Hash]
# Mapping of lookahead Tokens to the Rules to reduce.
def get_lookahead_reduce_actions_for_item_set(item_set)
item_set.reduce_actions ||= build_lookahead_reduce_actions_for_item_set(item_set)
end
# Build the reduce actions for a single item set (parser state).
#
# @param item_set [ItemSet]
@ -153,15 +161,13 @@ class Propane
# @return [Hash]
# Mapping of lookahead Tokens to the Rules to reduce.
def build_lookahead_reduce_actions_for_item_set(item_set)
reduce_rules = Set.new(item_set.items.select(&:complete?).map(&:rule))
# We will be looking for all possible tokens that can follow instances of
# these rules. Rather than looking through the entire grammar for the
# possible following tokens, we will only look in the item sets leading
# up to this one. This restriction gives us a more precise lookahead set,
# and allows us to parse LALR grammars.
item_sets = Set[item_set] + item_set.leading_item_sets
reduce_rules.reduce({}) do |reduce_actions, reduce_rule|
item_set.reduce_rules.reduce({}) do |reduce_actions, reduce_rule|
lookahead_tokens_for_rule = build_lookahead_tokens_to_reduce(reduce_rule, item_sets)
lookahead_tokens_for_rule.each do |lookahead_token|
if existing_reduce_rule = reduce_actions[lookahead_token]
@ -233,51 +239,6 @@ class Propane
lookahead_tokens
end
# Build the follow sets for each ItemSet.
#
# @return [void]
def build_follow_sets!
@item_sets.each do |item_set|
item_set.follow_set = build_follow_set_for_item_set(item_set)
end
end
# Build the follow set for the given ItemSet.
#
# @param item_set [ItemSet]
# The ItemSet to build the follow set for.
#
# @return [Set]
# Follow set for the given ItemSet.
def build_follow_set_for_item_set(item_set)
follow_set = Set.new
rule_sets_to_check_after = Set.new
item_set.items.each do |item|
(1..).each do |offset|
case symbol = item.next_symbol(offset)
when nil
rule_sets_to_check_after << item.rule.rule_set
break
when Token
follow_set << symbol
break
when RuleSet
follow_set += symbol.start_token_set
unless symbol.could_be_empty?
break
end
end
end
end
reduce_lookaheads = build_lookahead_reduce_actions_for_item_set(item_set)
reduce_lookaheads.each do |token, rule_set|
if rule_sets_to_check_after.include?(rule_set)
follow_set << token
end
end
follow_set
end
def write_log!
@log.puts Util.banner("Parser Rules")
@grammar.rules.each do |rule|

View File

@ -2,7 +2,7 @@ class Propane
class Parser
# Represent a parser "item set", which is a set of possible items that the
# parser could currently be parsing.
# parser could currently be parsing. This is equivalent to a parser state.
class ItemSet
# @return [Set<Item>]
@ -25,15 +25,15 @@ class Propane
# Rule to reduce if there is only one possibility.
attr_accessor :reduce_rule
# @return [Set<Rule>]
# Set of rules that could be reduced in this parser state.
attr_accessor :reduce_rules
# @return [nil, Hash]
# Reduce actions, mapping lookahead tokens to rules, if there is
# more than one rule that could be reduced.
attr_accessor :reduce_actions
# @return [Set<Token>]
# Follow set for the ItemSet.
attr_accessor :follow_set
# Build an ItemSet.
#
# @param items [Array<Item>]
@ -50,7 +50,7 @@ class Propane
# @return [Set<Token, RuleSet>]
# Set of next symbols for all Items in this ItemSet.
def next_symbols
Set.new(@items.map(&:next_symbol).compact)
@_next_symbols ||= Set.new(@items.map(&:next_symbol).compact)
end
# Build a next ItemSet for the given next symbol.
@ -99,6 +99,8 @@ class Propane
# @return [Set<ItemSet>]
# Set of all ItemSets that lead up to this ItemSet.
def leading_item_sets
@_leading_item_sets ||=
begin
result = Set.new
eval_sets = Set[self]
evaled = Set.new
@ -115,6 +117,7 @@ class Propane
end
result
end
end
# Represent the ItemSet as a String.
#

View File

@ -6,6 +6,10 @@ class Propane
# Rule components.
attr_reader :components
# @return [Hash]
# Field aliases.
attr_reader :aliases
# @return [String]
# User code associated with the rule.
attr_reader :code
@ -49,7 +53,19 @@ class Propane
# Line number where the rule was defined in the input grammar.
def initialize(name, components, code, ptypename, line_number)
@name = name
@components = components
@aliases = {}
@components = components.each_with_index.map do |component, i|
if component =~ /(\S+):(\S+)/
c, aliasname = $1, $2
if @aliases[aliasname]
raise Error.new("Error: duplicate field alias `#{aliasname}` for rule #{name} defined on line #{line_number}")
end
@aliases[aliasname] = i
c
else
component
end
end
@rule_set_node_field_index_map = components.map {0}
@code = code
@ptypename = ptypename

View File

@ -100,8 +100,10 @@ class Propane
# Finalize a RuleSet after adding all Rules to it.
def finalize(grammar)
if grammar.ast
build_ast_fields(grammar)
end
end
private
@ -148,6 +150,18 @@ class Propane
"#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}"
end
end
# Now merge in the field aliases as given by the user in the
# grammar.
field_aliases = {}
@rules.each do |rule|
rule.aliases.each do |alias_name, index|
if field_aliases[alias_name] && field_aliases[alias_name] != index
raise Error.new("Error: conflicting AST node field positions for alias `#{alias_name}`")
end
field_aliases[alias_name] = index
@ast_fields[index][alias_name] = @ast_fields[index].first[1]
end
end
end
end

View File

@ -1,3 +1,3 @@
class Propane
VERSION = "1.4.0"
VERSION = "1.5.0"
end

View File

@ -213,6 +213,42 @@ EOF
expect(File.binread("spec/run/testparser.log")).to match %r{Shift/Reduce conflict \(state \d+\) between token b and rule As2\? \(defined on line 4\)}
end
it "errors on duplicate field aliases in a rule" do
write_grammar <<EOF
token a;
token b;
Start -> a:foo b:foo;
EOF
results = run_propane(extra_args: %w[-w], capture: true)
expect(results.stderr).to match %r{Error: duplicate field alias `foo` for rule Start defined on line 3}
expect(results.status).to_not eq 0
end
it "errors when an alias is in different positions for different rules in a rule set when AST mode is enabled" do
write_grammar <<EOF
ast;
token a;
token b;
Start -> a:foo b;
Start -> b b:foo;
EOF
results = run_propane(extra_args: %w[-w], capture: true)
expect(results.stderr).to match %r{Error: conflicting AST node field positions for alias `foo`}
expect(results.status).to_not eq 0
end
it "does not error when an alias is in different positions for different rules in a rule set when AST mode is not enabled" do
write_grammar <<EOF
token a;
token b;
Start -> a:foo b;
Start -> b b:foo;
EOF
results = run_propane(extra_args: %w[-w], capture: true)
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
%w[d c].each do |language|
context "#{language.upcase} language" do
@ -1120,6 +1156,70 @@ EOF
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
it "allows specifying field aliases in AST mode" do
write_grammar <<EOF
ast;
token a;
token b;
token c;
drop /\\s+/;
Start -> T:first T:second T:third;
T -> a;
T -> b;
T -> c;
EOF
run_propane(language: language)
compile("spec/test_ast_field_aliases.#{language}", language: language)
results = run_test
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
it "allows specifying field aliases when AST mode is not enabled" do
if language == "d"
write_grammar <<EOF
<<
import std.stdio;
>>
ptype string;
token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
$$ = match;
>>
drop /\\s+/;
Start -> id:first id:second <<
writeln("first is ", ${first});
writeln("second is ", ${second});
>>
EOF
else
write_grammar <<EOF
<<
#include <stdio.h>
#include <string.h>
>>
ptype char const *;
token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
char * s = malloc(match_length + 1);
strncpy(s, (char const *)match, match_length);
s[match_length] = 0;
$$ = s;
>>
drop /\\s+/;
Start -> id:first id:second <<
printf("first is %s\\n", ${first});
printf("second is %s\\n", ${second});
>>
EOF
end
run_propane(language: language)
compile("spec/test_field_aliases.#{language}", language: language)
results = run_test
expect(results.stderr).to eq ""
expect(results.status).to eq 0
expect(results.stdout).to match /first is foo1.*second is bar2/m
end
end
end
end

View File

@ -0,0 +1,19 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"
int main()
{
char const * input = "\na\nb\nc";
p_context_t context;
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
Start * start = p_result(&context);
assert_eq(TOKEN_a, start->first->pToken->token);
assert_eq(TOKEN_b, start->second->pToken->token);
assert_eq(TOKEN_c, start->third->pToken->token);
return 0;
}

View File

@ -0,0 +1,21 @@
import testparser;
import std.stdio;
import testutils;
int main()
{
return 0;
}
unittest
{
string input = "\na\nb\nc";
p_context_t context;
p_context_init(&context, input);
assert(p_parse(&context) == P_SUCCESS);
Start * start = p_result(&context);
assert_eq(TOKEN_a, start.first.pToken.token);
assert_eq(TOKEN_b, start.second.pToken.token);
assert_eq(TOKEN_c, start.third.pToken.token);
}

spec/test_field_aliases.c Normal file
View File

@ -0,0 +1,13 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"
int main()
{
char const * input = "foo1\nbar2";
p_context_t context;
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
return 0;
}

spec/test_field_aliases.d Normal file
View File

@ -0,0 +1,15 @@
import testparser;
import std.stdio;
int main()
{
return 0;
}
unittest
{
string input = "foo1\nbar2";
p_context_t context;
p_context_init(&context, input);
assert(p_parse(&context) == P_SUCCESS);
}