Compare commits

...

27 Commits

Author SHA1 Message Date
c24f323ff0 v1.5.1 2024-07-26 22:30:48 -04:00
fec2c28693 Only calculate lookahead tokens when needed - #28
Lookahead tokens are only needed if either:
(1) There is more than one rule that could be reduced in a given parser
state, or
(2) There are shift actions for a state and at least one rule that could
be reduced in the same state (to warn about shift/reduce conflicts).
2024-07-26 22:08:25 -04:00
61339aeae9 Avoid recalculating reduce_rules - #28 2024-07-26 21:36:41 -04:00
95b3dc6550 Cache ItemSet#next_symbols - #28 2024-07-25 20:33:15 -04:00
74d94fef72 Do not build ItemSet follow sets - #28 2024-07-25 20:02:00 -04:00
588c5e21c7 Cache ItemSet#leading_item_sets return values - #28 2024-07-25 10:42:43 -04:00
5f1c306273 Update CLI usage in README 2024-07-22 21:35:32 -04:00
343e8a7f9e v1.5.0 2024-07-22 21:23:38 -04:00
b3a134bf8d Update vim syntax to highlight "?" and field alias names 2024-07-22 20:39:59 -04:00
4a71dc74fb Update CHANGELOG for v1.5.0 2024-07-22 20:26:04 -04:00
a7348be95d Add rule field aliases - #24 2024-07-22 20:16:52 -04:00
9746b3f2bf Document position tracking fields in user guide - #27 2024-07-21 14:04:51 -04:00
c5b8fc28bd Move INVALID_POSITION from header to C source - #27 2024-07-21 13:39:34 -04:00
092fce61eb Test position validity for empty matching rules - #27 2024-07-21 13:39:30 -04:00
e647248e34 Track start and end position of rules in AST nodes - #27 2024-07-19 15:37:37 -04:00
f4ae1b8601 Add position fields to AST nodes (not populated yet) - #27 2024-07-19 14:34:50 -04:00
eae2e17f41 Test tracking token end positions when the token spans a newline - #27 2024-07-18 12:09:26 -04:00
87d6d29d60 Store token end position - #27 2024-07-18 12:03:44 -04:00
3aced70356 Show line numbers of rules upon conflict - close #23 2024-07-14 20:52:52 -04:00
2dd89445fc Add command line switch to output warnings to stderr - close #26 2024-07-14 15:36:07 -04:00
4ae5ab79b3 Warn on shift/reduce conflicts 2024-07-13 21:35:53 -04:00
69cc8fa67d Always compute lookahead tokens for reduce rules
Even if they won't be needed for the generated parser, they'll be useful
to detect shift/reduce conflicts.
2024-07-13 21:01:44 -04:00
7f3eb8f315 Calculate follow token set for an ItemSet 2024-07-13 20:48:28 -04:00
d76e12fea1 Rename "following" to "next" - #25
The term "following" could potentially imply an association with the
"follow set", however it was used in a non-closed manner.
2024-07-08 10:14:09 -04:00
911e9505b7 Track token position in AST Token node 2024-05-27 22:10:05 -04:00
aaeb0c4db1 Remove leftover TODO from earlier restructuring 2024-05-27 20:44:42 -04:00
fd89c5c6b3 Add Vim syntax highlighting files for Propane 2024-05-26 14:49:30 -04:00
29 changed files with 1166 additions and 193 deletions

View File

@ -1,3 +1,22 @@
## v1.5.1
### Improvements
- Improve performance (#28)
## v1.5.0
### New Features
- Track start and end text positions for tokens and rules in AST node structures (#27)
- Add warnings for shift/reduce conflicts to log file (#25)
- Add -w command line switch to treat warnings as errors and output to stderr (#26)
- Add rule field aliases (#24)
### Improvements
- Show line numbers of rules on conflict (#23)
## v1.4.0 ## v1.4.0
### New Features ### New Features

View File

@ -31,9 +31,14 @@ Propane is typically invoked from the command-line as `./propane`.
Usage: ./propane [options] <input-file> <output-file> Usage: ./propane [options] <input-file> <output-file>
Options: Options:
--log LOG Write log file -h, --help Show this usage and exit.
--version Show program version and exit --log LOG Write log file. This will show all parser states and their
-h, --help Show this usage and exit associated shifts and reduces. It can be helpful when
debugging a grammar.
--version Show program version and exit.
-w Treat warnings as errors. This option will treat shift/reduce
conflicts as fatal errors and will print them to stderr in
addition to the log file.
The user must specify the path to a Propane input grammar file and a path to an The user must specify the path to a Propane input grammar file and a path to an
output file. output file.

View File

@ -226,7 +226,10 @@ typedef struct
/** Number of bytes of input text used to match. */ /** Number of bytes of input text used to match. */
size_t length; size_t length;
/** Input text position delta. */ /** Input text position delta to end of token. */
<%= @grammar.prefix %>position_t end_delta_position;
/** Input text position delta to next code point after token end. */
<%= @grammar.prefix %>position_t delta_position; <%= @grammar.prefix %>position_t delta_position;
/** Accepting lexer state from the match. */ /** Accepting lexer state from the match. */
@ -358,6 +361,7 @@ static size_t find_longest_match(<%= @grammar.prefix %>context_t * context,
if (transition_state != INVALID_LEXER_STATE_ID) if (transition_state != INVALID_LEXER_STATE_ID)
{ {
attempt_match.length += code_point_length; attempt_match.length += code_point_length;
attempt_match.end_delta_position = attempt_match.delta_position;
if (code_point == '\n') if (code_point == '\n')
{ {
attempt_match.delta_position.row++; attempt_match.delta_position.row++;
@ -444,7 +448,6 @@ static size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%= @
<%= @grammar.prefix %>token_info_t token_info = {0}; <%= @grammar.prefix %>token_info_t token_info = {0};
token_info.position = context->text_position; token_info.position = context->text_position;
token_info.token = INVALID_TOKEN_ID; token_info.token = INVALID_TOKEN_ID;
*out_token_info = token_info; // TODO: remove
lexer_match_info_t match_info; lexer_match_info_t match_info;
size_t unexpected_input_length; size_t unexpected_input_length;
size_t result = find_longest_match(context, &match_info, &unexpected_input_length); size_t result = find_longest_match(context, &match_info, &unexpected_input_length);
@ -491,11 +494,22 @@ static size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%= @
} }
token_info.token = token_to_accept; token_info.token = token_to_accept;
token_info.length = match_info.length; token_info.length = match_info.length;
if (match_info.end_delta_position.row != 0u)
{
token_info.end_position.row = token_info.position.row + match_info.end_delta_position.row;
token_info.end_position.col = match_info.end_delta_position.col;
}
else
{
token_info.end_position.row = token_info.position.row;
token_info.end_position.col = token_info.position.col + match_info.end_delta_position.col;
}
*out_token_info = token_info; *out_token_info = token_info;
return P_SUCCESS; return P_SUCCESS;
case P_EOF: case P_EOF:
token_info.token = TOKEN___EOF; token_info.token = TOKEN___EOF;
token_info.end_position = token_info.position;
*out_token_info = token_info; *out_token_info = token_info;
return P_SUCCESS; return P_SUCCESS;
@ -552,6 +566,9 @@ size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%=
* Parser * Parser
*************************************************************************/ *************************************************************************/
/** Invalid position value. */
#define INVALID_POSITION (<%= @grammar.prefix %>position_t){0xFFFFFFFFu, 0xFFFFFFFFu}
/** Reduce ID type. */ /** Reduce ID type. */
typedef <%= get_type_for(@parser.reduce_table.size) %> reduce_id_t; typedef <%= get_type_for(@parser.reduce_table.size) %> reduce_id_t;
@ -667,10 +684,18 @@ typedef struct
<% end %> <% end %>
} state_value_t; } state_value_t;
/** Common AST node structure. */
typedef struct
{
<%= @grammar.prefix %>position_t position;
<%= @grammar.prefix %>position_t end_position;
void * fields[];
} ASTNode;
/** Parser shift table. */ /** Parser shift table. */
static const shift_t parser_shift_table[] = { static const shift_t parser_shift_table[] = {
<% @parser.shift_table.each do |shift| %> <% @parser.shift_table.each do |shift| %>
{<%= shift[:symbol_id] %>u, <%= shift[:state_id] %>u}, {<%= shift[:symbol].id %>u, <%= shift[:state_id] %>u},
<% end %> <% end %>
}; };
@ -950,6 +975,8 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
/* We shifted a token, mark it consumed. */ /* We shifted a token, mark it consumed. */
<% if @grammar.ast %> <% if @grammar.ast %>
<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>)); <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = malloc(sizeof(<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>));
token_ast_node->position = token_info.position;
token_ast_node->end_position = token_info.end_position;
token_ast_node->token = token; token_ast_node->token = token;
token_ast_node->pvalue = token_info.pvalue; token_ast_node->pvalue = token_info.pvalue;
state_values_stack_index(&statevalues, -1)->ast_node = token_ast_node; state_values_stack_index(&statevalues, -1)->ast_node = token_ast_node;
@ -984,22 +1011,43 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
} }
else if (parser_reduce_table[reduce_index].n_states > 0) else if (parser_reduce_table[reduce_index].n_states > 0)
{ {
void ** node_fields = calloc(parser_reduce_table[reduce_index].rule_set_node_field_array_size, sizeof(void *)); size_t n_fields = parser_reduce_table[reduce_index].rule_set_node_field_array_size;
ASTNode * node = (ASTNode *)malloc(sizeof(ASTNode) + n_fields * sizeof(void *));
node->position = INVALID_POSITION;
node->end_position = INVALID_POSITION;
for (size_t i = 0; i < n_fields; i++)
{
node->fields[i] = NULL;
}
if (parser_reduce_table[reduce_index].rule_set_node_field_index_map == NULL) if (parser_reduce_table[reduce_index].rule_set_node_field_index_map == NULL)
{ {
for (size_t i = 0; i < parser_reduce_table[reduce_index].n_states; i++) for (size_t i = 0; i < parser_reduce_table[reduce_index].n_states; i++)
{ {
node_fields[i] = state_values_stack_index(&statevalues, -(int)parser_reduce_table[reduce_index].n_states + (int)i)->ast_node; node->fields[i] = state_values_stack_index(&statevalues, -(int)parser_reduce_table[reduce_index].n_states + (int)i)->ast_node;
} }
} }
else else
{ {
for (size_t i = 0; i < parser_reduce_table[reduce_index].n_states; i++) for (size_t i = 0; i < parser_reduce_table[reduce_index].n_states; i++)
{ {
node_fields[parser_reduce_table[reduce_index].rule_set_node_field_index_map[i]] = state_values_stack_index(&statevalues, -(int)parser_reduce_table[reduce_index].n_states + (int)i)->ast_node; node->fields[parser_reduce_table[reduce_index].rule_set_node_field_index_map[i]] = state_values_stack_index(&statevalues, -(int)parser_reduce_table[reduce_index].n_states + (int)i)->ast_node;
} }
} }
reduced_parser_node = node_fields; bool position_found = false;
for (size_t i = 0; i < n_fields; i++)
{
ASTNode * child = (ASTNode *)node->fields[i];
if ((child != NULL) && <%= @grammar.prefix %>position_valid(child->position))
{
if (!position_found)
{
node->position = child->position;
position_found = true;
}
node->end_position = child->end_position;
}
}
reduced_parser_node = node;
} }
else else
{ {
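
The two `end_delta_position` hunks above split a token's end position into a relative delta that is only converted to an absolute position once the token is accepted. A minimal standalone sketch of that arithmetic, using the documented `p_position_t` shape (the `p_` prefix is the default; this is an illustration, not the generated code):

```c
#include <assert.h>
#include <stdint.h>

typedef struct { uint32_t row; uint32_t col; } p_position_t;

/* Combine a token's start position with the delta to its last code point.
 * If the match crossed a newline, the delta column is already absolute
 * within the final row; otherwise it is relative to the start column. */
static p_position_t end_position(p_position_t start, p_position_t end_delta)
{
    p_position_t end;
    if (end_delta.row != 0u)
    {
        end.row = start.row + end_delta.row;
        end.col = end_delta.col;
    }
    else
    {
        end.row = start.row;
        end.col = start.col + end_delta.col;
    }
    return end;
}

int main(void)
{
    /* Token starting at row 2, col 4 whose last code point is 3 columns later. */
    p_position_t end = end_position((p_position_t){2u, 4u}, (p_position_t){0u, 3u});
    assert(end.row == 2u && end.col == 7u);
    /* Token spanning a newline: last code point lands at col 1 of the next row. */
    end = end_position((p_position_t){2u, 4u}, (p_position_t){1u, 1u});
    assert(end.row == 3u && end.col == 1u);
    return 0;
}
```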

View File

@ -8,6 +8,8 @@
module <%= @grammar.modulename %>; module <%= @grammar.modulename %>;
<% end %> <% end %>
import core.stdc.stdlib : malloc;
/************************************************************************** /**************************************************************************
* User code blocks * User code blocks
*************************************************************************/ *************************************************************************/
@ -49,6 +51,29 @@ public enum : <%= @grammar.prefix %>token_t
/** Code point type. */ /** Code point type. */
public alias <%= @grammar.prefix %>code_point_t = uint; public alias <%= @grammar.prefix %>code_point_t = uint;
/**
* A structure to keep track of input position.
*
* This is useful for reporting errors, etc...
*/
public struct <%= @grammar.prefix %>position_t
{
/** Input text row (0-based). */
uint row;
/** Input text column (0-based). */
uint col;
/** Invalid position value. */
enum INVALID = <%= @grammar.prefix %>position_t(0xFFFF_FFFF, 0xFFFF_FFFF);
/** Return whether the position is valid. */
public @property bool valid()
{
return row != 0xFFFF_FFFFu;
}
}
<% if @grammar.ast %> <% if @grammar.ast %>
/** Parser values type. */ /** Parser values type. */
public alias <%= @grammar.prefix %>value_t = <%= @grammar.ptype %>; public alias <%= @grammar.prefix %>value_t = <%= @grammar.ptype %>;
@ -63,9 +88,20 @@ public union <%= @grammar.prefix %>value_t
<% end %> <% end %>
<% if @grammar.ast %> <% if @grammar.ast %>
/** Common AST node structure. */
private struct ASTNode
{
<%= @grammar.prefix %>position_t position;
<%= @grammar.prefix %>position_t end_position;
void *[0] fields;
}
/** AST node types. @{ */ /** AST node types. @{ */
public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
{ {
/* ASTNode fields must be present in the same order here. */
<%= @grammar.prefix %>position_t position;
<%= @grammar.prefix %>position_t end_position;
<%= @grammar.prefix %>token_t token; <%= @grammar.prefix %>token_t token;
<%= @grammar.prefix %>value_t pvalue; <%= @grammar.prefix %>value_t pvalue;
} }
@ -75,6 +111,8 @@ public struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
<% next if rule_set.optional? %> <% next if rule_set.optional? %>
public struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %> public struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
{ {
<%= @grammar.prefix %>position_t position;
<%= @grammar.prefix %>position_t end_position;
<% rule_set.ast_fields.each do |fields| %> <% rule_set.ast_fields.each do |fields| %>
union union
{ {
@ -89,26 +127,15 @@ public struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
/** @} */ /** @} */
<% end %> <% end %>
/**
* A structure to keep track of parser position.
*
* This is useful for reporting errors, etc...
*/
public struct <%= @grammar.prefix %>position_t
{
/** Input text row (0-based). */
uint row;
/** Input text column (0-based). */
uint col;
}
/** Lexed token information. */ /** Lexed token information. */
public struct <%= @grammar.prefix %>token_info_t public struct <%= @grammar.prefix %>token_info_t
{ {
/** Text position where the token was found. */ /** Text position of first code point in token. */
<%= @grammar.prefix %>position_t position; <%= @grammar.prefix %>position_t position;
/** Text position of last code point in token. */
<%= @grammar.prefix %>position_t end_position;
/** Number of input bytes used by the token. */ /** Number of input bytes used by the token. */
size_t length; size_t length;
@ -372,7 +399,10 @@ private struct lexer_match_info_t
/** Number of bytes of input text used to match. */ /** Number of bytes of input text used to match. */
size_t length; size_t length;
/** Input text position delta. */ /** Input text position delta to end of token. */
<%= @grammar.prefix %>position_t end_delta_position;
/** Input text position delta to next code point after token end. */
<%= @grammar.prefix %>position_t delta_position; <%= @grammar.prefix %>position_t delta_position;
/** Accepting lexer state from the match. */ /** Accepting lexer state from the match. */
@ -500,6 +530,7 @@ private size_t find_longest_match(<%= @grammar.prefix %>context_t * context,
if (transition_state != INVALID_LEXER_STATE_ID) if (transition_state != INVALID_LEXER_STATE_ID)
{ {
attempt_match.length += code_point_length; attempt_match.length += code_point_length;
attempt_match.end_delta_position = attempt_match.delta_position;
if (code_point == '\n') if (code_point == '\n')
{ {
attempt_match.delta_position.row++; attempt_match.delta_position.row++;
@ -586,7 +617,6 @@ private size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%=
<%= @grammar.prefix %>token_info_t token_info; <%= @grammar.prefix %>token_info_t token_info;
token_info.position = context.text_position; token_info.position = context.text_position;
token_info.token = INVALID_TOKEN_ID; token_info.token = INVALID_TOKEN_ID;
*out_token_info = token_info; // TODO: remove
lexer_match_info_t match_info; lexer_match_info_t match_info;
size_t unexpected_input_length; size_t unexpected_input_length;
size_t result = find_longest_match(context, &match_info, &unexpected_input_length); size_t result = find_longest_match(context, &match_info, &unexpected_input_length);
@ -633,11 +663,22 @@ private size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%=
} }
token_info.token = token_to_accept; token_info.token = token_to_accept;
token_info.length = match_info.length; token_info.length = match_info.length;
if (match_info.end_delta_position.row != 0u)
{
token_info.end_position.row = token_info.position.row + match_info.end_delta_position.row;
token_info.end_position.col = match_info.end_delta_position.col;
}
else
{
token_info.end_position.row = token_info.position.row;
token_info.end_position.col = token_info.position.col + match_info.end_delta_position.col;
}
*out_token_info = token_info; *out_token_info = token_info;
return P_SUCCESS; return P_SUCCESS;
case P_EOF: case P_EOF:
token_info.token = TOKEN___EOF; token_info.token = TOKEN___EOF;
token_info.end_position = token_info.position;
*out_token_info = token_info; *out_token_info = token_info;
return P_SUCCESS; return P_SUCCESS;
@ -817,7 +858,7 @@ private struct state_value_t
/** Parser shift table. */ /** Parser shift table. */
private immutable shift_t[] parser_shift_table = [ private immutable shift_t[] parser_shift_table = [
<% @parser.shift_table.each do |shift| %> <% @parser.shift_table.each do |shift| %>
shift_t(<%= shift[:symbol_id] %>u, <%= shift[:state_id] %>u), shift_t(<%= shift[:symbol].id %>u, <%= shift[:state_id] %>u),
<% end %> <% end %>
]; ];
@ -997,7 +1038,7 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
{ {
/* We shifted a token, mark it consumed. */ /* We shifted a token, mark it consumed. */
<% if @grammar.ast %> <% if @grammar.ast %>
<%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token, token_info.pvalue); <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> * token_ast_node = new <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>(token_info.position, token_info.end_position, token, token_info.pvalue);
statevalues[$-1].ast_node = token_ast_node; statevalues[$-1].ast_node = token_ast_node;
<% else %> <% else %>
statevalues[$-1].pvalue = token_info.pvalue; statevalues[$-1].pvalue = token_info.pvalue;
@ -1030,26 +1071,43 @@ public size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * cont
} }
else if (parser_reduce_table[reduce_index].n_states > 0) else if (parser_reduce_table[reduce_index].n_states > 0)
{ {
void *[] node_fields = new void *[parser_reduce_table[reduce_index].rule_set_node_field_array_size]; size_t n_fields = parser_reduce_table[reduce_index].rule_set_node_field_array_size;
foreach (i; 0..parser_reduce_table[reduce_index].rule_set_node_field_array_size) ASTNode * node = cast(ASTNode *)malloc(ASTNode.sizeof + n_fields * (void *).sizeof);
node.position = <%= @grammar.prefix %>position_t.INVALID;
node.end_position = <%= @grammar.prefix %>position_t.INVALID;
foreach (i; 0..n_fields)
{ {
node_fields[i] = null; node.fields[i] = null;
} }
if (parser_reduce_table[reduce_index].rule_set_node_field_index_map is null) if (parser_reduce_table[reduce_index].rule_set_node_field_index_map is null)
{ {
foreach (i; 0..parser_reduce_table[reduce_index].n_states) foreach (i; 0..parser_reduce_table[reduce_index].n_states)
{ {
node_fields[i] = statevalues[$ - parser_reduce_table[reduce_index].n_states + i].ast_node; node.fields[i] = statevalues[$ - parser_reduce_table[reduce_index].n_states + i].ast_node;
} }
} }
else else
{ {
foreach (i; 0..parser_reduce_table[reduce_index].n_states) foreach (i; 0..parser_reduce_table[reduce_index].n_states)
{ {
node_fields[parser_reduce_table[reduce_index].rule_set_node_field_index_map[i]] = statevalues[$ - parser_reduce_table[reduce_index].n_states + i].ast_node; node.fields[parser_reduce_table[reduce_index].rule_set_node_field_index_map[i]] = statevalues[$ - parser_reduce_table[reduce_index].n_states + i].ast_node;
} }
} }
reduced_parser_node = node_fields.ptr; bool position_found = false;
foreach (i; 0..n_fields)
{
ASTNode * child = cast(ASTNode *)node.fields[i];
if (child && child.position.valid)
{
if (!position_found)
{
node.position = child.position;
position_found = true;
}
node.end_position = child.end_position;
}
}
reduced_parser_node = node;
} }
else else
{ {
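
Both the C and D reduce actions above depend on every generated AST node starting with the same two position fields, so any child pointer can be read through the common `ASTNode` header when propagating positions. A hedged C sketch of that layout convention (simplified names; the real generated nodes carry more fields):

```c
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

typedef struct { uint32_t row; uint32_t col; } position_t;

#define INVALID_POSITION (position_t){0xFFFFFFFFu, 0xFFFFFFFFu}
#define position_valid(p) ((p).row != 0xFFFFFFFFu)

/* Common header: position fields first, then a flexible array of children. */
typedef struct
{
    position_t position;
    position_t end_position;
    void * fields[];
} ASTNode;

/* A concrete node type repeats the same leading fields so that a pointer
 * to it can be viewed through ASTNode *. */
typedef struct
{
    position_t position;
    position_t end_position;
    int token;
} Token;

int main(void)
{
    Token tok = {{4u, 0u}, {4u, 2u}, 42};

    /* Allocate a rule node with one child slot, as the reduce action does. */
    ASTNode * node = malloc(sizeof(ASTNode) + 1u * sizeof(void *));
    node->position = INVALID_POSITION;
    node->end_position = INVALID_POSITION;
    node->fields[0] = &tok;

    /* Take the start position from the first valid child and the end
     * position from the last valid child (only one child here). */
    ASTNode * child = (ASTNode *)node->fields[0];
    if (child != NULL && position_valid(child->position))
    {
        node->position = child->position;
        node->end_position = child->end_position;
    }
    assert(node->position.row == 4u && node->end_position.col == 2u);
    free(node);
    return 0;
}
```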

View File

@ -38,6 +38,23 @@ typedef <%= get_type_for(@grammar.terminate_token_id) %> <%= @grammar.prefix %>t
/** Code point type. */ /** Code point type. */
typedef uint32_t <%= @grammar.prefix %>code_point_t; typedef uint32_t <%= @grammar.prefix %>code_point_t;
/**
* A structure to keep track of input position.
*
* This is useful for reporting errors, etc...
*/
typedef struct
{
/** Input text row (0-based). */
uint32_t row;
/** Input text column (0-based). */
uint32_t col;
} <%= @grammar.prefix %>position_t;
/** Return whether the position is valid. */
#define <%= @grammar.prefix %>position_valid(p) ((p).row != 0xFFFFFFFFu)
/** User header code blocks. */ /** User header code blocks. */
<%= @grammar.code_blocks.fetch("header", "") %> <%= @grammar.code_blocks.fetch("header", "") %>
@ -58,6 +75,9 @@ typedef union
/** AST node types. @{ */ /** AST node types. @{ */
typedef struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %> typedef struct <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>
{ {
/* ASTNode fields must be present in the same order here. */
<%= @grammar.prefix %>position_t position;
<%= @grammar.prefix %>position_t end_position;
<%= @grammar.prefix %>token_t token; <%= @grammar.prefix %>token_t token;
<%= @grammar.prefix %>value_t pvalue; <%= @grammar.prefix %>value_t pvalue;
} <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>; } <%= @grammar.ast_prefix %>Token<%= @grammar.ast_suffix %>;
@ -73,6 +93,8 @@ struct <%= name %>;
<% next if rule_set.optional? %> <% next if rule_set.optional? %>
typedef struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %> typedef struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
{ {
<%= @grammar.prefix %>position_t position;
<%= @grammar.prefix %>position_t end_position;
<% rule_set.ast_fields.each do |fields| %> <% rule_set.ast_fields.each do |fields| %>
union union
{ {
@ -87,26 +109,15 @@ typedef struct <%= @grammar.ast_prefix %><%= name %><%= @grammar.ast_suffix %>
/** @} */ /** @} */
<% end %> <% end %>
/**
* A structure to keep track of parser position.
*
* This is useful for reporting errors, etc...
*/
typedef struct
{
/** Input text row (0-based). */
uint32_t row;
/** Input text column (0-based). */
uint32_t col;
} <%= @grammar.prefix %>position_t;
/** Lexed token information. */ /** Lexed token information. */
typedef struct typedef struct
{ {
/** Text position where the token was found. */ /** Text position of first code point in token. */
<%= @grammar.prefix %>position_t position; <%= @grammar.prefix %>position_t position;
/** Text position of last code point in token. */
<%= @grammar.prefix %>position_t end_position;
/** Number of input bytes used by the token. */ /** Number of input bytes used by the token. */
size_t length; size_t length;
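
Validity is encoded in-band here: `{0xFFFFFFFF, 0xFFFFFFFF}` marks an unset position, so `p_position_valid` only needs to test `row`. A tiny usage sketch of the macro declared above (standalone; the real definitions live in the generated header and source):

```c
#include <stdint.h>
#include <stdio.h>

typedef struct { uint32_t row; uint32_t col; } p_position_t;

#define INVALID_POSITION (p_position_t){0xFFFFFFFFu, 0xFFFFFFFFu}
#define p_position_valid(p) ((p).row != 0xFFFFFFFFu)

int main(void)
{
    p_position_t pos = INVALID_POSITION;
    printf("valid? %d\n", p_position_valid(pos)); /* prints 0 */
    pos = (p_position_t){3u, 14u};
    printf("valid? %d\n", p_position_valid(pos)); /* prints 1 */
    return 0;
}
```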

View File

@ -15,6 +15,7 @@ Propane is a LALR Parser Generator (LPG) which:
* generates a table-driven shift/reduce parser to parse input in linear time * generates a table-driven shift/reduce parser to parse input in linear time
* targets C or D language outputs * targets C or D language outputs
* optionally supports automatic full AST generation * optionally supports automatic full AST generation
* tracks input text start and end positions for all matched tokens/rules
* is MIT-licensed * is MIT-licensed
* is distributable as a standalone Ruby script * is distributable as a standalone Ruby script
@ -35,9 +36,14 @@ Propane is typically invoked from the command-line as `./propane`.
Usage: ./propane [options] <input-file> <output-file> Usage: ./propane [options] <input-file> <output-file>
Options: Options:
--log LOG Write log file -h, --help Show this usage and exit.
--version Show program version and exit --log LOG Write log file. This will show all parser states and their
-h, --help Show this usage and exit associated shifts and reduces. It can be helpful when
debugging a grammar.
--version Show program version and exit.
-w Treat warnings as errors. This option will treat shift/reduce
conflicts as fatal errors and will print them to stderr in
addition to the log file.
The user must specify the path to a Propane input grammar file and a path to an The user must specify the path to a Propane input grammar file and a path to an
output file. output file.
@ -228,15 +234,15 @@ drop /\\s+/;
Start -> Items; Start -> Items;
Items -> Item ItemsMore; Items -> Item:item ItemsMore;
Items -> ; Items -> ;
ItemsMore -> comma Item ItemsMore; ItemsMore -> comma Item:item ItemsMore;
ItemsMore -> ; ItemsMore -> ;
Item -> a; Item -> a;
Item -> b; Item -> b;
Item -> lparen Item rparen; Item -> lparen Item:item rparen;
Item -> Dual; Item -> Dual;
Dual -> One Two; Dual -> One Two;
@ -257,24 +263,24 @@ Start * start = p_result(&context);
assert(start.pItems1 !is null); assert(start.pItems1 !is null);
assert(start.pItems !is null); assert(start.pItems !is null);
Items * items = start.pItems; Items * items = start.pItems;
assert(items.pItem !is null); assert(items.item !is null);
assert(items.pItem.pToken1 !is null); assert(items.item.pToken1 !is null);
assert_eq(TOKEN_a, items.pItem.pToken1.token); assert_eq(TOKEN_a, items.item.pToken1.token);
assert_eq(11, items.pItem.pToken1.pvalue); assert_eq(11, items.item.pToken1.pvalue);
assert(items.pItemsMore !is null); assert(items.pItemsMore !is null);
ItemsMore * itemsmore = items.pItemsMore; ItemsMore * itemsmore = items.pItemsMore;
assert(itemsmore.pItem !is null); assert(itemsmore.item !is null);
assert(itemsmore.pItem.pItem !is null); assert(itemsmore.item.item !is null);
assert(itemsmore.pItem.pItem.pItem !is null); assert(itemsmore.item.item.item !is null);
assert(itemsmore.pItem.pItem.pItem.pToken1 !is null); assert(itemsmore.item.item.item.pToken1 !is null);
assert_eq(TOKEN_b, itemsmore.pItem.pItem.pItem.pToken1.token); assert_eq(TOKEN_b, itemsmore.item.item.item.pToken1.token);
assert_eq(22, itemsmore.pItem.pItem.pItem.pToken1.pvalue); assert_eq(22, itemsmore.item.item.item.pToken1.pvalue);
assert(itemsmore.pItemsMore !is null); assert(itemsmore.pItemsMore !is null);
itemsmore = itemsmore.pItemsMore; itemsmore = itemsmore.pItemsMore;
assert(itemsmore.pItem !is null); assert(itemsmore.item !is null);
assert(itemsmore.pItem.pToken1 !is null); assert(itemsmore.item.pToken1 !is null);
assert_eq(TOKEN_b, itemsmore.pItem.pToken1.token); assert_eq(TOKEN_b, itemsmore.item.pToken1.token);
assert_eq(22, itemsmore.pItem.pToken1.pvalue); assert_eq(22, itemsmore.item.pToken1.pvalue);
assert(itemsmore.pItemsMore is null); assert(itemsmore.pItemsMore is null);
``` ```
@ -502,7 +508,7 @@ tokenid str;
mystringvalue = ""; mystringvalue = "";
$mode(string); $mode(string);
>> >>
string: /[^"]+/ << mystringvalue += match; >> string: /[^"]+/ << mystringvalue ~= match; >>
string: /"/ << string: /"/ <<
$mode(default); $mode(default);
return $token(str); return $token(str);
@ -601,6 +607,10 @@ This can be changed with the `start` statement.
The grammar file must define a rule with the name of the start rule name which The grammar file must define a rule with the name of the start rule name which
will be used as the top-level starting rule that the parser attempts to reduce. will be used as the top-level starting rule that the parser attempts to reduce.
Rule statements are composed of the name of the rule, a `->` token, the fields
defining the rule pattern that must be matched, and a terminating semicolon or
user code block.
Example: Example:
``` ```
@ -629,9 +639,13 @@ E4 -> lparen E1 rparen << $$ = $2; >>
This example uses the default start rule name of `Start`. This example uses the default start rule name of `Start`.
A parser rule has zero or more terms on the right side of its definition. A parser rule has zero or more fields on the right side of its definition.
Each of these terms is either a token name or a rule name. Each of these fields is either a token name or a rule name.
A term can be immediately followed by a `?` character to signify that it is A field can optionally be followed by a `:` and then a field alias name.
If present, the field alias name is used to refer to the field value in user
code blocks, or if AST mode is active, the field alias name is used as the
field name in the generated AST node structure.
A field can be immediately followed by a `?` character to signify that it is
optional. optional.
Another example: Another example:
@ -641,14 +655,16 @@ token private;
token int; token int;
token ident /[a-zA-Z_][a-zA-Z_0-9]*/; token ident /[a-zA-Z_][a-zA-Z_0-9]*/;
token semicolon /;/; token semicolon /;/;
IntegerDeclaration -> Visibility? int ident semicolon; IntegerDeclaration -> Visibility? int ident:name semicolon;
Visibility -> public; Visibility -> public;
Visibility -> private; Visibility -> private;
``` ```
In a parser rule code block, parser values for the right side terms are In a parser rule code block, parser values for the right side fields are
accessible as `$1` for the first term's parser value, `$2` for the second accessible as `$1` for the first field's parser value, `$2` for the second
term's parser value, etc... field's parser value, etc...
For the `IntegerDeclaration` rule, the third field value can also be referred
to as `${name}`.
The `$$` symbol accesses the output parser value for this rule. The `$$` symbol accesses the output parser value for this rule.
The above examples demonstrate how the parser values for the rule components The above examples demonstrate how the parser values for the rule components
can be used to produce the parser value for the accepted rule. can be used to produce the parser value for the accepted rule.
@ -762,6 +778,13 @@ A pointer to this instance is passed to the generated functions.
The `p_position_t` structure contains two fields `row` and `col`. The `p_position_t` structure contains two fields `row` and `col`.
These fields contain the 0-based row and column describing a parser position. These fields contain the 0-based row and column describing a parser position.
For D targets, the `p_position_t` structure can be checked for validity by
querying the `valid` property.
For C targets, the `p_position_t` structure can be checked for validity by
calling `p_position_valid(pos)` where `pos` is a `p_position_t` structure
instance.
### AST Node Types ### AST Node Types
If AST generation mode is enabled, a structure type for each rule will be If AST generation mode is enabled, a structure type for each rule will be
@ -772,13 +795,26 @@ AST node which refers to a raw parser token rather than a composite rule.
#### AST Node Fields #### AST Node Fields
A `Token` node has two fields: All AST nodes have a `position` field specifying the text position of the
beginning of the matched token or rule, and an `end_position` field specifying
the text position of the end of the matched token or rule.
Each of these fields is an instance of the `p_position_t` structure.
A `Token` node will always have a valid `position` and `end_position`.
A rule node may not have valid positions if the rule allows for an empty match.
In this case the `position` structure should be checked for validity before
using it.
For C targets this can be accomplished with
`if (p_position_valid(node->position))` and for D targets this can be
accomplished with `if (node.position.valid)`.
A `Token` node has the following additional fields:
* `token` which specifies which token was parsed (one of `TOKEN_*`) * `token` which specifies which token was parsed (one of `TOKEN_*`)
* `pvalue` which specifies the parser value for the token. If a lexer user * `pvalue` which specifies the parser value for the token. If a lexer user
code block assigned to `$$`, the assigned value will be stored here. code block assigned to `$$`, the assigned value will be stored here.
The other generated AST node structures have fields generated based on the AST node structures for rules contain generated fields based on the
right hand side components specified for all rules of a given name. right hand side components specified for all rules of a given name.
In this example: In this example:
@ -802,7 +838,7 @@ The `Items` structure will have fields:
If a rule can be empty (for example in the second `Items` rule above), then If a rule can be empty (for example in the second `Items` rule above), then
an instance of a pointer to that rule's generated AST node will be null if the an instance of a pointer to that rule's generated AST node will be null if the
parser matches the empty rule definition. parser matches the empty rule pattern.
The non-positional AST node field pointer will not be generated if there are The non-positional AST node field pointer will not be generated if there are
multiple positions in which an instance of the node it points to could be multiple positions in which an instance of the node it points to could be
@ -823,6 +859,19 @@ If the first rule is matched, then `pOne1` and `pTwo2` will be non-null while
`pTwo1` and `pOne2` will be null. `pTwo1` and `pOne2` will be null.
If the second rule is matched instead, then the opposite would be the case. If the second rule is matched instead, then the opposite would be the case.
If a field alias is present in a rule definition, an additional field will be
generated in the AST node with the field alias name.
For example:
```
Exp -> Exp:left plus ExpB:right;
```
In the generated `Exp` structure, the fields `pExp`, `pExp1`, and `left` will
all point to the same child node (an instance of the `Exp` structure), and the
fields `pExpB`, `pExpB3`, and `right` will all point to the same child node
(an instance of the `ExpB` structure).
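
To make that concrete, here is a hypothetical C fragment against simplified node shapes for `Exp -> Exp:left plus ExpB:right;` (illustrative only: the position fields and the numbered positional pointers such as `pExp1` and `pExpB3` are omitted):

```c
#include <assert.h>
#include <stddef.h>

/* Hypothetical, simplified generated nodes for the rule above. */
typedef struct ExpB ExpB;
typedef struct Exp Exp;
struct Exp
{
    Exp * pExp;    /* positional field */
    Exp * left;    /* alias: same child as pExp */
    ExpB * pExpB;  /* positional field */
    ExpB * right;  /* alias: same child as pExpB */
};

/* Count how deeply Exp nodes nest on the left, walking via the alias. */
static int left_depth(const Exp * exp)
{
    int depth = 0;
    while (exp != NULL)
    {
        assert(exp->pExp == exp->left); /* alias mirrors the positional field */
        exp = exp->left;
        depth++;
    }
    return depth;
}

int main(void)
{
    Exp inner = {NULL, NULL, NULL, NULL};
    Exp outer = {&inner, &inner, NULL, NULL};
    assert(left_depth(&outer) == 2);
    return 0;
}
```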
##> Functions ##> Functions
### `p_context_init` ### `p_context_init`
@ -859,6 +908,24 @@ p_context_init(&context, input, input_length);
size_t result = p_parse(&context); size_t result = p_parse(&context);
``` ```
### `p_position_valid`
The `p_position_valid()` function is only generated for C targets.
It is used to determine whether a `p_position_t` structure is valid.
Example:
```
if (p_position_valid(node->position))
{
....
}
```
For D targets, rather than using `p_position_valid()`, the `valid` property
function of the `p_position_t` structure can be queried
(e.g. `if (node.position.valid)`).
### `p_result` ### `p_result`
The `p_result()` function can be used to retrieve the final parse value after The `p_result()` function can be used to retrieve the final parse value after

View File

@ -0,0 +1 @@
au BufNewFile,BufRead *.propane set filetype=propane

View File

@ -0,0 +1,33 @@
" Vim syntax file for Propane
" Language: propane
" Maintainer: Josh Holtrop
" URL: https://github.com/holtrop/propane
if exists("b:current_syntax")
finish
endif
if !exists("b:propane_subtype")
let b:propane_subtype = "d"
endif
exe "syn include @propaneTarget syntax/".b:propane_subtype.".vim"
syn region propaneTarget matchgroup=propaneDelimiter start="<<" end=">>$" contains=@propaneTarget keepend
syn match propaneComment "#.*"
syn match propaneOperator "->"
syn match propaneFieldAlias ":[a-zA-Z0-9_]\+" contains=propaneFieldOperator
syn match propaneFieldOperator ":" contained
syn match propaneOperator "?"
syn keyword propaneKeyword ast ast_prefix ast_suffix drop module prefix ptype start token tokenid
syn region propaneRegex start="/" end="/" skip="\\/"
hi def link propaneComment Comment
hi def link propaneKeyword Keyword
hi def link propaneRegex String
hi def link propaneOperator Operator
hi def link propaneFieldOperator Operator
hi def link propaneDelimiter Delimiter
hi def link propaneFieldAlias Identifier

View File

@ -31,10 +31,10 @@ class Propane
class << self class << self
def run(input_file, output_file, log_file) def run(input_file, output_file, log_file, options)
begin begin
grammar = Grammar.new(File.read(input_file)) grammar = Grammar.new(File.read(input_file))
generator = Generator.new(grammar, output_file, log_file) generator = Generator.new(grammar, output_file, log_file, options)
generator.generate generator.generate
rescue Error => e rescue Error => e
$stderr.puts e.message $stderr.puts e.message

View File

@ -4,15 +4,21 @@ class Propane
USAGE = <<EOF USAGE = <<EOF
Usage: #{$0} [options] <input-file> <output-file> Usage: #{$0} [options] <input-file> <output-file>
Options: Options:
--log LOG Write log file -h, --help Show this usage and exit.
--version Show program version and exit --log LOG Write log file. This will show all parser states and their
-h, --help Show this usage and exit associated shifts and reduces. It can be helpful when
debugging a grammar.
--version Show program version and exit.
-w Treat warnings as errors. This option will treat shift/reduce
conflicts as fatal errors and will print them to stderr in
addition to the log file.
EOF EOF
class << self class << self
def run(args) def run(args)
params = [] params = []
options = {}
log_file = nil log_file = nil
i = 0 i = 0
while i < args.size while i < args.size
@ -29,6 +35,8 @@ EOF
when "-h", "--help" when "-h", "--help"
puts USAGE puts USAGE
return 0 return 0
when "-w"
options[:warnings_as_errors] = true
when /^-/ when /^-/
$stderr.puts "Error: unknown option #{arg}" $stderr.puts "Error: unknown option #{arg}"
return 1 return 1
@ -45,7 +53,7 @@ EOF
$stderr.puts "Error: cannot read #{params[0]}" $stderr.puts "Error: cannot read #{params[0]}"
return 2 return 2
end end
Propane.run(*params, log_file) Propane.run(*params, log_file, options)
end end
end end

View File

@ -2,7 +2,7 @@ class Propane
class Generator class Generator
def initialize(grammar, output_file, log_file) def initialize(grammar, output_file, log_file, options)
@grammar = grammar @grammar = grammar
@output_file = output_file @output_file = output_file
if log_file if log_file
@ -16,6 +16,7 @@ class Propane
else else
"d" "d"
end end
@options = options
process_grammar! process_grammar!
end end
@ -129,7 +130,7 @@ class Propane
# Generate the lexer. # Generate the lexer.
@lexer = Lexer.new(@grammar) @lexer = Lexer.new(@grammar)
# Generate the parser. # Generate the parser.
@parser = Parser.new(@grammar, rule_sets, @log) @parser = Parser.new(@grammar, rule_sets, @log, @options)
end end
# Check that any referenced ptypes have been defined. # Check that any referenced ptypes have been defined.
@ -275,6 +276,19 @@ class Propane
"statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}" "statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}"
end end
end end
code = code.gsub(/\$\{(\w+)\}/) do |match|
aliasname = $1
if index = rule.aliases[aliasname]
case @language
when "c"
"state_values_stack_index(statevalues, -(int)n_states + #{index})->pvalue.v_#{rule.components[index].ptypename}"
when "d"
"statevalues[$-n_states+#{index}].pvalue.v_#{rule.components[index].ptypename}"
end
else
raise Error.new("Field alias '#{aliasname}' not found")
end
end
else else
code = code.gsub(/\$\$/) do |match| code = code.gsub(/\$\$/) do |match|
if @grammar.ast if @grammar.ast

View File

@ -198,7 +198,7 @@ class Propane
if @ast && ptypename if @ast && ptypename
raise Error.new("Multiple ptypes are unsupported in AST mode") raise Error.new("Multiple ptypes are unsupported in AST mode")
end end
md = consume!(/((?:#{IDENTIFIER_REGEX}\??\s*)*)\s*/, "expected rule component list") md = consume!(/((?:#{IDENTIFIER_REGEX}(?::#{IDENTIFIER_REGEX})?\??\s*)*)\s*/, "expected rule component list")
components = md[1].strip.split(/\s+/) components = md[1].strip.split(/\s+/)
if @ast if @ast
consume!(/;/, "expected `;'") consume!(/;/, "expected `;'")

View File

@ -7,12 +7,14 @@ class Propane
attr_reader :reduce_table attr_reader :reduce_table
attr_reader :rule_sets attr_reader :rule_sets
def initialize(grammar, rule_sets, log) def initialize(grammar, rule_sets, log, options)
@grammar = grammar @grammar = grammar
@rule_sets = rule_sets @rule_sets = rule_sets
@log = log @log = log
@item_sets = [] @item_sets = []
@item_sets_set = {} @item_sets_set = {}
@warnings = Set.new
@options = options
start_item = Item.new(grammar.rules.first, 0) start_item = Item.new(grammar.rules.first, 0)
eval_item_sets = Set[ItemSet.new([start_item])] eval_item_sets = Set[ItemSet.new([start_item])]
@ -23,10 +25,10 @@ class Propane
item_set.id = @item_sets.size item_set.id = @item_sets.size
@item_sets << item_set @item_sets << item_set
@item_sets_set[item_set] = item_set @item_sets_set[item_set] = item_set
item_set.following_symbols.each do |following_symbol| item_set.next_symbols.each do |next_symbol|
unless following_symbol.name == "$EOF" unless next_symbol.name == "$EOF"
following_set = item_set.build_following_item_set(following_symbol) next_item_set = item_set.build_next_item_set(next_symbol)
eval_item_sets << following_set eval_item_sets << next_item_set
end end
end end
end end
@ -37,8 +39,11 @@ class Propane
end end
build_reduce_actions! build_reduce_actions!
write_log!
build_tables! build_tables!
write_log!
if @warnings.size > 0 && @options[:warnings_as_errors]
raise Error.new("Fatal errors (-w):\n" + @warnings.join("\n"))
end
end end
private private
@ -48,26 +53,34 @@ class Propane
@shift_table = [] @shift_table = []
@reduce_table = [] @reduce_table = []
@item_sets.each do |item_set| @item_sets.each do |item_set|
shift_entries = item_set.following_symbols.map do |following_symbol| shift_entries = item_set.next_symbols.map do |next_symbol|
state_id = state_id =
if following_symbol.name == "$EOF" if next_symbol.name == "$EOF"
0 0
else else
item_set.following_item_set[following_symbol].id item_set.next_item_set[next_symbol].id
end end
{ {
symbol_id: following_symbol.id, symbol: next_symbol,
state_id: state_id, state_id: state_id,
} }
end end
unless item_set.reduce_rules.empty?
shift_entries.each do |shift_entry|
token = shift_entry[:symbol]
if get_lookahead_reduce_actions_for_item_set(item_set).include?(token)
rule = item_set.reduce_actions[token]
@warnings << "Shift/Reduce conflict (state #{item_set.id}) between token #{token.name} and rule #{rule.name} (defined on line #{rule.line_number})"
end
end
end
reduce_entries = reduce_entries =
case ra = item_set.reduce_actions if rule = item_set.reduce_rule
when Rule [{token_id: @grammar.invalid_token_id, rule_id: rule.id, rule: rule,
[{token_id: @grammar.invalid_token_id, rule_id: ra.id, rule: ra, rule_set_id: rule.rule_set.id, n_states: rule.components.size,
rule_set_id: ra.rule_set.id, n_states: ra.components.size, propagate_optional_target: rule.optional? && rule.components.size == 1}]
propagate_optional_target: ra.optional? && ra.components.size == 1}] elsif reduce_actions = item_set.reduce_actions
when Hash reduce_actions.map do |token, rule|
ra.map do |token, rule|
{token_id: token.id, rule_id: rule.id, rule: rule, {token_id: token.id, rule_id: rule.id, rule: rule,
rule_set_id: rule.rule_set.id, n_states: rule.components.size, rule_set_id: rule.rule_set.id, n_states: rule.components.size,
propagate_optional_target: rule.optional? && rule.components.size == 1} propagate_optional_target: rule.optional? && rule.components.size == 1}
@ -87,11 +100,11 @@ class Propane
end end
def process_item_set(item_set) def process_item_set(item_set)
item_set.following_symbols.each do |following_symbol| item_set.next_symbols.each do |next_symbol|
unless following_symbol.name == "$EOF" unless next_symbol.name == "$EOF"
following_set = @item_sets_set[item_set.build_following_item_set(following_symbol)] next_item_set = @item_sets_set[item_set.build_next_item_set(next_symbol)]
item_set.following_item_set[following_symbol] = following_set item_set.next_item_set[next_symbol] = next_item_set
following_set.in_sets << item_set next_item_set.in_sets << item_set
end end
end end
end end
@ -101,7 +114,7 @@ class Propane
# @return [void] # @return [void]
def build_reduce_actions! def build_reduce_actions!
@item_sets.each do |item_set| @item_sets.each do |item_set|
item_set.reduce_actions = build_reduce_actions_for_item_set(item_set) build_reduce_actions_for_item_set(item_set)
end end
end end
@ -110,38 +123,55 @@ class Propane
# @param item_set [ItemSet] # @param item_set [ItemSet]
# ItemSet (parser state) # ItemSet (parser state)
# #
# @return [nil, Rule, Hash] # @return [void]
# If no reduce actions are possible for the given item set, nil.
# If only one reduce action is possible for the given item set, the Rule
# to reduce.
# Otherwise, a mapping of lookahead Tokens to the Rules to reduce.
def build_reduce_actions_for_item_set(item_set) def build_reduce_actions_for_item_set(item_set)
# To build the reduce actions, we start by looking at any # To build the reduce actions, we start by looking at any
# "complete" items, i.e., items where the parse position is at the # "complete" items, i.e., items where the parse position is at the
# end of a rule. These are the only rules that are candidates for # end of a rule. These are the only rules that are candidates for
# reduction in the current ItemSet. # reduction in the current ItemSet.
reduce_rules = Set.new(item_set.items.select(&:complete?).map(&:rule)) item_set.reduce_rules = Set.new(item_set.items.select(&:complete?).map(&:rule))
# If there are no rules to reduce for this ItemSet, we're done here. if item_set.reduce_rules.size == 1
return nil if reduce_rules.size == 0 item_set.reduce_rule = item_set.reduce_rules.first
end
# If there is exactly one rule to reduce for this ItemSet, then do not if item_set.reduce_rules.size > 1
# figure out the lookaheads; just reduce it. # Force item_set.reduce_actions to be built to store the lookahead
return reduce_rules.first if reduce_rules.size == 1 # tokens for the possible reduce rules if there is more than one.
get_lookahead_reduce_actions_for_item_set(item_set)
end
end
# Otherwise, we have more than one possible rule to reduce. # Get the reduce actions for a single item set (parser state).
#
# @param item_set [ItemSet]
# ItemSet (parser state)
#
# @return [Hash]
# Mapping of lookahead Tokens to the Rules to reduce.
def get_lookahead_reduce_actions_for_item_set(item_set)
item_set.reduce_actions ||= build_lookahead_reduce_actions_for_item_set(item_set)
end
# Build the reduce actions for a single item set (parser state).
#
# @param item_set [ItemSet]
# ItemSet (parser state)
#
# @return [Hash]
# Mapping of lookahead Tokens to the Rules to reduce.
def build_lookahead_reduce_actions_for_item_set(item_set)
# We will be looking for all possible tokens that can follow instances of # We will be looking for all possible tokens that can follow instances of
# these rules. Rather than looking through the entire grammar for the # these rules. Rather than looking through the entire grammar for the
# possible following tokens, we will only look in the item sets leading # possible following tokens, we will only look in the item sets leading
# up to this one. This restriction gives us a more precise lookahead set, # up to this one. This restriction gives us a more precise lookahead set,
# and allows us to parse LALR grammars. # and allows us to parse LALR grammars.
item_sets = Set[item_set] + item_set.leading_item_sets item_sets = Set[item_set] + item_set.leading_item_sets
reduce_rules.reduce({}) do |reduce_actions, reduce_rule| item_set.reduce_rules.reduce({}) do |reduce_actions, reduce_rule|
lookahead_tokens_for_rule = build_lookahead_tokens_to_reduce(reduce_rule, item_sets) lookahead_tokens_for_rule = build_lookahead_tokens_to_reduce(reduce_rule, item_sets)
lookahead_tokens_for_rule.each do |lookahead_token| lookahead_tokens_for_rule.each do |lookahead_token|
if existing_reduce_rule = reduce_actions[lookahead_token] if existing_reduce_rule = reduce_actions[lookahead_token]
raise Error.new("Error: reduce/reduce conflict between rule #{existing_reduce_rule.id} (#{existing_reduce_rule.name}) and rule #{reduce_rule.id} (#{reduce_rule.name})") raise Error.new("Error: reduce/reduce conflict (state #{item_set.id}) between rule #{existing_reduce_rule.name}##{existing_reduce_rule.id} (defined on line #{existing_reduce_rule.line_number}) and rule #{reduce_rule.name}##{reduce_rule.id} (defined on line #{reduce_rule.line_number})")
end end
reduce_actions[lookahead_token] = reduce_rule reduce_actions[lookahead_token] = reduce_rule
end end
@ -183,9 +213,9 @@ class Propane
# tokens to form the lookahead token set. # tokens to form the lookahead token set.
item_sets.each do |item_set| item_sets.each do |item_set|
item_set.items.each do |item| item_set.items.each do |item|
if item.following_symbol == rule_set if item.next_symbol == rule_set
(1..).each do |offset| (1..).each do |offset|
case symbol = item.following_symbol(offset) case symbol = item.next_symbol(offset)
when nil when nil
rule_set = item.rule.rule_set rule_set = item.rule.rule_set
unless checked_rule_sets.include?(rule_set) unless checked_rule_sets.include?(rule_set)
@ -242,20 +272,26 @@ class Propane
@log.puts @log.puts
@log.puts " Incoming states: #{incoming_ids.join(", ")}" @log.puts " Incoming states: #{incoming_ids.join(", ")}"
@log.puts " Outgoing states:" @log.puts " Outgoing states:"
item_set.following_item_set.each do |following_symbol, following_item_set| item_set.next_item_set.each do |next_symbol, next_item_set|
@log.puts " #{following_symbol.name} => #{following_item_set.id}" @log.puts " #{next_symbol.name} => #{next_item_set.id}"
end end
@log.puts @log.puts
@log.puts " Reduce actions:" @log.puts " Reduce actions:"
case item_set.reduce_actions if item_set.reduce_rule
when Rule @log.puts " * => rule #{item_set.reduce_rule.id}, rule set #{@rule_sets[item_set.reduce_rule.name].id} (#{item_set.reduce_rule.name})"
@log.puts " * => rule #{item_set.reduce_actions.id}, rule set #{@rule_sets[item_set.reduce_actions.name].id} (#{item_set.reduce_actions.name})" elsif item_set.reduce_actions
when Hash
item_set.reduce_actions.each do |token, rule| item_set.reduce_actions.each do |token, rule|
@log.puts " lookahead #{token.name} => #{rule.name} (#{rule.id}), rule set ##{rule.rule_set.id}" @log.puts " lookahead #{token.name} => #{rule.name} (#{rule.id}), rule set ##{rule.rule_set.id}"
end end
end end
end end
if @warnings.size > 0
@log.puts
@log.puts "Warnings:"
@warnings.each do |warning|
@log.puts " #{warning}"
end
end
end end
end end

View File

@ -56,7 +56,7 @@ class Propane
# Return the set of Items obtained by "closing" the current item. # Return the set of Items obtained by "closing" the current item.
# #
# If the following symbol for the current item is another Rule name, then # If the next symbol for the current item is another Rule name, then
# this method will return all Items for that Rule with a position of 0. # this method will return all Items for that Rule with a position of 0.
# Otherwise, an empty Array is returned. # Otherwise, an empty Array is returned.
# #
@ -81,17 +81,17 @@ class Propane
@position == @rule.components.size @position == @rule.components.size
end end
# Get the following symbol for the Item. # Get the next symbol for the Item.
# #
# That is, the symbol which follows the parse position marker in the # That is, the symbol which is after the parse position marker in the
# current Item. # current Item.
# #
# @param offset [Integer] # @param offset [Integer]
# Offset from current parse position to examine. # Offset from current parse position to examine.
# #
# @return [Token, RuleSet, nil] # @return [Token, RuleSet, nil]
# Following symbol for the Item. # Next symbol for the Item.
def following_symbol(offset = 0) def next_symbol(offset = 0)
@rule.components[@position + offset] @rule.components[@position + offset]
end end
@ -108,25 +108,25 @@ class Propane
end end
end end
# Get whether this Item is followed by the provided symbol. # Get whether this Item's next symbol is the given symbol.
# #
# @param symbol [Token, RuleSet] # @param symbol [Token, RuleSet]
# Symbol to query. # Symbol to query.
# #
# @return [Boolean] # @return [Boolean]
# Whether this Item is followed by the provided symbol. # Whether this Item's next symbol is the given symbol.
def followed_by?(symbol) def next_symbol?(symbol)
following_symbol == symbol next_symbol == symbol
end end
# Get the following item for this Item. # Get the next item for this Item.
# #
# That is, the Item formed by moving the parse position marker one place # That is, the Item formed by moving the parse position marker one place
# forward from its position in this Item. # forward from its position in this Item.
# #
# @return [Item] # @return [Item]
# The following item for this Item. # The next item for this Item.
def following_item def next_item
Item.new(@rule, @position + 1) Item.new(@rule, @position + 1)
end end

View File

@ -2,7 +2,7 @@ class Propane
class Parser class Parser
# Represent a parser "item set", which is a set of possible items that the # Represent a parser "item set", which is a set of possible items that the
# parser could currently be parsing. # parser could currently be parsing. This is equivalent to a parser state.
class ItemSet class ItemSet
# @return [Set<Item>] # @return [Set<Item>]
@ -14,15 +14,24 @@ class Propane
attr_accessor :id attr_accessor :id
# @return [Hash] # @return [Hash]
# Maps a following symbol to its ItemSet. # Maps a next symbol to its ItemSet.
attr_reader :following_item_set attr_reader :next_item_set
# @return [Set<ItemSet>] # @return [Set<ItemSet>]
# ItemSets leading to this item set. # ItemSets leading to this item set.
attr_reader :in_sets attr_reader :in_sets
# @return [nil, Rule, Hash] # @return [nil, Rule]
# Reduce actions, mapping lookahead tokens to rules. # Rule to reduce if there is only one possibility.
attr_accessor :reduce_rule
# @return [Set<Rule>]
# Set of rules that could be reduced in this parser state.
attr_accessor :reduce_rules
# @return [nil, Hash]
# Reduce actions, mapping lookahead tokens to rules, if there is
# more than one rule that could be reduced.
attr_accessor :reduce_actions attr_accessor :reduce_actions
# Build an ItemSet. # Build an ItemSet.
@ -31,28 +40,28 @@ class Propane
# Items in this ItemSet. # Items in this ItemSet.
def initialize(items) def initialize(items)
@items = Set.new(items) @items = Set.new(items)
@following_item_set = {} @next_item_set = {}
@in_sets = Set.new @in_sets = Set.new
close! close!
end end
# Get the set of following symbols for all Items in this ItemSet. # Get the set of next symbols for all Items in this ItemSet.
# #
# @return [Set<Token, RuleSet>] # @return [Set<Token, RuleSet>]
# Set of following symbols for all Items in this ItemSet. # Set of next symbols for all Items in this ItemSet.
def following_symbols def next_symbols
Set.new(@items.map(&:following_symbol).compact) @_next_symbols ||= Set.new(@items.map(&:next_symbol).compact)
end end
# Build a following ItemSet for the given following symbol. # Build a next ItemSet for the given next symbol.
# #
# @param symbol [Token, RuleSet] # @param symbol [Token, RuleSet]
# Following symbol to build the following ItemSet for. # Next symbol to build the next ItemSet for.
# #
# @return [ItemSet] # @return [ItemSet]
# Following ItemSet for the given following symbol. # Next ItemSet for the given next symbol.
def build_following_item_set(symbol) def build_next_item_set(symbol)
ItemSet.new(items_followed_by(symbol).map(&:following_item)) ItemSet.new(items_with_next(symbol).map(&:next_item))
end end
# Hash function. # Hash function.
@ -90,21 +99,24 @@ class Propane
# @return [Set<ItemSet>] # @return [Set<ItemSet>]
# Set of all ItemSets that lead up to this ItemSet. # Set of all ItemSets that lead up to this ItemSet.
def leading_item_sets def leading_item_sets
result = Set.new @_leading_item_sets ||=
eval_sets = Set[self] begin
evaled = Set.new result = Set.new
while eval_sets.size > 0 eval_sets = Set[self]
eval_set = eval_sets.first evaled = Set.new
eval_sets.delete(eval_set) while eval_sets.size > 0
evaled << eval_set eval_set = eval_sets.first
eval_set.in_sets.each do |in_set| eval_sets.delete(eval_set)
result << in_set evaled << eval_set
unless evaled.include?(in_set) eval_set.in_sets.each do |in_set|
eval_sets << in_set result << in_set
unless evaled.include?(in_set)
eval_sets << in_set
end
end
end end
result
end end
end
result
end end
# Represent the ItemSet as a String. # Represent the ItemSet as a String.
@@ -137,16 +149,16 @@ class Propane
        end
      end
-   # Get the Items followed by the given following symbol.
+   # Get the Items with the given next symbol.
    #
    # @param symbol [Token, RuleSet]
-   # Following symbol.
+   # Next symbol.
    #
    # @return [Array<Item>]
-   # Items followed by the given following symbol.
-   def items_followed_by(symbol)
+   # Items with the given next symbol.
+   def items_with_next(symbol)
      @items.select do |item|
-       item.followed_by?(symbol)
+       item.next_symbol?(symbol)
      end
    end
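Both caches above (@_next_symbols and @_leading_item_sets) use Ruby's ||= memoization idiom: the right-hand side is evaluated only while the instance variable is still nil, so the traversal runs once per ItemSet and later calls return the cached value. A minimal standalone sketch of the same pattern, using a hypothetical Node class rather than Propane's own types:

    require 'set'

    # Hypothetical example class, not part of Propane.
    class Node
      attr_reader :in_nodes

      def initialize
        @in_nodes = Set.new
      end

      # All nodes that can transitively reach this one, computed once and
      # cached. ||= re-runs the body only when the cached value is nil or
      # false, which is safe here because the result is always a Set.
      def leading_nodes
        @_leading_nodes ||=
          begin
            result = Set.new
            work = [self]
            until work.empty?
              node = work.pop
              node.in_nodes.each do |n|
                work << n if result.add?(n)
              end
            end
            result
          end
      end
    end

The same caveat applies to the real methods: the idiom only suits values that are never legitimately nil or false, which holds here since both always produce a Set.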


@@ -6,6 +6,10 @@ class Propane
    # Rule components.
    attr_reader :components
+   # @return [Hash]
+   # Field aliases.
+   attr_reader :aliases
    # @return [String]
    # User code associated with the rule.
    attr_reader :code
@@ -49,7 +53,19 @@ class Propane
    # Line number where the rule was defined in the input grammar.
    def initialize(name, components, code, ptypename, line_number)
      @name = name
-     @components = components
+     @aliases = {}
+     @components = components.each_with_index.map do |component, i|
+       if component =~ /(\S+):(\S+)/
+         c, aliasname = $1, $2
+         if @aliases[aliasname]
+           raise Error.new("Error: duplicate field alias `#{aliasname}` for rule #{name} defined on line #{line_number}")
+         end
+         @aliases[aliasname] = i
+         c
+       else
+         component
+       end
+     end
      @rule_set_node_field_index_map = components.map {0}
      @code = code
      @ptypename = ptypename
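For reference, the mapping above strips name:alias components down to their names while recording which component index each alias refers to. A sketch with illustrative inputs (not from a real grammar):

    components = ["T:first", "b", "T:second"]
    aliases = {}
    stripped = components.each_with_index.map do |component, i|
      if component =~ /(\S+):(\S+)/
        aliases[$2] = i  # remember which component index the alias names
        $1               # keep only the component name
      else
        component
      end
    end
    stripped  # => ["T", "b", "T"]
    aliases   # => {"first" => 0, "second" => 2}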


@@ -100,7 +100,9 @@ class Propane
    # Finalize a RuleSet after adding all Rules to it.
    def finalize(grammar)
-     build_ast_fields(grammar)
+     if grammar.ast
+       build_ast_fields(grammar)
+     end
    end
    private
@@ -148,6 +150,18 @@ class Propane
          "#{grammar.ast_prefix}#{node_name}#{grammar.ast_suffix}"
        end
      end
+     # Now merge in the field aliases as given by the user in the
+     # grammar.
+     field_aliases = {}
+     @rules.each do |rule|
+       rule.aliases.each do |alias_name, index|
+         if field_aliases[alias_name] && field_aliases[alias_name] != index
+           raise Error.new("Error: conflicting AST node field positions for alias `#{alias_name}`")
+         end
+         field_aliases[alias_name] = index
+         @ast_fields[index][alias_name] = @ast_fields[index].first[1]
+       end
+     end
    end
  end
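The conflict check above exists because, in AST mode, every rule of a rule set shares one generated node layout, so an alias must refer to the same field index in each rule. A sketch of the failing case, mirroring the spec further down:

    # Start -> a:foo b;   places "foo" at field index 0
    # Start -> b b:foo;   places "foo" at field index 1
    rule_aliases = [{ "foo" => 0 }, { "foo" => 1 }]
    field_aliases = {}
    rule_aliases.each do |aliases|
      aliases.each do |name, index|
        if field_aliases[name] && field_aliases[name] != index
          raise "conflicting AST node field positions for alias `#{name}`"
        end
        field_aliases[name] = index
      end
    end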


@@ -1,3 +1,3 @@
class Propane
- VERSION = "1.4.0"
+ VERSION = "1.5.1"
end


@@ -54,6 +54,7 @@ EOF
else
  command += %W[spec/run/testparser#{options[:name]}.propane spec/run/testparser#{options[:name]}.#{options[:language]} --log spec/run/testparser#{options[:name]}.log]
end
+command += (options[:extra_args] || [])
if (options[:capture])
  stdout, stderr, status = Open3.capture3(*command)
  Results.new(stdout, stderr, status)
@@ -184,6 +185,70 @@
  expect(results.status).to_not eq 0
end
it "warns on shift/reduce conflicts" do
write_grammar <<EOF
token a;
token b;
Start -> As? b?;
As -> a As2?;
As2 -> b a As2?;
EOF
results = run_propane(capture: true)
expect(results.stderr).to eq ""
expect(results.status).to eq 0
expect(File.binread("spec/run/testparser.log")).to match %r{Shift/Reduce conflict \(state \d+\) between token b and rule As2\? \(defined on line 4\)}
end
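Why this grammar conflicts: after shifting an a (via As -> a As2?), a lookahead of b could either begin As2 (As2 -> b a As2?) or belong to Start's trailing b?, which requires first reducing the optional As2? as empty. Sketched in dotted-item form (a reading of the state, not generator output):

    # In the state containing "As -> a . As2?", with lookahead b:
    #   shift  b          to begin As2 -> . b a As2?
    #   reduce As2? -> .  so b can satisfy Start -> As? . b?
    # This is the pair reported as a conflict between token b and rule
    # As2? (defined on line 4 of the grammar above).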
it "errors on shift/reduce conflicts with -w" do
write_grammar <<EOF
token a;
token b;
Start -> As? b?;
As -> a As2?;
As2 -> b a As2?;
EOF
results = run_propane(extra_args: %w[-w], capture: true)
expect(results.stderr).to match %r{Shift/Reduce conflict \(state \d+\) between token b and rule As2\? \(defined on line 4\)}m
expect(results.status).to_not eq 0
expect(File.binread("spec/run/testparser.log")).to match %r{Shift/Reduce conflict \(state \d+\) between token b and rule As2\? \(defined on line 4\)}
end
it "errors on duplicate field aliases in a rule" do
write_grammar <<EOF
token a;
token b;
Start -> a:foo b:foo;
EOF
results = run_propane(extra_args: %w[-w], capture: true)
expect(results.stderr).to match %r{Error: duplicate field alias `foo` for rule Start defined on line 3}
expect(results.status).to_not eq 0
end
it "errors when an alias is in different positions for different rules in a rule set when AST mode is enabled" do
write_grammar <<EOF
ast;
token a;
token b;
Start -> a:foo b;
Start -> b b:foo;
EOF
results = run_propane(extra_args: %w[-w], capture: true)
expect(results.stderr).to match %r{Error: conflicting AST node field positions for alias `foo`}
expect(results.status).to_not eq 0
end
it "does not error when an alias is in different positions for different rules in a rule set when AST mode is not enabled" do
write_grammar <<EOF
token a;
token b;
Start -> a:foo b;
Start -> b b:foo;
EOF
results = run_propane(extra_args: %w[-w], capture: true)
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
%w[d c].each do |language|
  context "#{language.upcase} language" do
@@ -623,7 +688,7 @@ F -> e;
EOF
    results = run_propane(capture: true, language: language)
    expect(results.status).to_not eq 0
-   expect(results.stderr).to match %r{reduce/reduce conflict.*\(E\).*\(F\)}
+   expect(results.stderr).to match %r{Error: reduce/reduce conflict \(state \d+\) between rule E#\d+ \(defined on line 10\) and rule F#\d+ \(defined on line 11\)}
  end
  it "provides matched text to user code blocks" do
@@ -1051,6 +1116,110 @@ EOF
    expect(results.stderr).to eq ""
    expect(results.status).to eq 0
  end
it "stores token and rule positions in AST nodes" do
write_grammar <<EOF
ast;
token a;
token bb;
token c /c(.|\\n)*c/;
drop /\\s+/;
Start -> T T T;
T -> a;
T -> bb;
T -> c;
EOF
run_propane(language: language)
compile("spec/test_ast_token_positions.#{language}", language: language)
results = run_test
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
it "stores invalid positions for empty rule matches" do
write_grammar <<EOF
ast;
token a;
token bb;
token c /c(.|\\n)*c/;
drop /\\s+/;
Start -> T Start;
Start -> ;
T -> a A;
A -> bb? c?;
EOF
run_propane(language: language)
compile("spec/test_ast_invalid_positions.#{language}", language: language)
results = run_test
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
it "allows specifying field aliases in AST mode" do
write_grammar <<EOF
ast;
token a;
token b;
token c;
drop /\\s+/;
Start -> T:first T:second T:third;
T -> a;
T -> b;
T -> c;
EOF
run_propane(language: language)
compile("spec/test_ast_field_aliases.#{language}", language: language)
results = run_test
expect(results.stderr).to eq ""
expect(results.status).to eq 0
end
it "allows specifying field aliases when AST mode is not enabled" do
if language == "d"
write_grammar <<EOF
<<
import std.stdio;
>>
ptype string;
token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
$$ = match;
>>
drop /\\s+/;
Start -> id:first id:second <<
writeln("first is ", ${first});
writeln("second is ", ${second});
>>
EOF
else
write_grammar <<EOF
<<
#include <stdio.h>
#include <string.h>
#include <stdlib.h> /* for malloc in the id token action below */
>>
ptype char const *;
token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
char * s = malloc(match_length + 1);
strncpy(s, (char const *)match, match_length);
s[match_length] = 0;
$$ = s;
>>
drop /\\s+/;
Start -> id:first id:second <<
printf("first is %s\\n", ${first});
printf("second is %s\\n", ${second});
>>
EOF
end
run_propane(language: language)
compile("spec/test_field_aliases.#{language}", language: language)
results = run_test
expect(results.stderr).to eq ""
expect(results.status).to eq 0
expect(results.stdout).to match /first is foo1.*second is bar2/m
end
    end
  end
end


@ -0,0 +1,19 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"
int main()
{
char const * input = "\na\nb\nc";
p_context_t context;
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
Start * start = p_result(&context);
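/* first/second/third are the fields generated for the T:first T:second
 * T:third aliases in the grammar; each T node exposes its matched token
 * via pToken. */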
assert_eq(TOKEN_a, start->first->pToken->token);
assert_eq(TOKEN_b, start->second->pToken->token);
assert_eq(TOKEN_c, start->third->pToken->token);
return 0;
}


@ -0,0 +1,21 @@
import testparser;
import std.stdio;
import testutils;
int main()
{
return 0;
}
unittest
{
string input = "\na\nb\nc";
p_context_t context;
p_context_init(&context, input);
assert(p_parse(&context) == P_SUCCESS);
Start * start = p_result(&context);
assert_eq(TOKEN_a, start.first.pToken.token);
assert_eq(TOKEN_b, start.second.pToken.token);
assert_eq(TOKEN_c, start.third.pToken.token);
}


@ -0,0 +1,102 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"
int main()
{
char const * input = "\na\n  bb ccc"; /* two spaces before bb: A begins at column 2, per the asserts below */
p_context_t context;
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
Start * start = p_result(&context);
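/* Positions are 0-based (row, col); end_position refers to the last
 * character of the match (inclusive), not one past it. */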
assert_eq(1, start->pT1->pToken->position.row);
assert_eq(0, start->pT1->pToken->position.col);
assert_eq(1, start->pT1->pToken->end_position.row);
assert_eq(0, start->pT1->pToken->end_position.col);
assert(p_position_valid(start->pT1->pA->position));
assert_eq(2, start->pT1->pA->position.row);
assert_eq(2, start->pT1->pA->position.col);
assert_eq(2, start->pT1->pA->end_position.row);
assert_eq(7, start->pT1->pA->end_position.col);
assert_eq(1, start->pT1->position.row);
assert_eq(0, start->pT1->position.col);
assert_eq(2, start->pT1->end_position.row);
assert_eq(7, start->pT1->end_position.col);
assert_eq(1, start->position.row);
assert_eq(0, start->position.col);
assert_eq(2, start->end_position.row);
assert_eq(7, start->end_position.col);
input = "a\nbb";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
start = p_result(&context);
assert_eq(0, start->pT1->pToken->position.row);
assert_eq(0, start->pT1->pToken->position.col);
assert_eq(0, start->pT1->pToken->end_position.row);
assert_eq(0, start->pT1->pToken->end_position.col);
assert(p_position_valid(start->pT1->pA->position));
assert_eq(1, start->pT1->pA->position.row);
assert_eq(0, start->pT1->pA->position.col);
assert_eq(1, start->pT1->pA->end_position.row);
assert_eq(1, start->pT1->pA->end_position.col);
assert_eq(0, start->pT1->position.row);
assert_eq(0, start->pT1->position.col);
assert_eq(1, start->pT1->end_position.row);
assert_eq(1, start->pT1->end_position.col);
assert_eq(0, start->position.row);
assert_eq(0, start->position.col);
assert_eq(1, start->end_position.row);
assert_eq(1, start->end_position.col);
input = "a\nc\nc";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
start = p_result(&context);
assert_eq(0, start->pT1->pToken->position.row);
assert_eq(0, start->pT1->pToken->position.col);
assert_eq(0, start->pT1->pToken->end_position.row);
assert_eq(0, start->pT1->pToken->end_position.col);
assert(p_position_valid(start->pT1->pA->position));
assert_eq(1, start->pT1->pA->position.row);
assert_eq(0, start->pT1->pA->position.col);
assert_eq(2, start->pT1->pA->end_position.row);
assert_eq(0, start->pT1->pA->end_position.col);
assert_eq(0, start->pT1->position.row);
assert_eq(0, start->pT1->position.col);
assert_eq(2, start->pT1->end_position.row);
assert_eq(0, start->pT1->end_position.col);
assert_eq(0, start->position.row);
assert_eq(0, start->position.col);
assert_eq(2, start->end_position.row);
assert_eq(0, start->end_position.col);
input = "a";
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
start = p_result(&context);
assert_eq(0, start->pT1->pToken->position.row);
assert_eq(0, start->pT1->pToken->position.col);
assert_eq(0, start->pT1->pToken->end_position.row);
assert_eq(0, start->pT1->pToken->end_position.col);
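/* With input "a" the A rule matched nothing (both bb? and c? empty), so
 * its position is expected to be invalid. */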
assert(!p_position_valid(start->pT1->pA->position));
assert_eq(0, start->pT1->position.row);
assert_eq(0, start->pT1->position.col);
assert_eq(0, start->pT1->end_position.row);
assert_eq(0, start->pT1->end_position.col);
assert_eq(0, start->position.row);
assert_eq(0, start->position.col);
assert_eq(0, start->end_position.row);
assert_eq(0, start->end_position.col);
return 0;
}


@ -0,0 +1,104 @@
import testparser;
import std.stdio;
import testutils;
int main()
{
return 0;
}
unittest
{
string input = "\na\n  bb ccc"; // two spaces before bb: A begins at column 2, per the asserts below
p_context_t context;
p_context_init(&context, input);
assert(p_parse(&context) == P_SUCCESS);
Start * start = p_result(&context);
assert_eq(1, start.pT1.pToken.position.row);
assert_eq(0, start.pT1.pToken.position.col);
assert_eq(1, start.pT1.pToken.end_position.row);
assert_eq(0, start.pT1.pToken.end_position.col);
assert(start.pT1.pA.position.valid);
assert_eq(2, start.pT1.pA.position.row);
assert_eq(2, start.pT1.pA.position.col);
assert_eq(2, start.pT1.pA.end_position.row);
assert_eq(7, start.pT1.pA.end_position.col);
assert_eq(1, start.pT1.position.row);
assert_eq(0, start.pT1.position.col);
assert_eq(2, start.pT1.end_position.row);
assert_eq(7, start.pT1.end_position.col);
assert_eq(1, start.position.row);
assert_eq(0, start.position.col);
assert_eq(2, start.end_position.row);
assert_eq(7, start.end_position.col);
input = "a\nbb";
p_context_init(&context, input);
assert(p_parse(&context) == P_SUCCESS);
start = p_result(&context);
assert_eq(0, start.pT1.pToken.position.row);
assert_eq(0, start.pT1.pToken.position.col);
assert_eq(0, start.pT1.pToken.end_position.row);
assert_eq(0, start.pT1.pToken.end_position.col);
assert(start.pT1.pA.position.valid);
assert_eq(1, start.pT1.pA.position.row);
assert_eq(0, start.pT1.pA.position.col);
assert_eq(1, start.pT1.pA.end_position.row);
assert_eq(1, start.pT1.pA.end_position.col);
assert_eq(0, start.pT1.position.row);
assert_eq(0, start.pT1.position.col);
assert_eq(1, start.pT1.end_position.row);
assert_eq(1, start.pT1.end_position.col);
assert_eq(0, start.position.row);
assert_eq(0, start.position.col);
assert_eq(1, start.end_position.row);
assert_eq(1, start.end_position.col);
input = "a\nc\nc";
p_context_init(&context, input);
assert(p_parse(&context) == P_SUCCESS);
start = p_result(&context);
assert_eq(0, start.pT1.pToken.position.row);
assert_eq(0, start.pT1.pToken.position.col);
assert_eq(0, start.pT1.pToken.end_position.row);
assert_eq(0, start.pT1.pToken.end_position.col);
assert(start.pT1.pA.position.valid);
assert_eq(1, start.pT1.pA.position.row);
assert_eq(0, start.pT1.pA.position.col);
assert_eq(2, start.pT1.pA.end_position.row);
assert_eq(0, start.pT1.pA.end_position.col);
assert_eq(0, start.pT1.position.row);
assert_eq(0, start.pT1.position.col);
assert_eq(2, start.pT1.end_position.row);
assert_eq(0, start.pT1.end_position.col);
assert_eq(0, start.position.row);
assert_eq(0, start.position.col);
assert_eq(2, start.end_position.row);
assert_eq(0, start.end_position.col);
input = "a";
p_context_init(&context, input);
assert(p_parse(&context) == P_SUCCESS);
start = p_result(&context);
assert_eq(0, start.pT1.pToken.position.row);
assert_eq(0, start.pT1.pToken.position.col);
assert_eq(0, start.pT1.pToken.end_position.row);
assert_eq(0, start.pT1.pToken.end_position.col);
assert(!start.pT1.pA.position.valid);
assert_eq(0, start.pT1.position.row);
assert_eq(0, start.pT1.position.col);
assert_eq(0, start.pT1.end_position.row);
assert_eq(0, start.pT1.end_position.col);
assert_eq(0, start.position.row);
assert_eq(0, start.position.col);
assert_eq(0, start.end_position.row);
assert_eq(0, start.end_position.col);
}


@ -0,0 +1,84 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"
int main()
{
char const * input = "abbccc";
p_context_t context;
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
Start * start = p_result(&context);
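/* Each T wraps exactly one token, so a T node's span equals its
 * token's span. */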
assert_eq(0, start->pT1->pToken->position.row);
assert_eq(0, start->pT1->pToken->position.col);
assert_eq(0, start->pT1->pToken->end_position.row);
assert_eq(0, start->pT1->pToken->end_position.col);
assert_eq(0, start->pT1->position.row);
assert_eq(0, start->pT1->position.col);
assert_eq(0, start->pT1->end_position.row);
assert_eq(0, start->pT1->end_position.col);
assert_eq(0, start->pT2->pToken->position.row);
assert_eq(1, start->pT2->pToken->position.col);
assert_eq(0, start->pT2->pToken->end_position.row);
assert_eq(2, start->pT2->pToken->end_position.col);
assert_eq(0, start->pT2->position.row);
assert_eq(1, start->pT2->position.col);
assert_eq(0, start->pT2->end_position.row);
assert_eq(2, start->pT2->end_position.col);
assert_eq(0, start->pT3->pToken->position.row);
assert_eq(3, start->pT3->pToken->position.col);
assert_eq(0, start->pT3->pToken->end_position.row);
assert_eq(5, start->pT3->pToken->end_position.col);
assert_eq(0, start->pT3->position.row);
assert_eq(3, start->pT3->position.col);
assert_eq(0, start->pT3->end_position.row);
assert_eq(5, start->pT3->end_position.col);
assert_eq(0, start->position.row);
assert_eq(0, start->position.col);
assert_eq(0, start->end_position.row);
assert_eq(5, start->end_position.col);
input = "\n\n  bb\nc\ncc\n\n     a"; /* bb at column 2; final a at column 5, per the asserts below */
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
start = p_result(&context);
assert_eq(2, start->pT1->pToken->position.row);
assert_eq(2, start->pT1->pToken->position.col);
assert_eq(2, start->pT1->pToken->end_position.row);
assert_eq(3, start->pT1->pToken->end_position.col);
assert_eq(2, start->pT1->position.row);
assert_eq(2, start->pT1->position.col);
assert_eq(2, start->pT1->end_position.row);
assert_eq(3, start->pT1->end_position.col);
assert_eq(3, start->pT2->pToken->position.row);
assert_eq(0, start->pT2->pToken->position.col);
assert_eq(4, start->pT2->pToken->end_position.row);
assert_eq(1, start->pT2->pToken->end_position.col);
assert_eq(3, start->pT2->position.row);
assert_eq(0, start->pT2->position.col);
assert_eq(4, start->pT2->end_position.row);
assert_eq(1, start->pT2->end_position.col);
assert_eq(6, start->pT3->pToken->position.row);
assert_eq(5, start->pT3->pToken->position.col);
assert_eq(6, start->pT3->pToken->end_position.row);
assert_eq(5, start->pT3->pToken->end_position.col);
assert_eq(6, start->pT3->position.row);
assert_eq(5, start->pT3->position.col);
assert_eq(6, start->pT3->end_position.row);
assert_eq(5, start->pT3->end_position.col);
assert_eq(2, start->position.row);
assert_eq(2, start->position.col);
assert_eq(6, start->end_position.row);
assert_eq(5, start->end_position.col);
return 0;
}


@ -0,0 +1,86 @@
import testparser;
import std.stdio;
import testutils;
int main()
{
return 0;
}
unittest
{
string input = "abbccc";
p_context_t context;
p_context_init(&context, input);
assert(p_parse(&context) == P_SUCCESS);
Start * start = p_result(&context);
assert_eq(0, start.pT1.pToken.position.row);
assert_eq(0, start.pT1.pToken.position.col);
assert_eq(0, start.pT1.pToken.end_position.row);
assert_eq(0, start.pT1.pToken.end_position.col);
assert_eq(0, start.pT1.position.row);
assert_eq(0, start.pT1.position.col);
assert_eq(0, start.pT1.end_position.row);
assert_eq(0, start.pT1.end_position.col);
assert_eq(0, start.pT2.pToken.position.row);
assert_eq(1, start.pT2.pToken.position.col);
assert_eq(0, start.pT2.pToken.end_position.row);
assert_eq(2, start.pT2.pToken.end_position.col);
assert_eq(0, start.pT2.position.row);
assert_eq(1, start.pT2.position.col);
assert_eq(0, start.pT2.end_position.row);
assert_eq(2, start.pT2.end_position.col);
assert_eq(0, start.pT3.pToken.position.row);
assert_eq(3, start.pT3.pToken.position.col);
assert_eq(0, start.pT3.pToken.end_position.row);
assert_eq(5, start.pT3.pToken.end_position.col);
assert_eq(0, start.pT3.position.row);
assert_eq(3, start.pT3.position.col);
assert_eq(0, start.pT3.end_position.row);
assert_eq(5, start.pT3.end_position.col);
assert_eq(0, start.position.row);
assert_eq(0, start.position.col);
assert_eq(0, start.end_position.row);
assert_eq(5, start.end_position.col);
input = "\n\n  bb\nc\ncc\n\n     a"; // bb at column 2; final a at column 5, per the asserts below
p_context_init(&context, input);
assert(p_parse(&context) == P_SUCCESS);
start = p_result(&context);
assert_eq(2, start.pT1.pToken.position.row);
assert_eq(2, start.pT1.pToken.position.col);
assert_eq(2, start.pT1.pToken.end_position.row);
assert_eq(3, start.pT1.pToken.end_position.col);
assert_eq(2, start.pT1.position.row);
assert_eq(2, start.pT1.position.col);
assert_eq(2, start.pT1.end_position.row);
assert_eq(3, start.pT1.end_position.col);
assert_eq(3, start.pT2.pToken.position.row);
assert_eq(0, start.pT2.pToken.position.col);
assert_eq(4, start.pT2.pToken.end_position.row);
assert_eq(1, start.pT2.pToken.end_position.col);
assert_eq(3, start.pT2.position.row);
assert_eq(0, start.pT2.position.col);
assert_eq(4, start.pT2.end_position.row);
assert_eq(1, start.pT2.end_position.col);
assert_eq(6, start.pT3.pToken.position.row);
assert_eq(5, start.pT3.pToken.position.col);
assert_eq(6, start.pT3.pToken.end_position.row);
assert_eq(5, start.pT3.pToken.end_position.col);
assert_eq(6, start.pT3.position.row);
assert_eq(5, start.pT3.position.col);
assert_eq(6, start.pT3.end_position.row);
assert_eq(5, start.pT3.end_position.col);
assert_eq(2, start.position.row);
assert_eq(2, start.position.col);
assert_eq(6, start.end_position.row);
assert_eq(5, start.end_position.col);
}

spec/test_field_aliases.c (new file)

@ -0,0 +1,13 @@
#include "testparser.h"
#include <assert.h>
#include <string.h>
#include "testutils.h"
int main()
{
char const * input = "foo1\nbar2";
p_context_t context;
p_context_init(&context, (uint8_t const *)input, strlen(input));
assert(p_parse(&context) == P_SUCCESS);
return 0;
}

spec/test_field_aliases.d (new file)

@ -0,0 +1,15 @@
import testparser;
import std.stdio;
int main()
{
return 0;
}
unittest
{
string input = "foo1\nbar2";
p_context_t context;
p_context_init(&context, input);
assert(p_parse(&context) == P_SUCCESS);
}


@@ -43,41 +43,57 @@ int main()
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 0u);
assert(token_info.position.col == 0u);
+assert(token_info.end_position.row == 0u);
+assert(token_info.end_position.col == 0u);
assert(token_info.length == 1u);
assert(token_info.token == TOKEN_int);
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 0u);
assert(token_info.position.col == 2u);
+assert(token_info.end_position.row == 0u);
+assert(token_info.end_position.col == 2u);
assert(token_info.length == 1u);
assert(token_info.token == TOKEN_plus);
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 0u);
assert(token_info.position.col == 4u);
+assert(token_info.end_position.row == 0u);
+assert(token_info.end_position.col == 4u);
assert(token_info.length == 1u);
assert(token_info.token == TOKEN_int);
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 0u);
assert(token_info.position.col == 6u);
+assert(token_info.end_position.row == 0u);
+assert(token_info.end_position.col == 6u);
assert(token_info.length == 1u);
assert(token_info.token == TOKEN_times);
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 1u);
assert(token_info.position.col == 0u);
+assert(token_info.end_position.row == 1u);
+assert(token_info.end_position.col == 2u);
assert(token_info.length == 3u);
assert(token_info.token == TOKEN_int);
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 1u);
assert(token_info.position.col == 4u);
+assert(token_info.end_position.row == 1u);
+assert(token_info.end_position.col == 4u);
assert(token_info.length == 1u);
assert(token_info.token == TOKEN_plus);
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 1u);
assert(token_info.position.col == 6u);
+assert(token_info.end_position.row == 1u);
+assert(token_info.end_position.col == 8u);
assert(token_info.length == 3u);
assert(token_info.token == TOKEN_int);
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 1u);
assert(token_info.position.col == 9u);
+assert(token_info.end_position.row == 1u);
+assert(token_info.end_position.col == 9u);
assert(token_info.length == 0u);
assert(token_info.token == TOKEN___EOF);
@@ -85,6 +101,8 @@ int main()
assert(p_lex(&context, &token_info) == P_SUCCESS);
assert(token_info.position.row == 0u);
assert(token_info.position.col == 0u);
+assert(token_info.end_position.row == 0u);
+assert(token_info.end_position.col == 0u);
assert(token_info.length == 0u);
assert(token_info.token == TOKEN___EOF);


@@ -47,23 +47,23 @@ unittest
p_context_t context;
p_context_init(&context, input);
assert(p_lex(&context, &token_info) == P_SUCCESS);
-assert(token_info == p_token_info_t(p_position_t(0, 0), 1, TOKEN_int));
+assert(token_info == p_token_info_t(p_position_t(0, 0), p_position_t(0, 0), 1, TOKEN_int));
assert(p_lex(&context, &token_info) == P_SUCCESS);
-assert(token_info == p_token_info_t(p_position_t(0, 2), 1, TOKEN_plus));
+assert(token_info == p_token_info_t(p_position_t(0, 2), p_position_t(0, 2), 1, TOKEN_plus));
assert(p_lex(&context, &token_info) == P_SUCCESS);
-assert(token_info == p_token_info_t(p_position_t(0, 4), 1, TOKEN_int));
+assert(token_info == p_token_info_t(p_position_t(0, 4), p_position_t(0, 4), 1, TOKEN_int));
assert(p_lex(&context, &token_info) == P_SUCCESS);
-assert(token_info == p_token_info_t(p_position_t(0, 6), 1, TOKEN_times));
+assert(token_info == p_token_info_t(p_position_t(0, 6), p_position_t(0, 6), 1, TOKEN_times));
assert(p_lex(&context, &token_info) == P_SUCCESS);
-assert(token_info == p_token_info_t(p_position_t(1, 0), 3, TOKEN_int));
+assert(token_info == p_token_info_t(p_position_t(1, 0), p_position_t(1, 2), 3, TOKEN_int));
assert(p_lex(&context, &token_info) == P_SUCCESS);
-assert(token_info == p_token_info_t(p_position_t(1, 4), 1, TOKEN_plus));
+assert(token_info == p_token_info_t(p_position_t(1, 4), p_position_t(1, 4), 1, TOKEN_plus));
assert(p_lex(&context, &token_info) == P_SUCCESS);
-assert(token_info == p_token_info_t(p_position_t(1, 6), 3, TOKEN_int));
+assert(token_info == p_token_info_t(p_position_t(1, 6), p_position_t(1, 8), 3, TOKEN_int));
assert(p_lex(&context, &token_info) == P_SUCCESS);
-assert(token_info == p_token_info_t(p_position_t(1, 9), 0, TOKEN___EOF));
+assert(token_info == p_token_info_t(p_position_t(1, 9), p_position_t(1, 9), 0, TOKEN___EOF));
p_context_init(&context, "");
assert(p_lex(&context, &token_info) == P_SUCCESS);
-assert(token_info == p_token_info_t(p_position_t(0, 0), 0, TOKEN___EOF));
+assert(token_info == p_token_info_t(p_position_t(0, 0), p_position_t(0, 0), 0, TOKEN___EOF));
}