From 9746b3f2bfa60e188060fcddbbecd720979e7419 Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Sun, 21 Jul 2024 14:04:51 -0400 Subject: [PATCH] Document position tracking fields in user guide - #27 --- doc/user_guide.md | 58 ++++++++++++++++++++++++++++++++++++++++------ lib/propane/cli.rb | 2 +- 2 files changed, 52 insertions(+), 8 deletions(-) diff --git a/doc/user_guide.md b/doc/user_guide.md index a20887d..6ac3fd3 100644 --- a/doc/user_guide.md +++ b/doc/user_guide.md @@ -15,6 +15,7 @@ Propane is a LALR Parser Generator (LPG) which: * generates a table-driven shift/reduce parser to parse input in linear time * targets C or D language outputs * optionally supports automatic full AST generation + * tracks input text start and end positions for all matched tokens/rules * is MIT-licensed * is distributable as a standalone Ruby script @@ -35,9 +36,14 @@ Propane is typically invoked from the command-line as `./propane`. Usage: ./propane [options] Options: - --log LOG Write log file - --version Show program version and exit - -h, --help Show this usage and exit + -h, --help Show this usage and exit. + --log LOG Write log file. This will show all parser states and their + associated shifts and reduces. It can be helpful when + debugging a grammar. + --version Show program version and exit. + -w Treat warnings as errors. This option will treat shift/reduce + conflicts as fatal errors and will print them to stderr in + addition to the log file. The user must specify the path to a Propane input grammar file and a path to an output file. @@ -502,7 +508,7 @@ tokenid str; mystringvalue = ""; $mode(string); >> -string: /[^"]+/ << mystringvalue += match; >> +string: /[^"]+/ << mystringvalue ~= match; >> string: /"/ << $mode(default); return $token(str); @@ -762,6 +768,13 @@ A pointer to this instance is passed to the generated functions. The `p_position_t` structure contains two fields `row` and `col`. 
These fields contain the 0-based row and column describing a parser position. +For D targets, the `p_position_t` structure can be checked for validity by +querying the `valid` property. + +For C targets, the `p_position_t` structure can be checked for validity by +calling `p_position_valid(pos)` where `pos` is a `p_position_t` structure +instance. + ### AST Node Types If AST generation mode is enabled, a structure type for each rule will be @@ -772,13 +785,26 @@ AST node which refers to a raw parser token rather than a composite rule. #### AST Node Fields -A `Token` node has two fields: +All AST nodes have a `position` field specifying the text position of the +beginning of the matched token or rule, and an `end_position` field specifying +the text position of the end of the matched token or rule. +Each of these fields is an instance of the `p_position_t` structure. + +A `Token` node will always have a valid `position` and `end_position`. +A rule node may not have valid positions if the rule allows for an empty match. +In this case the `position` structure should be checked for validity before +using it. +For C targets this can be accomplished with +`if (p_position_valid(node->position))` and for D targets this can be +accomplished with `if (node.position.valid)`. + +A `Token` node has the following additional fields: * `token` which specifies which token was parsed (one of `TOKEN_*`) * `pvalue` which specifies the parser value for the token. If a lexer user code block assigned to `$$`, the assigned value will be stored here. -The other generated AST node structures have fields generated based on the +AST node structures for rules contain generated fields based on the right hand side components specified for all rules of a given name. 
In this example: @@ -802,7 +828,7 @@ The `Items` structure will have fields: If a rule can be empty (for example in the second `Items` rule above), then an instance of a pointer to that rule's generated AST node will be null if the -parser matches the empty rule definition. +parser matches the empty rule pattern. The non-positional AST node field pointer will not be generated if there are multiple positions in which an instance of the node it points to could be @@ -859,6 +885,24 @@ p_context_init(&context, input, input_length); size_t result = p_parse(&context); ``` +### `p_position_valid` + +The `p_position_valid()` function is only generated for C targets. +It is used to determine whether or not a `p_position_t` structure is valid. + +Example: + +``` +if (p_position_valid(node->position)) +{ + .... +} +``` + +For D targets, rather than using `p_position_valid()`, the `valid` property +function of the `p_position_t` structure can be queried +(e.g. `if (node.position.valid)`). + ### `p_result` The `p_result()` function can be used to retrieve the final parse value after diff --git a/lib/propane/cli.rb b/lib/propane/cli.rb index 46a5542..5a5b5af 100644 --- a/lib/propane/cli.rb +++ b/lib/propane/cli.rb @@ -4,11 +4,11 @@ class Propane USAGE = < Options: + -h, --help Show this usage and exit. --log LOG Write log file. This will show all parser states and their associated shifts and reduces. It can be helpful when debugging a grammar. --version Show program version and exit. - -h, --help Show this usage and exit. -w Treat warnings as errors. This option will treat shift/reduce conflicts as fatal errors and will print them to stderr in addition to the log file.