From 9f2fe6f84bd653712cc4de4e1aefebbcbf1ae0e3 Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Sat, 14 Feb 2026 21:00:53 -0500 Subject: [PATCH] Add token_node and token_user_fields grammar statements --- assets/parser.c.erb | 2 ++ assets/parser.d.erb | 3 ++- assets/parser.h.erb | 3 ++- doc/user_guide.md | 8 ++++---- lib/propane/grammar.rb | 39 +++++++++++++++++++++++++++++++++------ spec/propane_spec.rb | 4 ++-- 6 files changed, 45 insertions(+), 14 deletions(-) diff --git a/assets/parser.c.erb b/assets/parser.c.erb index 130883c..03c7061 100644 --- a/assets/parser.c.erb +++ b/assets/parser.c.erb @@ -694,6 +694,7 @@ typedef struct <% end %> } state_value_t; +<% if @grammar.tree %> /** Common tree node structure. */ typedef struct TreeNode_s { @@ -703,6 +704,7 @@ typedef struct TreeNode_s uint8_t is_token; struct TreeNode_s * fields[]; } TreeNode; +<% end %> /** Parser shift table. */ static const shift_t parser_shift_table[] = { diff --git a/assets/parser.d.erb b/assets/parser.d.erb index 809ce4a..86a48cf 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -103,6 +103,7 @@ public struct <%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> /* TreeNode fields must be present in the same order here. */ <%= @grammar.prefix %>position_t position; <%= @grammar.prefix %>position_t end_position; +<%= @grammar.token_user_fields %> <%= @grammar.prefix %>token_t token; <%= @grammar.prefix %>value_t pvalue; } @@ -184,7 +185,7 @@ public struct <%= @grammar.prefix %>context_t /** User terminate code. */ size_t user_terminate_code; -<%= @grammar.context_user_code %> +<%= @grammar.context_user_fields %> } /************************************************************************** diff --git a/assets/parser.h.erb b/assets/parser.h.erb index 878271f..0be7966 100644 --- a/assets/parser.h.erb +++ b/assets/parser.h.erb @@ -80,6 +80,7 @@ typedef struct <%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %> <%= @grammar.prefix %>position_t end_position; uint16_t n_fields; uint8_t is_token; +<%= @grammar.token_user_fields %> <%= @grammar.prefix %>token_t token; <%= @grammar.prefix %>value_t pvalue; } <%= @grammar.tree_prefix %>Token<%= @grammar.tree_suffix %>; @@ -173,7 +174,7 @@ typedef struct /** User terminate code. */ size_t user_terminate_code; -<%= @grammar.context_user_code %> +<%= @grammar.context_user_fields %> } <%= @grammar.prefix %>context_t; /************************************************************************** diff --git a/doc/user_guide.md b/doc/user_guide.md index b1d1e85..709c46e 100644 --- a/doc/user_guide.md +++ b/doc/user_guide.md @@ -221,18 +221,18 @@ Parser rule code blocks are not available in tree generation mode. In tree generation mode, a full parse tree is automatically constructed in memory for user code to traverse after parsing is complete. -### Context code blocks: the `context` statement +### Context code blocks: the `context_user_fields` statement Propane uses a context structure for lexer and parser operations. Custom fields may be added to the context structure by using the grammar -`context` statement. +`context_user_fields` statement. This allows lexer pattern or parser rule code blocks to access user-defined fields within the context structure. Example: ``` -context << +context_user_fields << int mycontextval; >> ``` @@ -243,7 +243,7 @@ context fields by using the `${context.}` syntax. C++ example: ``` -context << +context_user_fields << std::string comments; >> drop /#(.*)\n/ << diff --git a/lib/propane/grammar.rb b/lib/propane/grammar.rb index ffe57dc..5f64449 100644 --- a/lib/propane/grammar.rb +++ b/lib/propane/grammar.rb @@ -5,7 +5,7 @@ class Propane # Reserve identifiers beginning with a double-underscore for internal use. IDENTIFIER_REGEX = /(?:[a-zA-Z]|_[a-zA-Z0-9])[a-zA-Z_0-9]*/ - attr_reader :context_user_code + attr_reader :context_user_fields attr_reader :tree attr_reader :tree_prefix attr_reader :tree_suffix @@ -18,6 +18,8 @@ class Propane attr_reader :code_blocks attr_reader :ptypes attr_reader :prefix + attr_reader :token_node + attr_reader :token_user_fields def initialize(input) @patterns = [] @@ -35,7 +37,9 @@ class Propane @tree_prefix = "" @tree_suffix = "" @free_token_node = nil - @context_user_code = "" + @context_user_fields = nil + @token_node = nil + @token_user_fields = nil parse_grammar! @start_rules << "Start" if @start_rules.empty? end @@ -64,12 +68,14 @@ class Propane if parse_white_space! elsif parse_comment_line! elsif @modeline.nil? && parse_mode_label! - elsif parse_context_statement! + elsif parse_context_user_fields_statement! elsif parse_tree_statement! elsif parse_tree_prefix_statement! elsif parse_tree_suffix_statement! elsif parse_free_token_node_statement! elsif parse_module_statement! + elsif parse_token_node_statement! + elsif parse_token_user_fields_statement! elsif parse_ptype_statement! elsif parse_pattern_statement! elsif parse_start_statement! @@ -101,12 +107,13 @@ class Propane consume!(/#.*\n/) end - def parse_context_statement! - if md = consume!(/context\b\s*/) + def parse_context_user_fields_statement! + if md = consume!(/context_user_fields\b\s*/) unless code = parse_code_block! raise Error.new("Line #{@line_number}: expected code block") end - @context_user_code += code + @context_user_fields ||= "" + @context_user_fields += code end end @@ -144,6 +151,26 @@ class Propane end end + def parse_token_node_statement! + if md = consume!(/token_node\b\s*/) + unless code = parse_code_block! + raise Error.new("Line #{@line_number}: expected code block") + end + @token_node ||= "" + @token_node += code + end + end + + def parse_token_user_fields_statement! + if md = consume!(/token_user_fields\b\s*/) + unless code = parse_code_block! + raise Error.new("Line #{@line_number}: expected code block") + end + @token_user_fields ||= "" + @token_user_fields += code + end + end + def parse_ptype_statement! if consume!(/ptype\s+/) name = "default" diff --git a/spec/propane_spec.rb b/spec/propane_spec.rb index 3dd5177..e90d554 100644 --- a/spec/propane_spec.rb +++ b/spec/propane_spec.rb @@ -1548,7 +1548,7 @@ EOF it "allows user-defined context fields" do if language == "d" write_grammar <> @@ -1569,7 +1569,7 @@ EOF #include #include >> -context << +context_user_fields << char * comments; unsigned int acount; >>