From c6ea4f83c21c9b39fa058491c001bf4f7c8dcc00 Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Sun, 13 Nov 2022 22:20:30 -0500 Subject: [PATCH] Store parser values according to the rule/pattern type --- assets/parser.d.erb | 35 ++++++++++++++++++------------ lib/propane/generator.rb | 42 ++++++++++++++++++++++++++++++------ lib/propane/grammar.rb | 7 ++++-- lib/propane/pattern.rb | 5 +++++ lib/propane/rule.rb | 2 +- lib/propane/rule_set.rb | 4 ++++ lib/propane/token.rb | 2 +- spec/propane/grammar_spec.rb | 14 ++++++++++++ 8 files changed, 87 insertions(+), 24 deletions(-) diff --git a/assets/parser.d.erb b/assets/parser.d.erb index 51554d7..2270e18 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -27,6 +27,13 @@ class <%= @classname %> <% end %> ]; + static union ParserValue + { +<% @grammar.ptypes.each do |name, typestring| %> + <%= typestring %> v_<%= name %>; +<% end %> + } + static class Decoder { enum @@ -156,7 +163,7 @@ class <%= @classname %> size_t col; size_t length; uint token; - <%= @grammar.ptype %> pvalue; + ParserValue pvalue; } private string m_input; @@ -200,7 +207,7 @@ class <%= @classname %> <% @grammar.patterns.each do |pattern| %> <% if pattern.code_id %> case <%= pattern.code_id %>u: { -<%= expand_code(pattern.code, false) %> +<%= expand_code(pattern.code, false, nil, pattern) %> } break; <% end %> <% end %> @@ -349,7 +356,7 @@ class <%= @classname %> private struct StateValue { uint state; - <%= @grammar.ptype %> pvalue; + ParserValue pvalue; this(uint state) { @@ -378,7 +385,7 @@ class <%= @classname %> private Lexer m_lexer; - private <%= @grammar.ptype %> parse_result; + private ParserValue parse_result; this(string input) { @@ -391,7 +398,7 @@ class <%= @classname %> uint token = _TOKEN_COUNT; StateValue[] statevalues = new StateValue[](1); uint reduced_rule_set = 0xFFFFFFFFu; - <%= @grammar.ptype %> reduced_parse_result; + ParserValue reduced_parser_value; for (;;) { if (token == _TOKEN_COUNT) @@ -427,9 +434,9 @@ class <%= @classname %> else { /* We shifted a RuleSet. */ - statevalues[$-1].pvalue = reduced_parse_result; - <%= @grammar.ptype %> new_parse_result; - reduced_parse_result = new_parse_result; + statevalues[$-1].pvalue = reduced_parser_value; + ParserValue new_parse_result; + reduced_parser_value = new_parse_result; reduced_rule_set = 0xFFFFFFFFu; } continue; @@ -439,7 +446,7 @@ class <%= @classname %> if (reduce_index != 0xFFFFFFFFu) { /* We have something to reduce. */ - reduced_parse_result = user_code(reduces[reduce_index].rule, statevalues, reduces[reduce_index].n_states); + reduced_parser_value = user_code(reduces[reduce_index].rule, statevalues, reduces[reduce_index].n_states); reduced_rule_set = reduces[reduce_index].rule_set; statevalues.length -= reduces[reduce_index].n_states; continue; @@ -459,9 +466,9 @@ class <%= @classname %> } } - @property <%= @grammar.ptype %> result() + @property <%= start_rule_type[1] %> result() { - return parse_result; + return parse_result.v_<%= start_rule_type[0] %>; } private uint check_shift(uint state, uint symbol) @@ -517,16 +524,16 @@ class <%= @classname %> * * @return Parse value. */ - private <%= @grammar.ptype %> user_code(uint rule, StateValue[] statevalues, uint n_states) + private ParserValue user_code(uint rule, StateValue[] statevalues, uint n_states) { - <%= @grammar.ptype %> _pvalue; + ParserValue _pvalue; switch (rule) { <% @grammar.rules.each do |rule| %> <% if rule.code %> case <%= rule.id %>u: { -<%= expand_code(rule.code, true) %> +<%= expand_code(rule.code, true, rule, nil) %> } break; <% end %> <% end %> diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb index 1d9c9cc..839bd1b 100644 --- a/lib/propane/generator.rb +++ b/lib/propane/generator.rb @@ -33,6 +33,7 @@ class Propane pattern.mode = "default" found_default = true end + pattern.ptypename ||= "default" end unless found_default raise Error.new("No patterns found for default mode") @@ -43,6 +44,8 @@ class Propane @grammar.tokens.each_with_index do |token, token_id| # Assign token ID. token.id = token_id + # Set default ptypename if none given. + token.ptypename ||= "default" # Check for token name conflicts. if tokens_by_name.include?(token.name) raise Error.new("Duplicate token name #{token.name.inspect}") @@ -69,8 +72,20 @@ class Propane rule_sets[rule.name] = RuleSet.new(rule.name, rule_set_id) rule_set_id += 1 end - rule.rule_set = rule_sets[rule.name] - rule_sets[rule.name] << rule + rule_set = rule_sets[rule.name] + if rule_set.ptypename && rule.ptypename && rule_set.ptypename != rule.ptypename + raise Error.new("Conflicting ptypes for rule #{rule.name}") + end + rule_set.ptypename ||= rule.ptypename + rule.rule_set = rule_set + rule_set << rule + end + rule_sets.each do |name, rule_set| + rule_set.ptypename ||= "default" + # Assign rule set ptypenames back to rules. + rule_set.rules.each do |rule| + rule.ptypename = rule_set.ptypename + end end # Generate lexer user code IDs for lexer patterns with user code blocks. @grammar.patterns.select do |pattern| @@ -159,24 +174,28 @@ class Propane # User code block. # @param parser [Boolean] # Whether the user code is for the parser or lexer. + # @param rule [Rule, nil] + # The Rule associated with the user code if user code is for the parser. + # @param pattern [Pattern, nil] + # The Pattern associated with the user code if user code is for the lexer. # # @return [String] # Expanded user code block. - def expand_code(code, parser) + def expand_code(code, parser, rule, pattern) code = code.gsub(/\$token\(([$\w]+)\)/) do |match| "TOKEN_#{Token.code_name($1)}" end if parser code = code.gsub(/\$\$/) do |match| - "_pvalue" + "_pvalue.v_#{rule.ptypename}" end code = code.gsub(/\$(\d+)/) do |match| index = $1.to_i - "statevalues[$-1-n_states+#{index}].pvalue" + "statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.ptypename}" end else code = code.gsub(/\$\$/) do |match| - "lt.pvalue" + "lt.pvalue.v_#{pattern.ptypename}" end code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match| mode_name = $1 @@ -190,6 +209,17 @@ class Propane code end + # Get the parser value type for the start rule. + # + # @return [Array] + # Start rule parser value type name and type string. + def start_rule_type + start_rule = @grammar.rules.find do |rule| + rule.name == "Start" + end + [start_rule.ptypename, @grammar.ptypes[start_rule.ptypename]] + end + end end diff --git a/lib/propane/grammar.rb b/lib/propane/grammar.rb index 2c2c8ac..ba86a76 100644 --- a/lib/propane/grammar.rb +++ b/lib/propane/grammar.rb @@ -117,7 +117,7 @@ class Propane end token = Token.new(name, ptypename, @line_number) @tokens << token - pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, mode: @mode) + pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, mode: @mode, ptypename: ptypename) @patterns << pattern @mode = nil true @@ -170,10 +170,13 @@ class Propane def parse_pattern_statement! if pattern = parse_pattern! consume!(/\s+/) + if md = consume!(/\((#{IDENTIFIER_REGEX})\)\s*/) + ptypename = md[1] + end unless code = parse_code_block! raise Error.new("Line #{@line_number}: expected code block to follow pattern") end - @patterns << Pattern.new(pattern: pattern, line_number: @line_number, code: code, mode: @mode) + @patterns << Pattern.new(pattern: pattern, line_number: @line_number, code: code, mode: @mode, ptypename: ptypename) @mode = nil true end diff --git a/lib/propane/pattern.rb b/lib/propane/pattern.rb index a111eac..7e87ccc 100644 --- a/lib/propane/pattern.rb +++ b/lib/propane/pattern.rb @@ -30,6 +30,10 @@ class Propane # Lexer mode for this pattern. attr_accessor :mode + # @return [String, nil] + # Parser value type name. + attr_accessor :ptypename + # Construct a Pattern. # # @param options [Hash] @@ -53,6 +57,7 @@ class Propane @token = options[:token] @line_number = options[:line_number] @mode = options[:mode] + @ptypename = options[:ptypename] regex = Regex.new(@pattern) regex.nfa.end_state.accepts = self @nfa = regex.nfa diff --git a/lib/propane/rule.rb b/lib/propane/rule.rb index 0b005d3..dd4d7eb 100644 --- a/lib/propane/rule.rb +++ b/lib/propane/rule.rb @@ -16,7 +16,7 @@ class Propane # @return [String, nil] # Parser type name. - attr_reader :ptypename + attr_accessor :ptypename # @return [Integer] # Line number where the rule was defined in the input grammar. diff --git a/lib/propane/rule_set.rb b/lib/propane/rule_set.rb index 0437481..68e1705 100644 --- a/lib/propane/rule_set.rb +++ b/lib/propane/rule_set.rb @@ -10,6 +10,10 @@ class Propane # Name of the RuleSet. attr_reader :name + # @return [String, nil] + # Parser type name. + attr_accessor :ptypename + # @return [Array] # Rules in the RuleSet. attr_reader :rules diff --git a/lib/propane/token.rb b/lib/propane/token.rb index abe81f9..0bf91e5 100644 --- a/lib/propane/token.rb +++ b/lib/propane/token.rb @@ -20,7 +20,7 @@ class Propane # @return [String, nil] # Parser value type name. - attr_reader :ptypename + attr_accessor :ptypename # @return [Integer, nil] # Token ID. diff --git a/spec/propane/grammar_spec.rb b/spec/propane/grammar_spec.rb index 03ee210..16392d7 100644 --- a/spec/propane/grammar_spec.rb +++ b/spec/propane/grammar_spec.rb @@ -186,6 +186,12 @@ token abc(string); token bar; tokenid int(integer); +/xyz/ (string) << +>> + +/z28/ << +>> + Start (node) -> R; R -> abc int; EOF @@ -210,6 +216,14 @@ EOF o = grammar.rules.find {|rule| rule.name == "R"} expect(o).to_not be_nil expect(o.ptypename).to be_nil + + o = grammar.patterns.find {|pattern| pattern.pattern == "xyz"} + expect(o).to_not be_nil + expect(o.ptypename).to eq "string" + + o = grammar.patterns.find {|pattern| pattern.pattern == "z28"} + expect(o).to_not be_nil + expect(o.ptypename).to be_nil end end end