Allow storing a result value for a token from a lexer code block
parent ca8a360c0e
commit bca0a14371
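The change in brief, illustrated with the grammar from the spec added in this commit (result_type, the token's match text, and the $$ / $1 placeholders are all taken from that spec, nothing here is new):

    result_type ulong;
    token word /[a-z]+/ <<
      $$ = match.length;
    >>
    Start -> word <<
      $$ = $1;
    >>

In a lexer code block, $$ now expands to the in-progress LexedToken's result field; when that token is shifted, the parser copies the value onto its value stack, so the rule's $1 sees the length the lexer stored.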
@@ -156,6 +156,7 @@ class <%= @classname %>
         size_t col;
         size_t length;
         uint token;
+        <%= @grammar.result_type %> result;
     }
 
     private string m_input;
@@ -187,18 +188,19 @@ class <%= @classname %>
      *
      * @param code_id The ID of the user code block to execute.
      * @param match Matched text for this pattern.
+     * @param lt LexedToken lexer result in progress.
      *
      * @return Token ID to accept, or _TOKEN_COUNT if the user code does
      * not explicitly return a token.
      */
-    private uint user_code(uint code_id, string match)
+    private uint user_code(uint code_id, string match, LexedToken * lt)
     {
         switch (code_id)
         {
 <% @grammar.patterns.each do |pattern| %>
 <% if pattern.code_id %>
             case <%= pattern.code_id %>u: {
-                <%= expand_code(pattern.code) %>
+                <%= expand_code(pattern.code, false) %>
             } break;
 <% end %>
 <% end %>
@@ -210,7 +212,10 @@ class <%= @classname %>
 
     private LexedToken attempt_lex_token()
     {
-        LexedToken lt = LexedToken(m_input_row, m_input_col, 0, _TOKEN_COUNT);
+        LexedToken lt;
+        lt.row = m_input_row;
+        lt.col = m_input_col;
+        lt.token = _TOKEN_COUNT;
         struct MatchInfo
         {
             size_t length;
@@ -269,7 +274,7 @@ class <%= @classname %>
             uint token_to_accept = longest_match_info.token;
             if (longest_match_info.code_id != 0xFFFF_FFFFu)
             {
-                uint user_code_token = user_code(longest_match_info.code_id, m_input[m_input_position..(m_input_position + longest_match_info.length)]);
+                uint user_code_token = user_code(longest_match_info.code_id, m_input[m_input_position..(m_input_position + longest_match_info.length)], &lt);
                 /* A return of _TOKEN_COUNT from user_code() means
                  * that the user code did not explicitly return a
                  * token. So only override the token to return if the
@@ -417,6 +422,7 @@ class <%= @classname %>
             {
                 /* We shifted a token, mark it consumed. */
                 token = _TOKEN_COUNT;
+                stateresults[$-1].result = lexed_token.result;
             }
             else
             {
@@ -520,7 +526,7 @@ class <%= @classname %>
 <% @grammar.rules.each do |rule| %>
 <% if rule.code %>
             case <%= rule.id %>u: {
-                <%= expand_code(rule.code) %>
+                <%= expand_code(rule.code, true) %>
             } break;
 <% end %>
 <% end %>
@@ -157,25 +157,37 @@ class Propane
     #
     # @param code [String]
     #   User code block.
+    # @param parser [Boolean]
+    #   Whether the user code is for the parser or lexer.
     #
     # @return [String]
     #   Expanded user code block.
-    def expand_code(code)
-      code.gsub(/\$token\(([$\w]+)\)/) do |match|
+    def expand_code(code, parser)
+      code = code.gsub(/\$token\(([$\w]+)\)/) do |match|
         "TOKEN_#{Token.code_name($1)}"
-      end.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match|
-        mode_name = $1
-        mode_id = @lexer.mode_id(mode_name)
-        unless mode_id
-          raise Error.new("Lexer mode '#{mode_name}' not found")
-        end
-        "m_mode = #{mode_id}u"
-      end.gsub(/\$\$/) do |match|
-        "_result"
-      end.gsub(/\$(\d+)/) do |match|
-        index = $1.to_i
-        "stateresults[$-1-n_states+#{index}].result"
       end
+      if parser
+        code = code.gsub(/\$\$/) do |match|
+          "_result"
+        end
+        code = code.gsub(/\$(\d+)/) do |match|
+          index = $1.to_i
+          "stateresults[$-1-n_states+#{index}].result"
+        end
+      else
+        code = code.gsub(/\$\$/) do |match|
+          "lt.result"
+        end
+        code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match|
+          mode_name = $1
+          mode_id = @lexer.mode_id(mode_name)
+          unless mode_id
+            raise Error.new("Lexer mode '#{mode_name}' not found")
+          end
+          "m_mode = #{mode_id}u"
+        end
+      end
+      code
     end
 
   end
@@ -317,4 +317,21 @@ EOF
       "pass1",
     ])
   end
+
+  it "allows storing a result value for the lexer" do
+    write_grammar <<EOF
+result_type ulong;
+token word /[a-z]+/ <<
+  $$ = match.length;
+>>
+Start -> word <<
+  $$ = $1;
+>>
+EOF
+    build_parser
+    compile("spec/test_lexer_result_value.d")
+    results = run
+    expect(results.stderr).to eq ""
+    expect(results.status).to eq 0
+  end
 end
spec/test_lexer_result_value.d (new file, 20 lines)
@@ -0,0 +1,20 @@
+import testparser;
+import std.stdio;
+
+int main()
+{
+    return 0;
+}
+
+unittest
+{
+    string input = `x`;
+    auto parser = new Testparser.Parser(input);
+    assert(parser.parse() == true);
+    assert(parser.result == 1u);
+
+    input = `fabulous`;
+    parser = new Testparser.Parser(input);
+    assert(parser.parse() == true);
+    assert(parser.result == 8u);
+}