Allow multiple lexer modes to be specified for a lexer pattern - close #35

2025-03-11 20:40:10 -04:00 · 2025-03-11 20:40:10 -04:00 · eb9d9026fc
commit eb9d9026fc
parent 54bb3307cd
11 changed files with 166 additions and 30 deletions
--- a/1
+++ b/1
@ -1,5 +1,6 @@
 source "https://rubygems.org"
 gem "base64"
 gem "rake"
 gem "rspec"
 gem "rdoc"
--- a/Gemfile.lock
+++ b/Gemfile.lock
@ -1,6 +1,7 @@
 GEM
  remote: https://rubygems.org/
  specs:
    base64 (0.2.0)
    date (3.4.1)
    diff-lcs (1.5.1)
    docile (1.4.1)
@ -37,6 +38,7 @@ PLATFORMS
  ruby
 DEPENDENCIES
  base64
  rake
  rdoc
  redcarpet
--- a/doc/user_guide.md
+++ b/doc/user_guide.md
@ -536,6 +536,28 @@ It also returns the `str` token now that the token is complete.
 Note that the token name `str` above could have been `string` instead - the
 namespace for token names is distinct from the namespace for lexer modes.
 Multiple modes can be specified for a token or pattern or drop statement.
 For example, if the grammar wanted to recognize only an identifier following
 a `.` token, and not other keywords, it could switch to an `identonly` mode
 when matching a `.`.
 The `ident` token pattern will be matched in either the `default` or
 `identonly` mode.
 ```
 ptype char;
 token abc;
 token def;
 default, identonly: token ident /[a-z]+/ <<
  $$ = match[0];
  $mode(default);
  return $token(ident);
 >>
 token dot /\./ <<
  $mode(identonly);
 >>
 default, identonly: drop /\s+/;
 ```
 ##> Specifying parser value types - the `ptype` statement
 The `ptype` statement is used to define parser value type(s).
--- a/lib/propane/generator.rb
+++ b/lib/propane/generator.rb
@ -43,8 +43,8 @@ class Propane
      # Assign default pattern mode to patterns without a mode assigned.
      found_default = false
      @grammar.patterns.each do |pattern|
-        if pattern.mode.nil?
+        if pattern.modes.empty?
-          pattern.mode = "default"
+          pattern.modes << "default"
          found_default = true
        end
        pattern.ptypename ||= "default"
--- a/lib/propane/grammar.rb
+++ b/lib/propane/grammar.rb
@ -25,7 +25,7 @@ class Propane
      @code_blocks = {}
      @line_number = 1
      @next_line_number = @line_number
-      @mode = nil
+      @modeline = nil
      @input = input.gsub("\r\n", "\n")
      @ptypes = {"default" => "void *"}
      @prefix = "p_"
@ -58,7 +58,7 @@ class Propane
    def parse_statement!
      if parse_white_space!
      elsif parse_comment_line!
-      elsif @mode.nil? && parse_mode_label!
+      elsif @modeline.nil? && parse_mode_label!
      elsif parse_ast_statement!
      elsif parse_ast_prefix_statement!
      elsif parse_ast_suffix_statement!
@ -81,8 +81,8 @@ class Propane
    end
    def parse_mode_label!
-      if md = consume!(/(#{IDENTIFIER_REGEX})\s*:/)
+      if md = consume!(/(#{IDENTIFIER_REGEX}(?:\s*,\s*#{IDENTIFIER_REGEX})*)\s*:/)
-        @mode = md[1]
+        @modeline = md[1]
      end
    end
@ -117,7 +117,7 @@ class Propane
        md = consume!(/([\w.]+)\s*/, "expected module name")
        @modulename = md[1]
        consume!(/;/, "expected `;'")
-        @mode = nil
+        @modeline = nil
        true
      end
    end
@ -153,9 +153,9 @@ class Propane
        end
        token = Token.new(name, ptypename, @line_number)
        @tokens << token
-        pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, mode: @mode, ptypename: ptypename)
+        pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, modes: get_modes_from_modeline, ptypename: ptypename)
        @patterns << pattern
-        @mode = nil
+        @modeline = nil
        true
      end
    end
@ -173,7 +173,7 @@ class Propane
        consume!(/;/, "expected `;'");
        token = Token.new(name, ptypename, @line_number)
        @tokens << token
-        @mode = nil
+        @modeline = nil
        true
      end
    end
@ -186,8 +186,8 @@ class Propane
        end
        consume!(/\s+/)
        consume!(/;/, "expected `;'")
-        @patterns << Pattern.new(pattern: pattern, line_number: @line_number, mode: @mode)
+        @patterns << Pattern.new(pattern: pattern, line_number: @line_number, modes: get_modes_from_modeline)
-        @mode = nil
+        @modeline = nil
        true
      end
    end
@ -208,7 +208,7 @@ class Propane
          end
        end
        @rules << Rule.new(rule_name, components, code, ptypename, @line_number)
-        @mode = nil
+        @modeline = nil
        true
      end
    end
@ -225,8 +225,8 @@ class Propane
        unless code = parse_code_block!
          raise Error.new("Line #{@line_number}: expected code block to follow pattern")
        end
-        @patterns << Pattern.new(pattern: pattern, line_number: @line_number, code: code, mode: @mode, ptypename: ptypename)
+        @patterns << Pattern.new(pattern: pattern, line_number: @line_number, code: code, modes: get_modes_from_modeline, ptypename: ptypename)
-        @mode = nil
+        @modeline = nil
        true
      end
    end
@ -247,7 +247,7 @@ class Propane
        else
          @code_blocks[name] = code
        end
-        @mode = nil
+        @modeline = nil
        true
      end
    end
@ -315,6 +315,14 @@ class Propane
      end
    end
    # Build the set of lexer mode names from the pending mode label.
    #
    # @return [Set]
    #   Mode names listed in @modeline (comma-separated, with surrounding
    #   whitespace removed), or an empty set when no mode label is pending.
    def get_modes_from_modeline
      return Set.new unless @modeline

      mode_names = @modeline.split(",").map(&:strip)
      Set.new(mode_names)
    end
  end
 end
--- a/lib/propane/lexer.rb
+++ b/lib/propane/lexer.rb
@ -26,8 +26,14 @@ class Propane
    private
    def build_tables!
-      @modes = @grammar.patterns.group_by do |pattern|
+      modenames = @grammar.patterns.reduce(Set.new) do |result, pattern|
-        pattern.mode
+        result + pattern.modes
      end
      @modes = modenames.reduce({}) do |result, modename|
        result[modename] = @grammar.patterns.select do |pattern|
          pattern.modes.include?(modename)
        end
        result
      end.transform_values do |patterns|
        {dfa: DFA.new(patterns)}
      end
--- a/lib/propane/pattern.rb
+++ b/lib/propane/pattern.rb
@ -26,9 +26,9 @@ class Propane
    #   Regex NFA for matching the pattern.
    attr_reader :nfa
-    # @return [String, nil]
+    # @return [Set]
-    #   Lexer mode for this pattern.
+    #   Lexer modes for this pattern.
-    attr_accessor :mode
+    attr_accessor :modes
    # @return [String, nil]
    #   Parser value type name.
@ -46,14 +46,14 @@ class Propane
    #   Token to be returned by this pattern.
    # @option options [Integer, nil] :line_number
    #   Line number where the token was defined in the input grammar.
-    # @option options [String, nil] :mode
+    # @option options [Set] :modes
-    #   Lexer mode for this pattern.
+    #   Lexer modes for this pattern.
    def initialize(options)
      @code = options[:code]
      @pattern = options[:pattern]
      @token = options[:token]
      @line_number = options[:line_number]
-      @mode = options[:mode]
+      @modes = options[:modes]
      @ptypename = options[:ptypename]
      regex = Regex.new(@pattern)
      regex.nfa.end_state.accepts = self
--- a/spec/propane/grammar_spec.rb
+++ b/spec/propane/grammar_spec.rb
@ -151,30 +151,30 @@ EOF
      o = grammar.patterns.find {|pattern| pattern.token == o}
      expect(o).to_not be_nil
-      expect(o.mode).to be_nil
+      expect(o.modes).to be_empty
      o = grammar.tokens.find {|token| token.name == "b"}
      expect(o).to_not be_nil
      o = grammar.patterns.find {|pattern| pattern.token == o}
      expect(o).to_not be_nil
-      expect(o.mode).to eq "m1"
+      expect(o.modes).to eq Set["m1"]
      o = grammar.patterns.find {|pattern| pattern.pattern == "foo"}
      expect(o).to_not be_nil
-      expect(o.mode).to be_nil
+      expect(o.modes).to be_empty
      o = grammar.patterns.find {|pattern| pattern.pattern == "bar"}
      expect(o).to_not be_nil
-      expect(o.mode).to eq "m2"
+      expect(o.modes).to eq Set["m2"]
      o = grammar.patterns.find {|pattern| pattern.pattern == "q"}
      expect(o).to_not be_nil
-      expect(o.mode).to be_nil
+      expect(o.modes).to be_empty
      o = grammar.patterns.find {|pattern| pattern.pattern == "r"}
      expect(o).to_not be_nil
-      expect(o.mode).to eq "m3"
+      expect(o.modes).to eq Set["m3"]
    end
    it "allows assigning ptypes to tokens and rules" do
--- a/spec/propane_spec.rb
+++ b/spec/propane_spec.rb
@ -621,6 +621,62 @@ EOF
        ])
      end
      it "multiple lexer modes may apply to a pattern" do
        case language
        when "c"
          write_grammar <<EOF
 <<
 #include <stdio.h>
 >>
 ptype char;
 token abc;
 token def;
 default, identonly: token ident /[a-z]+/ <<
  $$ = match[0];
  $mode(default);
  return $token(ident);
 >>
 token dot /\\./ <<
  $mode(identonly);
 >>
 default, identonly: drop /\\s+/;
 Start -> abc dot ident <<
  printf("ident: %c\\n", $3);
 >>
 EOF
        when "d"
          write_grammar <<EOF
 <<
 import std.stdio;
 >>
 ptype char;
 token abc;
 token def;
 default, identonly: token ident /[a-z]+/ <<
  $$ = match[0];
  $mode(default);
 >>
 token dot /\\./ <<
  $mode(identonly);
 >>
 default, identonly: drop /\\s+/;
 Start -> abc dot ident <<
  writeln("ident: ", $3);
 >>
 EOF
        end
        run_propane(language: language)
        compile("spec/test_lexer_multiple_modes.#{language}", language: language)
        results = run_test
        expect(results.status).to eq 0
        verify_lines(results.stdout, [
          "ident: d",
          "pass1",
          "ident: a",
          "pass2",
        ])
      end
      it "executes user code associated with a parser rule" do
        case language
        when "c"
--- a/spec/test_lexer_multiple_modes.c
+++ b/spec/test_lexer_multiple_modes.c
@ -0,0 +1,20 @@
 #include "testparser.h"
 #include <assert.h>
 #include <string.h>
 #include <stdio.h>
 /*
  * Test driver: parses two inputs with the generated parser and prints a
  * "passN" marker after each successful parse. In both inputs the text after
  * the '.' spells a token name ("def" / "abc").
  */
 int main()
 {
    char const * input = "abc.def";
    p_context_t context;
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    /* NOTE(review): p_parse() is invoked inside assert(); if this file were
     * ever built with NDEBUG the parse would not run at all. */
    assert(p_parse(&context) == P_SUCCESS);
    printf("pass1\n");
    /* Second input puts whitespace on both sides of the '.'. */
    input = "abc .  abc";
    p_context_init(&context, (uint8_t const *)input, strlen(input));
    assert(p_parse(&context) == P_SUCCESS);
    printf("pass2\n");
    return 0;
 }
--- a/spec/test_lexer_multiple_modes.d
+++ b/spec/test_lexer_multiple_modes.d
@ -0,0 +1,21 @@
 import testparser;
 import std.stdio;
 /// Program entry point; does no work itself and returns 0.
 /// NOTE(review): presumably built with unittests enabled so the unittest
 /// block in this file performs the actual checks - confirm against the
 /// spec's compile helper.
 int main()
 {
    return 0;
 }
 /// Parses two inputs with the generated parser and prints a "passN" marker
 /// after each successful parse. In both inputs the text after the '.' spells
 /// a token name ("def" / "abc").
 unittest
 {
    string input = `abc.def`;
    p_context_t context;
    p_context_init(&context, input);
    assert(p_parse(&context) == P_SUCCESS);
    writeln("pass1");
    // Second input puts whitespace on both sides of the '.'.
    input = `abc .  abc`;
    p_context_init(&context, input);
    assert(p_parse(&context) == P_SUCCESS);
    writeln("pass2");
 }