Add forward slashes around patterns and parse more robustly

2022-09-28 23:05:01 -04:00 · 2022-09-28 23:05:01 -04:00 · 04367db0ac
commit 04367db0ac
parent 1547528ecf
4 changed files with 86 additions and 44 deletions
--- a/lib/propane/grammar.rb
+++ b/lib/propane/grammar.rb
@@ -14,6 +14,7 @@ class Propane
      @rules = []
      @code_id = 0
      @line_number = 1
      @next_line_number = @line_number
      @input = input.gsub("\r\n", "\n")
      parse_grammar!
    end
@@ -27,7 +28,6 @@ class Propane
    end
    def parse_statement!
      @next_line_number = @line_number
      if parse_white_space!
      elsif parse_comment_line!
      elsif parse_module_statement!
@@ -42,53 +42,56 @@ class Propane
        end
        raise Error.new("Unexpected grammar input at line #{@line_number}: #{@input.chomp}")
      end
      @line_number = @next_line_number
    end
    def parse_white_space!
-      consume!(/\A\s+/)
+      consume!(/\s+/)
    end
    def parse_comment_line!
-      consume!(/\A#.*\n/)
+      consume!(/#.*\n/)
    end
    def parse_module_statement!
-      if md = consume!(/\Amodule\s+(\S+)\s*;/)
+      if consume!(/module\s+/)
        md = consume!(/([\w.]+)\s*/, "expected module name")
        @modulename = md[1]
        consume!(/;/, "expected `;'")
      end
    end
    def parse_class_statement!
-      if md = consume!(/\Aclass\s+(\S+)\s*;/)
+      if consume!(/class\s+/)
        md = consume!(/([\w.]+)\s*/, "expected class name")
        @classname = md[1]
        consume!(/;/, "expected `;'")
      end
    end
    def parse_token_statement!
-      if md = consume!(/\Atoken\s+(\S+?)(?:\s+([^\n]+?))?\s*(?:;|<<\n(.*?)^>>\n)/m)
+      if consume!(/token\s+/)
-        name, pattern, code = *md[1, 3]
+        md = consume!(/([a-zA-Z_][a-zA-Z_0-9]*)/, "expected token name")
-        if pattern.nil?
+        name = md[1]
-          pattern = name
+        if consume!(/\s+/)
          pattern = parse_pattern!
        end
-        unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
+        pattern ||= name
-          raise Error.new("Invalid token name #{name.inspect}")
+        consume!(/\s+/)
-        end
+        if code = parse_code_block!
        token = Token.new(name: name, id: @tokens.size, line_number: @line_number)
        @tokens << token
        if code
          code_id = @code_id
          @code_id += 1
        else
-          code_id = nil
+          consume!(/;/, "expected pattern or `;' or code block")
        end
        token = Token.new(name: name, id: @tokens.size, line_number: @line_number)
        @tokens << token
        pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, code_id: code_id)
        @patterns << pattern
      end
    end
    def parse_tokenid_statement!
-      if md = consume!(/\Atokenid\s+(\S+?)\s*;/m)
+      if md = consume!(/tokenid\s+(\S+?)\s*;/m)
        name = md[1]
        unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
          raise Error.new("Invalid token name #{name.inspect}")
@@ -99,14 +102,19 @@ class Propane
    end
    def parse_drop_statement!
-      if md = consume!(/\Adrop\s+(\S+)\s*;/)
+      if md = consume!(/drop\s+/)
-        pattern = md[1]
+        pattern = parse_pattern!
        unless pattern
          raise Error.new("Line #{@line_number}: expected pattern to follow `drop'")
        end
        consume!(/\s+/)
        consume!(/;/, "expected `;'")
        @patterns << Pattern.new(pattern: pattern, line_number: @line_number, drop: true)
      end
    end
    def parse_rule_statement!
-      if md = consume!(/\A(\S+)\s*->\s*([^\n]*?)(?:;|<<\n(.*?)^>>\n)/m)
+      if md = consume!(/(\S+)\s*->\s*([^\n]*?)(?:;|<<\n(.*?)^>>\n)/m)
        rule_name, components, code = *md[1, 3]
        unless rule_name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
          raise Error.new("Invalid rule name #{name.inspect}")
@@ -117,22 +125,56 @@ class Propane
      end
    end
    def parse_pattern!
      if md = consume!(%r{/})
        pattern = ""
        while !consume!(%r{/})
          if consume!(%r{\\})
            pattern += "\\"
            if md = consume!(%r{(.)})
              pattern += md[1]
            else
              raise Error.new("Line #{@line_number}: unterminated escape sequence")
            end
          elsif md = consume!(%r{(.)})
            pattern += md[1]
          end
        end
        pattern
      end
    end
    def parse_code_block!
      if md = consume!(/<<\n(.*?)^>>\n/m)
        md[1]
      end
    end
    # Check if the input string matches the given regex.
    #
    # If so, remove the match from the input string, and update the line
-    # number.
+    # number. If the regex is not matched and an error message is provided,
    # the error is raised.
    #
    # @param regex [Regexp]
    #   Regex to attempt to match.
    # @param error_message [String, nil]
    #   Error message to display if the regex is not matched. If nil and the
    #   regex is not matched, an error is not raised.
    #
    # @return [MatchData, nil]
    #   MatchData for the given regex if it was matched and removed from the
    #   input.
-    def consume!(regex)
+    def consume!(regex, error_message = nil)
-      if md = @input.match(regex)
+      @line_number = @next_line_number
      if md = @input.match(/\A#{regex}/)
        @input.slice!(0, md[0].size)
        @next_line_number += md[0].count("\n")
        md
      elsif error_message
        raise Error.new("Line #{@line_number}: Error: #{error_message}")
      else
        false
      end
    end
--- a/spec/propane/grammar_spec.rb
+++ b/spec/propane/grammar_spec.rb
@@ -10,7 +10,7 @@ class Foobar;
 token while;
 token id
-  [a-zA-Z_][a-zA-Z_0-9]*;
+  /[a-zA-Z_][a-zA-Z_0-9]*/;
 token token_with_code <<
 Code for the token
@@ -18,7 +18,7 @@ Code for the token
 tokenid token_with_no_pattern;
-drop \\s+;
+drop /\\s+/;
 A -> B <<
  a = 42;
@@ -46,13 +46,13 @@ EOF
      o = grammar.tokens.find {|token| token.name == "id"}
      expect(o).to_not be_nil
-      expect(o.line_number).to eq 8
+      expect(o.line_number).to eq 9
      expect(o.id).to eq 1
      o = grammar.patterns.find {|pattern| pattern.token == o}
      expect(o).to_not be_nil
      expect(o.pattern).to eq "[a-zA-Z_][a-zA-Z_0-9]*"
-      expect(o.line_number).to eq 8
+      expect(o.line_number).to eq 9
      expect(o.code_id).to be_nil
      expect(o.code).to be_nil
--- a/spec/propane/lexer/dfa_spec.rb
+++ b/spec/propane/lexer/dfa_spec.rb
@@ -82,15 +82,15 @@ EOF
    expect(run(<<EOF, "foobar")).to eq expected
 token foo;
 token bar;
-token identifier [a-z]+;
+token identifier /[a-z]+/;
 EOF
    expected = [
      ["plusplus", "++"],
      ["plus", "+"],
    ]
    expect(run(<<EOF, "+++")).to eq expected
-token plus \\+;
+token plus /\\+/;
-token plusplus \\+\\+;
+token plusplus /\\+\\+/;
 EOF
  end
@@ -103,7 +103,7 @@ EOF
    expect(run(<<EOF, "foo \tbar")).to eq expected
 token foo;
 token bar;
-token WS \\s+;
+token WS /\\s+/;
 EOF
  end
@@ -116,7 +116,7 @@ EOF
    expect(run(<<EOF, "foo \tbar")).to eq expected
 token foo;
 token bar;
-drop \\s+;
+drop /\\s+/;
 EOF
  end
@@ -125,7 +125,7 @@ EOF
      ["semicolon", ";"],
    ]
    expect(run(<<EOF, ";")).to eq expected
-token semicolon \;;
+token semicolon /;/;
 EOF
  end
 end
--- a/spec/propane_spec.rb
+++ b/spec/propane_spec.rb
@@ -27,10 +27,10 @@ describe Propane do
  it "generates a D lexer" do
    write_grammar <<EOF
-token int \\d+;
+token int /\\d+/;
-token plus \\+;
+token plus /\\+/;
-token times \\*;
+token times /\\*/;
-drop \\s+;
+drop /\\s+/;
 Start -> Foo;
 Foo -> int <<
 >>
@@ -44,10 +44,10 @@ EOF
  it "generates a parser" do
    write_grammar <<EOF
-token plus \\+;
+token plus /\\+/;
-token times \\*;
+token times /\\*/;
-token zero 0;
+token zero /0/;
-token one 1;
+token one /1/;
 Start -> E;
 E -> E times B;
 E -> E plus B;
@@ -60,7 +60,7 @@ EOF
  it "generates an SLR parser" do
    write_grammar <<EOF
-token one 1;
+token one /1/;
 Start -> E;
 E -> one E;
 E -> one;
@@ -86,7 +86,7 @@ EOF
    write_grammar <<EOF
 token a;
 token b;
-drop \\s+;
+drop /\\s+/;
 Start -> a R1;
 Start -> b R1;
 R1 -> b;