4 changed files with 77 additions and 280 deletions
--- a/assets/parser.d.erb
+++ b/assets/parser.d.erb
@ -432,14 +432,14 @@ class <%= @classname %>
            {
                if (shifts[i].symbol == symbol)
                {
-//                    if (symbol < _TOKEN_COUNT)
+                    if (symbol < _TOKEN_COUNT)
-//                    {
+                    {
-//                        writeln("Shifting ", token_names[symbol]);
+                        writeln("Shifting ", token_names[symbol]);
-//                    }
+                    }
-//                    else
+                    else
-//                    {
+                    {
-//                        writeln("Shifting rule set ", symbol);
+                        writeln("Shifting rule set ", symbol);
-//                    }
+                    }
                    return shifts[i].state;
                }
            }
@ -455,19 +455,19 @@ class <%= @classname %>
                if ((reduces[i].token == token) ||
                    (reduces[i].token == _TOKEN_NONE))
                {
-//                    write("Reducing rule ", reduces[i].rule, ", rule set ", reduces[i].rule_set, " lookahead ");
+                    write("Reducing rule ", reduces[i].rule, ", rule set ", reduces[i].rule_set, " lookahead ");
-//                    if (token < _TOKEN_COUNT)
+                    if (token < _TOKEN_COUNT)
-//                    {
+                    {
-//                        writeln(token_names[token]);
+                        writeln(token_names[token]);
-//                    }
+                    }
-//                    else if (token == _TOKEN_EOF)
+                    else if (token == _TOKEN_EOF)
-//                    {
+                    {
-//                        writeln("{EOF}");
+                        writeln("{EOF}");
-//                    }
+                    }
-//                    else
+                    else
-//                    {
+                    {
-//                        writeln("{other}");
+                        writeln("{other}");
-//                    }
+                    }
                    return i;
                }
            }
--- a/lib/propane/grammar.rb
+++ b/lib/propane/grammar.rb
@ -13,126 +13,66 @@ class Propane
      @tokens = []
      @rules = []
      @code_id = 0
-      @line_number = 1
+      input = input.gsub("\r\n", "\n")
-      @input = input.gsub("\r\n", "\n")
+      parse_grammar(input)
      parse_grammar!
    end
    private
-    def parse_grammar!
+    def parse_grammar(input)
-      while @input.size > 0
+      line_number = 1
-        parse_statement!
+      while !input.empty?
-      end
+        if sliced = input.slice!(/\A\s+/)
-    end
+          # Skip white space.
-
+        elsif sliced = input.slice!(/\A#.*\n/)
-    def parse_statement!
+          # Skip comment lines.
-      @next_line_number = @line_number
+        elsif sliced = input.slice!(/\Amodule\s+(\S+)\s*;/)
-      if parse_white_space!
+          @modulename = $1
-      elsif parse_comment_line!
+        elsif sliced = input.slice!(/\Aclass\s+(\S+)\s*;/)
-      elsif parse_module_statement!
+          @classname = $1
-      elsif parse_class_statement!
+        elsif sliced = input.slice!(/\Atoken\s+(\S+?)(?:\s+([^\n]+?))?\s*(?:;|<<\n(.*?)^>>\n)/m)
-      elsif parse_token_statement!
+          name, pattern, code = $1, $2, $3
-      elsif parse_tokenid_statement!
+          if pattern.nil?
-      elsif parse_drop_statement!
+            pattern = name
-      elsif parse_rule_statement!
+          end
-      else
+          unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
-        if @input.size > 25
+            raise Error.new("Invalid token name #{name.inspect}")
-          @input = @input.slice(0..20) + "..."
+          end
-        end
+          token = Token.new(name: name, id: @tokens.size, line_number: line_number)
-        raise Error.new("Unexpected grammar input at line #{@line_number}: #{@input.chomp}")
+          @tokens << token
-      end
+          if code
-      @line_number = @next_line_number
+            code_id = @code_id
-    end
+            @code_id += 1
-
+          else
-    def parse_white_space!
+            code_id = nil
-      consume!(/\A\s+/)
+          end
-    end
+          pattern = Pattern.new(pattern: pattern, token: token, line_number: line_number, code: code, code_id: code_id)
-
+          @patterns << pattern
-    def parse_comment_line!
+        elsif sliced = input.slice!(/\Atokenid\s+(\S+?)\s*;/m)
-      consume!(/\A#.*\n/)
+          name = $1
-    end
+          unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
-
+            raise Error.new("Invalid token name #{name.inspect}")
-    def parse_module_statement!
+          end
-      if md = consume!(/\Amodule\s+(\S+)\s*;/)
+          token = Token.new(name: name, id: @tokens.size, line_number: line_number)
-        @modulename = md[1]
+          @tokens << token
-      end
+        elsif sliced = input.slice!(/\Adrop\s+(\S+)\s*;/)
-    end
+          pattern = $1
-
+          @patterns << Pattern.new(pattern: pattern, line_number: line_number, drop: true)
-    def parse_class_statement!
+        elsif sliced = input.slice!(/\A(\S+)\s*->\s*([^\n]*?)(?:;|<<\n(.*?)^>>\n)/m)
-      if md = consume!(/\Aclass\s+(\S+)\s*;/)
+          rule_name, components, code = $1, $2, $3
-        @classname = md[1]
+          unless rule_name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
-      end
+            raise Error.new("Invalid rule name #{name.inspect}")
-    end
+          end
-
+          components = components.strip.split(/\s+/)
-    def parse_token_statement!
+          # Reserve rule ID 0 for the "real" start rule.
-      if md = consume!(/\Atoken\s+(\S+?)(?:\s+([^\n]+?))?\s*(?:;|<<\n(.*?)^>>\n)/m)
+          @rules << Rule.new(rule_name, components, code, line_number, @rules.size + 1)
        name, pattern, code = *md[1, 3]
        if pattern.nil?
          pattern = name
        end
        unless name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
          raise Error.new("Invalid token name #{name.inspect}")
        end
        token = Token.new(name: name, id: @tokens.size, line_number: @line_number)
        @tokens << token
        if code
          code_id = @code_id
          @code_id += 1
        else
-          code_id = nil
+          if input.size > 25
            input = input.slice(0..20) + "..."
          end
          raise Error.new("Unexpected grammar input at line #{line_number}: #{input.chomp}")
        end
-        pattern = Pattern.new(pattern: pattern, token: token, line_number: @line_number, code: code, code_id: code_id)
+        line_number += sliced.count("\n")
        @patterns << pattern
      end
    end
    # Try to consume a `tokenid NAME;` statement from the front of the input.
    #
    # On success a Token is created with the next free ID (the current number
    # of tokens) and the current line number, and appended to @tokens. No
    # Pattern is created for it.
    #
    # @return [Array, nil]
    #   The @tokens array (truthy) if a statement was consumed, nil if the
    #   input does not start with a tokenid statement.
    #
    # @raise [Error]
    #   If the token name is not a valid identifier.
    def parse_tokenid_statement!
      match = consume!(/\Atokenid\s+(\S+?)\s*;/m)
      return nil unless match
      token_name = match[1]
      raise Error.new("Invalid token name #{token_name.inspect}") unless token_name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
      @tokens << Token.new(name: token_name, id: @tokens.size, line_number: @line_number)
    end
    # Try to consume a `drop PATTERN;` statement from the front of the input.
    #
    # On success a Pattern flagged with drop: true (and no token) is appended
    # to @patterns, tagged with the current line number.
    #
    # @return [Array, nil]
    #   The @patterns array (truthy) if a statement was consumed, nil if the
    #   input does not start with a drop statement.
    def parse_drop_statement!
      match = consume!(/\Adrop\s+(\S+)\s*;/)
      return nil unless match
      drop_pattern = Pattern.new(pattern: match[1], line_number: @line_number, drop: true)
      @patterns << drop_pattern
    end
    # Try to consume a rule statement from the front of the input.
    #
    # Two forms are accepted:
    #   NAME -> COMPONENTS ;
    #   NAME -> COMPONENTS <<
    #   ...code lines...
    #   >>
    #
    # On success a Rule is appended to @rules, built from the rule name, the
    # whitespace-separated component list (possibly empty), the code block
    # (nil for the `;` form), the current line number and the rule ID.
    #
    # @return [Array, nil]
    #   The @rules array (truthy) if a statement was consumed, nil if the
    #   input does not start with a rule statement.
    #
    # @raise [Error]
    #   If the rule name is not a valid identifier.
    def parse_rule_statement!
      if md = consume!(/\A(\S+)\s*->\s*([^\n]*?)(?:;|<<\n(.*?)^>>\n)/m)
        rule_name, components, code = *md[1, 3]
        unless rule_name =~ /^[a-zA-Z_][a-zA-Z_0-9]*$/
          # Report the offending rule name. This used to reference an
          # undefined `name`, which raised NameError instead of Error.
          raise Error.new("Invalid rule name #{rule_name.inspect}")
        end
        components = components.strip.split(/\s+/)
        # Reserve rule ID 0 for the "real" start rule.
        @rules << Rule.new(rule_name, components, code, @line_number, @rules.size + 1)
      end
    end
    # Attempt to match the given regex against the remaining input.
    #
    # When it matches, as many characters as the matched text is long are
    # removed from the start of the input, and the pending line number is
    # advanced by the number of newlines in the matched text.
    #
    # @param regex [Regexp]
    #   Regex to try against the input.
    #
    # @return [MatchData, nil]
    #   The MatchData if the regex matched (and the input was shortened),
    #   otherwise nil.
    def consume!(regex)
      match = @input.match(regex)
      return nil unless match
      matched_text = match[0]
      @input.slice!(0, matched_text.size)
      @next_line_number += matched_text.count("\n")
      match
    end
--- a/spec/propane/grammar_spec.rb
+++ b/spec/propane/grammar_spec.rb
@ -1,145 +0,0 @@
 class Propane
  describe Grammar do
    it "parses a user grammar" do
      input = <<EOF
 # Comment line
 module a.b;
 class Foobar;
 token while;
 token id
  [a-zA-Z_][a-zA-Z_0-9]*;
 token token_with_code <<
 Code for the token
 >>
 tokenid token_with_no_pattern;
 drop \\s+;
 A -> B <<
  a = 42;
 >>
 B -> C while id;
 B -> <<
  b = 0;
 >>
 EOF
      grammar = Grammar.new(input)
      expect(grammar.classname).to eq "Foobar"
      expect(grammar.modulename).to eq "a.b"
      o = grammar.tokens.find {|token| token.name == "while"}
      expect(o).to_not be_nil
      expect(o.line_number).to eq 6
      expect(o.id).to eq 0
      o = grammar.patterns.find {|pattern| pattern.token == o}
      expect(o).to_not be_nil
      expect(o.pattern).to eq "while"
      expect(o.line_number).to eq 6
      expect(o.code_id).to be_nil
      expect(o.code).to be_nil
      o = grammar.tokens.find {|token| token.name == "id"}
      expect(o).to_not be_nil
      expect(o.line_number).to eq 8
      expect(o.id).to eq 1
      o = grammar.patterns.find {|pattern| pattern.token == o}
      expect(o).to_not be_nil
      expect(o.pattern).to eq "[a-zA-Z_][a-zA-Z_0-9]*"
      expect(o.line_number).to eq 8
      expect(o.code_id).to be_nil
      expect(o.code).to be_nil
      o = grammar.tokens.find {|token| token.name == "token_with_code"}
      expect(o).to_not be_nil
      expect(o.line_number).to eq 11
      expect(o.id).to eq 2
      o = grammar.patterns.find {|pattern| pattern.token == o}
      expect(o).to_not be_nil
      expect(o.pattern).to eq "token_with_code"
      expect(o.line_number).to eq 11
      expect(o.code_id).to eq 0
      expect(o.code).to eq "Code for the token\n"
      o = grammar.tokens.find {|token| token.name == "token_with_no_pattern"}
      expect(o).to_not be_nil
      expect(o.line_number).to eq 15
      o = grammar.patterns.find {|pattern| pattern.token == o}
      expect(o).to be_nil
      o = grammar.patterns.find {|pattern| pattern.pattern == "\\s+"}
      expect(o).to_not be_nil
      expect(o.line_number).to eq 17
      expect(o.token).to be_nil
      expect(o.code_id).to be_nil
      expect(o.code).to be_nil
      expect(grammar.rules.size).to eq 3
      o = grammar.rules[0]
      expect(o.name).to eq "A"
      expect(o.components).to eq %w[B]
      expect(o.id).to eq 1
      expect(o.line_number).to eq 19
      expect(o.code).to eq "  a = 42;\n"
      o = grammar.rules[1]
      expect(o.name).to eq "B"
      expect(o.components).to eq %w[C while id]
      expect(o.id).to eq 2
      expect(o.line_number).to eq 22
      expect(o.code).to be_nil
      o = grammar.rules[2]
      expect(o.name).to eq "B"
      expect(o.components).to eq []
      expect(o.id).to eq 3
      expect(o.line_number).to eq 23
      expect(o.code).to eq "  b = 0;\n"
    end
    it "parses code segments with semicolons" do
      input = <<EOF
 token code1 <<
  a = b;
  return c;
 >>
 token code2 <<
  writeln("Hello there");
 >>
 tokenid token_with_no_pattern;
 EOF
      grammar = Grammar.new(input)
      o = grammar.tokens.find {|token| token.name == "code1"}
      expect(o).to_not be_nil
      expect(o.id).to eq 0
      expect(o.line_number).to eq 1
      o = grammar.patterns.find {|pattern| pattern.token == o}
      expect(o).to_not be_nil
      expect(o.code_id).to eq 0
      expect(o.code).to eq "  a = b;\n  return c;\n"
      o = grammar.tokens.find {|token| token.name == "code2"}
      expect(o).to_not be_nil
      expect(o.id).to eq 1
      expect(o.line_number).to eq 6
      o = grammar.patterns.find {|pattern| pattern.token == o}
      expect(o).to_not be_nil
      expect(o.code_id).to eq 1
      expect(o.code).to eq %[  writeln("Hello there");\n]
    end
  end
 end
--- a/spec/test_user_code.d
+++ b/spec/test_user_code.d
@ -11,8 +11,10 @@ unittest
    string input = "abcdef";
    auto parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length);
    assert(parser.parse() == true);
    writeln("pass1");
    input = "abcabcdef";
    parser = new Testparser.Parser(cast(const(ubyte) *)input.ptr, input.length);
    assert(parser.parse() == true);
    writeln("pass2");
 }