# propane/spec/propane_spec.rb

require "fileutils"
require "open3"

# Outcome of one captured external command: the text it wrote to stdout and
# stderr, plus the status value that Open3.capture3 returned with them.
Results = Struct.new(
  :stdout,
  :stderr,
  :status
)

describe Propane do
  # Stores the grammar under test at the fixed path the parser build reads
  # (spec/run/testparser.propane), replacing any previous contents.
  #
  # @param grammar [String] Propane grammar source text
  # @return [Integer] number of bytes written
  def write_grammar(grammar)
    File.open("spec/run/testparser.propane", "w") do |grammar_file|
      grammar_file.write(grammar)
    end
  end

  # Runs ./propane.sh on spec/run/testparser.propane, producing
  # spec/run/testparser.d and logging to spec/run/testparser.log.
  #
  # @param options [Hash] pass +capture: true+ to collect the command's output
  #   instead of asserting that it succeeded
  # @return [Results] stdout, stderr and status when capturing; otherwise the
  #   result of the +be_truthy+ expectation on the +system+ return value
  def build_parser(options = {})
    command = [
      "./propane.sh",
      "spec/run/testparser.propane",
      "spec/run/testparser.d",
      "--log",
      "spec/run/testparser.log",
    ]
    # Capturing callers inspect a failing run themselves, so no expectation
    # is made on this path.
    return Results.new(*Open3.capture3(*command)) if options[:capture]

    expect(system(*command)).to be_truthy
  end

  # Compiles the generated parser source (spec/run/testparser.d) together with
  # the given files into the spec/run/testparser executable using gdc with
  # -funittest, and expects the compiler invocation to succeed.
  #
  # @param test_files [Array<String>] extra source paths appended to the
  #   compiler command line
  def compile(*test_files)
    gdc_command = [
      "gdc",
      "-funittest",
      "-o",
      "spec/run/testparser",
      "spec/run/testparser.d",
      "-Ispec",
    ] + test_files
    expect(system(*gdc_command)).to be_truthy
  end

  # Executes the compiled test program (spec/run/testparser) and captures its
  # output. The raw streams are also saved to spec/run/.stderr and
  # spec/run/.stdout.
  #
  # @return [Results] stdout, stderr and status of the program
  def run
    outcome = Results.new(*Open3.capture3("spec/run/testparser"))
    File.binwrite("spec/run/.stderr", outcome.stderr)
    File.binwrite("spec/run/.stdout", outcome.stdout)
    outcome
  end

  # Splits text into its individual lines with the trailing line ending of
  # each one removed.
  #
  # @param str [String] text to split
  # @return [Array<String>] the chomped lines, in order
  def lines(str)
    str.each_line.map { |line| line.chomp }
  end

  # Checks that every expected pattern matches at least one line of output.
  #
  # @param lines [String, Array<String>] output to search; a String is first
  #   split into chomped lines
  # @param patterns [Array<String, Regexp>] a Regexp must match some line; a
  #   String must equal some line once both are chomped
  # @return [Array] +patterns+, when every pattern was found
  # @raise [RuntimeError] for the first pattern without a matching line, after
  #   the searched lines have been dumped to $stderr
  #
  # NOTE: each pattern is looked up across all lines independently, so neither
  # the relative order of the patterns nor repeated entries are enforced.
  def verify_lines(lines, patterns)
    lines = lines.lines.map(&:chomp) if lines.is_a?(String)
    patterns.each_with_index do |pattern, i|
      matcher =
        case pattern
        when Regexp then ->(line) { line =~ pattern }
        else ->(line) { line.chomp == pattern.chomp }
        end
      next if lines.any?(&matcher)

      # Show what was actually searched before failing the example.
      $stderr.puts "Lines:"
      $stderr.puts lines
      raise "A line matching #{pattern.inspect} (index #{i}) was not found."
    end
  end

  # Give every example a fresh, empty spec/run directory so files written by
  # an earlier example cannot leak into the next one.
  before(:each) do
    scratch_dir = "spec/run"
    FileUtils.rm_rf(scratch_dir)
    FileUtils.mkdir_p(scratch_dir)
  end

  # Builds a parser for a grammar with int/plus/times tokens and a whitespace
  # drop pattern, compiles it with spec/test_d_lexer.d and expects the program
  # to exit with status 0 and write nothing to stderr.
  # The heredoc is an interpolating one, so each "\\" below reaches the
  # grammar file as a single backslash.
  it "generates a D lexer" do
    write_grammar(<<GRAMMAR)
token int /\\d+/;
token plus /\\+/;
token times /\\*/;
drop /\\s+/;
Start -> Foo;
Foo -> int <<
>>
Foo -> plus <<
>>
GRAMMAR
    build_parser
    compile("spec/test_d_lexer.d")
    outcome = run
    expect(outcome.stderr).to eq("")
    expect(outcome.status).to eq(0)
  end

  # Only parser generation is exercised for this expression grammar; the
  # generated source is neither compiled nor run.
  it "generates a parser" do
    write_grammar(<<GRAMMAR)
token plus /\\+/;
token times /\\*/;
token zero /0/;
token one /1/;
Start -> E;
E -> E times B;
E -> E plus B;
E -> B;
B -> zero;
B -> one;
GRAMMAR
    build_parser
  end

  # Only parser generation is exercised for this right-recursive grammar; the
  # generated source is neither compiled nor run.
  it "generates an SLR parser" do
    write_grammar(<<GRAMMAR)
token one /1/;
Start -> E;
E -> one E;
E -> one;
GRAMMAR
    build_parser
  end

  # R1 and R2 have the same right-hand side ("a b") and differ only in the
  # token that follows them in the Start rules. The compiled program from
  # spec/test_d_parser_identical_rules_lookahead.d must exit with status 0.
  it "distinguishes between multiple identical rules with lookahead symbol" do
    write_grammar(<<GRAMMAR)
token a;
token b;
Start -> R1 a;
Start -> R2 b;
R1 -> a b;
R2 -> a b;
GRAMMAR
    build_parser
    compile("spec/test_d_parser_identical_rules_lookahead.d")
    outcome = run
    expect(outcome.status).to eq(0)
  end

  # R1 appears after both "a" and "b" in the Start rules. The compiled program
  # from spec/test_d_parser_rule_from_multiple_states.d must exit with
  # status 0.
  it "handles reducing a rule that could be arrived at from multiple states" do
    write_grammar(<<GRAMMAR)
token a;
token b;
drop /\\s+/;
Start -> a R1;
Start -> b R1;
R1 -> b;
GRAMMAR
    build_parser
    compile("spec/test_d_parser_rule_from_multiple_states.d")
    outcome = run
    expect(outcome.status).to eq(0)
  end

  # The abc token carries a code block that prints "abc!". The program built
  # from spec/test_user_code.d must exit with status 0 and its stdout must
  # contain each of the expected lines.
  it "executes user code when matching lexer token" do
    write_grammar(<<GRAMMAR)
token abc <<
  writeln("abc!");
>>
token def;
Start -> Abcs def;
Abcs -> ;
Abcs -> abc Abcs;
GRAMMAR
    build_parser
    compile("spec/test_user_code.d")
    outcome = run
    expect(outcome.status).to eq(0)
    expected_output = %w[abc! pass1 abc! abc! pass2]
    verify_lines(outcome.stdout, expected_output)
  end

  # A bare /def/ pattern (no token name) carries a code block that prints
  # "def!". The program built from spec/test_pattern.d must exit with status 0
  # and its stdout must contain each of the expected lines.
  it "supports a pattern statement" do
    write_grammar(<<GRAMMAR)
token abc;
/def/ <<
  writeln("def!");
>>
Start -> abc;
GRAMMAR
    build_parser
    compile("spec/test_pattern.d")
    outcome = run
    expect(outcome.status).to eq(0)
    expected_output = %w[def! pass1 def! def! pass2]
    verify_lines(outcome.stdout, expected_output)
  end

  # The /ghi/ pattern's code block returns $token(abc), while /def/ only
  # prints. The program built from spec/test_return_token_from_pattern.d must
  # exit with status 0 and its stdout must contain each of the expected lines.
  it "supports returning tokens from pattern code blocks" do
    write_grammar(<<GRAMMAR)
token abc;
/def/ <<
  writeln("def!");
>>
/ghi/ <<
  writeln("ghi!");
  return $token(abc);
>>
Start -> abc;
GRAMMAR
    build_parser
    compile("spec/test_return_token_from_pattern.d")
    outcome = run
    expect(outcome.status).to eq(0)
    expected_output = %w[def! ghi! def!]
    verify_lines(outcome.stdout, expected_output)
  end

  # A double quote switches the lexer into "string" mode via $mode(string);
  # inside that mode a closing quote switches back with $mode(default) and
  # returns the string token. The program built from spec/test_lexer_modes.d
  # must exit with status 0 and its stdout must contain each expected line.
  it "supports lexer modes" do
    write_grammar(<<GRAMMAR)
token abc;
token def;
tokenid string;
drop /\\s+/;
/"/ <<
  writeln("begin string mode");
  $mode(string);
>>
string: /[^"]+/ <<
  writeln("captured string");
>>
string: /"/ <<
  $mode(default);
  return $token(string);
>>
Start -> abc string def;
GRAMMAR
    build_parser
    compile("spec/test_lexer_modes.d")
    outcome = run
    expect(outcome.status).to eq(0)
    expected_output = [
      "begin string mode",
      "captured string",
      "pass1",
      "begin string mode",
      "captured string",
      "pass2",
    ]
    verify_lines(outcome.stdout, expected_output)
  end

  # Each of the Start, A and B rules carries a code block that prints its own
  # marker. The program built from spec/test_parser_rule_user_code.d must exit
  # with status 0 and its stdout must contain each of the three markers.
  it "executes user code associated with a parser rule" do
    write_grammar(<<GRAMMAR)
token a;
token b;
Start -> A B <<
  writeln("Start!");
>>
A -> a <<
  writeln("A!");
>>
B -> b <<
  writeln("B!");
>>
GRAMMAR
    build_parser
    compile("spec/test_parser_rule_user_code.d")
    outcome = run
    expect(outcome.status).to eq(0)
    expected_output = %w[A! B! Start!]
    verify_lines(outcome.stdout, expected_output)
  end

  # With a uint parser value type, the As rules start at 0u for the empty
  # list and add 1u for every "a" appended. The program built from
  # spec/test_parsing_lists.d must exit with status 0 and leave stderr empty.
  it "parses lists" do
    write_grammar(<<GRAMMAR)
ptype uint;
token a;
Start -> As <<
  $$ = $1;
>>
As -> <<
  $$ = 0u;
>>
As -> As a <<
  $$ = $1 + 1u;
>>
GRAMMAR
    build_parser
    compile("spec/test_parsing_lists.d")
    outcome = run
    expect(outcome.status).to eq(0)
    expect(outcome.stderr).to eq("")
  end

  # E and F both derive the single token "e". The build is run in capture
  # mode because it is expected to fail: the status must be non-zero and
  # stderr must report a reduce/reduce conflict naming (E) and then (F).
  it "fails to generate a parser for a LR(1) grammar that is not LALR" do
    write_grammar(<<GRAMMAR)
token a;
token b;
token c;
token d;
token e;
Start -> a E c;
Start -> a F d;
Start -> b F c;
Start -> b E d;
E -> e;
F -> e;
GRAMMAR
    build_outcome = build_parser(capture: true)
    expect(build_outcome.status).to_not eq(0)
    expect(build_outcome.stderr).to match(%r{reduce/reduce conflict.*\(E\).*\(F\)})
  end

  # The id token's code block prints the +match+ value. The program built
  # from spec/test_lexer_match_text.d must exit with status 0 and its stdout
  # must contain the line for "identifier_123" as well as "pass1".
  it "provides matched text to user code blocks" do
    write_grammar(<<GRAMMAR)
token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
  writeln("Matched token is ", match);
>>
Start -> id;
GRAMMAR
    build_parser
    compile("spec/test_lexer_match_text.d")
    outcome = run
    expect(outcome.status).to eq(0)
    expected_output = [
      "Matched token is identifier_123",
      "pass1",
    ]
    verify_lines(outcome.stdout, expected_output)
  end

  # The word token's code block stores match.length in $$ and the Start rule
  # passes that value on. The program built from
  # spec/test_lexer_result_value.d must leave stderr empty and exit with
  # status 0.
  it "allows storing a result value for the lexer" do
    write_grammar(<<GRAMMAR)
ptype ulong;
token word /[a-z]+/ <<
  $$ = match.length;
>>
Start -> word <<
  $$ = $1;
>>
GRAMMAR
    build_parser
    compile("spec/test_lexer_result_value.d")
    outcome = run
    expect(outcome.stderr).to eq("")
    expect(outcome.status).to eq(0)
  end

  # Builds a JSON grammar that uses several parser value types, a "string"
  # lexer mode and D user code, then compiles the generated parser together
  # with spec/test_parsing_json.d and spec/json_types.d. The resulting program
  # is only built and compiled here; it is not run.
  #
  # The heredoc is an interpolating one, so every "\\" below reaches the
  # grammar file as a single backslash.
  #
  # The embedded number lexer code differs from a naive transcription in
  # three places, each fixing a defect in the D code:
  # * +n+ is initialised to 0.0, because a D double default-initialises to
  #   NaN and every later accumulation would stay NaN;
  # * every scan of +match+ that can reach the end of the matched text checks
  #   +i < match.length+ first, so the index never runs past the last digit;
  # * the exponent scales the value with +n *= pow(10.0, exp)+ instead of
  #   raising the mantissa to the exponent.
  it "allows creating a JSON parser" do
    write_grammar <<EOF
<<
    import std.math;
    import json_types;
    string string_value;
>>

ptype JSONValue;
ptype array = JSONValue[];
ptype dict = JSONValue[string];
ptype string = string;

drop /\\s+/;
token lbrace /\\{/;
token rbrace /\\}/;
token lbracket /\\[/;
token rbracket /\\]/;
token comma /,/;
token colon /:/;
token number /-?(0|[1-9][0-9]*)(\\.[0-9]+)?([eE][-+]?[0-9]+)?/ <<
    double n = 0.0;
    bool negative;
    size_t i = 0u;
    if (match[i] == '-')
    {
        negative = true;
        i++;
    }
    while (i < match.length && '0' <= match[i] && match[i] <= '9')
    {
        n *= 10.0;
        n += (match[i] - '0');
        i++;
    }
    if (i < match.length && match[i] == '.')
    {
        i++;
        double mult = 0.1;
        while (i < match.length && '0' <= match[i] && match[i] <= '9')
        {
            n += mult * (match[i] - '0');
            mult /= 10.0;
            i++;
        }
    }
    if (i < match.length && (match[i] == 'e' || match[i] == 'E'))
    {
        bool exp_negative;
        i++;
        if (match[i] == '-')
        {
            exp_negative = true;
            i++;
        }
        else if (match[i] == '+')
        {
            i++;
        }
        long exp;
        while (i < match.length && '0' <= match[i] && match[i] <= '9')
        {
            exp *= 10;
            exp += (match[i] - '0');
            i++;
        }
        if (exp_negative)
        {
            exp = -exp;
        }
        n *= pow(10.0, exp);
    }
    if (negative)
    {
        n = -n;
    }
    $$ = new JSONNumber(n);
>>
token true <<
  $$ = new JSONTrue();
>>
token false <<
  $$ = new JSONFalse();
>>
token null <<
  $$ = new JSONNull();
>>
/"/ <<
  $mode(string);
  string_value = "";
>>
string: token string (string) /"/ <<
  $$ = string_value;
  $mode(default);
>>
string: /\\\\"/ <<
  string_value ~= "\\"";
>>
string: /\\\\\\\\/ <<
  string_value ~= "\\\\";
>>
string: /\\\\\\// <<
  string_value ~= "/";
>>
string: /\\\\b/ <<
  string_value ~= "\\b";
>>
string: /\\\\f/ <<
  string_value ~= "\\f";
>>
string: /\\\\n/ <<
  string_value ~= "\\n";
>>
string: /\\\\r/ <<
  string_value ~= "\\r";
>>
string: /\\\\t/ <<
  string_value ~= "\\t";
>>
string: /\\\\u[0-9a-fA-F]{4}/ <<
  /* Not actually going to encode the code point for this example... */
  string_value ~= "{" ~ match[2..6] ~ "}";
>>
string: /[^\\\\]/ <<
  string_value ~= match;
>>
Start -> Value <<
  $$ = $1;
>>
Value -> string <<
  $$ = new JSONString($1);
>>
Value -> number <<
  $$ = $1;
>>
Value -> Object <<
  $$ = $1;
>>
Value -> Array <<
  $$ = $1;
>>
Value -> true <<
  $$ = $1;
>>
Value -> false <<
  $$ = $1;
>>
Value -> null <<
  $$ = $1;
>>
Object -> lbrace rbrace <<
  $$ = new JSONObject();
>>
Object -> lbrace KeyValues rbrace <<
  $$ = new JSONObject($2);
>>
KeyValues (dict) -> KeyValue <<
  $$ = $1;
>>
KeyValues -> KeyValues comma KeyValue <<
  foreach (key, value; $3)
  {
    $1[key] = value;
  }
  $$ = $1;
>>
KeyValue (dict) -> string colon Value <<
  $$ = [$1: $3];
>>
Array -> lbracket rbracket <<
  $$ = new JSONArray();
>>
Array -> lbracket Values rbracket <<
  $$ = new JSONArray($2);
>>
Values (array) -> Value <<
  $$ = [$1];
>>
Values -> Values comma Value <<
  $$ = $1 ~ [$3];
>>
EOF
    build_parser
    compile("spec/test_parsing_json.d", "spec/json_types.d")
  end
end