propane/spec/propane_spec.rb

524 lines
9.6 KiB
Ruby

require "fileutils"
require "open3"
# Outcome of a child process: its captured standard output, captured standard
# error, and the status object returned by Open3.capture3.
Results = Struct.new(:stdout, :stderr, :status)
describe Propane do
# Stores the given grammar text in spec/run/testparser.propane, replacing any
# previous content of that file.
#
# @param grammar [String] grammar source to write
# @return [Integer] number of bytes written
def write_grammar(grammar)
  File.open("spec/run/testparser.propane", "w") do |grammar_file|
    grammar_file.write(grammar)
  end
end
# Invokes ./propane.sh on spec/run/testparser.propane, passing
# spec/run/testparser.d as the second argument and spec/run/testparser.log as
# the --log file.
#
# @param options [Hash] when options[:capture] is truthy the command's output
#   is captured and returned instead of asserting that the command succeeded
# @return [Results, Object] a Results (stdout, stderr, status) in capture
#   mode; otherwise the value of the RSpec expectation on the command result
def build_parser(options = {})
  generator_argv = [
    "./propane.sh",
    "spec/run/testparser.propane",
    "spec/run/testparser.d",
    "--log",
    "spec/run/testparser.log",
  ]
  # Capture mode hands the raw outcome back so the caller can inspect failures.
  return Results.new(*Open3.capture3(*generator_argv)) if options[:capture]

  expect(system(*generator_argv)).to be_truthy
end
# Compiles spec/run/testparser.d plus the given files with gdc (unittests
# enabled, -Ispec import path) into spec/run/testparser, and expects the
# compiler invocation to succeed.
#
# @param test_files [Array<String>] extra source files appended to the command
def compile(*test_files)
  gdc_argv = ["gdc", "-funittest", "-o", "spec/run/testparser", "spec/run/testparser.d", "-Ispec"]
  gdc_argv += test_files
  expect(system(*gdc_argv)).to be_truthy
end
# Executes spec/run/testparser, saves its stderr and stdout to
# spec/run/.stderr and spec/run/.stdout, and returns the captured outcome.
#
# @return [Results] stdout, stderr and status of the executed program
def run
  out, err, exit_status = Open3.capture3("spec/run/testparser")
  # Written in the same order as before: stderr first, then stdout.
  { "spec/run/.stderr" => err, "spec/run/.stdout" => out }.each do |path, data|
    File.binwrite(path, data)
  end
  Results.new(out, err, exit_status)
end
# Splits a string into its lines with the trailing line terminator removed
# from each one.
#
# @param str [String] text to split
# @return [Array<String>] the chomped lines, in order
def lines(str)
  str.each_line.map { |raw_line| raw_line.chomp }
end
# Checks that every pattern is matched by at least one line.
#
# A Regexp pattern matches a line via =~; any other pattern matches when its
# chomped form equals the chomped line. Only presence is checked: neither the
# order of the lines nor how many times a line occurs is verified.
#
# @param lines [String, Array<String>] text (split into chomped lines when a
#   String is given) or an already split list of lines
# @param patterns [Array<Regexp, String>] patterns that must each be found
# @raise [RuntimeError] when a pattern matches no line; the lines are printed
#   to $stderr first
def verify_lines(lines, patterns)
  lines = lines.lines.map { |text| text.chomp } if lines.is_a?(String)
  patterns.each_with_index do |pattern, i|
    matched =
      case pattern
      when Regexp
        lines.any? { |line| line =~ pattern }
      else
        lines.any? { |line| line.chomp == pattern.chomp }
      end
    next if matched

    $stderr.puts "Lines:"
    $stderr.puts lines
    raise "A line matching #{pattern.inspect} (index #{i}) was not found."
  end
end
# Gives every example a fresh, empty spec/run directory.
before(:each) do
  scratch_dir = "spec/run"
  FileUtils.rm_rf(scratch_dir)
  FileUtils.mkdir_p(scratch_dir)
end
# Builds a parser from a grammar with three regex tokens and a whitespace drop
# pattern, compiles it with spec/test_d_lexer.d, and expects the resulting
# program to print nothing on stderr and to exit with status 0.
it "generates a D lexer" do
  write_grammar(<<EOF)
token int /\\d+/;
token plus /\\+/;
token times /\\*/;
drop /\\s+/;
Start -> Foo;
Foo -> int <<
>>
Foo -> plus <<
>>
EOF
  build_parser
  compile("spec/test_d_lexer.d")
  outcome = run
  expect(outcome.stderr).to eq("")
  expect(outcome.status).to eq(0)
end
# Writes a small expression grammar (E/B rules over plus, times, zero and one)
# and runs the generator on it. The example passes when build_parser's
# expectation that the generator command succeeded holds; nothing is compiled
# or executed.
it "generates a parser" do
write_grammar <<EOF
token plus /\\+/;
token times /\\*/;
token zero /0/;
token one /1/;
Start -> E;
E -> E times B;
E -> E plus B;
E -> B;
B -> zero;
B -> one;
EOF
build_parser
end
# Writes a right-recursive grammar over a single token and runs the generator
# on it. The example passes when build_parser's expectation that the generator
# command succeeded holds; nothing is compiled or executed.
it "generates an SLR parser" do
write_grammar <<EOF
token one /1/;
Start -> E;
E -> one E;
E -> one;
EOF
build_parser
end
# R1 and R2 have the same right-hand side (a b) and differ only in the token
# that follows them in Start. The generated parser is compiled with
# spec/test_d_parser_identical_rules_lookahead.d and must exit with status 0.
it "distinguishes between multiple identical rules with lookahead symbol" do
  write_grammar(<<EOF)
token a;
token b;
Start -> R1 a;
Start -> R2 b;
R1 -> a b;
R2 -> a b;
EOF
  build_parser
  compile("spec/test_d_parser_identical_rules_lookahead.d")
  outcome = run
  expect(outcome.status).to eq(0)
end
# R1 can follow either token a or token b in Start. The generated parser is
# compiled with spec/test_d_parser_rule_from_multiple_states.d and must exit
# with status 0.
it "handles reducing a rule that could be arrived at from multiple states" do
  write_grammar(<<EOF)
token a;
token b;
drop /\\s+/;
Start -> a R1;
Start -> b R1;
R1 -> b;
EOF
  build_parser
  compile("spec/test_d_parser_rule_from_multiple_states.d")
  outcome = run
  expect(outcome.status).to eq(0)
end
# Attaches a code block to token abc that prints "abc!". After compiling with
# spec/test_user_code.d the program must exit with status 0 and its stdout
# must contain each of the expected lines.
it "executes user code when matching lexer token" do
  write_grammar(<<EOF)
token abc <<
writeln("abc!");
>>
token def;
Start -> Abcs def;
Abcs -> ;
Abcs -> abc Abcs;
EOF
  build_parser
  compile("spec/test_user_code.d")
  outcome = run
  expect(outcome.status).to eq(0)
  expected_lines = %w[abc! pass1 abc! abc! pass2]
  verify_lines(outcome.stdout, expected_lines)
end
# Uses a bare /def/ pattern statement whose code block prints "def!". After
# compiling with spec/test_pattern.d the program must exit with status 0 and
# its stdout must contain each of the expected lines.
it "supports a pattern statement" do
  write_grammar(<<EOF)
token abc;
/def/ <<
writeln("def!");
>>
Start -> abc;
EOF
  build_parser
  compile("spec/test_pattern.d")
  outcome = run
  expect(outcome.status).to eq(0)
  expected_lines = %w[def! pass1 def! def! pass2]
  verify_lines(outcome.stdout, expected_lines)
end
# The /ghi/ pattern's code block prints "ghi!" and returns $token(abc). After
# compiling with spec/test_return_token_from_pattern.d the program must exit
# with status 0 and its stdout must contain each of the expected lines.
it "supports returning tokens from pattern code blocks" do
  write_grammar(<<EOF)
token abc;
/def/ <<
writeln("def!");
>>
/ghi/ <<
writeln("ghi!");
return $token(abc);
>>
Start -> abc;
EOF
  build_parser
  compile("spec/test_return_token_from_pattern.d")
  outcome = run
  expect(outcome.status).to eq(0)
  expected_lines = %w[def! ghi! def!]
  verify_lines(outcome.stdout, expected_lines)
end
# A double quote switches the lexer into the "string" mode via $mode(string);
# inside that mode a closing quote switches back with $mode(default) and
# returns $token(string). After compiling with spec/test_lexer_modes.d the
# program must exit with status 0 and its stdout must contain each of the
# expected lines.
it "supports lexer modes" do
  write_grammar(<<EOF)
token abc;
token def;
tokenid string;
drop /\\s+/;
/"/ <<
writeln("begin string mode");
$mode(string);
>>
string: /[^"]+/ <<
writeln("captured string");
>>
string: /"/ <<
$mode(default);
return $token(string);
>>
Start -> abc string def;
EOF
  build_parser
  compile("spec/test_lexer_modes.d")
  outcome = run
  expect(outcome.status).to eq(0)
  expected_lines = [
    "begin string mode",
    "captured string",
    "pass1",
    "begin string mode",
    "captured string",
    "pass2",
  ]
  verify_lines(outcome.stdout, expected_lines)
end
# Each of the rules Start, A and B carries a code block that prints its own
# name. After compiling with spec/test_parser_rule_user_code.d the program
# must exit with status 0 and its stdout must contain "A!", "B!" and "Start!".
it "executes user code associated with a parser rule" do
  write_grammar(<<EOF)
token a;
token b;
Start -> A B <<
writeln("Start!");
>>
A -> a <<
writeln("A!");
>>
B -> b <<
writeln("B!");
>>
EOF
  build_parser
  compile("spec/test_parser_rule_user_code.d")
  outcome = run
  expect(outcome.status).to eq(0)
  expected_lines = %w[A! B! Start!]
  verify_lines(outcome.stdout, expected_lines)
end
# Declares ptype uint and a left-recursive list rule whose code blocks start
# at 0u and add 1u per element. After compiling with
# spec/test_parsing_lists.d the program must exit with status 0 and print
# nothing on stderr.
it "parses lists" do
  write_grammar(<<EOF)
ptype uint;
token a;
Start -> As <<
$$ = $1;
>>
As -> <<
$$ = 0u;
>>
As -> As a <<
$$ = $1 + 1u;
>>
EOF
  build_parser
  compile("spec/test_parsing_lists.d")
  outcome = run
  expect(outcome.status).to eq(0)
  expect(outcome.stderr).to eq("")
end
# Runs the generator in capture mode on a grammar where E and F both derive
# token e, and expects a non-zero status together with a stderr message
# reporting a reduce/reduce conflict that mentions (E) and then (F).
it "fails to generate a parser for a LR(1) grammar that is not LALR" do
  write_grammar(<<EOF)
token a;
token b;
token c;
token d;
token e;
Start -> a E c;
Start -> a F d;
Start -> b F c;
Start -> b E d;
E -> e;
F -> e;
EOF
  outcome = build_parser(capture: true)
  expect(outcome.status).to_not eq(0)
  expect(outcome.stderr).to match(%r{reduce/reduce conflict.*\(E\).*\(F\)})
end
# The id token's code block prints the `match` value. After compiling with
# spec/test_lexer_match_text.d the program must exit with status 0 and its
# stdout must contain the matched identifier line and "pass1".
it "provides matched text to user code blocks" do
  write_grammar(<<EOF)
token id /[a-zA-Z_][a-zA-Z0-9_]*/ <<
writeln("Matched token is ", match);
>>
Start -> id;
EOF
  build_parser
  compile("spec/test_lexer_match_text.d")
  outcome = run
  expect(outcome.status).to eq(0)
  expected_lines = [
    "Matched token is identifier_123",
    "pass1",
  ]
  verify_lines(outcome.stdout, expected_lines)
end
# The word token's code block assigns match.length to $$ and the Start rule
# forwards $1. After compiling with spec/test_lexer_result_value.d the
# program must print nothing on stderr and exit with status 0.
it "allows storing a result value for the lexer" do
  write_grammar(<<EOF)
ptype ulong;
token word /[a-z]+/ <<
$$ = match.length;
>>
Start -> word <<
$$ = $1;
>>
EOF
  build_parser
  compile("spec/test_lexer_result_value.d")
  outcome = run
  expect(outcome.stderr).to eq("")
  expect(outcome.status).to eq(0)
end
# Writes a complete JSON grammar (value types from json_types, a "string"
# lexer mode for string literals, and Object/Array rules), then generates the
# parser and compiles it with spec/test_parsing_json.d and spec/json_types.d.
# The example only checks that generation and compilation succeed; the
# compiled program is not executed here.
#
# The embedded D code for the `number` token differs from a naive version in
# three ways that matter once the parser is actually run:
# * `n` is initialized to 0.0, because a D `double` default-initializes to NaN
#   and every accumulated value would otherwise be NaN;
# * every digit/'.'/exponent probe checks `i < match.length` first, because
#   `match` holds only the token text and a number's digits may end the token;
# * the exponent scales the value by a power of ten (`n *= pow(10.0, exp)`)
#   instead of raising the mantissa to the exponent.
it "allows creating a JSON parser" do
  write_grammar(<<EOF)
<<
import std.math;
import json_types;
string string_value;
>>
ptype JSONValue;
ptype array = JSONValue[];
ptype dict = JSONValue[string];
ptype string = string;
drop /\\s+/;
token lbrace /\\{/;
token rbrace /\\}/;
token lbracket /\\[/;
token rbracket /\\]/;
token comma /,/;
token colon /:/;
token number /-?(0|[1-9][0-9]*)(\\.[0-9]+)?([eE][-+]?[0-9]+)?/ <<
double n = 0.0;
bool negative;
size_t i = 0u;
if (match[i] == '-')
{
negative = true;
i++;
}
while (i < match.length && '0' <= match[i] && match[i] <= '9')
{
n *= 10.0;
n += (match[i] - '0');
i++;
}
if (i < match.length && match[i] == '.')
{
i++;
double mult = 0.1;
while (i < match.length && '0' <= match[i] && match[i] <= '9')
{
n += mult * (match[i] - '0');
mult /= 10.0;
i++;
}
}
if (i < match.length && (match[i] == 'e' || match[i] == 'E'))
{
bool exp_negative;
i++;
if (match[i] == '-')
{
exp_negative = true;
i++;
}
else if (match[i] == '+')
{
i++;
}
long exp;
while (i < match.length && '0' <= match[i] && match[i] <= '9')
{
exp *= 10;
exp += (match[i] - '0');
i++;
}
if (exp_negative)
{
exp = -exp;
}
n *= pow(10.0, exp);
}
if (negative)
{
n = -n;
}
$$ = new JSONNumber(n);
>>
token true <<
$$ = new JSONTrue();
>>
token false <<
$$ = new JSONFalse();
>>
token null <<
$$ = new JSONNull();
>>
/"/ <<
$mode(string);
string_value = "";
>>
string: token string (string) /"/ <<
$$ = string_value;
$mode(default);
>>
string: /\\\\"/ <<
string_value ~= "\\"";
>>
string: /\\\\\\\\/ <<
string_value ~= "\\\\";
>>
string: /\\\\\\// <<
string_value ~= "/";
>>
string: /\\\\b/ <<
string_value ~= "\\b";
>>
string: /\\\\f/ <<
string_value ~= "\\f";
>>
string: /\\\\n/ <<
string_value ~= "\\n";
>>
string: /\\\\r/ <<
string_value ~= "\\r";
>>
string: /\\\\t/ <<
string_value ~= "\\t";
>>
string: /\\\\u[0-9a-fA-F]{4}/ <<
/* Not actually going to encode the code point for this example... */
string_value ~= "{" ~ match[2..6] ~ "}";
>>
string: /[^\\\\]/ <<
string_value ~= match;
>>
Start -> Value <<
$$ = $1;
>>
Value -> string <<
$$ = new JSONString($1);
>>
Value -> number <<
$$ = $1;
>>
Value -> Object <<
$$ = $1;
>>
Value -> Array <<
$$ = $1;
>>
Value -> true <<
$$ = $1;
>>
Value -> false <<
$$ = $1;
>>
Value -> null <<
$$ = $1;
>>
Object -> lbrace rbrace <<
$$ = new JSONObject();
>>
Object -> lbrace KeyValues rbrace <<
$$ = new JSONObject($2);
>>
KeyValues (dict) -> KeyValue <<
$$ = $1;
>>
KeyValues -> KeyValues comma KeyValue <<
foreach (key, value; $3)
{
$1[key] = value;
}
$$ = $1;
>>
KeyValue (dict) -> string colon Value <<
$$ = [$1: $3];
>>
Array -> lbracket rbracket <<
$$ = new JSONArray();
>>
Array -> lbracket Values rbracket <<
$$ = new JSONArray($2);
>>
Values (array) -> Value <<
$$ = [$1];
>>
Values -> Values comma Value <<
$$ = $1 ~ [$3];
>>
EOF
  build_parser
  compile("spec/test_parsing_json.d", "spec/json_types.d")
end
end