require "fileutils"
require "open3"

Results = Struct.new(:stdout, :stderr, :status)

describe Propane do

  # Write the given grammar to the propane input file used by the tests.
  def write_grammar(grammar)
    File.write("spec/run/testparser.propane", grammar)
  end

  # Run propane on the test grammar. When capture: true is given, return a
  # Results struct with the command output instead of asserting success.
  def build_parser(options = {})
    command = %w[./propane.sh spec/run/testparser.propane spec/run/testparser.d --log spec/run/testparser.log]
    if (options[:capture])
      stdout, stderr, status = Open3.capture3(*command)
      Results.new(stdout, stderr, status)
    else
      result = system(*command)
      expect(result).to be_truthy
    end
  end

  # Compile the generated parser together with the given D test driver files.
  def compile(*test_files)
    result = system(*%w[gdc -funittest -o spec/run/testparser spec/run/testparser.d -Ispec], *test_files)
    expect(result).to be_truthy
  end

  # Execute the compiled test parser and return its captured output.
  def run
    stdout, stderr, status = Open3.capture3("spec/run/testparser")
    File.binwrite("spec/run/.stderr", stderr)
    File.binwrite("spec/run/.stdout", stdout)
    Results.new(stdout, stderr, status)
  end

  def lines(str)
    str.lines.map(&:chomp)
  end

  # Verify that each expected pattern matches some line of the given output.
  def verify_lines(lines, patterns)
    if lines.is_a?(String)
      lines = lines.lines.map(&:chomp)
    end
    patterns.each_with_index do |pattern, i|
      found_index =
        if pattern.is_a?(Regexp)
          lines.find_index {|line| line =~ pattern}
        else
          lines.find_index do |line|
            line.chomp == pattern.chomp
          end
        end
      unless found_index
        $stderr.puts "Lines:"
        $stderr.puts lines
        raise "A line matching #{pattern.inspect} (index #{i}) was not found."
      end
    end
  end

  before(:each) do
    FileUtils.rm_rf("spec/run")
    FileUtils.mkdir_p("spec/run")
  end

  it "generates a D lexer" do
    write_grammar <<EOF
Start -> Foo;
Foo -> int << >>
Foo -> plus << >>
EOF
    build_parser
    compile("spec/test_d_lexer.d")
    results = run
    expect(results.stderr).to eq ""
    expect(results.status).to eq 0
  end

  it "generates a parser" do
    write_grammar <<EOF
Start -> E;
E -> E times B;
E -> E plus B;
E -> B;
B -> zero;
B -> one;
EOF
    build_parser
  end

  it "generates an SLR parser" do
    write_grammar <<EOF
Start -> E;
E -> one E;
E -> one;
EOF
    build_parser
  end

  it "distinguishes between multiple identical rules with lookahead symbol" do
    write_grammar <<EOF
Start -> R1 a;
Start -> R2 b;
R1 -> a b;
R2 -> a b;
EOF
    build_parser
    compile("spec/test_d_parser_identical_rules_lookahead.d")
    results = run
    expect(results.status).to eq 0
  end

  it "handles reducing a rule that could be arrived at from multiple states" do
    write_grammar <<EOF
Start -> a R1;
Start -> b R1;
R1 -> b;
EOF
    build_parser
    compile("spec/test_d_parser_rule_from_multiple_states.d")
    results = run
    expect(results.status).to eq 0
  end

  it "executes user code when matching lexer token" do
    write_grammar <<EOF
token def;
Start -> Abcs def;
Abcs -> ;
Abcs -> abc Abcs;
EOF
    build_parser
    compile("spec/test_user_code.d")
    results = run
    expect(results.status).to eq 0
    verify_lines(results.stdout, [
      "abc!",
      "pass1",
      "abc!",
      "abc!",
      "pass2",
    ])
  end

  it "supports a pattern statement" do
    write_grammar <<EOF
Start -> abc;
EOF
    build_parser
    compile("spec/test_pattern.d")
    results = run
    expect(results.status).to eq 0
    verify_lines(results.stdout, [
      "def!",
      "pass1",
      "def!",
      "def!",
      "pass2",
    ])
  end

  it "supports returning tokens from pattern code blocks" do
    write_grammar <<EOF
/ghi/ << writeln("ghi!"); return $token(abc); >>
Start -> abc;
EOF
    build_parser
    compile("spec/test_return_token_from_pattern.d")
    results = run
    expect(results.status).to eq 0
    verify_lines(results.stdout, [
      "def!",
      "ghi!",
      "def!",
    ])
  end

  it "supports lexer modes" do
    write_grammar <<EOF
string: /[^"]+/ << writeln("captured string"); >>
string: /"/ << $mode(default); return $token(string); >>
Start -> abc string def;
EOF
    build_parser
    compile("spec/test_lexer_modes.d")
    results = run
    expect(results.status).to eq 0
    verify_lines(results.stdout, [
      "begin string mode",
      "captured string",
      "pass1",
      "begin string mode",
      "captured string",
      "pass2",
    ])
  end

  it "executes user code associated with a parser rule" do
    write_grammar <<EOF
Start -> A B << writeln("Start!"); >>
A -> a << writeln("A!"); >>
B -> b << writeln("B!"); >>
EOF
    build_parser
    compile("spec/test_parser_rule_user_code.d")
    results = run
    expect(results.status).to eq 0
    verify_lines(results.stdout, [
      "A!",
      "B!",
      "Start!",
    ])
  end

  it "parses lists" do
    write_grammar <<EOF
Start -> As << $$ = $1; >>
As -> << $$ = 0u; >>
As -> As a << $$ = $1 + 1u; >>
EOF
    build_parser
    compile("spec/test_parsing_lists.d")
    results = run
    expect(results.status).to eq 0
    expect(results.stderr).to eq ""
  end

  it "fails to generate a parser for an LR(1) grammar that is not LALR" do
    write_grammar <<EOF
Start -> a E c;
Start -> a F d;
Start -> b F c;
Start -> b E d;
E -> e;
F -> e;
EOF
    results = build_parser(capture: true)
    expect(results.status).to_not eq 0
    expect(results.stderr).to match %r{reduce/reduce conflict.*\(E\).*\(F\)}
  end

  it "provides matched text to user code blocks" do
    write_grammar <<EOF
Start -> id;
EOF
    build_parser
    compile("spec/test_lexer_match_text.d")
    results = run
    expect(results.status).to eq 0
    verify_lines(results.stdout, [
      "Matched token is identifier_123",
      "pass1",
    ])
  end

  it "allows storing a result value for the lexer" do
    write_grammar <<EOF
Start -> word << $$ = $1; >>
EOF
    build_parser
    compile("spec/test_lexer_result_value.d")
    results = run
    expect(results.stderr).to eq ""
    expect(results.status).to eq 0
  end

  it "allows creating a JSON parser" do
    write_grammar <<EOF
ptype JSONValue;
ptype array = JSONValue[];
ptype dict = JSONValue[string];
ptype string = string;
drop /\\s+/;
token lbrace /\\{/;
token rbrace /\\}/;
token lbracket /\\[/;
token rbracket /\\]/;
token comma /,/;
token colon /:/;
token number /-?(0|[1-9][0-9]*)(\\.[0-9]+)?([eE][-+]?[0-9]+)?/ <<
  // Integer part.
  double n = 0.0;
  bool negative;
  size_t i = 0u;
  if (match[i] == '-')
  {
    negative = true;
    i++;
  }
  while ('0' <= match[i] && match[i] <= '9')
  {
    n *= 10.0;
    n += (match[i] - '0');
    i++;
  }
  // Fractional part.
  if (match[i] == '.')
  {
    i++;
    double mult = 0.1;
    while ('0' <= match[i] && match[i] <= '9')
    {
      n += mult * (match[i] - '0');
      mult /= 10.0;
      i++;
    }
  }
  // Exponent part.
  if (match[i] == 'e' || match[i] == 'E')
  {
    bool exp_negative;
    i++;
    if (match[i] == '-')
    {
      exp_negative = true;
      i++;
    }
    else if (match[i] == '+')
    {
      i++;
    }
    long exp;
    while ('0' <= match[i] && match[i] <= '9')
    {
      exp *= 10;
      exp += (match[i] - '0');
      i++;
    }
    if (exp_negative)
    {
      exp = -exp;
    }
    n *= pow(10.0, exp);
  }
  if (negative)
  {
    n = -n;
  }
  $$ = new JSONNumber(n);
>>
token true << $$ = new JSONTrue(); >>
token false << $$ = new JSONFalse(); >>
token null << $$ = new JSONNull(); >>
/"/ << $mode(string); string_value = ""; >>
string: token string (string) /"/ << $$ = string_value; $mode(default); >>
string: /\\\\"/ << string_value ~= "\\""; >>
string: /\\\\\\\\/ << string_value ~= "\\\\"; >>
string: /\\\\\\// << string_value ~= "/"; >>
string: /\\\\b/ << string_value ~= "\\b"; >>
string: /\\\\f/ << string_value ~= "\\f"; >>
string: /\\\\n/ << string_value ~= "\\n"; >>
string: /\\\\r/ << string_value ~= "\\r"; >>
string: /\\\\t/ << string_value ~= "\\t"; >>
string: /\\\\u[0-9a-fA-F]{4}/ <<
  /* Not actually going to encode the code point for this example... */
  string_value ~= "{" ~ match[2..6] ~ "}";
>>
string: /[^\\\\]/ << string_value ~= match; >>
Start -> Value << $$ = $1; >>
Value -> string << $$ = new JSONString($1); >>
Value -> number << $$ = $1; >>
Value -> Object << $$ = $1; >>
Value -> Array << $$ = $1; >>
Value -> true << $$ = $1; >>
Value -> false << $$ = $1; >>
Value -> null << $$ = $1; >>
Object -> lbrace rbrace << $$ = new JSONObject(); >>
Object -> lbrace KeyValues rbrace << $$ = new JSONObject($2); >>
KeyValues (dict) -> KeyValue << $$ = $1; >>
KeyValues -> KeyValues comma KeyValue <<
  foreach (key, value; $3)
  {
    $1[key] = value;
  }
  $$ = $1;
>>
KeyValue (dict) -> string colon Value << $$ = [$1: $3]; >>
Array -> lbracket rbracket << $$ = new JSONArray(); >>
Array -> lbracket Values rbracket << $$ = new JSONArray($2); >>
Values (array) -> Value << $$ = [$1]; >>
Values -> Values comma Value << $$ = $1 ~ [$3]; >>
EOF
    build_parser
    compile("spec/test_parsing_json.d", "spec/json_types.d")
  end
end