Issue #7 - change .rsconscache from using YAML to JSON to speed up parsing

This commit is contained in:
Josh Holtrop 2014-04-14 16:52:21 -04:00
parent d8dda291a5
commit c61380b354
2 changed files with 126 additions and 125 deletions

View File

@ -1,59 +1,59 @@
require 'yaml'
require 'fileutils'
require 'digest/md5'
require 'set'
require 'rscons/version'
require "digest/md5"
require "fileutils"
require "json"
require "set"
require "rscons/version"
module Rscons
# The Cache class keeps track of file checksums, build target commands and
# dependencies in a YAML file which persists from one invocation to the next.
# dependencies in a JSON file which persists from one invocation to the next.
# Example cache:
# {
# version: '1.2.3',
# targets: {
# 'program' => {
# 'checksum' => 'A1B2C3D4',
# 'command' => ['gcc', '-o', 'program', 'program.o'],
# 'deps' => [
# "version" => "1.2.3",
# "targets" => {
# "program" => {
# "checksum" => "A1B2C3D4",
# "command" => ["gcc", "-o", "program", "program.o"],
# "deps" => [
# {
# 'fname' => 'program.o',
# 'checksum' => '87654321',
# "fname" => "program.o",
# "checksum" => "87654321",
# },
# ],
# 'user_deps' => [
# "user_deps" => [
# {
# 'fname' => 'lscript.ld',
# 'checksum' => '77551133',
# "fname" => "lscript.ld",
# "checksum" => "77551133",
# },
# ],
# },
# 'program.o' => {
# 'checksum' => '87654321',
# 'command' => ['gcc', '-c', '-o', 'program.o', 'program.c'],
# 'deps' => [
# "program.o" => {
# "checksum" => "87654321",
# "command" => ["gcc", "-c", "-o", "program.o", "program.c"],
# "deps" => [
# {
# 'fname' => 'program.c',
# 'checksum' => '456789ABC',
# "fname" => "program.c",
# "checksum" => "456789ABC",
# },
# {
# 'fname' => 'program.h',
# 'checksum' => '7979764643',
# "fname" => "program.h",
# "checksum" => "7979764643",
# },
# ],
# 'user_deps' => [],
# "user_deps" => [],
# }
# },
# directories: {
# 'build' => true,
# 'build/one' => true,
# 'build/two' => true,
# "directories" => {
# "build" => true,
# "build/one" => true,
# "build/two" => true,
# },
# }
class Cache
#### Constants
# Name of the file to store cache information in
CACHE_FILE = '.rsconscache'
CACHE_FILE = ".rsconscache"
#### Class Methods
@ -67,21 +67,21 @@ module Rscons
# Create a Cache object and load in the previous contents from the cache
# file.
def initialize
@cache = YAML.load(File.read(CACHE_FILE)) rescue {}
@cache = JSON.load(File.read(CACHE_FILE)) rescue {}
unless @cache.is_a?(Hash)
$stderr.puts "Warning: #{CACHE_FILE} was corrupt. Contents:\n#{@cache.inspect}"
@cache = {}
end
@cache[:targets] ||= {}
@cache[:directories] ||= {}
@cache["targets"] ||= {}
@cache["directories"] ||= {}
@lookup_checksums = {}
end
# Write the cache to disk to be loaded next time.
def write
@cache[:version] = VERSION
File.open(CACHE_FILE, 'w') do |fh|
fh.puts(YAML.dump(@cache))
@cache["version"] = VERSION
File.open(CACHE_FILE, "w") do |fh|
fh.puts(JSON.dump(@cache))
end
end
@ -90,11 +90,12 @@ module Rscons
# @param command [String, Array] The command used to build the target.
# @param deps [Array] List of the target's dependency files.
# @param env [Environment] The Rscons::Environment.
# @param options [Hash] Optional options. Can contain the following keys:
# :strict_deps::
# Only consider a target up to date if its list of dependencies is
# exactly equal (including order) to the cached list of dependencies
# @return true if the targets are all up to date, meaning that,
# @param options [Hash] Optional options.
# @option options [Boolean] :strict_deps
# Only consider a target up to date if its list of dependencies is
# exactly equal (including order) to the cached list of dependencies
# @return [Boolean]
# True value if the targets are all up to date, meaning that,
# for each target:
# - the target exists on disk
# - the cache has information for the target
@ -111,16 +112,16 @@ module Rscons
return false unless File.exists?(target)
# target must be registered in the cache
return false unless @cache[:targets].has_key?(target)
return false unless @cache["targets"].has_key?(target)
# target must have the same checksum as when it was built last
return false unless @cache[:targets][target][:checksum] == lookup_checksum(target)
return false unless @cache["targets"][target]["checksum"] == lookup_checksum(target)
# command used to build target must be identical
return false unless @cache[:targets][target][:command] == command
return false unless @cache["targets"][target]["command"] == command
cached_deps = @cache[:targets][target][:deps] || []
cached_deps_fnames = cached_deps.map { |dc| dc[:fname] }
cached_deps = @cache["targets"][target]["deps"] || []
cached_deps_fnames = cached_deps.map { |dc| dc["fname"] }
if options[:strict_deps]
# dependencies passed in must exactly equal those in the cache
return false unless deps == cached_deps_fnames
@ -131,13 +132,13 @@ module Rscons
# set of user dependencies must match
user_deps = env.get_user_deps(target) || []
cached_user_deps = @cache[:targets][target][:user_deps] || []
cached_user_deps_fnames = cached_user_deps.map { |dc| dc[:fname] }
cached_user_deps = @cache["targets"][target]["user_deps"] || []
cached_user_deps_fnames = cached_user_deps.map { |dc| dc["fname"] }
return false unless user_deps == cached_user_deps_fnames
# all cached dependencies must have their checksums match
(cached_deps + cached_user_deps).each do |dep_cache|
return false unless dep_cache[:checksum] == lookup_checksum(dep_cache[:fname])
return false unless dep_cache["checksum"] == lookup_checksum(dep_cache["fname"])
end
end
@ -148,22 +149,22 @@ module Rscons
# @param targets [String, Array] The name of the target(s) built.
# @param command [String, Array] The command used to build the target.
# @param deps [Array] List of dependencies for the target.
# @param env [Environment] The Rscons::Environment.
# @param env [Environment] The {Rscons::Environment}.
def register_build(targets, command, deps, env)
Array(targets).each do |target|
@cache[:targets][target.encode(__ENCODING__)] = {
command: command,
checksum: calculate_checksum(target),
deps: deps.map do |dep|
@cache["targets"][target.encode(__ENCODING__)] = {
"command" => command,
"checksum" => calculate_checksum(target),
"deps" => deps.map do |dep|
{
fname: dep.encode(__ENCODING__),
checksum: lookup_checksum(dep),
"fname" => dep.encode(__ENCODING__),
"checksum" => lookup_checksum(dep),
}
end,
user_deps: (env.get_user_deps(target) || []).map do |dep|
"user_deps" => (env.get_user_deps(target) || []).map do |dep|
{
fname: dep.encode(__ENCODING__),
checksum: lookup_checksum(dep),
"fname" => dep.encode(__ENCODING__),
"checksum" => lookup_checksum(dep),
}
end,
}
@ -172,25 +173,25 @@ module Rscons
# Return a list of targets that have been built
def targets
@cache[:targets].keys
@cache["targets"].keys
end
# Make any needed directories and record the ones that are created for
# removal upon a "clean" operation.
def mkdir_p(path)
parts = path.split(/[\\\/]/)
(0..parts.size-1).each do |i|
parts.each_index do |i|
subpath = File.join(*parts[0, i + 1]).encode(__ENCODING__)
unless File.exists?(subpath)
FileUtils.mkdir(subpath)
@cache[:directories][subpath] = true
@cache["directories"][subpath] = true
end
end
end
# Return a list of directories which were created as a part of the build
def directories
@cache[:directories].keys
@cache["directories"].keys
end
# Private Instance Methods
@ -206,7 +207,7 @@ module Rscons
# Calculate and return a file's checksum
# @param file [String] The file name.
def calculate_checksum(file)
@lookup_checksums[file] = Digest::MD5.hexdigest(File.read(file, mode: 'rb')).encode(__ENCODING__) rescue ''
@lookup_checksums[file] = Digest::MD5.hexdigest(File.read(file, mode: "rb")).encode(__ENCODING__) rescue ""
end
end
end

View File

@ -5,7 +5,7 @@ module Rscons
end
def build_from(cache)
YAML.should_receive(:load).and_return(cache)
JSON.should_receive(:load).and_return(cache)
Cache.new
end
@ -19,7 +19,7 @@ module Rscons
describe "#initialize" do
context "when corrupt" do
it "prints a warning and defaults to an empty hash" do
YAML.should_receive(:load).and_return("string")
JSON.should_receive(:load).and_return("string")
$stderr.should_receive(:puts).with(/Warning:.*was.corrupt/)
Cache.new.instance_variable_get(:@cache).is_a?(Hash).should be_true
end
@ -27,13 +27,13 @@ module Rscons
end
describe "#write" do
it "should fill in :version and write to file" do
it "should fill in 'version' and write to file" do
cache = {}
fh = $stdout
fh.should_receive(:puts)
File.should_receive(:open).and_yield(fh)
build_from(cache).write
cache[:version].should == Rscons::VERSION
cache["version"].should == Rscons::VERSION
end
end
@ -54,7 +54,7 @@ module Rscons
end
it "returns false when the target's checksum does not match" do
_cache = {targets: {"target" => {checksum: "abc"}}}
_cache = {"targets" => {"target" => {"checksum" => "abc"}}}
cache = build_from(_cache)
File.should_receive(:exists?).with("target").and_return(true)
cache.should_receive(:calculate_checksum).with("target").and_return("def")
@ -62,7 +62,7 @@ module Rscons
end
it "returns false when the build command has changed" do
_cache = {targets: {"target" => {checksum: "abc", command: "old command"}}}
_cache = {"targets" => {"target" => {"checksum" => "abc", "command" => "old command"}}}
cache = build_from(_cache)
File.should_receive(:exists?).with("target").and_return(true)
cache.should_receive(:calculate_checksum).with("target").and_return("abc")
@ -70,9 +70,9 @@ module Rscons
end
it "returns false when there is a new dependency" do
_cache = {targets: {"target" => {checksum: "abc",
command: "command",
deps: [{fname: "dep.1"}]}}}
_cache = {"targets" => {"target" => {"checksum" => "abc",
"command" => "command",
"deps" => [{"fname" => "dep.1"}]}}}
cache = build_from(_cache)
File.should_receive(:exists?).with("target").and_return(true)
cache.should_receive(:calculate_checksum).with("target").and_return("abc")
@ -80,15 +80,15 @@ module Rscons
end
it "returns false when a dependency's checksum has changed" do
_cache = {targets: {"target" => {checksum: "abc",
command: "command",
deps: [{fname: "dep.1",
checksum: "dep.1.chk"},
{fname: "dep.2",
checksum: "dep.2.chk"},
{fname: "extra.dep",
checksum: "extra.dep.chk"}],
user_deps: []}}}
_cache = {"targets" => {"target" => {"checksum" => "abc",
"command" => "command",
"deps" => [{"fname" => "dep.1",
"checksum" => "dep.1.chk"},
{"fname" => "dep.2",
"checksum" => "dep.2.chk"},
{"fname" => "extra.dep",
"checksum" => "extra.dep.chk"}],
"user_deps" => []}}}
cache = build_from(_cache)
File.should_receive(:exists?).with("target").and_return(true)
cache.should_receive(:calculate_checksum).with("target").and_return("abc")
@ -98,15 +98,15 @@ module Rscons
end
it "returns false with strict_deps=true when cache has an extra dependency" do
_cache = {targets: {"target" => {checksum: "abc",
command: "command",
deps: [{fname: "dep.1",
checksum: "dep.1.chk"},
{fname: "dep.2",
checksum: "dep.2.chk"},
{fname: "extra.dep",
checksum: "extra.dep.chk"}],
user_deps: []}}}
_cache = {"targets" => {"target" => {"checksum" => "abc",
"command" => "command",
"deps" => [{"fname" => "dep.1",
"checksum" => "dep.1.chk"},
{"fname" => "dep.2",
"checksum" => "dep.2.chk"},
{"fname" => "extra.dep",
"checksum" => "extra.dep.chk"}],
"user_deps" => []}}}
cache = build_from(_cache)
File.should_receive(:exists?).with("target").and_return(true)
cache.should_receive(:calculate_checksum).with("target").and_return("abc")
@ -114,10 +114,10 @@ module Rscons
end
it "returns false when there is a new user dependency" do
_cache = {targets: {"target" => {checksum: "abc",
command: "command",
deps: [{fname: "dep.1"}],
user_deps: []}}}
_cache = {"targets" => {"target" => {"checksum" => "abc",
"command" => "command",
"deps" => [{"fname" => "dep.1"}],
"user_deps" => []}}}
cache = build_from(_cache)
env = "env"
env.should_receive(:get_user_deps).with("target").and_return(["file.ld"])
@ -127,16 +127,16 @@ module Rscons
end
it "returns false when a user dependency checksum has changed" do
_cache = {targets: {"target" => {checksum: "abc",
command: "command",
deps: [{fname: "dep.1",
checksum: "dep.1.chk"},
{fname: "dep.2",
checksum: "dep.2.chk"},
{fname: "extra.dep",
checksum: "extra.dep.chk"}],
user_deps: [{fname: "user.dep",
checksum: "user.dep.chk"}]}}}
_cache = {"targets" => {"target" => {"checksum" => "abc",
"command" => "command",
"deps" => [{"fname" => "dep.1",
"checksum" => "dep.1.chk"},
{"fname" => "dep.2",
"checksum" => "dep.2.chk"},
{"fname" => "extra.dep",
"checksum" => "extra.dep.chk"}],
"user_deps" => [{"fname" => "user.dep",
"checksum" => "user.dep.chk"}]}}}
cache = build_from(_cache)
env = "env"
env.should_receive(:get_user_deps).with("target").and_return(["user.dep"])
@ -150,15 +150,15 @@ module Rscons
end
it "returns true when no condition for false is met" do
_cache = {targets: {"target" => {checksum: "abc",
command: "command",
deps: [{fname: "dep.1",
checksum: "dep.1.chk"},
{fname: "dep.2",
checksum: "dep.2.chk"},
{fname: "extra.dep",
checksum: "extra.dep.chk"}],
user_deps: []}}}
_cache = {"targets" => {"target" => {"checksum" => "abc",
"command" => "command",
"deps" => [{"fname" => "dep.1",
"checksum" => "dep.1.chk"},
{"fname" => "dep.2",
"checksum" => "dep.2.chk"},
{"fname" => "extra.dep",
"checksum" => "extra.dep.chk"}],
"user_deps" => []}}}
cache = build_from(_cache)
File.should_receive(:exists?).with("target").and_return(true)
cache.should_receive(:calculate_checksum).with("target").and_return("abc")
@ -180,23 +180,23 @@ module Rscons
cache.should_receive(:calculate_checksum).with("dep 2").and_return("dep 2 checksum")
cache.should_receive(:calculate_checksum).with("user.dep").and_return("user.dep checksum")
cache.register_build("the target", "the command", ["dep 1", "dep 2"], env)
cached_target = cache.instance_variable_get(:@cache)[:targets]["the target"]
cached_target = cache.instance_variable_get(:@cache)["targets"]["the target"]
cached_target.should_not be_nil
cached_target[:command].should == "the command"
cached_target[:checksum].should == "the checksum"
cached_target[:deps].should == [
{fname: "dep 1", checksum: "dep 1 checksum"},
{fname: "dep 2", checksum: "dep 2 checksum"},
cached_target["command"].should == "the command"
cached_target["checksum"].should == "the checksum"
cached_target["deps"].should == [
{"fname" => "dep 1", "checksum" => "dep 1 checksum"},
{"fname" => "dep 2", "checksum" => "dep 2 checksum"},
]
cached_target[:user_deps].should == [
{fname: "user.dep", checksum: "user.dep checksum"},
cached_target["user_deps"].should == [
{"fname" => "user.dep", "checksum" => "user.dep checksum"},
]
end
end
describe "#targets" do
it "returns a list of targets that are cached" do
cache = {targets: {"t1" => {}, "t2" => {}, "t3" => {}}}
cache = {"targets" => {"t1" => {}, "t2" => {}, "t3" => {}}}
build_from(cache).targets.should == ["t1", "t2", "t3"]
end
end
@ -222,7 +222,7 @@ module Rscons
describe "#directories" do
it "returns a list of directories that are cached" do
_cache = {directories: {"dir1" => true, "dir2" => true}}
_cache = {"directories" => {"dir1" => true, "dir2" => true}}
build_from(_cache).directories.should == ["dir1", "dir2"]
end
end