Issue #7 - change .rsconscache from using YAML to JSON to speed up parsing

This commit is contained in:
Josh Holtrop 2014-04-14 16:52:21 -04:00
parent d8dda291a5
commit c61380b354
2 changed files with 126 additions and 125 deletions

View File

@ -1,59 +1,59 @@
require 'yaml' require "digest/md5"
require 'fileutils' require "fileutils"
require 'digest/md5' require "json"
require 'set' require "set"
require 'rscons/version' require "rscons/version"
module Rscons module Rscons
# The Cache class keeps track of file checksums, build target commands and # The Cache class keeps track of file checksums, build target commands and
# dependencies in a YAML file which persists from one invocation to the next. # dependencies in a JSON file which persists from one invocation to the next.
# Example cache: # Example cache:
# { # {
# version: '1.2.3', # "version" => "1.2.3",
# targets: { # "targets" => {
# 'program' => { # "program" => {
# 'checksum' => 'A1B2C3D4', # "checksum" => "A1B2C3D4",
# 'command' => ['gcc', '-o', 'program', 'program.o'], # "command" => ["gcc", "-o", "program", "program.o"],
# 'deps' => [ # "deps" => [
# { # {
# 'fname' => 'program.o', # "fname" => "program.o",
# 'checksum' => '87654321', # "checksum" => "87654321",
# }, # },
# ], # ],
# 'user_deps' => [ # "user_deps" => [
# { # {
# 'fname' => 'lscript.ld', # "fname" => "lscript.ld",
# 'checksum' => '77551133', # "checksum" => "77551133",
# }, # },
# ], # ],
# }, # },
# 'program.o' => { # "program.o" => {
# 'checksum' => '87654321', # "checksum" => "87654321",
# 'command' => ['gcc', '-c', '-o', 'program.o', 'program.c'], # "command" => ["gcc", "-c", "-o", "program.o", "program.c"],
# 'deps' => [ # "deps" => [
# { # {
# 'fname' => 'program.c', # "fname" => "program.c",
# 'checksum' => '456789ABC', # "checksum" => "456789ABC",
# }, # },
# { # {
# 'fname' => 'program.h', # "fname" => "program.h",
# 'checksum' => '7979764643', # "checksum" => "7979764643",
# }, # },
# ], # ],
# 'user_deps' => [], # "user_deps" => [],
# } # }
# }, # },
# directories: { # "directories" => {
# 'build' => true, # "build" => true,
# 'build/one' => true, # "build/one" => true,
# 'build/two' => true, # "build/two" => true,
# }, # },
# } # }
class Cache class Cache
#### Constants #### Constants
# Name of the file to store cache information in # Name of the file to store cache information in
CACHE_FILE = '.rsconscache' CACHE_FILE = ".rsconscache"
#### Class Methods #### Class Methods
@ -67,21 +67,21 @@ module Rscons
# Create a Cache object and load in the previous contents from the cache # Create a Cache object and load in the previous contents from the cache
# file. # file.
def initialize def initialize
@cache = YAML.load(File.read(CACHE_FILE)) rescue {} @cache = JSON.load(File.read(CACHE_FILE)) rescue {}
unless @cache.is_a?(Hash) unless @cache.is_a?(Hash)
$stderr.puts "Warning: #{CACHE_FILE} was corrupt. Contents:\n#{@cache.inspect}" $stderr.puts "Warning: #{CACHE_FILE} was corrupt. Contents:\n#{@cache.inspect}"
@cache = {} @cache = {}
end end
@cache[:targets] ||= {} @cache["targets"] ||= {}
@cache[:directories] ||= {} @cache["directories"] ||= {}
@lookup_checksums = {} @lookup_checksums = {}
end end
# Write the cache to disk to be loaded next time. # Write the cache to disk to be loaded next time.
def write def write
@cache[:version] = VERSION @cache["version"] = VERSION
File.open(CACHE_FILE, 'w') do |fh| File.open(CACHE_FILE, "w") do |fh|
fh.puts(YAML.dump(@cache)) fh.puts(JSON.dump(@cache))
end end
end end
@ -90,11 +90,12 @@ module Rscons
# @param command [String, Array] The command used to build the target. # @param command [String, Array] The command used to build the target.
# @param deps [Array] List of the target's dependency files. # @param deps [Array] List of the target's dependency files.
# @param env [Environment] The Rscons::Environment. # @param env [Environment] The Rscons::Environment.
# @param options [Hash] Optional options. Can contain the following keys: # @param options [Hash] Optional options.
# :strict_deps:: # @option options [Boolean] :strict_deps
# Only consider a target up to date if its list of dependencies is # Only consider a target up to date if its list of dependencies is
# exactly equal (including order) to the cached list of dependencies # exactly equal (including order) to the cached list of dependencies
# @return true if the targets are all up to date, meaning that, # @return [Boolean]
# True value if the targets are all up to date, meaning that,
# for each target: # for each target:
# - the target exists on disk # - the target exists on disk
# - the cache has information for the target # - the cache has information for the target
@ -111,16 +112,16 @@ module Rscons
return false unless File.exists?(target) return false unless File.exists?(target)
# target must be registered in the cache # target must be registered in the cache
return false unless @cache[:targets].has_key?(target) return false unless @cache["targets"].has_key?(target)
# target must have the same checksum as when it was built last # target must have the same checksum as when it was built last
return false unless @cache[:targets][target][:checksum] == lookup_checksum(target) return false unless @cache["targets"][target]["checksum"] == lookup_checksum(target)
# command used to build target must be identical # command used to build target must be identical
return false unless @cache[:targets][target][:command] == command return false unless @cache["targets"][target]["command"] == command
cached_deps = @cache[:targets][target][:deps] || [] cached_deps = @cache["targets"][target]["deps"] || []
cached_deps_fnames = cached_deps.map { |dc| dc[:fname] } cached_deps_fnames = cached_deps.map { |dc| dc["fname"] }
if options[:strict_deps] if options[:strict_deps]
# dependencies passed in must exactly equal those in the cache # dependencies passed in must exactly equal those in the cache
return false unless deps == cached_deps_fnames return false unless deps == cached_deps_fnames
@ -131,13 +132,13 @@ module Rscons
# set of user dependencies must match # set of user dependencies must match
user_deps = env.get_user_deps(target) || [] user_deps = env.get_user_deps(target) || []
cached_user_deps = @cache[:targets][target][:user_deps] || [] cached_user_deps = @cache["targets"][target]["user_deps"] || []
cached_user_deps_fnames = cached_user_deps.map { |dc| dc[:fname] } cached_user_deps_fnames = cached_user_deps.map { |dc| dc["fname"] }
return false unless user_deps == cached_user_deps_fnames return false unless user_deps == cached_user_deps_fnames
# all cached dependencies must have their checksums match # all cached dependencies must have their checksums match
(cached_deps + cached_user_deps).each do |dep_cache| (cached_deps + cached_user_deps).each do |dep_cache|
return false unless dep_cache[:checksum] == lookup_checksum(dep_cache[:fname]) return false unless dep_cache["checksum"] == lookup_checksum(dep_cache["fname"])
end end
end end
@ -148,22 +149,22 @@ module Rscons
# @param targets [String, Array] The name of the target(s) built. # @param targets [String, Array] The name of the target(s) built.
# @param command [String, Array] The command used to build the target. # @param command [String, Array] The command used to build the target.
# @param deps [Array] List of dependencies for the target. # @param deps [Array] List of dependencies for the target.
# @param env [Environment] The Rscons::Environment. # @param env [Environment] The {Rscons::Environment}.
def register_build(targets, command, deps, env) def register_build(targets, command, deps, env)
Array(targets).each do |target| Array(targets).each do |target|
@cache[:targets][target.encode(__ENCODING__)] = { @cache["targets"][target.encode(__ENCODING__)] = {
command: command, "command" => command,
checksum: calculate_checksum(target), "checksum" => calculate_checksum(target),
deps: deps.map do |dep| "deps" => deps.map do |dep|
{ {
fname: dep.encode(__ENCODING__), "fname" => dep.encode(__ENCODING__),
checksum: lookup_checksum(dep), "checksum" => lookup_checksum(dep),
} }
end, end,
user_deps: (env.get_user_deps(target) || []).map do |dep| "user_deps" => (env.get_user_deps(target) || []).map do |dep|
{ {
fname: dep.encode(__ENCODING__), "fname" => dep.encode(__ENCODING__),
checksum: lookup_checksum(dep), "checksum" => lookup_checksum(dep),
} }
end, end,
} }
@ -172,25 +173,25 @@ module Rscons
# Return a list of targets that have been built # Return a list of targets that have been built
def targets def targets
@cache[:targets].keys @cache["targets"].keys
end end
# Make any needed directories and record the ones that are created for # Make any needed directories and record the ones that are created for
# removal upon a "clean" operation. # removal upon a "clean" operation.
def mkdir_p(path) def mkdir_p(path)
parts = path.split(/[\\\/]/) parts = path.split(/[\\\/]/)
(0..parts.size-1).each do |i| parts.each_index do |i|
subpath = File.join(*parts[0, i + 1]).encode(__ENCODING__) subpath = File.join(*parts[0, i + 1]).encode(__ENCODING__)
unless File.exists?(subpath) unless File.exists?(subpath)
FileUtils.mkdir(subpath) FileUtils.mkdir(subpath)
@cache[:directories][subpath] = true @cache["directories"][subpath] = true
end end
end end
end end
# Return a list of directories which were created as a part of the build # Return a list of directories which were created as a part of the build
def directories def directories
@cache[:directories].keys @cache["directories"].keys
end end
# Private Instance Methods # Private Instance Methods
@ -206,7 +207,7 @@ module Rscons
# Calculate and return a file's checksum # Calculate and return a file's checksum
# @param file [String] The file name. # @param file [String] The file name.
def calculate_checksum(file) def calculate_checksum(file)
@lookup_checksums[file] = Digest::MD5.hexdigest(File.read(file, mode: 'rb')).encode(__ENCODING__) rescue '' @lookup_checksums[file] = Digest::MD5.hexdigest(File.read(file, mode: "rb")).encode(__ENCODING__) rescue ""
end end
end end
end end

View File

@ -5,7 +5,7 @@ module Rscons
end end
def build_from(cache) def build_from(cache)
YAML.should_receive(:load).and_return(cache) JSON.should_receive(:load).and_return(cache)
Cache.new Cache.new
end end
@ -19,7 +19,7 @@ module Rscons
describe "#initialize" do describe "#initialize" do
context "when corrupt" do context "when corrupt" do
it "prints a warning and defaults to an empty hash" do it "prints a warning and defaults to an empty hash" do
YAML.should_receive(:load).and_return("string") JSON.should_receive(:load).and_return("string")
$stderr.should_receive(:puts).with(/Warning:.*was.corrupt/) $stderr.should_receive(:puts).with(/Warning:.*was.corrupt/)
Cache.new.instance_variable_get(:@cache).is_a?(Hash).should be_true Cache.new.instance_variable_get(:@cache).is_a?(Hash).should be_true
end end
@ -27,13 +27,13 @@ module Rscons
end end
describe "#write" do describe "#write" do
it "should fill in :version and write to file" do it "should fill in 'version' and write to file" do
cache = {} cache = {}
fh = $stdout fh = $stdout
fh.should_receive(:puts) fh.should_receive(:puts)
File.should_receive(:open).and_yield(fh) File.should_receive(:open).and_yield(fh)
build_from(cache).write build_from(cache).write
cache[:version].should == Rscons::VERSION cache["version"].should == Rscons::VERSION
end end
end end
@ -54,7 +54,7 @@ module Rscons
end end
it "returns false when the target's checksum does not match" do it "returns false when the target's checksum does not match" do
_cache = {targets: {"target" => {checksum: "abc"}}} _cache = {"targets" => {"target" => {"checksum" => "abc"}}}
cache = build_from(_cache) cache = build_from(_cache)
File.should_receive(:exists?).with("target").and_return(true) File.should_receive(:exists?).with("target").and_return(true)
cache.should_receive(:calculate_checksum).with("target").and_return("def") cache.should_receive(:calculate_checksum).with("target").and_return("def")
@ -62,7 +62,7 @@ module Rscons
end end
it "returns false when the build command has changed" do it "returns false when the build command has changed" do
_cache = {targets: {"target" => {checksum: "abc", command: "old command"}}} _cache = {"targets" => {"target" => {"checksum" => "abc", "command" => "old command"}}}
cache = build_from(_cache) cache = build_from(_cache)
File.should_receive(:exists?).with("target").and_return(true) File.should_receive(:exists?).with("target").and_return(true)
cache.should_receive(:calculate_checksum).with("target").and_return("abc") cache.should_receive(:calculate_checksum).with("target").and_return("abc")
@ -70,9 +70,9 @@ module Rscons
end end
it "returns false when there is a new dependency" do it "returns false when there is a new dependency" do
_cache = {targets: {"target" => {checksum: "abc", _cache = {"targets" => {"target" => {"checksum" => "abc",
command: "command", "command" => "command",
deps: [{fname: "dep.1"}]}}} "deps" => [{"fname" => "dep.1"}]}}}
cache = build_from(_cache) cache = build_from(_cache)
File.should_receive(:exists?).with("target").and_return(true) File.should_receive(:exists?).with("target").and_return(true)
cache.should_receive(:calculate_checksum).with("target").and_return("abc") cache.should_receive(:calculate_checksum).with("target").and_return("abc")
@ -80,15 +80,15 @@ module Rscons
end end
it "returns false when a dependency's checksum has changed" do it "returns false when a dependency's checksum has changed" do
_cache = {targets: {"target" => {checksum: "abc", _cache = {"targets" => {"target" => {"checksum" => "abc",
command: "command", "command" => "command",
deps: [{fname: "dep.1", "deps" => [{"fname" => "dep.1",
checksum: "dep.1.chk"}, "checksum" => "dep.1.chk"},
{fname: "dep.2", {"fname" => "dep.2",
checksum: "dep.2.chk"}, "checksum" => "dep.2.chk"},
{fname: "extra.dep", {"fname" => "extra.dep",
checksum: "extra.dep.chk"}], "checksum" => "extra.dep.chk"}],
user_deps: []}}} "user_deps" => []}}}
cache = build_from(_cache) cache = build_from(_cache)
File.should_receive(:exists?).with("target").and_return(true) File.should_receive(:exists?).with("target").and_return(true)
cache.should_receive(:calculate_checksum).with("target").and_return("abc") cache.should_receive(:calculate_checksum).with("target").and_return("abc")
@ -98,15 +98,15 @@ module Rscons
end end
it "returns false with strict_deps=true when cache has an extra dependency" do it "returns false with strict_deps=true when cache has an extra dependency" do
_cache = {targets: {"target" => {checksum: "abc", _cache = {"targets" => {"target" => {"checksum" => "abc",
command: "command", "command" => "command",
deps: [{fname: "dep.1", "deps" => [{"fname" => "dep.1",
checksum: "dep.1.chk"}, "checksum" => "dep.1.chk"},
{fname: "dep.2", {"fname" => "dep.2",
checksum: "dep.2.chk"}, "checksum" => "dep.2.chk"},
{fname: "extra.dep", {"fname" => "extra.dep",
checksum: "extra.dep.chk"}], "checksum" => "extra.dep.chk"}],
user_deps: []}}} "user_deps" => []}}}
cache = build_from(_cache) cache = build_from(_cache)
File.should_receive(:exists?).with("target").and_return(true) File.should_receive(:exists?).with("target").and_return(true)
cache.should_receive(:calculate_checksum).with("target").and_return("abc") cache.should_receive(:calculate_checksum).with("target").and_return("abc")
@ -114,10 +114,10 @@ module Rscons
end end
it "returns false when there is a new user dependency" do it "returns false when there is a new user dependency" do
_cache = {targets: {"target" => {checksum: "abc", _cache = {"targets" => {"target" => {"checksum" => "abc",
command: "command", "command" => "command",
deps: [{fname: "dep.1"}], "deps" => [{"fname" => "dep.1"}],
user_deps: []}}} "user_deps" => []}}}
cache = build_from(_cache) cache = build_from(_cache)
env = "env" env = "env"
env.should_receive(:get_user_deps).with("target").and_return(["file.ld"]) env.should_receive(:get_user_deps).with("target").and_return(["file.ld"])
@ -127,16 +127,16 @@ module Rscons
end end
it "returns false when a user dependency checksum has changed" do it "returns false when a user dependency checksum has changed" do
_cache = {targets: {"target" => {checksum: "abc", _cache = {"targets" => {"target" => {"checksum" => "abc",
command: "command", "command" => "command",
deps: [{fname: "dep.1", "deps" => [{"fname" => "dep.1",
checksum: "dep.1.chk"}, "checksum" => "dep.1.chk"},
{fname: "dep.2", {"fname" => "dep.2",
checksum: "dep.2.chk"}, "checksum" => "dep.2.chk"},
{fname: "extra.dep", {"fname" => "extra.dep",
checksum: "extra.dep.chk"}], "checksum" => "extra.dep.chk"}],
user_deps: [{fname: "user.dep", "user_deps" => [{"fname" => "user.dep",
checksum: "user.dep.chk"}]}}} "checksum" => "user.dep.chk"}]}}}
cache = build_from(_cache) cache = build_from(_cache)
env = "env" env = "env"
env.should_receive(:get_user_deps).with("target").and_return(["user.dep"]) env.should_receive(:get_user_deps).with("target").and_return(["user.dep"])
@ -150,15 +150,15 @@ module Rscons
end end
it "returns true when no condition for false is met" do it "returns true when no condition for false is met" do
_cache = {targets: {"target" => {checksum: "abc", _cache = {"targets" => {"target" => {"checksum" => "abc",
command: "command", "command" => "command",
deps: [{fname: "dep.1", "deps" => [{"fname" => "dep.1",
checksum: "dep.1.chk"}, "checksum" => "dep.1.chk"},
{fname: "dep.2", {"fname" => "dep.2",
checksum: "dep.2.chk"}, "checksum" => "dep.2.chk"},
{fname: "extra.dep", {"fname" => "extra.dep",
checksum: "extra.dep.chk"}], "checksum" => "extra.dep.chk"}],
user_deps: []}}} "user_deps" => []}}}
cache = build_from(_cache) cache = build_from(_cache)
File.should_receive(:exists?).with("target").and_return(true) File.should_receive(:exists?).with("target").and_return(true)
cache.should_receive(:calculate_checksum).with("target").and_return("abc") cache.should_receive(:calculate_checksum).with("target").and_return("abc")
@ -180,23 +180,23 @@ module Rscons
cache.should_receive(:calculate_checksum).with("dep 2").and_return("dep 2 checksum") cache.should_receive(:calculate_checksum).with("dep 2").and_return("dep 2 checksum")
cache.should_receive(:calculate_checksum).with("user.dep").and_return("user.dep checksum") cache.should_receive(:calculate_checksum).with("user.dep").and_return("user.dep checksum")
cache.register_build("the target", "the command", ["dep 1", "dep 2"], env) cache.register_build("the target", "the command", ["dep 1", "dep 2"], env)
cached_target = cache.instance_variable_get(:@cache)[:targets]["the target"] cached_target = cache.instance_variable_get(:@cache)["targets"]["the target"]
cached_target.should_not be_nil cached_target.should_not be_nil
cached_target[:command].should == "the command" cached_target["command"].should == "the command"
cached_target[:checksum].should == "the checksum" cached_target["checksum"].should == "the checksum"
cached_target[:deps].should == [ cached_target["deps"].should == [
{fname: "dep 1", checksum: "dep 1 checksum"}, {"fname" => "dep 1", "checksum" => "dep 1 checksum"},
{fname: "dep 2", checksum: "dep 2 checksum"}, {"fname" => "dep 2", "checksum" => "dep 2 checksum"},
] ]
cached_target[:user_deps].should == [ cached_target["user_deps"].should == [
{fname: "user.dep", checksum: "user.dep checksum"}, {"fname" => "user.dep", "checksum" => "user.dep checksum"},
] ]
end end
end end
describe "#targets" do describe "#targets" do
it "returns a list of targets that are cached" do it "returns a list of targets that are cached" do
cache = {targets: {"t1" => {}, "t2" => {}, "t3" => {}}} cache = {"targets" => {"t1" => {}, "t2" => {}, "t3" => {}}}
build_from(cache).targets.should == ["t1", "t2", "t3"] build_from(cache).targets.should == ["t1", "t2", "t3"]
end end
end end
@ -222,7 +222,7 @@ module Rscons
describe "#directories" do describe "#directories" do
it "returns a list of directories that are cached" do it "returns a list of directories that are cached" do
_cache = {directories: {"dir1" => true, "dir2" => true}} _cache = {"directories" => {"dir1" => true, "dir2" => true}}
build_from(_cache).directories.should == ["dir1", "dir2"] build_from(_cache).directories.should == ["dir1", "dir2"]
end end
end end