Skip to content

Commit

Permalink
builtins.fetchGit: Use clean, unsmudged files
Browse files Browse the repository at this point in the history
  • Loading branch information
roberth committed Mar 18, 2021
1 parent 17d3064 commit 446ca7c
Show file tree
Hide file tree
Showing 2 changed files with 181 additions and 61 deletions.
207 changes: 150 additions & 57 deletions src/libfetchers/git.cc
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,14 @@ struct GitInputScheme : InputScheme
if (submodules) cacheType += "-submodules";
if (allRefs) cacheType += "-all-refs";

bool isDirtyTree = false;

// Ideally we'd use `write-tree` for the submodules as well.
// For now, we use a diff to get the submodule changes, which should
// work as long as the generated diff does not depend on smudge/clean
// behavior.
std::string dirtySubmoduleDiff;

auto getImmutableAttrs = [&]()
{
return Attrs({
Expand All @@ -196,6 +204,13 @@ struct GitInputScheme : InputScheme
if (!shallow)
input.attrs.insert_or_assign("revCount", getIntAttr(infoAttrs, "revCount"));
input.attrs.insert_or_assign("lastModified", getIntAttr(infoAttrs, "lastModified"));

// If the tree is dirty, we use a tree hash internally, but we don't
// want to expose it.
if (isDirtyTree) {
input.attrs.insert_or_assign("rev", "0000000000000000000000000000000000000000");
}

return {
Tree(store->toRealPath(storePath), std::move(storePath)),
input
Expand All @@ -210,6 +225,8 @@ struct GitInputScheme : InputScheme
auto [isLocal, actualUrl_] = getActualUrl(input);
auto actualUrl = actualUrl_; // work around clang bug

bool haveHEAD = true;

// If this is a local directory and no ref or revision is
// given, then allow the use of an unclean working tree.
if (!input.getRef() && !input.getRev() && isLocal) {
Expand All @@ -226,10 +243,10 @@ struct GitInputScheme : InputScheme
if (commonGitDir != ".git")
gitDir = commonGitDir;

bool haveCommits = !readDirectory(gitDir + "/refs/heads").empty();
haveHEAD = !readDirectory(gitDir + "/refs/heads").empty();

try {
if (haveCommits) {
if (haveHEAD) {
runProgram("git", true, { "-C", actualUrl, "diff-index", "--quiet", "HEAD", "--" });
clean = true;
}
Expand All @@ -239,52 +256,75 @@ struct GitInputScheme : InputScheme

if (!clean) {

/* This is an unclean working tree. So copy all tracked files. */
/* This is an unclean working tree. We can't use the worktree
files, because those may be smudged. */

if (!settings.allowDirty)
throw Error("Git tree '%s' is dirty", actualUrl);

if (settings.warnDirty)
warn("Git tree '%s' is dirty", actualUrl);

auto gitOpts = Strings({ "-C", actualUrl, "ls-files", "-z" });
if (submodules)
gitOpts.emplace_back("--recurse-submodules");

auto files = tokenizeString<std::set<std::string>>(
runProgram("git", true, gitOpts), "\0"s);

PathFilter filter = [&](const Path & p) -> bool {
assert(hasPrefix(p, actualUrl));
std::string file(p, actualUrl.size() + 1);

auto st = lstat(p);

if (S_ISDIR(st.st_mode)) {
auto prefix = file + "/";
auto i = files.lower_bound(prefix);
return i != files.end() && hasPrefix(*i, prefix);
}

return files.count(file);
};
isDirtyTree = true;

// We can't use an existing file for the temporary git index,
// so we need to use a tmpdir instead of a tmpfile.
// Non-submodule changes are captured by the tree we build using
// this temporary index.
Path tmpIndexDir = createTempDir();
AutoDelete delTmpIndexDir(tmpIndexDir, true);
Path tmpIndex = tmpIndexDir + "/tmp-git-index";

std::set<Path> files = tokenizeString<std::set<std::string>>(
runProgram("git", true, { "-C", actualUrl, "ls-files", "-z" }),
"\0"s);

{
RunOptions gitOptions("git", { "-C", actualUrl, "add", "--" });
auto env = getEnv();
env["GIT_INDEX_FILE"] = tmpIndex;
gitOptions.environment = env;
for (auto file : files)
gitOptions.args.push_back(file);

auto result = runProgram(gitOptions);
if (result.first)
throw ExecError(result.first, fmt("program git add -u %1%", statusToString(result.first)));
}
std::string tree;
{
RunOptions gitOptions("git", { "-C", actualUrl, "write-tree" });
auto env = getEnv();
env["GIT_INDEX_FILE"] = tmpIndex;
gitOptions.environment = env;

auto result = runProgram(gitOptions);
if (result.first)
throw ExecError(result.first, fmt("program git write-tree %1%", statusToString(result.first)));
tree = trim(result.second);

// Note [tree as rev]
// We set `rev` to a tree object, even if it's normally a
// commit object. This way, we get some use out of the
// cache, to avoid copying files unnecessarily.
input.attrs.insert_or_assign("rev", trim(result.second));
}

auto storePath = store->addToStore("source", actualUrl, FileIngestionMethod::Recursive, htSHA256, filter);
// Use a diff to gather submodule changes as well. See `dirtySubmoduleDiff`
if (submodules) {
RunOptions gitOptions("git", { "-C", actualUrl, "diff", tree, "--submodule=diff" });

// FIXME: maybe we should use the timestamp of the last
// modified dirty file?
input.attrs.insert_or_assign(
"lastModified",
haveCommits ? std::stoull(runProgram("git", true, { "-C", actualUrl, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0);
auto result = runProgram(gitOptions);
if (result.first)
throw ExecError(result.first, fmt("program git diff %1%", statusToString(result.first)));

return {
Tree(store->toRealPath(storePath), std::move(storePath)),
input
};
dirtySubmoduleDiff = result.second;
}
}
}

if (!input.getRef()) input.attrs.insert_or_assign("ref", isLocal ? readHead(actualUrl) : "master");
if (!input.getRef() && haveHEAD)
input.attrs.insert_or_assign("ref", isLocal ? readHead(actualUrl) : "master");

Attrs mutableAttrs({
{"type", cacheType},
Expand Down Expand Up @@ -403,49 +443,96 @@ struct GitInputScheme : InputScheme
AutoDelete delTmpDir(tmpDir, true);
PathFilter filter = defaultPathFilter;

RunOptions checkCommitOpts(
"git",
{ "-C", repoDir, "cat-file", "commit", input.getRev()->gitRev() }
);
checkCommitOpts.searchPath = true;
checkCommitOpts.mergeStderrToStdout = true;

auto result = runProgram(checkCommitOpts);
if (WEXITSTATUS(result.first) == 128
&& result.second.find("bad file") != std::string::npos
) {
throw Error(
"Cannot find Git revision '%s' in ref '%s' of repository '%s'! "
"Please make sure that the " ANSI_BOLD "rev" ANSI_NORMAL " exists on the "
ANSI_BOLD "ref" ANSI_NORMAL " you've specified or add " ANSI_BOLD
"allRefs = true;" ANSI_NORMAL " to " ANSI_BOLD "fetchGit" ANSI_NORMAL ".",
input.getRev()->gitRev(),
*input.getRef(),
actualUrl
// Skip check if rev is set to a tree object. See Note [tree as rev]
if (!isDirtyTree) {
RunOptions checkCommitOpts(
"git",
{ "-C", repoDir, "cat-file", "commit", input.getRev()->gitRev() }
);
checkCommitOpts.searchPath = true;
checkCommitOpts.mergeStderrToStdout = true;

auto result = runProgram(checkCommitOpts);
if (WEXITSTATUS(result.first) == 128
&& result.second.find("bad file") != std::string::npos
) {
throw Error(
"Cannot find Git revision '%s' in ref '%s' of repository '%s'! "
"Please make sure that the " ANSI_BOLD "rev" ANSI_NORMAL " exists on the "
ANSI_BOLD "ref" ANSI_NORMAL " you've specified or add " ANSI_BOLD
"allRefs = true;" ANSI_NORMAL " to " ANSI_BOLD "fetchGit" ANSI_NORMAL ".",
input.getRev()->gitRev(),
*input.getRef(),
actualUrl
);
}
}

if (submodules) {
Path tmpGitDir = createTempDir();
AutoDelete delTmpGitDir(tmpGitDir, true);

// For this checkout approach, we need a commit, not just a treeish.
if (isDirtyTree) {
RunOptions gitOptions("git", { "-C", actualUrl, "commit-tree", "-m", "temporary commit for dirty tree", input.getRev()->gitRev() });
auto result = runProgram(gitOptions);
if (result.first)
throw ExecError(result.first, fmt("program git commit-tree %1%", statusToString(result.first)));
input.attrs.insert_or_assign("rev", trim(result.second));
}

runProgram("git", true, { "init", tmpDir, "--separate-git-dir", tmpGitDir });
// TODO: repoDir might lack the ref (it only checks if rev
// exists, see FIXME above) so use a big hammer and fetch
// everything to ensure we get the rev.
runProgram("git", true, { "-C", tmpDir, "fetch", "--quiet", "--force",
"--update-head-ok", "--", repoDir, "refs/*:refs/*" });
"--update-head-ok", "--", repoDir, "refs/*:refs/*",
input.getRev()->gitRev() });

runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input.getRev()->gitRev() });
runProgram("git", true, { "-C", tmpDir, "remote", "add", "origin", actualUrl });
runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" });

if (dirtySubmoduleDiff.size()) {
RunOptions gitOptions("git", { "-C", tmpDir, "apply" });
StringSource s(dirtySubmoduleDiff);
gitOptions.standardIn = &s;
auto result = runProgram(gitOptions);
if (result.first)
throw ExecError(result.first, fmt("program git apply %1%", statusToString(result.first)));
}

filter = isNotDotGitDirectory;
} else {
Strings noSmudgeOptions;
{
RunOptions gitOptions("git", { "-C", repoDir, "config", "-l" });
auto result = runProgram(gitOptions);
auto ss = std::stringstream{result.second};
StringSet filters;

for (std::string line; std::getline(ss, line, '\n');) {
std::string prefix = "filter.";
std::string infix = ".smudge=";
auto infixPos = line.find(infix);
if (hasPrefix(line, prefix) && infixPos != std::string::npos) {
filters.emplace(line.substr(prefix.size(), infixPos - prefix.size()));
}
}
for (auto filter : filters) {
noSmudgeOptions.emplace_back("-c");
noSmudgeOptions.emplace_back("filter." + filter + ".smudge=cat --");
}
}

// FIXME: should pipe this, or find some better way to extract a
// revision.
auto source = sinkToSource([&](Sink & sink) {
RunOptions gitOptions("git", { "-C", repoDir, "archive", input.getRev()->gitRev() });
RunOptions gitOptions("git", noSmudgeOptions);
gitOptions.args.push_back("-C");
gitOptions.args.push_back(repoDir);
gitOptions.args.push_back("archive");
gitOptions.args.push_back(input.getRev()->gitRev());
gitOptions.standardOut = &sink;
runProgram2(gitOptions);
});
Expand All @@ -455,7 +542,13 @@ struct GitInputScheme : InputScheme

auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, filter);

auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() }));
const auto now = std::chrono::system_clock::now();

// FIXME: when isDirtyTree, maybe we should use the timestamp
// of the last modified dirty file?
auto lastModified = isDirtyTree ?
(std::chrono::duration_cast<std::chrono::seconds>(now.time_since_epoch()).count())
: std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() }));

Attrs infoAttrs({
{"rev", input.getRev()->gitRev()},
Expand Down
35 changes: 31 additions & 4 deletions tests/fetchGit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,19 @@ fi
clearStore

repo=$TEST_ROOT/git
submodule_upstream=$TEST_ROOT/submodule-upstream

export _NIX_FORCE_HTTP=1

rm -rf $repo ${repo}-tmp $TEST_HOME/.cache/nix $TEST_ROOT/worktree $TEST_ROOT/shallow

git init $repo
git -C $repo config user.email "[email protected]"
git -C $repo config user.name "Foobar"
# Initialise a Git repository at the path given as $1 and set user.email
# and user.name in that repository's config.
#   $1  path of the repository to create
init_repo() {
    # Quote the path so that a location containing whitespace or glob
    # characters reaches git as a single, unmodified argument.
    git init "$1"
    git -C "$1" config user.email "[email protected]"
    git -C "$1" config user.name "Foobar"
}

init_repo $repo

echo utrecht > $repo/hello
touch $repo/.gitignore
Expand Down Expand Up @@ -283,7 +288,29 @@ cmp $path7/einstein.q <(echo "> Insanity is building the same thing over and ove
path8=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = $repo; rev = \"$rev4\"; }).outPath")
[[ $path7 = $path8 ]]

# Files are clean when fetching from a local repo with local changes without ref or rev and submodules = true.
echo "All impurity needs to gain a foothold is for people of good conscience to remain unaware of undeclared inputs." >$repo/jefferson.q
git -C $repo add -N $repo/jefferson.q

path8=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = $repo; submodules = true; }).outPath")
cmp $path8/jefferson.q <(echo "> All impurity needs to gain a foothold is for people of good conscience to remain unaware of undeclared inputs.")

# Files are clean when fetching from a local repo with local changes without ref or rev.
path9=$(nix eval --impure --raw --expr "(builtins.fetchGit $repo).outPath")
cmp $path9/jefferson.q <(echo "> All impurity needs to gain a foothold is for people of good conscience to remain unaware of undeclared inputs.")

# Changes in submodules are included
init_repo $submodule_upstream
echo "# Auxiliary quotes" >$submodule_upstream/README.md
git -C $submodule_upstream add $submodule_upstream/README.md
git -C $submodule_upstream commit -m 'First commit'
git -C $repo submodule add $submodule_upstream aux-quotes
echo > $repo/aux-quotes/wisdom "Teach a man to write hello and you'll confuse him for a day. Teach a man to write hello world, and you'll confuse him for a lifetime."
echo > $repo/aux-quotes/actual-einstein "Nix doesn't play dice."
git -C $repo/aux-quotes add -N $repo/aux-quotes/wisdom
git -C $repo/aux-quotes add $repo/aux-quotes/actual-einstein
# git -C $repo diff --submodule=diff
path10=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = $repo; submodules = true; }).outPath")
cmp $path10/aux-quotes/wisdom <(echo "Teach a man to write hello and you'll confuse him for a day. Teach a man to write hello world, and you'll confuse him for a lifetime.")
cmp $path10/aux-quotes/actual-einstein <(echo "Nix doesn't play dice.")

# TBD

0 comments on commit 446ca7c

Please sign in to comment.