From f58604ac32d0ab0718a0e68b3c1b050e4ab121cb Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Fri, 25 Oct 2019 17:45:14 +0200 Subject: [PATCH 01/27] Add fetchSubmodules to builtins.fetchGit There are some downsides to this features: - Submodules are not cached (unlike the root repo), - Full checkouts are created in a temporary directory. --- src/libexpr/primops/fetchGit.cc | 45 +++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/src/libexpr/primops/fetchGit.cc b/src/libexpr/primops/fetchGit.cc index 4aee1073e..b08e48404 100644 --- a/src/libexpr/primops/fetchGit.cc +++ b/src/libexpr/primops/fetchGit.cc @@ -22,13 +22,14 @@ struct GitInfo std::string rev; std::string shortRev; uint64_t revCount = 0; + bool submodules = false; }; std::regex revRegex("^[0-9a-fA-F]{40}$"); GitInfo exportGit(ref store, const std::string & uri, std::optional ref, std::string rev, - const std::string & name) + const std::string & name, bool fetchSubmodules) { if (evalSettings.pureEval && rev == "") throw Error("in pure evaluation mode, 'fetchGit' requires a Git revision"); @@ -145,7 +146,8 @@ GitInfo exportGit(ref store, const std::string & uri, printTalkative("using revision %s of repo '%s'", gitInfo.rev, uri); - std::string storeLinkName = hashString(htSHA512, name + std::string("\0"s) + gitInfo.rev).to_string(Base32, false); + std::string storeLinkName = hashString(htSHA512, name + std::string("\0"s) + gitInfo.rev + + (fetchSubmodules ? "submodules" : "")).to_string(Base32, false); Path storeLink = cacheDir + "/" + storeLinkName + ".link"; PathLocks storeLinkLock({storeLink}, fmt("waiting for lock on '%1%'...", storeLink)); // FIXME: broken @@ -165,16 +167,35 @@ GitInfo exportGit(ref store, const std::string & uri, if (e.errNo != ENOENT) throw; } - auto source = sinkToSource([&](Sink & sink) { - RunOptions gitOptions("git", { "-C", cacheDir, "archive", gitInfo.rev }); - gitOptions.standardOut = &sink; - runProgram2(gitOptions); - }); - Path tmpDir = createTempDir(); AutoDelete delTmpDir(tmpDir, true); - unpackTarfile(*source, tmpDir); + // Submodule support can be improved by adding caching to the submodules themselves. At the moment, only the root + // repo is cached. 
+ if (fetchSubmodules) { + Path tmpGitDir = createTempDir(); + AutoDelete delTmpGitDir(tmpGitDir, true); + + runProgram("git", true, { "init", tmpDir, "--separate-git-dir", tmpGitDir }); + runProgram("git", true, { "-C", tmpDir, "fetch", "--quiet", "--force", + "--", cacheDir, fmt("%s:%s", *ref, *ref) }); + + runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", "FETCH_HEAD" }); + runProgram("git", true, { "-C", tmpDir, "remote", "add", "origin", uri }); + runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init" }); + + deletePath(tmpDir + "/.git"); + + gitInfo.submodules = true; + } else { + auto source = sinkToSource([&](Sink & sink) { + RunOptions gitOptions("git", { "-C", cacheDir, "archive", gitInfo.rev }); + gitOptions.standardOut = &sink; + runProgram2(gitOptions); + }); + + unpackTarfile(*source, tmpDir); + } gitInfo.storePath = store->printStorePath(store->addToStore(name, tmpDir)); @@ -198,6 +219,7 @@ static void prim_fetchGit(EvalState & state, const Pos & pos, Value * * args, Va std::optional ref; std::string rev; std::string name = "source"; + bool fetchSubmodules = false; PathSet context; state.forceValue(*args[0]); @@ -216,6 +238,8 @@ static void prim_fetchGit(EvalState & state, const Pos & pos, Value * * args, Va rev = state.forceStringNoCtx(*attr.value, *attr.pos); else if (n == "name") name = state.forceStringNoCtx(*attr.value, *attr.pos); + else if (n == "fetchSubmodules") + fetchSubmodules = state.forceBool(*attr.value, *attr.pos); else throw EvalError("unsupported argument '%s' to 'fetchGit', at %s", attr.name, *attr.pos); } @@ -230,13 +254,14 @@ static void prim_fetchGit(EvalState & state, const Pos & pos, Value * * args, Va // whitelist. Ah well. state.checkURI(url); - auto gitInfo = exportGit(state.store, url, ref, rev, name); + auto gitInfo = exportGit(state.store, url, ref, rev, name, fetchSubmodules); state.mkAttrs(v, 8); mkString(*state.allocAttr(v, state.sOutPath), gitInfo.storePath, PathSet({gitInfo.storePath})); mkString(*state.allocAttr(v, state.symbols.create("rev")), gitInfo.rev); mkString(*state.allocAttr(v, state.symbols.create("shortRev")), gitInfo.shortRev); mkInt(*state.allocAttr(v, state.symbols.create("revCount")), gitInfo.revCount); + mkBool(*state.allocAttr(v, state.symbols.create("submodules")), gitInfo.submodules); v.attrs->sort(); if (state.allowedPaths) From c2a24c2b8876a0862ede39f63e684cb875cc7aab Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Sat, 26 Oct 2019 11:09:50 +0200 Subject: [PATCH 02/27] Add test for fetchGit submodule support --- tests/fetchGitSubmodules.sh | 50 +++++++++++++++++++++++++++++++++++++ tests/local.mk | 1 + 2 files changed, 51 insertions(+) create mode 100644 tests/fetchGitSubmodules.sh diff --git a/tests/fetchGitSubmodules.sh b/tests/fetchGitSubmodules.sh new file mode 100644 index 000000000..c821d2caa --- /dev/null +++ b/tests/fetchGitSubmodules.sh @@ -0,0 +1,50 @@ +source common.sh + +set -u + +if [[ -z $(type -p git) ]]; then + echo "Git not installed; skipping Git submodule tests" + exit 99 +fi + +clearStore + +rootRepo=$TEST_ROOT/gitSubmodulesRoot +subRepo=$TEST_ROOT/gitSubmodulesSub + +rm -rf ${rootRepo} ${subRepo} $TEST_HOME/.cache/nix/gitv2 + +initGitRepo() { + git init $1 + git -C $1 config user.email "foobar@example.com" + git -C $1 config user.name "Foobar" +} + +addGitContent() { + echo "lorem ipsum" > $1/content + git -C $1 add content + git -C $1 commit -m "Initial commit" +} + +initGitRepo $subRepo +addGitContent $subRepo + +initGitRepo $rootRepo + +git -C 
$rootRepo submodule init +git -C $rootRepo submodule add $subRepo sub +git -C $rootRepo add sub +git -C $rootRepo commit -m "Add submodule" + +rev=$(git -C $rootRepo rev-parse HEAD) + +pathWithoutSubmodules=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; }).outPath") +pathWithSubmodules=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; fetchSubmodules = true; }).outPath") + +# The resulting store path cannot be the same. +[[ $pathWithoutSubmodules != $pathWithSubmodules ]] + +[[ ! -e $pathWithoutSubmodules/sub/content ]] +[[ -e $pathWithSubmodules/sub/content ]] + + diff --git a/tests/local.mk b/tests/local.mk index dab3a23b6..01fac4fcd 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -17,6 +17,7 @@ nix_tests = \ nar-access.sh \ structured-attrs.sh \ fetchGit.sh \ + fetchGitSubmodules.sh \ fetchMercurial.sh \ signing.sh \ run.sh \ From ea861be292d790db9835c08abee734baa77b9cf7 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Sat, 26 Oct 2019 11:13:08 +0200 Subject: [PATCH 03/27] Add documentation for submodule support in fetchGit --- doc/manual/expressions/builtins.xml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/manual/expressions/builtins.xml b/doc/manual/expressions/builtins.xml index 394e1fc32..838edae3c 100644 --- a/doc/manual/expressions/builtins.xml +++ b/doc/manual/expressions/builtins.xml @@ -422,6 +422,16 @@ stdenv.mkDerivation { … } + + fetchSubmodules + + + A boolean parameter that specifies whether submodules + should be checked out. Defaults to + false. + + + From c8d33de77781fe7850d04a7374c6c22a2e995b70 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Mon, 2 Mar 2020 14:42:19 +0100 Subject: [PATCH 04/27] Add git submodule fixes from @bjornfor This fixes fetching repositories with no submodules and also cleans up .git files in checkouts. 
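As a rough usage sketch of the feature these fixes target (the URL and
revision below are placeholders, not part of this change):

    builtins.fetchGit {
      url = "https://example.org/repo.git";   # placeholder URL
      rev = "<40-character commit hash>";     # placeholder revision
      fetchSubmodules = true;                 # defaults to false
    }

The resulting outPath is expected to contain the submodule contents checked
out in place, with no .git directories or submodule reference files left
behind (this is what the tests in this series check).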
--- src/libexpr/local.mk | 1 + src/libexpr/primops/fetchGit.cc | 9 +++++++-- tests/fetchGitSubmodules.sh | 3 ++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/libexpr/local.mk b/src/libexpr/local.mk index 8a9b3c2ea..9b5fcc561 100644 --- a/src/libexpr/local.mk +++ b/src/libexpr/local.mk @@ -9,6 +9,7 @@ libexpr_SOURCES := $(wildcard $(d)/*.cc) $(wildcard $(d)/primops/*.cc) $(d)/lexe libexpr_LIBS = libutil libstore libnixrust libexpr_LDFLAGS = + ifneq ($(OS), FreeBSD) libexpr_LDFLAGS += -ldl endif diff --git a/src/libexpr/primops/fetchGit.cc b/src/libexpr/primops/fetchGit.cc index b08e48404..c58d28bbc 100644 --- a/src/libexpr/primops/fetchGit.cc +++ b/src/libexpr/primops/fetchGit.cc @@ -6,6 +6,7 @@ #include "hash.hh" #include "tarfile.hh" +#include #include #include @@ -182,9 +183,13 @@ GitInfo exportGit(ref store, const std::string & uri, runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", "FETCH_HEAD" }); runProgram("git", true, { "-C", tmpDir, "remote", "add", "origin", uri }); - runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init" }); + runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" }); - deletePath(tmpDir + "/.git"); + for (const auto& p : std::filesystem::recursive_directory_iterator(tmpDir)) { + if (p.path().filename() == ".git") { + std::filesystem::remove_all(p.path()); + } + } gitInfo.submodules = true; } else { diff --git a/tests/fetchGitSubmodules.sh b/tests/fetchGitSubmodules.sh index c821d2caa..987bfd69e 100644 --- a/tests/fetchGitSubmodules.sh +++ b/tests/fetchGitSubmodules.sh @@ -47,4 +47,5 @@ pathWithSubmodules=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo [[ ! -e $pathWithoutSubmodules/sub/content ]] [[ -e $pathWithSubmodules/sub/content ]] - +# No .git directory or submodule reference files must be left +test "$(find "$pathWithSubmodules" -name .git)" = "" From c846abb5cc900d5fcb5aeb48427a1eac646cc0f3 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Mon, 2 Mar 2020 14:46:19 +0100 Subject: [PATCH 05/27] Add more test for git submodule functionality --- tests/fetchGitSubmodules.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/fetchGitSubmodules.sh b/tests/fetchGitSubmodules.sh index 987bfd69e..c252d42f7 100644 --- a/tests/fetchGitSubmodules.sh +++ b/tests/fetchGitSubmodules.sh @@ -40,12 +40,23 @@ rev=$(git -C $rootRepo rev-parse HEAD) pathWithoutSubmodules=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; }).outPath") pathWithSubmodules=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; fetchSubmodules = true; }).outPath") +pathWithSubmodulesAgain=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; fetchSubmodules = true; }).outPath") # The resulting store path cannot be the same. [[ $pathWithoutSubmodules != $pathWithSubmodules ]] +# Checking out the same repo with submodules returns in the same store path. +[[ $pathWithSubmodules == $pathWithSubmodulesAgain ]] + +# The submodules flag is actually honored. [[ ! -e $pathWithoutSubmodules/sub/content ]] [[ -e $pathWithSubmodules/sub/content ]] # No .git directory or submodule reference files must be left test "$(find "$pathWithSubmodules" -name .git)" = "" + +# Git repos without submodules can be fetched with submodules = true. 
+noSubmoduleRepoBaseline=$(nix eval --raw "(builtins.fetchGit { url = file://$subRepo; rev = \"$rev\"; }).outPath") +noSubmoduleRepo=$(nix eval --raw "(builtins.fetchGit { url = file://$subRepo; rev = \"$rev\"; fetchSubmodules = true; }).outPath") + +[[ $noSubmoduleRepoBaseline == $noSubmoduleRepo ]] From 435366ed3c292756bb74fd12f47ff96e93cd48ac Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Mon, 2 Mar 2020 14:50:13 +0100 Subject: [PATCH 06/27] Rename fetchGit fetchSubmodules to just submodules --- src/libexpr/primops/fetchGit.cc | 2 +- tests/fetchGitSubmodules.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libexpr/primops/fetchGit.cc b/src/libexpr/primops/fetchGit.cc index c58d28bbc..e8905b548 100644 --- a/src/libexpr/primops/fetchGit.cc +++ b/src/libexpr/primops/fetchGit.cc @@ -243,7 +243,7 @@ static void prim_fetchGit(EvalState & state, const Pos & pos, Value * * args, Va rev = state.forceStringNoCtx(*attr.value, *attr.pos); else if (n == "name") name = state.forceStringNoCtx(*attr.value, *attr.pos); - else if (n == "fetchSubmodules") + else if (n == "submodules") fetchSubmodules = state.forceBool(*attr.value, *attr.pos); else throw EvalError("unsupported argument '%s' to 'fetchGit', at %s", attr.name, *attr.pos); diff --git a/tests/fetchGitSubmodules.sh b/tests/fetchGitSubmodules.sh index c252d42f7..6e4ef270b 100644 --- a/tests/fetchGitSubmodules.sh +++ b/tests/fetchGitSubmodules.sh @@ -39,8 +39,8 @@ git -C $rootRepo commit -m "Add submodule" rev=$(git -C $rootRepo rev-parse HEAD) pathWithoutSubmodules=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; }).outPath") -pathWithSubmodules=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; fetchSubmodules = true; }).outPath") -pathWithSubmodulesAgain=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; fetchSubmodules = true; }).outPath") +pathWithSubmodules=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = true; }).outPath") +pathWithSubmodulesAgain=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = true; }).outPath") # The resulting store path cannot be the same. [[ $pathWithoutSubmodules != $pathWithSubmodules ]] @@ -57,6 +57,6 @@ test "$(find "$pathWithSubmodules" -name .git)" = "" # Git repos without submodules can be fetched with submodules = true. noSubmoduleRepoBaseline=$(nix eval --raw "(builtins.fetchGit { url = file://$subRepo; rev = \"$rev\"; }).outPath") -noSubmoduleRepo=$(nix eval --raw "(builtins.fetchGit { url = file://$subRepo; rev = \"$rev\"; fetchSubmodules = true; }).outPath") +noSubmoduleRepo=$(nix eval --raw "(builtins.fetchGit { url = file://$subRepo; rev = \"$rev\"; submodules = true; }).outPath") [[ $noSubmoduleRepoBaseline == $noSubmoduleRepo ]] From 6c00a9545f2162496032c490aadbadd85856662c Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Mon, 2 Mar 2020 15:50:39 +0100 Subject: [PATCH 07/27] Fix typo in submodule test --- tests/fetchGitSubmodules.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/fetchGitSubmodules.sh b/tests/fetchGitSubmodules.sh index 6e4ef270b..9a21421cc 100644 --- a/tests/fetchGitSubmodules.sh +++ b/tests/fetchGitSubmodules.sh @@ -56,7 +56,8 @@ pathWithSubmodulesAgain=$(nix eval --raw "(builtins.fetchGit { url = file://$roo test "$(find "$pathWithSubmodules" -name .git)" = "" # Git repos without submodules can be fetched with submodules = true. 
-noSubmoduleRepoBaseline=$(nix eval --raw "(builtins.fetchGit { url = file://$subRepo; rev = \"$rev\"; }).outPath") -noSubmoduleRepo=$(nix eval --raw "(builtins.fetchGit { url = file://$subRepo; rev = \"$rev\"; submodules = true; }).outPath") +subRev=$(git -C $subRepo rev-parse HEAD) +noSubmoduleRepoBaseline=$(nix eval --raw "(builtins.fetchGit { url = file://$subRepo; rev = \"$subRev\"; }).outPath") +noSubmoduleRepo=$(nix eval --raw "(builtins.fetchGit { url = file://$subRepo; rev = \"$subRev\"; submodules = true; }).outPath") [[ $noSubmoduleRepoBaseline == $noSubmoduleRepo ]] From cc4fe977e5eedf00d8e3d267ccbc3676b3fac1a0 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Mon, 9 Mar 2020 14:07:26 +0100 Subject: [PATCH 08/27] Link to stdc++fs Some platforms seem to still require linking with stdc++fs to enable STL std::filesystem support. --- src/libexpr/local.mk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/libexpr/local.mk b/src/libexpr/local.mk index 9b5fcc561..b6ee82424 100644 --- a/src/libexpr/local.mk +++ b/src/libexpr/local.mk @@ -8,7 +8,11 @@ libexpr_SOURCES := $(wildcard $(d)/*.cc) $(wildcard $(d)/primops/*.cc) $(d)/lexe libexpr_LIBS = libutil libstore libnixrust +ifeq ($(CXX), g++) +libexpr_LDFLAGS = -lstdc++fs +else libexpr_LDFLAGS = +endif ifneq ($(OS), FreeBSD) libexpr_LDFLAGS += -ldl From 002a3a95dcb7391a8372ba7a74f4e7e9bd48b59b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Forsman?= Date: Mon, 16 Mar 2020 14:47:21 +0100 Subject: [PATCH 09/27] fetchGit: fix "fatal: couldn't find remote ref refs/heads/master" issue with submodules --- src/libexpr/primops/fetchGit.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libexpr/primops/fetchGit.cc b/src/libexpr/primops/fetchGit.cc index e8905b548..66259d778 100644 --- a/src/libexpr/primops/fetchGit.cc +++ b/src/libexpr/primops/fetchGit.cc @@ -179,7 +179,7 @@ GitInfo exportGit(ref store, const std::string & uri, runProgram("git", true, { "init", tmpDir, "--separate-git-dir", tmpGitDir }); runProgram("git", true, { "-C", tmpDir, "fetch", "--quiet", "--force", - "--", cacheDir, fmt("%s:%s", *ref, *ref) }); + "--", cacheDir, fmt("%s", *ref) }); runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", "FETCH_HEAD" }); runProgram("git", true, { "-C", tmpDir, "remote", "add", "origin", uri }); From 587e259bfdda248fb2d81389faca816b8efdb5f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Forsman?= Date: Mon, 16 Mar 2020 14:47:55 +0100 Subject: [PATCH 10/27] tests/fetchGitSubmodules: add more tests --- tests/fetchGitSubmodules.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/fetchGitSubmodules.sh b/tests/fetchGitSubmodules.sh index 9a21421cc..8ee1bb067 100644 --- a/tests/fetchGitSubmodules.sh +++ b/tests/fetchGitSubmodules.sh @@ -42,6 +42,25 @@ pathWithoutSubmodules=$(nix eval --raw "(builtins.fetchGit { url = file://$rootR pathWithSubmodules=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = true; }).outPath") pathWithSubmodulesAgain=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = true; }).outPath") +r1=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; }).outPath") +r2=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = false; }).outPath") +r3=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = true; }).outPath") + +[[ $r1 == $r2 ]] +[[ $r2 != $r3 ]] + +r4=$(nix 
eval --raw "(builtins.fetchGit { url = file://$rootRepo; ref = \"refs/heads/master\"; rev = \"$rev\"; }).outPath") +r5=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; ref = \"refs/heads/master\"; rev = \"$rev\"; submodules = false; }).outPath") +r6=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; ref = \"refs/heads/master\"; rev = \"$rev\"; submodules = true; }).outPath") +r7=$(nix eval --raw "(builtins.fetchGit { url = $rootRepo; ref = \"refs/heads/master\"; rev = \"$rev\"; submodules = true; }).outPath") +r8=$(nix eval --raw "(builtins.fetchGit { url = $rootRepo; rev = \"$rev\"; submodules = true; }).outPath") + +[[ $r1 == $r4 ]] +[[ $r4 == $r5 ]] +[[ $r3 == $r6 ]] +[[ $r6 == $r7 ]] +[[ $r7 == $r8 ]] + # The resulting store path cannot be the same. [[ $pathWithoutSubmodules != $pathWithSubmodules ]] From 6864ad7cf510d96f0cfc0cdbeffc1188a4eab44c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Forsman?= Date: Tue, 17 Mar 2020 14:27:14 +0100 Subject: [PATCH 11/27] fetchGit: fix submodule output attribute Before this change it would be false for all evaluations but the first. Now it follows the input argument (as it should). --- src/libexpr/primops/fetchGit.cc | 7 +++---- tests/fetchGitSubmodules.sh | 9 +++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/libexpr/primops/fetchGit.cc b/src/libexpr/primops/fetchGit.cc index 66259d778..653d99b53 100644 --- a/src/libexpr/primops/fetchGit.cc +++ b/src/libexpr/primops/fetchGit.cc @@ -32,6 +32,9 @@ GitInfo exportGit(ref store, const std::string & uri, std::optional ref, std::string rev, const std::string & name, bool fetchSubmodules) { + GitInfo gitInfo; + gitInfo.submodules = fetchSubmodules; + if (evalSettings.pureEval && rev == "") throw Error("in pure evaluation mode, 'fetchGit' requires a Git revision"); @@ -49,7 +52,6 @@ GitInfo exportGit(ref store, const std::string & uri, if (!clean) { /* This is an unclean working tree. So copy all tracked files. */ - GitInfo gitInfo; gitInfo.rev = "0000000000000000000000000000000000000000"; gitInfo.shortRev = std::string(gitInfo.rev, 0, 7); @@ -141,7 +143,6 @@ GitInfo exportGit(ref store, const std::string & uri, } // FIXME: check whether rev is an ancestor of ref. - GitInfo gitInfo; gitInfo.rev = rev != "" ? rev : chomp(readFile(localRefFile)); gitInfo.shortRev = std::string(gitInfo.rev, 0, 7); @@ -190,8 +191,6 @@ GitInfo exportGit(ref store, const std::string & uri, std::filesystem::remove_all(p.path()); } } - - gitInfo.submodules = true; } else { auto source = sinkToSource([&](Sink & sink) { RunOptions gitOptions("git", { "-C", cacheDir, "archive", gitInfo.rev }); diff --git a/tests/fetchGitSubmodules.sh b/tests/fetchGitSubmodules.sh index 8ee1bb067..2d625c376 100644 --- a/tests/fetchGitSubmodules.sh +++ b/tests/fetchGitSubmodules.sh @@ -61,6 +61,15 @@ r8=$(nix eval --raw "(builtins.fetchGit { url = $rootRepo; rev = \"$rev\"; submo [[ $r6 == $r7 ]] [[ $r7 == $r8 ]] +have_submodules=$(nix eval "(builtins.fetchGit { url = $rootRepo; rev = \"$rev\"; }).submodules") +[[ $have_submodules == false ]] + +have_submodules=$(nix eval "(builtins.fetchGit { url = $rootRepo; rev = \"$rev\"; submodules = false; }).submodules") +[[ $have_submodules == false ]] + +have_submodules=$(nix eval "(builtins.fetchGit { url = $rootRepo; rev = \"$rev\"; submodules = true; }).submodules") +[[ $have_submodules == true ]] + # The resulting store path cannot be the same. 
[[ $pathWithoutSubmodules != $pathWithSubmodules ]] From 369fffd6f1f7d939e528b3591dca49989c51ae35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Forsman?= Date: Tue, 17 Mar 2020 14:27:58 +0100 Subject: [PATCH 12/27] fetchGit: add submodules attribute to the .link file The .link file is used as a lock, so I think we should put the "submodule" attribute in there since turning on submodules creates a new .link file path. --- src/libexpr/primops/fetchGit.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libexpr/primops/fetchGit.cc b/src/libexpr/primops/fetchGit.cc index 653d99b53..969b1a4ed 100644 --- a/src/libexpr/primops/fetchGit.cc +++ b/src/libexpr/primops/fetchGit.cc @@ -211,6 +211,7 @@ GitInfo exportGit(ref store, const std::string & uri, json["name"] = name; json["rev"] = gitInfo.rev; json["revCount"] = gitInfo.revCount; + json["submodules"] = gitInfo.submodules; writeFile(storeLink, json.dump()); From be84049baf047fd2eee08def5c8013797c0832a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Forsman?= Date: Tue, 17 Mar 2020 20:14:19 +0100 Subject: [PATCH 13/27] tests/fetchGitSubmodules.sh: more checks --- tests/fetchGitSubmodules.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/fetchGitSubmodules.sh b/tests/fetchGitSubmodules.sh index 2d625c376..30a8737bd 100644 --- a/tests/fetchGitSubmodules.sh +++ b/tests/fetchGitSubmodules.sh @@ -38,10 +38,6 @@ git -C $rootRepo commit -m "Add submodule" rev=$(git -C $rootRepo rev-parse HEAD) -pathWithoutSubmodules=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; }).outPath") -pathWithSubmodules=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = true; }).outPath") -pathWithSubmodulesAgain=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = true; }).outPath") - r1=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; }).outPath") r2=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = false; }).outPath") r3=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = true; }).outPath") @@ -70,16 +66,26 @@ have_submodules=$(nix eval "(builtins.fetchGit { url = $rootRepo; rev = \"$rev\" have_submodules=$(nix eval "(builtins.fetchGit { url = $rootRepo; rev = \"$rev\"; submodules = true; }).submodules") [[ $have_submodules == true ]] +pathWithoutSubmodules=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; }).outPath") +pathWithSubmodules=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = true; }).outPath") +pathWithSubmodulesAgain=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; rev = \"$rev\"; submodules = true; }).outPath") +pathWithSubmodulesAgainWithRef=$(nix eval --raw "(builtins.fetchGit { url = file://$rootRepo; ref = \"refs/heads/master\"; rev = \"$rev\"; submodules = true; }).outPath") + # The resulting store path cannot be the same. [[ $pathWithoutSubmodules != $pathWithSubmodules ]] # Checking out the same repo with submodules returns in the same store path. [[ $pathWithSubmodules == $pathWithSubmodulesAgain ]] +# Checking out the same repo with submodules returns in the same store path. +[[ $pathWithSubmodulesAgain == $pathWithSubmodulesAgainWithRef ]] + # The submodules flag is actually honored. [[ ! 
-e $pathWithoutSubmodules/sub/content ]] [[ -e $pathWithSubmodules/sub/content ]] +[[ -e $pathWithSubmodulesAgainWithRef/sub/content ]] + # No .git directory or submodule reference files must be left test "$(find "$pathWithSubmodules" -name .git)" = "" From b306b7039ef3f2b76dbcc8020dcf67ed27442e7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Forsman?= Date: Wed, 18 Mar 2020 08:44:37 +0100 Subject: [PATCH 14/27] fetchGit: checkout rev instead of latest ref Major bugfix for the submodules = true code path. TODO: Add tests. --- src/libexpr/primops/fetchGit.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libexpr/primops/fetchGit.cc b/src/libexpr/primops/fetchGit.cc index 969b1a4ed..0c6bc6dc0 100644 --- a/src/libexpr/primops/fetchGit.cc +++ b/src/libexpr/primops/fetchGit.cc @@ -182,7 +182,7 @@ GitInfo exportGit(ref store, const std::string & uri, runProgram("git", true, { "-C", tmpDir, "fetch", "--quiet", "--force", "--", cacheDir, fmt("%s", *ref) }); - runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", "FETCH_HEAD" }); + runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", gitInfo.rev }); runProgram("git", true, { "-C", tmpDir, "remote", "add", "origin", uri }); runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" }); From cc522d0d23b71faeac95998d264abe14ddceeed5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Forsman?= Date: Wed, 18 Mar 2020 08:45:31 +0100 Subject: [PATCH 15/27] fetchGit: fix submodules = true for dirty trees --- src/libexpr/primops/fetchGit.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/libexpr/primops/fetchGit.cc b/src/libexpr/primops/fetchGit.cc index 0c6bc6dc0..66e1d7b98 100644 --- a/src/libexpr/primops/fetchGit.cc +++ b/src/libexpr/primops/fetchGit.cc @@ -55,8 +55,12 @@ GitInfo exportGit(ref store, const std::string & uri, gitInfo.rev = "0000000000000000000000000000000000000000"; gitInfo.shortRev = std::string(gitInfo.rev, 0, 7); + auto gitOpts = Strings({ "-C", uri, "ls-files", "-z" }); + if (fetchSubmodules) { + gitOpts.emplace_back("--recurse-submodules"); + } auto files = tokenizeString>( - runProgram("git", true, { "-C", uri, "ls-files", "-z" }), "\0"s); + runProgram("git", true, gitOpts), "\0"s); PathFilter filter = [&](const Path & p) -> bool { assert(hasPrefix(p, uri)); From f686efeed4d6305101c56136855e2d6b87e649e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Forsman?= Date: Fri, 20 Mar 2020 14:36:10 +0100 Subject: [PATCH 16/27] fetchGit: fix submodule corner case by fetching all refs from cacheDir Due to fetchGit not checking if rev is an ancestor of ref (there is even a FIXME comment about it in the code), the cache repo might not have the ref even though it has the rev. This doesn't matter when submodule = false, but the submodule = true code blows up because it tries to fetch the (missing) ref from the cache repo. Fix this in the simplest way possible: fetch all refs from the local cache repo when submodules = true. TODO: Add tests. 
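A rough sketch of how the corner case can arise (the repository path and
revision are placeholders; this is not one of the shipped tests):

    # An earlier fetch pins $rev via some other ref, so the local cache
    # repo ends up containing $rev but not the ref used below:
    nix eval --raw "(builtins.fetchGit { url = file://$repo; ref = \"dev\"; rev = \"$rev\"; }).outPath"
    # Re-fetching the same rev skips the network fetch (the rev is already
    # cached), and with submodules = true the submodule code then tried to
    # fetch the missing ref from the cache repo and failed:
    nix eval --raw "(builtins.fetchGit { url = file://$repo; rev = \"$rev\"; submodules = true; }).outPath"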
--- src/libexpr/primops/fetchGit.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/libexpr/primops/fetchGit.cc b/src/libexpr/primops/fetchGit.cc index 66e1d7b98..f138b755c 100644 --- a/src/libexpr/primops/fetchGit.cc +++ b/src/libexpr/primops/fetchGit.cc @@ -183,8 +183,11 @@ GitInfo exportGit(ref store, const std::string & uri, AutoDelete delTmpGitDir(tmpGitDir, true); runProgram("git", true, { "init", tmpDir, "--separate-git-dir", tmpGitDir }); + // TODO: the cacheDir repo might lack the ref (it only checks if rev + // exists, see FIXME above) so use a big hammer and fetch everything to + // ensure we get the rev. runProgram("git", true, { "-C", tmpDir, "fetch", "--quiet", "--force", - "--", cacheDir, fmt("%s", *ref) }); + "--update-head-ok", "--", cacheDir, "refs/*:refs/*" }); runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", gitInfo.rev }); runProgram("git", true, { "-C", tmpDir, "remote", "add", "origin", uri }); From 40c023ecfe49fea6e66db34c5f841fcf7001cbeb Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Sun, 29 Mar 2020 23:47:48 +0200 Subject: [PATCH 17/27] fetchGit: don't use std::filesystem to filter git repos Using std::filesystem means also having to link with -lstdc++fs on some platforms and it's hard to discover for what platforms this is needed. As all the functionality is already implemented as utilities, use those instead. --- src/libexpr/local.mk | 4 ---- src/libexpr/primops/fetchGit.cc | 17 ++++++++++------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/libexpr/local.mk b/src/libexpr/local.mk index b6ee82424..9b5fcc561 100644 --- a/src/libexpr/local.mk +++ b/src/libexpr/local.mk @@ -8,11 +8,7 @@ libexpr_SOURCES := $(wildcard $(d)/*.cc) $(wildcard $(d)/primops/*.cc) $(d)/lexe libexpr_LIBS = libutil libstore libnixrust -ifeq ($(CXX), g++) -libexpr_LDFLAGS = -lstdc++fs -else libexpr_LDFLAGS = -endif ifneq ($(OS), FreeBSD) libexpr_LDFLAGS += -ldl diff --git a/src/libexpr/primops/fetchGit.cc b/src/libexpr/primops/fetchGit.cc index f138b755c..d60e6b803 100644 --- a/src/libexpr/primops/fetchGit.cc +++ b/src/libexpr/primops/fetchGit.cc @@ -6,7 +6,6 @@ #include "hash.hh" #include "tarfile.hh" -#include #include #include @@ -28,6 +27,13 @@ struct GitInfo std::regex revRegex("^[0-9a-fA-F]{40}$"); +static bool isNotDotGitDirectory(const Path & path) +{ + static const std::regex gitDirRegex("^(?:.*/)?\\.git$"); + + return not std::regex_match(path, gitDirRegex); +} + GitInfo exportGit(ref store, const std::string & uri, std::optional ref, std::string rev, const std::string & name, bool fetchSubmodules) @@ -175,6 +181,7 @@ GitInfo exportGit(ref store, const std::string & uri, Path tmpDir = createTempDir(); AutoDelete delTmpDir(tmpDir, true); + PathFilter filter = defaultPathFilter; // Submodule support can be improved by adding caching to the submodules themselves. At the moment, only the root // repo is cached. 
@@ -193,11 +200,7 @@ GitInfo exportGit(ref store, const std::string & uri, runProgram("git", true, { "-C", tmpDir, "remote", "add", "origin", uri }); runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" }); - for (const auto& p : std::filesystem::recursive_directory_iterator(tmpDir)) { - if (p.path().filename() == ".git") { - std::filesystem::remove_all(p.path()); - } - } + filter = isNotDotGitDirectory; } else { auto source = sinkToSource([&](Sink & sink) { RunOptions gitOptions("git", { "-C", cacheDir, "archive", gitInfo.rev }); @@ -208,7 +211,7 @@ GitInfo exportGit(ref store, const std::string & uri, unpackTarfile(*source, tmpDir); } - gitInfo.storePath = store->printStorePath(store->addToStore(name, tmpDir)); + gitInfo.storePath = store->printStorePath(store->addToStore(name, tmpDir, true, htSHA256, filter)); gitInfo.revCount = std::stoull(runProgram("git", true, { "-C", cacheDir, "rev-list", "--count", gitInfo.rev })); From 4fc4eb6c93511c61a278ba0e5555ffb24435ab59 Mon Sep 17 00:00:00 2001 From: Andreas Rammhold Date: Thu, 2 Apr 2020 17:04:00 +0200 Subject: [PATCH 18/27] libexpr: remove unused attrError The attrError variable is no longer used but still allocated on every call to the findAlongAttrPath function. --- src/libexpr/attr-path.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/libexpr/attr-path.cc b/src/libexpr/attr-path.cc index 4545bfd72..76d101b98 100644 --- a/src/libexpr/attr-path.cc +++ b/src/libexpr/attr-path.cc @@ -37,9 +37,6 @@ std::pair findAlongAttrPath(EvalState & state, const string & attr { Strings tokens = parseAttrPath(attrPath); - Error attrError = - Error(format("attribute selection path '%1%' does not match expression") % attrPath); - Value * v = &vIn; Pos pos = noPos; From 63fa92605b2b0a8a6d948a27e1d331cc46b199bd Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 2 Apr 2020 11:54:48 +0200 Subject: [PATCH 19/27] nix-env: Refuse to operate on a new-style profile This prevents users from accidentally nuking their profile via nix-env. (cherry picked from commit 021634e3e3edb327089d33ab41b743f0a40126da) --- src/nix-env/user-env.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/nix-env/user-env.cc b/src/nix-env/user-env.cc index 717431b7a..f852916d8 100644 --- a/src/nix-env/user-env.cc +++ b/src/nix-env/user-env.cc @@ -15,6 +15,8 @@ namespace nix { DrvInfos queryInstalled(EvalState & state, const Path & userEnv) { DrvInfos elems; + if (pathExists(userEnv + "/manifest.json")) + throw Error("profile '%s' is incompatible with 'nix-env'; please use 'nix profile' instead", userEnv); Path manifestFile = userEnv + "/manifest.nix"; if (pathExists(manifestFile)) { Value v; From 5e7ccdc9e3ddd61dc85e20c898001345bfb497a5 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 3 Apr 2020 20:06:26 +0200 Subject: [PATCH 20/27] Publish a tarball containing the crates we depend on This is needed since we no longer produce a source tarball. (cherry picked from commit bf70a047a0b2da606f65a88f13ce2994065cd9c4) --- release.nix | 94 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 53 insertions(+), 41 deletions(-) diff --git a/release.nix b/release.nix index 9b591229a..65b035957 100644 --- a/release.nix +++ b/release.nix @@ -12,52 +12,64 @@ let builtins.readFile ./.version + (if officialRelease then "" else "pre${toString nix.revCount}_${nix.shortRev}"); + # Create a "vendor" directory that contains the crates listed in + # Cargo.lock. This allows Nix to be built without network access. 
+ vendoredCrates' = + let + lockFile = builtins.fromTOML (builtins.readFile nix-rust/Cargo.lock); + + files = map (pkg: import { + url = "https://crates.io/api/v1/crates/${pkg.name}/${pkg.version}/download"; + sha256 = lockFile.metadata."checksum ${pkg.name} ${pkg.version} (registry+https://github.com/rust-lang/crates.io-index)"; + }) (builtins.filter (pkg: pkg.source or "" == "registry+https://github.com/rust-lang/crates.io-index") lockFile.package); + + in pkgs.runCommand "cargo-vendor-dir" {} + '' + mkdir -p $out/vendor + + cat > $out/vendor/config < "$dir/.cargo-checksum.json" + + # Clean up some cruft from the winapi crates. FIXME: find + # a way to remove winapi* from our dependencies. + if [[ $dir =~ /winapi ]]; then + find $dir -name "*.a" -print0 | xargs -0 rm -f -- + fi + + mv "$dir" $out/vendor/ + + rm -rf $out/vendor/tmp + '') files)} + ''; + jobs = rec { - # Create a "vendor" directory that contains the crates listed in - # Cargo.lock. This allows Nix to be built without network access. vendoredCrates = - let - lockFile = builtins.fromTOML (builtins.readFile nix-rust/Cargo.lock); - - files = map (pkg: import { - url = "https://crates.io/api/v1/crates/${pkg.name}/${pkg.version}/download"; - sha256 = lockFile.metadata."checksum ${pkg.name} ${pkg.version} (registry+https://github.com/rust-lang/crates.io-index)"; - }) (builtins.filter (pkg: pkg.source or "" == "registry+https://github.com/rust-lang/crates.io-index") lockFile.package); - - in pkgs.runCommand "cargo-vendor-dir" {} + with pkgs; + runCommand "vendored-crates" {} '' - mkdir -p $out/vendor - - cat > $out/vendor/config < "$dir/.cargo-checksum.json" - - # Clean up some cruft from the winapi crates. FIXME: find - # a way to remove winapi* from our dependencies. - if [[ $dir =~ /winapi ]]; then - find $dir -name "*.a" -print0 | xargs -0 rm -f -- - fi - - mv "$dir" $out/vendor/ - - rm -rf $out/vendor/tmp - '') files)} + mkdir -p $out/nix-support + name=nix-vendored-crates-${version} + fn=$out/$name.tar.xz + tar cvfJ $fn -C ${vendoredCrates'} vendor \ + --owner=0 --group=0 --mode=u+rw,uga+r \ + --transform "s,vendor,$name," + echo "file crates-tarball $fn" >> $out/nix-support/hydra-build-products ''; - build = pkgs.lib.genAttrs systems (system: let pkgs = import nixpkgs { inherit system; }; in @@ -89,7 +101,7 @@ let patchelf --set-rpath $out/lib:${stdenv.cc.cc.lib}/lib $out/lib/libboost_thread.so.* ''} - ln -sfn ${vendoredCrates}/vendor/ nix-rust/vendor + ln -sfn ${vendoredCrates'}/vendor/ nix-rust/vendor (cd perl; autoreconf --install --force --verbose) ''; From c976cb0b8ab5d0f2c4ab8c9826fc7db56e2f1b3e Mon Sep 17 00:00:00 2001 From: Cole Helbling Date: Sat, 4 Apr 2020 12:27:39 -0700 Subject: [PATCH 21/27] Don't retry on "unsupported protocol" error When encountering an unsupported protocol, there's no need to retry. Chances are, it won't suddenly be supported between retry attempts; error instead. 
Otherwise, you see something like the following: $ nix-env -i -f git://git@github.com/foo/bar warning: unable to download 'git://git@github.com/foo/bar': Unsupported protocol (1); retrying in 335 ms warning: unable to download 'git://git@github.com/foo/bar': Unsupported protocol (1); retrying in 604 ms warning: unable to download 'git://git@github.com/foo/bar': Unsupported protocol (1); retrying in 1340 ms warning: unable to download 'git://git@github.com/foo/bar': Unsupported protocol (1); retrying in 2685 ms With this change, you now see: $ nix-env -i -f git://git@github.com/foo/bar error: unable to download 'git://git@github.com/foo/bar': Unsupported protocol (1) --- src/libstore/download.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libstore/download.cc b/src/libstore/download.cc index 149c84765..5967d0425 100644 --- a/src/libstore/download.cc +++ b/src/libstore/download.cc @@ -390,6 +390,7 @@ struct CurlDownloader : public Downloader case CURLE_SSL_CACERT_BADFILE: case CURLE_TOO_MANY_REDIRECTS: case CURLE_WRITE_ERROR: + case CURLE_UNSUPPORTED_PROTOCOL: err = Misc; break; default: // Shut up warnings From 462421d34588c227eb736b07f7b40a38aa0972a6 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 30 Mar 2020 16:04:18 +0200 Subject: [PATCH 22/27] Backport libfetchers from the flakes branch This provides a pluggable mechanism for defining new fetchers. It adds a builtin function 'fetchTree' that generalizes existing fetchers like 'fetchGit', 'fetchMercurial' and 'fetchTarball'. 'fetchTree' takes a set of attributes, e.g. fetchTree { type = "git"; url = "https://example.org/repo.git"; ref = "some-branch"; rev = "abcdef..."; } The existing fetchers are just wrappers around this. Note that the input attributes to fetchTree are the same as flake input specifications and flake lock file entries. All fetchers share a common cache stored in ~/.cache/nix/fetcher-cache-v1.sqlite. This replaces the ad hoc caching mechanisms in fetchGit and download.cc (e.g. ~/.cache/nix/{tarballs,git-revs*}). This also adds support for Git worktrees (c169ea59049f861aaba429f48b828d0820b74d1d). 
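As a rough illustration of the "wrappers" point above (reusing the example
values from the fetchTree snippet; fetchTree itself is gated behind the
'flakes' experimental feature):

    builtins.fetchGit { url = "https://example.org/repo.git"; ref = "some-branch"; }

is now handled through the same fetcher machinery as

    fetchTree { type = "git"; url = "https://example.org/repo.git"; ref = "some-branch"; }

and both produce an attribute set with outPath, rev and shortRev (among
other attributes).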
--- Makefile | 1 + src/libexpr/common-eval-args.cc | 8 +- src/libexpr/local.mk | 4 +- src/libexpr/parser.y | 6 +- src/libexpr/primops.cc | 67 ----- src/libexpr/primops.hh | 1 + src/libexpr/primops/fetchGit.cc | 223 ++------------ src/libexpr/primops/fetchMercurial.cc | 207 +++---------- src/libexpr/primops/fetchTree.cc | 165 +++++++++++ src/libfetchers/attrs.cc | 92 ++++++ src/libfetchers/attrs.hh | 37 +++ src/libfetchers/cache.cc | 121 ++++++++ src/libfetchers/cache.hh | 34 +++ src/libfetchers/fetchers.cc | 73 +++++ src/libfetchers/fetchers.hh | 103 +++++++ src/libfetchers/git.cc | 401 ++++++++++++++++++++++++++ src/libfetchers/github.cc | 195 +++++++++++++ src/libfetchers/local.mk | 11 + src/libfetchers/mercurial.cc | 303 +++++++++++++++++++ src/libfetchers/tarball.cc | 277 ++++++++++++++++++ src/libfetchers/tree-info.cc | 14 + src/libfetchers/tree-info.hh | 29 ++ src/libstore/derivations.cc | 2 +- src/libstore/download.cc | 137 --------- src/libstore/download.hh | 31 +- src/libstore/globals.hh | 9 + src/libstore/store-api.cc | 25 +- src/libstore/store-api.hh | 1 + src/libutil/types.hh | 8 + src/libutil/url.cc | 137 +++++++++ src/libutil/url.hh | 62 ++++ src/nix-channel/nix-channel.cc | 17 +- src/nix/local.mk | 4 +- tests/fetchGit.sh | 32 +- tests/init.sh | 2 +- tests/tarball.sh | 7 + 36 files changed, 2199 insertions(+), 647 deletions(-) create mode 100644 src/libexpr/primops/fetchTree.cc create mode 100644 src/libfetchers/attrs.cc create mode 100644 src/libfetchers/attrs.hh create mode 100644 src/libfetchers/cache.cc create mode 100644 src/libfetchers/cache.hh create mode 100644 src/libfetchers/fetchers.cc create mode 100644 src/libfetchers/fetchers.hh create mode 100644 src/libfetchers/git.cc create mode 100644 src/libfetchers/github.cc create mode 100644 src/libfetchers/local.mk create mode 100644 src/libfetchers/mercurial.cc create mode 100644 src/libfetchers/tarball.cc create mode 100644 src/libfetchers/tree-info.cc create mode 100644 src/libfetchers/tree-info.hh create mode 100644 src/libutil/url.cc create mode 100644 src/libutil/url.hh diff --git a/Makefile b/Makefile index 469070533..e3057c36c 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ makefiles = \ nix-rust/local.mk \ src/libutil/local.mk \ src/libstore/local.mk \ + src/libfetchers/local.mk \ src/libmain/local.mk \ src/libexpr/local.mk \ src/nix/local.mk \ diff --git a/src/libexpr/common-eval-args.cc b/src/libexpr/common-eval-args.cc index 13950ab8d..82bfeac36 100644 --- a/src/libexpr/common-eval-args.cc +++ b/src/libexpr/common-eval-args.cc @@ -3,6 +3,8 @@ #include "download.hh" #include "util.hh" #include "eval.hh" +#include "fetchers.hh" +#include "store-api.hh" namespace nix { @@ -46,9 +48,9 @@ Bindings * MixEvalArgs::getAutoArgs(EvalState & state) Path lookupFileArg(EvalState & state, string s) { if (isUri(s)) { - CachedDownloadRequest request(s); - request.unpack = true; - return getDownloader()->downloadCached(state.store, request).path; + return state.store->toRealPath( + fetchers::downloadTarball( + state.store, resolveUri(s), "source", false).storePath); } else if (s.size() > 2 && s.at(0) == '<' && s.at(s.size() - 1) == '>') { Path p = s.substr(1, s.size() - 2); return state.findFile(p); diff --git a/src/libexpr/local.mk b/src/libexpr/local.mk index a4ccab376..917e8a1c7 100644 --- a/src/libexpr/local.mk +++ b/src/libexpr/local.mk @@ -6,9 +6,9 @@ libexpr_DIR := $(d) libexpr_SOURCES := $(wildcard $(d)/*.cc) $(wildcard $(d)/primops/*.cc) $(d)/lexer-tab.cc $(d)/parser-tab.cc -libexpr_CXXFLAGS += -I src/libutil 
-I src/libstore -I src/libmain -I src/libexpr +libexpr_CXXFLAGS += -I src/libutil -I src/libstore -I src/libfetchers -I src/libmain -I src/libexpr -libexpr_LIBS = libutil libstore libnixrust +libexpr_LIBS = libutil libstore libfetchers libnixrust libexpr_LDFLAGS = ifneq ($(OS), FreeBSD) diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y index 9c769e803..a30fb44b5 100644 --- a/src/libexpr/parser.y +++ b/src/libexpr/parser.y @@ -545,6 +545,7 @@ formal #include "eval.hh" #include "download.hh" +#include "fetchers.hh" #include "store-api.hh" @@ -687,9 +688,8 @@ std::pair EvalState::resolveSearchPathElem(const SearchPathEl if (isUri(elem.second)) { try { - CachedDownloadRequest request(elem.second); - request.unpack = true; - res = { true, getDownloader()->downloadCached(store, request).path }; + res = { true, store->toRealPath(fetchers::downloadTarball( + store, resolveUri(elem.second), "source", false).storePath) }; } catch (DownloadError & e) { printError(format("warning: Nix search path entry '%1%' cannot be downloaded, ignoring") % elem.second); res = { false, "" }; diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 8de234951..629f3da15 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -1,6 +1,5 @@ #include "archive.hh" #include "derivations.hh" -#include "download.hh" #include "eval-inline.hh" #include "eval.hh" #include "globals.hh" @@ -2045,68 +2044,6 @@ static void prim_splitVersion(EvalState & state, const Pos & pos, Value * * args } -/************************************************************* - * Networking - *************************************************************/ - - -void fetch(EvalState & state, const Pos & pos, Value * * args, Value & v, - const string & who, bool unpack, const std::string & defaultName) -{ - CachedDownloadRequest request(""); - request.unpack = unpack; - request.name = defaultName; - - state.forceValue(*args[0]); - - if (args[0]->type == tAttrs) { - - state.forceAttrs(*args[0], pos); - - for (auto & attr : *args[0]->attrs) { - string n(attr.name); - if (n == "url") - request.uri = state.forceStringNoCtx(*attr.value, *attr.pos); - else if (n == "sha256") - request.expectedHash = Hash(state.forceStringNoCtx(*attr.value, *attr.pos), htSHA256); - else if (n == "name") - request.name = state.forceStringNoCtx(*attr.value, *attr.pos); - else - throw EvalError(format("unsupported argument '%1%' to '%2%', at %3%") % attr.name % who % attr.pos); - } - - if (request.uri.empty()) - throw EvalError(format("'url' argument required, at %1%") % pos); - - } else - request.uri = state.forceStringNoCtx(*args[0], pos); - - state.checkURI(request.uri); - - if (evalSettings.pureEval && !request.expectedHash) - throw Error("in pure evaluation mode, '%s' requires a 'sha256' argument", who); - - auto res = getDownloader()->downloadCached(state.store, request); - - if (state.allowedPaths) - state.allowedPaths->insert(res.path); - - mkString(v, res.storePath, PathSet({res.storePath})); -} - - -static void prim_fetchurl(EvalState & state, const Pos & pos, Value * * args, Value & v) -{ - fetch(state, pos, args, v, "fetchurl", false, ""); -} - - -static void prim_fetchTarball(EvalState & state, const Pos & pos, Value * * args, Value & v) -{ - fetch(state, pos, args, v, "fetchTarball", true, "source"); -} - - /************************************************************* * Primop registration *************************************************************/ @@ -2289,10 +2226,6 @@ void EvalState::createBaseEnv() addPrimOp("derivationStrict", 1, 
prim_derivationStrict); addPrimOp("placeholder", 1, prim_placeholder); - // Networking - addPrimOp("__fetchurl", 1, prim_fetchurl); - addPrimOp("fetchTarball", 1, prim_fetchTarball); - /* Add a wrapper around the derivation primop that computes the `drvPath' and `outPath' attributes lazily. */ string path = canonPath(settings.nixDataDir + "/nix/corepkgs/derivation.nix", true); diff --git a/src/libexpr/primops.hh b/src/libexpr/primops.hh index c790b30f6..05d0792ef 100644 --- a/src/libexpr/primops.hh +++ b/src/libexpr/primops.hh @@ -20,6 +20,7 @@ struct RegisterPrimOp them. */ /* Load a ValueInitializer from a DSO and return whatever it initializes */ void prim_importNative(EvalState & state, const Pos & pos, Value * * args, Value & v); + /* Execute a program and parse its output */ void prim_exec(EvalState & state, const Pos & pos, Value * * args, Value & v); diff --git a/src/libexpr/primops/fetchGit.cc b/src/libexpr/primops/fetchGit.cc index 4aee1073e..812de9d91 100644 --- a/src/libexpr/primops/fetchGit.cc +++ b/src/libexpr/primops/fetchGit.cc @@ -1,202 +1,17 @@ #include "primops.hh" #include "eval-inline.hh" -#include "download.hh" #include "store-api.hh" -#include "pathlocks.hh" #include "hash.hh" -#include "tarfile.hh" - -#include - -#include - -#include - -using namespace std::string_literals; +#include "fetchers.hh" +#include "url.hh" namespace nix { -struct GitInfo -{ - Path storePath; - std::string rev; - std::string shortRev; - uint64_t revCount = 0; -}; - -std::regex revRegex("^[0-9a-fA-F]{40}$"); - -GitInfo exportGit(ref store, const std::string & uri, - std::optional ref, std::string rev, - const std::string & name) -{ - if (evalSettings.pureEval && rev == "") - throw Error("in pure evaluation mode, 'fetchGit' requires a Git revision"); - - if (!ref && rev == "" && hasPrefix(uri, "/") && pathExists(uri + "/.git")) { - - bool clean = true; - - try { - runProgram("git", true, { "-C", uri, "diff-index", "--quiet", "HEAD", "--" }); - } catch (ExecError & e) { - if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; - clean = false; - } - - if (!clean) { - - /* This is an unclean working tree. So copy all tracked files. */ - GitInfo gitInfo; - gitInfo.rev = "0000000000000000000000000000000000000000"; - gitInfo.shortRev = std::string(gitInfo.rev, 0, 7); - - auto files = tokenizeString>( - runProgram("git", true, { "-C", uri, "ls-files", "-z" }), "\0"s); - - PathFilter filter = [&](const Path & p) -> bool { - assert(hasPrefix(p, uri)); - std::string file(p, uri.size() + 1); - - auto st = lstat(p); - - if (S_ISDIR(st.st_mode)) { - auto prefix = file + "/"; - auto i = files.lower_bound(prefix); - return i != files.end() && hasPrefix(*i, prefix); - } - - return files.count(file); - }; - - gitInfo.storePath = store->printStorePath(store->addToStore("source", uri, true, htSHA256, filter)); - - return gitInfo; - } - - // clean working tree, but no ref or rev specified. Use 'HEAD'. 
- rev = chomp(runProgram("git", true, { "-C", uri, "rev-parse", "HEAD" })); - ref = "HEAD"s; - } - - if (!ref) ref = "HEAD"s; - - if (rev != "" && !std::regex_match(rev, revRegex)) - throw Error("invalid Git revision '%s'", rev); - - deletePath(getCacheDir() + "/nix/git"); - - Path cacheDir = getCacheDir() + "/nix/gitv2/" + hashString(htSHA256, uri).to_string(Base32, false); - - if (!pathExists(cacheDir)) { - createDirs(dirOf(cacheDir)); - runProgram("git", true, { "init", "--bare", cacheDir }); - } - - Path localRefFile; - if (ref->compare(0, 5, "refs/") == 0) - localRefFile = cacheDir + "/" + *ref; - else - localRefFile = cacheDir + "/refs/heads/" + *ref; - - bool doFetch; - time_t now = time(0); - /* If a rev was specified, we need to fetch if it's not in the - repo. */ - if (rev != "") { - try { - runProgram("git", true, { "-C", cacheDir, "cat-file", "-e", rev }); - doFetch = false; - } catch (ExecError & e) { - if (WIFEXITED(e.status)) { - doFetch = true; - } else { - throw; - } - } - } else { - /* If the local ref is older than ‘tarball-ttl’ seconds, do a - git fetch to update the local ref to the remote ref. */ - struct stat st; - doFetch = stat(localRefFile.c_str(), &st) != 0 || - (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now; - } - if (doFetch) - { - Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Git repository '%s'", uri)); - - // FIXME: git stderr messes up our progress indicator, so - // we're using --quiet for now. Should process its stderr. - runProgram("git", true, { "-C", cacheDir, "fetch", "--quiet", "--force", "--", uri, fmt("%s:%s", *ref, *ref) }); - - struct timeval times[2]; - times[0].tv_sec = now; - times[0].tv_usec = 0; - times[1].tv_sec = now; - times[1].tv_usec = 0; - - utimes(localRefFile.c_str(), times); - } - - // FIXME: check whether rev is an ancestor of ref. - GitInfo gitInfo; - gitInfo.rev = rev != "" ? 
rev : chomp(readFile(localRefFile)); - gitInfo.shortRev = std::string(gitInfo.rev, 0, 7); - - printTalkative("using revision %s of repo '%s'", gitInfo.rev, uri); - - std::string storeLinkName = hashString(htSHA512, name + std::string("\0"s) + gitInfo.rev).to_string(Base32, false); - Path storeLink = cacheDir + "/" + storeLinkName + ".link"; - PathLocks storeLinkLock({storeLink}, fmt("waiting for lock on '%1%'...", storeLink)); // FIXME: broken - - try { - auto json = nlohmann::json::parse(readFile(storeLink)); - - assert(json["name"] == name && json["rev"] == gitInfo.rev); - - gitInfo.storePath = json["storePath"]; - - if (store->isValidPath(store->parseStorePath(gitInfo.storePath))) { - gitInfo.revCount = json["revCount"]; - return gitInfo; - } - - } catch (SysError & e) { - if (e.errNo != ENOENT) throw; - } - - auto source = sinkToSource([&](Sink & sink) { - RunOptions gitOptions("git", { "-C", cacheDir, "archive", gitInfo.rev }); - gitOptions.standardOut = &sink; - runProgram2(gitOptions); - }); - - Path tmpDir = createTempDir(); - AutoDelete delTmpDir(tmpDir, true); - - unpackTarfile(*source, tmpDir); - - gitInfo.storePath = store->printStorePath(store->addToStore(name, tmpDir)); - - gitInfo.revCount = std::stoull(runProgram("git", true, { "-C", cacheDir, "rev-list", "--count", gitInfo.rev })); - - nlohmann::json json; - json["storePath"] = gitInfo.storePath; - json["uri"] = uri; - json["name"] = name; - json["rev"] = gitInfo.rev; - json["revCount"] = gitInfo.revCount; - - writeFile(storeLink, json.dump()); - - return gitInfo; -} - static void prim_fetchGit(EvalState & state, const Pos & pos, Value * * args, Value & v) { std::string url; std::optional ref; - std::string rev; + std::optional rev; std::string name = "source"; PathSet context; @@ -213,7 +28,7 @@ static void prim_fetchGit(EvalState & state, const Pos & pos, Value * * args, Va else if (n == "ref") ref = state.forceStringNoCtx(*attr.value, *attr.pos); else if (n == "rev") - rev = state.forceStringNoCtx(*attr.value, *attr.pos); + rev = Hash(state.forceStringNoCtx(*attr.value, *attr.pos), htSHA1); else if (n == "name") name = state.forceStringNoCtx(*attr.value, *attr.pos); else @@ -230,17 +45,35 @@ static void prim_fetchGit(EvalState & state, const Pos & pos, Value * * args, Va // whitelist. Ah well. state.checkURI(url); - auto gitInfo = exportGit(state.store, url, ref, rev, name); + if (evalSettings.pureEval && !rev) + throw Error("in pure evaluation mode, 'fetchGit' requires a Git revision"); + + auto parsedUrl = parseURL( + url.find("://") != std::string::npos + ? "git+" + url + : "git+file://" + url); + if (ref) parsedUrl.query.insert_or_assign("ref", *ref); + if (rev) parsedUrl.query.insert_or_assign("rev", rev->gitRev()); + // FIXME: use name + auto input = fetchers::inputFromURL(parsedUrl); + + auto [tree, input2] = input->fetchTree(state.store); state.mkAttrs(v, 8); - mkString(*state.allocAttr(v, state.sOutPath), gitInfo.storePath, PathSet({gitInfo.storePath})); - mkString(*state.allocAttr(v, state.symbols.create("rev")), gitInfo.rev); - mkString(*state.allocAttr(v, state.symbols.create("shortRev")), gitInfo.shortRev); - mkInt(*state.allocAttr(v, state.symbols.create("revCount")), gitInfo.revCount); + auto storePath = state.store->printStorePath(tree.storePath); + mkString(*state.allocAttr(v, state.sOutPath), storePath, PathSet({storePath})); + // Backward compatibility: set 'rev' to + // 0000000000000000000000000000000000000000 for a dirty tree. 
+ auto rev2 = input2->getRev().value_or(Hash(htSHA1)); + mkString(*state.allocAttr(v, state.symbols.create("rev")), rev2.gitRev()); + mkString(*state.allocAttr(v, state.symbols.create("shortRev")), rev2.gitShortRev()); + // Backward compatibility: set 'revCount' to 0 for a dirty tree. + mkInt(*state.allocAttr(v, state.symbols.create("revCount")), + tree.info.revCount.value_or(0)); v.attrs->sort(); if (state.allowedPaths) - state.allowedPaths->insert(state.store->toRealPath(gitInfo.storePath)); + state.allowedPaths->insert(tree.actualPath); } static RegisterPrimOp r("fetchGit", 1, prim_fetchGit); diff --git a/src/libexpr/primops/fetchMercurial.cc b/src/libexpr/primops/fetchMercurial.cc index db274fa4f..f18351646 100644 --- a/src/libexpr/primops/fetchMercurial.cc +++ b/src/libexpr/primops/fetchMercurial.cc @@ -1,174 +1,18 @@ #include "primops.hh" #include "eval-inline.hh" -#include "download.hh" #include "store-api.hh" -#include "pathlocks.hh" - -#include +#include "fetchers.hh" +#include "url.hh" #include -#include - -using namespace std::string_literals; - namespace nix { -struct HgInfo -{ - Path storePath; - std::string branch; - std::string rev; - uint64_t revCount = 0; -}; - -std::regex commitHashRegex("^[0-9a-fA-F]{40}$"); - -HgInfo exportMercurial(ref store, const std::string & uri, - std::string rev, const std::string & name) -{ - if (evalSettings.pureEval && rev == "") - throw Error("in pure evaluation mode, 'fetchMercurial' requires a Mercurial revision"); - - if (rev == "" && hasPrefix(uri, "/") && pathExists(uri + "/.hg")) { - - bool clean = runProgram("hg", true, { "status", "-R", uri, "--modified", "--added", "--removed" }) == ""; - - if (!clean) { - - /* This is an unclean working tree. So copy all tracked - files. */ - - printTalkative("copying unclean Mercurial working tree '%s'", uri); - - HgInfo hgInfo; - hgInfo.rev = "0000000000000000000000000000000000000000"; - hgInfo.branch = chomp(runProgram("hg", true, { "branch", "-R", uri })); - - auto files = tokenizeString>( - runProgram("hg", true, { "status", "-R", uri, "--clean", "--modified", "--added", "--no-status", "--print0" }), "\0"s); - - PathFilter filter = [&](const Path & p) -> bool { - assert(hasPrefix(p, uri)); - std::string file(p, uri.size() + 1); - - auto st = lstat(p); - - if (S_ISDIR(st.st_mode)) { - auto prefix = file + "/"; - auto i = files.lower_bound(prefix); - return i != files.end() && hasPrefix(*i, prefix); - } - - return files.count(file); - }; - - hgInfo.storePath = store->printStorePath(store->addToStore("source", uri, true, htSHA256, filter)); - - return hgInfo; - } - } - - if (rev == "") rev = "default"; - - Path cacheDir = fmt("%s/nix/hg/%s", getCacheDir(), hashString(htSHA256, uri).to_string(Base32, false)); - - Path stampFile = fmt("%s/.hg/%s.stamp", cacheDir, hashString(htSHA512, rev).to_string(Base32, false)); - - /* If we haven't pulled this repo less than ‘tarball-ttl’ seconds, - do so now. */ - time_t now = time(0); - struct stat st; - if (stat(stampFile.c_str(), &st) != 0 || - (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now) - { - /* Except that if this is a commit hash that we already have, - we don't have to pull again. 
*/ - if (!(std::regex_match(rev, commitHashRegex) - && pathExists(cacheDir) - && runProgram( - RunOptions("hg", { "log", "-R", cacheDir, "-r", rev, "--template", "1" }) - .killStderr(true)).second == "1")) - { - Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Mercurial repository '%s'", uri)); - - if (pathExists(cacheDir)) { - try { - runProgram("hg", true, { "pull", "-R", cacheDir, "--", uri }); - } - catch (ExecError & e) { - string transJournal = cacheDir + "/.hg/store/journal"; - /* hg throws "abandoned transaction" error only if this file exists */ - if (pathExists(transJournal)) { - runProgram("hg", true, { "recover", "-R", cacheDir }); - runProgram("hg", true, { "pull", "-R", cacheDir, "--", uri }); - } else { - throw ExecError(e.status, fmt("'hg pull' %s", statusToString(e.status))); - } - } - } else { - createDirs(dirOf(cacheDir)); - runProgram("hg", true, { "clone", "--noupdate", "--", uri, cacheDir }); - } - } - - writeFile(stampFile, ""); - } - - auto tokens = tokenizeString>( - runProgram("hg", true, { "log", "-R", cacheDir, "-r", rev, "--template", "{node} {rev} {branch}" })); - assert(tokens.size() == 3); - - HgInfo hgInfo; - hgInfo.rev = tokens[0]; - hgInfo.revCount = std::stoull(tokens[1]); - hgInfo.branch = tokens[2]; - - std::string storeLinkName = hashString(htSHA512, name + std::string("\0"s) + hgInfo.rev).to_string(Base32, false); - Path storeLink = fmt("%s/.hg/%s.link", cacheDir, storeLinkName); - - try { - auto json = nlohmann::json::parse(readFile(storeLink)); - - assert(json["name"] == name && json["rev"] == hgInfo.rev); - - hgInfo.storePath = json["storePath"]; - - if (store->isValidPath(store->parseStorePath(hgInfo.storePath))) { - printTalkative("using cached Mercurial store path '%s'", hgInfo.storePath); - return hgInfo; - } - - } catch (SysError & e) { - if (e.errNo != ENOENT) throw; - } - - Path tmpDir = createTempDir(); - AutoDelete delTmpDir(tmpDir, true); - - runProgram("hg", true, { "archive", "-R", cacheDir, "-r", rev, tmpDir }); - - deletePath(tmpDir + "/.hg_archival.txt"); - - hgInfo.storePath = store->printStorePath(store->addToStore(name, tmpDir)); - - nlohmann::json json; - json["storePath"] = hgInfo.storePath; - json["uri"] = uri; - json["name"] = name; - json["branch"] = hgInfo.branch; - json["rev"] = hgInfo.rev; - json["revCount"] = hgInfo.revCount; - - writeFile(storeLink, json.dump()); - - return hgInfo; -} - static void prim_fetchMercurial(EvalState & state, const Pos & pos, Value * * args, Value & v) { std::string url; - std::string rev; + std::optional rev; + std::optional ref; std::string name = "source"; PathSet context; @@ -182,8 +26,15 @@ static void prim_fetchMercurial(EvalState & state, const Pos & pos, Value * * ar string n(attr.name); if (n == "url") url = state.coerceToString(*attr.pos, *attr.value, context, false, false); - else if (n == "rev") - rev = state.forceStringNoCtx(*attr.value, *attr.pos); + else if (n == "rev") { + // Ugly: unlike fetchGit, here the "rev" attribute can + // be both a revision or a branch/tag name. + auto value = state.forceStringNoCtx(*attr.value, *attr.pos); + if (std::regex_match(value, revRegex)) + rev = Hash(value, htSHA1); + else + ref = value; + } else if (n == "name") name = state.forceStringNoCtx(*attr.value, *attr.pos); else @@ -200,18 +51,36 @@ static void prim_fetchMercurial(EvalState & state, const Pos & pos, Value * * ar // whitelist. Ah well. 
state.checkURI(url); - auto hgInfo = exportMercurial(state.store, url, rev, name); + if (evalSettings.pureEval && !rev) + throw Error("in pure evaluation mode, 'fetchMercurial' requires a Mercurial revision"); + + auto parsedUrl = parseURL( + url.find("://") != std::string::npos + ? "hg+" + url + : "hg+file://" + url); + if (rev) parsedUrl.query.insert_or_assign("rev", rev->gitRev()); + if (ref) parsedUrl.query.insert_or_assign("ref", *ref); + // FIXME: use name + auto input = fetchers::inputFromURL(parsedUrl); + + auto [tree, input2] = input->fetchTree(state.store); state.mkAttrs(v, 8); - mkString(*state.allocAttr(v, state.sOutPath), hgInfo.storePath, PathSet({hgInfo.storePath})); - mkString(*state.allocAttr(v, state.symbols.create("branch")), hgInfo.branch); - mkString(*state.allocAttr(v, state.symbols.create("rev")), hgInfo.rev); - mkString(*state.allocAttr(v, state.symbols.create("shortRev")), std::string(hgInfo.rev, 0, 12)); - mkInt(*state.allocAttr(v, state.symbols.create("revCount")), hgInfo.revCount); + auto storePath = state.store->printStorePath(tree.storePath); + mkString(*state.allocAttr(v, state.sOutPath), storePath, PathSet({storePath})); + if (input2->getRef()) + mkString(*state.allocAttr(v, state.symbols.create("branch")), *input2->getRef()); + // Backward compatibility: set 'rev' to + // 0000000000000000000000000000000000000000 for a dirty tree. + auto rev2 = input2->getRev().value_or(Hash(htSHA1)); + mkString(*state.allocAttr(v, state.symbols.create("rev")), rev2.gitRev()); + mkString(*state.allocAttr(v, state.symbols.create("shortRev")), std::string(rev2.gitRev(), 0, 12)); + if (tree.info.revCount) + mkInt(*state.allocAttr(v, state.symbols.create("revCount")), *tree.info.revCount); v.attrs->sort(); if (state.allowedPaths) - state.allowedPaths->insert(state.store->toRealPath(hgInfo.storePath)); + state.allowedPaths->insert(tree.actualPath); } static RegisterPrimOp r("fetchMercurial", 1, prim_fetchMercurial); diff --git a/src/libexpr/primops/fetchTree.cc b/src/libexpr/primops/fetchTree.cc new file mode 100644 index 000000000..9586f71ed --- /dev/null +++ b/src/libexpr/primops/fetchTree.cc @@ -0,0 +1,165 @@ +#include "primops.hh" +#include "eval-inline.hh" +#include "store-api.hh" +#include "fetchers.hh" +#include "download.hh" + +#include +#include + +namespace nix { + +void emitTreeAttrs( + EvalState & state, + const fetchers::Tree & tree, + std::shared_ptr input, + Value & v) +{ + state.mkAttrs(v, 8); + + auto storePath = state.store->printStorePath(tree.storePath); + + mkString(*state.allocAttr(v, state.sOutPath), storePath, PathSet({storePath})); + + assert(tree.info.narHash); + mkString(*state.allocAttr(v, state.symbols.create("narHash")), + tree.info.narHash.to_string(SRI)); + + if (input->getRev()) { + mkString(*state.allocAttr(v, state.symbols.create("rev")), input->getRev()->gitRev()); + mkString(*state.allocAttr(v, state.symbols.create("shortRev")), input->getRev()->gitShortRev()); + } + + if (tree.info.revCount) + mkInt(*state.allocAttr(v, state.symbols.create("revCount")), *tree.info.revCount); + + if (tree.info.lastModified) + mkString(*state.allocAttr(v, state.symbols.create("lastModified")), + fmt("%s", std::put_time(std::gmtime(&*tree.info.lastModified), "%Y%m%d%H%M%S"))); + + v.attrs->sort(); +} + +static void prim_fetchTree(EvalState & state, const Pos & pos, Value * * args, Value & v) +{ + settings.requireExperimentalFeature("flakes"); + + std::shared_ptr input; + PathSet context; + + state.forceValue(*args[0]); + + if (args[0]->type == tAttrs) { + 
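        // The argument is either an attribute set, for example
+        //   builtins.fetchTree { type = "git"; url = "https://example.org/repo.git"; ref = "master"; }
+        // (handled here), or a URL string such as "github:owner/repo", which is
+        // handled by the inputFromURL() branch below. (The URLs shown are
+        // illustrative only.)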
state.forceAttrs(*args[0], pos); + + fetchers::Attrs attrs; + + for (auto & attr : *args[0]->attrs) { + state.forceValue(*attr.value); + if (attr.value->type == tString) + attrs.emplace(attr.name, attr.value->string.s); + else if (attr.value->type == tBool) + attrs.emplace(attr.name, attr.value->boolean); + else + throw TypeError("fetchTree argument '%s' is %s while a string or Boolean is expected", + attr.name, showType(*attr.value)); + } + + if (!attrs.count("type")) + throw Error("attribute 'type' is missing in call to 'fetchTree', at %s", pos); + + input = fetchers::inputFromAttrs(attrs); + } else + input = fetchers::inputFromURL(state.coerceToString(pos, *args[0], context, false, false)); + + if (evalSettings.pureEval && !input->isImmutable()) + throw Error("in pure evaluation mode, 'fetchTree' requires an immutable input"); + + // FIXME: use fetchOrSubstituteTree + auto [tree, input2] = input->fetchTree(state.store); + + if (state.allowedPaths) + state.allowedPaths->insert(tree.actualPath); + + emitTreeAttrs(state, tree, input2, v); +} + +static RegisterPrimOp r("fetchTree", 1, prim_fetchTree); + +static void fetch(EvalState & state, const Pos & pos, Value * * args, Value & v, + const string & who, bool unpack, std::string name) +{ + std::optional url; + std::optional expectedHash; + + state.forceValue(*args[0]); + + if (args[0]->type == tAttrs) { + + state.forceAttrs(*args[0], pos); + + for (auto & attr : *args[0]->attrs) { + string n(attr.name); + if (n == "url") + url = state.forceStringNoCtx(*attr.value, *attr.pos); + else if (n == "sha256") + expectedHash = Hash(state.forceStringNoCtx(*attr.value, *attr.pos), htSHA256); + else if (n == "name") + name = state.forceStringNoCtx(*attr.value, *attr.pos); + else + throw EvalError("unsupported argument '%s' to '%s', at %s", + attr.name, who, attr.pos); + } + + if (!url) + throw EvalError("'url' argument required, at %s", pos); + + } else + url = state.forceStringNoCtx(*args[0], pos); + + url = resolveUri(*url); + + state.checkURI(*url); + + if (name == "") + name = baseNameOf(*url); + + if (evalSettings.pureEval && !expectedHash) + throw Error("in pure evaluation mode, '%s' requires a 'sha256' argument", who); + + auto storePath = + unpack + ? fetchers::downloadTarball(state.store, *url, name, (bool) expectedHash).storePath + : fetchers::downloadFile(state.store, *url, name, (bool) expectedHash).storePath; + + auto path = state.store->toRealPath(storePath); + + if (expectedHash) { + auto hash = unpack + ? 
state.store->queryPathInfo(storePath)->narHash + : hashFile(htSHA256, path); + if (hash != *expectedHash) + throw Error((unsigned int) 102, "hash mismatch in file downloaded from '%s':\n wanted: %s\n got: %s", + *url, expectedHash->to_string(), hash.to_string()); + } + + if (state.allowedPaths) + state.allowedPaths->insert(path); + + mkString(v, path, PathSet({path})); +} + +static void prim_fetchurl(EvalState & state, const Pos & pos, Value * * args, Value & v) +{ + fetch(state, pos, args, v, "fetchurl", false, ""); +} + +static void prim_fetchTarball(EvalState & state, const Pos & pos, Value * * args, Value & v) +{ + fetch(state, pos, args, v, "fetchTarball", true, "source"); +} + +static RegisterPrimOp r2("__fetchurl", 1, prim_fetchurl); +static RegisterPrimOp r3("fetchTarball", 1, prim_fetchTarball); + +} diff --git a/src/libfetchers/attrs.cc b/src/libfetchers/attrs.cc new file mode 100644 index 000000000..40c02de42 --- /dev/null +++ b/src/libfetchers/attrs.cc @@ -0,0 +1,92 @@ +#include "attrs.hh" +#include "fetchers.hh" + +#include + +namespace nix::fetchers { + +Attrs jsonToAttrs(const nlohmann::json & json) +{ + Attrs attrs; + + for (auto & i : json.items()) { + if (i.value().is_number()) + attrs.emplace(i.key(), i.value().get()); + else if (i.value().is_string()) + attrs.emplace(i.key(), i.value().get()); + else if (i.value().is_boolean()) + attrs.emplace(i.key(), i.value().get()); + else + throw Error("unsupported input attribute type in lock file"); + } + + return attrs; +} + +nlohmann::json attrsToJson(const Attrs & attrs) +{ + nlohmann::json json; + for (auto & attr : attrs) { + if (auto v = std::get_if(&attr.second)) { + json[attr.first] = *v; + } else if (auto v = std::get_if(&attr.second)) { + json[attr.first] = *v; + } else if (auto v = std::get_if>(&attr.second)) { + json[attr.first] = v->t; + } else abort(); + } + return json; +} + +std::optional maybeGetStrAttr(const Attrs & attrs, const std::string & name) +{ + auto i = attrs.find(name); + if (i == attrs.end()) return {}; + if (auto v = std::get_if(&i->second)) + return *v; + throw Error("input attribute '%s' is not a string %s", name, attrsToJson(attrs).dump()); +} + +std::string getStrAttr(const Attrs & attrs, const std::string & name) +{ + auto s = maybeGetStrAttr(attrs, name); + if (!s) + throw Error("input attribute '%s' is missing", name); + return *s; +} + +std::optional maybeGetIntAttr(const Attrs & attrs, const std::string & name) +{ + auto i = attrs.find(name); + if (i == attrs.end()) return {}; + if (auto v = std::get_if(&i->second)) + return *v; + throw Error("input attribute '%s' is not an integer", name); +} + +int64_t getIntAttr(const Attrs & attrs, const std::string & name) +{ + auto s = maybeGetIntAttr(attrs, name); + if (!s) + throw Error("input attribute '%s' is missing", name); + return *s; +} + +std::optional maybeGetBoolAttr(const Attrs & attrs, const std::string & name) +{ + auto i = attrs.find(name); + if (i == attrs.end()) return {}; + if (auto v = std::get_if(&i->second)) + return *v; + throw Error("input attribute '%s' is not a Boolean", name); +} + +bool getBoolAttr(const Attrs & attrs, const std::string & name) +{ + auto s = maybeGetBoolAttr(attrs, name); + if (!s) + throw Error("input attribute '%s' is missing", name); + return *s; +} + +} diff --git a/src/libfetchers/attrs.hh b/src/libfetchers/attrs.hh new file mode 100644 index 000000000..2c9e772d2 --- /dev/null +++ b/src/libfetchers/attrs.hh @@ -0,0 +1,37 @@ +#pragma once + +#include "types.hh" + +#include + +#include + +namespace 
nix::fetchers { + +/* Wrap bools to prevent string literals (i.e. 'char *') from being + cast to a bool in Attr. */ +template +struct Explicit { + T t; +}; + +typedef std::variant> Attr; +typedef std::map Attrs; + +Attrs jsonToAttrs(const nlohmann::json & json); + +nlohmann::json attrsToJson(const Attrs & attrs); + +std::optional maybeGetStrAttr(const Attrs & attrs, const std::string & name); + +std::string getStrAttr(const Attrs & attrs, const std::string & name); + +std::optional maybeGetIntAttr(const Attrs & attrs, const std::string & name); + +int64_t getIntAttr(const Attrs & attrs, const std::string & name); + +std::optional maybeGetBoolAttr(const Attrs & attrs, const std::string & name); + +bool getBoolAttr(const Attrs & attrs, const std::string & name); + +} diff --git a/src/libfetchers/cache.cc b/src/libfetchers/cache.cc new file mode 100644 index 000000000..e1c7f3dee --- /dev/null +++ b/src/libfetchers/cache.cc @@ -0,0 +1,121 @@ +#include "cache.hh" +#include "sqlite.hh" +#include "sync.hh" +#include "store-api.hh" + +#include + +namespace nix::fetchers { + +static const char * schema = R"sql( + +create table if not exists Cache ( + input text not null, + info text not null, + path text not null, + immutable integer not null, + timestamp integer not null, + primary key (input) +); +)sql"; + +struct CacheImpl : Cache +{ + struct State + { + SQLite db; + SQLiteStmt add, lookup; + }; + + Sync _state; + + CacheImpl() + { + auto state(_state.lock()); + + auto dbPath = getCacheDir() + "/nix/fetcher-cache-v1.sqlite"; + createDirs(dirOf(dbPath)); + + state->db = SQLite(dbPath); + state->db.isCache(); + state->db.exec(schema); + + state->add.create(state->db, + "insert or replace into Cache(input, info, path, immutable, timestamp) values (?, ?, ?, ?, ?)"); + + state->lookup.create(state->db, + "select info, path, immutable, timestamp from Cache where input = ?"); + } + + void add( + ref store, + const Attrs & inAttrs, + const Attrs & infoAttrs, + const StorePath & storePath, + bool immutable) override + { + _state.lock()->add.use() + (attrsToJson(inAttrs).dump()) + (attrsToJson(infoAttrs).dump()) + (store->printStorePath(storePath)) + (immutable) + (time(0)).exec(); + } + + std::optional> lookup( + ref store, + const Attrs & inAttrs) override + { + if (auto res = lookupExpired(store, inAttrs)) { + if (!res->expired) + return std::make_pair(std::move(res->infoAttrs), std::move(res->storePath)); + debug("ignoring expired cache entry '%s'", + attrsToJson(inAttrs).dump()); + } + return {}; + } + + std::optional lookupExpired( + ref store, + const Attrs & inAttrs) override + { + auto state(_state.lock()); + + auto inAttrsJson = attrsToJson(inAttrs).dump(); + + auto stmt(state->lookup.use()(inAttrsJson)); + if (!stmt.next()) { + debug("did not find cache entry for '%s'", inAttrsJson); + return {}; + } + + auto infoJson = stmt.getStr(0); + auto storePath = store->parseStorePath(stmt.getStr(1)); + auto immutable = stmt.getInt(2) != 0; + auto timestamp = stmt.getInt(3); + + store->addTempRoot(storePath); + if (!store->isValidPath(storePath)) { + // FIXME: we could try to substitute 'storePath'. 
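+            // The cached store path is no longer valid (it may, for instance,
+            // have been garbage-collected), so the entry is unusable and the
+            // caller will re-fetch.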
+ debug("ignoring disappeared cache entry '%s'", inAttrsJson); + return {}; + } + + debug("using cache entry '%s' -> '%s', '%s'", + inAttrsJson, infoJson, store->printStorePath(storePath)); + + return Result { + .expired = !immutable && (settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0)), + .infoAttrs = jsonToAttrs(nlohmann::json::parse(infoJson)), + .storePath = std::move(storePath) + }; + } +}; + +ref getCache() +{ + static auto cache = std::make_shared(); + return ref(cache); +} + +} diff --git a/src/libfetchers/cache.hh b/src/libfetchers/cache.hh new file mode 100644 index 000000000..d76ab1233 --- /dev/null +++ b/src/libfetchers/cache.hh @@ -0,0 +1,34 @@ +#pragma once + +#include "fetchers.hh" + +namespace nix::fetchers { + +struct Cache +{ + virtual void add( + ref store, + const Attrs & inAttrs, + const Attrs & infoAttrs, + const StorePath & storePath, + bool immutable) = 0; + + virtual std::optional> lookup( + ref store, + const Attrs & inAttrs) = 0; + + struct Result + { + bool expired = false; + Attrs infoAttrs; + StorePath storePath; + }; + + virtual std::optional lookupExpired( + ref store, + const Attrs & inAttrs) = 0; +}; + +ref getCache(); + +} diff --git a/src/libfetchers/fetchers.cc b/src/libfetchers/fetchers.cc new file mode 100644 index 000000000..25d3da431 --- /dev/null +++ b/src/libfetchers/fetchers.cc @@ -0,0 +1,73 @@ +#include "fetchers.hh" +#include "store-api.hh" + +#include + +namespace nix::fetchers { + +std::unique_ptr>> inputSchemes = nullptr; + +void registerInputScheme(std::unique_ptr && inputScheme) +{ + if (!inputSchemes) inputSchemes = std::make_unique>>(); + inputSchemes->push_back(std::move(inputScheme)); +} + +std::unique_ptr inputFromURL(const ParsedURL & url) +{ + for (auto & inputScheme : *inputSchemes) { + auto res = inputScheme->inputFromURL(url); + if (res) return res; + } + throw Error("input '%s' is unsupported", url.url); +} + +std::unique_ptr inputFromURL(const std::string & url) +{ + return inputFromURL(parseURL(url)); +} + +std::unique_ptr inputFromAttrs(const Attrs & attrs) +{ + for (auto & inputScheme : *inputSchemes) { + auto res = inputScheme->inputFromAttrs(attrs); + if (res) { + if (auto narHash = maybeGetStrAttr(attrs, "narHash")) + // FIXME: require SRI hash. 
+ res->narHash = Hash(*narHash); + return res; + } + } + throw Error("input '%s' is unsupported", attrsToJson(attrs)); +} + +Attrs Input::toAttrs() const +{ + auto attrs = toAttrsInternal(); + if (narHash) + attrs.emplace("narHash", narHash->to_string(SRI)); + attrs.emplace("type", type()); + return attrs; +} + +std::pair> Input::fetchTree(ref store) const +{ + auto [tree, input] = fetchTreeInternal(store); + + if (tree.actualPath == "") + tree.actualPath = store->toRealPath(tree.storePath); + + if (!tree.info.narHash) + tree.info.narHash = store->queryPathInfo(tree.storePath)->narHash; + + if (input->narHash) + assert(input->narHash == tree.info.narHash); + + if (narHash && narHash != input->narHash) + throw Error("NAR hash mismatch in input '%s' (%s), expected '%s', got '%s'", + to_string(), tree.actualPath, narHash->to_string(SRI), input->narHash->to_string(SRI)); + + return {std::move(tree), input}; +} + +} diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh new file mode 100644 index 000000000..59a58ae67 --- /dev/null +++ b/src/libfetchers/fetchers.hh @@ -0,0 +1,103 @@ +#pragma once + +#include "types.hh" +#include "hash.hh" +#include "path.hh" +#include "tree-info.hh" +#include "attrs.hh" +#include "url.hh" + +#include + +namespace nix { class Store; } + +namespace nix::fetchers { + +struct Input; + +struct Tree +{ + Path actualPath; + StorePath storePath; + TreeInfo info; +}; + +struct Input : std::enable_shared_from_this +{ + std::optional narHash; // FIXME: implement + + virtual std::string type() const = 0; + + virtual ~Input() { } + + virtual bool operator ==(const Input & other) const { return false; } + + /* Check whether this is a "direct" input, that is, not + one that goes through a registry. */ + virtual bool isDirect() const { return true; } + + /* Check whether this is an "immutable" input, that is, + one that contains a commit hash or content hash. 
*/ + virtual bool isImmutable() const { return (bool) narHash; } + + virtual bool contains(const Input & other) const { return false; } + + virtual std::optional getRef() const { return {}; } + + virtual std::optional getRev() const { return {}; } + + virtual ParsedURL toURL() const = 0; + + std::string to_string() const + { + return toURL().to_string(); + } + + Attrs toAttrs() const; + + std::pair> fetchTree(ref store) const; + +private: + + virtual std::pair> fetchTreeInternal(ref store) const = 0; + + virtual Attrs toAttrsInternal() const = 0; +}; + +struct InputScheme +{ + virtual ~InputScheme() { } + + virtual std::unique_ptr inputFromURL(const ParsedURL & url) = 0; + + virtual std::unique_ptr inputFromAttrs(const Attrs & attrs) = 0; +}; + +std::unique_ptr inputFromURL(const ParsedURL & url); + +std::unique_ptr inputFromURL(const std::string & url); + +std::unique_ptr inputFromAttrs(const Attrs & attrs); + +void registerInputScheme(std::unique_ptr && fetcher); + +struct DownloadFileResult +{ + StorePath storePath; + std::string etag; + std::string effectiveUrl; +}; + +DownloadFileResult downloadFile( + ref store, + const std::string & url, + const std::string & name, + bool immutable); + +Tree downloadTarball( + ref store, + const std::string & url, + const std::string & name, + bool immutable); + +} diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc new file mode 100644 index 000000000..cee0713f4 --- /dev/null +++ b/src/libfetchers/git.cc @@ -0,0 +1,401 @@ +#include "fetchers.hh" +#include "cache.hh" +#include "globals.hh" +#include "tarfile.hh" +#include "store-api.hh" + +#include + +using namespace std::string_literals; + +namespace nix::fetchers { + +static std::string readHead(const Path & path) +{ + return chomp(runProgram("git", true, { "-C", path, "rev-parse", "--abbrev-ref", "HEAD" })); +} + +struct GitInput : Input +{ + ParsedURL url; + std::optional ref; + std::optional rev; + bool shallow = false; + + GitInput(const ParsedURL & url) : url(url) + { } + + std::string type() const override { return "git"; } + + bool operator ==(const Input & other) const override + { + auto other2 = dynamic_cast(&other); + return + other2 + && url == other2->url + && rev == other2->rev + && ref == other2->ref; + } + + bool isImmutable() const override + { + return (bool) rev; + } + + std::optional getRef() const override { return ref; } + + std::optional getRev() const override { return rev; } + + ParsedURL toURL() const override + { + ParsedURL url2(url); + if (url2.scheme != "git") url2.scheme = "git+" + url2.scheme; + if (rev) url2.query.insert_or_assign("rev", rev->gitRev()); + if (ref) url2.query.insert_or_assign("ref", *ref); + if (shallow) url2.query.insert_or_assign("shallow", "1"); + return url2; + } + + Attrs toAttrsInternal() const override + { + Attrs attrs; + attrs.emplace("url", url.to_string()); + if (ref) + attrs.emplace("ref", *ref); + if (rev) + attrs.emplace("rev", rev->gitRev()); + if (shallow) + attrs.emplace("shallow", true); + return attrs; + } + + std::pair getActualUrl() const + { + // Don't clone file:// URIs (but otherwise treat them the + // same as remote URIs, i.e. don't use the working tree or + // HEAD). + static bool forceHttp = getEnv("_NIX_FORCE_HTTP") == "1"; // for testing + bool isLocal = url.scheme == "file" && !forceHttp; + return {isLocal, isLocal ? 
url.path : url.base}; + } + + std::pair> fetchTreeInternal(nix::ref store) const override + { + auto name = "source"; + + auto input = std::make_shared(*this); + + assert(!rev || rev->type == htSHA1); + + auto cacheType = shallow ? "git-shallow" : "git"; + + auto getImmutableAttrs = [&]() + { + return Attrs({ + {"type", cacheType}, + {"name", name}, + {"rev", input->rev->gitRev()}, + }); + }; + + auto makeResult = [&](const Attrs & infoAttrs, StorePath && storePath) + -> std::pair> + { + assert(input->rev); + assert(!rev || rev == input->rev); + return { + Tree { + .actualPath = store->toRealPath(storePath), + .storePath = std::move(storePath), + .info = TreeInfo { + .revCount = shallow ? std::nullopt : std::optional(getIntAttr(infoAttrs, "revCount")), + .lastModified = getIntAttr(infoAttrs, "lastModified"), + }, + }, + input + }; + }; + + if (rev) { + if (auto res = getCache()->lookup(store, getImmutableAttrs())) + return makeResult(res->first, std::move(res->second)); + } + + auto [isLocal, actualUrl_] = getActualUrl(); + auto actualUrl = actualUrl_; // work around clang bug + + // If this is a local directory and no ref or revision is + // given, then allow the use of an unclean working tree. + if (!input->ref && !input->rev && isLocal) { + bool clean = false; + + /* Check whether this repo has any commits. There are + probably better ways to do this. */ + auto gitDir = actualUrl + "/.git"; + auto commonGitDir = chomp(runProgram( + "git", + true, + { "-C", actualUrl, "rev-parse", "--git-common-dir" } + )); + if (commonGitDir != ".git") + gitDir = commonGitDir; + + bool haveCommits = !readDirectory(gitDir + "/refs/heads").empty(); + + try { + if (haveCommits) { + runProgram("git", true, { "-C", actualUrl, "diff-index", "--quiet", "HEAD", "--" }); + clean = true; + } + } catch (ExecError & e) { + if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; + } + + if (!clean) { + + /* This is an unclean working tree. So copy all tracked files. */ + + if (!settings.allowDirty) + throw Error("Git tree '%s' is dirty", actualUrl); + + if (settings.warnDirty) + warn("Git tree '%s' is dirty", actualUrl); + + auto files = tokenizeString>( + runProgram("git", true, { "-C", actualUrl, "ls-files", "-z" }), "\0"s); + + PathFilter filter = [&](const Path & p) -> bool { + assert(hasPrefix(p, actualUrl)); + std::string file(p, actualUrl.size() + 1); + + auto st = lstat(p); + + if (S_ISDIR(st.st_mode)) { + auto prefix = file + "/"; + auto i = files.lower_bound(prefix); + return i != files.end() && hasPrefix(*i, prefix); + } + + return files.count(file); + }; + + auto storePath = store->addToStore("source", actualUrl, true, htSHA256, filter); + + auto tree = Tree { + .actualPath = store->printStorePath(storePath), + .storePath = std::move(storePath), + .info = TreeInfo { + // FIXME: maybe we should use the timestamp of the last + // modified dirty file? + .lastModified = haveCommits ? std::stoull(runProgram("git", true, { "-C", actualUrl, "log", "-1", "--format=%ct", "HEAD" })) : 0, + } + }; + + return {std::move(tree), input}; + } + } + + if (!input->ref) input->ref = isLocal ? 
readHead(actualUrl) : "master"; + + Attrs mutableAttrs({ + {"type", cacheType}, + {"name", name}, + {"url", actualUrl}, + {"ref", *input->ref}, + }); + + Path repoDir; + + if (isLocal) { + + if (!input->rev) + input->rev = Hash(chomp(runProgram("git", true, { "-C", actualUrl, "rev-parse", *input->ref })), htSHA1); + + repoDir = actualUrl; + + } else { + + if (auto res = getCache()->lookup(store, mutableAttrs)) { + auto rev2 = Hash(getStrAttr(res->first, "rev"), htSHA1); + if (!rev || rev == rev2) { + input->rev = rev2; + return makeResult(res->first, std::move(res->second)); + } + } + + Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, actualUrl).to_string(Base32, false); + repoDir = cacheDir; + + if (!pathExists(cacheDir)) { + createDirs(dirOf(cacheDir)); + runProgram("git", true, { "init", "--bare", repoDir }); + } + + Path localRefFile = + input->ref->compare(0, 5, "refs/") == 0 + ? cacheDir + "/" + *input->ref + : cacheDir + "/refs/heads/" + *input->ref; + + bool doFetch; + time_t now = time(0); + + /* If a rev was specified, we need to fetch if it's not in the + repo. */ + if (input->rev) { + try { + runProgram("git", true, { "-C", repoDir, "cat-file", "-e", input->rev->gitRev() }); + doFetch = false; + } catch (ExecError & e) { + if (WIFEXITED(e.status)) { + doFetch = true; + } else { + throw; + } + } + } else { + /* If the local ref is older than ‘tarball-ttl’ seconds, do a + git fetch to update the local ref to the remote ref. */ + struct stat st; + doFetch = stat(localRefFile.c_str(), &st) != 0 || + (uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now; + } + + if (doFetch) { + Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Git repository '%s'", actualUrl)); + + // FIXME: git stderr messes up our progress indicator, so + // we're using --quiet for now. Should process its stderr. + try { + runProgram("git", true, { "-C", repoDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", *input->ref, *input->ref) }); + } catch (Error & e) { + if (!pathExists(localRefFile)) throw; + warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl); + } + + struct timeval times[2]; + times[0].tv_sec = now; + times[0].tv_usec = 0; + times[1].tv_sec = now; + times[1].tv_usec = 0; + + utimes(localRefFile.c_str(), times); + } + + if (!input->rev) + input->rev = Hash(chomp(readFile(localRefFile)), htSHA1); + } + + bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "rev-parse", "--is-shallow-repository" })) == "true"; + + if (isShallow && !shallow) + throw Error("'%s' is a shallow Git repository, but a non-shallow repository is needed", actualUrl); + + // FIXME: check whether rev is an ancestor of ref. + + printTalkative("using revision %s of repo '%s'", input->rev->gitRev(), actualUrl); + + /* Now that we know the ref, check again whether we have it in + the store. */ + if (auto res = getCache()->lookup(store, getImmutableAttrs())) + return makeResult(res->first, std::move(res->second)); + + // FIXME: should pipe this, or find some better way to extract a + // revision. 
+ auto source = sinkToSource([&](Sink & sink) { + RunOptions gitOptions("git", { "-C", repoDir, "archive", input->rev->gitRev() }); + gitOptions.standardOut = &sink; + runProgram2(gitOptions); + }); + + Path tmpDir = createTempDir(); + AutoDelete delTmpDir(tmpDir, true); + + unpackTarfile(*source, tmpDir); + + auto storePath = store->addToStore(name, tmpDir); + + auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", input->rev->gitRev() })); + + Attrs infoAttrs({ + {"rev", input->rev->gitRev()}, + {"lastModified", lastModified}, + }); + + if (!shallow) + infoAttrs.insert_or_assign("revCount", + std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input->rev->gitRev() }))); + + if (!this->rev) + getCache()->add( + store, + mutableAttrs, + infoAttrs, + storePath, + false); + + getCache()->add( + store, + getImmutableAttrs(), + infoAttrs, + storePath, + true); + + return makeResult(infoAttrs, std::move(storePath)); + } +}; + +struct GitInputScheme : InputScheme +{ + std::unique_ptr inputFromURL(const ParsedURL & url) override + { + if (url.scheme != "git" && + url.scheme != "git+http" && + url.scheme != "git+https" && + url.scheme != "git+ssh" && + url.scheme != "git+file") return nullptr; + + auto url2(url); + if (hasPrefix(url2.scheme, "git+")) url2.scheme = std::string(url2.scheme, 4); + url2.query.clear(); + + Attrs attrs; + attrs.emplace("type", "git"); + + for (auto &[name, value] : url.query) { + if (name == "rev" || name == "ref") + attrs.emplace(name, value); + else + url2.query.emplace(name, value); + } + + attrs.emplace("url", url2.to_string()); + + return inputFromAttrs(attrs); + } + + std::unique_ptr inputFromAttrs(const Attrs & attrs) override + { + if (maybeGetStrAttr(attrs, "type") != "git") return {}; + + for (auto & [name, value] : attrs) + if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow") + throw Error("unsupported Git input attribute '%s'", name); + + auto input = std::make_unique(parseURL(getStrAttr(attrs, "url"))); + if (auto ref = maybeGetStrAttr(attrs, "ref")) { + if (!std::regex_match(*ref, refRegex)) + throw BadURL("invalid Git branch/tag name '%s'", *ref); + input->ref = *ref; + } + if (auto rev = maybeGetStrAttr(attrs, "rev")) + input->rev = Hash(*rev, htSHA1); + + input->shallow = maybeGetBoolAttr(attrs, "shallow").value_or(false); + + return input; + } +}; + +static auto r1 = OnStartup([] { registerInputScheme(std::make_unique()); }); + +} diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc new file mode 100644 index 000000000..3fc95ff51 --- /dev/null +++ b/src/libfetchers/github.cc @@ -0,0 +1,195 @@ +#include "download.hh" +#include "cache.hh" +#include "fetchers.hh" +#include "globals.hh" +#include "store-api.hh" + +#include + +namespace nix::fetchers { + +std::regex ownerRegex("[a-zA-Z][a-zA-Z0-9_-]*", std::regex::ECMAScript); +std::regex repoRegex("[a-zA-Z][a-zA-Z0-9_-]*", std::regex::ECMAScript); + +struct GitHubInput : Input +{ + std::string owner; + std::string repo; + std::optional ref; + std::optional rev; + + std::string type() const override { return "github"; } + + bool operator ==(const Input & other) const override + { + auto other2 = dynamic_cast(&other); + return + other2 + && owner == other2->owner + && repo == other2->repo + && rev == other2->rev + && ref == other2->ref; + } + + bool isImmutable() const override + { + return (bool) rev; + } + + std::optional getRef() const override { return ref; } + + std::optional 
getRev() const override { return rev; } + + ParsedURL toURL() const override + { + auto path = owner + "/" + repo; + assert(!(ref && rev)); + if (ref) path += "/" + *ref; + if (rev) path += "/" + rev->to_string(Base16, false); + return ParsedURL { + .scheme = "github", + .path = path, + }; + } + + Attrs toAttrsInternal() const override + { + Attrs attrs; + attrs.emplace("owner", owner); + attrs.emplace("repo", repo); + if (ref) + attrs.emplace("ref", *ref); + if (rev) + attrs.emplace("rev", rev->gitRev()); + return attrs; + } + + std::pair> fetchTreeInternal(nix::ref store) const override + { + auto rev = this->rev; + auto ref = this->ref.value_or("master"); + + if (!rev) { + auto url = fmt("https://api.github.com/repos/%s/%s/commits/%s", + owner, repo, ref); + auto json = nlohmann::json::parse( + readFile( + store->toRealPath( + downloadFile(store, url, "source", false).storePath))); + rev = Hash(json["sha"], htSHA1); + debug("HEAD revision for '%s' is %s", url, rev->gitRev()); + } + + auto input = std::make_shared(*this); + input->ref = {}; + input->rev = *rev; + + Attrs immutableAttrs({ + {"type", "git-tarball"}, + {"rev", rev->gitRev()}, + }); + + if (auto res = getCache()->lookup(store, immutableAttrs)) { + return { + Tree{ + .actualPath = store->toRealPath(res->second), + .storePath = std::move(res->second), + .info = TreeInfo { + .lastModified = getIntAttr(res->first, "lastModified"), + }, + }, + input + }; + } + + // FIXME: use regular /archive URLs instead? api.github.com + // might have stricter rate limits. + + auto url = fmt("https://api.github.com/repos/%s/%s/tarball/%s", + owner, repo, rev->to_string(Base16, false)); + + std::string accessToken = settings.githubAccessToken.get(); + if (accessToken != "") + url += "?access_token=" + accessToken; + + auto tree = downloadTarball(store, url, "source", true); + + getCache()->add( + store, + immutableAttrs, + { + {"rev", rev->gitRev()}, + {"lastModified", *tree.info.lastModified} + }, + tree.storePath, + true); + + return {std::move(tree), input}; + } +}; + +struct GitHubInputScheme : InputScheme +{ + std::unique_ptr inputFromURL(const ParsedURL & url) override + { + if (url.scheme != "github") return nullptr; + + auto path = tokenizeString>(url.path, "/"); + auto input = std::make_unique(); + + if (path.size() == 2) { + } else if (path.size() == 3) { + if (std::regex_match(path[2], revRegex)) + input->rev = Hash(path[2], htSHA1); + else if (std::regex_match(path[2], refRegex)) + input->ref = path[2]; + else + throw BadURL("in GitHub URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[2]); + } else + throw BadURL("GitHub URL '%s' is invalid", url.url); + + for (auto &[name, value] : url.query) { + if (name == "rev") { + if (input->rev) + throw BadURL("GitHub URL '%s' contains multiple commit hashes", url.url); + input->rev = Hash(value, htSHA1); + } + else if (name == "ref") { + if (!std::regex_match(value, refRegex)) + throw BadURL("GitHub URL '%s' contains an invalid branch/tag name", url.url); + if (input->ref) + throw BadURL("GitHub URL '%s' contains multiple branch/tag names", url.url); + input->ref = value; + } + } + + if (input->ref && input->rev) + throw BadURL("GitHub URL '%s' contains both a commit hash and a branch/tag name", url.url); + + input->owner = path[0]; + input->repo = path[1]; + + return input; + } + + std::unique_ptr inputFromAttrs(const Attrs & attrs) override + { + if (maybeGetStrAttr(attrs, "type") != "github") return {}; + + for (auto & [name, value] : attrs) + if (name != "type" && 
name != "owner" && name != "repo" && name != "ref" && name != "rev") + throw Error("unsupported GitHub input attribute '%s'", name); + + auto input = std::make_unique(); + input->owner = getStrAttr(attrs, "owner"); + input->repo = getStrAttr(attrs, "repo"); + input->ref = maybeGetStrAttr(attrs, "ref"); + if (auto rev = maybeGetStrAttr(attrs, "rev")) + input->rev = Hash(*rev, htSHA1); + return input; + } +}; + +static auto r1 = OnStartup([] { registerInputScheme(std::make_unique()); }); + +} diff --git a/src/libfetchers/local.mk b/src/libfetchers/local.mk new file mode 100644 index 000000000..d7143d8a6 --- /dev/null +++ b/src/libfetchers/local.mk @@ -0,0 +1,11 @@ +libraries += libfetchers + +libfetchers_NAME = libnixfetchers + +libfetchers_DIR := $(d) + +libfetchers_SOURCES := $(wildcard $(d)/*.cc) + +libfetchers_CXXFLAGS += -I src/libutil -I src/libstore + +libfetchers_LIBS = libutil libstore libnixrust diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc new file mode 100644 index 000000000..790f7c753 --- /dev/null +++ b/src/libfetchers/mercurial.cc @@ -0,0 +1,303 @@ +#include "fetchers.hh" +#include "cache.hh" +#include "globals.hh" +#include "tarfile.hh" +#include "store-api.hh" + +#include + +using namespace std::string_literals; + +namespace nix::fetchers { + +struct MercurialInput : Input +{ + ParsedURL url; + std::optional ref; + std::optional rev; + + MercurialInput(const ParsedURL & url) : url(url) + { } + + std::string type() const override { return "hg"; } + + bool operator ==(const Input & other) const override + { + auto other2 = dynamic_cast(&other); + return + other2 + && url == other2->url + && rev == other2->rev + && ref == other2->ref; + } + + bool isImmutable() const override + { + return (bool) rev; + } + + std::optional getRef() const override { return ref; } + + std::optional getRev() const override { return rev; } + + ParsedURL toURL() const override + { + ParsedURL url2(url); + url2.scheme = "hg+" + url2.scheme; + if (rev) url2.query.insert_or_assign("rev", rev->gitRev()); + if (ref) url2.query.insert_or_assign("ref", *ref); + return url; + } + + Attrs toAttrsInternal() const override + { + Attrs attrs; + attrs.emplace("url", url.to_string()); + if (ref) + attrs.emplace("ref", *ref); + if (rev) + attrs.emplace("rev", rev->gitRev()); + return attrs; + } + + std::pair getActualUrl() const + { + bool isLocal = url.scheme == "file"; + return {isLocal, isLocal ? url.path : url.base}; + } + + std::pair> fetchTreeInternal(nix::ref store) const override + { + auto name = "source"; + + auto input = std::make_shared(*this); + + auto [isLocal, actualUrl_] = getActualUrl(); + auto actualUrl = actualUrl_; // work around clang bug + + // FIXME: return lastModified. + + // FIXME: don't clone local repositories. + + if (!input->ref && !input->rev && isLocal && pathExists(actualUrl + "/.hg")) { + + bool clean = runProgram("hg", true, { "status", "-R", actualUrl, "--modified", "--added", "--removed" }) == ""; + + if (!clean) { + + /* This is an unclean working tree. So copy all tracked + files. 
*/ + + if (!settings.allowDirty) + throw Error("Mercurial tree '%s' is unclean", actualUrl); + + if (settings.warnDirty) + warn("Mercurial tree '%s' is unclean", actualUrl); + + input->ref = chomp(runProgram("hg", true, { "branch", "-R", actualUrl })); + + auto files = tokenizeString>( + runProgram("hg", true, { "status", "-R", actualUrl, "--clean", "--modified", "--added", "--no-status", "--print0" }), "\0"s); + + PathFilter filter = [&](const Path & p) -> bool { + assert(hasPrefix(p, actualUrl)); + std::string file(p, actualUrl.size() + 1); + + auto st = lstat(p); + + if (S_ISDIR(st.st_mode)) { + auto prefix = file + "/"; + auto i = files.lower_bound(prefix); + return i != files.end() && hasPrefix(*i, prefix); + } + + return files.count(file); + }; + + auto storePath = store->addToStore("source", actualUrl, true, htSHA256, filter); + + return {Tree { + .actualPath = store->printStorePath(storePath), + .storePath = std::move(storePath), + }, input}; + } + } + + if (!input->ref) input->ref = "default"; + + auto getImmutableAttrs = [&]() + { + return Attrs({ + {"type", "hg"}, + {"name", name}, + {"rev", input->rev->gitRev()}, + }); + }; + + auto makeResult = [&](const Attrs & infoAttrs, StorePath && storePath) + -> std::pair> + { + assert(input->rev); + assert(!rev || rev == input->rev); + return { + Tree{ + .actualPath = store->toRealPath(storePath), + .storePath = std::move(storePath), + .info = TreeInfo { + .revCount = getIntAttr(infoAttrs, "revCount"), + }, + }, + input + }; + }; + + if (input->rev) { + if (auto res = getCache()->lookup(store, getImmutableAttrs())) + return makeResult(res->first, std::move(res->second)); + } + + assert(input->rev || input->ref); + auto revOrRef = input->rev ? input->rev->gitRev() : *input->ref; + + Attrs mutableAttrs({ + {"type", "hg"}, + {"name", name}, + {"url", actualUrl}, + {"ref", *input->ref}, + }); + + if (auto res = getCache()->lookup(store, mutableAttrs)) { + auto rev2 = Hash(getStrAttr(res->first, "rev"), htSHA1); + if (!rev || rev == rev2) { + input->rev = rev2; + return makeResult(res->first, std::move(res->second)); + } + } + + Path cacheDir = fmt("%s/nix/hg/%s", getCacheDir(), hashString(htSHA256, actualUrl).to_string(Base32, false)); + + /* If this is a commit hash that we already have, we don't + have to pull again. 
*/ + if (!(input->rev + && pathExists(cacheDir) + && runProgram( + RunOptions("hg", { "log", "-R", cacheDir, "-r", input->rev->gitRev(), "--template", "1" }) + .killStderr(true)).second == "1")) + { + Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Mercurial repository '%s'", actualUrl)); + + if (pathExists(cacheDir)) { + try { + runProgram("hg", true, { "pull", "-R", cacheDir, "--", actualUrl }); + } + catch (ExecError & e) { + string transJournal = cacheDir + "/.hg/store/journal"; + /* hg throws "abandoned transaction" error only if this file exists */ + if (pathExists(transJournal)) { + runProgram("hg", true, { "recover", "-R", cacheDir }); + runProgram("hg", true, { "pull", "-R", cacheDir, "--", actualUrl }); + } else { + throw ExecError(e.status, fmt("'hg pull' %s", statusToString(e.status))); + } + } + } else { + createDirs(dirOf(cacheDir)); + runProgram("hg", true, { "clone", "--noupdate", "--", actualUrl, cacheDir }); + } + } + + auto tokens = tokenizeString>( + runProgram("hg", true, { "log", "-R", cacheDir, "-r", revOrRef, "--template", "{node} {rev} {branch}" })); + assert(tokens.size() == 3); + + input->rev = Hash(tokens[0], htSHA1); + auto revCount = std::stoull(tokens[1]); + input->ref = tokens[2]; + + if (auto res = getCache()->lookup(store, getImmutableAttrs())) + return makeResult(res->first, std::move(res->second)); + + Path tmpDir = createTempDir(); + AutoDelete delTmpDir(tmpDir, true); + + runProgram("hg", true, { "archive", "-R", cacheDir, "-r", input->rev->gitRev(), tmpDir }); + + deletePath(tmpDir + "/.hg_archival.txt"); + + auto storePath = store->addToStore(name, tmpDir); + + Attrs infoAttrs({ + {"rev", input->rev->gitRev()}, + {"revCount", (int64_t) revCount}, + }); + + if (!this->rev) + getCache()->add( + store, + mutableAttrs, + infoAttrs, + storePath, + false); + + getCache()->add( + store, + getImmutableAttrs(), + infoAttrs, + storePath, + true); + + return makeResult(infoAttrs, std::move(storePath)); + } +}; + +struct MercurialInputScheme : InputScheme +{ + std::unique_ptr inputFromURL(const ParsedURL & url) override + { + if (url.scheme != "hg+http" && + url.scheme != "hg+https" && + url.scheme != "hg+ssh" && + url.scheme != "hg+file") return nullptr; + + auto url2(url); + url2.scheme = std::string(url2.scheme, 3); + url2.query.clear(); + + Attrs attrs; + attrs.emplace("type", "hg"); + + for (auto &[name, value] : url.query) { + if (name == "rev" || name == "ref") + attrs.emplace(name, value); + else + url2.query.emplace(name, value); + } + + attrs.emplace("url", url2.to_string()); + + return inputFromAttrs(attrs); + } + + std::unique_ptr inputFromAttrs(const Attrs & attrs) override + { + if (maybeGetStrAttr(attrs, "type") != "hg") return {}; + + for (auto & [name, value] : attrs) + if (name != "type" && name != "url" && name != "ref" && name != "rev") + throw Error("unsupported Mercurial input attribute '%s'", name); + + auto input = std::make_unique(parseURL(getStrAttr(attrs, "url"))); + if (auto ref = maybeGetStrAttr(attrs, "ref")) { + if (!std::regex_match(*ref, refRegex)) + throw BadURL("invalid Mercurial branch/tag name '%s'", *ref); + input->ref = *ref; + } + if (auto rev = maybeGetStrAttr(attrs, "rev")) + input->rev = Hash(*rev, htSHA1); + return input; + } +}; + +static auto r1 = OnStartup([] { registerInputScheme(std::make_unique()); }); + +} diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc new file mode 100644 index 000000000..b0a83001e --- /dev/null +++ b/src/libfetchers/tarball.cc @@ -0,0 +1,277 @@ +#include 
"fetchers.hh" +#include "cache.hh" +#include "download.hh" +#include "globals.hh" +#include "store-api.hh" +#include "archive.hh" +#include "tarfile.hh" + +namespace nix::fetchers { + +DownloadFileResult downloadFile( + ref store, + const std::string & url, + const std::string & name, + bool immutable) +{ + // FIXME: check store + + Attrs inAttrs({ + {"type", "file"}, + {"url", url}, + {"name", name}, + }); + + auto cached = getCache()->lookupExpired(store, inAttrs); + + auto useCached = [&]() -> DownloadFileResult + { + return { + .storePath = std::move(cached->storePath), + .etag = getStrAttr(cached->infoAttrs, "etag"), + .effectiveUrl = getStrAttr(cached->infoAttrs, "url") + }; + }; + + if (cached && !cached->expired) + return useCached(); + + DownloadRequest request(url); + if (cached) + request.expectedETag = getStrAttr(cached->infoAttrs, "etag"); + DownloadResult res; + try { + res = getDownloader()->download(request); + } catch (DownloadError & e) { + if (cached) { + warn("%s; using cached version", e.msg()); + return useCached(); + } else + throw; + } + + // FIXME: write to temporary file. + + Attrs infoAttrs({ + {"etag", res.etag}, + {"url", res.effectiveUri}, + }); + + std::optional storePath; + + if (res.cached) { + assert(cached); + assert(request.expectedETag == res.etag); + storePath = std::move(cached->storePath); + } else { + StringSink sink; + dumpString(*res.data, sink); + auto hash = hashString(htSHA256, *res.data); + ValidPathInfo info(store->makeFixedOutputPath(false, hash, name)); + info.narHash = hashString(htSHA256, *sink.s); + info.narSize = sink.s->size(); + info.ca = makeFixedOutputCA(false, hash); + store->addToStore(info, sink.s, NoRepair, NoCheckSigs); + storePath = std::move(info.path); + } + + getCache()->add( + store, + inAttrs, + infoAttrs, + *storePath, + immutable); + + if (url != res.effectiveUri) + getCache()->add( + store, + { + {"type", "file"}, + {"url", res.effectiveUri}, + {"name", name}, + }, + infoAttrs, + *storePath, + immutable); + + return { + .storePath = std::move(*storePath), + .etag = res.etag, + .effectiveUrl = res.effectiveUri, + }; +} + +Tree downloadTarball( + ref store, + const std::string & url, + const std::string & name, + bool immutable) +{ + Attrs inAttrs({ + {"type", "tarball"}, + {"url", url}, + {"name", name}, + }); + + auto cached = getCache()->lookupExpired(store, inAttrs); + + if (cached && !cached->expired) + return Tree { + .actualPath = store->toRealPath(cached->storePath), + .storePath = std::move(cached->storePath), + .info = TreeInfo { + .lastModified = getIntAttr(cached->infoAttrs, "lastModified"), + }, + }; + + auto res = downloadFile(store, url, name, immutable); + + std::optional unpackedStorePath; + time_t lastModified; + + if (cached && res.etag != "" && getStrAttr(cached->infoAttrs, "etag") == res.etag) { + unpackedStorePath = std::move(cached->storePath); + lastModified = getIntAttr(cached->infoAttrs, "lastModified"); + } else { + Path tmpDir = createTempDir(); + AutoDelete autoDelete(tmpDir, true); + unpackTarfile(store->toRealPath(res.storePath), tmpDir); + auto members = readDirectory(tmpDir); + if (members.size() != 1) + throw nix::Error("tarball '%s' contains an unexpected number of top-level files", url); + auto topDir = tmpDir + "/" + members.begin()->name; + lastModified = lstat(topDir).st_mtime; + unpackedStorePath = store->addToStore(name, topDir, true, htSHA256, defaultPathFilter, NoRepair); + } + + Attrs infoAttrs({ + {"lastModified", lastModified}, + {"etag", res.etag}, + }); + + getCache()->add( 
+ store, + inAttrs, + infoAttrs, + *unpackedStorePath, + immutable); + + return Tree { + .actualPath = store->toRealPath(*unpackedStorePath), + .storePath = std::move(*unpackedStorePath), + .info = TreeInfo { + .lastModified = lastModified, + }, + }; +} + +struct TarballInput : Input +{ + ParsedURL url; + std::optional hash; + + TarballInput(const ParsedURL & url) : url(url) + { } + + std::string type() const override { return "tarball"; } + + bool operator ==(const Input & other) const override + { + auto other2 = dynamic_cast(&other); + return + other2 + && to_string() == other2->to_string() + && hash == other2->hash; + } + + bool isImmutable() const override + { + return hash || narHash; + } + + ParsedURL toURL() const override + { + auto url2(url); + // NAR hashes are preferred over file hashes since tar/zip files + // don't have a canonical representation. + if (narHash) + url2.query.insert_or_assign("narHash", narHash->to_string(SRI)); + else if (hash) + url2.query.insert_or_assign("hash", hash->to_string(SRI)); + return url2; + } + + Attrs toAttrsInternal() const override + { + Attrs attrs; + attrs.emplace("url", url.to_string()); + if (narHash) + attrs.emplace("narHash", narHash->to_string(SRI)); + else if (hash) + attrs.emplace("hash", hash->to_string(SRI)); + return attrs; + } + + std::pair> fetchTreeInternal(nix::ref store) const override + { + auto tree = downloadTarball(store, url.to_string(), "source", false); + + auto input = std::make_shared(*this); + input->narHash = store->queryPathInfo(tree.storePath)->narHash; + + return {std::move(tree), input}; + } +}; + +struct TarballInputScheme : InputScheme +{ + std::unique_ptr inputFromURL(const ParsedURL & url) override + { + if (url.scheme != "file" && url.scheme != "http" && url.scheme != "https") return nullptr; + + if (!hasSuffix(url.path, ".zip") + && !hasSuffix(url.path, ".tar") + && !hasSuffix(url.path, ".tar.gz") + && !hasSuffix(url.path, ".tar.xz") + && !hasSuffix(url.path, ".tar.bz2")) + return nullptr; + + auto input = std::make_unique(url); + + auto hash = input->url.query.find("hash"); + if (hash != input->url.query.end()) { + // FIXME: require SRI hash. + input->hash = Hash(hash->second); + input->url.query.erase(hash); + } + + auto narHash = input->url.query.find("narHash"); + if (narHash != input->url.query.end()) { + // FIXME: require SRI hash. + input->narHash = Hash(narHash->second); + input->url.query.erase(narHash); + } + + return input; + } + + std::unique_ptr inputFromAttrs(const Attrs & attrs) override + { + if (maybeGetStrAttr(attrs, "type") != "tarball") return {}; + + for (auto & [name, value] : attrs) + if (name != "type" && name != "url" && name != "hash" && name != "narHash") + throw Error("unsupported tarball input attribute '%s'", name); + + auto input = std::make_unique(parseURL(getStrAttr(attrs, "url"))); + if (auto hash = maybeGetStrAttr(attrs, "hash")) + // FIXME: require SRI hash. 
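+            // Per the note in toURL() above, 'hash' is the (file) hash of the
+            // tarball itself, whereas 'narHash' is the hash of the unpacked
+            // NAR.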
+ input->hash = Hash(*hash); + + return input; + } +}; + +static auto r1 = OnStartup([] { registerInputScheme(std::make_unique()); }); + +} diff --git a/src/libfetchers/tree-info.cc b/src/libfetchers/tree-info.cc new file mode 100644 index 000000000..5788e94a1 --- /dev/null +++ b/src/libfetchers/tree-info.cc @@ -0,0 +1,14 @@ +#include "tree-info.hh" +#include "store-api.hh" + +#include + +namespace nix::fetchers { + +StorePath TreeInfo::computeStorePath(Store & store) const +{ + assert(narHash); + return store.makeFixedOutputPath(true, narHash, "source"); +} + +} diff --git a/src/libfetchers/tree-info.hh b/src/libfetchers/tree-info.hh new file mode 100644 index 000000000..2c7347281 --- /dev/null +++ b/src/libfetchers/tree-info.hh @@ -0,0 +1,29 @@ +#pragma once + +#include "path.hh" +#include "hash.hh" + +#include + +namespace nix { class Store; } + +namespace nix::fetchers { + +struct TreeInfo +{ + Hash narHash; + std::optional revCount; + std::optional lastModified; + + bool operator ==(const TreeInfo & other) const + { + return + narHash == other.narHash + && revCount == other.revCount + && lastModified == other.lastModified; + } + + StorePath computeStorePath(Store & store) const; +}; + +} diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index 973ddc86a..0067032af 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -378,7 +378,7 @@ Hash hashDerivationModulo(Store & store, const Derivation & drv, bool maskOutput if (h == drvHashes.end()) { assert(store.isValidPath(i.first)); h = drvHashes.insert_or_assign(i.first.clone(), hashDerivationModulo(store, - readDerivation(store, store.toRealPath(store.printStorePath(i.first))), false)).first; + readDerivation(store, store.toRealPath(i.first)), false)).first; } inputs2.insert_or_assign(h->second.to_string(Base16, false), i.second); } diff --git a/src/libstore/download.cc b/src/libstore/download.cc index 5967d0425..215046b72 100644 --- a/src/libstore/download.cc +++ b/src/libstore/download.cc @@ -35,10 +35,6 @@ DownloadSettings downloadSettings; static GlobalConfig::Register r1(&downloadSettings); -CachedDownloadRequest::CachedDownloadRequest(const std::string & uri) - : uri(uri), ttl(settings.tarballTtl) -{ } - std::string resolveUri(const std::string & uri) { if (uri.compare(0, 8, "channel:") == 0) @@ -802,139 +798,6 @@ void Downloader::download(DownloadRequest && request, Sink & sink) } } -CachedDownloadResult Downloader::downloadCached( - ref store, const CachedDownloadRequest & request) -{ - auto url = resolveUri(request.uri); - - auto name = request.name; - if (name == "") { - auto p = url.rfind('/'); - if (p != string::npos) name = string(url, p + 1); - } - - std::optional expectedStorePath; - if (request.expectedHash) { - expectedStorePath = store->makeFixedOutputPath(request.unpack, request.expectedHash, name); - if (store->isValidPath(*expectedStorePath)) { - CachedDownloadResult result; - result.storePath = store->printStorePath(*expectedStorePath); - result.path = store->toRealPath(result.storePath); - return result; - } - } - - Path cacheDir = getCacheDir() + "/nix/tarballs"; - createDirs(cacheDir); - - string urlHash = hashString(htSHA256, name + std::string("\0"s) + url).to_string(Base32, false); - - Path dataFile = cacheDir + "/" + urlHash + ".info"; - Path fileLink = cacheDir + "/" + urlHash + "-file"; - - PathLocks lock({fileLink}, fmt("waiting for lock on '%1%'...", fileLink)); - - std::optional storePath; - - string expectedETag; - - bool skip = false; - - CachedDownloadResult 
result; - - if (pathExists(fileLink) && pathExists(dataFile)) { - storePath = store->parseStorePath(readLink(fileLink)); - // FIXME - store->addTempRoot(*storePath); - if (store->isValidPath(*storePath)) { - auto ss = tokenizeString>(readFile(dataFile), "\n"); - if (ss.size() >= 3 && ss[0] == url) { - time_t lastChecked; - if (string2Int(ss[2], lastChecked) && (uint64_t) lastChecked + request.ttl >= (uint64_t) time(0)) { - skip = true; - result.effectiveUri = request.uri; - result.etag = ss[1]; - } else if (!ss[1].empty()) { - debug(format("verifying previous ETag '%1%'") % ss[1]); - expectedETag = ss[1]; - } - } - } else - storePath.reset(); - } - - if (!skip) { - - try { - DownloadRequest request2(url); - request2.expectedETag = expectedETag; - auto res = download(request2); - result.effectiveUri = res.effectiveUri; - result.etag = res.etag; - - if (!res.cached) { - StringSink sink; - dumpString(*res.data, sink); - Hash hash = hashString(request.expectedHash ? request.expectedHash.type : htSHA256, *res.data); - ValidPathInfo info(store->makeFixedOutputPath(false, hash, name)); - info.narHash = hashString(htSHA256, *sink.s); - info.narSize = sink.s->size(); - info.ca = makeFixedOutputCA(false, hash); - store->addToStore(info, sink.s, NoRepair, NoCheckSigs); - storePath = info.path.clone(); - } - - assert(storePath); - replaceSymlink(store->printStorePath(*storePath), fileLink); - - writeFile(dataFile, url + "\n" + res.etag + "\n" + std::to_string(time(0)) + "\n"); - } catch (DownloadError & e) { - if (!storePath) throw; - warn("warning: %s; using cached result", e.msg()); - result.etag = expectedETag; - } - } - - if (request.unpack) { - Path unpackedLink = cacheDir + "/" + ((std::string) storePath->to_string()) + "-unpacked"; - PathLocks lock2({unpackedLink}, fmt("waiting for lock on '%1%'...", unpackedLink)); - std::optional unpackedStorePath; - if (pathExists(unpackedLink)) { - unpackedStorePath = store->parseStorePath(readLink(unpackedLink)); - // FIXME - store->addTempRoot(*unpackedStorePath); - if (!store->isValidPath(*unpackedStorePath)) - unpackedStorePath.reset(); - } - if (!unpackedStorePath) { - printInfo("unpacking '%s'...", url); - Path tmpDir = createTempDir(); - AutoDelete autoDelete(tmpDir, true); - unpackTarfile(store->toRealPath(store->printStorePath(*storePath)), tmpDir); - auto members = readDirectory(tmpDir); - if (members.size() != 1) - throw nix::Error("tarball '%s' contains an unexpected number of top-level files", url); - auto topDir = tmpDir + "/" + members.begin()->name; - unpackedStorePath = store->addToStore(name, topDir, true, htSHA256, defaultPathFilter, NoRepair); - } - replaceSymlink(store->printStorePath(*unpackedStorePath), unpackedLink); - storePath = std::move(*unpackedStorePath); - } - - if (expectedStorePath && *storePath != *expectedStorePath) { - unsigned int statusCode = 102; - Hash gotHash = request.unpack - ? 
hashPath(request.expectedHash.type, store->toRealPath(store->printStorePath(*storePath))).first - : hashFile(request.expectedHash.type, store->toRealPath(store->printStorePath(*storePath))); - throw nix::Error(statusCode, "hash mismatch in file downloaded from '%s':\n wanted: %s\n got: %s", - url, request.expectedHash.to_string(), gotHash.to_string()); - } - - result.storePath = store->printStorePath(*storePath); - result.path = store->toRealPath(result.storePath); - return result; -} - bool isUri(const string & s) { diff --git a/src/libstore/download.hh b/src/libstore/download.hh index 5a131c704..28c8a9162 100644 --- a/src/libstore/download.hh +++ b/src/libstore/download.hh @@ -65,28 +65,6 @@ struct DownloadResult uint64_t bodySize = 0; }; -struct CachedDownloadRequest -{ - std::string uri; - bool unpack = false; - std::string name; - Hash expectedHash; - unsigned int ttl; - - CachedDownloadRequest(const std::string & uri); - CachedDownloadRequest() = delete; -}; - -struct CachedDownloadResult -{ - // Note: 'storePath' may be different from 'path' when using a - // chroot store. - Path storePath; - Path path; - std::optional etag; - std::string effectiveUri; -}; - class Store; struct Downloader @@ -108,12 +86,6 @@ struct Downloader invoked on the thread of the caller. */ void download(DownloadRequest && request, Sink & sink); - /* Check if the specified file is already in ~/.cache/nix/tarballs - and is more recent than ‘tarball-ttl’ seconds. Otherwise, - use the recorded ETag to verify if the server has a more - recent version, and if so, download it to the Nix store. */ - CachedDownloadResult downloadCached(ref store, const CachedDownloadRequest & request); - enum Error { NotFound, Forbidden, Misc, Transient, Interrupted }; }; @@ -135,4 +107,7 @@ public: bool isUri(const string & s); +/* Resolve deprecated 'channel:' URLs. 
*/ +std::string resolveUri(const std::string & uri); + } diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh index 3aa3653f3..40f350f0b 100644 --- a/src/libstore/globals.hh +++ b/src/libstore/globals.hh @@ -351,12 +351,21 @@ public: Setting pluginFiles{this, {}, "plugin-files", "Plugins to dynamically load at nix initialization time."}; + Setting githubAccessToken{this, "", "github-access-token", + "GitHub access token to get access to GitHub data through the GitHub API for github:<..> flakes."}; + Setting experimentalFeatures{this, {}, "experimental-features", "Experimental Nix features to enable."}; bool isExperimentalFeatureEnabled(const std::string & name); void requireExperimentalFeature(const std::string & name); + + Setting allowDirty{this, true, "allow-dirty", + "Whether to allow dirty Git/Mercurial trees."}; + + Setting warnDirty{this, true, "warn-dirty", + "Whether to warn about dirty Git/Mercurial trees."}; }; diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index b9e894a9a..e5282bb30 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -6,6 +6,7 @@ #include "thread-pool.hh" #include "json.hh" #include "derivations.hh" +#include "url.hh" #include @@ -40,7 +41,7 @@ Path Store::followLinksToStore(std::string_view _path) const path = absPath(target, dirOf(path)); } if (!isInStore(path)) - throw Error(format("path '%1%' is not in the Nix store") % path); + throw NotInStore("path '%1%' is not in the Nix store", path); return path; } @@ -866,27 +867,7 @@ std::pair splitUriAndParams(const std::string & uri_ Store::Params params; auto q = uri.find('?'); if (q != std::string::npos) { - for (auto s : tokenizeString(uri.substr(q + 1), "&")) { - auto e = s.find('='); - if (e != std::string::npos) { - auto value = s.substr(e + 1); - std::string decoded; - for (size_t i = 0; i < value.size(); ) { - if (value[i] == '%') { - if (i + 2 >= value.size()) - throw Error("invalid URI parameter '%s'", value); - try { - decoded += std::stoul(std::string(value, i + 1, 2), 0, 16); - i += 3; - } catch (...) { - throw Error("invalid URI parameter '%s'", value); - } - } else - decoded += value[i++]; - } - params[s.substr(0, e)] = decoded; - } - } + params = decodeQuery(uri.substr(q + 1)); uri = uri_.substr(0, q); } return {uri, params}; diff --git a/src/libstore/store-api.hh b/src/libstore/store-api.hh index 0fa59be6a..81014763d 100644 --- a/src/libstore/store-api.hh +++ b/src/libstore/store-api.hh @@ -28,6 +28,7 @@ MakeError(InvalidPath, Error); MakeError(Unsupported, Error); MakeError(SubstituteGone, Error); MakeError(SubstituterDisabled, Error); +MakeError(NotInStore, Error); struct BasicDerivation; diff --git a/src/libutil/types.hh b/src/libutil/types.hh index 20b96a85c..a1ce7b372 100644 --- a/src/libutil/types.hh +++ b/src/libutil/types.hh @@ -157,4 +157,12 @@ typedef list Paths; typedef set PathSet; +/* Helper class to run code at startup. 
*/ +template +struct OnStartup +{ + OnStartup(T && t) { t(); } +}; + + } diff --git a/src/libutil/url.cc b/src/libutil/url.cc new file mode 100644 index 000000000..5d5328e5d --- /dev/null +++ b/src/libutil/url.cc @@ -0,0 +1,137 @@ +#include "url.hh" +#include "util.hh" + +namespace nix { + +std::regex refRegex(refRegexS, std::regex::ECMAScript); +std::regex revRegex(revRegexS, std::regex::ECMAScript); +std::regex flakeIdRegex(flakeIdRegexS, std::regex::ECMAScript); + +ParsedURL parseURL(const std::string & url) +{ + static std::regex uriRegex( + "((" + schemeRegex + "):" + + "(?:(?://(" + authorityRegex + ")(" + absPathRegex + "))|(/?" + pathRegex + ")))" + + "(?:\\?(" + queryRegex + "))?" + + "(?:#(" + queryRegex + "))?", + std::regex::ECMAScript); + + std::smatch match; + + if (std::regex_match(url, match, uriRegex)) { + auto & base = match[1]; + std::string scheme = match[2]; + auto authority = match[3].matched + ? std::optional(match[3]) : std::nullopt; + std::string path = match[4].matched ? match[4] : match[5]; + auto & query = match[6]; + auto & fragment = match[7]; + + auto isFile = scheme.find("file") != std::string::npos; + + if (authority && *authority != "" && isFile) + throw Error("file:// URL '%s' has unexpected authority '%s'", + url, *authority); + + if (isFile && path.empty()) + path = "/"; + + return ParsedURL{ + .url = url, + .base = base, + .scheme = scheme, + .authority = authority, + .path = path, + .query = decodeQuery(query), + .fragment = percentDecode(std::string(fragment)) + }; + } + + else + throw BadURL("'%s' is not a valid URL", url); +} + +std::string percentDecode(std::string_view in) +{ + std::string decoded; + for (size_t i = 0; i < in.size(); ) { + if (in[i] == '%') { + if (i + 2 >= in.size()) + throw BadURL("invalid URI parameter '%s'", in); + try { + decoded += std::stoul(std::string(in, i + 1, 2), 0, 16); + i += 3; + } catch (...) { + throw BadURL("invalid URI parameter '%s'", in); + } + } else + decoded += in[i++]; + } + return decoded; +} + +std::map decodeQuery(const std::string & query) +{ + std::map result; + + for (auto s : tokenizeString(query, "&")) { + auto e = s.find('='); + if (e != std::string::npos) + result.emplace( + s.substr(0, e), + percentDecode(std::string_view(s).substr(e + 1))); + } + + return result; +} + +std::string percentEncode(std::string_view s) +{ + std::string res; + for (auto & c : s) + if ((c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z') + || (c >= '0' && c <= '9') + || strchr("-._~!$&'()*+,;=:@", c)) + res += c; + else + res += fmt("%%%02x", (unsigned int) c); + return res; +} + +std::string encodeQuery(const std::map & ss) +{ + std::string res; + bool first = true; + for (auto & [name, value] : ss) { + if (!first) res += '&'; + first = false; + res += percentEncode(name); + res += '='; + res += percentEncode(value); + } + return res; +} + +std::string ParsedURL::to_string() const +{ + return + scheme + + ":" + + (authority ? "//" + *authority : "") + + path + + (query.empty() ? "" : "?" + encodeQuery(query)) + + (fragment.empty() ? 
"" : "#" + percentEncode(fragment)); +} + +bool ParsedURL::operator ==(const ParsedURL & other) const +{ + return + scheme == other.scheme + && authority == other.authority + && path == other.path + && query == other.query + && fragment == other.fragment; +} + +} diff --git a/src/libutil/url.hh b/src/libutil/url.hh new file mode 100644 index 000000000..1503023a2 --- /dev/null +++ b/src/libutil/url.hh @@ -0,0 +1,62 @@ +#pragma once + +#include "types.hh" + +#include + +namespace nix { + +struct ParsedURL +{ + std::string url; + std::string base; // URL without query/fragment + std::string scheme; + std::optional authority; + std::string path; + std::map query; + std::string fragment; + + std::string to_string() const; + + bool operator ==(const ParsedURL & other) const; +}; + +MakeError(BadURL, Error); + +std::string percentDecode(std::string_view in); + +std::map decodeQuery(const std::string & query); + +ParsedURL parseURL(const std::string & url); + +// URI stuff. +const static std::string pctEncoded = "(?:%[0-9a-fA-F][0-9a-fA-F])"; +const static std::string schemeRegex = "(?:[a-z+]+)"; +const static std::string ipv6AddressRegex = "(?:\\[[0-9a-fA-F:]+\\])"; +const static std::string unreservedRegex = "(?:[a-zA-Z0-9-._~])"; +const static std::string subdelimsRegex = "(?:[!$&'\"()*+,;=])"; +const static std::string hostnameRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + ")*)"; +const static std::string hostRegex = "(?:" + ipv6AddressRegex + "|" + hostnameRegex + ")"; +const static std::string userRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|:)*)"; +const static std::string authorityRegex = "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?"; +const static std::string pcharRegex = "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])"; +const static std::string queryRegex = "(?:" + pcharRegex + "|[/? \"])*"; +const static std::string segmentRegex = "(?:" + pcharRegex + "+)"; +const static std::string absPathRegex = "(?:(?:/" + segmentRegex + ")*/?)"; +const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)"; + +// A Git ref (i.e. branch or tag name). +const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check +extern std::regex refRegex; + +// A Git revision (a SHA-1 commit hash). +const static std::string revRegexS = "[0-9a-fA-F]{40}"; +extern std::regex revRegex; + +// A ref or revision, or a ref followed by a revision. +const static std::string refAndOrRevRegex = "(?:(" + revRegexS + ")|(?:(" + refRegexS + ")(?:/(" + revRegexS + "))?))"; + +const static std::string flakeIdRegexS = "[a-zA-Z][a-zA-Z0-9_-]*"; +extern std::regex flakeIdRegex; + +} diff --git a/src/nix-channel/nix-channel.cc b/src/nix-channel/nix-channel.cc index a2639579d..2a9defb4e 100755 --- a/src/nix-channel/nix-channel.cc +++ b/src/nix-channel/nix-channel.cc @@ -3,6 +3,7 @@ #include "download.hh" #include "store-api.hh" #include "../nix/legacy.hh" +#include "fetchers.hh" #include #include @@ -86,12 +87,9 @@ static void update(const StringSet & channelNames) // We want to download the url to a file to see if it's a tarball while also checking if we // got redirected in the process, so that we can grab the various parts of a nix channel // definition from a consistent location if the redirect changes mid-download. 
- CachedDownloadRequest request(url); - request.ttl = 0; - auto dl = getDownloader(); - auto result = dl->downloadCached(store, request); - auto filename = result.path; - url = chomp(result.effectiveUri); + auto result = fetchers::downloadFile(store, url, std::string(baseNameOf(url)), false); + auto filename = store->toRealPath(result.storePath); + url = result.effectiveUrl; // If the URL contains a version number, append it to the name // attribute (so that "nix-env -q" on the channels profile @@ -114,11 +112,10 @@ static void update(const StringSet & channelNames) if (!unpacked) { // Download the channel tarball. try { - filename = dl->downloadCached(store, CachedDownloadRequest(url + "/nixexprs.tar.xz")).path; + filename = store->toRealPath(fetchers::downloadFile(store, url + "/nixexprs.tar.xz", "nixexprs.tar.xz", false).storePath); } catch (DownloadError & e) { - filename = dl->downloadCached(store, CachedDownloadRequest(url + "/nixexprs.tar.bz2")).path; + filename = store->toRealPath(fetchers::downloadFile(store, url + "/nixexprs.tar.bz2", "nixexprs.tar.bz2", false).storePath); } - chomp(filename); } // Regardless of where it came from, add the expression representing this channel to accumulated expression @@ -185,6 +182,8 @@ static int _main(int argc, char ** argv) } else if (*arg == "--rollback") { cmd = cRollback; } else { + if (hasPrefix(*arg, "-")) + throw UsageError("unsupported argument '%s'", *arg); args.push_back(std::move(*arg)); } return true; diff --git a/src/nix/local.mk b/src/nix/local.mk index 50a18efd7..033675e89 100644 --- a/src/nix/local.mk +++ b/src/nix/local.mk @@ -15,9 +15,9 @@ nix_SOURCES := \ $(wildcard src/nix-prefetch-url/*.cc) \ $(wildcard src/nix-store/*.cc) \ -nix_CXXFLAGS += -I src/libutil -I src/libstore -I src/libexpr -I src/libmain +nix_CXXFLAGS += -I src/libutil -I src/libstore -I src/libfetchers -I src/libexpr -I src/libmain -nix_LIBS = libexpr libmain libstore libutil libnixrust +nix_LIBS = libexpr libmain libfetchers libstore libutil libnixrust nix_LDFLAGS = -pthread $(SODIUM_LIBS) $(EDITLINE_LIBS) $(BOOST_LDFLAGS) -lboost_context -lboost_thread -lboost_system diff --git a/tests/fetchGit.sh b/tests/fetchGit.sh index ed8fa14d6..d9c9874f5 100644 --- a/tests/fetchGit.sh +++ b/tests/fetchGit.sh @@ -11,7 +11,7 @@ repo=$TEST_ROOT/git export _NIX_FORCE_HTTP=1 -rm -rf $repo ${repo}-tmp $TEST_HOME/.cache/nix/gitv2 +rm -rf $repo ${repo}-tmp $TEST_HOME/.cache/nix $TEST_ROOT/worktree $TEST_ROOT/shallow git init $repo git -C $repo config user.email "foobar@example.com" @@ -25,8 +25,16 @@ rev1=$(git -C $repo rev-parse HEAD) echo world > $repo/hello git -C $repo commit -m 'Bla2' -a +git -C $repo worktree add $TEST_ROOT/worktree +echo hello >> $TEST_ROOT/worktree/hello rev2=$(git -C $repo rev-parse HEAD) +# Fetch a worktree +unset _NIX_FORCE_HTTP +path0=$(nix eval --raw "(builtins.fetchGit file://$TEST_ROOT/worktree).outPath") +export _NIX_FORCE_HTTP=1 +[[ $(tail -n 1 $path0/hello) = "hello" ]] + # Fetch the default branch. path=$(nix eval --raw "(builtins.fetchGit file://$repo).outPath") [[ $(cat $path/hello) = world ]] @@ -50,9 +58,6 @@ path2=$(nix eval --raw "(builtins.fetchGit file://$repo).outPath") [[ $(nix eval "(builtins.fetchGit file://$repo).revCount") = 2 ]] [[ $(nix eval --raw "(builtins.fetchGit file://$repo).rev") = $rev2 ]] -# But with TTL 0, it should fail. -(! nix eval --tarball-ttl 0 "(builtins.fetchGit file://$repo)" -vvvvv) - # Fetching with a explicit hash should succeed. 
path2=$(nix eval --tarball-ttl 0 --raw "(builtins.fetchGit { url = file://$repo; rev = \"$rev2\"; }).outPath") [[ $path = $path2 ]] @@ -74,6 +79,7 @@ echo bar > $repo/dir2/bar git -C $repo add dir1/foo git -C $repo rm hello +unset _NIX_FORCE_HTTP path2=$(nix eval --raw "(builtins.fetchGit $repo).outPath") [ ! -e $path2/hello ] [ ! -e $path2/bar ] @@ -110,9 +116,9 @@ path=$(nix eval --raw "(builtins.fetchGit file://$repo).outPath") git -C $repo checkout $rev2 -b dev echo dev > $repo/hello -# File URI uses 'master' unless specified otherwise +# File URI uses dirty tree unless specified otherwise path2=$(nix eval --raw "(builtins.fetchGit file://$repo).outPath") -[[ $path = $path2 ]] +[ $(cat $path2/hello) = dev ] # Using local path with branch other than 'master' should work when clean or dirty path3=$(nix eval --raw "(builtins.fetchGit $repo).outPath") @@ -131,9 +137,9 @@ path5=$(nix eval --raw "(builtins.fetchGit { url = $repo; ref = \"dev\"; }).outP # Nuke the cache -rm -rf $TEST_HOME/.cache/nix/gitv2 +rm -rf $TEST_HOME/.cache/nix -# Try again, but without 'git' on PATH +# Try again, but without 'git' on PATH. This should fail. NIX=$(command -v nix) # This should fail (! PATH= $NIX eval --raw "(builtins.fetchGit { url = $repo; ref = \"dev\"; }).outPath" ) @@ -141,3 +147,13 @@ NIX=$(command -v nix) # Try again, with 'git' available. This should work. path5=$(nix eval --raw "(builtins.fetchGit { url = $repo; ref = \"dev\"; }).outPath") [[ $path3 = $path5 ]] + +# Fetching a shallow repo shouldn't work by default, because we can't +# return a revCount. +git clone --depth 1 file://$repo $TEST_ROOT/shallow +(! nix eval --raw "(builtins.fetchGit { url = $TEST_ROOT/shallow; ref = \"dev\"; }).outPath") + +# But you can request a shallow clone, which won't return a revCount. +path6=$(nix eval --raw "(builtins.fetchTree { type = \"git\"; url = \"file://$TEST_ROOT/shallow\"; ref = \"dev\"; shallow = true; }).outPath") +[[ $path3 = $path6 ]] +[[ $(nix eval "(builtins.fetchTree { type = \"git\"; url = \"file://$TEST_ROOT/shallow\"; ref = \"dev\"; shallow = true; }).revCount or 123") == 123 ]] diff --git a/tests/init.sh b/tests/init.sh index 6a119aad0..c62c4856a 100644 --- a/tests/init.sh +++ b/tests/init.sh @@ -17,7 +17,7 @@ cat > "$NIX_CONF_DIR"/nix.conf <&1 | grep 'NAR hash mismatch in input' + nix-instantiate --eval -E '1 + 2' -I fnord=file://no-such-tarball.tar$ext nix-instantiate --eval -E 'with ; 1 + 2' -I fnord=file://no-such-tarball$ext (! nix-instantiate --eval -E ' 1' -I fnord=file://no-such-tarball$ext) From 670feb000a9fac76f0996711f061ec466a53dc97 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 2 Apr 2020 14:56:20 +0200 Subject: [PATCH 23/27] Add 'path' fetcher This fetchers copies a plain directory (i.e. not a Git/Mercurial repository) to the store (or does nothing if the path is already a store path). One use case is to pin the 'nixpkgs' flake used to build the current NixOS system, and prevent it from being garbage-collected, via a system registry entry like this: { "from": { "id": "nixpkgs", "type": "indirect" }, "to": { "type": "path", "path": "/nix/store/rralhl3wj4rdwzjn16g7d93mibvlr521-source", "lastModified": 1585388205, "rev": "b0c285807d6a9f1b7562ec417c24fa1a30ecc31a" }, "exact": true } Note the fake "lastModified" and "rev" attributes that ensure that the flake gives the same evaluation results as the corresponding Git/GitHub inputs. 
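As an illustrative sketch (the attribute names follow the 'path' input scheme
added below; the store path and metadata values are the placeholders from the
registry entry above), such a pinned tree could be consumed through fetchTree
while faking the metadata a Git input would have provided:

  builtins.fetchTree {
    type = "path";
    path = "/nix/store/rralhl3wj4rdwzjn16g7d93mibvlr521-source";
    # Fake metadata so evaluation results match the original Git/GitHub input.
    lastModified = 1585388205;
    rev = "b0c285807d6a9f1b7562ec417c24fa1a30ecc31a";
  }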
(cherry picked from commit 12f9379123eba828f2ae06f7978a37b7045c2b23) --- src/libfetchers/attrs.cc | 15 +++++ src/libfetchers/attrs.hh | 2 + src/libfetchers/path.cc | 130 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+) create mode 100644 src/libfetchers/path.cc diff --git a/src/libfetchers/attrs.cc b/src/libfetchers/attrs.cc index 40c02de42..feb0a6085 100644 --- a/src/libfetchers/attrs.cc +++ b/src/libfetchers/attrs.cc @@ -89,4 +89,19 @@ bool getBoolAttr(const Attrs & attrs, const std::string & name) return *s; } +std::map attrsToQuery(const Attrs & attrs) +{ + std::map query; + for (auto & attr : attrs) { + if (auto v = std::get_if(&attr.second)) { + query.insert_or_assign(attr.first, fmt("%d", *v)); + } else if (auto v = std::get_if(&attr.second)) { + query.insert_or_assign(attr.first, *v); + } else if (auto v = std::get_if>(&attr.second)) { + query.insert_or_assign(attr.first, v->t ? "1" : "0"); + } else abort(); + } + return query; +} + } diff --git a/src/libfetchers/attrs.hh b/src/libfetchers/attrs.hh index 2c9e772d2..d6e0ae000 100644 --- a/src/libfetchers/attrs.hh +++ b/src/libfetchers/attrs.hh @@ -34,4 +34,6 @@ std::optional maybeGetBoolAttr(const Attrs & attrs, const std::string & na bool getBoolAttr(const Attrs & attrs, const std::string & name); +std::map attrsToQuery(const Attrs & attrs); + } diff --git a/src/libfetchers/path.cc b/src/libfetchers/path.cc new file mode 100644 index 000000000..9801d9b86 --- /dev/null +++ b/src/libfetchers/path.cc @@ -0,0 +1,130 @@ +#include "fetchers.hh" +#include "store-api.hh" + +namespace nix::fetchers { + +struct PathInput : Input +{ + Path path; + + /* Allow the user to pass in "fake" tree info attributes. This is + useful for making a pinned tree work the same as the repository + from which is exported + (e.g. path:/nix/store/...-source?lastModified=1585388205&rev=b0c285...). */ + std::optional rev; + std::optional revCount; + std::optional lastModified; + + std::string type() const override { return "path"; } + + std::optional getRev() const override { return rev; } + + ParsedURL toURL() const override + { + auto query = attrsToQuery(toAttrsInternal()); + query.erase("path"); + return ParsedURL { + .scheme = "path", + .path = path, + .query = query, + }; + } + + Attrs toAttrsInternal() const override + { + Attrs attrs; + attrs.emplace("path", path); + if (rev) + attrs.emplace("rev", rev->gitRev()); + if (revCount) + attrs.emplace("revCount", *revCount); + if (lastModified) + attrs.emplace("lastModified", *lastModified); + return attrs; + } + + std::pair> fetchTreeInternal(nix::ref store) const override + { + auto input = std::make_shared(*this); + + auto storePath = store->maybeParseStorePath(path); + + if (storePath) + store->addTempRoot(*storePath); + + if (!storePath || storePath->name() != "source" || !store->isValidPath(*storePath)) + // FIXME: try to substitute storePath. 
+ storePath = store->addToStore("name", path); + + return + { + Tree { + .actualPath = store->toRealPath(*storePath), + .storePath = std::move(*storePath), + .info = TreeInfo { + .revCount = revCount, + .lastModified = lastModified + } + }, + input + }; + } + +}; + +struct PathInputScheme : InputScheme +{ + std::unique_ptr inputFromURL(const ParsedURL & url) override + { + if (url.scheme != "path") return nullptr; + + auto input = std::make_unique(); + input->path = url.path; + + for (auto & [name, value] : url.query) + if (name == "rev") + input->rev = Hash(value, htSHA1); + else if (name == "revCount") { + uint64_t revCount; + if (!string2Int(value, revCount)) + throw Error("path URL '%s' has invalid parameter '%s'", url.to_string(), name); + input->revCount = revCount; + } + else if (name == "lastModified") { + time_t lastModified; + if (!string2Int(value, lastModified)) + throw Error("path URL '%s' has invalid parameter '%s'", url.to_string(), name); + input->lastModified = lastModified; + } + else + throw Error("path URL '%s' has unsupported parameter '%s'", url.to_string(), name); + + return input; + } + + std::unique_ptr inputFromAttrs(const Attrs & attrs) override + { + if (maybeGetStrAttr(attrs, "type") != "path") return {}; + + auto input = std::make_unique(); + input->path = getStrAttr(attrs, "path"); + + for (auto & [name, value] : attrs) + if (name == "rev") + input->rev = Hash(getStrAttr(attrs, "rev"), htSHA1); + else if (name == "revCount") + input->revCount = getIntAttr(attrs, "revCount"); + else if (name == "lastModified") + input->lastModified = getIntAttr(attrs, "lastModified"); + else if (name == "type" || name == "path") + ; + else + throw Error("unsupported path input attribute '%s'", name); + + return input; + } +}; + +static auto r1 = OnStartup([] { registerInputScheme(std::make_unique()); }); + +} From f58a9b0e62879e28cc7cac59b489a52c924bbc36 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 2 Apr 2020 19:04:33 +0200 Subject: [PATCH 24/27] Respect the narHash attribute in more input types (cherry picked from commit a6ff66b658b61aef80d936f0183447fe4cb46000) --- src/libfetchers/fetchers.cc | 4 +++- src/libfetchers/git.cc | 2 +- src/libfetchers/github.cc | 2 +- src/libfetchers/mercurial.cc | 2 +- src/libfetchers/tarball.cc | 6 ++---- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/libfetchers/fetchers.cc b/src/libfetchers/fetchers.cc index 25d3da431..94ac30e38 100644 --- a/src/libfetchers/fetchers.cc +++ b/src/libfetchers/fetchers.cc @@ -29,8 +29,10 @@ std::unique_ptr inputFromURL(const std::string & url) std::unique_ptr inputFromAttrs(const Attrs & attrs) { + auto attrs2(attrs); + attrs2.erase("narHash"); for (auto & inputScheme : *inputSchemes) { - auto res = inputScheme->inputFromAttrs(attrs); + auto res = inputScheme->inputFromAttrs(attrs2); if (res) { if (auto narHash = maybeGetStrAttr(attrs, "narHash")) // FIXME: require SRI hash. 
diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index cee0713f4..3f94d9bdd 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -39,7 +39,7 @@ struct GitInput : Input bool isImmutable() const override { - return (bool) rev; + return (bool) rev || narHash; } std::optional getRef() const override { return ref; } diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 3fc95ff51..ef27eaa76 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -33,7 +33,7 @@ struct GitHubInput : Input bool isImmutable() const override { - return (bool) rev; + return (bool) rev || narHash; } std::optional getRef() const override { return ref; } diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc index 790f7c753..1d6571571 100644 --- a/src/libfetchers/mercurial.cc +++ b/src/libfetchers/mercurial.cc @@ -33,7 +33,7 @@ struct MercurialInput : Input bool isImmutable() const override { - return (bool) rev; + return (bool) rev || narHash; } std::optional getRef() const override { return ref; } diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index b0a83001e..4c4e5828e 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -205,9 +205,7 @@ struct TarballInput : Input { Attrs attrs; attrs.emplace("url", url.to_string()); - if (narHash) - attrs.emplace("narHash", narHash->to_string(SRI)); - else if (hash) + if (hash) attrs.emplace("hash", hash->to_string(SRI)); return attrs; } @@ -260,7 +258,7 @@ struct TarballInputScheme : InputScheme if (maybeGetStrAttr(attrs, "type") != "tarball") return {}; for (auto & [name, value] : attrs) - if (name != "type" && name != "url" && name != "hash" && name != "narHash") + if (name != "type" && name != "url" && name != "hash") throw Error("unsupported tarball input attribute '%s'", name); auto input = std::make_unique(parseURL(getStrAttr(attrs, "url"))); From a6dfa3cb85b59a0979de7fe02c9d67939c4ac217 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 2 Apr 2020 19:04:27 +0200 Subject: [PATCH 25/27] PathInput: Add some methods (cherry picked from commit 78ad5b3d91507427fa563f3474dc52da608ad224) --- src/libfetchers/path.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/libfetchers/path.cc b/src/libfetchers/path.cc index 9801d9b86..037404726 100644 --- a/src/libfetchers/path.cc +++ b/src/libfetchers/path.cc @@ -19,6 +19,22 @@ struct PathInput : Input std::optional getRev() const override { return rev; } + bool operator ==(const Input & other) const override + { + auto other2 = dynamic_cast(&other); + return + other2 + && path == other2->path + && rev == other2->rev + && revCount == other2->revCount + && lastModified == other2->lastModified; + } + + bool isImmutable() const override + { + return (bool) narHash; + } + ParsedURL toURL() const override { auto query = attrsToQuery(toAttrsInternal()); From 26aeeb7653fa051ddec913bdc2c578b9066bc08f Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 6 Apr 2020 14:28:37 +0200 Subject: [PATCH 26/27] Add FIXME (cherry picked from commit 2f494531b7811b45f6b76787f225495a14d28a7f) --- src/libfetchers/path.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/libfetchers/path.cc b/src/libfetchers/path.cc index 037404726..7c7e20f4e 100644 --- a/src/libfetchers/path.cc +++ b/src/libfetchers/path.cc @@ -63,6 +63,8 @@ struct PathInput : Input { auto input = std::make_shared(*this); + // FIXME: check whether access to 'path' is allowed. 
+ auto storePath = store->maybeParseStorePath(path); if (storePath) From cd39709003eee4b85a31799b8bc2de59721930ce Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 7 Apr 2020 09:27:17 +0200 Subject: [PATCH 27/27] Cleanup --- src/libstore/download.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/libstore/download.cc b/src/libstore/download.cc index 215046b72..af69699a8 100644 --- a/src/libstore/download.cc +++ b/src/libstore/download.cc @@ -1,14 +1,10 @@ #include "download.hh" #include "util.hh" #include "globals.hh" -#include "hash.hh" #include "store-api.hh" -#include "archive.hh" #include "s3.hh" #include "compression.hh" -#include "pathlocks.hh" #include "finally.hh" -#include "tarfile.hh" #ifdef ENABLE_S3 #include