Revert "libfetchers: make attribute / URL query handling consistent"

This reverts commit 35eec921af.

Reason for revert: Regressed nix-eval-jobs, and it appears to be this change is buggy/missing a case. It just needs another pass.

Code causing the problem in n-e-j, when invoked with `nix-eval-jobs --flake '.#hydraJobs'`:

```
n-e-j/tests/assets » ../../build/src/nix-eval-jobs --meta --workers 1 --flake .#hydraJobs
warning: unknown setting 'trusted-users'
warning: `--gc-roots-dir' not specified
error: unsupported Git input attribute 'dir'
error: worker error: error: unsupported Git input attribute 'dir'
```

```
  nix::Value *vRoot = [&]() {
        if (args.flake) {
            auto [flakeRef, fragment, outputSpec] =
                nix::parseFlakeRefWithFragmentAndExtendedOutputsSpec(
                    args.releaseExpr, nix::absPath("."));
            nix::InstallableFlake flake{
                {}, state, std::move(flakeRef), fragment, outputSpec,
                {}, {},    args.lockFlags};

            return flake.toValue(*state).first;
        } else {
            return releaseExprTopLevelValue(*state, autoArgs, args);
        }
    }();
```

Inspecting the program behaviour reveals that `dir` was in fact set in the URL going into the fetcher. This is in turn because unlike in the case changed in this commit, it was not erased before handing it to libfetchers, which is probably just a mistake.

```
(rr) up
3  0x00007ffff60262ae in nix::fetchers::Input::fromURL (url=..., requireTree=requireTree@entry=true) at src/libfetchers/fetchers.cc:39
warning: Source file is more recent than executable.
39              auto res = inputScheme->inputFromURL(url, requireTree);
(rr) p url
$1 = (const nix::ParsedURL &) @0x7fffdc874190: {url = "git+file:///home/jade/lix/nix-eval-jobs", 
  base = "git+file:///home/jade/lix/nix-eval-jobs", scheme = "git+file", authority = std::optional<std::string> = {[contained value] = ""}, 
  path = "/home/jade/lix/nix-eval-jobs", query = std::map with 1 element = {["dir"] = "tests/assets"}, fragment = ""}
(rr) up
4  0x00007ffff789d904 in nix::parseFlakeRefWithFragment (url=".#hydraJobs", baseDir=std::optional<std::string> = {...}, 
    allowMissing=allowMissing@entry=false, isFlake=isFlake@entry=true) at src/libexpr/flake/flakeref.cc:179
warning: Source file is more recent than executable.
179                                 FlakeRef(Input::fromURL(parsedURL, isFlake), getOr(parsedURL.query, "dir", "")),
(rr) p parsedURL
$2 = {url = "git+file:///home/jade/lix/nix-eval-jobs", base = "git+file:///home/jade/lix/nix-eval-jobs", scheme = "git+file", 
  authority = std::optional<std::string> = {[contained value] = ""}, path = "/home/jade/lix/nix-eval-jobs", query = std::map with 1 element = {
    ["dir"] = "tests/assets"}, fragment = ""}
(rr) list
174
175                             if (pathExists(flakeRoot + "/.git/shallow"))
176                                 parsedURL.query.insert_or_assign("shallow", "1");
177
178                             return std::make_pair(
179                                 FlakeRef(Input::fromURL(parsedURL, isFlake), getOr(parsedURL.query, "dir", "")),
180                                 fragment);
181                         }
```

Change-Id: Ib55a882eaeb3e59228857761dc1e3b2e366b0f5e
This commit is contained in:
jade 2024-06-24 22:49:17 +00:00 committed by Gerrit Code Review
parent 35eec921af
commit 3e151d4d77
10 changed files with 96 additions and 235 deletions

View file

@ -204,13 +204,7 @@ std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
std::string fragment; std::string fragment;
std::swap(fragment, parsedURL.fragment); std::swap(fragment, parsedURL.fragment);
// This has a special meaning for flakes and must not be passed to libfetchers. auto input = Input::fromURL(parsedURL, isFlake);
// Of course this means that libfetchers cannot have fetchers
// expecting an argument `dir` 🫠
ParsedURL urlForFetchers(parsedURL);
urlForFetchers.query.erase("dir");
auto input = Input::fromURL(urlForFetchers, isFlake);
input.parent = baseDir; input.parent = baseDir;
return std::make_pair( return std::make_pair(

View file

@ -159,37 +159,6 @@ struct InputScheme
std::optional<std::string> commitMsg) const; std::optional<std::string> commitMsg) const;
virtual std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) = 0; virtual std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) = 0;
protected:
void emplaceURLQueryIntoAttrs(
const ParsedURL & parsedURL,
Attrs & attrs,
const StringSet & numericParams,
const StringSet & booleanParams) const
{
for (auto &[name, value] : parsedURL.query) {
if (name == "url") {
throw BadURL(
"URL '%s' must not override url via query param!",
parsedURL.to_string()
);
} else if (numericParams.count(name) != 0) {
if (auto n = string2Int<uint64_t>(value)) {
attrs.insert_or_assign(name, *n);
} else {
throw BadURL(
"URL '%s' has non-numeric parameter '%s'",
parsedURL.to_string(),
name
);
}
} else if (booleanParams.count(name) != 0) {
attrs.emplace(name, Explicit<bool> { value == "1" });
} else {
attrs.emplace(name, value);
}
}
}
}; };
void registerInputScheme(std::shared_ptr<InputScheme> && fetcher); void registerInputScheme(std::shared_ptr<InputScheme> && fetcher);

View file

@ -273,14 +273,17 @@ struct GitInputScheme : InputScheme
Attrs attrs; Attrs attrs;
attrs.emplace("type", "git"); attrs.emplace("type", "git");
attrs.emplace("url", url2.to_string());
emplaceURLQueryIntoAttrs( for (auto & [name, value] : url.query) {
url, if (name == "rev" || name == "ref")
attrs, attrs.emplace(name, value);
{"lastModified", "revCount"}, else if (name == "shallow" || name == "submodules" || name == "allRefs")
{"shallow", "submodules", "allRefs"} attrs.emplace(name, Explicit<bool> { value == "1" });
); else
url2.query.emplace(name, value);
}
attrs.emplace("url", url2.to_string());
return inputFromAttrs(attrs); return inputFromAttrs(attrs);
} }

View file

@ -1,4 +1,3 @@
#include "attrs.hh"
#include "filetransfer.hh" #include "filetransfer.hh"
#include "cache.hh" #include "cache.hh"
#include "globals.hh" #include "globals.hh"
@ -37,11 +36,18 @@ struct GitArchiveInputScheme : InputScheme
auto path = tokenizeString<std::vector<std::string>>(url.path, "/"); auto path = tokenizeString<std::vector<std::string>>(url.path, "/");
std::optional<std::string> refOrRev; std::optional<Hash> rev;
std::optional<std::string> ref;
std::optional<std::string> host_url;
auto size = path.size(); auto size = path.size();
if (size == 3) { if (size == 3) {
refOrRev = path[2]; if (std::regex_match(path[2], revRegex))
rev = Hash::parseAny(path[2], htSHA1);
else if (std::regex_match(path[2], refRegex))
ref = path[2];
else
throw BadURL("in URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[2]);
} else if (size > 3) { } else if (size > 3) {
std::string rs; std::string rs;
for (auto i = std::next(path.begin(), 2); i != path.end(); i++) { for (auto i = std::next(path.begin(), 2); i != path.end(); i++) {
@ -52,91 +58,61 @@ struct GitArchiveInputScheme : InputScheme
} }
if (std::regex_match(rs, refRegex)) { if (std::regex_match(rs, refRegex)) {
refOrRev = rs; ref = rs;
} else { } else {
throw BadURL("in URL '%s', '%s' is not a branch/tag name", url.url, rs); throw BadURL("in URL '%s', '%s' is not a branch/tag name", url.url, rs);
} }
} else if (size < 2) } else if (size < 2)
throw BadURL("URL '%s' is invalid", url.url); throw BadURL("URL '%s' is invalid", url.url);
Attrs attrs;
attrs.emplace("type", type());
attrs.emplace("owner", path[0]);
attrs.emplace("repo", path[1]);
for (auto &[name, value] : url.query) { for (auto &[name, value] : url.query) {
if (name == "rev" || name == "ref") { if (name == "rev") {
if (refOrRev) { if (rev)
throw BadURL("URL '%s' already contains a ref or rev", url.url); throw BadURL("URL '%s' contains multiple commit hashes", url.url);
} else { rev = Hash::parseAny(value, htSHA1);
refOrRev = value;
} }
} else if (name == "lastModified") { else if (name == "ref") {
if (auto n = string2Int<uint64_t>(value)) { if (!std::regex_match(value, refRegex))
attrs.emplace(name, *n); throw BadURL("URL '%s' contains an invalid branch/tag name", url.url);
} else { if (ref)
throw Error( throw BadURL("URL '%s' contains multiple branch/tag names", url.url);
"Attribute 'lastModified' in URL '%s' must be an integer", ref = value;
url.to_string()
);
} }
} else { else if (name == "host") {
attrs.emplace(name, value); if (!std::regex_match(value, hostRegex))
throw BadURL("URL '%s' contains an invalid instance host", url.url);
host_url = value;
} }
// FIXME: barf on unsupported attributes
} }
if (refOrRev) attrs.emplace("refOrRev", *refOrRev); if (ref && rev)
throw BadURL("URL '%s' contains both a commit hash and a branch/tag name %s %s", url.url, *ref, rev->gitRev());
return inputFromAttrs(attrs); Input input;
input.attrs.insert_or_assign("type", type());
input.attrs.insert_or_assign("owner", path[0]);
input.attrs.insert_or_assign("repo", path[1]);
if (rev) input.attrs.insert_or_assign("rev", rev->gitRev());
if (ref) input.attrs.insert_or_assign("ref", *ref);
if (host_url) input.attrs.insert_or_assign("host", *host_url);
return input;
} }
std::optional<Input> inputFromAttrs(const Attrs & attrs) const override std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
{ {
// Attributes can contain refOrRev and it needs to be figured out if (maybeGetStrAttr(attrs, "type") != type()) return {};
// which one it is (see inputFromURL for when that may happen).
// The correct one (ref or rev) will be written into finalAttrs and
// it needs to be mutable for that.
Attrs finalAttrs(attrs);
auto type_ = maybeGetStrAttr(finalAttrs, "type");
if (type_ != type()) return {};
auto owner = getStrAttr(finalAttrs, "owner"); for (auto & [name, value] : attrs)
auto repo = getStrAttr(finalAttrs, "repo"); if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host")
auto url = fmt("%s:%s/%s", *type_, owner, repo);
if (auto host = maybeGetStrAttr(finalAttrs, "host")) {
if (!std::regex_match(*host, hostRegex)) {
throw BadURL("URL '%s' contains an invalid instance host", url);
}
}
if (auto refOrRev = maybeGetStrAttr(finalAttrs, "refOrRev")) {
finalAttrs.erase("refOrRev");
if (std::regex_match(*refOrRev, revRegex)) {
finalAttrs.emplace("rev", *refOrRev);
} else if (std::regex_match(*refOrRev, refRegex)) {
finalAttrs.emplace("ref", *refOrRev);
} else {
throw Error(
"in URL '%s', '%s' is not a commit hash or a branch/tag name",
url,
*refOrRev
);
}
} else if (auto ref = maybeGetStrAttr(finalAttrs, "ref")) {
if (!std::regex_match(*ref, refRegex)) {
throw BadURL("URL '%s' contains an invalid branch/tag name", url);
}
}
for (auto & [name, value] : finalAttrs) {
if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host") {
throw Error("unsupported input attribute '%s'", name); throw Error("unsupported input attribute '%s'", name);
}
} getStrAttr(attrs, "owner");
getStrAttr(attrs, "repo");
Input input; Input input;
input.attrs = finalAttrs; input.attrs = attrs;
return input; return input;
} }

View file

@ -17,8 +17,6 @@ struct IndirectInputScheme : InputScheme
std::optional<Hash> rev; std::optional<Hash> rev;
std::optional<std::string> ref; std::optional<std::string> ref;
Attrs attrs;
if (path.size() == 1) { if (path.size() == 1) {
} else if (path.size() == 2) { } else if (path.size() == 2) {
if (std::regex_match(path[1], revRegex)) if (std::regex_match(path[1], revRegex))
@ -28,21 +26,29 @@ struct IndirectInputScheme : InputScheme
else else
throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[1]); throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[1]);
} else if (path.size() == 3) { } else if (path.size() == 3) {
if (!std::regex_match(path[1], refRegex))
throw BadURL("in flake URL '%s', '%s' is not a branch/tag name", url.url, path[1]);
ref = path[1]; ref = path[1];
if (!std::regex_match(path[2], revRegex))
throw BadURL("in flake URL '%s', '%s' is not a commit hash", url.url, path[2]);
rev = Hash::parseAny(path[2], htSHA1); rev = Hash::parseAny(path[2], htSHA1);
} else } else
throw BadURL("GitHub URL '%s' is invalid", url.url); throw BadURL("GitHub URL '%s' is invalid", url.url);
std::string id = path[0]; std::string id = path[0];
if (!std::regex_match(id, flakeRegex))
throw BadURL("'%s' is not a valid flake ID", id);
attrs.emplace("type", "indirect"); // FIXME: forbid query params?
attrs.emplace("id", id);
if (rev) attrs.emplace("rev", rev->gitRev());
if (ref) attrs.emplace("ref", *ref);
emplaceURLQueryIntoAttrs(url, attrs, {}, {}); Input input;
input.direct = false;
input.attrs.insert_or_assign("type", "indirect");
input.attrs.insert_or_assign("id", id);
if (rev) input.attrs.insert_or_assign("rev", rev->gitRev());
if (ref) input.attrs.insert_or_assign("ref", *ref);
return inputFromAttrs(attrs); return input;
} }
std::optional<Input> inputFromAttrs(const Attrs & attrs) const override std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
@ -57,18 +63,6 @@ struct IndirectInputScheme : InputScheme
if (!std::regex_match(id, flakeRegex)) if (!std::regex_match(id, flakeRegex))
throw BadURL("'%s' is not a valid flake ID", id); throw BadURL("'%s' is not a valid flake ID", id);
// TODO come up with a nicer error message for those two.
if (auto rev = maybeGetStrAttr(attrs, "rev")) {
if (!std::regex_match(*rev, revRegex)) {
throw BadURL("in flake '%s', '%s' is not a commit hash", id, *rev);
}
}
if (auto ref = maybeGetStrAttr(attrs, "ref")) {
if (!std::regex_match(*ref, refRegex)) {
throw BadURL("in flake '%s', '%s' is not a valid branch/tag name", id, *ref);
}
}
Input input; Input input;
input.direct = false; input.direct = false;
input.attrs = attrs; input.attrs = attrs;

View file

@ -57,7 +57,12 @@ struct MercurialInputScheme : InputScheme
Attrs attrs; Attrs attrs;
attrs.emplace("type", "hg"); attrs.emplace("type", "hg");
emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {}); for (auto &[name, value] : url.query) {
if (name == "rev" || name == "ref")
attrs.emplace(name, value);
else
url2.query.emplace(name, value);
}
attrs.emplace("url", url2.to_string()); attrs.emplace("url", url2.to_string());

View file

@ -201,17 +201,29 @@ struct CurlInputScheme : InputScheme
if (!isValidURL(_url, requireTree)) if (!isValidURL(_url, requireTree))
return std::nullopt; return std::nullopt;
auto url = _url; Input input;
Attrs attrs; auto url = _url;
attrs.emplace("type", inputType());
url.scheme = parseUrlScheme(url.scheme).transport; url.scheme = parseUrlScheme(url.scheme).transport;
emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {}); auto narHash = url.query.find("narHash");
if (narHash != url.query.end())
input.attrs.insert_or_assign("narHash", narHash->second);
attrs.emplace("url", url.to_string()); if (auto i = get(url.query, "rev"))
return inputFromAttrs(attrs); input.attrs.insert_or_assign("rev", *i);
if (auto i = get(url.query, "revCount"))
if (auto n = string2Int<uint64_t>(*i))
input.attrs.insert_or_assign("revCount", *n);
url.query.erase("rev");
url.query.erase("revCount");
input.attrs.insert_or_assign("type", inputType());
input.attrs.insert_or_assign("url", url.to_string());
return input;
} }
std::optional<Input> inputFromAttrs(const Attrs & attrs) const override std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
@ -223,7 +235,7 @@ struct CurlInputScheme : InputScheme
std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack", "rev", "revCount", "lastModified"}; std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack", "rev", "revCount", "lastModified"};
for (auto & [name, value] : attrs) for (auto & [name, value] : attrs)
if (!allowedNames.count(name)) if (!allowedNames.count(name))
throw Error("unsupported %s input attribute '%s'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }'", *type, name); throw Error("unsupported %s input attribute '%s'", *type, name);
Input input; Input input;
input.attrs = attrs; input.attrs = attrs;

View file

@ -1,91 +0,0 @@
source common.sh
requireGit
clearStore
testFetchTreeError() {
rawFetchTreeArg="${1?fetchTree arg missing}"
messageSubstring="${2?messageSubstring missing}"
output="$(nix eval --impure --raw --expr "(builtins.fetchTree $rawFetchTreeArg).outPath" 2>&1)" && status=0 || status=$?
grepQuiet "$messageSubstring" <<<"$output"
test "$status" -ne 0
}
# github/gitlab/sourcehut fetcher input validation
for provider in github gitlab sourcehut; do
# ref/rev validation
testFetchTreeError \
"{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; ref = \",\"; }" \
"URL '$provider:foo/bar' contains an invalid branch/tag name"
testFetchTreeError \
"\"$provider://host/foo/bar/,\"" \
"URL '$provider:foo/bar', ',' is not a commit hash or a branch/tag name"
testFetchTreeError \
"\"$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc\"" \
"URL '$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc' already contains a ref or rev"
testFetchTreeError \
"\"$provider://host/foo/bar/ref?ref=ref2\"" \
"URL '$provider://host/foo/bar/ref?ref=ref2' already contains a ref or rev"
# host validation
testFetchTreeError \
"{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; host = \"git_hub.com\"; }" \
"URL '$provider:foo/bar' contains an invalid instance host"
testFetchTreeError \
"\"$provider://host/foo/bar/ref?host=git_hub.com\"" \
"URL '$provider:foo/bar' contains an invalid instance host"
# invalid attributes
testFetchTreeError \
"{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; wrong = true; }" \
"unsupported input attribute 'wrong'"
testFetchTreeError \
"\"$provider://host/foo/bar/ref?wrong=1\"" \
"unsupported input attribute 'wrong'"
done
# unsupported attributes w/ tarball fetcher
testFetchTreeError \
"\"https://host/foo?wrong=1\"" \
"unsupported tarball input attribute 'wrong'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }"
# test for unsupported attributes / validation in git fetcher
testFetchTreeError \
"\"git+https://github.com/owner/repo?invalid=1\"" \
"unsupported Git input attribute 'invalid'"
testFetchTreeError \
"\"git+https://github.com/owner/repo?url=foo\"" \
"URL 'git+https://github.com/owner/repo?url=foo' must not override url via query param!"
testFetchTreeError \
"\"git+https://github.com/owner/repo?ref=foo.lock\"" \
"invalid Git branch/tag name 'foo.lock'"
testFetchTreeError \
"{ type = \"git\"; url =\"https://github.com/owner/repo\"; ref = \"foo.lock\"; }" \
"invalid Git branch/tag name 'foo.lock'"
# same for mercurial
testFetchTreeError \
"\"hg+https://forge.tld/owner/repo?invalid=1\"" \
"unsupported Mercurial input attribute 'invalid'"
testFetchTreeError \
"{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; invalid = 1; }" \
"unsupported Mercurial input attribute 'invalid'"
testFetchTreeError \
"\"hg+https://forge.tld/owner/repo?ref=,\"" \
"invalid Mercurial branch/tag name ','"
testFetchTreeError \
"{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; ref = \",\"; }" \
"invalid Mercurial branch/tag name ','"

View file

@ -93,7 +93,6 @@ functional_tests_scripts = [
'fetchGitRefs.sh', 'fetchGitRefs.sh',
'gc-runtime.sh', 'gc-runtime.sh',
'tarball.sh', 'tarball.sh',
'fetchers.sh',
'fetchGit.sh', 'fetchGit.sh',
'fetchurl.sh', 'fetchurl.sh',
'fetchPath.sh', 'fetchPath.sh',

View file

@ -69,7 +69,7 @@ in
# Check that we got redirected to the immutable URL. # Check that we got redirected to the immutable URL.
locked_url = info["locked"]["url"] locked_url = info["locked"]["url"]
assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz?rev=${nixpkgs.rev}&revCount=1234", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz" assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz"
# Check that we got the rev and revCount attributes. # Check that we got the rev and revCount attributes.
revision = info["revision"] revision = info["revision"]