Reapply "libfetchers: make attribute / URL query handling consistent"

The original attempt at this introduced a regression; this commit
reverts the revert and fixes the regression.

This reverts commit 3e151d4d77.

Fix to the regression:

flakeref: fix handling of `?dir=` param for flakes in subdirs

As reported in #419[1], accessing a flake in a subdir of a Git
repository fails with the previous commit[2] applied with the error

    error: unsupported Git input attribute 'dir'

The problem is that the `dir`-param is inserted into the parsed URL if a
flake is fetched from the subdir of a Git repository. However, for the
fetching part this isn't even needed. The fix is to just pass `subdir`
as second argument to `FlakeRef` (which needs a `basedir` that can be
empty) and leave the parsedURL as-is.

Added a regression test to make sure we don't run into this again.

[1] https://git.lix.systems/lix-project/lix/issues/419
[2] e22172aaf6b6a366cecd3c025590e68fa2b91bcc,
    originally 3e151d4d77

Change-Id: I2c72d5a32e406a7ca308e271730bd0af01c5d18b
This commit is contained in:
Maximilian Bosch 2024-06-25 12:31:52 +02:00 committed by Jade Lovelace
parent 6abad7cb23
commit 87fd1f024c
11 changed files with 257 additions and 98 deletions

View file

@ -169,14 +169,13 @@ std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
if (subdir != "") { if (subdir != "") {
if (parsedURL.query.count("dir")) if (parsedURL.query.count("dir"))
throw Error("flake URL '%s' has an inconsistent 'dir' parameter", url); throw Error("flake URL '%s' has an inconsistent 'dir' parameter", url);
parsedURL.query.insert_or_assign("dir", subdir);
} }
if (pathExists(flakeRoot + "/.git/shallow")) if (pathExists(flakeRoot + "/.git/shallow"))
parsedURL.query.insert_or_assign("shallow", "1"); parsedURL.query.insert_or_assign("shallow", "1");
return std::make_pair( return std::make_pair(
FlakeRef(Input::fromURL(parsedURL, isFlake), getOr(parsedURL.query, "dir", "")), FlakeRef(Input::fromURL(parsedURL, isFlake), subdir),
fragment); fragment);
} }
@ -204,7 +203,13 @@ std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
std::string fragment; std::string fragment;
std::swap(fragment, parsedURL.fragment); std::swap(fragment, parsedURL.fragment);
auto input = Input::fromURL(parsedURL, isFlake); // This has a special meaning for flakes and must not be passed to libfetchers.
// Of course this means that libfetchers cannot have fetchers
// expecting an argument `dir` 🫠
ParsedURL urlForFetchers(parsedURL);
urlForFetchers.query.erase("dir");
auto input = Input::fromURL(urlForFetchers, isFlake);
input.parent = baseDir; input.parent = baseDir;
return std::make_pair( return std::make_pair(

View file

@ -159,6 +159,37 @@ struct InputScheme
std::optional<std::string> commitMsg) const; std::optional<std::string> commitMsg) const;
virtual std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) = 0; virtual std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) = 0;
protected:
void emplaceURLQueryIntoAttrs(
const ParsedURL & parsedURL,
Attrs & attrs,
const StringSet & numericParams,
const StringSet & booleanParams) const
{
for (auto &[name, value] : parsedURL.query) {
if (name == "url") {
throw BadURL(
"URL '%s' must not override url via query param!",
parsedURL.to_string()
);
} else if (numericParams.count(name) != 0) {
if (auto n = string2Int<uint64_t>(value)) {
attrs.insert_or_assign(name, *n);
} else {
throw BadURL(
"URL '%s' has non-numeric parameter '%s'",
parsedURL.to_string(),
name
);
}
} else if (booleanParams.count(name) != 0) {
attrs.emplace(name, Explicit<bool> { value == "1" });
} else {
attrs.emplace(name, value);
}
}
}
}; };
void registerInputScheme(std::shared_ptr<InputScheme> && fetcher); void registerInputScheme(std::shared_ptr<InputScheme> && fetcher);

View file

@ -273,18 +273,15 @@ struct GitInputScheme : InputScheme
Attrs attrs; Attrs attrs;
attrs.emplace("type", "git"); attrs.emplace("type", "git");
for (auto & [name, value] : url.query) {
if (name == "rev" || name == "ref")
attrs.emplace(name, value);
else if (name == "shallow" || name == "submodules" || name == "allRefs")
attrs.emplace(name, Explicit<bool> { value == "1" });
else
url2.query.emplace(name, value);
}
attrs.emplace("url", url2.to_string()); attrs.emplace("url", url2.to_string());
emplaceURLQueryIntoAttrs(
url,
attrs,
{"lastModified", "revCount"},
{"shallow", "submodules", "allRefs"}
);
return inputFromAttrs(attrs); return inputFromAttrs(attrs);
} }

View file

@ -1,3 +1,4 @@
#include "attrs.hh"
#include "filetransfer.hh" #include "filetransfer.hh"
#include "cache.hh" #include "cache.hh"
#include "globals.hh" #include "globals.hh"
@ -36,18 +37,11 @@ struct GitArchiveInputScheme : InputScheme
auto path = tokenizeString<std::vector<std::string>>(url.path, "/"); auto path = tokenizeString<std::vector<std::string>>(url.path, "/");
std::optional<Hash> rev; std::optional<std::string> refOrRev;
std::optional<std::string> ref;
std::optional<std::string> host_url;
auto size = path.size(); auto size = path.size();
if (size == 3) { if (size == 3) {
if (std::regex_match(path[2], revRegex)) refOrRev = path[2];
rev = Hash::parseAny(path[2], htSHA1);
else if (std::regex_match(path[2], refRegex))
ref = path[2];
else
throw BadURL("in URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[2]);
} else if (size > 3) { } else if (size > 3) {
std::string rs; std::string rs;
for (auto i = std::next(path.begin(), 2); i != path.end(); i++) { for (auto i = std::next(path.begin(), 2); i != path.end(); i++) {
@ -58,61 +52,91 @@ struct GitArchiveInputScheme : InputScheme
} }
if (std::regex_match(rs, refRegex)) { if (std::regex_match(rs, refRegex)) {
ref = rs; refOrRev = rs;
} else { } else {
throw BadURL("in URL '%s', '%s' is not a branch/tag name", url.url, rs); throw BadURL("in URL '%s', '%s' is not a branch/tag name", url.url, rs);
} }
} else if (size < 2) } else if (size < 2)
throw BadURL("URL '%s' is invalid", url.url); throw BadURL("URL '%s' is invalid", url.url);
Attrs attrs;
attrs.emplace("type", type());
attrs.emplace("owner", path[0]);
attrs.emplace("repo", path[1]);
for (auto &[name, value] : url.query) { for (auto &[name, value] : url.query) {
if (name == "rev") { if (name == "rev" || name == "ref") {
if (rev) if (refOrRev) {
throw BadURL("URL '%s' contains multiple commit hashes", url.url); throw BadURL("URL '%s' already contains a ref or rev", url.url);
rev = Hash::parseAny(value, htSHA1); } else {
refOrRev = value;
} }
else if (name == "ref") { } else if (name == "lastModified") {
if (!std::regex_match(value, refRegex)) if (auto n = string2Int<uint64_t>(value)) {
throw BadURL("URL '%s' contains an invalid branch/tag name", url.url); attrs.emplace(name, *n);
if (ref) } else {
throw BadURL("URL '%s' contains multiple branch/tag names", url.url); throw Error(
ref = value; "Attribute 'lastModified' in URL '%s' must be an integer",
url.to_string()
);
} }
else if (name == "host") { } else {
if (!std::regex_match(value, hostRegex)) attrs.emplace(name, value);
throw BadURL("URL '%s' contains an invalid instance host", url.url);
host_url = value;
} }
// FIXME: barf on unsupported attributes
} }
if (ref && rev) if (refOrRev) attrs.emplace("refOrRev", *refOrRev);
throw BadURL("URL '%s' contains both a commit hash and a branch/tag name %s %s", url.url, *ref, rev->gitRev());
Input input; return inputFromAttrs(attrs);
input.attrs.insert_or_assign("type", type());
input.attrs.insert_or_assign("owner", path[0]);
input.attrs.insert_or_assign("repo", path[1]);
if (rev) input.attrs.insert_or_assign("rev", rev->gitRev());
if (ref) input.attrs.insert_or_assign("ref", *ref);
if (host_url) input.attrs.insert_or_assign("host", *host_url);
return input;
} }
std::optional<Input> inputFromAttrs(const Attrs & attrs) const override std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
{ {
if (maybeGetStrAttr(attrs, "type") != type()) return {}; // Attributes can contain refOrRev and it needs to be figured out
// which one it is (see inputFromURL for when that may happen).
// The correct one (ref or rev) will be written into finalAttrs and
// it needs to be mutable for that.
Attrs finalAttrs(attrs);
auto type_ = maybeGetStrAttr(finalAttrs, "type");
if (type_ != type()) return {};
for (auto & [name, value] : attrs) auto owner = getStrAttr(finalAttrs, "owner");
if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host") auto repo = getStrAttr(finalAttrs, "repo");
auto url = fmt("%s:%s/%s", *type_, owner, repo);
if (auto host = maybeGetStrAttr(finalAttrs, "host")) {
if (!std::regex_match(*host, hostRegex)) {
throw BadURL("URL '%s' contains an invalid instance host", url);
}
}
if (auto refOrRev = maybeGetStrAttr(finalAttrs, "refOrRev")) {
finalAttrs.erase("refOrRev");
if (std::regex_match(*refOrRev, revRegex)) {
finalAttrs.emplace("rev", *refOrRev);
} else if (std::regex_match(*refOrRev, refRegex)) {
finalAttrs.emplace("ref", *refOrRev);
} else {
throw Error(
"in URL '%s', '%s' is not a commit hash or a branch/tag name",
url,
*refOrRev
);
}
} else if (auto ref = maybeGetStrAttr(finalAttrs, "ref")) {
if (!std::regex_match(*ref, refRegex)) {
throw BadURL("URL '%s' contains an invalid branch/tag name", url);
}
}
for (auto & [name, value] : finalAttrs) {
if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host") {
throw Error("unsupported input attribute '%s'", name); throw Error("unsupported input attribute '%s'", name);
}
getStrAttr(attrs, "owner"); }
getStrAttr(attrs, "repo");
Input input; Input input;
input.attrs = attrs; input.attrs = finalAttrs;
return input; return input;
} }

View file

@ -17,6 +17,8 @@ struct IndirectInputScheme : InputScheme
std::optional<Hash> rev; std::optional<Hash> rev;
std::optional<std::string> ref; std::optional<std::string> ref;
Attrs attrs;
if (path.size() == 1) { if (path.size() == 1) {
} else if (path.size() == 2) { } else if (path.size() == 2) {
if (std::regex_match(path[1], revRegex)) if (std::regex_match(path[1], revRegex))
@ -26,29 +28,21 @@ struct IndirectInputScheme : InputScheme
else else
throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[1]); throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[1]);
} else if (path.size() == 3) { } else if (path.size() == 3) {
if (!std::regex_match(path[1], refRegex))
throw BadURL("in flake URL '%s', '%s' is not a branch/tag name", url.url, path[1]);
ref = path[1]; ref = path[1];
if (!std::regex_match(path[2], revRegex))
throw BadURL("in flake URL '%s', '%s' is not a commit hash", url.url, path[2]);
rev = Hash::parseAny(path[2], htSHA1); rev = Hash::parseAny(path[2], htSHA1);
} else } else
throw BadURL("GitHub URL '%s' is invalid", url.url); throw BadURL("GitHub URL '%s' is invalid", url.url);
std::string id = path[0]; std::string id = path[0];
if (!std::regex_match(id, flakeRegex))
throw BadURL("'%s' is not a valid flake ID", id);
// FIXME: forbid query params? attrs.emplace("type", "indirect");
attrs.emplace("id", id);
if (rev) attrs.emplace("rev", rev->gitRev());
if (ref) attrs.emplace("ref", *ref);
Input input; emplaceURLQueryIntoAttrs(url, attrs, {}, {});
input.direct = false;
input.attrs.insert_or_assign("type", "indirect");
input.attrs.insert_or_assign("id", id);
if (rev) input.attrs.insert_or_assign("rev", rev->gitRev());
if (ref) input.attrs.insert_or_assign("ref", *ref);
return input; return inputFromAttrs(attrs);
} }
std::optional<Input> inputFromAttrs(const Attrs & attrs) const override std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
@ -63,6 +57,18 @@ struct IndirectInputScheme : InputScheme
if (!std::regex_match(id, flakeRegex)) if (!std::regex_match(id, flakeRegex))
throw BadURL("'%s' is not a valid flake ID", id); throw BadURL("'%s' is not a valid flake ID", id);
// TODO come up with a nicer error message for those two.
if (auto rev = maybeGetStrAttr(attrs, "rev")) {
if (!std::regex_match(*rev, revRegex)) {
throw BadURL("in flake '%s', '%s' is not a commit hash", id, *rev);
}
}
if (auto ref = maybeGetStrAttr(attrs, "ref")) {
if (!std::regex_match(*ref, refRegex)) {
throw BadURL("in flake '%s', '%s' is not a valid branch/tag name", id, *ref);
}
}
Input input; Input input;
input.direct = false; input.direct = false;
input.attrs = attrs; input.attrs = attrs;

View file

@ -56,12 +56,7 @@ struct MercurialInputScheme : InputScheme
Attrs attrs; Attrs attrs;
attrs.emplace("type", "hg"); attrs.emplace("type", "hg");
for (auto &[name, value] : url.query) { emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {});
if (name == "rev" || name == "ref")
attrs.emplace(name, value);
else
url2.query.emplace(name, value);
}
attrs.emplace("url", url2.to_string()); attrs.emplace("url", url2.to_string());

View file

@ -201,29 +201,17 @@ struct CurlInputScheme : InputScheme
if (!isValidURL(_url, requireTree)) if (!isValidURL(_url, requireTree))
return std::nullopt; return std::nullopt;
Input input;
auto url = _url; auto url = _url;
Attrs attrs;
attrs.emplace("type", inputType());
url.scheme = parseUrlScheme(url.scheme).transport; url.scheme = parseUrlScheme(url.scheme).transport;
auto narHash = url.query.find("narHash"); emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {});
if (narHash != url.query.end())
input.attrs.insert_or_assign("narHash", narHash->second);
if (auto i = get(url.query, "rev")) attrs.emplace("url", url.to_string());
input.attrs.insert_or_assign("rev", *i); return inputFromAttrs(attrs);
if (auto i = get(url.query, "revCount"))
if (auto n = string2Int<uint64_t>(*i))
input.attrs.insert_or_assign("revCount", *n);
url.query.erase("rev");
url.query.erase("revCount");
input.attrs.insert_or_assign("type", inputType());
input.attrs.insert_or_assign("url", url.to_string());
return input;
} }
std::optional<Input> inputFromAttrs(const Attrs & attrs) const override std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
@ -235,7 +223,7 @@ struct CurlInputScheme : InputScheme
std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack", "rev", "revCount", "lastModified"}; std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack", "rev", "revCount", "lastModified"};
for (auto & [name, value] : attrs) for (auto & [name, value] : attrs)
if (!allowedNames.count(name)) if (!allowedNames.count(name))
throw Error("unsupported %s input attribute '%s'", *type, name); throw Error("unsupported %s input attribute '%s'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }'", *type, name);
Input input; Input input;
input.attrs = attrs; input.attrs = attrs;

View file

@ -0,0 +1,91 @@
source common.sh
requireGit
clearStore
testFetchTreeError() {
rawFetchTreeArg="${1?fetchTree arg missing}"
messageSubstring="${2?messageSubstring missing}"
output="$(nix eval --impure --raw --expr "(builtins.fetchTree $rawFetchTreeArg).outPath" 2>&1)" && status=0 || status=$?
grepQuiet "$messageSubstring" <<<"$output"
test "$status" -ne 0
}
# github/gitlab/sourcehut fetcher input validation
for provider in github gitlab sourcehut; do
# ref/rev validation
testFetchTreeError \
"{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; ref = \",\"; }" \
"URL '$provider:foo/bar' contains an invalid branch/tag name"
testFetchTreeError \
"\"$provider://host/foo/bar/,\"" \
"URL '$provider:foo/bar', ',' is not a commit hash or a branch/tag name"
testFetchTreeError \
"\"$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc\"" \
"URL '$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc' already contains a ref or rev"
testFetchTreeError \
"\"$provider://host/foo/bar/ref?ref=ref2\"" \
"URL '$provider://host/foo/bar/ref?ref=ref2' already contains a ref or rev"
# host validation
testFetchTreeError \
"{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; host = \"git_hub.com\"; }" \
"URL '$provider:foo/bar' contains an invalid instance host"
testFetchTreeError \
"\"$provider://host/foo/bar/ref?host=git_hub.com\"" \
"URL '$provider:foo/bar' contains an invalid instance host"
# invalid attributes
testFetchTreeError \
"{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; wrong = true; }" \
"unsupported input attribute 'wrong'"
testFetchTreeError \
"\"$provider://host/foo/bar/ref?wrong=1\"" \
"unsupported input attribute 'wrong'"
done
# unsupported attributes w/ tarball fetcher
testFetchTreeError \
"\"https://host/foo?wrong=1\"" \
"unsupported tarball input attribute 'wrong'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }"
# test for unsupported attributes / validation in git fetcher
testFetchTreeError \
"\"git+https://github.com/owner/repo?invalid=1\"" \
"unsupported Git input attribute 'invalid'"
testFetchTreeError \
"\"git+https://github.com/owner/repo?url=foo\"" \
"URL 'git+https://github.com/owner/repo?url=foo' must not override url via query param!"
testFetchTreeError \
"\"git+https://github.com/owner/repo?ref=foo.lock\"" \
"invalid Git branch/tag name 'foo.lock'"
testFetchTreeError \
"{ type = \"git\"; url =\"https://github.com/owner/repo\"; ref = \"foo.lock\"; }" \
"invalid Git branch/tag name 'foo.lock'"
# same for mercurial
testFetchTreeError \
"\"hg+https://forge.tld/owner/repo?invalid=1\"" \
"unsupported Mercurial input attribute 'invalid'"
testFetchTreeError \
"{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; invalid = 1; }" \
"unsupported Mercurial input attribute 'invalid'"
testFetchTreeError \
"\"hg+https://forge.tld/owner/repo?ref=,\"" \
"invalid Mercurial branch/tag name ','"
testFetchTreeError \
"{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; ref = \",\"; }" \
"invalid Mercurial branch/tag name ','"

View file

@ -0,0 +1,20 @@
source common.sh
requireGit
clearStore
container="$TEST_HOME"/flake-container
flake_dir="$container"/flake-dir
createGitRepo "$container"
mkdir -p "$flake_dir"
writeSimpleFlake "$flake_dir"
git -C "$container" add flake-dir
pushd "$flake_dir" &>/dev/null
info="$(nix flake info --json)"
[[ "$(jq -r '.resolvedUrl' <<<"$info")" == git+file://*/flake-container?dir=flake-dir ]]
[[ "$(jq -r '.url' <<<"$info")" == git+file://*/flake-container?dir=flake-dir ]]
# Make sure we can actually access & build stuff in this flake.
nix build "path:$flake_dir#foo" -L
popd &>/dev/null

View file

@ -71,6 +71,7 @@ functional_tests_scripts = [
'flakes/build-paths.sh', 'flakes/build-paths.sh',
'flakes/flake-registry.sh', 'flakes/flake-registry.sh',
'flakes/flake-in-submodule.sh', 'flakes/flake-in-submodule.sh',
'flakes/subdir-flake.sh',
'gc.sh', 'gc.sh',
'nix-collect-garbage-d.sh', 'nix-collect-garbage-d.sh',
'nix-collect-garbage-dry-run.sh', 'nix-collect-garbage-dry-run.sh',
@ -94,6 +95,7 @@ functional_tests_scripts = [
'fetchGitRefs.sh', 'fetchGitRefs.sh',
'gc-runtime.sh', 'gc-runtime.sh',
'tarball.sh', 'tarball.sh',
'fetchers.sh',
'fetchGit.sh', 'fetchGit.sh',
'fetchurl.sh', 'fetchurl.sh',
'fetchPath.sh', 'fetchPath.sh',

View file

@ -69,7 +69,7 @@ in
# Check that we got redirected to the immutable URL. # Check that we got redirected to the immutable URL.
locked_url = info["locked"]["url"] locked_url = info["locked"]["url"]
assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz" assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz?rev=${nixpkgs.rev}&revCount=1234", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz"
# Check that we got the rev and revCount attributes. # Check that we got the rev and revCount attributes.
revision = info["revision"] revision = info["revision"]