libfetchers: make attribute / URL query handling consistent
The original idea was to fix lix#174, but for a user friendly solution, I figured that we'd need more consistency: * Invalid query params will cause an error, just like invalid attributes. This has the following two consequences: * The `?dir=`-param from flakes will be removed before the URL to be fetched is passed to libfetchers. * The tarball fetcher doesn't allow URLs with custom query params anymore. I think this was questionable anyways given that an arbitrary set of query params was silently removed from the URL you wanted to fetch. The correct way is to use an attribute-set with a key `url` that contains the tarball URL to fetch. * Same for the git & mercurial fetchers: in that case it doesn't even matter though: both fetchers added unused query params to the URL that's passed from the input scheme to the fetcher (`url2` in the code). It turns out that this was never used since the query parameters were erased again in `getActualUrl`. * Validation happens for both attributes and URLs. Previously, a lot of fetchers validated e.g. refs/revs only when specified in a URL and the validity of attribute names only in `inputFromAttrs`. Now, all the validation is done in `inputFromAttrs` and `inputFromURL` constructs attributes that will be passed to `inputFromAttrs`. * Accept all attributes as URL query parameters. That also includes lesser used ones such as `narHash`. And "output" attributes like `lastModified`: these could be declared already when declaring inputs as attribute rather than URL. Now the behavior is at least consistent. Personally, I think we should differentiate in the future between "fetched input" (basically the attr-set that ends up in the lock-file) and "unfetched input" earlier: both inputFrom{Attrs,URL} entrypoints are probably OK for unfetched inputs, but for locked/fetched inputs a custom entrypoint should be used. Then, the current entrypoints wouldn't have to allow these attributes anymore. Change-Id: I1be1992249f7af8287cfc37891ab505ddaa2e8cd
This commit is contained in:
parent
21865ccce0
commit
35eec921af
10 changed files with 235 additions and 96 deletions
|
@ -204,7 +204,13 @@ std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
|
|||
std::string fragment;
|
||||
std::swap(fragment, parsedURL.fragment);
|
||||
|
||||
auto input = Input::fromURL(parsedURL, isFlake);
|
||||
// This has a special meaning for flakes and must not be passed to libfetchers.
|
||||
// Of course this means that libfetchers cannot have fetchers
|
||||
// expecting an argument `dir` 🫠
|
||||
ParsedURL urlForFetchers(parsedURL);
|
||||
urlForFetchers.query.erase("dir");
|
||||
|
||||
auto input = Input::fromURL(urlForFetchers, isFlake);
|
||||
input.parent = baseDir;
|
||||
|
||||
return std::make_pair(
|
||||
|
|
|
@ -159,6 +159,37 @@ struct InputScheme
|
|||
std::optional<std::string> commitMsg) const;
|
||||
|
||||
virtual std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) = 0;
|
||||
|
||||
protected:
|
||||
void emplaceURLQueryIntoAttrs(
|
||||
const ParsedURL & parsedURL,
|
||||
Attrs & attrs,
|
||||
const StringSet & numericParams,
|
||||
const StringSet & booleanParams) const
|
||||
{
|
||||
for (auto &[name, value] : parsedURL.query) {
|
||||
if (name == "url") {
|
||||
throw BadURL(
|
||||
"URL '%s' must not override url via query param!",
|
||||
parsedURL.to_string()
|
||||
);
|
||||
} else if (numericParams.count(name) != 0) {
|
||||
if (auto n = string2Int<uint64_t>(value)) {
|
||||
attrs.insert_or_assign(name, *n);
|
||||
} else {
|
||||
throw BadURL(
|
||||
"URL '%s' has non-numeric parameter '%s'",
|
||||
parsedURL.to_string(),
|
||||
name
|
||||
);
|
||||
}
|
||||
} else if (booleanParams.count(name) != 0) {
|
||||
attrs.emplace(name, Explicit<bool> { value == "1" });
|
||||
} else {
|
||||
attrs.emplace(name, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
void registerInputScheme(std::shared_ptr<InputScheme> && fetcher);
|
||||
|
|
|
@ -273,18 +273,15 @@ struct GitInputScheme : InputScheme
|
|||
|
||||
Attrs attrs;
|
||||
attrs.emplace("type", "git");
|
||||
|
||||
for (auto & [name, value] : url.query) {
|
||||
if (name == "rev" || name == "ref")
|
||||
attrs.emplace(name, value);
|
||||
else if (name == "shallow" || name == "submodules" || name == "allRefs")
|
||||
attrs.emplace(name, Explicit<bool> { value == "1" });
|
||||
else
|
||||
url2.query.emplace(name, value);
|
||||
}
|
||||
|
||||
attrs.emplace("url", url2.to_string());
|
||||
|
||||
emplaceURLQueryIntoAttrs(
|
||||
url,
|
||||
attrs,
|
||||
{"lastModified", "revCount"},
|
||||
{"shallow", "submodules", "allRefs"}
|
||||
);
|
||||
|
||||
return inputFromAttrs(attrs);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#include "attrs.hh"
|
||||
#include "filetransfer.hh"
|
||||
#include "cache.hh"
|
||||
#include "globals.hh"
|
||||
|
@ -36,18 +37,11 @@ struct GitArchiveInputScheme : InputScheme
|
|||
|
||||
auto path = tokenizeString<std::vector<std::string>>(url.path, "/");
|
||||
|
||||
std::optional<Hash> rev;
|
||||
std::optional<std::string> ref;
|
||||
std::optional<std::string> host_url;
|
||||
std::optional<std::string> refOrRev;
|
||||
|
||||
auto size = path.size();
|
||||
if (size == 3) {
|
||||
if (std::regex_match(path[2], revRegex))
|
||||
rev = Hash::parseAny(path[2], htSHA1);
|
||||
else if (std::regex_match(path[2], refRegex))
|
||||
ref = path[2];
|
||||
else
|
||||
throw BadURL("in URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[2]);
|
||||
refOrRev = path[2];
|
||||
} else if (size > 3) {
|
||||
std::string rs;
|
||||
for (auto i = std::next(path.begin(), 2); i != path.end(); i++) {
|
||||
|
@ -58,61 +52,91 @@ struct GitArchiveInputScheme : InputScheme
|
|||
}
|
||||
|
||||
if (std::regex_match(rs, refRegex)) {
|
||||
ref = rs;
|
||||
refOrRev = rs;
|
||||
} else {
|
||||
throw BadURL("in URL '%s', '%s' is not a branch/tag name", url.url, rs);
|
||||
}
|
||||
} else if (size < 2)
|
||||
throw BadURL("URL '%s' is invalid", url.url);
|
||||
|
||||
Attrs attrs;
|
||||
attrs.emplace("type", type());
|
||||
attrs.emplace("owner", path[0]);
|
||||
attrs.emplace("repo", path[1]);
|
||||
|
||||
for (auto &[name, value] : url.query) {
|
||||
if (name == "rev") {
|
||||
if (rev)
|
||||
throw BadURL("URL '%s' contains multiple commit hashes", url.url);
|
||||
rev = Hash::parseAny(value, htSHA1);
|
||||
if (name == "rev" || name == "ref") {
|
||||
if (refOrRev) {
|
||||
throw BadURL("URL '%s' already contains a ref or rev", url.url);
|
||||
} else {
|
||||
refOrRev = value;
|
||||
}
|
||||
else if (name == "ref") {
|
||||
if (!std::regex_match(value, refRegex))
|
||||
throw BadURL("URL '%s' contains an invalid branch/tag name", url.url);
|
||||
if (ref)
|
||||
throw BadURL("URL '%s' contains multiple branch/tag names", url.url);
|
||||
ref = value;
|
||||
} else if (name == "lastModified") {
|
||||
if (auto n = string2Int<uint64_t>(value)) {
|
||||
attrs.emplace(name, *n);
|
||||
} else {
|
||||
throw Error(
|
||||
"Attribute 'lastModified' in URL '%s' must be an integer",
|
||||
url.to_string()
|
||||
);
|
||||
}
|
||||
else if (name == "host") {
|
||||
if (!std::regex_match(value, hostRegex))
|
||||
throw BadURL("URL '%s' contains an invalid instance host", url.url);
|
||||
host_url = value;
|
||||
} else {
|
||||
attrs.emplace(name, value);
|
||||
}
|
||||
// FIXME: barf on unsupported attributes
|
||||
}
|
||||
|
||||
if (ref && rev)
|
||||
throw BadURL("URL '%s' contains both a commit hash and a branch/tag name %s %s", url.url, *ref, rev->gitRev());
|
||||
if (refOrRev) attrs.emplace("refOrRev", *refOrRev);
|
||||
|
||||
Input input;
|
||||
input.attrs.insert_or_assign("type", type());
|
||||
input.attrs.insert_or_assign("owner", path[0]);
|
||||
input.attrs.insert_or_assign("repo", path[1]);
|
||||
if (rev) input.attrs.insert_or_assign("rev", rev->gitRev());
|
||||
if (ref) input.attrs.insert_or_assign("ref", *ref);
|
||||
if (host_url) input.attrs.insert_or_assign("host", *host_url);
|
||||
|
||||
return input;
|
||||
return inputFromAttrs(attrs);
|
||||
}
|
||||
|
||||
std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
|
||||
{
|
||||
if (maybeGetStrAttr(attrs, "type") != type()) return {};
|
||||
// Attributes can contain refOrRev and it needs to be figured out
|
||||
// which one it is (see inputFromURL for when that may happen).
|
||||
// The correct one (ref or rev) will be written into finalAttrs and
|
||||
// it needs to be mutable for that.
|
||||
Attrs finalAttrs(attrs);
|
||||
auto type_ = maybeGetStrAttr(finalAttrs, "type");
|
||||
if (type_ != type()) return {};
|
||||
|
||||
for (auto & [name, value] : attrs)
|
||||
if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host")
|
||||
auto owner = getStrAttr(finalAttrs, "owner");
|
||||
auto repo = getStrAttr(finalAttrs, "repo");
|
||||
|
||||
auto url = fmt("%s:%s/%s", *type_, owner, repo);
|
||||
if (auto host = maybeGetStrAttr(finalAttrs, "host")) {
|
||||
if (!std::regex_match(*host, hostRegex)) {
|
||||
throw BadURL("URL '%s' contains an invalid instance host", url);
|
||||
}
|
||||
}
|
||||
|
||||
if (auto refOrRev = maybeGetStrAttr(finalAttrs, "refOrRev")) {
|
||||
finalAttrs.erase("refOrRev");
|
||||
if (std::regex_match(*refOrRev, revRegex)) {
|
||||
finalAttrs.emplace("rev", *refOrRev);
|
||||
} else if (std::regex_match(*refOrRev, refRegex)) {
|
||||
finalAttrs.emplace("ref", *refOrRev);
|
||||
} else {
|
||||
throw Error(
|
||||
"in URL '%s', '%s' is not a commit hash or a branch/tag name",
|
||||
url,
|
||||
*refOrRev
|
||||
);
|
||||
}
|
||||
} else if (auto ref = maybeGetStrAttr(finalAttrs, "ref")) {
|
||||
if (!std::regex_match(*ref, refRegex)) {
|
||||
throw BadURL("URL '%s' contains an invalid branch/tag name", url);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto & [name, value] : finalAttrs) {
|
||||
if (name != "type" && name != "owner" && name != "repo" && name != "ref" && name != "rev" && name != "narHash" && name != "lastModified" && name != "host") {
|
||||
throw Error("unsupported input attribute '%s'", name);
|
||||
|
||||
getStrAttr(attrs, "owner");
|
||||
getStrAttr(attrs, "repo");
|
||||
}
|
||||
}
|
||||
|
||||
Input input;
|
||||
input.attrs = attrs;
|
||||
input.attrs = finalAttrs;
|
||||
return input;
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,8 @@ struct IndirectInputScheme : InputScheme
|
|||
std::optional<Hash> rev;
|
||||
std::optional<std::string> ref;
|
||||
|
||||
Attrs attrs;
|
||||
|
||||
if (path.size() == 1) {
|
||||
} else if (path.size() == 2) {
|
||||
if (std::regex_match(path[1], revRegex))
|
||||
|
@ -26,29 +28,21 @@ struct IndirectInputScheme : InputScheme
|
|||
else
|
||||
throw BadURL("in flake URL '%s', '%s' is not a commit hash or branch/tag name", url.url, path[1]);
|
||||
} else if (path.size() == 3) {
|
||||
if (!std::regex_match(path[1], refRegex))
|
||||
throw BadURL("in flake URL '%s', '%s' is not a branch/tag name", url.url, path[1]);
|
||||
ref = path[1];
|
||||
if (!std::regex_match(path[2], revRegex))
|
||||
throw BadURL("in flake URL '%s', '%s' is not a commit hash", url.url, path[2]);
|
||||
rev = Hash::parseAny(path[2], htSHA1);
|
||||
} else
|
||||
throw BadURL("GitHub URL '%s' is invalid", url.url);
|
||||
|
||||
std::string id = path[0];
|
||||
if (!std::regex_match(id, flakeRegex))
|
||||
throw BadURL("'%s' is not a valid flake ID", id);
|
||||
|
||||
// FIXME: forbid query params?
|
||||
attrs.emplace("type", "indirect");
|
||||
attrs.emplace("id", id);
|
||||
if (rev) attrs.emplace("rev", rev->gitRev());
|
||||
if (ref) attrs.emplace("ref", *ref);
|
||||
|
||||
Input input;
|
||||
input.direct = false;
|
||||
input.attrs.insert_or_assign("type", "indirect");
|
||||
input.attrs.insert_or_assign("id", id);
|
||||
if (rev) input.attrs.insert_or_assign("rev", rev->gitRev());
|
||||
if (ref) input.attrs.insert_or_assign("ref", *ref);
|
||||
emplaceURLQueryIntoAttrs(url, attrs, {}, {});
|
||||
|
||||
return input;
|
||||
return inputFromAttrs(attrs);
|
||||
}
|
||||
|
||||
std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
|
||||
|
@ -63,6 +57,18 @@ struct IndirectInputScheme : InputScheme
|
|||
if (!std::regex_match(id, flakeRegex))
|
||||
throw BadURL("'%s' is not a valid flake ID", id);
|
||||
|
||||
// TODO come up with a nicer error message for those two.
|
||||
if (auto rev = maybeGetStrAttr(attrs, "rev")) {
|
||||
if (!std::regex_match(*rev, revRegex)) {
|
||||
throw BadURL("in flake '%s', '%s' is not a commit hash", id, *rev);
|
||||
}
|
||||
}
|
||||
if (auto ref = maybeGetStrAttr(attrs, "ref")) {
|
||||
if (!std::regex_match(*ref, refRegex)) {
|
||||
throw BadURL("in flake '%s', '%s' is not a valid branch/tag name", id, *ref);
|
||||
}
|
||||
}
|
||||
|
||||
Input input;
|
||||
input.direct = false;
|
||||
input.attrs = attrs;
|
||||
|
|
|
@ -57,12 +57,7 @@ struct MercurialInputScheme : InputScheme
|
|||
Attrs attrs;
|
||||
attrs.emplace("type", "hg");
|
||||
|
||||
for (auto &[name, value] : url.query) {
|
||||
if (name == "rev" || name == "ref")
|
||||
attrs.emplace(name, value);
|
||||
else
|
||||
url2.query.emplace(name, value);
|
||||
}
|
||||
emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {});
|
||||
|
||||
attrs.emplace("url", url2.to_string());
|
||||
|
||||
|
|
|
@ -201,29 +201,17 @@ struct CurlInputScheme : InputScheme
|
|||
if (!isValidURL(_url, requireTree))
|
||||
return std::nullopt;
|
||||
|
||||
Input input;
|
||||
|
||||
auto url = _url;
|
||||
|
||||
Attrs attrs;
|
||||
attrs.emplace("type", inputType());
|
||||
|
||||
url.scheme = parseUrlScheme(url.scheme).transport;
|
||||
|
||||
auto narHash = url.query.find("narHash");
|
||||
if (narHash != url.query.end())
|
||||
input.attrs.insert_or_assign("narHash", narHash->second);
|
||||
emplaceURLQueryIntoAttrs(url, attrs, {"revCount"}, {});
|
||||
|
||||
if (auto i = get(url.query, "rev"))
|
||||
input.attrs.insert_or_assign("rev", *i);
|
||||
|
||||
if (auto i = get(url.query, "revCount"))
|
||||
if (auto n = string2Int<uint64_t>(*i))
|
||||
input.attrs.insert_or_assign("revCount", *n);
|
||||
|
||||
url.query.erase("rev");
|
||||
url.query.erase("revCount");
|
||||
|
||||
input.attrs.insert_or_assign("type", inputType());
|
||||
input.attrs.insert_or_assign("url", url.to_string());
|
||||
return input;
|
||||
attrs.emplace("url", url.to_string());
|
||||
return inputFromAttrs(attrs);
|
||||
}
|
||||
|
||||
std::optional<Input> inputFromAttrs(const Attrs & attrs) const override
|
||||
|
@ -235,7 +223,7 @@ struct CurlInputScheme : InputScheme
|
|||
std::set<std::string> allowedNames = {"type", "url", "narHash", "name", "unpack", "rev", "revCount", "lastModified"};
|
||||
for (auto & [name, value] : attrs)
|
||||
if (!allowedNames.count(name))
|
||||
throw Error("unsupported %s input attribute '%s'", *type, name);
|
||||
throw Error("unsupported %s input attribute '%s'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }'", *type, name);
|
||||
|
||||
Input input;
|
||||
input.attrs = attrs;
|
||||
|
|
91
tests/functional/fetchers.sh
Normal file
91
tests/functional/fetchers.sh
Normal file
|
@ -0,0 +1,91 @@
|
|||
source common.sh
|
||||
|
||||
requireGit
|
||||
|
||||
clearStore
|
||||
|
||||
testFetchTreeError() {
|
||||
rawFetchTreeArg="${1?fetchTree arg missing}"
|
||||
messageSubstring="${2?messageSubstring missing}"
|
||||
|
||||
output="$(nix eval --impure --raw --expr "(builtins.fetchTree $rawFetchTreeArg).outPath" 2>&1)" && status=0 || status=$?
|
||||
grepQuiet "$messageSubstring" <<<"$output"
|
||||
test "$status" -ne 0
|
||||
}
|
||||
|
||||
# github/gitlab/sourcehut fetcher input validation
|
||||
for provider in github gitlab sourcehut; do
|
||||
# ref/rev validation
|
||||
testFetchTreeError \
|
||||
"{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; ref = \",\"; }" \
|
||||
"URL '$provider:foo/bar' contains an invalid branch/tag name"
|
||||
|
||||
testFetchTreeError \
|
||||
"\"$provider://host/foo/bar/,\"" \
|
||||
"URL '$provider:foo/bar', ',' is not a commit hash or a branch/tag name"
|
||||
|
||||
testFetchTreeError \
|
||||
"\"$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc\"" \
|
||||
"URL '$provider://host/foo/bar/f16d8f43dd0998cdb315a2cccf2e4d10027e7ca4?rev=abc' already contains a ref or rev"
|
||||
|
||||
testFetchTreeError \
|
||||
"\"$provider://host/foo/bar/ref?ref=ref2\"" \
|
||||
"URL '$provider://host/foo/bar/ref?ref=ref2' already contains a ref or rev"
|
||||
|
||||
# host validation
|
||||
testFetchTreeError \
|
||||
"{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; host = \"git_hub.com\"; }" \
|
||||
"URL '$provider:foo/bar' contains an invalid instance host"
|
||||
|
||||
testFetchTreeError \
|
||||
"\"$provider://host/foo/bar/ref?host=git_hub.com\"" \
|
||||
"URL '$provider:foo/bar' contains an invalid instance host"
|
||||
|
||||
# invalid attributes
|
||||
testFetchTreeError \
|
||||
"{ type = \"$provider\"; owner = \"foo\"; repo = \"bar\"; wrong = true; }" \
|
||||
"unsupported input attribute 'wrong'"
|
||||
|
||||
testFetchTreeError \
|
||||
"\"$provider://host/foo/bar/ref?wrong=1\"" \
|
||||
"unsupported input attribute 'wrong'"
|
||||
done
|
||||
|
||||
# unsupported attributes w/ tarball fetcher
|
||||
testFetchTreeError \
|
||||
"\"https://host/foo?wrong=1\"" \
|
||||
"unsupported tarball input attribute 'wrong'. If you wanted to fetch a tarball with a query parameter, please use '{ type = \"tarball\"; url = \"...\"; }"
|
||||
|
||||
# test for unsupported attributes / validation in git fetcher
|
||||
testFetchTreeError \
|
||||
"\"git+https://github.com/owner/repo?invalid=1\"" \
|
||||
"unsupported Git input attribute 'invalid'"
|
||||
|
||||
testFetchTreeError \
|
||||
"\"git+https://github.com/owner/repo?url=foo\"" \
|
||||
"URL 'git+https://github.com/owner/repo?url=foo' must not override url via query param!"
|
||||
|
||||
testFetchTreeError \
|
||||
"\"git+https://github.com/owner/repo?ref=foo.lock\"" \
|
||||
"invalid Git branch/tag name 'foo.lock'"
|
||||
|
||||
testFetchTreeError \
|
||||
"{ type = \"git\"; url =\"https://github.com/owner/repo\"; ref = \"foo.lock\"; }" \
|
||||
"invalid Git branch/tag name 'foo.lock'"
|
||||
|
||||
# same for mercurial
|
||||
testFetchTreeError \
|
||||
"\"hg+https://forge.tld/owner/repo?invalid=1\"" \
|
||||
"unsupported Mercurial input attribute 'invalid'"
|
||||
|
||||
testFetchTreeError \
|
||||
"{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; invalid = 1; }" \
|
||||
"unsupported Mercurial input attribute 'invalid'"
|
||||
|
||||
testFetchTreeError \
|
||||
"\"hg+https://forge.tld/owner/repo?ref=,\"" \
|
||||
"invalid Mercurial branch/tag name ','"
|
||||
|
||||
testFetchTreeError \
|
||||
"{ type = \"hg\"; url = \"https://forge.tld/owner/repo\"; ref = \",\"; }" \
|
||||
"invalid Mercurial branch/tag name ','"
|
|
@ -93,6 +93,7 @@ functional_tests_scripts = [
|
|||
'fetchGitRefs.sh',
|
||||
'gc-runtime.sh',
|
||||
'tarball.sh',
|
||||
'fetchers.sh',
|
||||
'fetchGit.sh',
|
||||
'fetchurl.sh',
|
||||
'fetchPath.sh',
|
||||
|
|
|
@ -69,7 +69,7 @@ in
|
|||
|
||||
# Check that we got redirected to the immutable URL.
|
||||
locked_url = info["locked"]["url"]
|
||||
assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz"
|
||||
assert locked_url == "http://localhost/stable/${nixpkgs.rev}.tar.gz?rev=${nixpkgs.rev}&revCount=1234", f"{locked_url=} != http://localhost/stable/${nixpkgs.rev}.tar.gz"
|
||||
|
||||
# Check that we got the rev and revCount attributes.
|
||||
revision = info["revision"]
|
||||
|
|
Loading…
Reference in a new issue