Separate cgroup support from auto-uid-allocation
The new experimental feature 'cgroups' enables the use of cgroups for all builds. This allows better containment and enables setting resource limits and getting some build stats.
This commit is contained in:
parent
f1ab082ac4
commit
128910ba23
6 changed files with 93 additions and 108 deletions
|
@ -15,6 +15,7 @@
|
||||||
#include "topo-sort.hh"
|
#include "topo-sort.hh"
|
||||||
#include "callback.hh"
|
#include "callback.hh"
|
||||||
#include "json-utils.hh"
|
#include "json-utils.hh"
|
||||||
|
#include "cgroup.hh"
|
||||||
|
|
||||||
#include <regex>
|
#include <regex>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
|
@ -129,26 +130,36 @@ void LocalDerivationGoal::killChild()
|
||||||
if (pid != -1) {
|
if (pid != -1) {
|
||||||
worker.childTerminated(this);
|
worker.childTerminated(this);
|
||||||
|
|
||||||
if (buildUser) {
|
/* If we're using a build user, then there is a tricky race
|
||||||
/* If we're using a build user, then there is a tricky
|
condition: if we kill the build user before the child has
|
||||||
race condition: if we kill the build user before the
|
done its setuid() to the build user uid, then it won't be
|
||||||
child has done its setuid() to the build user uid, then
|
killed, and we'll potentially lock up in pid.wait(). So
|
||||||
it won't be killed, and we'll potentially lock up in
|
also send a conventional kill to the child. */
|
||||||
pid.wait(). So also send a conventional kill to the
|
::kill(-pid, SIGKILL); /* ignore the result */
|
||||||
child. */
|
|
||||||
::kill(-pid, SIGKILL); /* ignore the result */
|
|
||||||
buildUser->kill();
|
|
||||||
pid.wait();
|
|
||||||
} else
|
|
||||||
pid.kill();
|
|
||||||
|
|
||||||
assert(pid == -1);
|
killSandbox();
|
||||||
|
|
||||||
|
pid.wait();
|
||||||
}
|
}
|
||||||
|
|
||||||
DerivationGoal::killChild();
|
DerivationGoal::killChild();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void LocalDerivationGoal::killSandbox()
|
||||||
|
{
|
||||||
|
if (cgroup) {
|
||||||
|
destroyCgroup(*cgroup);
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (buildUser) {
|
||||||
|
auto uid = buildUser->getUID();
|
||||||
|
assert(uid != 0);
|
||||||
|
killUser(uid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void LocalDerivationGoal::tryLocalBuild() {
|
void LocalDerivationGoal::tryLocalBuild() {
|
||||||
unsigned int curBuilds = worker.getNrLocalBuilds();
|
unsigned int curBuilds = worker.getNrLocalBuilds();
|
||||||
if (curBuilds >= settings.maxBuildJobs) {
|
if (curBuilds >= settings.maxBuildJobs) {
|
||||||
|
@ -169,10 +180,6 @@ void LocalDerivationGoal::tryLocalBuild() {
|
||||||
worker.waitForAWhile(shared_from_this());
|
worker.waitForAWhile(shared_from_this());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Make sure that no other processes are executing under this
|
|
||||||
uid. */
|
|
||||||
buildUser->kill();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
actLock.reset();
|
actLock.reset();
|
||||||
|
@ -263,7 +270,7 @@ void LocalDerivationGoal::cleanupPostChildKill()
|
||||||
malicious user from leaving behind a process that keeps files
|
malicious user from leaving behind a process that keeps files
|
||||||
open and modifies them after they have been chown'ed to
|
open and modifies them after they have been chown'ed to
|
||||||
root. */
|
root. */
|
||||||
if (buildUser) buildUser->kill();
|
killSandbox();
|
||||||
|
|
||||||
/* Terminate the recursive Nix daemon. */
|
/* Terminate the recursive Nix daemon. */
|
||||||
stopDaemon();
|
stopDaemon();
|
||||||
|
@ -356,6 +363,55 @@ static void linkOrCopy(const Path & from, const Path & to)
|
||||||
|
|
||||||
void LocalDerivationGoal::startBuilder()
|
void LocalDerivationGoal::startBuilder()
|
||||||
{
|
{
|
||||||
|
if ((buildUser && buildUser->getUIDCount() != 1)
|
||||||
|
|| settings.isExperimentalFeatureEnabled(Xp::Cgroups))
|
||||||
|
{
|
||||||
|
#if __linux__
|
||||||
|
auto ourCgroups = getCgroups("/proc/self/cgroup");
|
||||||
|
auto ourCgroup = ourCgroups[""];
|
||||||
|
if (ourCgroup == "")
|
||||||
|
throw Error("cannot determine cgroup name from /proc/self/cgroup");
|
||||||
|
|
||||||
|
auto ourCgroupPath = canonPath("/sys/fs/cgroup/" + ourCgroup);
|
||||||
|
|
||||||
|
if (!pathExists(ourCgroupPath))
|
||||||
|
throw Error("expected cgroup directory '%s'", ourCgroupPath);
|
||||||
|
|
||||||
|
static std::atomic<unsigned int> counter{0};
|
||||||
|
|
||||||
|
cgroup = buildUser
|
||||||
|
? fmt("%s/nix-build-uid-%d", ourCgroupPath, buildUser->getUID())
|
||||||
|
: fmt("%s/nix-build-pid-%d-%d", ourCgroupPath, getpid(), counter++);
|
||||||
|
|
||||||
|
debug("using cgroup '%s'", *cgroup);
|
||||||
|
|
||||||
|
/* When using a build user, record the cgroup we used for that
|
||||||
|
user so that if we got interrupted previously, we can kill
|
||||||
|
any left-over cgroup first. */
|
||||||
|
if (buildUser) {
|
||||||
|
auto cgroupsDir = settings.nixStateDir + "/cgroups";
|
||||||
|
createDirs(cgroupsDir);
|
||||||
|
|
||||||
|
auto cgroupFile = fmt("%s/%d", cgroupsDir, buildUser->getUID());
|
||||||
|
|
||||||
|
if (pathExists(cgroupFile)) {
|
||||||
|
auto prevCgroup = readFile(cgroupFile);
|
||||||
|
destroyCgroup(prevCgroup);
|
||||||
|
}
|
||||||
|
|
||||||
|
writeFile(cgroupFile, *cgroup);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
throw Error("cgroups are not supported on this platform");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Make sure that no other processes are executing under the
|
||||||
|
sandbox uids. This must be done before any chownToBuilder()
|
||||||
|
calls. */
|
||||||
|
killSandbox();
|
||||||
|
|
||||||
/* Right platform? */
|
/* Right platform? */
|
||||||
if (!parsedDrv->canBuildLocally(worker.store))
|
if (!parsedDrv->canBuildLocally(worker.store))
|
||||||
throw Error("a '%s' with features {%s} is required to build '%s', but I am a '%s' with features {%s}",
|
throw Error("a '%s' with features {%s} is required to build '%s', but I am a '%s' with features {%s}",
|
||||||
|
@ -646,13 +702,13 @@ void LocalDerivationGoal::startBuilder()
|
||||||
dirsInChroot.erase(worker.store.printStorePath(*i.second.second));
|
dirsInChroot.erase(worker.store.printStorePath(*i.second.second));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (buildUser) {
|
if (cgroup) {
|
||||||
if (auto cgroup = buildUser->getCgroup()) {
|
if (mkdir(cgroup->c_str(), 0755) != 0)
|
||||||
chownToBuilder(*cgroup);
|
throw SysError("creating cgroup '%s'", *cgroup);
|
||||||
chownToBuilder(*cgroup + "/cgroup.procs");
|
chownToBuilder(*cgroup);
|
||||||
chownToBuilder(*cgroup + "/cgroup.threads");
|
chownToBuilder(*cgroup + "/cgroup.procs");
|
||||||
//chownToBuilder(*cgroup + "/cgroup.subtree_control");
|
chownToBuilder(*cgroup + "/cgroup.threads");
|
||||||
}
|
//chownToBuilder(*cgroup + "/cgroup.subtree_control");
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
@ -965,10 +1021,8 @@ void LocalDerivationGoal::startBuilder()
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Move the child into its own cgroup. */
|
/* Move the child into its own cgroup. */
|
||||||
if (buildUser) {
|
if (cgroup)
|
||||||
if (auto cgroup = buildUser->getCgroup())
|
writeFile(*cgroup + "/cgroup.procs", fmt("%d", (pid_t) pid));
|
||||||
writeFile(*cgroup + "/cgroup.procs", fmt("%d", (pid_t) pid));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Signal the builder that we've updated its user namespace. */
|
/* Signal the builder that we've updated its user namespace. */
|
||||||
writeFull(userNamespaceSync.writeSide.get(), "1");
|
writeFull(userNamespaceSync.writeSide.get(), "1");
|
||||||
|
@ -1838,7 +1892,7 @@ void LocalDerivationGoal::runChild()
|
||||||
/* Unshare the cgroup namespace. This means
|
/* Unshare the cgroup namespace. This means
|
||||||
/proc/self/cgroup will show the child's cgroup as '/'
|
/proc/self/cgroup will show the child's cgroup as '/'
|
||||||
rather than whatever it is in the parent. */
|
rather than whatever it is in the parent. */
|
||||||
if (buildUser && buildUser->getUIDCount() != 1 && unshare(CLONE_NEWCGROUP) == -1)
|
if (cgroup && unshare(CLONE_NEWCGROUP) == -1)
|
||||||
throw SysError("unsharing cgroup namespace");
|
throw SysError("unsharing cgroup namespace");
|
||||||
|
|
||||||
/* Do the chroot(). */
|
/* Do the chroot(). */
|
||||||
|
|
|
@ -15,6 +15,9 @@ struct LocalDerivationGoal : public DerivationGoal
|
||||||
/* The process ID of the builder. */
|
/* The process ID of the builder. */
|
||||||
Pid pid;
|
Pid pid;
|
||||||
|
|
||||||
|
/* The cgroup of the builder, if any. */
|
||||||
|
std::optional<Path> cgroup;
|
||||||
|
|
||||||
/* The temporary directory. */
|
/* The temporary directory. */
|
||||||
Path tmpDir;
|
Path tmpDir;
|
||||||
|
|
||||||
|
@ -197,6 +200,10 @@ struct LocalDerivationGoal : public DerivationGoal
|
||||||
/* Forcibly kill the child process, if any. */
|
/* Forcibly kill the child process, if any. */
|
||||||
void killChild() override;
|
void killChild() override;
|
||||||
|
|
||||||
|
/* Kill any processes running under the build user UID or in the
|
||||||
|
cgroup of the build. */
|
||||||
|
void killSandbox();
|
||||||
|
|
||||||
/* Create alternative path calculated from but distinct from the
|
/* Create alternative path calculated from but distinct from the
|
||||||
input, so we can avoid overwriting outputs (or other store paths)
|
input, so we can avoid overwriting outputs (or other store paths)
|
||||||
that already exist. */
|
that already exist. */
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
#include "lock.hh"
|
#include "lock.hh"
|
||||||
#include "globals.hh"
|
#include "globals.hh"
|
||||||
#include "pathlocks.hh"
|
#include "pathlocks.hh"
|
||||||
#include "cgroup.hh"
|
|
||||||
|
|
||||||
#include <pwd.h>
|
#include <pwd.h>
|
||||||
#include <grp.h>
|
#include <grp.h>
|
||||||
|
@ -15,11 +14,6 @@ struct SimpleUserLock : UserLock
|
||||||
gid_t gid;
|
gid_t gid;
|
||||||
std::vector<gid_t> supplementaryGIDs;
|
std::vector<gid_t> supplementaryGIDs;
|
||||||
|
|
||||||
void kill() override
|
|
||||||
{
|
|
||||||
killUser(uid);
|
|
||||||
}
|
|
||||||
|
|
||||||
uid_t getUID() override { assert(uid); return uid; }
|
uid_t getUID() override { assert(uid); return uid; }
|
||||||
uid_t getUIDCount() override { return 1; }
|
uid_t getUIDCount() override { return 1; }
|
||||||
gid_t getGID() override { assert(gid); return gid; }
|
gid_t getGID() override { assert(gid); return gid; }
|
||||||
|
@ -116,32 +110,6 @@ struct AutoUserLock : UserLock
|
||||||
AutoCloseFD fdUserLock;
|
AutoCloseFD fdUserLock;
|
||||||
uid_t firstUid = 0;
|
uid_t firstUid = 0;
|
||||||
uid_t nrIds = 1;
|
uid_t nrIds = 1;
|
||||||
#if __linux__
|
|
||||||
std::optional<Path> cgroup;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
~AutoUserLock()
|
|
||||||
{
|
|
||||||
#if __linux__
|
|
||||||
// Get rid of our cgroup, ignoring errors.
|
|
||||||
if (cgroup) rmdir(cgroup->c_str());
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
void kill() override
|
|
||||||
{
|
|
||||||
#if __linux__
|
|
||||||
if (cgroup) {
|
|
||||||
destroyCgroup(*cgroup);
|
|
||||||
if (mkdir(cgroup->c_str(), 0755) == -1)
|
|
||||||
throw SysError("creating cgroup '%s'", *cgroup);
|
|
||||||
} else
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
assert(firstUid);
|
|
||||||
killUser(firstUid);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uid_t getUID() override { assert(firstUid); return firstUid; }
|
uid_t getUID() override { assert(firstUid); return firstUid; }
|
||||||
|
|
||||||
|
@ -183,55 +151,16 @@ struct AutoUserLock : UserLock
|
||||||
throw SysError("opening user lock '%s'", fnUserLock);
|
throw SysError("opening user lock '%s'", fnUserLock);
|
||||||
|
|
||||||
if (lockFile(fd.get(), ltWrite, false)) {
|
if (lockFile(fd.get(), ltWrite, false)) {
|
||||||
auto s = drainFD(fd.get());
|
|
||||||
|
|
||||||
#if __linux__
|
|
||||||
if (s != "") {
|
|
||||||
/* Kill the old cgroup, to ensure there are no
|
|
||||||
processes left over from an interrupted build. */
|
|
||||||
destroyCgroup(s);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (ftruncate(fd.get(), 0) == -1)
|
|
||||||
throw Error("truncating user lock");
|
|
||||||
|
|
||||||
auto lock = std::make_unique<AutoUserLock>();
|
auto lock = std::make_unique<AutoUserLock>();
|
||||||
lock->fdUserLock = std::move(fd);
|
lock->fdUserLock = std::move(fd);
|
||||||
lock->firstUid = settings.startId + i * maxIdsPerBuild;
|
lock->firstUid = settings.startId + i * maxIdsPerBuild;
|
||||||
lock->nrIds = nrIds;
|
lock->nrIds = nrIds;
|
||||||
|
|
||||||
#if __linux__
|
|
||||||
if (nrIds > 1) {
|
|
||||||
auto ourCgroups = getCgroups("/proc/self/cgroup");
|
|
||||||
auto ourCgroup = ourCgroups[""];
|
|
||||||
if (ourCgroup == "")
|
|
||||||
throw Error("cannot determine cgroup name from /proc/self/cgroup");
|
|
||||||
|
|
||||||
auto ourCgroupPath = canonPath("/sys/fs/cgroup/" + ourCgroup);
|
|
||||||
|
|
||||||
if (!pathExists(ourCgroupPath))
|
|
||||||
throw Error("expected cgroup directory '%s'", ourCgroupPath);
|
|
||||||
|
|
||||||
lock->cgroup = fmt("%s/nix-build-%d", ourCgroupPath, lock->firstUid);
|
|
||||||
|
|
||||||
/* Record the cgroup in the lock file. This ensures that
|
|
||||||
if we subsequently get executed under a different parent
|
|
||||||
cgroup, we kill the previous cgroup first. */
|
|
||||||
writeFull(lock->fdUserLock.get(), *lock->cgroup);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return lock;
|
return lock;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if __linux__
|
|
||||||
std::optional<Path> getCgroup() override { return cgroup; }
|
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
|
|
||||||
std::unique_ptr<UserLock> acquireUserLock(uid_t nrIds)
|
std::unique_ptr<UserLock> acquireUserLock(uid_t nrIds)
|
||||||
|
|
|
@ -27,13 +27,6 @@ struct UserLock
|
||||||
virtual gid_t getGID() = 0;
|
virtual gid_t getGID() = 0;
|
||||||
|
|
||||||
virtual std::vector<gid_t> getSupplementaryGIDs() = 0;
|
virtual std::vector<gid_t> getSupplementaryGIDs() = 0;
|
||||||
|
|
||||||
/* Kill any processes currently executing as this user. */
|
|
||||||
virtual void kill() = 0;
|
|
||||||
|
|
||||||
#if __linux__
|
|
||||||
virtual std::optional<Path> getCgroup() { return {}; };
|
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Acquire a user lock for a UID range of size `nrIds`. Note that this
|
/* Acquire a user lock for a UID range of size `nrIds`. Note that this
|
||||||
|
|
|
@ -15,6 +15,7 @@ std::map<ExperimentalFeature, std::string> stringifiedXpFeatures = {
|
||||||
{ Xp::FetchClosure, "fetch-closure" },
|
{ Xp::FetchClosure, "fetch-closure" },
|
||||||
{ Xp::ReplFlake, "repl-flake" },
|
{ Xp::ReplFlake, "repl-flake" },
|
||||||
{ Xp::AutoAllocateUids, "auto-allocate-uids" },
|
{ Xp::AutoAllocateUids, "auto-allocate-uids" },
|
||||||
|
{ Xp::Cgroups, "cgroups" },
|
||||||
};
|
};
|
||||||
|
|
||||||
const std::optional<ExperimentalFeature> parseExperimentalFeature(const std::string_view & name)
|
const std::optional<ExperimentalFeature> parseExperimentalFeature(const std::string_view & name)
|
||||||
|
|
|
@ -24,6 +24,7 @@ enum struct ExperimentalFeature
|
||||||
FetchClosure,
|
FetchClosure,
|
||||||
ReplFlake,
|
ReplFlake,
|
||||||
AutoAllocateUids,
|
AutoAllocateUids,
|
||||||
|
Cgroups,
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in a new issue