* Fix a race condition with parallel builds where multiple
fixed-output derivations or substitutions try to build the same store path at the same time. Locking generally catches this, but not between multiple goals in the same process. This happened especially often (actually, only) in the build farm with fetchurl downloads of the same file being executed on multiple machines and then copied back to the main machine where they would clobber each other (NIXBF-13). Solution: if a goal notices that the output path is already locked, then go to sleep until another goal finishes (hopefully the one locking the path) and try again.
This commit is contained in:
parent
bc0429b1cd
commit
c970b28ba0
3 changed files with 96 additions and 10 deletions
|
@ -181,6 +181,9 @@ private:
|
|||
WeakGoalMap derivationGoals;
|
||||
WeakGoalMap substitutionGoals;
|
||||
|
||||
/* Goals waiting for busy paths to be unlocked. */
|
||||
WeakGoals waitingForAnyGoal;
|
||||
|
||||
public:
|
||||
|
||||
Worker();
|
||||
|
@ -223,6 +226,10 @@ public:
|
|||
substituter invocations to reduce overhead. */
|
||||
void waitForInfo(GoalPtr goal, Path sub, Path storePath);
|
||||
|
||||
/* Wait for any goal to finish. Pretty indiscriminate way to
|
||||
wait for some resource that some other goal is holding. */
|
||||
void waitForAnyGoal(GoalPtr goal);
|
||||
|
||||
/* Loop until the specified top-level goals have finished. */
|
||||
void run(const Goals & topGoals);
|
||||
|
||||
|
@ -642,7 +649,7 @@ private:
|
|||
void buildDone();
|
||||
|
||||
/* Is the build hook willing to perform the build? */
|
||||
typedef enum {rpAccept, rpDecline, rpPostpone, rpDone} HookReply;
|
||||
typedef enum {rpAccept, rpDecline, rpPostpone, rpDone, rpRestart} HookReply;
|
||||
HookReply tryBuildHook();
|
||||
|
||||
/* Synchronously wait for a build hook to finish. */
|
||||
|
@ -651,9 +658,13 @@ private:
|
|||
/* Acquires locks on the output paths and gathers information
|
||||
about the build (e.g., the input closures). During this
|
||||
process its possible that we find out that the build is
|
||||
unnecessary, in which case we return false (this is not an
|
||||
error condition!). */
|
||||
bool prepareBuild();
|
||||
unnecessary, in which case we return prDone. It's also
|
||||
possible that some other goal is already building/substituting
|
||||
the output paths, in which case we return prRestart (go back to
|
||||
the haveDerivation() state). Otherwise, prProceed is
|
||||
returned. */
|
||||
typedef enum {prProceed, prDone, prRestart} PrepareBuildReply;
|
||||
PrepareBuildReply prepareBuild();
|
||||
|
||||
/* Start building a derivation. */
|
||||
void startBuilder();
|
||||
|
@ -791,6 +802,20 @@ void DerivationGoal::haveDerivation()
|
|||
return;
|
||||
}
|
||||
|
||||
/* If this is a fixed-output derivation, it is possible that some
|
||||
other goal is already building the output paths. (The case
|
||||
where some other process is building it is handled through
|
||||
normal locking mechanisms.) So if any output paths are already
|
||||
being built, put this goal to sleep. */
|
||||
for (PathSet::iterator i = invalidOutputs.begin();
|
||||
i != invalidOutputs.end(); ++i)
|
||||
if (pathIsLockedByMe(*i)) {
|
||||
/* Wait until any goal finishes (hopefully the one that is
|
||||
locking *i), then retry haveDerivation(). */
|
||||
worker.waitForAnyGoal(shared_from_this());
|
||||
return;
|
||||
}
|
||||
|
||||
/* We are first going to try to create the invalid output paths
|
||||
through substitutes. If that doesn't work, we'll build
|
||||
them. */
|
||||
|
@ -884,6 +909,12 @@ void DerivationGoal::tryToBuild()
|
|||
/* Somebody else did it. */
|
||||
amDone(ecSuccess);
|
||||
return;
|
||||
case rpRestart:
|
||||
/* Somebody else is building this output path.
|
||||
Restart from haveDerivation(). */
|
||||
state = &DerivationGoal::haveDerivation;
|
||||
worker.waitForAnyGoal(shared_from_this());
|
||||
return;
|
||||
}
|
||||
|
||||
/* Make sure that we are allowed to start a build. */
|
||||
|
@ -894,9 +925,14 @@ void DerivationGoal::tryToBuild()
|
|||
|
||||
/* Acquire locks and such. If we then see that the build has
|
||||
been done by somebody else, we're done. */
|
||||
if (!prepareBuild()) {
|
||||
PrepareBuildReply preply = prepareBuild();
|
||||
if (preply == prDone) {
|
||||
amDone(ecSuccess);
|
||||
return;
|
||||
} else if (preply == prRestart) {
|
||||
state = &DerivationGoal::haveDerivation;
|
||||
worker.waitForAnyGoal(shared_from_this());
|
||||
return;
|
||||
}
|
||||
|
||||
/* Okay, we have to build. */
|
||||
|
@ -1176,11 +1212,12 @@ DerivationGoal::HookReply DerivationGoal::tryBuildHook()
|
|||
|
||||
/* Acquire locks and such. If we then see that the output
|
||||
paths are now valid, we're done. */
|
||||
if (!prepareBuild()) {
|
||||
PrepareBuildReply preply = prepareBuild();
|
||||
if (preply == prDone || preply == prRestart) {
|
||||
/* Tell the hook to exit. */
|
||||
writeLine(toHook.writeSide, "cancel");
|
||||
terminateBuildHook();
|
||||
return rpDone;
|
||||
return preply == prDone ? rpDone : rpRestart;
|
||||
}
|
||||
|
||||
printMsg(lvlInfo, format("running hook to build path(s) %1%")
|
||||
|
@ -1256,8 +1293,20 @@ void DerivationGoal::terminateBuildHook(bool kill)
|
|||
}
|
||||
|
||||
|
||||
bool DerivationGoal::prepareBuild()
|
||||
DerivationGoal::PrepareBuildReply DerivationGoal::prepareBuild()
|
||||
{
|
||||
/* Check for the possibility that some other goal in this process
|
||||
has locked the output since we checked in haveDerivation().
|
||||
(It can't happen between here and the lockPaths() call below
|
||||
because we're not allowing multi-threading.) */
|
||||
for (DerivationOutputs::iterator i = drv.outputs.begin();
|
||||
i != drv.outputs.end(); ++i)
|
||||
if (pathIsLockedByMe(i->second.path)) {
|
||||
debug(format("restarting derivation `%1%' because `%2%' is locked by another goal")
|
||||
% drvPath % i->second.path);
|
||||
return prRestart;
|
||||
}
|
||||
|
||||
/* Obtain locks on all output paths. The locks are automatically
|
||||
released when we exit this function or Nix crashes. */
|
||||
/* !!! BUG: this could block, which is not allowed. */
|
||||
|
@ -1276,7 +1325,7 @@ bool DerivationGoal::prepareBuild()
|
|||
debug(format("skipping build of derivation `%1%', someone beat us to it")
|
||||
% drvPath);
|
||||
outputLocks.setDeletion(true);
|
||||
return false;
|
||||
return prDone;
|
||||
}
|
||||
|
||||
if (validPaths.size() > 0) {
|
||||
|
@ -1341,7 +1390,7 @@ bool DerivationGoal::prepareBuild()
|
|||
|
||||
allPaths.insert(inputPaths.begin(), inputPaths.end());
|
||||
|
||||
return true;
|
||||
return prProceed;
|
||||
}
|
||||
|
||||
|
||||
|
@ -2028,6 +2077,16 @@ void SubstitutionGoal::tryToRun()
|
|||
return;
|
||||
}
|
||||
|
||||
/* Maybe a derivation goal has already locked this path
|
||||
(exceedingly unlikely, since it should have used a substitute
|
||||
first, but let's be defensive). */
|
||||
if (pathIsLockedByMe(storePath)) {
|
||||
debug(format("restarting substitution of `%1%' because it's locked by another goal")
|
||||
% storePath);
|
||||
worker.waitForAnyGoal(shared_from_this());
|
||||
return; /* restart in the tryToRun() state when another goal finishes */
|
||||
}
|
||||
|
||||
/* Acquire a lock on the output path. */
|
||||
outputLock = boost::shared_ptr<PathLocks>(new PathLocks);
|
||||
outputLock->lockPaths(singleton<PathSet>(storePath),
|
||||
|
@ -2251,6 +2310,16 @@ void Worker::removeGoal(GoalPtr goal)
|
|||
if (goal->getExitCode() == Goal::ecFailed && !keepGoing)
|
||||
topGoals.clear();
|
||||
}
|
||||
|
||||
/* Wake up goals waiting for any goal to finish. */
|
||||
for (WeakGoals::iterator i = waitingForAnyGoal.begin();
|
||||
i != waitingForAnyGoal.end(); ++i)
|
||||
{
|
||||
GoalPtr goal = i->lock();
|
||||
if (goal) wakeUp(goal);
|
||||
}
|
||||
|
||||
waitingForAnyGoal.clear();
|
||||
}
|
||||
|
||||
|
||||
|
@ -2337,6 +2406,13 @@ void Worker::waitForInfo(GoalPtr goal, Path sub, Path storePath)
|
|||
}
|
||||
|
||||
|
||||
void Worker::waitForAnyGoal(GoalPtr goal)
|
||||
{
|
||||
debug("wait for any goal");
|
||||
waitingForAnyGoal.insert(goal);
|
||||
}
|
||||
|
||||
|
||||
void Worker::getInfo()
|
||||
{
|
||||
for (map<Path, PathSet>::iterator i = requestedInfo.begin();
|
||||
|
|
|
@ -224,4 +224,11 @@ void PathLocks::setDeletion(bool deletePaths)
|
|||
}
|
||||
|
||||
|
||||
bool pathIsLockedByMe(const Path & path)
|
||||
{
|
||||
Path lockPath = path + ".lock";
|
||||
return lockedPaths.find(lockPath) != lockedPaths.end();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -40,6 +40,9 @@ public:
|
|||
};
|
||||
|
||||
|
||||
bool pathIsLockedByMe(const Path & path);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue