Propagate remote timeouts properly
commit 42e9ad8fd1
parent c6e85ee474

2 changed files with 38 additions and 31 deletions
@@ -258,13 +258,8 @@ writeInt($maxSilentTime, $to);
 writeInt($buildTimeout, $to);
 my $res = readInt($from);
 if ($res != 0) {
-    # Note that if we get exit code 100 from `nix-store -r', it
-    # denotes a permanent build failure (as opposed to an SSH problem
-    # or a temporary Nix problem). We propagate this to the caller to
-    # allow it to distinguish between transient and permanent
-    # failures.
     my $msg = readString($from);
-    print STDERR "error: $msg (on `$hostName')\n";
+    print STDERR "error: $msg on `$hostName'\n";
     exit $res;
 }
 

@@ -238,6 +238,9 @@ public:
        failure). */
     bool permanentFailure;
 
+    /* Set if at least one derivation had a timeout. */
+    bool timedOut;
+
     LocalStore & store;
 
     std::shared_ptr<HookInstance> hook;
@@ -1440,33 +1443,39 @@ void DerivationGoal::buildDone()
         outputLocks.unlock();
         buildUser.release();
 
-        /* When using a build hook, the hook will return a remote
-           build failure using exit code 100. Anything else is a hook
-           problem. */
-        bool hookError = hook &&
-            (!WIFEXITED(status) || WEXITSTATUS(status) != 100);
+        if (hook && WIFEXITED(status) && WEXITSTATUS(status) == 101) {
+            if (settings.printBuildTrace)
+                printMsg(lvlError, format("@ build-failed %1% - timeout") % drvPath);
+            worker.timedOut = true;
+        }
 
-        if (settings.printBuildTrace) {
-            if (hook && hookError)
-                printMsg(lvlError, format("@ hook-failed %1% - %2% %3%")
-                    % drvPath % status % e.msg());
-            else
-                printMsg(lvlError, format("@ build-failed %1% - %2% %3%")
-                    % drvPath % 1 % e.msg());
-        }
+        else if (hook && (!WIFEXITED(status) || WEXITSTATUS(status) != 100)) {
+            if (settings.printBuildTrace)
+                printMsg(lvlError, format("@ hook-failed %1% - %2% %3%")
+                    % drvPath % status % e.msg());
+        }
 
-        /* Register the outputs of this build as "failed" so we won't
-           try to build them again (negative caching). However, don't
-           do this for fixed-output derivations, since they're likely
-           to fail for transient reasons (e.g., fetchurl not being
-           able to access the network). Hook errors (like
-           communication problems with the remote machine) shouldn't
-           be cached either. */
-        if (settings.cacheFailure && !hookError && !fixedOutput)
-            foreach (DerivationOutputs::iterator, i, drv.outputs)
-                worker.store.registerFailedPath(i->second.path);
-        worker.permanentFailure = !hookError && !fixedOutput && !diskFull;
+        else {
+            if (settings.printBuildTrace)
+                printMsg(lvlError, format("@ build-failed %1% - %2% %3%")
+                    % drvPath % 1 % e.msg());
+            worker.permanentFailure = !fixedOutput && !diskFull;
+
+            /* Register the outputs of this build as "failed" so we
+               won't try to build them again (negative caching).
+               However, don't do this for fixed-output derivations,
+               since they're likely to fail for transient reasons
+               (e.g., fetchurl not being able to access the network).
+               Hook errors (like communication problems with the
+               remote machine) shouldn't be cached either. */
+            if (/* settings.cacheFailure && */ !fixedOutput && !diskFull)
+            {
+                printMsg(lvlError, "REG");
+                foreach (DerivationOutputs::iterator, i, drv.outputs)
+                    worker.store.registerFailedPath(i->second.path);
+            }
+        }
 
         amDone(ecFailed);
         return;
     }
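The rewritten DerivationGoal::buildDone() above replaces the old boolean hookError with a three-way decision on the hook's wait status. A minimal sketch of that classification in isolation (illustrative only, not part of the commit; the enum and function names are made up): exit code 101 means the remote build timed out, exit code 100 means a permanent remote build failure, and anything else is a problem with the hook itself (e.g. a lost SSH connection), which is never negatively cached.

    #include <sys/wait.h>

    enum HookResult { hrRemoteTimeout, hrPermanentFailure, hrHookError };

    /* Classify the build hook's wait status, mirroring the branches in
       DerivationGoal::buildDone() above. */
    HookResult classifyHookStatus(int status)
    {
        if (WIFEXITED(status) && WEXITSTATUS(status) == 101)
            return hrRemoteTimeout;      /* sets worker.timedOut */
        if (!WIFEXITED(status) || WEXITSTATUS(status) != 100)
            return hrHookError;          /* e.g. SSH or communication problem; not cached */
        return hrPermanentFailure;       /* candidate for negative caching */
    }
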
@@ -2909,6 +2918,7 @@ Worker::Worker(LocalStore & store)
     nrLocalBuilds = 0;
     lastWokenUp = 0;
     permanentFailure = false;
+    timedOut = false;
 }
 
 
@@ -3220,6 +3230,7 @@ void Worker::waitForInput()
                 format("%1% timed out after %2% seconds of silence")
                 % goal->getName() % settings.maxSilentTime);
             goal->cancel(true);
+            timedOut = true;
         }
 
         else if (goal->getExitCode() == Goal::ecBusy &&
@@ -3231,6 +3242,7 @@ void Worker::waitForInput()
                 format("%1% timed out after %2% seconds")
                 % goal->getName() % settings.buildTimeout);
             goal->cancel(true);
+            timedOut = true;
         }
     }
 
@@ -3247,7 +3259,7 @@ void Worker::waitForInput()
 
 
 unsigned int Worker::exitStatus()
 {
-    return permanentFailure ? 100 : 1;
+    return timedOut ? 101 : (permanentFailure ? 100 : 1);
 }
 
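With Worker::exitStatus() now returning 101 for timeouts, a timeout on the remote side survives the whole round trip: the remote `nix-store -r' exits 101, the build hook script re-exits with that code (the `exit $res' in the Perl hunk above), and the local buildDone() maps it back onto worker.timedOut, so the local Nix exits 101 as well. A rough, illustrative caller (not part of the commit; the usage and argument handling are made up) that distinguishes the three outcomes:

    #include <sys/wait.h>
    #include <cstdio>
    #include <cstdlib>
    #include <string>

    int main(int argc, char * * argv)
    {
        if (argc < 2) {
            std::fprintf(stderr, "usage: %s <store-path>\n", argv[0]);
            return 1;
        }

        /* On POSIX systems std::system() returns a wait status, so the
           usual macros apply. */
        int status = std::system(("nix-store -r " + std::string(argv[1])).c_str());

        if (WIFEXITED(status)) {
            switch (WEXITSTATUS(status)) {
                case 0:   std::puts("build succeeded");                            break;
                case 100: std::puts("permanent build failure - do not retry");     break;
                case 101: std::puts("build timed out");                            break;
                default:  std::puts("transient failure (e.g. SSH or Nix problem)"); break;
            }
        }
        return 0;
    }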