Merge pull request #262839 from RaitoBezarius/qemu-vm/timeout
This commit is contained in:
commit
92fdbd284c
8 changed files with 104 additions and 18 deletions
|
@ -11,6 +11,7 @@
|
|||
, tesseract4
|
||||
, vde2
|
||||
, extraPythonPackages ? (_ : [])
|
||||
, nixosTests
|
||||
}:
|
||||
|
||||
python3Packages.buildPythonApplication {
|
||||
|
@ -31,6 +32,10 @@ python3Packages.buildPythonApplication {
|
|||
++ (lib.optionals enableOCR [ imagemagick_light tesseract4 ])
|
||||
++ extraPythonPackages python3Packages;
|
||||
|
||||
passthru.tests = {
|
||||
inherit (nixosTests.nixos-test-driver) driver-timeout;
|
||||
};
|
||||
|
||||
doCheck = true;
|
||||
nativeCheckInputs = with python3Packages; [ mypy ruff black ];
|
||||
checkPhase = ''
|
||||
|
|
|
@ -76,6 +76,14 @@ def main() -> None:
|
|||
nargs="*",
|
||||
help="vlans to span by the driver",
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
"--global-timeout",
|
||||
type=int,
|
||||
metavar="GLOBAL_TIMEOUT",
|
||||
action=EnvDefault,
|
||||
envvar="globalTimeout",
|
||||
help="Timeout in seconds for the whole test",
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
"-o",
|
||||
"--output_directory",
|
||||
|
@ -103,6 +111,7 @@ def main() -> None:
|
|||
args.testscript.read_text(),
|
||||
args.output_directory.resolve(),
|
||||
args.keep_vm_state,
|
||||
args.global_timeout,
|
||||
) as driver:
|
||||
if args.interactive:
|
||||
history_dir = os.getcwd()
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
import os
|
||||
import re
|
||||
import signal
|
||||
import tempfile
|
||||
import threading
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, ContextManager, Dict, Iterator, List, Optional, Union
|
||||
|
@ -41,6 +43,8 @@ class Driver:
|
|||
vlans: List[VLan]
|
||||
machines: List[Machine]
|
||||
polling_conditions: List[PollingCondition]
|
||||
global_timeout: int
|
||||
race_timer: threading.Timer
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -49,9 +53,12 @@ class Driver:
|
|||
tests: str,
|
||||
out_dir: Path,
|
||||
keep_vm_state: bool = False,
|
||||
global_timeout: int = 24 * 60 * 60 * 7,
|
||||
):
|
||||
self.tests = tests
|
||||
self.out_dir = out_dir
|
||||
self.global_timeout = global_timeout
|
||||
self.race_timer = threading.Timer(global_timeout, self.terminate_test)
|
||||
|
||||
tmp_dir = get_tmp_dir()
|
||||
|
||||
|
@ -82,6 +89,7 @@ class Driver:
|
|||
|
||||
def __exit__(self, *_: Any) -> None:
|
||||
with rootlog.nested("cleanup"):
|
||||
self.race_timer.cancel()
|
||||
for machine in self.machines:
|
||||
machine.release()
|
||||
|
||||
|
@ -144,6 +152,10 @@ class Driver:
|
|||
|
||||
def run_tests(self) -> None:
|
||||
"""Run the test script (for non-interactive test runs)"""
|
||||
rootlog.info(
|
||||
f"Test will time out and terminate in {self.global_timeout} seconds"
|
||||
)
|
||||
self.race_timer.start()
|
||||
self.test_script()
|
||||
# TODO: Collect coverage data
|
||||
for machine in self.machines:
|
||||
|
@ -161,6 +173,19 @@ class Driver:
|
|||
with rootlog.nested("wait for all VMs to finish"):
|
||||
for machine in self.machines:
|
||||
machine.wait_for_shutdown()
|
||||
self.race_timer.cancel()
|
||||
|
||||
def terminate_test(self) -> None:
    """Abort the test run once the global timeout has expired.

    This is the callback handed to ``self.race_timer``, so it usually runs
    in a different thread than the one executing the test script. It
    releases every machine and then sends SIGTERM to the driver process
    itself.
    """
    try:
        with rootlog.nested("timeout reached; test terminating..."):
            for machine in self.machines:
                machine.release()
    finally:
        # `sys.exit` cannot stop the program from a non-main thread, so
        # signal our own process instead. This also ends the run when the
        # test script catches and swallows every exception.
        # Sending the signal from `finally` guarantees that an error raised
        # while releasing a machine cannot leave the timed-out test running.
        os.kill(os.getpid(), signal.SIGTERM)
|
||||
|
||||
def create_machine(self, args: Dict[str, Any]) -> Machine:
|
||||
tmp_dir = get_tmp_dir()
|
||||
|
|
|
@ -42,6 +42,7 @@ rec {
|
|||
, nodes ? {}
|
||||
, testScript
|
||||
, enableOCR ? false
|
||||
, globalTimeout ? (60 * 60)
|
||||
, name ? "unnamed"
|
||||
, skipTypeCheck ? false
|
||||
# Skip linting (mainly intended for faster dev cycles)
|
||||
|
|
|
@ -94,6 +94,7 @@ let
|
|||
wrapProgram $out/bin/nixos-test-driver \
|
||||
--set startScripts "''${vmStartScripts[*]}" \
|
||||
--set testScript "$out/test-script" \
|
||||
--set globalTimeout "${toString config.globalTimeout}" \
|
||||
--set vlans '${toString vlans}' \
|
||||
${lib.escapeShellArgs (lib.concatMap (arg: ["--add-flags" arg]) config.extraDriverArgs)}
|
||||
'';
|
||||
|
@ -123,6 +124,18 @@ in
|
|||
defaultText = "hostPkgs.qemu_test";
|
||||
};
|
||||
|
||||
# Upper bound, in seconds, on the duration of the complete test.
globalTimeout = mkOption {
  type = types.int;
  default = 60 * 60;
  example = 10 * 60;
  description = mdDoc ''
    A global timeout for the complete test, expressed in seconds.
    Beyond that timeout, every resource will be killed and released and the test will fail.

    By default, we use a 1 hour timeout.
  '';
};
|
||||
|
||||
enableOCR = mkOption {
|
||||
description = mdDoc ''
|
||||
Whether to enable Optical Character Recognition functionality for
|
||||
|
|
|
@ -16,6 +16,15 @@ in
|
|||
'';
|
||||
};
|
||||
|
||||
# Internal option: the test derivation before it is wrapped into `test`.
rawTestDerivation = mkOption {
  internal = true;
  type = types.package;
  description = mdDoc ''
    Unfiltered version of `test`, for troubleshooting the test framework and `testBuildFailure` in the test framework's test suite.
    This is not intended for general use. Use `test` instead.
  '';
};
|
||||
|
||||
test = mkOption {
|
||||
type = types.package;
|
||||
# TODO: can the interactive driver be configured to access the network?
|
||||
|
@ -29,8 +38,7 @@ in
|
|||
};
|
||||
|
||||
config = {
|
||||
test = lib.lazyDerivation { # lazyDerivation improves performance when only passthru items and/or meta are used.
|
||||
derivation = hostPkgs.stdenv.mkDerivation {
|
||||
rawTestDerivation = hostPkgs.stdenv.mkDerivation {
|
||||
name = "vm-test-run-${config.name}";
|
||||
|
||||
requiredSystemFeatures = [ "kvm" "nixos-test" ];
|
||||
|
@ -48,6 +56,8 @@ in
|
|||
|
||||
meta = config.meta;
|
||||
};
|
||||
test = lib.lazyDerivation { # lazyDerivation improves performance when only passthru items and/or meta are used.
|
||||
derivation = config.rawTestDerivation;
|
||||
inherit (config) passthru meta;
|
||||
};
|
||||
|
||||
|
|
|
@ -90,6 +90,14 @@ in {
|
|||
lib-extend = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./nixos-test-driver/lib-extend.nix {};
|
||||
node-name = runTest ./nixos-test-driver/node-name.nix;
|
||||
busybox = runTest ./nixos-test-driver/busybox.nix;
|
||||
# Builds the timeout test expecting it to fail, then inspects the recorded
# log and exit status of that failed build.
driver-timeout =
  let
    timeoutTest = runTest ./nixos-test-driver/timeout.nix;
  in
  pkgs.runCommand "ensure-timeout-induced-failure"
    {
      # The raw derivation is used so that `testBuildFailure` wraps the
      # actual test run rather than the lazy `test` wrapper.
      failed = pkgs.testers.testBuildFailure timeoutTest.config.rawTestDerivation;
    }
    ''
      grep -F "timeout reached; test terminating" $failed/testBuildFailure.log
      # The program will always be terminated by SIGTERM (143) if it waits for the deadline thread.
      [[ 143 = $(cat $failed/testBuildFailure.exit) ]]
      touch $out
    '';
|
||||
};
|
||||
|
||||
# NixOS vm tests and non-vm unit tests
|
||||
|
|
15
nixos/tests/nixos-test-driver/timeout.nix
Normal file
15
nixos/tests/nixos-test-driver/timeout.nix
Normal file
|
@ -0,0 +1,15 @@
|
|||
# Test definition whose script is meant to run into the global timeout:
# the script sleeps for 6 seconds while only 5 seconds are allowed.
{
  name = "Test that sleep of 6 seconds fails a timeout of 5 seconds";

  # Expressed in seconds.
  globalTimeout = 5;

  # A single node without any additional configuration.
  nodes.machine = { ... }: { };

  testScript = ''
    start_all()
    machine.wait_for_unit("multi-user.target")
    machine.succeed("sleep 6")
  '';
}
|
Loading…
Reference in a new issue