Merge pull request #262839 from RaitoBezarius/qemu-vm/timeout

This commit is contained in:
Ryan Lahfa 2023-10-29 17:21:10 +01:00 committed by GitHub
commit 92fdbd284c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 104 additions and 18 deletions

View file

@ -11,6 +11,7 @@
, tesseract4 , tesseract4
, vde2 , vde2
, extraPythonPackages ? (_ : []) , extraPythonPackages ? (_ : [])
, nixosTests
}: }:
python3Packages.buildPythonApplication { python3Packages.buildPythonApplication {
@ -31,6 +32,10 @@ python3Packages.buildPythonApplication {
++ (lib.optionals enableOCR [ imagemagick_light tesseract4 ]) ++ (lib.optionals enableOCR [ imagemagick_light tesseract4 ])
++ extraPythonPackages python3Packages; ++ extraPythonPackages python3Packages;
passthru.tests = {
inherit (nixosTests.nixos-test-driver) driver-timeout;
};
doCheck = true; doCheck = true;
nativeCheckInputs = with python3Packages; [ mypy ruff black ]; nativeCheckInputs = with python3Packages; [ mypy ruff black ];
checkPhase = '' checkPhase = ''

View file

@ -76,6 +76,14 @@ def main() -> None:
nargs="*", nargs="*",
help="vlans to span by the driver", help="vlans to span by the driver",
) )
arg_parser.add_argument(
"--global-timeout",
type=int,
metavar="GLOBAL_TIMEOUT",
action=EnvDefault,
envvar="globalTimeout",
help="Timeout in seconds for the whole test",
)
arg_parser.add_argument( arg_parser.add_argument(
"-o", "-o",
"--output_directory", "--output_directory",
@ -103,6 +111,7 @@ def main() -> None:
args.testscript.read_text(), args.testscript.read_text(),
args.output_directory.resolve(), args.output_directory.resolve(),
args.keep_vm_state, args.keep_vm_state,
args.global_timeout,
) as driver: ) as driver:
if args.interactive: if args.interactive:
history_dir = os.getcwd() history_dir = os.getcwd()

View file

@ -1,6 +1,8 @@
import os import os
import re import re
import signal
import tempfile import tempfile
import threading
from contextlib import contextmanager from contextlib import contextmanager
from pathlib import Path from pathlib import Path
from typing import Any, Callable, ContextManager, Dict, Iterator, List, Optional, Union from typing import Any, Callable, ContextManager, Dict, Iterator, List, Optional, Union
@ -41,6 +43,8 @@ class Driver:
vlans: List[VLan] vlans: List[VLan]
machines: List[Machine] machines: List[Machine]
polling_conditions: List[PollingCondition] polling_conditions: List[PollingCondition]
global_timeout: int
race_timer: threading.Timer
def __init__( def __init__(
self, self,
@ -49,9 +53,12 @@ class Driver:
tests: str, tests: str,
out_dir: Path, out_dir: Path,
keep_vm_state: bool = False, keep_vm_state: bool = False,
global_timeout: int = 24 * 60 * 60 * 7,
): ):
self.tests = tests self.tests = tests
self.out_dir = out_dir self.out_dir = out_dir
self.global_timeout = global_timeout
self.race_timer = threading.Timer(global_timeout, self.terminate_test)
tmp_dir = get_tmp_dir() tmp_dir = get_tmp_dir()
@ -82,6 +89,7 @@ class Driver:
def __exit__(self, *_: Any) -> None: def __exit__(self, *_: Any) -> None:
with rootlog.nested("cleanup"): with rootlog.nested("cleanup"):
self.race_timer.cancel()
for machine in self.machines: for machine in self.machines:
machine.release() machine.release()
@ -144,6 +152,10 @@ class Driver:
def run_tests(self) -> None: def run_tests(self) -> None:
"""Run the test script (for non-interactive test runs)""" """Run the test script (for non-interactive test runs)"""
rootlog.info(
f"Test will time out and terminate in {self.global_timeout} seconds"
)
self.race_timer.start()
self.test_script() self.test_script()
# TODO: Collect coverage data # TODO: Collect coverage data
for machine in self.machines: for machine in self.machines:
@ -161,6 +173,19 @@ class Driver:
with rootlog.nested("wait for all VMs to finish"): with rootlog.nested("wait for all VMs to finish"):
for machine in self.machines: for machine in self.machines:
machine.wait_for_shutdown() machine.wait_for_shutdown()
self.race_timer.cancel()
def terminate_test(self) -> None:
    """Abort the test run once the global timeout has expired.

    This is the callback handed to ``self.race_timer`` (a
    ``threading.Timer``), so it will usually be running in another thread
    than the one actually executing the test script.  It releases every
    machine and then sends SIGTERM to the driver's own process.
    """
    with rootlog.nested("timeout reached; test terminating..."):
        try:
            for machine in self.machines:
                machine.release()
        finally:
            # We cannot `sys.exit` from another thread, but we can at
            # least force our own process to get SIGTERM'ed.  This also
            # prevents a test script that catches every exception from
            # swallowing the termination and running on.
            #
            # The signal is sent from `finally` so that a failing
            # `release()` cannot leave the test running past its deadline.
            os.kill(os.getpid(), signal.SIGTERM)
def create_machine(self, args: Dict[str, Any]) -> Machine: def create_machine(self, args: Dict[str, Any]) -> Machine:
tmp_dir = get_tmp_dir() tmp_dir = get_tmp_dir()

View file

@ -42,6 +42,7 @@ rec {
, nodes ? {} , nodes ? {}
, testScript , testScript
, enableOCR ? false , enableOCR ? false
, globalTimeout ? (60 * 60)
, name ? "unnamed" , name ? "unnamed"
, skipTypeCheck ? false , skipTypeCheck ? false
# Skip linting (mainly intended for faster dev cycles) # Skip linting (mainly intended for faster dev cycles)

View file

@ -94,6 +94,7 @@ let
wrapProgram $out/bin/nixos-test-driver \ wrapProgram $out/bin/nixos-test-driver \
--set startScripts "''${vmStartScripts[*]}" \ --set startScripts "''${vmStartScripts[*]}" \
--set testScript "$out/test-script" \ --set testScript "$out/test-script" \
--set globalTimeout "${toString config.globalTimeout}" \
--set vlans '${toString vlans}' \ --set vlans '${toString vlans}' \
${lib.escapeShellArgs (lib.concatMap (arg: ["--add-flags" arg]) config.extraDriverArgs)} ${lib.escapeShellArgs (lib.concatMap (arg: ["--add-flags" arg]) config.extraDriverArgs)}
''; '';
@ -123,6 +124,18 @@ in
defaultText = "hostPkgs.qemu_test"; defaultText = "hostPkgs.qemu_test";
}; };
# Deadline for the whole test run; exported to the test driver as the
# `globalTimeout` environment variable by the driver wrapper.
globalTimeout = mkOption {
  description = mdDoc ''
    A global timeout for the complete test, expressed in seconds.
    Beyond that timeout, every resource will be killed and released and the test will fail.
    By default, we use a 1 hour timeout.
  '';
  # Strictly positive: the driver uses this value as the delay of its
  # termination timer, so zero or a negative number would abort the test
  # as soon as it starts.
  type = types.ints.positive;
  default = 60 * 60;
  example = 10 * 60;
};
enableOCR = mkOption { enableOCR = mkOption {
description = mdDoc '' description = mdDoc ''
Whether to enable Optical Character Recognition functionality for Whether to enable Optical Character Recognition functionality for

View file

@ -16,6 +16,15 @@ in
''; '';
}; };
# Internal option holding a package; see the description for its intended
# (test-framework-only) use.
rawTestDerivation = mkOption {
  internal = true;
  type = types.package;
  description = mdDoc ''
    Unfiltered version of `test`, for troubleshooting the test framework and `testBuildFailure` in the test framework's test suite.
    This is not intended for general use. Use `test` instead.
  '';
};
test = mkOption { test = mkOption {
type = types.package; type = types.package;
# TODO: can the interactive driver be configured to access the network? # TODO: can the interactive driver be configured to access the network?
@ -29,25 +38,26 @@ in
}; };
config = { config = {
# Derivation that runs the test driver non-interactively and writes its
# output to $out. Building it requires the "kvm" and "nixos-test" system
# features.
rawTestDerivation = hostPkgs.stdenv.mkDerivation {
  name = "vm-test-run-${config.name}";

  # Forward the test's passthru attributes and meta unchanged.
  inherit (config) passthru meta;

  requiredSystemFeatures = [ "kvm" "nixos-test" ];

  buildCommand = ''
    mkdir -p $out
    # effectively mute the XMLLogger
    export LOGFILE=/dev/null
    ${config.driver}/bin/nixos-test-driver -o $out
  '';
};
test = lib.lazyDerivation { # lazyDerivation improves performance when only passthru items and/or meta are used. test = lib.lazyDerivation { # lazyDerivation improves performance when only passthru items and/or meta are used.
derivation = hostPkgs.stdenv.mkDerivation { derivation = config.rawTestDerivation;
name = "vm-test-run-${config.name}";
requiredSystemFeatures = [ "kvm" "nixos-test" ];
buildCommand = ''
mkdir -p $out
# effectively mute the XMLLogger
export LOGFILE=/dev/null
${config.driver}/bin/nixos-test-driver -o $out
'';
passthru = config.passthru;
meta = config.meta;
};
inherit (config) passthru meta; inherit (config) passthru meta;
}; };

View file

@ -90,6 +90,14 @@ in {
lib-extend = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./nixos-test-driver/lib-extend.nix {}; lib-extend = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./nixos-test-driver/lib-extend.nix {};
node-name = runTest ./nixos-test-driver/node-name.nix; node-name = runTest ./nixos-test-driver/node-name.nix;
busybox = runTest ./nixos-test-driver/busybox.nix; busybox = runTest ./nixos-test-driver/busybox.nix;
# Checks that the VM test in ./nixos-test-driver/timeout.nix fails because of
# the driver's timeout. The script greps `$failed/testBuildFailure.log` for
# the driver's timeout message and compares the contents of
# `$failed/testBuildFailure.exit` with 143; only then is $out created.
driver-timeout = pkgs.runCommand "ensure-timeout-induced-failure" {
# `rawTestDerivation` of the timeout test, wrapped in `testBuildFailure`.
failed = pkgs.testers.testBuildFailure ((runTest ./nixos-test-driver/timeout.nix).config.rawTestDerivation);
} ''
grep -F "timeout reached; test terminating" $failed/testBuildFailure.log
# The program will always be terminated by SIGTERM (143) if it waits for the deadline thread.
[[ 143 = $(cat $failed/testBuildFailure.exit) ]]
touch $out
'';
}; };
# NixOS vm tests and non-vm unit tests # NixOS vm tests and non-vm unit tests

View file

@ -0,0 +1,15 @@
# VM test that is expected to FAIL: its script needs more time than the
# global timeout allows.
{
  name = "Test that sleep of 6 seconds fails a timeout of 5 seconds";

  # Deadline, in seconds, for the whole test.
  # NOTE(review): the driver appears to start its timer before the test
  # script runs, so boot time presumably counts against these 5 seconds
  # as well -- confirm.
  globalTimeout = 5;

  # A single VM without any extra configuration.
  nodes.machine = { pkgs, ... }: { };

  testScript = ''
    start_all()
    machine.wait_for_unit("multi-user.target")
    machine.succeed("sleep 6")
  '';
}