services/hardware: add nvidia-container-toolkit

This commit is contained in:
Rafael Fernández López 2024-01-28 13:00:32 +01:00
parent fd464f0543
commit 8ba61ebb8a
9 changed files with 145 additions and 29 deletions

View file

@ -25,6 +25,10 @@ In addition to numerous new and upgraded packages, this release has the followin
- A new option `systemd.sysusers.enable` was added. If enabled, users and
groups are created with systemd-sysusers instead of with a custom perl script.
- A new option `virtualisation.containers.cdi` was added. It contains `static` and `dynamic` attributes (corresponding to `/etc/cdi` and `/run/cdi` respectively) to configure the Container Device Interface (CDI).
- `virtualisation.docker.enableNvidia` and `virtualisation.podman.enableNvidia` options are deprecated. `virtualisation.containers.cdi.dynamic.nvidia.enable` should be used instead. This option will expose GPUs on containers with the `--device` CLI option. This is supported by Docker 25, Podman 3.2.0 and Singularity 4. Any container runtime that supports the CDI specification will take advantage of this feature.
- A new option `system.etc.overlay.enable` was added. If enabled, `/etc` is
mounted via an overlayfs instead of being created by a custom perl script.

View file

@ -546,6 +546,7 @@
./services/hardware/kanata.nix
./services/hardware/lcd.nix
./services/hardware/lirc.nix
./services/hardware/nvidia-container-toolkit-cdi-generator
./services/hardware/nvidia-optimus.nix
./services/hardware/openrgb.nix
./services/hardware/pcscd.nix

View file

@ -0,0 +1,39 @@
{ config, lib, pkgs }: let
mountOptions = { options = ["ro" "nosuid" "nodev" "bind"]; };
mounts = [
{ hostPath = "${lib.getBin config.hardware.nvidia.package}/bin/nvidia-cuda-mps-control";
containerPath = "/usr/bin/nvidia-cuda-mps-control"; }
{ hostPath = "${lib.getBin config.hardware.nvidia.package}/bin/nvidia-cuda-mps-server";
containerPath = "/usr/bin/nvidia-cuda-mps-server"; }
{ hostPath = "${lib.getBin config.hardware.nvidia.package}/bin/nvidia-debugdump";
containerPath = "/usr/bin/nvidia-debugdump"; }
{ hostPath = "${lib.getBin config.hardware.nvidia.package}/bin/nvidia-powerd";
containerPath = "/usr/bin/nvidia-powerd"; }
{ hostPath = "${lib.getBin config.hardware.nvidia.package}/bin/nvidia-smi";
containerPath = "/usr/bin/nvidia-smi"; }
{ hostPath = "${pkgs.nvidia-container-toolkit}/bin/nvidia-ctk";
containerPath = "/usr/bin/nvidia-ctk"; }
{ hostPath = "${pkgs.glibc}/lib";
containerPath = "${pkgs.glibc}/lib"; }
{ hostPath = "${pkgs.glibc}/lib64";
containerPath = "${pkgs.glibc}/lib64"; }
];
jqAddMountExpression = ".containerEdits.mounts[.containerEdits.mounts | length] |= . +";
mountsToJq = lib.concatMap
(mount:
["${pkgs.jq}/bin/jq '${jqAddMountExpression} ${builtins.toJSON (mount // mountOptions)}'"])
mounts;
in ''
#! ${pkgs.runtimeShell}
function cdiGenerate {
${pkgs.nvidia-container-toolkit}/bin/nvidia-ctk cdi generate \
--format json \
--ldconfig-path ${pkgs.glibc.bin}/bin/ldconfig \
--library-search-path ${config.hardware.nvidia.package}/lib \
--nvidia-ctk-path ${pkgs.nvidia-container-toolkit}/bin/nvidia-ctk
}
cdiGenerate | \
${lib.concatStringsSep " | " mountsToJq} > $RUNTIME_DIRECTORY/nvidia-container-toolkit.json
''

View file

@ -0,0 +1,38 @@
{ config, lib, pkgs, ... }:
{
options = {
hardware.nvidia-container-toolkit-cdi-generator.enable = lib.mkOption {
default = false;
internal = true;
visible = false;
type = lib.types.bool;
description = lib.mdDoc ''
Enable dynamic CDI configuration for NVidia devices by running
nvidia-container-toolkit on boot.
'';
};
};
config = {
systemd.services.nvidia-container-toolkit-cdi-generator = lib.mkIf config.hardware.nvidia-container-toolkit-cdi-generator.enable {
description = "Container Device Interface (CDI) for Nvidia generator";
wantedBy = [ "multi-user.target" ];
after = [ "systemd-udev-settle.service" ];
serviceConfig = {
RuntimeDirectory = "cdi";
RemainAfterExit = true;
ExecStart = let
script = (pkgs.writeScriptBin "nvidia-cdi-generator"
(import ./cdi-generate.nix { inherit config lib pkgs; })); in (lib.getExe script);
Type = "oneshot";
};
};
};
}

View file

@ -28,29 +28,39 @@ in
description = lib.mdDoc "Enable the OCI seccomp BPF hook";
};
cdi = mkOption {
type = types.attrs;
default = { };
description = lib.mdDoc ''
Declarative CDI specification. Each key of the attribute set
will be mapped to a file in /etc/cdi. It is required for every
key to be provided in JSON format.
'';
example = {
some-vendor = builtins.fromJSON ''
{
"cdiVersion": "0.5.0",
"kind": "some-vendor.com/foo",
"devices": [],
"containerEdits": []
}
'';
cdi = {
dynamic.nvidia.enable = mkOption {
type = types.bool;
default = false;
description = lib.mdDoc ''
Enable dynamic CDI configuration for NVidia devices by running nvidia-container-toolkit on boot.
'';
};
some-other-vendor = {
cdiVersion = "0.5.0";
kind = "some-other-vendor.com/bar";
devices = [];
containerEdits = [];
static = mkOption {
type = types.attrs;
default = { };
description = lib.mdDoc ''
Declarative CDI specification. Each key of the attribute set
will be mapped to a file in /etc/cdi. It is required for every
key to be provided in JSON format.
'';
example = {
some-vendor = builtins.fromJSON ''
{
"cdiVersion": "0.5.0",
"kind": "some-vendor.com/foo",
"devices": [],
"containerEdits": []
}
'';
some-other-vendor = {
cdiVersion = "0.5.0";
kind = "some-other-vendor.com/bar";
devices = [];
containerEdits = [];
};
};
};
};
@ -140,6 +150,8 @@ in
config = lib.mkIf cfg.enable {
hardware.nvidia-container-toolkit-cdi-generator.enable = lib.mkIf cfg.cdi.dynamic.nvidia.enable true;
virtualisation.containers.containersConf.cniPlugins = [ pkgs.cni-plugins ];
virtualisation.containers.containersConf.settings = {
@ -152,11 +164,11 @@ in
};
environment.etc = let
cdiConfigurationFiles = (lib.attrsets.mapAttrs'
cdiStaticConfigurationFiles = (lib.attrsets.mapAttrs'
(name: value:
lib.attrsets.nameValuePair "cdi/${name}.json"
{ text = builtins.toJSON value; })
cfg.cdi);
cfg.cdi.static);
in {
"containers/containers.conf".source =
toml.generate "containers.conf" cfg.containersConf.settings;
@ -171,7 +183,7 @@ in
"containers/policy.json".source =
if cfg.policy != { } then pkgs.writeText "policy.json" (builtins.toJSON cfg.policy)
else "${pkgs.skopeo.policy}/default-policy.json";
} // cdiConfigurationFiles;
} // cdiStaticConfigurationFiles;
};

View file

@ -72,6 +72,8 @@ in
type = types.bool;
default = false;
description = lib.mdDoc ''
**Deprecated**, please use virtualisation.containers.cdi.dynamic.nvidia.enable instead.
Enable nvidia-docker wrapper, supporting NVIDIA GPUs inside docker containers.
'';
};
@ -185,6 +187,16 @@ in
users.groups.docker.gid = config.ids.gids.docker;
systemd.packages = [ cfg.package ];
# Docker 25.0.0 supports CDI by default
# (https://docs.docker.com/engine/release-notes/25.0/#new). Encourage
# moving to CDI as opposed to having deprecated runtime
# wrappers.
warnings = lib.optionals (cfg.enableNvidia && (lib.strings.versionAtLeast cfg.package.version "25")) [
''
You have set virtualisation.docker.enableNvidia. This option is deprecated, please set virtualisation.containers.cdi.dynamic.nvidia.enable instead.
''
];
systemd.services.docker = {
wantedBy = optional cfg.enableOnBoot "multi-user.target";
after = [ "network.target" "docker.socket" ];

View file

@ -82,6 +82,8 @@ in
type = types.bool;
default = false;
description = lib.mdDoc ''
**Deprecated**, please use virtualisation.containers.cdi.dynamic.nvidia.enable instead.
Enable use of NVidia GPUs from within podman containers.
'';
};
@ -166,6 +168,12 @@ in
inherit (networkConfig) dns_enabled network_interface;
in
lib.mkIf cfg.enable {
warnings = lib.optionals cfg.enableNvidia [
''
You have set virtualisation.podman.enableNvidia. This option is deprecated, please set virtualisation.containers.cdi.dynamic.nvidia.enable instead.
''
];
environment.systemPackages = [ cfg.package ]
++ lib.optional cfg.dockerCompat dockerCompat;

View file

@ -6,8 +6,8 @@
, linkFarm
, writeShellScript
, formats
, containerRuntimePath
, configTemplate
, containerRuntimePath ? null
, configTemplate ? null
, configTemplatePath ? null
, libnvidia-container
, cudaPackages
@ -91,7 +91,7 @@ buildGoModule rec {
makeWrapper
];
preConfigure = ''
preConfigure = lib.optionalString (containerRuntimePath != null) ''
# Ensure the runc symlink isn't broken:
if ! readlink --quiet --canonicalize-existing "${isolatedContainerRuntimePath}/runc" ; then
echo "${isolatedContainerRuntimePath}/runc: broken symlink" >&2
@ -109,7 +109,7 @@ buildGoModule rec {
in
[ "-skip" "${builtins.concatStringsSep "|" skippedTests}" ];
postInstall = ''
postInstall = lib.optionalString (containerRuntimePath != null) ''
mkdir -p $out/etc/nvidia-container-runtime
# nvidia-container-runtime invokes docker-runc or runc if that isn't

View file

@ -24067,6 +24067,8 @@ with pkgs;
nv-codec-headers-11 = callPackage ../development/libraries/nv-codec-headers/11_x.nix { };
nv-codec-headers-12 = callPackage ../development/libraries/nv-codec-headers/12_x.nix { };
nvidia-container-toolkit = callPackage ../applications/virtualization/nvidia-container-toolkit { };
nvidiaCtkPackages =
callPackage ../applications/virtualization/nvidia-container-toolkit/packages.nix
{ };