From 40f8d8b2569f11e0be3f734efd74f5e5e0b96ab5 Mon Sep 17 00:00:00 2001
From: Elliot Cameron
Date: Fri, 21 Apr 2023 00:39:40 -0400
Subject: [PATCH 1/7] jsoncpp: fix enableStatic

The workaround is no longer needed for static builds.
---
 pkgs/development/libraries/jsoncpp/default.nix | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/pkgs/development/libraries/jsoncpp/default.nix b/pkgs/development/libraries/jsoncpp/default.nix
index 41e9a2d0a03a..59572afc627c 100644
--- a/pkgs/development/libraries/jsoncpp/default.nix
+++ b/pkgs/development/libraries/jsoncpp/default.nix
@@ -40,19 +40,13 @@ stdenv.mkDerivation rec {
     "-DBUILD_SHARED_LIBS=ON"
     "-DBUILD_OBJECT_LIBS=OFF"
     "-DJSONCPP_WITH_CMAKE_PACKAGE=ON"
+    "-DBUILD_STATIC_LIBS=${if enableStatic then "ON" else "OFF"}"
   ]
     # the test's won't compile if secureMemory is used because there is no
     # comparison operators and conversion functions between
     # std::basic_string<..., Json::SecureAllocator> vs.
     # std::basic_string<..., [default allocator]>
-    ++ lib.optional ((stdenv.buildPlatform != stdenv.hostPlatform) || secureMemory) "-DJSONCPP_WITH_TESTS=OFF"
-    ++ lib.optional (!enableStatic) "-DBUILD_STATIC_LIBS=OFF";
-
-  # this is fixed and no longer necessary in 1.9.5 but there they use
-  # memset_s without switching to a different c++ standard in the cmake files
-  postInstall = lib.optionalString enableStatic ''
-    (cd $out/lib && ln -sf libjsoncpp_static.a libjsoncpp.a)
-  '';
+    ++ lib.optional ((stdenv.buildPlatform != stdenv.hostPlatform) || secureMemory) "-DJSONCPP_WITH_TESTS=OFF";
 
   meta = with lib; {
     homepage = "https://github.com/open-source-parsers/jsoncpp";
From 6ba492309955660d400e13d45dc3b35da7ed3943 Mon Sep 17 00:00:00 2001
From: Elliot Cameron
Date: Mon, 24 Apr 2023 23:37:26 -0400
Subject: [PATCH 2/7] libevent: fix sslSupport = false

Pass --disable-openssl to configure when sslSupport is false.
---
 pkgs/development/libraries/libevent/default.nix | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pkgs/development/libraries/libevent/default.nix b/pkgs/development/libraries/libevent/default.nix
index bd5edec68a06..782d86f1f581 100644
--- a/pkgs/development/libraries/libevent/default.nix
+++ b/pkgs/development/libraries/libevent/default.nix
@@ -20,6 +20,8 @@ stdenv.mkDerivation rec {
     })
   ];
 
+  configureFlags = lib.optional (!sslSupport) "--disable-openssl";
+
   preConfigure = lib.optionalString (lib.versionAtLeast stdenv.hostPlatform.darwinMinVersion "11") ''
     MACOSX_DEPLOYMENT_TARGET=10.16
   '';
From 85888a1298b3560579a3f5e98995c7e66e210fc9 Mon Sep 17 00:00:00 2001
From: Elliot Cameron
Date: Mon, 24 Apr 2023 23:39:59 -0400
Subject: [PATCH 3/7] tclap: add 1.4 variant

Rename the existing expression to 1.2.nix and add a snapshot of the 1.4
branch next to it. The `tclap` attribute keeps pointing at tclap_1_2.
The 1.4 source is fetched over https instead of the unauthenticated
git:// transport.
---
 .../libraries/tclap/{default.nix => 1.2.nix}  |  0
 pkgs/development/libraries/tclap/1.4.nix      | 48 +++++++++++++++++++
 pkgs/top-level/all-packages.nix               |  6 ++-
 3 files changed, 53 insertions(+), 1 deletion(-)
 rename pkgs/development/libraries/tclap/{default.nix => 1.2.nix} (100%)
 create mode 100644 pkgs/development/libraries/tclap/1.4.nix

diff --git a/pkgs/development/libraries/tclap/default.nix b/pkgs/development/libraries/tclap/1.2.nix
similarity index 100%
rename from pkgs/development/libraries/tclap/default.nix
rename to pkgs/development/libraries/tclap/1.2.nix
diff --git a/pkgs/development/libraries/tclap/1.4.nix b/pkgs/development/libraries/tclap/1.4.nix
new file mode 100644
index 000000000000..7a0b57659633
--- /dev/null
+++ b/pkgs/development/libraries/tclap/1.4.nix
@@ -0,0 +1,48 @@
+{ lib
+, stdenv
+, fetchgit
+, cmake
+, doxygen
+, python3
+}:
+stdenv.mkDerivation {
+  pname = "tclap";
+
+  # This version is slightly newer than 1.4.0-rc1:
+  # See https://github.com/mirror/tclap/compare/1.4.0-rc1..3feeb7b2499b37d9cb80890cadaf7c905a9a50c6
+  version = "1.4-3feeb7b";
+
+  src = fetchgit {
+    url = "https://git.code.sf.net/p/tclap/code";
+    rev = "3feeb7b2499b37d9cb80890cadaf7c905a9a50c6"; # 1.4 branch
+    hash = "sha256-byLianB6Vf+I9ABMmsmuoGU2o5RO9c5sMckWW0F+GDM=";
+  };
+
+  postPatch = ''
+    substituteInPlace CMakeLists.txt \
+      --replace '$'{CMAKE_INSTALL_LIBDIR_ARCHIND} '$'{CMAKE_INSTALL_LIBDIR}
+    substituteInPlace packaging/pkgconfig.pc.in \
+      --replace '$'{prefix}/@CMAKE_INSTALL_INCLUDEDIR@ @CMAKE_INSTALL_FULL_INCLUDEDIR@
+  '';
+
+  nativeBuildInputs = [
+    cmake
+    doxygen
+    python3
+  ];
+
+  # Installing docs is broken in this package+version so we stub out some files
+  preInstall = ''
+    touch docs/manual.html
+  '';
+
+  doCheck = true;
+
+  meta = with lib; {
+    description = "Templatized C++ Command Line Parser Library (v1.4)";
+    homepage = "https://tclap.sourceforge.net/";
+    license = licenses.mit;
+    maintainers = teams.deshaw.members;
+    platforms = platforms.all;
+  };
+}
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index 523be31c5a0b..38c00cfc82b5 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -24858,7 +24858,11 @@ with pkgs;
 
   taskflow = callPackage ../development/libraries/taskflow { };
 
-  tclap = callPackage ../development/libraries/tclap { };
+  tclap = tclap_1_2;
+
+  tclap_1_2 = callPackage ../development/libraries/tclap/1.2.nix { };
+
+  tclap_1_4 = callPackage ../development/libraries/tclap/1.4.nix { };
 
   tcllib = callPackage ../development/libraries/tcllib { };
 
From 65497764e8e176901560c9adf576e1fefc979b8f Mon Sep 17 00:00:00 2001
From: Elliot Cameron
Date: Tue, 11 Jul 2023 17:59:32 -0400
Subject: [PATCH 4/7] cudatoolkit: fix builds for 10.*

libnvrtc.so is not found in the same location in all versions of
cudatoolkit.
---
 pkgs/development/compilers/cudatoolkit/common.nix | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pkgs/development/compilers/cudatoolkit/common.nix b/pkgs/development/compilers/cudatoolkit/common.nix
index 2e15012452a8..8037bd2a2aca 100644
--- a/pkgs/development/compilers/cudatoolkit/common.nix
+++ b/pkgs/development/compilers/cudatoolkit/common.nix
@@ -177,7 +177,9 @@ backendStdenv.mkDerivation rec {
     "libcom_err.so.2"
   ];
 
-  preFixup = ''
+  preFixup = if lib.versionOlder version "11" then ''
+    patchelf $out/targets/*/lib/libnvrtc.so --add-needed libnvrtc-builtins.so
+  '' else ''
     patchelf $out/lib64/libnvrtc.so --add-needed libnvrtc-builtins.so
   '';
 
From 5ba94f8629027423cee3d60458f524ea8abc493e Mon Sep 17 00:00:00 2001
From: Elliot Cameron
Date: Tue, 11 Jul 2023 18:22:42 -0400
Subject: [PATCH 5/7] cudatoolkit: fix build for 12.0.1

Many Qt dependencies were added only for 12.1 and above, but 12.0.1 also
needs them.
---
 pkgs/development/compilers/cudatoolkit/common.nix | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkgs/development/compilers/cudatoolkit/common.nix b/pkgs/development/compilers/cudatoolkit/common.nix
index 8037bd2a2aca..eba730b3a9cf 100644
--- a/pkgs/development/compilers/cudatoolkit/common.nix
+++ b/pkgs/development/compilers/cudatoolkit/common.nix
@@ -132,7 +132,7 @@ backendStdenv.mkDerivation rec {
     ucx
     xorg.libxshmfence
     xorg.libxkbfile
-  ] ++ (lib.optionals (lib.versionAtLeast version "12.1") (map lib.getLib ([
+  ] ++ (lib.optionals (lib.versionAtLeast version "12") (map lib.getLib ([
     # Used by `/target-linux-x64/CollectX/clx` and `/target-linux-x64/CollectX/libclx_api.so` for:
     # - `libcurl.so.4`
     curlMinimal
From 1cdc3752a5f06c0891ac326fc934c87dd2aa30ca Mon Sep 17 00:00:00 2001
From: Elliot Cameron
Date: Mon, 24 Apr 2023 23:42:44 -0400
Subject: [PATCH 6/7] dcgm: init at 3.1.8

Built with gcc11Stdenv against CUDA 10.2, 11.8 and 12, using static
jsoncpp and libevent. Libraries that end up in the DCGM binaries are
listed in buildInputs; nativeBuildInputs only holds build-time tools.
---
 pkgs/os-specific/linux/dcgm/default.nix | 147 ++++++++++++++++++++++++
 pkgs/top-level/all-packages.nix         |   2 +
 2 files changed, 149 insertions(+)
 create mode 100644 pkgs/os-specific/linux/dcgm/default.nix

diff --git a/pkgs/os-specific/linux/dcgm/default.nix b/pkgs/os-specific/linux/dcgm/default.nix
new file mode 100644
index 000000000000..36c7e3ca6880
--- /dev/null
+++ b/pkgs/os-specific/linux/dcgm/default.nix
@@ -0,0 +1,147 @@
+{ lib
+, callPackage
+, gcc11Stdenv
+, fetchFromGitHub
+, addOpenGLRunpath
+, catch2
+, cmake
+, cudaPackages_10_2
+, cudaPackages_11_8
+, cudaPackages_12
+, fmt_9
+, git
+, jsoncpp
+, libevent
+, plog
+, python3
+, symlinkJoin
+, tclap_1_4
+, yaml-cpp
+}:
+let
+  # Flags copied from DCGM's libevent build script
+  libevent-nossl = libevent.override { sslSupport = false; };
+  libevent-nossl-static = libevent-nossl.overrideAttrs (super: {
+    CFLAGS = "-Wno-cast-function-type -Wno-implicit-fallthrough -fPIC";
+    CXXFLAGS = "-Wno-cast-function-type -Wno-implicit-fallthrough -fPIC";
+    configureFlags = super.configureFlags ++ [ "--disable-shared" "--with-pic" ];
+  });
+
+  jsoncpp-static = jsoncpp.override { enableStatic = true; };
+
+  # DCGM depends on 3 different versions of CUDA at the same time.
+  # The runtime closure, thankfully, is quite small because most things
+  # are statically linked.
+  cudaPackageSetByVersion = [
+    {
+      version = "10";
+      # Nixpkgs cudaPackages_10 doesn't have redist packages broken out.
+      pkgSet = [
+        cudaPackages_10_2.cudatoolkit
+        cudaPackages_10_2.cudatoolkit.lib
+      ];
+    }
+    {
+      version = "11";
+      pkgSet = getCudaPackages cudaPackages_11_8;
+    }
+    {
+      version = "12";
+      pkgSet = getCudaPackages cudaPackages_12;
+    }
+  ];
+
+  # Select needed redist packages from cudaPackages
+  # C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/scripts/0080_cuda.sh#L24-L39
+  getCudaPackages = p: with p; [
+    cuda_cccl
+    cuda_cudart
+    cuda_nvcc
+    cuda_nvml_dev
+    libcublas
+    libcufft
+    libcurand
+  ];
+
+  # Builds CMake code to add CUDA paths for include and lib.
+  mkAppendCudaPaths = { version, pkgSet }:
+    let
+      # The DCGM CMake assumes that the folder containing cuda.h contains all headers, so we must
+      # combine everything together for headers to work.
+      # It would be more convenient to use symlinkJoin on *just* the include subdirectories
+      # of each package, but not all of them have an include directory and making that work
+      # is more effort than it's worth for this temporary, build-time package.
+      combined = symlinkJoin {
+        name = "cuda-combined-${version}";
+        paths = pkgSet;
+      };
+      # The combined package above breaks the build for some reason so we just configure
+      # each package's library path.
+      libs = lib.concatMapStringsSep " " (x: ''"${x}/lib"'') pkgSet;
+    in ''
+      list(APPEND Cuda${version}_INCLUDE_PATHS "${combined}/include")
+      list(APPEND Cuda${version}_LIB_PATHS ${libs})
+    '';
+
+# gcc11 is required by DCGM's very particular build system
+# C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/build.sh#L22
+in gcc11Stdenv.mkDerivation rec {
+  pname = "dcgm";
+  version = "3.1.8";
+
+  src = fetchFromGitHub {
+    owner = "NVIDIA";
+    repo = "DCGM";
+    rev = "refs/tags/v${version}";
+    hash = "sha256-OXqXkP2ZUNPzafGIgJ0MKa39xB84keVFFYl+JsHgnks=";
+  };
+
+  # Add our paths to the CUDA paths so FindCuda.cmake can find them.
+  EXTRA_CUDA_PATHS = lib.concatMapStringsSep "\n" mkAppendCudaPaths cudaPackageSetByVersion;
+  prePatch = ''
+    echo "$EXTRA_CUDA_PATHS"$'\n'"$(cat cmake/FindCuda.cmake)" > cmake/FindCuda.cmake
+  '';
+
+  hardeningDisable = [ "all" ];
+
+  nativeBuildInputs = [
+    addOpenGLRunpath
+    cmake
+    git
+    python3
+  ];
+
+  buildInputs = [
+    catch2
+    fmt_9
+    yaml-cpp
+
+    jsoncpp-static
+    jsoncpp-static.dev
+    libevent-nossl-static
+    libevent-nossl-static.dev
+    plog.dev # header-only
+    tclap_1_4 # header-only
+  ];
+
+  # libcuda.so must be found at runtime because it is supplied by the NVIDIA
+  # driver. autoAddOpenGLRunpathHook breaks on the statically linked exes.
+  postFixup = ''
+    find "$out/bin" "$out/lib" -type f -executable -print0 | while IFS= read -r -d "" f; do
+      if isELF "$f" && [[ $(patchelf --print-needed "$f" || true) == *libcuda.so* ]]; then
+        addOpenGLRunpath "$f"
+      fi
+    done
+  '';
+
+  disallowedReferences = lib.concatMap (x: x.pkgSet) cudaPackageSetByVersion;
+
+  meta = with lib; {
+    description = "Data Center GPU Manager (DCGM) is a daemon that allows users to monitor NVIDIA data-center GPUs.";
+    homepage = "https://developer.nvidia.com/dcgm";
+    license = licenses.asl20;
+    maintainers = teams.deshaw.members;
+    mainProgram = "dcgmi";
+    platforms = platforms.linux;
+  };
+}
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index 38c00cfc82b5..aff829e92770 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -541,6 +541,8 @@ with pkgs;
 
   dbip-country-lite = callPackage ../data/misc/dbip-country-lite { };
 
+  dcgm = callPackage ../os-specific/linux/dcgm { };
+
   dhallDirectoryToNix = callPackage ../build-support/dhall/directory-to-nix.nix { };
 
   dhallPackageToNix = callPackage ../build-support/dhall/package-to-nix.nix { };
From b25101f159010487c512c14ddde689dca253111d Mon Sep 17 00:00:00 2001
From: Elliot Cameron
Date: Tue, 30 May 2023 12:05:23 -0400
Subject: [PATCH 7/7] prometheus-dcgm-exporter: init at 3.1.8-3.1.5

Links against the dcgm package. The check phase is disabled because the
tests try to talk to a running DCGM service.
---
 .../prometheus/dcgm-exporter/default.nix      | 66 +++++++++++++++++++
 pkgs/top-level/all-packages.nix               |  1 +
 2 files changed, 67 insertions(+)
 create mode 100644 pkgs/servers/monitoring/prometheus/dcgm-exporter/default.nix

diff --git a/pkgs/servers/monitoring/prometheus/dcgm-exporter/default.nix b/pkgs/servers/monitoring/prometheus/dcgm-exporter/default.nix
new file mode 100644
index 000000000000..173a978cf2eb
--- /dev/null
+++ b/pkgs/servers/monitoring/prometheus/dcgm-exporter/default.nix
@@ -0,0 +1,66 @@
+{ lib
+, buildGoModule
+, fetchFromGitHub
+, cudaPackages
+, dcgm
+, linuxPackages
+}:
+buildGoModule rec {
+  pname = "dcgm-exporter";
+  version = "3.1.8-3.1.5";
+
+  src = fetchFromGitHub {
+    owner = "NVIDIA";
+    repo = pname;
+    rev = "refs/tags/${version}";
+    hash = "sha256-Jzv3cU3gmGIXV+DV3wV/1zSWwz18s3Jax6JC7WZW7Z4=";
+  };
+
+  # Upgrade to go 1.17 during the vendoring FOD build because it fails otherwise.
+  overrideModAttrs = _: {
+    preBuild = ''
+      substituteInPlace go.mod --replace 'go 1.16' 'go 1.17'
+      go mod tidy
+    '';
+    postInstall = ''
+      cp go.mod "$out/go.mod"
+    '';
+  };
+
+  CGO_LDFLAGS = "-ldcgm";
+
+  buildInputs = [
+    dcgm
+  ];
+
+  # gonvml and go-dcgm do not work with ELF BIND_NOW hardening because not all
+  # symbols are available on startup.
+  hardeningDisable = [ "bindnow" ];
+
+  # Copy the modified go.mod we got from the vendoring process.
+  preBuild = ''
+    cp vendor/go.mod go.mod
+  '';
+
+  vendorHash = "sha256-KMCV79kUY1sNYysH0MmB7pVU98r7v+DpLIoYHxyyG4U=";
+
+  nativeBuildInputs = [
+    cudaPackages.autoAddOpenGLRunpathHook
+  ];
+
+  # Tests try to interact with running DCGM service.
+  doCheck = false;
+
+  postFixup = ''
+    patchelf --add-needed libnvidia-ml.so "$out/bin/dcgm-exporter"
+  '';
+
+  meta = with lib; {
+    description = "NVIDIA GPU metrics exporter for Prometheus leveraging DCGM";
+    homepage = "https://github.com/NVIDIA/dcgm-exporter";
+    license = licenses.asl20;
+    maintainers = teams.deshaw.members;
+    mainProgram = "dcgm-exporter";
+    platforms = platforms.linux;
+  };
+}
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index aff829e92770..f7e722a875fe 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -26693,6 +26693,7 @@ with pkgs;
   prometheus-cloudflare-exporter = callPackage ../servers/monitoring/prometheus/cloudflare-exporter.nix { };
   prometheus-collectd-exporter = callPackage ../servers/monitoring/prometheus/collectd-exporter.nix { };
   prometheus-consul-exporter = callPackage ../servers/monitoring/prometheus/consul-exporter.nix { };
+  prometheus-dcgm-exporter = callPackage ../servers/monitoring/prometheus/dcgm-exporter { };
   prometheus-dnsmasq-exporter = callPackage ../servers/monitoring/prometheus/dnsmasq-exporter.nix { };
   prometheus-dovecot-exporter = callPackage ../servers/monitoring/prometheus/dovecot-exporter.nix { };
   prometheus-domain-exporter = callPackage ../servers/monitoring/prometheus/domain-exporter.nix { };