diff --git a/pkgs/applications/science/math/caffe/default.nix b/pkgs/applications/science/math/caffe/default.nix index 6595f0b846dd..25f7229a845a 100644 --- a/pkgs/applications/science/math/caffe/default.nix +++ b/pkgs/applications/science/math/caffe/default.nix @@ -153,7 +153,7 @@ stdenv.mkDerivation rec { || cudaSupport || !(leveldbSupport -> (leveldb != null && snappy != null)) || !(cudnnSupport -> (hasCudnn && cudaSupport)) - || !(ncclSupport -> cudaSupport) + || !(ncclSupport -> (cudaSupport && !nccl.meta.unsupported)) || !(pythonSupport -> (python != null && numpy != null)) ; license = licenses.bsd2; diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix index d92e07bb1b0b..e40d58736e8b 100644 --- a/pkgs/development/cuda-modules/cuda/overrides.nix +++ b/pkgs/development/cuda-modules/cuda/overrides.nix @@ -72,7 +72,7 @@ attrsets.filterAttrs (attr: _: (builtins.hasAttr attr prev)) { env.autoPatchelfIgnoreMissingDeps = prevAttrs.env.autoPatchelfIgnoreMissingDeps + " libnvrm_gpu.so libnvrm_mem.so libnvdla_runtime.so"; # `cuda_compat` only works on aarch64-linux, and only when building for Jetson devices. - brokenConditions = prevAttrs.brokenConditions // { + badPlatformsConditions = prevAttrs.badPlatformsConditions // { "Trying to use cuda_compat on aarch64-linux targeting non-Jetson devices" = !final.flags.isJetsonBuild; }; diff --git a/pkgs/development/cuda-modules/cudnn/shims.nix b/pkgs/development/cuda-modules/cudnn/shims.nix index e9eca8ef7c8b..a36ee26dab5d 100644 --- a/pkgs/development/cuda-modules/cudnn/shims.nix +++ b/pkgs/development/cuda-modules/cudnn/shims.nix @@ -1,10 +1,18 @@ # Shims to mimic the shape of ../modules/generic/manifests/{feature,redistrib}/release.nix -{package, redistArch}: { - featureRelease.${redistArch}.outputs = { - lib = true; - static = true; - dev = true; + lib, + package, + # redistArch :: String + # String is "unsupported" if the given architecture is unsupported. 
+ redistArch, +}: +{ + featureRelease = lib.optionalAttrs (redistArch != "unsupported") { + ${redistArch}.outputs = { + lib = true; + static = true; + dev = true; + }; }; redistribRelease = { name = "NVIDIA CUDA Deep Neural Network library (cuDNN)"; diff --git a/pkgs/development/cuda-modules/cutensor/extension.nix b/pkgs/development/cuda-modules/cutensor/extension.nix index b762fd22ede8..534941887c6e 100644 --- a/pkgs/development/cuda-modules/cutensor/extension.nix +++ b/pkgs/development/cuda-modules/cutensor/extension.nix @@ -92,6 +92,7 @@ let # A release is supported if it has a libPath that matches our CUDA version for our platform. # LibPath are not constant across the same release -- one platform may support fewer # CUDA versions than another. + # redistArch :: String redistArch = flags.getRedistArch hostPlatform.system; # platformIsSupported :: Manifests -> Boolean platformIsSupported = diff --git a/pkgs/development/cuda-modules/flags.nix b/pkgs/development/cuda-modules/flags.nix index a123c7bce5a1..d5e01be01fd5 100644 --- a/pkgs/development/cuda-modules/flags.nix +++ b/pkgs/development/cuda-modules/flags.nix @@ -131,39 +131,29 @@ let # `linux-aarch64` redist (which is for Jetson devices) if we're building any Jetson devices. # Since both are based on aarch64, we can only have one or the other, otherwise there's an # ambiguity as to which should be used. + # NOTE: This function *will* be called by unsupported systems because `cudaPackages` is part of + # `all-packages.nix`, which is evaluated on all systems. As such, we need to handle unsupported + # systems gracefully. 
# getRedistArch :: String -> String - getRedistArch = - nixSystem: - if nixSystem == "aarch64-linux" then - if jetsonTargets != [] then "linux-aarch64" else "linux-sbsa" - else if nixSystem == "x86_64-linux" then - "linux-x86_64" - else if nixSystem == "ppc64le-linux" then - "linux-ppc64le" - else if nixSystem == "x86_64-windows" then - "windows-x86_64" - else - "unsupported"; + getRedistArch = nixSystem: attrsets.attrByPath [ nixSystem ] "unsupported" { + aarch64-linux = if jetsonTargets != [] then "linux-aarch64" else "linux-sbsa"; + x86_64-linux = "linux-x86_64"; + ppc64le-linux = "linux-ppc64le"; + x86_64-windows = "windows-x86_64"; + }; # Maps NVIDIA redist arch to Nix system. - # It is imperative that we include the boolean condition based on jetsonTargets to ensure - # we don't advertise availability of packages only available on server-grade ARM - # as being available for the Jetson, since both `linux-sbsa` and `linux-aarch64` are - # mapped to the Nix system `aarch64-linux`. - getNixSystem = - redistArch: - if redistArch == "linux-sbsa" && jetsonTargets == [] then - "aarch64-linux" - else if redistArch == "linux-aarch64" && jetsonTargets != [] then - "aarch64-linux" - else if redistArch == "linux-x86_64" then - "x86_64-linux" - else if redistArch == "linux-ppc64le" then - "ppc64le-linux" - else if redistArch == "windows-x86_64" then - "x86_64-windows" - else - "unsupported-${redistArch}"; + # NOTE: This function *will* be called by unsupported systems because `cudaPackages` is part of + # `all-packages.nix`, which is evaluated on all systems. As such, we need to handle unsupported + # systems gracefully. 
+ # getNixSystem :: String -> String + getNixSystem = redistArch: attrsets.attrByPath [ redistArch ] "unsupported-${redistArch}" { + linux-sbsa = "aarch64-linux"; + linux-aarch64 = "aarch64-linux"; + linux-x86_64 = "x86_64-linux"; + linux-ppc64le = "ppc64le-linux"; + windows-x86_64 = "x86_64-windows"; + }; formatCapabilities = { diff --git a/pkgs/development/cuda-modules/generic-builders/manifest.nix b/pkgs/development/cuda-modules/generic-builders/manifest.nix index 5cde8552effc..d39c659a7cb9 100644 --- a/pkgs/development/cuda-modules/generic-builders/manifest.nix +++ b/pkgs/development/cuda-modules/generic-builders/manifest.nix @@ -43,6 +43,9 @@ let # Get the redist architectures for which package provides distributables. # These are used by meta.platforms. supportedRedistArchs = builtins.attrNames featureRelease; + # redistArch :: String + # The redistArch is the name of the architecture for which the redistributable is built. + # It is `"unsupported"` if the redistributable is not supported on the target platform. redistArch = flags.getRedistArch hostPlatform.system; in backendStdenv.mkDerivation ( @@ -87,8 +90,18 @@ backendStdenv.mkDerivation ( "sample" "python" ]; + # Filter out outputs that don't exist in the redistributable. + # NOTE: In the case the redistributable isn't supported on the target platform, + # we will have `outputs = [ "out" ] ++ possibleOutputs`. This is of note because platforms which + # aren't supported would otherwise have evaluation errors when trying to access outputs other than `out`. + # The alternative would be to have `outputs = [ "out" ]` when `redistArch = "unsupported"`, but that would + # require adding guards throughout the entirety of the CUDA package set to ensure `cudaSupport` is true -- + # recall that OfBorg will evaluate packages marked as broken and that `cudaPackages` will be evaluated with + # `cudaSupport = false`! 
additionalOutputs = - if redistArch == "unsupported" then possibleOutputs else builtins.filter hasOutput possibleOutputs; + if redistArch == "unsupported" + then possibleOutputs + else builtins.filter hasOutput possibleOutputs; # The out output is special -- it's the default output and we always include it. outputs = [ "out" ] ++ additionalOutputs; in @@ -112,21 +125,32 @@ backendStdenv.mkDerivation ( python = ["**/*.whl"]; }; - # Useful for introspecting why something went wrong. - # Maps descriptions of why the derivation would be marked broken to - # booleans indicating whether that description is true. - brokenConditions = {}; + # Useful for introspecting why something went wrong. Maps descriptions of why the derivation would be marked as + # broken or have badPlatforms include the current platform. - src = fetchurl { - url = - if (builtins.hasAttr redistArch redistribRelease) then - "https://developer.download.nvidia.com/compute/${redistName}/redist/${ - redistribRelease.${redistArch}.relative_path - }" - else - "cannot-construct-an-url-for-the-${redistArch}-platform"; - sha256 = redistribRelease.${redistArch}.sha256 or lib.fakeHash; - }; + # brokenConditions :: AttrSet Bool + # Sets `meta.broken = true` if any of the conditions are true. + # Example: Broken on a specific version of CUDA or when a dependency has a specific version. + brokenConditions = { }; + + # badPlatformsConditions :: AttrSet Bool + # Sets `meta.badPlatforms = meta.platforms` if any of the conditions are true. + # Example: Broken on a specific architecture when some condition is met (like targeting Jetson). + badPlatformsConditions = { }; + + # src :: Optional Derivation + src = trivial.pipe redistArch [ + # If redistArch doesn't exist in redistribRelease, return null. + (redistArch: redistribRelease.${redistArch} or null) + # If the release is non-null, fetch the source; otherwise, return null. + (trivial.mapNullable ( + { relative_path, sha256, ... 
}: + fetchurl { + url = "https://developer.download.nvidia.com/compute/${redistName}/redist/${relative_path}"; + inherit sha256; + } + )) + ]; # Handle the pkg-config files: # 1. No FHS @@ -297,17 +321,18 @@ backendStdenv.mkDerivation ( meta = { description = "${redistribRelease.name}. By downloading and using the packages you accept the terms and conditions of the ${finalAttrs.meta.license.shortName}"; sourceProvenance = [sourceTypes.binaryNativeCode]; - platforms = - lists.concatMap - ( - redistArch: - let - nixSystem = flags.getNixSystem redistArch; - in - lists.optionals (!(strings.hasPrefix "unsupported-" nixSystem)) [ nixSystem ] - ) - supportedRedistArchs; broken = lists.any trivial.id (attrsets.attrValues finalAttrs.brokenConditions); + platforms = trivial.pipe supportedRedistArchs [ + # Map each redist arch to the equivalent nix system, or to an `unsupported-`-prefixed string if there is no equivalent. + (builtins.map flags.getNixSystem) + # Filter out unsupported systems + (builtins.filter (nixSystem: !(strings.hasPrefix "unsupported-" nixSystem))) + ]; + badPlatforms = + let + isBadPlatform = lists.any trivial.id (attrsets.attrValues finalAttrs.badPlatformsConditions); + in + lists.optionals isBadPlatform finalAttrs.meta.platforms; license = licenses.unfree; maintainers = teams.cuda.members; # Force the use of the default, fat output by default (even though `dev` exists, which diff --git a/pkgs/development/cuda-modules/generic-builders/multiplex.nix b/pkgs/development/cuda-modules/generic-builders/multiplex.nix index 5480da730726..6353b07545a4 100644 --- a/pkgs/development/cuda-modules/generic-builders/multiplex.nix +++ b/pkgs/development/cuda-modules/generic-builders/multiplex.nix @@ -20,7 +20,7 @@ # The featureRelease is used to populate meta.platforms (by way of looking at the attribute names) # and to determine the outputs of the package. # shimFn :: {package, redistArch} -> AttrSet - shimsFn ? ({package, redistArch}: throw "shimsFn must be provided"), + shimsFn ? 
(throw "shimsFn must be provided"), # fixupFn :: Path # A path (or nix expression) to be evaluated with callPackage and then # provided to the package's overrideAttrs function. @@ -29,16 +29,8 @@ # - cudaVersion # - mkVersionedPackageName # - package - fixupFn ? ( - { - final, - cudaVersion, - mkVersionedPackageName, - package, - ... - }: - throw "fixupFn must be provided" - ), + # - ... + fixupFn ? (throw "fixupFn must be provided"), }: let inherit (lib) @@ -80,9 +72,11 @@ let && strings.versionAtLeast package.maxCudaVersion cudaVersion; # Get all of the packages for our given platform. + # redistArch :: String + # Value is `"unsupported"` if the platform is not supported. redistArch = flags.getRedistArch hostPlatform.system; - allReleases = builtins.concatMap (xs: xs) (builtins.attrValues releaseSets); + allReleases = lists.flatten (builtins.attrValues releaseSets); # All the supported packages we can build for our platform. # perSystemReleases :: List Package diff --git a/pkgs/development/cuda-modules/nccl/default.nix b/pkgs/development/cuda-modules/nccl/default.nix index c56d59cb4206..6e385688d0f8 100644 --- a/pkgs/development/cuda-modules/nccl/default.nix +++ b/pkgs/development/cuda-modules/nccl/default.nix @@ -100,6 +100,9 @@ backendStdenv.mkDerivation ( homepage = "https://developer.nvidia.com/nccl"; license = licenses.bsd3; platforms = platforms.linux; + # NCCL is not supported on Jetson, because it does not use NVLink or PCI-e for inter-GPU communication. 
+ # https://forums.developer.nvidia.com/t/can-jetson-orin-support-nccl/232845/9 + badPlatforms = lib.optionals cudaFlags.isJetsonBuild [ "aarch64-linux" ]; maintainers = with maintainers; [ diff --git a/pkgs/development/cuda-modules/tensorrt/fixup.nix b/pkgs/development/cuda-modules/tensorrt/fixup.nix index 43a7dfb81784..51ca3d652bd1 100644 --- a/pkgs/development/cuda-modules/tensorrt/fixup.nix +++ b/pkgs/development/cuda-modules/tensorrt/fixup.nix @@ -11,18 +11,17 @@ }: let inherit (lib) + attrsets maintainers meta strings versions ; - targetArch = - if hostPlatform.isx86_64 then - "x86_64-linux-gnu" - else if hostPlatform.isAarch64 then - "aarch64-linux-gnu" - else - "unsupported"; + # targetArch :: String + targetArch = attrsets.attrByPath [ hostPlatform.system ] "unsupported" { + x86_64-linux = "x86_64-linux-gnu"; + aarch64-linux = "aarch64-linux-gnu"; + }; in finalAttrs: prevAttrs: { # Useful for inspecting why something went wrong. @@ -69,7 +68,7 @@ finalAttrs: prevAttrs: { preInstall = (prevAttrs.preInstall or "") - + '' + + strings.optionalString (targetArch != "unsupported") '' # Replace symlinks to bin and lib with the actual directories from targets. for dir in bin lib; do rm "$dir" diff --git a/pkgs/development/cuda-modules/tensorrt/shims.nix b/pkgs/development/cuda-modules/tensorrt/shims.nix index 8be3e7988bb3..12465434ec85 100644 --- a/pkgs/development/cuda-modules/tensorrt/shims.nix +++ b/pkgs/development/cuda-modules/tensorrt/shims.nix @@ -1,13 +1,21 @@ # Shims to mimic the shape of ../modules/generic/manifests/{feature,redistrib}/release.nix -{package, redistArch}: { - featureRelease.${redistArch}.outputs = { - bin = true; - lib = true; - static = true; - dev = true; - sample = true; - python = true; + lib, + package, + # redistArch :: String + # String is `"unsupported"` if the given architecture is unsupported. 
+ redistArch, +}: +{ + featureRelease = lib.optionalAttrs (redistArch != "unsupported") { + ${redistArch}.outputs = { + bin = true; + lib = true; + static = true; + dev = true; + sample = true; + python = true; + }; }; redistribRelease = { name = "TensorRT: a high-performance deep learning interface"; diff --git a/pkgs/development/libraries/science/math/magma/generic.nix b/pkgs/development/libraries/science/math/magma/generic.nix index 1aaab46e1d1d..b27b42bf3ae8 100644 --- a/pkgs/development/libraries/science/math/magma/generic.nix +++ b/pkgs/development/libraries/science/math/magma/generic.nix @@ -159,7 +159,7 @@ stdenv.mkDerivation { description = "Matrix Algebra on GPU and Multicore Architectures"; license = licenses.bsd3; homepage = "http://icl.cs.utk.edu/magma/index.html"; - platforms = platforms.unix; + platforms = platforms.linux; maintainers = with maintainers; [ connorbaker ]; # Cf. https://bitbucket.org/icl/magma/src/fcfe5aa61c1a4c664b36a73ebabbdbab82765e9f/CMakeLists.txt#lines-20 diff --git a/pkgs/development/libraries/xgboost/default.nix b/pkgs/development/libraries/xgboost/default.nix index 2a44ffc44382..0af51a40dfb1 100644 --- a/pkgs/development/libraries/xgboost/default.nix +++ b/pkgs/development/libraries/xgboost/default.nix @@ -14,7 +14,7 @@ , rPackages }@inputs: -assert ncclSupport -> cudaSupport; +assert ncclSupport -> (cudaSupport && !cudaPackages.nccl.meta.unsupported); # Disable regular tests when building the R package # because 1) the R package runs its own tests and # 2) the R package creates a different binary shared diff --git a/pkgs/development/python-modules/jaxlib/default.nix b/pkgs/development/python-modules/jaxlib/default.nix index 27b9e61fbc82..d8dc4d67a594 100644 --- a/pkgs/development/python-modules/jaxlib/default.nix +++ b/pkgs/development/python-modules/jaxlib/default.nix @@ -64,7 +64,8 @@ let # aarch64-darwin is broken because of https://github.com/bazelbuild/rules_cc/pull/136 # however even with that fix applied, it doesn't 
work for everyone: # https://github.com/NixOS/nixpkgs/pull/184395#issuecomment-1207287129 - broken = stdenv.isDarwin; + # NOTE: We always build with NCCL; if it is unsupported, then our build is broken. + broken = stdenv.isDarwin || nccl.meta.unsupported; }; cudatoolkit_joined = symlinkJoin { diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix index 8fb227cbd36b..8a499d763a4a 100644 --- a/pkgs/development/python-modules/torch/default.nix +++ b/pkgs/development/python-modules/torch/default.nix @@ -7,7 +7,8 @@ magma, magma-hip, magma-cuda-static, - useSystemNccl ? true, + # Use the system NCCL as long as we're targeting CUDA on a supported platform. + useSystemNccl ? (cudaSupport && !cudaPackages.nccl.meta.unsupported), MPISupport ? false, mpi, buildDocs ? false, @@ -273,9 +274,11 @@ in buildPythonPackage rec { PYTORCH_BUILD_VERSION = version; PYTORCH_BUILD_NUMBER = 0; - USE_NCCL = setBool (cudaSupport && cudaPackages ? nccl); - USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL - USE_STATIC_NCCL = setBool useSystemNccl; + # In-tree builds of NCCL are not supported. + # Use NCCL when cudaSupport is enabled and nccl is available. + USE_NCCL = setBool useSystemNccl; + USE_SYSTEM_NCCL = USE_NCCL; + USE_STATIC_NCCL = USE_NCCL; # Suppress a weird warning in mkl-dnn, part of ideep in pytorch # (upstream seems to have fixed this in the wrong place?) @@ -363,7 +366,7 @@ in buildPythonPackage rec { ] ++ lists.optionals (cudaPackages ? cudnn) [ cudnn.dev cudnn.lib - ] ++ lists.optionals (useSystemNccl && cudaPackages ? nccl) [ + ] ++ lists.optionals useSystemNccl [ # Some platforms do not support NCCL (i.e., Jetson) nccl.dev # Provides nccl.h AND a static copy of NCCL! 
] ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [ diff --git a/pkgs/top-level/cuda-packages.nix b/pkgs/top-level/cuda-packages.nix index 9045b5754ab8..f20a36152203 100644 --- a/pkgs/top-level/cuda-packages.nix +++ b/pkgs/top-level/cuda-packages.nix @@ -72,14 +72,7 @@ let # Loose packages cudatoolkit = final.callPackage ../development/cuda-modules/cudatoolkit {}; - # SaxPy is only available after 11.4 because it requires redistributable versions of CUDA libraries. - saxpy = attrsets.optionalAttrs (strings.versionAtLeast cudaVersion "11.4") ( - final.callPackage ../development/cuda-modules/saxpy {} - ); - } - # NCCL is not supported on Jetson, because it does not use NVLink or PCI-e for inter-GPU communication. - # https://forums.developer.nvidia.com/t/can-jetson-orin-support-nccl/232845/9 - // attrsets.optionalAttrs (!flags.isJetsonBuild) { + saxpy = final.callPackage ../development/cuda-modules/saxpy {}; nccl = final.callPackage ../development/cuda-modules/nccl {}; nccl-tests = final.callPackage ../development/cuda-modules/nccl-tests {}; }