Merge pull request #274319 from ConnorBaker/feat/cudaPackages-all-packages-eval
tree-wide: cudaPackages attributes should not cause default eval to fail
This commit is contained in:
commit
e529aea84f
15 changed files with 131 additions and 106 deletions
|
@ -153,7 +153,7 @@ stdenv.mkDerivation rec {
|
|||
|| cudaSupport
|
||||
|| !(leveldbSupport -> (leveldb != null && snappy != null))
|
||||
|| !(cudnnSupport -> (hasCudnn && cudaSupport))
|
||||
|| !(ncclSupport -> cudaSupport)
|
||||
|| !(ncclSupport -> (cudaSupport && !nccl.meta.unsupported))
|
||||
|| !(pythonSupport -> (python != null && numpy != null))
|
||||
;
|
||||
license = licenses.bsd2;
|
||||
|
|
|
@ -72,7 +72,7 @@ attrsets.filterAttrs (attr: _: (builtins.hasAttr attr prev)) {
|
|||
env.autoPatchelfIgnoreMissingDeps =
|
||||
prevAttrs.env.autoPatchelfIgnoreMissingDeps + " libnvrm_gpu.so libnvrm_mem.so libnvdla_runtime.so";
|
||||
# `cuda_compat` only works on aarch64-linux, and only when building for Jetson devices.
|
||||
brokenConditions = prevAttrs.brokenConditions // {
|
||||
badPlatformsConditions = prevAttrs.badPlatformsConditions // {
|
||||
"Trying to use cuda_compat on aarch64-linux targeting non-Jetson devices" =
|
||||
!final.flags.isJetsonBuild;
|
||||
};
|
||||
|
|
|
@ -1,10 +1,18 @@
|
|||
# Shims to mimic the shape of ../modules/generic/manifests/{feature,redistrib}/release.nix
|
||||
{package, redistArch}:
|
||||
{
|
||||
featureRelease.${redistArch}.outputs = {
|
||||
lib = true;
|
||||
static = true;
|
||||
dev = true;
|
||||
lib,
|
||||
package,
|
||||
# redistArch :: String
|
||||
# String is "unsupported" if the given architecture is unsupported.
|
||||
redistArch,
|
||||
}:
|
||||
{
|
||||
featureRelease = lib.optionalAttrs (redistArch != "unsupported") {
|
||||
${redistArch}.outputs = {
|
||||
lib = true;
|
||||
static = true;
|
||||
dev = true;
|
||||
};
|
||||
};
|
||||
redistribRelease = {
|
||||
name = "NVIDIA CUDA Deep Neural Network library (cuDNN)";
|
||||
|
|
|
@ -92,6 +92,7 @@ let
|
|||
# A release is supported if it has a libPath that matches our CUDA version for our platform.
|
||||
# LibPath are not constant across the same release -- one platform may support fewer
|
||||
# CUDA versions than another.
|
||||
# redistArch :: String
|
||||
redistArch = flags.getRedistArch hostPlatform.system;
|
||||
# platformIsSupported :: Manifests -> Boolean
|
||||
platformIsSupported =
|
||||
|
|
|
@ -131,39 +131,29 @@ let
|
|||
# `linux-aarch64` redist (which is for Jetson devices) if we're building any Jetson devices.
|
||||
# Since both are based on aarch64, we can only have one or the other, otherwise there's an
|
||||
# ambiguity as to which should be used.
|
||||
# NOTE: This function *will* be called by unsupported systems because `cudaPackages` is part of
|
||||
# `all-packages.nix`, which is evaluated on all systems. As such, we need to handle unsupported
|
||||
# systems gracefully.
|
||||
# getRedistArch :: String -> String
|
||||
getRedistArch =
|
||||
nixSystem:
|
||||
if nixSystem == "aarch64-linux" then
|
||||
if jetsonTargets != [] then "linux-aarch64" else "linux-sbsa"
|
||||
else if nixSystem == "x86_64-linux" then
|
||||
"linux-x86_64"
|
||||
else if nixSystem == "ppc64le-linux" then
|
||||
"linux-ppc64le"
|
||||
else if nixSystem == "x86_64-windows" then
|
||||
"windows-x86_64"
|
||||
else
|
||||
"unsupported";
|
||||
getRedistArch = nixSystem: attrsets.attrByPath [ nixSystem ] "unsupported" {
|
||||
aarch64-linux = if jetsonTargets != [] then "linux-aarch64" else "linux-sbsa";
|
||||
x86_64-linux = "linux-x86_64";
|
||||
ppc64le-linux = "linux-ppc64le";
|
||||
x86_64-windows = "windows-x86_64";
|
||||
};
|
||||
|
||||
# Maps NVIDIA redist arch to Nix system.
|
||||
# It is imperative that we include the boolean condition based on jetsonTargets to ensure
|
||||
# we don't advertise availability of packages only available on server-grade ARM
|
||||
# as being available for the Jetson, since both `linux-sbsa` and `linux-aarch64` are
|
||||
# mapped to the Nix system `aarch64-linux`.
|
||||
getNixSystem =
|
||||
redistArch:
|
||||
if redistArch == "linux-sbsa" && jetsonTargets == [] then
|
||||
"aarch64-linux"
|
||||
else if redistArch == "linux-aarch64" && jetsonTargets != [] then
|
||||
"aarch64-linux"
|
||||
else if redistArch == "linux-x86_64" then
|
||||
"x86_64-linux"
|
||||
else if redistArch == "linux-ppc64le" then
|
||||
"ppc64le-linux"
|
||||
else if redistArch == "windows-x86_64" then
|
||||
"x86_64-windows"
|
||||
else
|
||||
"unsupported-${redistArch}";
|
||||
# NOTE: This function *will* be called by unsupported systems because `cudaPackages` is part of
|
||||
# `all-packages.nix`, which is evaluated on all systems. As such, we need to handle unsupported
|
||||
# systems gracefully.
|
||||
# getNixSystem :: String -> String
|
||||
getNixSystem = redistArch: attrsets.attrByPath [ redistArch ] "unsupported-${redistArch}" {
|
||||
linux-sbsa = "aarch64-linux";
|
||||
linux-aarch64 = "aarch64-linux";
|
||||
linux-x86_64 = "x86_64-linux";
|
||||
linux-ppc64le = "ppc64le-linux";
|
||||
windows-x86_64 = "x86_64-windows";
|
||||
};
|
||||
|
||||
formatCapabilities =
|
||||
{
|
||||
|
|
|
@ -43,6 +43,9 @@ let
|
|||
# Get the redist architectures for which package provides distributables.
|
||||
# These are used by meta.platforms.
|
||||
supportedRedistArchs = builtins.attrNames featureRelease;
|
||||
# redistArch :: String
|
||||
# The redistArch is the name of the architecture for which the redistributable is built.
|
||||
# It is `"unsupported"` if the redistributable is not supported on the target platform.
|
||||
redistArch = flags.getRedistArch hostPlatform.system;
|
||||
in
|
||||
backendStdenv.mkDerivation (
|
||||
|
@ -87,8 +90,18 @@ backendStdenv.mkDerivation (
|
|||
"sample"
|
||||
"python"
|
||||
];
|
||||
# Filter out outputs that don't exist in the redistributable.
|
||||
# NOTE: In the case the redistributable isn't supported on the target platform,
|
||||
# we will have `outputs = [ "out" ] ++ possibleOutputs`. This is of note because platforms which
|
||||
# aren't supported would otherwise have evaluation errors when trying to access outputs other than `out`.
|
||||
# The alternative would be to have `outputs = [ "out" ]` when `redistArch = "unsupported"`, but that would
|
||||
# require adding guards throughout the entirety of the CUDA package set to ensure `cudaSupport` is true --
|
||||
# recall that OfBorg will evaluate packages marked as broken and that `cudaPackages` will be evaluated with
|
||||
# `cudaSupport = false`!
|
||||
additionalOutputs =
|
||||
if redistArch == "unsupported" then possibleOutputs else builtins.filter hasOutput possibleOutputs;
|
||||
if redistArch == "unsupported"
|
||||
then possibleOutputs
|
||||
else builtins.filter hasOutput possibleOutputs;
|
||||
# The out output is special -- it's the default output and we always include it.
|
||||
outputs = [ "out" ] ++ additionalOutputs;
|
||||
in
|
||||
|
@ -112,21 +125,32 @@ backendStdenv.mkDerivation (
|
|||
python = ["**/*.whl"];
|
||||
};
|
||||
|
||||
# Useful for introspecting why something went wrong.
|
||||
# Maps descriptions of why the derivation would be marked broken to
|
||||
# booleans indicating whether that description is true.
|
||||
brokenConditions = {};
|
||||
# Useful for introspecting why something went wrong. Maps descriptions of why the derivation would be marked as
|
||||
# broken or have badPlatforms include the current platform.
|
||||
|
||||
src = fetchurl {
|
||||
url =
|
||||
if (builtins.hasAttr redistArch redistribRelease) then
|
||||
"https://developer.download.nvidia.com/compute/${redistName}/redist/${
|
||||
redistribRelease.${redistArch}.relative_path
|
||||
}"
|
||||
else
|
||||
"cannot-construct-an-url-for-the-${redistArch}-platform";
|
||||
sha256 = redistribRelease.${redistArch}.sha256 or lib.fakeHash;
|
||||
};
|
||||
# brokenConditions :: AttrSet Bool
|
||||
# Sets `meta.broken = true` if any of the conditions are true.
|
||||
# Example: Broken on a specific version of CUDA or when a dependency has a specific version.
|
||||
brokenConditions = { };
|
||||
|
||||
# badPlatformsConditions :: AttrSet Bool
|
||||
# Sets `meta.badPlatforms = meta.platforms` if any of the conditions are true.
|
||||
# Example: Broken on a specific architecture when some condition is met (like targeting Jetson).
|
||||
badPlatformsConditions = { };
|
||||
|
||||
# src :: Optional Derivation
|
||||
src = trivial.pipe redistArch [
|
||||
# If redistArch doesn't exist in redistribRelease, return null.
|
||||
(redistArch: redistribRelease.${redistArch} or null)
|
||||
# If the release is non-null, fetch the source; otherwise, return null.
|
||||
(trivial.mapNullable (
|
||||
{ relative_path, sha256, ... }:
|
||||
fetchurl {
|
||||
url = "https://developer.download.nvidia.com/compute/${redistName}/redist/${relative_path}";
|
||||
inherit sha256;
|
||||
}
|
||||
))
|
||||
];
|
||||
|
||||
# Handle the pkg-config files:
|
||||
# 1. No FHS
|
||||
|
@ -297,17 +321,18 @@ backendStdenv.mkDerivation (
|
|||
meta = {
|
||||
description = "${redistribRelease.name}. By downloading and using the packages you accept the terms and conditions of the ${finalAttrs.meta.license.shortName}";
|
||||
sourceProvenance = [sourceTypes.binaryNativeCode];
|
||||
platforms =
|
||||
lists.concatMap
|
||||
(
|
||||
redistArch:
|
||||
let
|
||||
nixSystem = flags.getNixSystem redistArch;
|
||||
in
|
||||
lists.optionals (!(strings.hasPrefix "unsupported-" nixSystem)) [ nixSystem ]
|
||||
)
|
||||
supportedRedistArchs;
|
||||
broken = lists.any trivial.id (attrsets.attrValues finalAttrs.brokenConditions);
|
||||
platforms = trivial.pipe supportedRedistArchs [
|
||||
# Map each redist arch to the equivalent nix system, or to an "unsupported-"-prefixed string if there is no equivalent.
|
||||
(builtins.map flags.getNixSystem)
|
||||
# Filter out unsupported systems
|
||||
(builtins.filter (nixSystem: !(strings.hasPrefix "unsupported-" nixSystem)))
|
||||
];
|
||||
badPlatforms =
|
||||
let
|
||||
isBadPlatform = lists.any trivial.id (attrsets.attrValues finalAttrs.badPlatformsConditions);
|
||||
in
|
||||
lists.optionals isBadPlatform finalAttrs.meta.platforms;
|
||||
license = licenses.unfree;
|
||||
maintainers = teams.cuda.members;
|
||||
# Force the use of the default, fat output by default (even though `dev` exists, which
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
# The featureRelease is used to populate meta.platforms (by way of looking at the attribute names)
|
||||
# and to determine the outputs of the package.
|
||||
# shimsFn :: {package, redistArch} -> AttrSet
|
||||
shimsFn ? ({package, redistArch}: throw "shimsFn must be provided"),
|
||||
shimsFn ? (throw "shimsFn must be provided"),
|
||||
# fixupFn :: Path
|
||||
# A path (or nix expression) to be evaluated with callPackage and then
|
||||
# provided to the package's overrideAttrs function.
|
||||
|
@ -29,16 +29,8 @@
|
|||
# - cudaVersion
|
||||
# - mkVersionedPackageName
|
||||
# - package
|
||||
fixupFn ? (
|
||||
{
|
||||
final,
|
||||
cudaVersion,
|
||||
mkVersionedPackageName,
|
||||
package,
|
||||
...
|
||||
}:
|
||||
throw "fixupFn must be provided"
|
||||
),
|
||||
# - ...
|
||||
fixupFn ? (throw "fixupFn must be provided"),
|
||||
}:
|
||||
let
|
||||
inherit (lib)
|
||||
|
@ -80,9 +72,11 @@ let
|
|||
&& strings.versionAtLeast package.maxCudaVersion cudaVersion;
|
||||
|
||||
# Get all of the packages for our given platform.
|
||||
# redistArch :: String
|
||||
# Value is `"unsupported"` if the platform is not supported.
|
||||
redistArch = flags.getRedistArch hostPlatform.system;
|
||||
|
||||
allReleases = builtins.concatMap (xs: xs) (builtins.attrValues releaseSets);
|
||||
allReleases = lists.flatten (builtins.attrValues releaseSets);
|
||||
|
||||
# All the supported packages we can build for our platform.
|
||||
# perSystemReleases :: List Package
|
||||
|
|
|
@ -100,6 +100,9 @@ backendStdenv.mkDerivation (
|
|||
homepage = "https://developer.nvidia.com/nccl";
|
||||
license = licenses.bsd3;
|
||||
platforms = platforms.linux;
|
||||
# NCCL is not supported on Jetson, because it does not use NVLink or PCI-e for inter-GPU communication.
|
||||
# https://forums.developer.nvidia.com/t/can-jetson-orin-support-nccl/232845/9
|
||||
badPlatforms = lib.optionals cudaFlags.isJetsonBuild [ "aarch64-linux" ];
|
||||
maintainers =
|
||||
with maintainers;
|
||||
[
|
||||
|
|
|
@ -11,18 +11,17 @@
|
|||
}:
|
||||
let
|
||||
inherit (lib)
|
||||
attrsets
|
||||
maintainers
|
||||
meta
|
||||
strings
|
||||
versions
|
||||
;
|
||||
targetArch =
|
||||
if hostPlatform.isx86_64 then
|
||||
"x86_64-linux-gnu"
|
||||
else if hostPlatform.isAarch64 then
|
||||
"aarch64-linux-gnu"
|
||||
else
|
||||
"unsupported";
|
||||
# targetArch :: String
|
||||
targetArch = attrsets.attrByPath [ hostPlatform.system ] "unsupported" {
|
||||
x86_64-linux = "x86_64-linux-gnu";
|
||||
aarch64-linux = "aarch64-linux-gnu";
|
||||
};
|
||||
in
|
||||
finalAttrs: prevAttrs: {
|
||||
# Useful for inspecting why something went wrong.
|
||||
|
@ -69,7 +68,7 @@ finalAttrs: prevAttrs: {
|
|||
|
||||
preInstall =
|
||||
(prevAttrs.preInstall or "")
|
||||
+ ''
|
||||
+ strings.optionalString (targetArch != "unsupported") ''
|
||||
# Replace symlinks to bin and lib with the actual directories from targets.
|
||||
for dir in bin lib; do
|
||||
rm "$dir"
|
||||
|
|
|
@ -1,13 +1,21 @@
|
|||
# Shims to mimic the shape of ../modules/generic/manifests/{feature,redistrib}/release.nix
|
||||
{package, redistArch}:
|
||||
{
|
||||
featureRelease.${redistArch}.outputs = {
|
||||
bin = true;
|
||||
lib = true;
|
||||
static = true;
|
||||
dev = true;
|
||||
sample = true;
|
||||
python = true;
|
||||
lib,
|
||||
package,
|
||||
# redistArch :: String
|
||||
# String is `"unsupported"` if the given architecture is unsupported.
|
||||
redistArch,
|
||||
}:
|
||||
{
|
||||
featureRelease = lib.optionalAttrs (redistArch != "unsupported") {
|
||||
${redistArch}.outputs = {
|
||||
bin = true;
|
||||
lib = true;
|
||||
static = true;
|
||||
dev = true;
|
||||
sample = true;
|
||||
python = true;
|
||||
};
|
||||
};
|
||||
redistribRelease = {
|
||||
name = "TensorRT: a high-performance deep learning interface";
|
||||
|
|
|
@ -159,7 +159,7 @@ stdenv.mkDerivation {
|
|||
description = "Matrix Algebra on GPU and Multicore Architectures";
|
||||
license = licenses.bsd3;
|
||||
homepage = "http://icl.cs.utk.edu/magma/index.html";
|
||||
platforms = platforms.unix;
|
||||
platforms = platforms.linux;
|
||||
maintainers = with maintainers; [ connorbaker ];
|
||||
|
||||
# Cf. https://bitbucket.org/icl/magma/src/fcfe5aa61c1a4c664b36a73ebabbdbab82765e9f/CMakeLists.txt#lines-20
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
, rPackages
|
||||
}@inputs:
|
||||
|
||||
assert ncclSupport -> cudaSupport;
|
||||
assert ncclSupport -> (cudaSupport && !cudaPackages.nccl.meta.unsupported);
|
||||
# Disable regular tests when building the R package
|
||||
# because 1) the R package runs its own tests and
|
||||
# 2) the R package creates a different binary shared
|
||||
|
|
|
@ -64,7 +64,8 @@ let
|
|||
# aarch64-darwin is broken because of https://github.com/bazelbuild/rules_cc/pull/136
|
||||
# however even with that fix applied, it doesn't work for everyone:
|
||||
# https://github.com/NixOS/nixpkgs/pull/184395#issuecomment-1207287129
|
||||
broken = stdenv.isDarwin;
|
||||
# NOTE: We always build with NCCL; if it is unsupported, then our build is broken.
|
||||
broken = stdenv.isDarwin || nccl.meta.unsupported;
|
||||
};
|
||||
|
||||
cudatoolkit_joined = symlinkJoin {
|
||||
|
|
|
@ -7,7 +7,8 @@
|
|||
magma,
|
||||
magma-hip,
|
||||
magma-cuda-static,
|
||||
useSystemNccl ? true,
|
||||
# Use the system NCCL as long as we're targeting CUDA on a supported platform.
|
||||
useSystemNccl ? (cudaSupport && !cudaPackages.nccl.meta.unsupported),
|
||||
MPISupport ? false, mpi,
|
||||
buildDocs ? false,
|
||||
|
||||
|
@ -273,9 +274,11 @@ in buildPythonPackage rec {
|
|||
PYTORCH_BUILD_VERSION = version;
|
||||
PYTORCH_BUILD_NUMBER = 0;
|
||||
|
||||
USE_NCCL = setBool (cudaSupport && cudaPackages ? nccl);
|
||||
USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL
|
||||
USE_STATIC_NCCL = setBool useSystemNccl;
|
||||
# In-tree builds of NCCL are not supported.
|
||||
# Use NCCL when cudaSupport is enabled and nccl is available.
|
||||
USE_NCCL = setBool useSystemNccl;
|
||||
USE_SYSTEM_NCCL = USE_NCCL;
|
||||
USE_STATIC_NCCL = USE_NCCL;
|
||||
|
||||
# Suppress a weird warning in mkl-dnn, part of ideep in pytorch
|
||||
# (upstream seems to have fixed this in the wrong place?)
|
||||
|
@ -363,7 +366,7 @@ in buildPythonPackage rec {
|
|||
] ++ lists.optionals (cudaPackages ? cudnn) [
|
||||
cudnn.dev
|
||||
cudnn.lib
|
||||
] ++ lists.optionals (useSystemNccl && cudaPackages ? nccl) [
|
||||
] ++ lists.optionals useSystemNccl [
|
||||
# Some platforms do not support NCCL (i.e., Jetson)
|
||||
nccl.dev # Provides nccl.h AND a static copy of NCCL!
|
||||
] ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [
|
||||
|
|
|
@ -72,14 +72,7 @@ let
|
|||
|
||||
# Loose packages
|
||||
cudatoolkit = final.callPackage ../development/cuda-modules/cudatoolkit {};
|
||||
# SaxPy is only available after 11.4 because it requires redistributable versions of CUDA libraries.
|
||||
saxpy = attrsets.optionalAttrs (strings.versionAtLeast cudaVersion "11.4") (
|
||||
final.callPackage ../development/cuda-modules/saxpy {}
|
||||
);
|
||||
}
|
||||
# NCCL is not supported on Jetson, because it does not use NVLink or PCI-e for inter-GPU communication.
|
||||
# https://forums.developer.nvidia.com/t/can-jetson-orin-support-nccl/232845/9
|
||||
// attrsets.optionalAttrs (!flags.isJetsonBuild) {
|
||||
saxpy = final.callPackage ../development/cuda-modules/saxpy {};
|
||||
nccl = final.callPackage ../development/cuda-modules/nccl {};
|
||||
nccl-tests = final.callPackage ../development/cuda-modules/nccl-tests {};
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue