Merge pull request #217367 from ConnorBaker/feat/cudaFlags-arch-rewrite

cudaFlags: rewrite to capture all architectures and fix NixOS#215436
This commit is contained in:
Samuel Ainsworth 2023-02-24 17:31:11 -05:00 committed by GitHub
commit d24dde7f6c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 272 additions and 60 deletions

View file

@ -50,7 +50,7 @@ stdenv.mkDerivation rec {
"-DUSE_OLDCMAKECUDA=ON" # see https://github.com/apache/incubator-mxnet/issues/10743
"-DCUDA_ARCH_NAME=All"
"-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
"-DMXNET_CUDA_ARCH=${cudaFlags.cudaCapabilitiesSemiColonString}"
"-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.cudaRealArches}"
] else [ "-DUSE_CUDA=OFF" ])
++ lib.optional (!cudnnSupport) "-DUSE_CUDNN=OFF";

View file

@ -2,7 +2,18 @@
, lib
, cudatoolkit
}:
# Type aliases
# Gpu = {
# archName: String, # e.g., "Hopper"
# computeCapability: String, # e.g., "9.0"
# minCudaVersion: String, # e.g., "11.8"
# maxCudaVersion: String, # e.g., "12.0"
# }
let
inherit (lib) attrsets lists strings trivial versions;
cudaVersion = cudatoolkit.version;
# Flags are determined based on your CUDA toolkit by default. You may benefit
# from improved performance, reduced file size, or greater hardware suppport by
@ -13,66 +24,116 @@ let
#
# Please see the accompanying documentation or https://github.com/NixOS/nixpkgs/pull/205351
defaultCudaCapabilities = rec {
cuda9 = [
"3.0"
"3.5"
"5.0"
"5.2"
"6.0"
"6.1"
"7.0"
];
# gpus :: List Gpu
gpus = builtins.import ./gpus.nix;
cuda10 = cuda9 ++ [
"7.5"
];
# isVersionIn :: Gpu -> Bool
isSupported = gpu:
let
inherit (gpu) minCudaVersion maxCudaVersion;
lowerBoundSatisfied = strings.versionAtLeast cudaVersion minCudaVersion;
upperBoundSatisfied = !(strings.versionOlder maxCudaVersion cudaVersion);
in
lowerBoundSatisfied && upperBoundSatisfied;
cuda11 = [
"3.5"
"5.0"
"5.2"
"6.0"
"6.1"
"7.0"
"7.5"
"8.0"
"8.6"
];
# supportedGpus :: List Gpu
# GPUs which are supported by the provided CUDA version.
supportedGpus = builtins.filter isSupported gpus;
};
# cudaArchNameToVersions :: AttrSet String (List String)
# Maps the name of a GPU architecture to different versions of that architecture.
# For example, "Ampere" maps to [ "8.0" "8.6" "8.7" ].
cudaArchNameToVersions =
lists.groupBy'
(versions: gpu: versions ++ [ gpu.computeCapability ])
[ ]
(gpu: gpu.archName)
supportedGpus;
cudaMicroarchitectureNames = {
"3" = "Kepler";
"5" = "Maxwell";
"6" = "Pascal";
"7" = "Volta";
"8" = "Ampere";
"9" = "Hopper";
};
# cudaArchNames :: List String
# NOTE: It's important that we don't rely on builtins.attrNames cudaArchNameToVersions here;
# otherwise, we'll get the names sorted in alphabetical order. The JSON array we read them
# from is already sorted, so we'll preserve that order here.
cudaArchNames = lists.unique (lists.map (gpu: gpu.archName) supportedGpus);
defaultCudaArchList = defaultCudaCapabilities."cuda${lib.versions.major cudatoolkit.version}";
cudaRealCapabilities = config.cudaCapabilities or defaultCudaArchList;
capabilitiesForward = "${lib.last cudaRealCapabilities}+PTX";
# cudaComputeCapabilityToName :: AttrSet String String
# Maps the version of a GPU architecture to the name of that architecture.
# For example, "8.0" maps to "Ampere".
cudaComputeCapabilityToName = builtins.listToAttrs (
lists.map
(gpu: {
name = gpu.computeCapability;
value = gpu.archName;
})
supportedGpus
);
dropDot = ver: builtins.replaceStrings ["."] [""] ver;
# cudaComputeCapabilities :: List String
# NOTE: It's important that we don't rely on builtins.attrNames cudaComputeCapabilityToName here;
# otherwise, we'll get the versions sorted in alphabetical order. The JSON array we read them
# from is already sorted, so we'll preserve that order here.
# Use the user-provided list of CUDA capabilities if it's provided.
cudaComputeCapabilities = config.cudaCapabilities
or (lists.map (gpu: gpu.computeCapability) supportedGpus);
archMapper = feat: map (ver: "${feat}_${dropDot ver}");
gencodeMapper = feat: map (ver: "-gencode=arch=compute_${dropDot ver},code=${feat}_${dropDot ver}");
cudaRealArchs = archMapper "sm" cudaRealCapabilities;
cudaPTXArchs = archMapper "compute" cudaRealCapabilities;
cudaArchs = cudaRealArchs ++ [ (lib.last cudaPTXArchs) ];
# cudaForwardComputeCapability :: String
cudaForwardComputeCapability = (lists.last cudaComputeCapabilities) + "+PTX";
cudaArchNames = lib.unique (map (v: cudaMicroarchitectureNames.${lib.versions.major v}) cudaRealCapabilities);
cudaCapabilities = cudaRealCapabilities ++ lib.optional (config.cudaForwardCompat or true) capabilitiesForward;
cudaGencode = gencodeMapper "sm" cudaRealCapabilities ++ lib.optionals (config.cudaForwardCompat or true) (gencodeMapper "compute" [ (lib.last cudaPTXArchs) ]);
# cudaComputeCapabilitiesAndForward :: List String
# The list of supported CUDA architectures, including the forward compatibility architecture.
# If forward compatibility is disabled, this will be the same as cudaComputeCapabilities.
cudaComputeCapabilitiesAndForward = cudaComputeCapabilities
++ lists.optional (config.cudaForwardCompat or true) cudaForwardComputeCapability;
cudaCapabilitiesCommaString = lib.strings.concatStringsSep "," cudaCapabilities;
cudaCapabilitiesSemiColonString = lib.strings.concatStringsSep ";" cudaCapabilities;
cudaRealCapabilitiesCommaString = lib.strings.concatStringsSep "," cudaRealCapabilities;
# dropDot :: String -> String
dropDot = ver: builtins.replaceStrings [ "." ] [ "" ] ver;
# archMapper :: String -> List String -> List String
# Maps a feature across a list of architecture versions to produce a list of architectures.
# For example, "sm" and [ "8.0" "8.6" "8.7" ] produces [ "sm_80" "sm_86" "sm_87" ].
archMapper = feat: lists.map (computeCapability: "${feat}_${dropDot computeCapability}");
# gencodeMapper :: String -> List String -> List String
# Maps a feature across a list of architecture versions to produce a list of gencode arguments.
# For example, "sm" and [ "8.0" "8.6" "8.7" ] produces [ "-gencode=arch=compute_80,code=sm_80"
# "-gencode=arch=compute_86,code=sm_86" "-gencode=arch=compute_87,code=sm_87" ].
gencodeMapper = feat: lists.map (
computeCapability:
"-gencode=arch=compute_${dropDot computeCapability},code=${feat}_${dropDot computeCapability}"
);
# cudaRealArches :: List String
# The real architectures are physical architectures supported by the CUDA version.
# For example, "sm_80".
cudaRealArches = archMapper "sm" cudaComputeCapabilities;
# cudaVirtualArches :: List String
# The virtual architectures are typically used for forward compatibility, when trying to support
# an architecture newer than the CUDA version allows.
# For example, "compute_80".
cudaVirtualArches = archMapper "compute" cudaComputeCapabilities;
# cudaArches :: List String
# By default, build for all supported architectures and forward compatibility via a virtual
# architecture for the newest supported architecture.
cudaArches = cudaRealArches ++
lists.optional (config.cudaForwardCompat or true) (lists.last cudaVirtualArches);
# cudaGencode :: List String
# A list of CUDA gencode arguments to pass to NVCC.
cudaGencode =
let
base = gencodeMapper "sm" cudaComputeCapabilities;
forwardCompat = gencodeMapper "compute" [ (lists.last cudaComputeCapabilities) ];
in
base ++ lists.optionals (config.cudaForwardCompat or true) forwardCompat;
in
{
inherit cudaArchs cudaArchNames cudaCapabilities cudaCapabilitiesCommaString cudaCapabilitiesSemiColonString
cudaRealCapabilities cudaRealCapabilitiesCommaString cudaGencode cudaRealArchs cudaPTXArchs;
inherit
cudaArchNames
cudaArchNameToVersions cudaComputeCapabilityToName
cudaRealArches cudaVirtualArches cudaArches
cudaGencode;
cudaCapabilities = cudaComputeCapabilitiesAndForward;
}

View file

@ -0,0 +1,110 @@
[
{
archName = "Kepler";
computeCapability = "3.0";
minCudaVersion = "10.0";
maxCudaVersion = "10.2";
}
{
archName = "Kepler";
computeCapability = "3.2";
minCudaVersion = "10.0";
maxCudaVersion = "10.2";
}
{
archName = "Kepler";
computeCapability = "3.5";
minCudaVersion = "10.0";
maxCudaVersion = "11.8";
}
{
archName = "Kepler";
computeCapability = "3.7";
minCudaVersion = "10.0";
maxCudaVersion = "11.8";
}
{
archName = "Maxwell";
computeCapability = "5.0";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Maxwell";
computeCapability = "5.2";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Maxwell";
computeCapability = "5.3";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Pascal";
computeCapability = "6.0";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Pascal";
computeCapability = "6.1";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Pascal";
computeCapability = "6.2";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Volta";
computeCapability = "7.0";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Volta";
computeCapability = "7.2";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Turing";
computeCapability = "7.5";
minCudaVersion = "10.0";
maxCudaVersion = "12.0";
}
{
archName = "Ampere";
computeCapability = "8.0";
minCudaVersion = "11.2";
maxCudaVersion = "12.0";
}
{
archName = "Ampere";
computeCapability = "8.6";
minCudaVersion = "11.2";
maxCudaVersion = "12.0";
}
{
archName = "Ampere";
computeCapability = "8.7";
minCudaVersion = "11.5";
maxCudaVersion = "12.0";
}
{
archName = "Ada";
computeCapability = "8.9";
minCudaVersion = "11.8";
maxCudaVersion = "12.0";
}
{
archName = "Hopper";
computeCapability = "9.0";
minCudaVersion = "11.8";
maxCudaVersion = "12.0";
}
]

View file

@ -52,7 +52,7 @@ in stdenv.mkDerivation (finalAttrs: {
"-DCMAKE_C_COMPILER=${cudatoolkit.cc}/bin/gcc"
"-DCMAKE_CXX_COMPILER=${cudatoolkit.cc}/bin/g++"
"-DMAGMA_ENABLE_CUDA=ON"
"-DGPU_TARGET=${builtins.concatStringsSep "," cudaFlags.cudaRealArchs}"
"-DGPU_TARGET=${builtins.concatStringsSep "," cudaFlags.cudaRealArches}"
] ++ lib.optionals useROCM [
"-DCMAKE_C_COMPILER=${hip}/bin/hipcc"
"-DCMAKE_CXX_COMPILER=${hip}/bin/hipcc"

View file

@ -164,7 +164,7 @@ let
build --action_env TF_CUDA_PATHS="${cudatoolkit_joined},${cudnn},${nccl}"
build --action_env TF_CUDA_VERSION="${lib.versions.majorMinor cudatoolkit.version}"
build --action_env TF_CUDNN_VERSION="${lib.versions.major cudnn.version}"
build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${cudaFlags.cudaRealCapabilitiesCommaString}"
build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.cudaRealArches}"
'' + ''
CFG
'';

View file

@ -301,7 +301,7 @@ let
TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkit_joined},${cudnn},${nccl}";
GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin";
GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/gcc";
TF_CUDA_COMPUTE_CAPABILITIES = builtins.concatStringsSep "," cudaFlags.cudaRealArchs;
TF_CUDA_COMPUTE_CAPABILITIES = builtins.concatStringsSep "," cudaFlags.cudaRealArches;
postPatch = ''
# bazel 3.3 should work just as well as bazel 3.1

View file

@ -41,6 +41,7 @@
}:
let
inherit (lib) lists strings trivial;
inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl;
in
@ -54,6 +55,45 @@ assert !cudaSupport || magma.cudatoolkit == cudatoolkit;
let
setBool = v: if v then "1" else "0";
# https://github.com/pytorch/pytorch/blob/v1.13.1/torch/utils/cpp_extension.py#L1751
supportedTorchCudaCapabilities =
let
real = ["3.5" "3.7" "5.0" "5.2" "5.3" "6.0" "6.1" "6.2" "7.0" "7.2" "7.5" "8.0" "8.6"];
ptx = lists.map (x: "${x}+PTX") real;
in
real ++ ptx;
# NOTE: The lists.subtractLists function is perhaps a bit unintuitive. It subtracts the elements
# of the first list *from* the second list. That means:
# lists.subtractLists a b = b - a
# For CUDA
supportedCudaCapabilities = lists.intersectLists cudaFlags.cudaCapabilities supportedTorchCudaCapabilities;
unsupportedCudaCapabilities = lists.subtractLists supportedCudaCapabilities cudaFlags.cudaCapabilities;
# Use trivial.warnIf to print a warning if any unsupported GPU targets are specified.
gpuArchWarner = supported: unsupported:
trivial.throwIf (supported == [ ])
(
"No supported GPU targets specified. Requested GPU targets: "
+ strings.concatStringsSep ", " unsupported
)
supported;
# Create the gpuTargetString.
gpuTargetString = strings.concatStringsSep ";" (
if gpuTargets != [ ] then
# If gpuTargets is specified, it always takes priority.
gpuTargets
else if cudaSupport then
gpuArchWarner supportedCudaCapabilities unsupportedCudaCapabilities
else if rocmSupport then
hip.gpuTargets
else
throw "No GPU targets specified"
);
cudatoolkit_joined = symlinkJoin {
name = "${cudatoolkit.name}-unsplit";
# nccl is here purely for semantic grouping it could be moved to nativeBuildInputs
@ -146,14 +186,14 @@ in buildPythonPackage rec {
'';
preConfigure = lib.optionalString cudaSupport ''
export TORCH_CUDA_ARCH_LIST="${cudaFlags.cudaCapabilitiesSemiColonString}"
export TORCH_CUDA_ARCH_LIST="${gpuTargetString}"
export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++
'' + lib.optionalString (cudaSupport && cudnn != null) ''
export CUDNN_INCLUDE_DIR=${cudnn}/include
'' + lib.optionalString rocmSupport ''
export ROCM_PATH=${rocmtoolkit_joined}
export ROCM_SOURCE_DIR=${rocmtoolkit_joined}
export PYTORCH_ROCM_ARCH="${lib.strings.concatStringsSep ";" (if gpuTargets == [ ] then hip.gpuTargets else gpuTargets)}"
export PYTORCH_ROCM_ARCH="${gpuTargetString}"
export CMAKE_CXX_FLAGS="-I${rocmtoolkit_joined}/include -I${rocmtoolkit_joined}/include/rocblas"
python tools/amd_build/build_amd.py
'';
@ -320,7 +360,8 @@ in buildPythonPackage rec {
requiredSystemFeatures = [ "big-parallel" ];
passthru = {
inherit cudaSupport cudaPackages;
inherit cudaSupport cudaPackages gpuTargetString;
cudaCapabilities = supportedCudaCapabilities;
# At least for 1.10.2 `torch.fft` is unavailable unless BLAS provider is MKL. This attribute allows for easy detection of its availability.
blasProvider = blas.provider;
};

View file

@ -15,13 +15,13 @@
}:
let
inherit (torch.cudaPackages) cudatoolkit cudaFlags cudnn;
inherit (torch) gpuTargetString;
inherit (torch.cudaPackages) cudatoolkit cudnn;
cudatoolkit_joined = symlinkJoin {
name = "${cudatoolkit.name}-unsplit";
paths = [ cudatoolkit.out cudatoolkit.lib ];
};
cudaArchStr = lib.optionalString cudaSupport lib.strings.concatStringsSep ";" torch.cudaArchList;
in buildPythonPackage rec {
pname = "torchvision";
version = "0.14.1";
@ -45,7 +45,7 @@ in buildPythonPackage rec {
propagatedBuildInputs = [ numpy pillow torch scipy ];
preBuild = lib.optionalString cudaSupport ''
export TORCH_CUDA_ARCH_LIST="${cudaFlags.cudaCapabilitiesSemiColonString}"
export TORCH_CUDA_ARCH_LIST="${gpuTargetString}"
export FORCE_CUDA=1
'';