python3Packages.torch: migrate to CUDA redist from CUDA Toolkit
This commit is contained in:
parent 9a12fb6936
commit b0bd1943b3
1 changed file with 68 additions and 35 deletions
@@ -1,4 +1,4 @@
{ stdenv, lib, fetchFromGitHub, buildPythonPackage, python,
{ stdenv, lib, fetchFromGitHub, fetchpatch, buildPythonPackage, python,
  config, cudaSupport ? config.cudaSupport, cudaPackages, magma,
  useSystemNccl ? true,
  MPISupport ? false, mpi,
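
Aside: cudaSupport defaults to the nixpkgs-wide config.cudaSupport flag, so a CUDA build is requested through the general config rather than a per-package override. A minimal sketch of such an invocation, assuming <nixpkgs> on NIX_PATH (the CUDA libraries are unfree, hence allowUnfree); this is illustration, not part of the change:

    (import <nixpkgs> {
      config = {
        allowUnfree = true;   # CUDA redistributables are unfree
        cudaSupport = true;   # picked up here via `cudaSupport ? config.cudaSupport`
      };
    }).python3Packages.torch
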
@@ -52,17 +52,8 @@
let
inherit (lib) lists strings trivial;
inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl;
in
inherit (cudaPackages) cudaFlags cudnn nccl;

assert cudaSupport -> stdenv.isLinux;
assert cudaSupport -> (cudaPackages.cudaMajorVersion == "11");

# confirm that cudatoolkits are sync'd across dependencies
assert !(MPISupport && cudaSupport) || mpi.cudatoolkit == cudatoolkit;
assert !cudaSupport || magma.cudaPackages.cudatoolkit == cudatoolkit;

let
setBool = v: if v then "1" else "0";

# https://github.com/pytorch/pytorch/blob/v2.0.1/torch/utils/cpp_extension.py#L1744
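
The setBool helper exists because derivation attributes that become environment variables must be strings. A minimal, self-contained sketch of it (evaluable with nix-instantiate --eval --strict); the attribute names are only illustrative:

    let
      setBool = v: if v then "1" else "0";
    in {
      USE_SYSTEM_NCCL = setBool true;    # -> "1"
      USE_MPI = setBool false;           # -> "0"
    }
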
@@ -103,23 +94,6 @@ let
throw "No GPU targets specified"
);

cudatoolkit_joined = symlinkJoin {
name = "${cudatoolkit.name}-unsplit";
# nccl is here purely for semantic grouping; it could be moved to nativeBuildInputs
paths = [ cudatoolkit.out cudatoolkit.lib nccl.dev nccl.out ];
};

# Normally libcuda.so.1 is provided at runtime by nvidia-x11 via
# LD_LIBRARY_PATH=/run/opengl-driver/lib. We only use the stub
# libcuda.so from cudatoolkit for running tests, so that we don’t have
# to recompile pytorch on every update to nvidia-x11 or the kernel.
cudaStub = linkFarm "cuda-stub" [{
name = "libcuda.so.1";
path = "${cudatoolkit}/lib/stubs/libcuda.so";
}];
cudaStubEnv = lib.optionalString cudaSupport
"LD_LIBRARY_PATH=${cudaStub}\${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH ";

rocmtoolkit_joined = symlinkJoin {
name = "rocm-merged";
@@ -160,6 +134,12 @@ in buildPythonPackage rec {
# base is 10.12. Until we upgrade, we can fall back on the older
# pthread support.
./pthreadpool-disable-gcd.diff
] ++ lib.optionals stdenv.isLinux [
# Propagate CUPTI to Kineto by overriding the search path with environment variables.
(fetchpatch {
url = "https://github.com/pytorch/pytorch/pull/108847/commits/7ae4d7c0e2dec358b4fe81538efe9da5eb580ec9.patch";
hash = "sha256-skFaDg98xcJqJfzxWk+qhUxPLHDStqvd0mec3PgksIg=";
})
];
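
The Kineto/CUPTI fix is pinned by its upstream commit URL and a fixed-output hash. As a hedged sketch only (not part of this change), the same patch could be grafted onto a torch from another nixpkgs revision with overrideAttrs, assuming <nixpkgs>:

    let
      pkgs = import <nixpkgs> { };
    in
    pkgs.python3Packages.torch.overrideAttrs (old: {
      patches = (old.patches or [ ]) ++ [
        (pkgs.fetchpatch {
          url = "https://github.com/pytorch/pytorch/pull/108847/commits/7ae4d7c0e2dec358b4fe81538efe9da5eb580ec9.patch";
          hash = "sha256-skFaDg98xcJqJfzxWk+qhUxPLHDStqvd0mec3PgksIg=";
        })
      ];
    })
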

postPatch = lib.optionalString rocmSupport ''
@@ -184,6 +164,13 @@
--replace "set(ROCM_PATH \$ENV{ROCM_PATH})" \
"set(ROCM_PATH \$ENV{ROCM_PATH})''\nset(ROCM_VERSION ${lib.concatStrings (lib.intersperse "0" (lib.splitString "." hip.version))})"
''
# Detection of NCCL version doesn't work particularly well when using the static binary.
+ lib.optionalString cudaSupport ''
substituteInPlace cmake/Modules/FindNCCL.cmake \
--replace \
'message(FATAL_ERROR "Found NCCL header version and library version' \
'message(WARNING "Found NCCL header version and library version'
''
# error: no member named 'aligned_alloc' in the global namespace; did you mean simply 'aligned_alloc'
# This lib overrode aligned_alloc, hence the error message. TL;DR: the function is linkable but not declared in the header.
+ lib.optionalString (stdenv.isDarwin && lib.versionOlder stdenv.targetPlatform.darwinSdkVersion "11.0") ''
@@ -192,12 +179,16 @@
inline void *aligned_alloc(size_t align, size_t size)'
'';

# NOTE(@connorbaker): Though we do not disable Gloo or MPI when building with CUDA support, caution should be taken
# when using the different backends. Gloo's GPU support isn't great, and MPI and CUDA can't be used at the same time
# without extreme care to ensure they don't lock each other out of shared resources.
# For more, see https://github.com/open-mpi/ompi/issues/7733#issuecomment-629806195.
preConfigure = lib.optionalString cudaSupport ''
export TORCH_CUDA_ARCH_LIST="${gpuTargetString}"
export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++
'' + lib.optionalString (cudaSupport && cudnn != null) ''
export CUDNN_INCLUDE_DIR=${cudnn.dev}/include
export CUDNN_LIB_DIR=${cudnn.lib}/lib
export CUPTI_INCLUDE_DIR=${cudaPackages.cuda_cupti.dev}/include
export CUPTI_LIBRARY_DIR=${cudaPackages.cuda_cupti.lib}/lib
'' + lib.optionalString rocmSupport ''
export ROCM_PATH=${rocmtoolkit_joined}
export ROCM_SOURCE_DIR=${rocmtoolkit_joined}
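
preConfigure is assembled from lib.optionalString fragments, so a disabled feature contributes an empty string instead of a conditional branch. A minimal sketch of the pattern, evaluable with nix-instantiate --eval; the booleans and exported values are stand-ins:

    let
      lib = (import <nixpkgs> { }).lib;
      cudaSupport = true;
      rocmSupport = false;
    in
    lib.optionalString cudaSupport ''
      export TORCH_CUDA_ARCH_LIST="8.6"
    ''
    + lib.optionalString rocmSupport ''
      export ROCM_PATH=/nonexistent
    ''
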
@@ -256,6 +247,7 @@
PYTORCH_BUILD_NUMBER = 0;

USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL
USE_STATIC_NCCL = setBool useSystemNccl;

# Suppress a weird warning in mkl-dnn, part of ideep in pytorch
# (upstream seems to have fixed this in the wrong place?)
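
USE_SYSTEM_NCCL and USE_STATIC_NCCL end up as environment variables in the build sandbox, which is why they are rendered with setBool. A self-contained sketch of the same mechanism with a dummy derivation, assuming <nixpkgs>:

    with import <nixpkgs> { };
    runCommand "nccl-flags-demo" {
      USE_SYSTEM_NCCL = "1";
      USE_STATIC_NCCL = "1";
    } ''
      echo "system=$USE_SYSTEM_NCCL static=$USE_STATIC_NCCL" > $out
    ''
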
@@ -286,12 +278,43 @@
pybind11
pythonRelaxDepsHook
removeReferencesTo
] ++ lib.optionals cudaSupport [ cudatoolkit_joined ]
++ lib.optionals rocmSupport [ rocmtoolkit_joined ];
] ++ lib.optionals cudaSupport (with cudaPackages; [
autoAddOpenGLRunpathHook
cuda_nvcc
])
++ lib.optionals rocmSupport [ rocmtoolkit_joined ];

buildInputs = [ blas blas.provider pybind11 ]
++ lib.optionals stdenv.isLinux [ linuxHeaders_5_19 ] # TMP: avoid "flexible array member" errors for now
++ lib.optionals cudaSupport [ cudnn.dev cudnn.lib nccl ]
++ lib.optionals cudaSupport (with cudaPackages; [
cuda_cccl.dev # <thrust/*>
cuda_cudart # cuda_runtime.h and libraries
cuda_cupti.dev # For kineto
cuda_cupti.lib # For kineto
cuda_nvcc.dev # crt/host_config.h; even though we include this in nativeBuildInputs, it's needed here too
cuda_nvml_dev.dev # <nvml.h>
cuda_nvrtc.dev
cuda_nvrtc.lib
cuda_nvtx.dev
cuda_nvtx.lib # -llibNVToolsExt
cudnn.dev
cudnn.lib
libcublas.dev
libcublas.lib
libcufft.dev
libcufft.lib
libcurand.dev
libcurand.lib
libcusolver.dev
libcusolver.lib
libcusparse.dev
libcusparse.lib
nccl.dev # Provides nccl.h AND a static copy of NCCL!
] ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [
cuda_nvprof.dev # <cuda_profiler_api.h>
] ++ lists.optionals (strings.versionAtLeast cudaVersion "11.8") [
cuda_profiler_api.dev # <cuda_profiler_api.h>
])
++ lib.optionals rocmSupport [ openmp ]
++ lib.optionals (cudaSupport || rocmSupport) [ magma ]
++ lib.optionals stdenv.isLinux [ numactl ]
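
Per the hunk above, the profiler header comes from cuda_nvprof before CUDA 11.8 and from cuda_profiler_api from 11.8 on, gated with lists.optionals on the version string. A standalone sketch of that pattern (nix-instantiate --eval --strict); the version and package names here are plain strings used for illustration, not the real derivations:

    let
      lib = (import <nixpkgs> { }).lib;
      inherit (lib) lists strings;
      cudaVersion = "11.8";   # stand-in; the real value comes from cudaPackages
    in
    [ "cuda_cudart" ]
    ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [ "cuda_nvprof" ]
    ++ lists.optionals (strings.versionAtLeast cudaVersion "11.8") [ "cuda_profiler_api" ]
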
@@ -335,7 +358,6 @@ in buildPythonPackage rec {

checkPhase = with lib.versions; with lib.strings; concatStringsSep " " [
"runHook preCheck"
cudaStubEnv
"${python.interpreter} test/run_test.py"
"--exclude"
(concatStringsSep " " [
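
checkPhase is assembled as a single space-separated command line; since cudaStub was dropped above, the cudaStubEnv prefix is the line that leaves this hunk. A rough sketch of the concatStringsSep shape (nix-instantiate --eval); the interpreter attribute and excluded test names are placeholders:

    let
      lib = (import <nixpkgs> { }).lib;
      python = { interpreter = "python3"; };   # placeholder for the real python attribute
    in
    lib.concatStringsSep " " [
      "runHook preCheck"
      "${python.interpreter} test/run_test.py"
      "--exclude"
      (lib.concatStringsSep " " [ "distributed/test_distributed" "utils" ])   # placeholder test names
    ]
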
@@ -419,6 +441,17 @@
license = licenses.bsd3;
maintainers = with maintainers; [ teh thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds
platforms = with platforms; linux ++ lib.optionals (!cudaSupport && !rocmSupport) darwin;
broken = rocmSupport && cudaSupport; # CUDA and ROCm are mutually exclusive
broken = builtins.any trivial.id [
# CUDA and ROCm are mutually exclusive
(cudaSupport && rocmSupport)
# CUDA is only supported on Linux
(cudaSupport && !stdenv.isLinux)
# Only CUDA 11 is currently supported
(cudaSupport && (cudaPackages.cudaMajorVersion != "11"))
# MPI cudatoolkit does not match cudaPackages.cudatoolkit
(MPISupport && cudaSupport && (mpi.cudatoolkit != cudaPackages.cudatoolkit))
# Magma cudaPackages does not match cudaPackages
(cudaSupport && (magma.cudaPackages != cudaPackages))
];
};
}
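
The eval-time asserts from the top of the file become meta.broken conditions here: builtins.any trivial.id simply ORs the booleans, so an unsupported combination marks the package broken instead of aborting evaluation. A standalone sketch (nix-instantiate --eval --strict) with stand-in flags:

    let
      trivial = (import <nixpkgs> { }).lib.trivial;
      cudaSupport = true;
      rocmSupport = false;
      isLinux = true;
    in
    builtins.any trivial.id [
      (cudaSupport && rocmSupport)   # CUDA and ROCm are mutually exclusive
      (cudaSupport && !isLinux)      # CUDA is only supported on Linux
    ]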