cp2k: enable GPU offloading, sirius planewaves and optimisations

This commit is contained in:
Phillip Seeber 2023-10-05 14:24:58 +02:00
parent 7b50f78333
commit dcd7efd1f9

View file

@ -1,15 +1,57 @@
{ lib, stdenv, fetchFromGitHub, mpiCheckPhaseHook, python3, gfortran, blas, lapack { lib
, fftw, libint, libvori, libxc, mpi, gsl, scalapack, openssh, makeWrapper , stdenv
, libxsmm, spglib, which, pkg-config, plumed, zlib , fetchFromGitHub
, mpiCheckPhaseHook
, python3
, gfortran
, blas
, lapack
, fftw
, libint
, libvori
, libxc
, mpi
, gsl
, scalapack
, openssh
, makeWrapper
, libxsmm
, spglib
, which
, pkg-config
, plumed
, zlib
, hdf5-fortran
, sirius
, libvdwxc
, spla
, spfft
, enableElpa ? false , enableElpa ? false
, elpa , elpa
} : , gpuBackend ? "none"
, cudaPackages
# gpuVersion needs to be set for both CUDA as well as ROCM hardware.
# gpuArch is only required for the ROCM stack.
# Change to a value suitable for your target GPU.
# For AMD values see https://github.com/cp2k/cp2k/blob/master/INSTALL.md#2v-rocmhip-support-for-amd-gpu
# and for Nvidia see https://github.com/cp2k/cp2k/blob/master/INSTALL.md#2i-cuda-optional-improved-performance-on-gpu-systems
, gpuVersion ? "Mi100"
, gpuArch ? "gfx908"
, rocm-core
, hip
, hipblas
, hipfft
, rocblas
}:
assert builtins.elem gpuBackend [ "none" "cuda" "rocm" ];
let let
cp2kVersion = "psmp"; cp2kVersion = "psmp";
arch = "Linux-x86-64-gfortran"; arch = "Linux-x86-64-gfortran";
in stdenv.mkDerivation rec { in
stdenv.mkDerivation rec {
pname = "cp2k"; pname = "cp2k";
version = "2023.2"; version = "2023.2";
@ -36,7 +78,16 @@ in stdenv.mkDerivation rec {
lapack lapack
plumed plumed
zlib zlib
] ++ lib.optional enableElpa elpa; hdf5-fortran
sirius
spla
spfft
libvdwxc
]
++ lib.optional enableElpa elpa
++ lib.optional (gpuBackend == "cuda") cudaPackages.cudatoolkit
++ lib.optional (gpuBackend == "rocm") [hip rocm-core hipblas hipfft rocblas]
;
propagatedBuildInputs = [ mpi ]; propagatedBuildInputs = [ mpi ];
propagatedUserEnvPkgs = [ mpi ]; propagatedUserEnvPkgs = [ mpi ];
@ -46,7 +97,7 @@ in stdenv.mkDerivation rec {
"VERSION=${cp2kVersion}" "VERSION=${cp2kVersion}"
]; ];
doCheck = true; doCheck = gpuBackend == "none";
enableParallelBuilding = true; enableParallelBuilding = true;
@ -64,25 +115,46 @@ in stdenv.mkDerivation rec {
FC = mpif90 FC = mpif90
LD = mpif90 LD = mpif90
AR = ar -r AR = ar -r
${lib.strings.optionalString (gpuBackend == "cuda") ''
OFFLOAD_CC = nvcc
OFFLOAD_FLAGS = -O3 -g -w --std=c++11
OFFLOAD_TARGET = cuda
GPUVER = ${gpuVersion}
CXX = mpicxx
CXXFLAGS = -std=c++11 -fopenmp
''}
${lib.strings.optionalString (gpuBackend == "rocm") ''
GPUVER = ${gpuVersion}
OFFLOAD_CC = hipcc
OFFLOAD_FLAGS = -fopenmp -m64 -pthread -fPIC -D__GRID_HIP -O2 --offload-arch=${gpuArch} --rocm-path=${rocm-core}
OFFLOAD_TARGET = hip
CXX = mpicxx
CXXFLAGS = -std=c++11 -fopenmp -D__HIP_PLATFORM_AMD__
''}
DFLAGS = -D__FFTW3 -D__LIBXC -D__LIBINT -D__parallel -D__SCALAPACK \ DFLAGS = -D__FFTW3 -D__LIBXC -D__LIBINT -D__parallel -D__SCALAPACK \
-D__MPI_VERSION=3 -D__F2008 -D__LIBXSMM -D__SPGLIB \ -D__MPI_VERSION=3 -D__F2008 -D__LIBXSMM -D__SPGLIB \
-D__MAX_CONTR=4 -D__LIBVORI ${lib.optionalString enableElpa "-D__ELPA"} \ -D__MAX_CONTR=4 -D__LIBVORI ${lib.optionalString enableElpa "-D__ELPA"} \
-D__PLUMED2 -D__PLUMED2 -D__HDF5 -D__GSL -D__SIRIUS -D__LIBVDWXC -D__SPFFT -D__SPLA \
CFLAGS = -fopenmp ${lib.strings.optionalString (gpuBackend == "cuda") "-D__OFFLOAD_CUDA -D__DBCSR_ACC"} \
${lib.strings.optionalString (gpuBackend == "rocm") "-D__OFFLOAD_HIP -D__DBCSR_ACC -D__NO_OFFLOAD_PW"}
CFLAGS = -fopenmp -I${lib.getDev hdf5-fortran}/include -I${lib.getDev gsl}/include
FCFLAGS = \$(DFLAGS) -O2 -ffree-form -ffree-line-length-none \ FCFLAGS = \$(DFLAGS) -O2 -ffree-form -ffree-line-length-none \
-ftree-vectorize -funroll-loops -msse2 \ -ftree-vectorize -funroll-loops -msse2 \
-std=f2008 \ -std=f2008 \
-fopenmp -ftree-vectorize -funroll-loops \ -fopenmp -ftree-vectorize -funroll-loops \
-I${lib.getDev libxc}/include -I${lib.getDev libxsmm}/include \ -I${lib.getDev libint}/include ${lib.optionalString enableElpa "$(pkg-config --variable=fcflags elpa)"} \
-I${libint}/include ${lib.optionalString enableElpa "$(pkg-config --variable=fcflags elpa)"} -I${lib.getDev sirius}/include/sirius \
-I${lib.getDev libxc}/include -I${lib.getDev libxsmm}/include
LIBS = -lfftw3 -lfftw3_threads \ LIBS = -lfftw3 -lfftw3_threads \
-lscalapack -lblas -llapack \ -lscalapack -lblas -llapack \
-lxcf03 -lxc -lxsmmf -lxsmm -lsymspg \ -lxcf03 -lxc -lxsmmf -lxsmm -lsymspg \
-lint2 -lstdc++ -lvori \ -lint2 -lstdc++ -lvori \
-lgomp -lpthread -lm \ -lgomp -lpthread -lm \
-fopenmp ${lib.optionalString enableElpa "$(pkg-config --libs elpa)"} \ -fopenmp ${lib.optionalString enableElpa "$(pkg-config --libs elpa)"} \
-lz -ldl -lstdc++ ${lib.optionalString (mpi.pname == "openmpi") "$(mpicxx --showme:link)"} \ -lz -ldl ${lib.optionalString (mpi.pname == "openmpi") "$(mpicxx --showme:link)"} \
-lplumed -lplumed -lhdf5_fortran -lhdf5_hl -lhdf5 -lgsl -lsirius -lspla -lspfft -lvdwxc \
${lib.strings.optionalString (gpuBackend == "cuda") "-lcudart -lnvrtc -lcuda -lcublas"} \
${lib.strings.optionalString (gpuBackend == "rocm") "-lamdhip64 -lhipfft -lhipblas -lrocblas"}
LDFLAGS = \$(FCFLAGS) \$(LIBS) LDFLAGS = \$(FCFLAGS) \$(LIBS)
include ${plumed}/lib/plumed/src/lib/Plumed.inc include ${plumed}/lib/plumed/src/lib/Plumed.inc
EOF EOF