Merge pull request #258863 from sheepforce/cp2k

cp2k: enable SIRIUS planewave code and optimisations
2023-10-09 13:34:25 +02:00 · 2023-10-09 13:34:25 +02:00 · b7129e447b
commit b7129e447b
parent 55b51adc7f dcd7efd1f9
5 changed files with 375 additions and 13 deletions
--- a/pkgs/applications/science/chemistry/cp2k/default.nix
+++ b/pkgs/applications/science/chemistry/cp2k/default.nix
@ -1,15 +1,57 @@
-{ lib, stdenv, fetchFromGitHub, mpiCheckPhaseHook, python3, gfortran, blas, lapack
-, fftw, libint, libvori, libxc, mpi, gsl, scalapack, openssh, makeWrapper
-, libxsmm, spglib, which, pkg-config, plumed, zlib
+{ lib
+, stdenv
+, fetchFromGitHub
+, mpiCheckPhaseHook
+, python3
+, gfortran
+, blas
+, lapack
+, fftw
+, libint
+, libvori
+, libxc
+, mpi
+, gsl
+, scalapack
+, openssh
+, makeWrapper
+, libxsmm
+, spglib
+, which
+, pkg-config
+, plumed
+, zlib
+, hdf5-fortran
+, sirius
+, libvdwxc
+, spla
+, spfft
 , enableElpa ? false
 , elpa
-} :
+, gpuBackend ? "none"
+, cudaPackages
+# gpuVersion needs to be set for both CUDA as well as ROCM hardware.
+# gpuArch is only required for the ROCM stack.
+# Change to a value suitable for your target GPU.
+# For AMD values see https://github.com/cp2k/cp2k/blob/master/INSTALL.md#2v-rocmhip-support-for-amd-gpu
+# and for Nvidia see https://github.com/cp2k/cp2k/blob/master/INSTALL.md#2i-cuda-optional-improved-performance-on-gpu-systems
+, gpuVersion ? "Mi100"
+, gpuArch ? "gfx908"
+, rocm-core
+, hip
+, hipblas
+, hipfft
+, rocblas
+}:
+
+assert builtins.elem gpuBackend [ "none" "cuda" "rocm" ];

 let
  cp2kVersion = "psmp";
  arch = "Linux-x86-64-gfortran";

-in stdenv.mkDerivation rec {
+in
+stdenv.mkDerivation rec {
  pname = "cp2k";
  version = "2023.2";

@ -36,7 +78,16 @@ in stdenv.mkDerivation rec {
    lapack
    plumed
    zlib
-  ] ++ lib.optional enableElpa elpa;
+    hdf5-fortran
+    sirius
+    spla
+    spfft
+    libvdwxc
+  ]
+  ++ lib.optional enableElpa elpa
+  ++ lib.optional (gpuBackend == "cuda") cudaPackages.cudatoolkit
+  ++ lib.optional (gpuBackend == "rocm") [hip rocm-core hipblas hipfft rocblas]
+  ;

  propagatedBuildInputs = [ mpi ];
  propagatedUserEnvPkgs = [ mpi ];
@ -46,7 +97,7 @@ in stdenv.mkDerivation rec {
    "VERSION=${cp2kVersion}"
  ];

-  doCheck = true;
+  doCheck = gpuBackend == "none";

  enableParallelBuilding = true;

@ -64,25 +115,46 @@ in stdenv.mkDerivation rec {
    FC         = mpif90
    LD         = mpif90
    AR         = ar -r
+    ${lib.strings.optionalString (gpuBackend == "cuda") ''
+    OFFLOAD_CC = nvcc
+    OFFLOAD_FLAGS = -O3 -g -w --std=c++11
+    OFFLOAD_TARGET = cuda
+    GPUVER = ${gpuVersion}
+    CXX = mpicxx
+    CXXFLAGS = -std=c++11 -fopenmp
+    ''}
+    ${lib.strings.optionalString (gpuBackend == "rocm") ''
+    GPUVER = ${gpuVersion}
+    OFFLOAD_CC = hipcc
+    OFFLOAD_FLAGS = -fopenmp -m64 -pthread -fPIC -D__GRID_HIP -O2 --offload-arch=${gpuArch} --rocm-path=${rocm-core}
+    OFFLOAD_TARGET = hip
+    CXX = mpicxx
+    CXXFLAGS = -std=c++11 -fopenmp -D__HIP_PLATFORM_AMD__
+    ''}
    DFLAGS     = -D__FFTW3 -D__LIBXC -D__LIBINT -D__parallel -D__SCALAPACK \
                 -D__MPI_VERSION=3 -D__F2008 -D__LIBXSMM -D__SPGLIB \
                 -D__MAX_CONTR=4 -D__LIBVORI ${lib.optionalString enableElpa "-D__ELPA"} \
-                 -D__PLUMED2
-    CFLAGS    = -fopenmp
+                 -D__PLUMED2 -D__HDF5 -D__GSL -D__SIRIUS -D__LIBVDWXC -D__SPFFT -D__SPLA \
+                 ${lib.strings.optionalString (gpuBackend == "cuda") "-D__OFFLOAD_CUDA -D__DBCSR_ACC"} \
+                 ${lib.strings.optionalString (gpuBackend == "rocm") "-D__OFFLOAD_HIP -D__DBCSR_ACC -D__NO_OFFLOAD_PW"}
+    CFLAGS    = -fopenmp -I${lib.getDev hdf5-fortran}/include -I${lib.getDev gsl}/include
    FCFLAGS    = \$(DFLAGS) -O2 -ffree-form -ffree-line-length-none \
                 -ftree-vectorize -funroll-loops -msse2 \
                 -std=f2008 \
                 -fopenmp -ftree-vectorize -funroll-loops \
-                 -I${lib.getDev libxc}/include -I${lib.getDev libxsmm}/include \
-                 -I${libint}/include ${lib.optionalString enableElpa "$(pkg-config --variable=fcflags elpa)"}
+                 -I${lib.getDev libint}/include ${lib.optionalString enableElpa "$(pkg-config --variable=fcflags elpa)"} \
+                 -I${lib.getDev sirius}/include/sirius \
+                 -I${lib.getDev libxc}/include -I${lib.getDev libxsmm}/include
    LIBS       = -lfftw3 -lfftw3_threads \
                 -lscalapack -lblas -llapack \
                 -lxcf03 -lxc -lxsmmf -lxsmm -lsymspg \
                 -lint2 -lstdc++ -lvori \
                 -lgomp -lpthread -lm \
                 -fopenmp ${lib.optionalString enableElpa "$(pkg-config --libs elpa)"} \
-                 -lz -ldl -lstdc++ ${lib.optionalString (mpi.pname == "openmpi") "$(mpicxx --showme:link)"} \
-                 -lplumed
+                 -lz -ldl ${lib.optionalString (mpi.pname == "openmpi") "$(mpicxx --showme:link)"} \
+                 -lplumed -lhdf5_fortran -lhdf5_hl -lhdf5 -lgsl -lsirius -lspla -lspfft -lvdwxc \
+                 ${lib.strings.optionalString (gpuBackend == "cuda") "-lcudart -lnvrtc -lcuda -lcublas"} \
+                 ${lib.strings.optionalString (gpuBackend == "rocm") "-lamdhip64 -lhipfft -lhipblas -lrocblas"}
    LDFLAGS    = \$(FCFLAGS) \$(LIBS)
    include ${plumed}/lib/plumed/src/lib/Plumed.inc
    EOF
--- a/pkgs/by-name/co/costa/package.nix
+++ b/pkgs/by-name/co/costa/package.nix
@ -0,0 +1,40 @@
+{ stdenv
+, lib
+, fetchFromGitHub
+, cmake
+, mpi
+, scalapack
+, llvmPackages
+}:
+
+stdenv.mkDerivation rec {
+  pname = "COSTA";
+  version = "2.2.2";
+
+  src = fetchFromGitHub {
+    owner = "eth-cscs";
+    repo = pname;
+    rev = "v${version}";
+    hash = "sha256-jiAyZXC7wiuEnOLsQFFLxhN3AsGXN09q/gHC2Hrb2gg=";
+  };
+
+  nativeBuildInputs = [ cmake ];
+
+  buildInputs = [ scalapack ] ++ lib.optional stdenv.isDarwin llvmPackages.openmp;
+
+  propagatedBuildInputs = [ mpi ];
+
+  cmakeFlags = [
+    "-DCOSTA_SCALAPACK=CUSTOM"
+    "-DSCALAPACK_ROOT=${scalapack}"
+  ];
+
+
+  meta = with lib; {
+    description = "Distributed Communication-Optimal Shuffle and Transpose Algorithm";
+    homepage = "https://github.com/eth-cscs/COSTA";
+    license = licenses.bsd3;
+    platforms = platforms.linux;
+    maintainers = [ maintainers.sheepforce ];
+  };
+}
--- a/pkgs/by-name/si/sirius/package.nix
+++ b/pkgs/by-name/si/sirius/package.nix
@ -0,0 +1,117 @@
+{ stdenv
+, lib
+, fetchFromGitHub
+, cmake
+, pkg-config
+, mpi
+, mpiCheckPhaseHook
+, openssh
+, gfortran
+, blas
+, lapack
+, gsl
+, libxc
+, hdf5
+, spglib
+, spfft
+, spla
+, costa
+, scalapack
+, boost
+, eigen
+, libvdwxc
+, llvmPackages
+, gpuBackend ? "none"
+, cudaPackages
+, hip
+, rocblas
+}:
+
+assert builtins.elem gpuBackend [ "none" "cuda" "rocm" ];
+
+stdenv.mkDerivation rec {
+  pname = "SIRIUS";
+  version = "7.4.3";
+
+  src = fetchFromGitHub {
+    owner = "electronic-structure";
+    repo = pname;
+    rev = "v${version}";
+    hash = "sha256-s4rO+dePvtvn41wxCvbqgQGrEckWmfng7sPX2M8OPB0=";
+  };
+
+  postPatch = ''
+    substituteInPlace src/gpu/acc_blas_api.hpp \
+      --replace '#include <rocblas.h>' '#include <rocblas/rocblas.h>'
+  '';
+
+  nativeBuildInputs = [
+    cmake
+    gfortran
+    pkg-config
+  ];
+
+  buildInputs = [
+    blas
+    lapack
+    gsl
+    libxc
+    hdf5
+    spglib
+    spfft
+    spla
+    costa
+    scalapack
+    boost
+    eigen
+    libvdwxc
+  ]
+  ++ lib.optional (gpuBackend == "cuda") cudaPackages.cudatoolkit
+  ++ lib.optionals (gpuBackend == "rocm") [ hip rocblas ]
+  ++ lib.optional stdenv.isDarwin llvmPackages.openmp
+  ;
+
+  propagatedBuildInputs = [ mpi ];
+
+  cmakeFlags = [
+    "-DUSE_SCALAPACK=ON"
+    "-DBUILD_TESTING=ON"
+    "-DUSE_VDWXC=ON"
+    "-DCREATE_FORTRAN_BINDINGS=ON"
+    "-DUSE_OPENMP=ON"
+    "-DBUILD_TESTING=ON"
+  ]
+  ++ lib.optionals (gpuBackend == "cuda") [
+    "-DUSE_CUDA=ON"
+    "-DCUDA_TOOLKIT_ROOT_DIR=${cudaPackages.cudatoolkit}"
+  ]
+  ++ lib.optionals (gpuBackend == "rocm") [
+    "-DUSE_ROCM=ON"
+    "-DHIP_ROOT_DIR=${hip}"
+  ];
+
+  doCheck = true;
+
+  # Can not run parallel checks generally as it requires exactly multiples of 4 MPI ranks
+  checkPhase = ''
+    runHook preCheck
+
+    ctest --output-on-failure --label-exclude integration_test
+    ctest --output-on-failure -L cpu_serial
+
+    runHook postCheck
+  '';
+
+  nativeCheckInputs = [
+    mpiCheckPhaseHook
+    openssh
+  ];
+
+  meta = with lib; {
+    description = "Domain specific library for electronic structure calculations";
+    homepage = "https://github.com/electronic-structure/SIRIUS";
+    license = licenses.bsd2;
+    platforms = platforms.linux;
+    maintainers = [ maintainers.sheepforce ];
+  };
+}
--- a/pkgs/by-name/sp/spfft/package.nix
+++ b/pkgs/by-name/sp/spfft/package.nix
@ -0,0 +1,67 @@
+{ stdenv
+, lib
+, fetchFromGitHub
+, fftw
+, cmake
+, mpi
+, gfortran
+, llvmPackages
+, gpuBackend ? "none"
+, cudaPackages
+, hip
+, rocfft
+, hipfft
+}:
+
+assert builtins.elem gpuBackend [ "none" "cuda" "rocm" ];
+
+stdenv.mkDerivation rec {
+  pname = "SpFFT";
+  version = "1.0.6";
+
+  src = fetchFromGitHub {
+    owner = "eth-cscs";
+    repo = pname;
+    rev = "v${version}";
+    hash = "sha256-70fPbIYbW50CoMdRS93hZKSbMEIQvZGFNE+eiRvuw0o=";
+  };
+
+  nativeBuildInputs = [
+    cmake
+    gfortran
+  ];
+
+  buildInputs = [
+    fftw
+  ]
+  ++ lib.optional (gpuBackend == "cuda") cudaPackages.cudatoolkit
+  ++ lib.optionals (gpuBackend == "rocm") [ hip rocfft hipfft ]
+  ++ lib.optional stdenv.isDarwin llvmPackages.openmp
+  ;
+
+  propagatedBuildInputs = [ mpi ];
+
+  cmakeFlags = [
+    "-DSPFFT_OMP=ON"
+    "-DSPFFT_MPI=ON"
+    "-DSPFFT_SINGLE_PRECISION=OFF"
+    "-DSPFFT_FORTRAN=ON"
+    # Required due to broken CMake files
+    "-DCMAKE_INSTALL_LIBDIR=lib"
+    "-DCMAKE_INSTALL_INCLUDEDIR=include"
+  ]
+  ++ lib.optional (gpuBackend == "cuda") "-DSPFFT_GPU_BACKEND=CUDA"
+  ++ lib.optionals (gpuBackend == "rocm") [
+    "-DSPFFT_GPU_BACKEND=ROCM"
+    "-DHIP_ROOT_DIR=${hip}"
+  ];
+
+
+  meta = with lib; {
+    description = "Sparse 3D FFT library with MPI, OpenMP, CUDA and ROCm support";
+    homepage = "https://github.com/eth-cscs/SpFFT";
+    license = licenses.bsd3;
+    maintainers = [ maintainers.sheepforce ];
+    platforms = platforms.linux;
+  };
+}
--- a/pkgs/by-name/sp/spla/package.nix
+++ b/pkgs/by-name/sp/spla/package.nix
@ -0,0 +1,66 @@
+{ stdenv
+, lib
+, fetchFromGitHub
+, cmake
+, mpi
+, blas
+, gfortran
+, llvmPackages
+, gpuBackend ? "none"
+, cudaPackages
+, hip
+, rocblas
+}:
+
+assert builtins.elem gpuBackend [ "none" "cuda" "rocm" ];
+
+stdenv.mkDerivation rec {
+  pname = "spla";
+  version = "1.5.5";
+
+  src = fetchFromGitHub {
+    owner = "eth-cscs";
+    repo = pname;
+    rev = "v${version}";
+    hash = "sha256-71QpwTsRogH+6Bik9DKwezl9SqwoLxQt4SZ7zw5X6DE=";
+  };
+
+  postPatch = ''
+    substituteInPlace src/gpu_util/gpu_blas_api.hpp \
+      --replace '#include <rocblas.h>' '#include <rocblas/rocblas.h>'
+  '';
+
+  nativeBuildInputs = [
+    cmake
+    gfortran
+  ];
+
+  buildInputs = [
+    blas
+  ]
+  ++ lib.optional (gpuBackend == "cuda") cudaPackages.cudatoolkit
+  ++ lib.optionals (gpuBackend == "rocm") [ hip rocblas rocblas ]
+  ++ lib.optional stdenv.isDarwin llvmPackages.openmp
+  ;
+
+  propagatedBuildInputs = [ mpi ];
+
+  cmakeFlags = [
+    "-DSPLA_OMP=ON"
+    "-DSPLA_FORTRAN=ON"
+    "-DSPLA_INSTALL=ON"
+    # Required due to broken CMake files
+    "-DCMAKE_INSTALL_LIBDIR=lib"
+    "-DCMAKE_INSTALL_INCLUDEDIR=include"
+  ]
+  ++ lib.optional (gpuBackend == "cuda") "-DSPLA_GPU_BACKEND=CUDA"
+  ++ lib.optional (gpuBackend == "rocm") [ "-DSPLA_GPU_BACKEND=ROCM" ]
+  ;
+
+  meta = with lib; {
+    description = "Specialized Parallel Linear Algebra, providing distributed GEMM functionality for specific matrix distributions with optional GPU acceleration";
+    homepage = "https://github.com/eth-cscs/spla";
+    license = licenses.bsd3;
+    maintainers = [ maintainers.sheepforce ];#
+  };
+}