diff --git a/nixos/modules/services/misc/ollama.nix b/nixos/modules/services/misc/ollama.nix
index d9359d2b5cd4..0d3574a2bac0 100644
--- a/nixos/modules/services/misc/ollama.nix
+++ b/nixos/modules/services/misc/ollama.nix
@@ -1,9 +1,14 @@
-{ config, lib, pkgs, ... }: let
+{ config, lib, pkgs, ... }:
+let
+  inherit (lib.types) nullOr enum;
   cfg = config.services.ollama;
-
-in {
-
+  ollamaPackage = cfg.package.override {
+    inherit (cfg) acceleration;
+    linuxPackages.nvidia_x11 = config.hardware.nvidia.package;
+  };
+in
+{
   options = {
     services.ollama = {
       enable = lib.mkEnableOption (
@@ -16,12 +21,22 @@
           Specifies the bind address on which the ollama server HTTP interface listens.
         '';
       };
+      acceleration = lib.mkOption {
+        type = nullOr (enum [ "rocm" "cuda" ]);
+        default = null;
+        example = "rocm";
+        description = lib.mdDoc ''
+          Specifies the interface to use for hardware acceleration.
+
+          - `rocm`: supported by modern AMD GPUs
+          - `cuda`: supported by modern NVIDIA GPUs
+        '';
+      };
       package = lib.mkPackageOption pkgs "ollama" { };
     };
   };
 
   config = lib.mkIf cfg.enable {
-
     systemd = {
       services.ollama = {
         wantedBy = [ "multi-user.target" ];
@@ -33,7 +48,7 @@ in {
           OLLAMA_HOST = cfg.listenAddress;
         };
         serviceConfig = {
-          ExecStart = "${lib.getExe cfg.package} serve";
+          ExecStart = "${lib.getExe ollamaPackage} serve";
           WorkingDirectory = "/var/lib/ollama";
           StateDirectory = [ "ollama" ];
           DynamicUser = true;
@@ -41,10 +56,8 @@ in {
       };
     };
 
-    environment.systemPackages = [ cfg.package ];
-
+    environment.systemPackages = [ ollamaPackage ];
   };
 
-  meta.maintainers = with lib.maintainers; [ onny ];
-
+  meta.maintainers = with lib.maintainers; [ abysssol onny ];
 }
diff --git a/pkgs/tools/misc/ollama/default.nix b/pkgs/tools/misc/ollama/default.nix
index 6ce576644d49..32b7f5fcbfc3 100644
--- a/pkgs/tools/misc/ollama/default.nix
+++ b/pkgs/tools/misc/ollama/default.nix
@@ -8,6 +8,7 @@
 , makeWrapper
 , stdenv
+, pkgs
 
 , cmake
 , gcc12
 , clblast
@@ -17,98 +18,21 @@
 , linuxPackages
 , darwin
 
-, enableRocm ? false
-, enableCuda ? false
+  # one of `[ null "rocm" "cuda" ]`
+, acceleration ? null
 }:
 
 let
   pname = "ollama";
-  version = "0.1.24";
+  version = "0.1.26";
 
-  warnIfNotLinux = warning: (lib.warnIfNot stdenv.isLinux warning stdenv.isLinux);
-  gpuWarning = api: "building ollama with ${api} is only supported on linux; falling back to cpu";
-  rocmIsEnabled = enableRocm && (warnIfNotLinux (gpuWarning "rocm"));
-  cudaIsEnabled = enableCuda && (warnIfNotLinux (gpuWarning "cuda"));
-  enableLinuxGpu = rocmIsEnabled || cudaIsEnabled;
-
-  appleFrameworks = darwin.apple_sdk_11_0.frameworks;
-  metalFrameworks = [
-    appleFrameworks.Accelerate
-    appleFrameworks.Metal
-    appleFrameworks.MetalKit
-    appleFrameworks.MetalPerformanceShaders
-  ];
-
-  src = fetchFromGitHub {
-    owner = "jmorganca";
-    repo = "ollama";
-    rev = "v${version}";
-    hash = "sha256-GwZA1QUH8I8m2bGToIcMMaB5MBnioQP4+n1SauUJYP8=";
-    fetchSubmodules = true;
-  };
-  preparePatch = patch: hash: fetchpatch {
-    url = "file://${src}/llm/patches/${patch}";
-    inherit hash;
-    stripLen = 1;
-    extraPrefix = "llm/llama.cpp/";
-  };
-  inherit (lib) licenses platforms maintainers;
-  ollama = {
-    inherit pname version src;
-    vendorHash = "sha256-wXRbfnkbeXPTOalm7SFLvHQ9j46S/yLNbFy+OWNSamQ=";
-
-    nativeBuildInputs = [
-      cmake
-    ] ++ lib.optionals enableLinuxGpu [
-      makeWrapper
-    ] ++ lib.optionals stdenv.isDarwin
-      metalFrameworks;
-
-    patches = [
-      # remove uses of `git` in the `go generate` script
-      # instead use `patch` where necessary
-      ./remove-git.patch
-      # replace a hardcoded use of `g++` with `$CXX`
-      ./replace-gcc.patch
-
-      # ollama's patches of llama.cpp's example server
-      # `ollama/llm/generate/gen_common.sh` -> "apply temporary patches until fix is upstream"
-      (preparePatch "01-cache.diff" "sha256-PC4yN98hFvK+PEITiDihL8ki3bJuLVXrAm0CGf8GPJE=")
-      (preparePatch "02-shutdown.diff" "sha256-cElAp9Z9exxN964vB/YFuBhZoEcoAwGSMCnbh+l/V4Q=")
-    ];
-    postPatch = ''
-      # use a patch from the nix store in the `go generate` script
-      substituteInPlace llm/generate/gen_common.sh \
-        --subst-var-by cmakeIncludePatch '${./cmake-include.patch}'
-      # `ollama/llm/generate/gen_common.sh` -> "avoid duplicate main symbols when we link into the cgo binary"
-      substituteInPlace llm/llama.cpp/examples/server/server.cpp \
-        --replace-fail 'int main(' 'int __main('
-      # replace inaccurate version number with actual release version
-      substituteInPlace version/version.go --replace-fail 0.0.0 '${version}'
-    '';
-    preBuild = ''
-      export OLLAMA_SKIP_PATCHING=true
-      # build llama.cpp libraries for ollama
-      go generate ./...
-    '';
-
-    ldflags = [
-      "-s"
-      "-w"
-      "-X=github.com/jmorganca/ollama/version.Version=${version}"
-      "-X=github.com/jmorganca/ollama/server.mode=release"
-    ];
-
-    meta = {
-      description = "Get up and running with large language models locally";
-      homepage = "https://github.com/jmorganca/ollama";
-      license = licenses.mit;
-      platforms = platforms.unix;
-      mainProgram = "ollama";
-      maintainers = with maintainers; [ abysssol dit7ya elohmeier ];
-    };
-  };
+  validAccel = lib.assertOneOf "ollama.acceleration" acceleration [ null "rocm" "cuda" ];
+
+  warnIfNotLinux = api: (lib.warnIfNot stdenv.isLinux
+    "building ollama with `${api}` is only supported on linux; falling back to cpu"
+    stdenv.isLinux);
+  enableRocm = validAccel && (acceleration == "rocm") && (warnIfNotLinux "rocm");
+  enableCuda = validAccel && (acceleration == "cuda") && (warnIfNotLinux "cuda");
 
   rocmClang = linkFarm "rocm-clang" {
     llvm = rocmPackages.llvm.clang;
@@ -120,10 +44,6 @@ let
       rocmClang
     ];
   };
-  rocmVars = {
-    ROCM_PATH = rocmPath;
-    CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
-  };
 
   cudaToolkit = buildEnv {
     name = "cuda-toolkit";
@@ -133,50 +53,129 @@ let
       cudaPackages.cuda_cudart
     ];
   };
-  cudaVars = {
-    CUDA_LIB_DIR = "${cudaToolkit}/lib";
-    CUDACXX = "${cudaToolkit}/bin/nvcc";
-    CUDAToolkit_ROOT = cudaToolkit;
-  };
-
-  linuxGpuLibs = {
-    buildInputs = lib.optionals rocmIsEnabled [
-      rocmPackages.clr
-      rocmPackages.hipblas
-      rocmPackages.rocblas
-      rocmPackages.rocsolver
-      rocmPackages.rocsparse
-      libdrm
-    ] ++ lib.optionals cudaIsEnabled [
-      cudaPackages.cuda_cudart
-    ];
-  };
-
-  appleGpuLibs = { buildInputs = metalFrameworks; };
-
-  runtimeLibs = lib.optionals rocmIsEnabled [
+  runtimeLibs = lib.optionals enableRocm [
     rocmPackages.rocm-smi
-  ] ++ lib.optionals cudaIsEnabled [
+  ] ++ lib.optionals enableCuda [
     linuxPackages.nvidia_x11
   ];
-  runtimeLibWrapper = {
-    postFixup = ''
-      mv "$out/bin/${pname}" "$out/bin/.${pname}-unwrapped"
-      makeWrapper "$out/bin/.${pname}-unwrapped" "$out/bin/${pname}" \
-        --suffix LD_LIBRARY_PATH : '${lib.makeLibraryPath runtimeLibs}'
-    '';
-  };
+
+  appleFrameworks = darwin.apple_sdk_11_0.frameworks;
+  metalFrameworks = [
+    appleFrameworks.Accelerate
+    appleFrameworks.Metal
+    appleFrameworks.MetalKit
+    appleFrameworks.MetalPerformanceShaders
+  ];
+
   goBuild =
-    if cudaIsEnabled then
+    if enableCuda then
       buildGoModule.override { stdenv = overrideCC stdenv gcc12; }
     else
       buildGoModule;
-in
-goBuild (ollama
-  // (lib.optionalAttrs rocmIsEnabled rocmVars)
-  // (lib.optionalAttrs cudaIsEnabled cudaVars)
-  // (lib.optionalAttrs enableLinuxGpu linuxGpuLibs)
-  // (lib.optionalAttrs enableLinuxGpu runtimeLibWrapper)
-  // (lib.optionalAttrs stdenv.isDarwin appleGpuLibs))
+
+  src = fetchFromGitHub {
+    owner = "jmorganca";
+    repo = "ollama";
+    rev = "v${version}";
+    hash = "sha256-Kw3tt9ayEMgI2V6OeaOkWfNwqfCL7MDD/nN5iXk5LnY=";
+    fetchSubmodules = true;
+  };
+  preparePatch = patch: hash: fetchpatch {
+    url = "file://${src}/llm/patches/${patch}";
+    inherit hash;
+    stripLen = 1;
+    extraPrefix = "llm/llama.cpp/";
+  };
+  inherit (lib) licenses platforms maintainers;
+in
+goBuild ((lib.optionalAttrs enableRocm {
+  ROCM_PATH = rocmPath;
+  CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
+}) // (lib.optionalAttrs enableCuda {
+  CUDA_LIB_DIR = "${cudaToolkit}/lib";
+  CUDACXX = "${cudaToolkit}/bin/nvcc";
+  CUDAToolkit_ROOT = cudaToolkit;
+}) // {
+  inherit pname version src;
+  vendorHash = "sha256-zTrBighPBqZ9hhkEV3UawJZUYyPRay7+P6wkhDtpY7M=";
+
+  nativeBuildInputs = [
+    cmake
+  ] ++ lib.optionals (enableRocm || enableCuda) [
+    makeWrapper
+  ] ++ lib.optionals stdenv.isDarwin
+    metalFrameworks;
+
+  buildInputs = lib.optionals enableRocm [
+    rocmPackages.clr
+    rocmPackages.hipblas
+    rocmPackages.rocblas
+    rocmPackages.rocsolver
+    rocmPackages.rocsparse
+    libdrm
+  ] ++ lib.optionals enableCuda [
+    cudaPackages.cuda_cudart
+  ] ++ lib.optionals stdenv.isDarwin
+    metalFrameworks;
+
+  patches = [
+    # remove uses of `git` in the `go generate` script
+    # instead use `patch` where necessary
+    ./remove-git.patch
+    # replace a hardcoded use of `g++` with `$CXX`
+    ./replace-gcc.patch
+
+    # ollama's patches of llama.cpp's example server
+    # `ollama/llm/generate/gen_common.sh` -> "apply temporary patches until fix is upstream"
+    (preparePatch "01-cache.diff" "sha256-MTTln2G0G8dntihUzEjPM1ruTsApb4ZToBczJb8EG68=")
+    (preparePatch "02-cudaleaks.diff" "sha256-Cu7E9iEcvddPL9mPPI5Z96qmwWigi3f0WgSpPRjGc88=")
+  ];
+  postPatch = ''
+    # use a patch from the nix store in the `go generate` script
+    substituteInPlace llm/generate/gen_common.sh \
+      --subst-var-by cmakeIncludePatch '${./cmake-include.patch}'
+    # `ollama/llm/generate/gen_common.sh` -> "avoid duplicate main symbols when we link into the cgo binary"
+    substituteInPlace llm/llama.cpp/examples/server/server.cpp \
+      --replace-fail 'int main(' 'int __main('
+    # replace inaccurate version number with actual release version
+    substituteInPlace version/version.go --replace-fail 0.0.0 '${version}'
+  '';
+  preBuild = ''
+    export OLLAMA_SKIP_PATCHING=true
+    # build llama.cpp libraries for ollama
+    go generate ./...
+  '';
+  postFixup = ''
+    # the app doesn't appear functional at the moment, so hide it
+    mv "$out/bin/app" "$out/bin/.ollama-app"
+  '' + lib.optionalString (enableRocm || enableCuda) ''
+    # expose runtime libraries necessary to use the gpu
+    mv "$out/bin/ollama" "$out/bin/.ollama-unwrapped"
+    makeWrapper "$out/bin/.ollama-unwrapped" "$out/bin/ollama" \
+      --suffix LD_LIBRARY_PATH : '/run/opengl-driver/lib:${lib.makeLibraryPath runtimeLibs}'
+  '';
+
+  ldflags = [
+    "-s"
+    "-w"
+    "-X=github.com/jmorganca/ollama/version.Version=${version}"
+    "-X=github.com/jmorganca/ollama/server.mode=release"
+  ];
+
+  # for now, just test that rocm and cuda build
+  passthru.tests = lib.optionalAttrs stdenv.isLinux {
+    rocm = pkgs.ollama.override { acceleration = "rocm"; };
+    cuda = pkgs.ollama.override { acceleration = "cuda"; };
+  };
+
+  meta = {
+    description = "Get up and running with large language models locally";
+    homepage = "https://github.com/jmorganca/ollama";
+    license = licenses.mit;
+    platforms = platforms.unix;
+    mainProgram = "ollama";
+    maintainers = with maintainers; [ abysssol dit7ya elohmeier ];
+  };
})
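
With the diff applied, GPU offload becomes a single module option instead of a package override. A minimal sketch of a NixOS configuration using the new option (the "rocm" value is illustrative; "cuda" is the equivalent choice for NVIDIA hardware):

    { ... }:
    {
      services.ollama = {
        enable = true;
        acceleration = "rocm";
      };
    }

The same switch is also exposed on the package itself, e.g. `pkgs.ollama.override { acceleration = "cuda"; }`, which is exactly how the passthru.tests above exercise both backends.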