diff --git a/pkgs/tools/misc/ollama/default.nix b/pkgs/tools/misc/ollama/default.nix
index be1864024888..30be00d72a15 100644
--- a/pkgs/tools/misc/ollama/default.nix
+++ b/pkgs/tools/misc/ollama/default.nix
@@ -1,35 +1,50 @@
 { lib
 , buildGoModule
 , fetchFromGitHub
+, llama-cpp
 , stdenv
-, darwin
 }:
 
 buildGoModule rec {
   pname = "ollama";
-  version = "0.0.17";
+  version = "0.1.7";
 
   src = fetchFromGitHub {
     owner = "jmorganca";
     repo = "ollama";
     rev = "v${version}";
-    hash = "sha256-idsFcjsRD1zPmG742gnYQJcgSWDA2DLMHksCFNe2GiY=";
+    hash = "sha256-rzcuRU2qcYTMo/GxiSHwJYnvA9samfWlztMEhOGzbRg=";
   };
 
-  buildInputs = lib.optionals stdenv.isDarwin (with darwin.apple_sdk_11_0.frameworks; [
-    Accelerate
-    MetalPerformanceShaders
-    MetalKit
-  ]);
+  patches = [
+    # disable passing the deprecated gqa flag to llama-cpp-server
+    # see https://github.com/ggerganov/llama.cpp/issues/2975
+    ./disable-gqa.patch
 
-  vendorHash = "sha256-IgEf/WOc1eNGCif1fViIFxbgZAd6mHBqfxcaqH/WvGg=";
+    # replace the call to the bundled llama-cpp-server with the one in the llama-cpp package
+    ./set-llamacpp-path.patch
+  ];
 
-  ldflags = [ "-s" "-w" ];
+  postPatch = ''
+    substituteInPlace llm/llama.go \
+      --subst-var-by llamaCppServer "${llama-cpp}/bin/llama-cpp-server"
+  '';
+
+  vendorHash = "sha256-Qt5QVqRkwK61BJPVhFWtox6b9E8BpAIseNB0yhh+/90=";
+
+  ldflags = [
+    "-s"
+    "-w"
+    "-X=github.com/jmorganca/ollama/version.Version=${version}"
+    "-X=github.com/jmorganca/ollama/server.mode=release"
+  ];
 
   meta = with lib; {
     description = "Get up and running with large language models locally";
     homepage = "https://github.com/jmorganca/ollama";
     license = licenses.mit;
-    maintainers = with maintainers; [ dit7ya ];
+    mainProgram = "ollama";
+    maintainers = with maintainers; [ dit7ya elohmeier ];
+    platforms = platforms.unix;
   };
 }
diff --git a/pkgs/tools/misc/ollama/disable-gqa.patch b/pkgs/tools/misc/ollama/disable-gqa.patch
new file mode 100644
index 000000000000..b54440cd3d53
--- /dev/null
+++ b/pkgs/tools/misc/ollama/disable-gqa.patch
@@ -0,0 +1,15 @@
+diff --git a/llm/llama.go b/llm/llama.go
+index 0b460e9..b79e04a 100644
+--- a/llm/llama.go
++++ b/llm/llama.go
+@@ -299,10 +299,6 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
+ 		params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", numGPU))
+ 	}
+ 
+-	if opts.NumGQA > 0 {
+-		params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
+-	}
+-
+ 	if len(adapters) > 0 {
+ 		// TODO: applying multiple adapters is not supported by the llama.cpp server yet
+ 		params = append(params, "--lora", adapters[0])
diff --git a/pkgs/tools/misc/ollama/set-llamacpp-path.patch b/pkgs/tools/misc/ollama/set-llamacpp-path.patch
new file mode 100644
index 000000000000..e90e552bab45
--- /dev/null
+++ b/pkgs/tools/misc/ollama/set-llamacpp-path.patch
@@ -0,0 +1,23 @@
+diff --git a/llm/llama.go b/llm/llama.go
+index f23d5d8..6563550 100644
+--- a/llm/llama.go
++++ b/llm/llama.go
+@@ -25,7 +25,6 @@ import (
+ 	"github.com/jmorganca/ollama/api"
+ )
+ 
+-//go:embed llama.cpp/*/build/*/bin/*
+ var llamaCppEmbed embed.FS
+ 
+ type ModelRunner struct {
+@@ -33,6 +32,10 @@ type ModelRunner struct {
+ }
+ 
+ func chooseRunners(workDir, runnerType string) []ModelRunner {
++	return []ModelRunner{
++		{Path: "@llamaCppServer@"},
++	}
++
+ 	buildPath := path.Join("llama.cpp", runnerType, "build")
+ 	var runners []string
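
Note (not part of the diff): because llama-cpp is now an ordinary function argument of this expression rather than a bundled copy, a downstream user can swap in a customised llama.cpp build with the standard override mechanism. A minimal sketch, assuming the file is wired up via callPackage as pkgs.ollama; the myLlamaCpp binding is hypothetical:

    let
      # customise the llama.cpp that backs ollama, e.g. with extra patches or flags
      myLlamaCpp = pkgs.llama-cpp.overrideAttrs (old: {
        # additional build tweaks would go here
      });
    in
      # feed the customised build back into the ollama expression above
      pkgs.ollama.override { llama-cpp = myLlamaCpp; }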