diff --git a/pkgs/by-name/lo/local-ai/lib.nix b/pkgs/by-name/lo/local-ai/lib.nix
new file mode 100644
index 000000000000..46f3ba88e5db
--- /dev/null
+++ b/pkgs/by-name/lo/local-ai/lib.nix
@@ -0,0 +1,30 @@
+{ lib
+, writers
+, writeText
+, linkFarmFromDrvs
+}: {
+  genModels = configs:
+    let
+      name = lib.strings.sanitizeDerivationName
+        (builtins.concatStringsSep "_" ([ "local-ai-models" ] ++ (builtins.attrNames configs)));
+
+      genModelFiles = name: config:
+        let
+          templateName = type: name + "_" + type;
+
+          config' = lib.recursiveUpdate config ({
+            inherit name;
+          } // lib.optionalAttrs (lib.isDerivation config.parameters.model) {
+            parameters.model = config.parameters.model.name;
+          } // lib.optionalAttrs (config ? template) {
+            template = builtins.mapAttrs (n: _: templateName n) config.template;
+          });
+        in
+        [ (writers.writeYAML "${name}.yaml" config') ]
+        ++ lib.optional (lib.isDerivation config.parameters.model)
+          config.parameters.model
+        ++ lib.optionals (config ? template)
+          (lib.mapAttrsToList (n: writeText "${templateName n}.tmpl") config.template);
+    in
+    linkFarmFromDrvs name (lib.flatten (lib.mapAttrsToList genModelFiles configs));
+}
diff --git a/pkgs/by-name/lo/local-ai/module.nix b/pkgs/by-name/lo/local-ai/module.nix
new file mode 100644
index 000000000000..d7b70048121f
--- /dev/null
+++ b/pkgs/by-name/lo/local-ai/module.nix
@@ -0,0 +1,56 @@
+{ pkgs, config, lib, ... }:
+let
+  cfg = config.services.local-ai;
+  inherit (lib) mkOption types;
+in
+{
+  options.services.local-ai = {
+    enable = lib.mkEnableOption "local-ai";
+
+    package = lib.mkPackageOption pkgs "local-ai" { };
+
+    extraArgs = mkOption {
+      type = types.listOf types.str;
+      default = [ ];
+    };
+
+    port = mkOption {
+      type = types.port;
+      default = 8080;
+    };
+
+    threads = mkOption {
+      type = types.int;
+      default = 1;
+    };
+
+    models = mkOption {
+      type = types.either types.package types.str;
+      default = "models";
+    };
+  };
+
+  config = lib.mkIf cfg.enable {
+    systemd.services.local-ai = {
+      wantedBy = [ "multi-user.target" ];
+      serviceConfig = {
+        DynamicUser = true;
+        ExecStart = lib.escapeShellArgs ([
+          "${cfg.package}/bin/local-ai"
+          "--debug"
+          "--address"
+          ":${toString cfg.port}"
+          "--threads"
+          (toString cfg.threads)
+          "--localai-config-dir"
+          "."
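+          # "." and the default "models" path are relative, resolved against
+          # WorkingDirectory (%t/local-ai) set below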
+          "--models-path"
+          (toString cfg.models)
+        ]
+        ++ cfg.extraArgs);
+        RuntimeDirectory = "local-ai";
+        WorkingDirectory = "%t/local-ai";
+      };
+    };
+  };
+}
diff --git a/pkgs/by-name/lo/local-ai/package.nix b/pkgs/by-name/lo/local-ai/package.nix
index f597097dc8e3..061122c3f848 100644
--- a/pkgs/by-name/lo/local-ai/package.nix
+++ b/pkgs/by-name/lo/local-ai/package.nix
@@ -6,6 +6,8 @@
 , fetchpatch
 , fetchFromGitHub
 , protobuf
+, protoc-gen-go
+, protoc-gen-go-grpc
 , grpc
 , openssl
 , llama-cpp
@@ -61,8 +63,8 @@ let
   inherit (cudaPackages) libcublas cuda_nvcc cuda_cccl cuda_cudart cudatoolkit;
 
-  go-llama-ggml = effectiveStdenv.mkDerivation {
-    name = "go-llama-ggml";
+  go-llama = effectiveStdenv.mkDerivation {
+    name = "go-llama";
     src = fetchFromGitHub {
       owner = "go-skynet";
       repo = "go-llama.cpp";
@@ -98,8 +100,8 @@ let
     src = fetchFromGitHub {
       owner = "ggerganov";
       repo = "llama.cpp";
-      rev = "1b67731e184e27a465b8c5476061294a4af668ea";
-      hash = "sha256-0WWbsklpW6HhFRkvWpYh8Lhi8VIansS/zmyIKNQRkIs=";
+      rev = "784e11dea1f5ce9638851b2b0dddb107e2a609c8";
+      hash = "sha256-yAQAUo5J+a6O2kTqhFL1UH0tANxpQn3JhAd3MByaC6I=";
       fetchSubmodules = true;
     };
     postPatch = prev.postPatch + ''
@@ -252,8 +254,8 @@ let
     src = fetchFromGitHub {
      owner = "ggerganov";
       repo = "whisper.cpp";
-      rev = "8f253ef3af1c62c04316ba4afa7145fc4d701a8c";
-      hash = "sha256-yHHjhpQIn99A/hqFwAb7TfTf4Q9KnKat93zyXS70bT8=";
+      rev = "858452d58dba3acdc3431c9bced2bb8cfd9bf418";
+      hash = "sha256-2fT3RgGpBex1mF6GJsVDo4rb0F31YqxTymsXcrpQAZk=";
     };
 
     nativeBuildInputs = [ cmake pkg-config ]
@@ -371,18 +373,18 @@ let
     stdenv;
 
   pname = "local-ai";
-  version = "2.12.4";
+  version = "2.13.0";
 
   src = fetchFromGitHub {
     owner = "go-skynet";
     repo = "LocalAI";
     rev = "v${version}";
-    hash = "sha256-piu2B6u4ZfxiOd9SXrE7jiiiwL2SM8EqXo2s5qeKRl0=";
+    hash = "sha256-jZE8Ow9FFhnx/jvsURLYlYtSuKpE4UWBezxg/mpHs9g=";
   };
 
   self = buildGoModule.override { stdenv = effectiveStdenv; } {
     inherit pname version src;
-    vendorHash = "sha256-8Hu1y/PK21twnB7D22ltslFFzRrsB8d1R2hkgIFB/XY=";
+    vendorHash = "sha256-nWNK2YekQnBSLx4ouNSe6esIe0yFuo69E0HStYLQANg=";
 
     env.NIX_CFLAGS_COMPILE = lib.optionalString with_stablediffusion
       " -isystem ${opencv}/include/opencv4";
@@ -392,12 +394,12 @@ let
       in
       ''
         sed -i Makefile \
-          -e 's;git clone.*go-llama-ggml$;${cp} ${go-llama-ggml} sources/go-llama-ggml;' \
+          -e 's;git clone.*go-llama\.cpp$;${cp} ${go-llama} sources/go-llama\.cpp;' \
           -e 's;git clone.*gpt4all$;${cp} ${gpt4all} sources/gpt4all;' \
           -e 's;git clone.*go-piper$;${cp} ${if with_tts then go-piper else go-piper.src} sources/go-piper;' \
-          -e 's;git clone.*go-rwkv$;${cp} ${go-rwkv} sources/go-rwkv;' \
+          -e 's;git clone.*go-rwkv\.cpp$;${cp} ${go-rwkv} sources/go-rwkv\.cpp;' \
           -e 's;git clone.*whisper\.cpp$;${cp} ${whisper-cpp.src} sources/whisper\.cpp;' \
-          -e 's;git clone.*go-bert$;${cp} ${go-bert} sources/go-bert;' \
+          -e 's;git clone.*go-bert\.cpp$;${cp} ${go-bert} sources/go-bert\.cpp;' \
           -e 's;git clone.*diffusion$;${cp} ${if with_stablediffusion then go-stable-diffusion else go-stable-diffusion.src} sources/go-stable-diffusion;' \
           -e 's;git clone.*go-tiny-dream$;${cp} ${if with_tinydream then go-tiny-dream else go-tiny-dream.src} sources/go-tiny-dream;' \
          -e 's, && git checkout.*,,g' \
@@ -415,14 +417,19 @@ let
       ++ lib.optionals with_stablediffusion go-stable-diffusion.buildInputs
       ++ lib.optionals with_tts go-piper.buildInputs;
 
-    nativeBuildInputs = [ makeWrapper ]
-      ++ lib.optionals with_cublas [ cuda_nvcc ];
+    nativeBuildInputs = [
+      protobuf
+      protoc-gen-go
+      protoc-gen-go-grpc
+      makeWrapper
+    ]
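+      # protoc (from protobuf) and the Go codegen plugins above are needed by
+      # the protogen-go Makefile target invoked in modBuildPhase below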
+      ++ lib.optionals with_cublas [ cuda_nvcc ];
 
     enableParallelBuilding = false;
 
     modBuildPhase = ''
       mkdir sources
-      make prepare-sources
+      make prepare-sources protogen-go
       go mod tidy -v
     '';
 
@@ -486,7 +493,7 @@ let
 
   passthru.local-packages = {
     inherit
-      go-tiny-dream go-rwkv go-bert go-llama-ggml gpt4all go-piper
+      go-tiny-dream go-rwkv go-bert go-llama gpt4all go-piper
       llama-cpp-grpc whisper-cpp go-tiny-dream-ncnn espeak-ng' piper-phonemize
       piper-tts';
   };
@@ -498,6 +505,7 @@ let
   };
 
   passthru.tests = callPackages ./tests.nix { inherit self; };
+  passthru.lib = callPackages ./lib.nix { };
 
   meta = with lib; {
     description = "OpenAI alternative to run local LLMs, image and audio generation";
diff --git a/pkgs/by-name/lo/local-ai/tests.nix b/pkgs/by-name/lo/local-ai/tests.nix
index 82d1b775dab8..7cebc6fff938 100644
--- a/pkgs/by-name/lo/local-ai/tests.nix
+++ b/pkgs/by-name/lo/local-ai/tests.nix
@@ -5,156 +5,244 @@
 , fetchurl
 , writers
 , symlinkJoin
-, linkFarmFromDrvs
 , jq
 }:
+let
+  common-config = { config, ... }: {
+    imports = [ ./module.nix ];
+    services.local-ai = {
+      enable = true;
+      package = self;
+      threads = config.virtualisation.cores;
+    };
+  };
+
+  inherit (self.lib) genModels;
+in
 {
   version = testers.testVersion {
     package = self;
     version = "v" + self.version;
+    command = "local-ai --help";
   };
 
-  health =
-    let
-      port = "8080";
-    in
-    testers.runNixOSTest {
-      name = self.name + "-health";
-      nodes.machine = {
-        systemd.services.local-ai = {
-          wantedBy = [ "multi-user.target" ];
-          serviceConfig.ExecStart = "${self}/bin/local-ai --debug --localai-config-dir . --address :${port}";
-        };
-      };
-      testScript = ''
+  health = testers.runNixOSTest ({ config, ... }: {
+    name = self.name + "-health";
+    nodes.machine = common-config;
+    testScript =
+      let
+        port = "8080";
+      in
+      ''
        machine.wait_for_open_port(${port})
        machine.succeed("curl -f http://localhost:${port}/readyz")
      '';
-    };
+  });
 
-  # https://localai.io/docs/getting-started/manual/
-  llama =
+  # https://localai.io/features/embeddings/#bert-embeddings
+  bert =
     let
-      port = "8080";
-      gguf = fetchurl {
-        url = "https://huggingface.co/TheBloke/Luna-AI-Llama2-Uncensored-GGUF/resolve/main/luna-ai-llama2-uncensored.Q4_K_M.gguf";
-        sha256 = "6a9dc401c84f0d48996eaa405174999c3a33bf12c2bfd8ea4a1e98f376de1f15";
+      model = "embedding";
+      model-configs.${model} = {
+        # Note: q4_0 and q4_1 models cannot be loaded
+        parameters.model = fetchurl {
+          url = "https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-f16.bin";
+          sha256 = "9c195b2453a4fef60a4f6be3a88a39211366214df6498a4fe4885c9e22314f50";
+        };
+        backend = "bert-embeddings";
+        embeddings = true;
+      };
+
+      models = genModels model-configs;
+
+      requests.request = {
+        inherit model;
+        input = "Your text string goes here";
       };
-      models = linkFarmFromDrvs "models" [
-        gguf
-      ];
     in
     testers.runNixOSTest {
-      name = self.name + "-llama";
-      nodes.machine =
-        let
-          cores = 4;
-        in
-        {
-          virtualisation = {
-            inherit cores;
-            memorySize = 8192;
-          };
-          systemd.services.local-ai = {
-            wantedBy = [ "multi-user.target" ];
-            serviceConfig.ExecStart = "${self}/bin/local-ai --debug --threads ${toString cores} --models-path ${models} --localai-config-dir . --address :${port}";
-          };
-        };
+      name = self.name + "-bert";
+      nodes.machine = {
+        imports = [ common-config ];
+        virtualisation.cores = 2;
+        virtualisation.memorySize = 2048;
+        services.local-ai.models = models;
+      };
+      passthru = { inherit models requests; };
       testScript =
        let
-          # https://localai.io/features/text-generation/#chat-completions
-          request-chat-completions = {
-            model = gguf.name;
-            messages = [{ role = "user"; content = "Say this is a test!"; }];
-            temperature = 0.7;
-          };
-          # https://localai.io/features/text-generation/#edit-completions
-          request-edit-completions = {
-            model = gguf.name;
-            instruction = "rephrase";
-            input = "Black cat jumped out of the window";
-            temperature = 0.7;
-          };
-          # https://localai.io/features/text-generation/#completions
-          request-completions = {
-            model = gguf.name;
-            prompt = "A long time ago in a galaxy far, far away";
-            temperature = 0.7;
-          };
+          port = "8080";
        in
        ''
          machine.wait_for_open_port(${port})
          machine.succeed("curl -f http://localhost:${port}/readyz")
          machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
-          machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${gguf.name}\"' models.json")
-          machine.succeed("curl -f http://localhost:${port}/v1/chat/completions --json @${writers.writeJSON "request-chat-completions.json" request-chat-completions} --output chat-completions.json")
-          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"chat.completion\"' chat-completions.json")
-          machine.succeed("curl -f http://localhost:${port}/v1/edits --json @${writers.writeJSON "request-edit-completions.json" request-edit-completions} --output edit-completions.json")
-          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"edit\"' edit-completions.json")
-          machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" request-completions} --output completions.json")
-          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object ==\"text_completion\"' completions.json")
+          machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${model}\"' models.json")
+          machine.succeed("curl -f http://localhost:${port}/embeddings --json @${writers.writeJSON "request.json" requests.request} --output embeddings.json")
+          machine.succeed("${jq}/bin/jq --exit-status 'debug | .model == \"${model}\"' embeddings.json")
        '';
      };
-} // lib.optionalAttrs self.features.with_tts {
-  # https://localai.io/features/text-to-audio/#piper
-  tts =
+} // lib.optionalAttrs (!self.features.with_cublas && !self.features.with_clblas) {
+  # https://localai.io/docs/getting-started/manual/
+  llama =
     let
-      port = "8080";
-      voice-en-us = fetchzip {
-        url = "https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz";
-        hash = "sha256-5wf+6H5HeQY0qgdqnAG1vSqtjIFM9lXH53OgouuPm0M=";
-        stripRoot = false;
+      model = "gpt-3.5-turbo";
+
+      # https://localai.io/advanced/#full-config-model-file-reference
+      model-configs.${model} = rec {
+        context_size = 8192;
+        parameters = {
+          # https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF
+          # https://ai.meta.com/blog/meta-llama-3/
+          model = fetchurl {
+            url = "https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf";
+            sha256 = "ab9e4eec7e80892fd78f74d9a15d0299f1e22121cea44efd68a7a02a3fe9a1da";
+          };
+          # defaults from:
+          # https://deepinfra.com/meta-llama/Meta-Llama-3-8B-Instruct
+          temperature = 0.7;
+          top_p = 0.9;
+          top_k = 0;
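+          # top_k = 0 is expected to disable top-k filtering here, leaving
+          # sampling to temperature/top_p (nucleus sampling)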
+          # the following parameter leads to outputs like: !!!!!!!!!!!!!!!!!!!
+          #repeat_penalty = 1;
+          presence_penalty = 0;
+          frequency_penalty = 0;
+          max_tokens = 100;
+        };
+        stopwords = [ "<|eot_id|>" ];
+        template = {
+          # Templates implement the following specifications:
+          # https://github.com/meta-llama/llama3/tree/main?tab=readme-ov-file#instruction-tuned-models
+          # ... and are inspired by:
+          # https://github.com/mudler/LocalAI/blob/master/embedded/models/llama3-instruct.yaml
+          #
+          # The rules for template evaluation are defined here:
+          # https://pkg.go.dev/text/template
+          chat_message = ''
+            <|start_header_id|>{{.RoleName}}<|end_header_id|>
+
+            {{.Content}}${builtins.head stopwords}'';
+
+          chat = "<|begin_of_text|>{{.Input}}<|start_header_id|>assistant<|end_header_id|>";
+        };
       };
-      ggml-tiny-en = fetchurl {
-        url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin";
-        hash = "sha256-x3xXZvHO8JtrfUfyG1Rsvd1BV4hrO11tT3CekeZsfCs=";
-      };
-      whisper-en = {
-        name = "whisper-en";
-        backend = "whisper";
-        parameters.model = ggml-tiny-en.name;
-      };
-      models = symlinkJoin {
-        name = "models";
-        paths = [
-          voice-en-us
-          (linkFarmFromDrvs "whisper-en" [
-            (writers.writeYAML "whisper-en.yaml" whisper-en)
-            ggml-tiny-en
-          ])
-        ];
+
+      models = genModels model-configs;
+
+      requests = {
+        # https://localai.io/features/text-generation/#chat-completions
+        chat-completions = {
+          inherit model;
+          messages = [{ role = "user"; content = "1 + 2 = ?"; }];
+        };
+        # https://localai.io/features/text-generation/#edit-completions
+        edit-completions = {
+          inherit model;
+          instruction = "rephrase";
+          input = "Black cat jumped out of the window";
+          max_tokens = 50;
+        };
+        # https://localai.io/features/text-generation/#completions
+        completions = {
+          inherit model;
+          prompt = "A long time ago in a galaxy far, far away";
+        };
       };
     in
     testers.runNixOSTest {
-      name = self.name + "-tts";
-      nodes.machine =
-        let
-          cores = 2;
-        in
-        {
-          virtualisation = {
-            inherit cores;
-          };
-          systemd.services.local-ai = {
-            wantedBy = [ "multi-user.target" ];
-            serviceConfig.ExecStart = "${self}/bin/local-ai --debug --threads ${toString cores} --models-path ${models} --localai-config-dir . --address :${port}";
-          };
-        };
+      name = self.name + "-llama";
+      nodes.machine = {
+        imports = [ common-config ];
+        virtualisation.cores = 4;
+        virtualisation.memorySize = 8192;
+        services.local-ai.models = models;
+      };
+      passthru = { inherit models requests; };
       testScript =
        let
-          request = {
-            model = "en-us-danny-low.onnx";
-            backend = "piper";
-            input = "Hello, how are you?";
-          };
+          port = "8080";
        in
        ''
          machine.wait_for_open_port(${port})
          machine.succeed("curl -f http://localhost:${port}/readyz")
-          machine.succeed("curl -f http://localhost:${port}/tts --json @${writers.writeJSON "request.json" request} --output out.wav")
-          machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${whisper-en.name} --output transcription.json")
-          machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${request.input}\"' transcription.json")
+          machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
+          machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${model}\"' models.json")
+
+          machine.succeed("curl -f http://localhost:${port}/v1/chat/completions --json @${writers.writeJSON "request-chat-completions.json" requests.chat-completions} --output chat-completions.json")
+          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"chat.completion\"' chat-completions.json")
+          machine.succeed("${jq}/bin/jq --exit-status 'debug | .choices | first.message.content | tonumber == 3' chat-completions.json")
+
+          machine.succeed("curl -f http://localhost:${port}/v1/edits --json @${writers.writeJSON "request-edit-completions.json" requests.edit-completions} --output edit-completions.json")
+          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"edit\"' edit-completions.json")
+          machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString requests.edit-completions.max_tokens}' edit-completions.json")
+
+          machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" requests.completions} --output completions.json")
+          machine.succeed("${jq}/bin/jq --exit-status 'debug | .object ==\"text_completion\"' completions.json")
+          machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString model-configs.${model}.parameters.max_tokens}' completions.json")
        '';
      };
+
+} // lib.optionalAttrs (self.features.with_tts && !self.features.with_cublas && !self.features.with_clblas) {
+  # https://localai.io/features/text-to-audio/#piper
+  tts =
+    let
+      model-stt = "whisper-en";
+      model-configs.${model-stt} = {
+        backend = "whisper";
+        parameters.model = fetchurl {
+          url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin";
+          hash = "sha256-x3xXZvHO8JtrfUfyG1Rsvd1BV4hrO11tT3CekeZsfCs=";
+        };
+      };
+
+      model-tts = "piper-en";
+      model-configs.${model-tts} = {
+        backend = "piper";
+        parameters.model = "en-us-danny-low.onnx";
+      };
+
+      models =
+        let
+          models = genModels model-configs;
+        in
+        symlinkJoin {
+          inherit (models) name;
+          paths = [
+            models
+            (fetchzip {
+              url = "https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-danny-low.tar.gz";
+              hash = "sha256-5wf+6H5HeQY0qgdqnAG1vSqtjIFM9lXH53OgouuPm0M=";
+              stripRoot = false;
+            })
+          ];
+        };
+
+      requests.request = {
+        model = model-tts;
+        input = "Hello, how are you?";
+      };
+    in
+    testers.runNixOSTest {
+      name = self.name + "-tts";
+      nodes.machine = {
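+        # no memorySize override: the tiny whisper/piper models should fit
+        # the default VM memory allocation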
+        imports = [ common-config ];
+        virtualisation.cores = 2;
+        services.local-ai.models = models;
+      };
+      passthru = { inherit models requests; };
+      testScript =
+        let
+          port = "8080";
+        in
+        ''
+          machine.wait_for_open_port(${port})
+          machine.succeed("curl -f http://localhost:${port}/readyz")
+          machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
+          machine.succeed("${jq}/bin/jq --exit-status 'debug' models.json")
+          machine.succeed("curl -f http://localhost:${port}/tts --json @${writers.writeJSON "request.json" requests.request} --output out.wav")
+          machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${model-stt} --output transcription.json")
+          machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${requests.request.input}\"' transcription.json")
+        '';
+    };
 }
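
Usage note (reviewer sketch, not part of the diff): a downstream NixOS configuration could consume the new module together with passthru.lib.genModels roughly as follows. The model name, backend, URL, and hash below are placeholders, and the module is imported by path since this PR does not register it under nixos/modules:

  { pkgs, ... }:
  {
    imports = [ "${pkgs.path}/pkgs/by-name/lo/local-ai/module.nix" ];

    services.local-ai = {
      enable = true;
      port = 8080;
      threads = 4;
      # genModels turns an attrset of model configs into a link farm of YAML
      # configs, model files, and prompt templates for --models-path.
      models = pkgs.local-ai.lib.genModels {
        my-model = {                      # hypothetical model name
          backend = "llama";              # placeholder backend
          parameters.model = pkgs.fetchurl {
            url = "https://example.org/my-model.gguf";  # placeholder
            hash = "";                                  # placeholder
          };
        };
      };
    };
  }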