local-ai: 2.13.0 -> 2.14.0
parent dd4070b45f
commit c3dcc669bc
3 changed files with 48 additions and 19 deletions
@@ -28,25 +28,33 @@ in
       type = types.either types.package types.str;
       default = "models";
     };
+
+    parallelRequests = mkOption {
+      type = types.int;
+      default = 1;
+    };
+
+    logLevel = mkOption {
+      type = types.enum [ "error" "warn" "info" "debug" "trace" ];
+      default = "warn";
+    };
   };

   config = lib.mkIf cfg.enable {
     systemd.services.local-ai = {
       wantedBy = [ "multi-user.target" ];
+      environment.LLAMACPP_PARALLEL = toString cfg.parallelRequests;
       serviceConfig = {
         DynamicUser = true;
         ExecStart = lib.escapeShellArgs ([
           "${cfg.package}/bin/local-ai"
-          "--debug"
-          "--address"
-          ":${toString cfg.port}"
-          "--threads"
-          (toString cfg.threads)
-          "--localai-config-dir"
-          "."
-          "--models-path"
-          (toString cfg.models)
+          "--address=:${toString cfg.port}"
+          "--threads=${toString cfg.threads}"
+          "--localai-config-dir=."
+          "--models-path=${cfg.models}"
+          "--log-level=${cfg.logLevel}"
         ]
+        ++ lib.optional (cfg.parallelRequests > 1) "--parallel-requests"
         ++ cfg.extraArgs);
         RuntimeDirectory = "local-ai";
         WorkingDirectory = "%t/local-ai";
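For context, a minimal sketch of how the two new options could be set in a NixOS configuration; the values here are illustrative and not part of this commit:

    services.local-ai = {
      enable = true;
      # Exported to the service as LLAMACPP_PARALLEL; values above 1 also add
      # the --parallel-requests flag (see the ExecStart changes above).
      parallelRequests = 4;
      # Forwarded as --log-level; the module default is "warn".
      logLevel = "debug";
    };
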
@@ -100,8 +100,8 @@ let
       src = fetchFromGitHub {
         owner = "ggerganov";
         repo = "llama.cpp";
-        rev = "784e11dea1f5ce9638851b2b0dddb107e2a609c8";
-        hash = "sha256-yAQAUo5J+a6O2kTqhFL1UH0tANxpQn3JhAd3MByaC6I=";
+        rev = "6ecf3189e00a1e8e737a78b6d10e1d7006e050a2";
+        hash = "sha256-JS287UdCzj6Es134cbhr8y/AoejMEux0w++/pZ5NejY=";
         fetchSubmodules = true;
       };
       postPatch = prev.postPatch + ''
@@ -254,8 +254,8 @@ let
       src = fetchFromGitHub {
         owner = "ggerganov";
         repo = "whisper.cpp";
-        rev = "858452d58dba3acdc3431c9bced2bb8cfd9bf418";
-        hash = "sha256-2fT3RgGpBex1mF6GJsVDo4rb0F31YqxTymsXcrpQAZk=";
+        rev = "8fac6455ffeb0a0950a84e790ddb74f7290d33c4";
+        hash = "sha256-Dez/Q2vMvSmscS+BJwkgZ4QG+ebM/N8s1Okd5my0CWI=";
       };

       nativeBuildInputs = [ cmake pkg-config ]
@@ -305,8 +305,8 @@ let
       src = fetchFromGitHub {
         owner = "mudler";
         repo = "go-stable-diffusion";
-        rev = "362df9da29f882dbf09ade61972d16a1f53c3485";
-        hash = "sha256-A5KvMZOviPsIpPHxM8cacT+qE2x1iFJAbPsRs4sLijY=";
+        rev = "4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f";
+        hash = "sha256-KXUvMP6cDyWib4rG0RmVRm3pgrdsfKXaH3k0v5/mTe8=";
         fetchSubmodules = true;
       };
       buildFlags = [ "libstablediffusion.a" ];
@@ -342,8 +342,8 @@ let
       src = fetchFromGitHub {
         owner = "M0Rf30";
         repo = "go-tiny-dream";
-        rev = "22a12a4bc0ac5455856f28f3b771331a551a4293";
-        hash = "sha256-DAVHD6E0OKHf4C2ldoI0Mm7813DIrmWFONUhSCQPCfc=";
+        rev = "c04fa463ace9d9a6464313aa5f9cd0f953b6c057";
+        hash = "sha256-uow3vbAI4F/fTGjYOKOLqTpKq7NgGYSZhGlEhn7h6s0=";
         fetchSubmodules = true;
       };
       postUnpack = ''
@@ -373,12 +373,12 @@ let
     stdenv;

   pname = "local-ai";
-  version = "2.13.0";
+  version = "2.14.0";

   src = fetchFromGitHub {
     owner = "go-skynet";
     repo = "LocalAI";
     rev = "v${version}";
-    hash = "sha256-jZE8Ow9FFhnx/jvsURLYlYtSuKpE4UWBezxg/mpHs9g=";
+    hash = "sha256-wr7sTMjGofGiZZbRJ+RfgXx9TM9Adu2NBAXeB3P5Ep0=";
   };

   self = buildGoModule.override { stdenv = effectiveStdenv; } {
@@ -6,6 +6,7 @@
 , writers
 , symlinkJoin
 , jq
+, prom2json
 }:
 let
   common-config = { config, ... }: {
@@ -14,6 +15,7 @@ let
       enable = true;
       package = self;
       threads = config.virtualisation.cores;
+      logLevel = "debug";
     };
   };
@@ -36,6 +38,10 @@ in
     ''
       machine.wait_for_open_port(${port})
      machine.succeed("curl -f http://localhost:${port}/readyz")
+
+      machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
+
+      machine.copy_from_vm("metrics.json")
     '';
   });
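For reference, prom2json converts the Prometheus text exposition served at /metrics into a JSON array of metric families, and machine.copy_from_vm stores the resulting metrics.json with the test output. An additional assertion along these lines (purely illustrative, not part of this commit) could check that the scrape produced at least one metric family:

      machine.succeed("${jq}/bin/jq --exit-status 'length > 0' metrics.json")
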
@@ -80,6 +86,10 @@ in
       machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${model}\"' models.json")
       machine.succeed("curl -f http://localhost:${port}/embeddings --json @${writers.writeJSON "request.json" requests.request} --output embeddings.json")
       machine.succeed("${jq}/bin/jq --exit-status 'debug | .model == \"${model}\"' embeddings.json")
+
+      machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
+
+      machine.copy_from_vm("metrics.json")
     '';
   };
@@ -92,6 +102,7 @@ in
   # https://localai.io/advanced/#full-config-model-file-reference
   model-configs.${model} = rec {
     context_size = 8192;
+    backend = "llama-cpp";
     parameters = {
       # https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF
       # https://ai.meta.com/blog/meta-llama-3/
@@ -157,6 +168,8 @@ in
       virtualisation.cores = 4;
       virtualisation.memorySize = 8192;
       services.local-ai.models = models;
+      # TODO: Add test case parallel requests
+      services.local-ai.parallelRequests = 2;
     };
     passthru = { inherit models requests; };
     testScript =
@@ -180,6 +193,10 @@ in
       machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" requests.completions} --output completions.json")
       machine.succeed("${jq}/bin/jq --exit-status 'debug | .object ==\"text_completion\"' completions.json")
       machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString model-configs.${model}.parameters.max_tokens}' completions.json")
+
+      machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
+
+      machine.copy_from_vm("metrics.json")
     '';
   };
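The TODO added earlier only sets services.local-ai.parallelRequests = 2 without exercising concurrency. Purely as an illustration of one possible shape for such a test case (not part of this commit), two completion requests could be issued at once, reusing the existing fixture, with both exit statuses checked:

      # Illustrative sketch only: fire two requests concurrently and require both to succeed.
      machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" requests.completions} --output completions-a.json & pid=$!; curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" requests.completions} --output completions-b.json && wait $pid")
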
@@ -243,6 +260,10 @@ in
       machine.succeed("curl -f http://localhost:${port}/tts --json @${writers.writeJSON "request.json" requests.request} --output out.wav")
       machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${model-stt} --output transcription.json")
       machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${requests.request.input}\"' transcription.json")
+
+      machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
+
+      machine.copy_from_vm("metrics.json")
     '';
   };
 }