nixpkgs/nixos/modules/services/databases/victoriametrics.nix
Ivan Kozik fb6fbcb85c nixos/victoriametrics: set LimitNOFILE=1048576 to fix panic and restart loop
This fixes:

```
systemd[1]: Started VictoriaMetrics time series database.
victoria-metrics[379550]: 2021-08-04T19:33:39.833Z        panic        VictoriaMetrics/lib/storage/partition.go:954        FATAL: unrecoverable error when merging small parts in the partition "/var/lib/victoriametrics/data/small/2021_08": cannot open source part for merging: cannot open metaindex file in stream mode: cannot open file "/var/lib/victoriametrics/data/small/2021_08/1228_1228_20210804184120.712_20210804184121.899_16982E83CD7A763A/metaindex.bin": open /var/lib/victoriametrics/data/small/2021_08/1228_1228_20210804184120.712_20210804184121.899_16982E83CD7A763A/metaindex.bin: too many open files
victoria-metrics[379550]: panic: FATAL: unrecoverable error when merging small parts in the partition "/var/lib/victoriametrics/data/small/2021_08": cannot open source part for merging: cannot open metaindex file in stream mode: cannot open file "/var/lib/victoriametrics/data/small/2021_08/1228_1228_20210804184120.712_20210804184121.899_16982E83CD7A763A/metaindex.bin": open /var/lib/victoriametrics/data/small/2021_08/1228_1228_20210804184120.712_20210804184121.899_16982E83CD7A763A/metaindex.bin: too many open files
victoria-metrics[379550]: goroutine 629 [running]:
victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/logger.logMessage(0xbb3ea1, 0x5, 0xc001113800, 0x1e7, 0x4)
victoria-metrics[379550]:         github.com/VictoriaMetrics/VictoriaMetrics/lib/logger/logger.go:270 +0xc69
victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/logger.logLevelSkipframes(0x1, 0xbb3ea1, 0x5, 0xbe3f8b, 0x4b, 0xc000bb3f88, 0x2, 0x2)
victoria-metrics[379550]:         github.com/VictoriaMetrics/VictoriaMetrics/lib/logger/logger.go:138 +0xd1
victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/logger.logLevel(...)
victoria-metrics[379550]:         github.com/VictoriaMetrics/VictoriaMetrics/lib/logger/logger.go:130
victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/logger.Panicf(...)
victoria-metrics[379550]:         github.com/VictoriaMetrics/VictoriaMetrics/lib/logger/logger.go:126
victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/storage.(*partition).smallPartsMerger(0xc0014d7980)
victoria-metrics[379550]:         github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/partition.go:954 +0x145
victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/storage.(*partition).startMergeWorkers.func1(0xc0014d7980)
victoria-metrics[379550]:         github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/partition.go:933 +0x2b
victoria-metrics[379550]: created by github.com/VictoriaMetrics/VictoriaMetrics/lib/storage.(*partition).startMergeWorkers
victoria-metrics[379550]:         github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/partition.go:932 +0x6c
systemd[1]: victoriametrics.service: Main process exited, code=exited, status=2/INVALIDARGUMENT
systemd[1]: victoriametrics.service: Failed with result 'exit-code'.
systemd[1]: victoriametrics.service: Consumed 587ms CPU time, received 6.5K IP traffic, sent 1.7K IP traffic.
systemd[1]: victoriametrics.service: Scheduled restart job, restart counter is at 2064.
systemd[1]: Stopped VictoriaMetrics time series database.
systemd[1]: victoriametrics.service: Consumed 587ms CPU time, received 6.5K IP traffic, sent 1.7K IP traffic.
systemd[1]: Starting VictoriaMetrics time series database...
```
2021-08-05 05:35:53 +00:00

78 lines
2.7 KiB
Nix

{ config, pkgs, lib, ... }:
let cfg = config.services.victoriametrics; in
{
# Option declarations for the VictoriaMetrics service module.
# `lib` functions are referenced explicitly instead of through `with lib;`
# so that every name's origin is visible at the use site.
options.services.victoriametrics = {
  enable = lib.mkEnableOption "victoriametrics";

  # Package providing the bin/victoria-metrics executable started by the unit.
  package = lib.mkOption {
    type = lib.types.package;
    default = pkgs.victoriametrics;
    defaultText = "pkgs.victoriametrics";
    description = ''
      The VictoriaMetrics distribution to use.
    '';
  };

  # Handed to the daemon as its -httpListenAddr flag. A value that starts
  # with ":" (port only) is probed on 127.0.0.1 by the service's postStart.
  listenAddress = lib.mkOption {
    default = ":8428";
    type = lib.types.str;
    description = ''
      The listen address for the http interface.
    '';
  };

  # Handed to the daemon as its -retentionPeriod flag.
  # Restricted to positive integers so that a zero or negative retention is
  # rejected at evaluation time rather than when the daemon starts
  # (NOTE(review): VictoriaMetrics is expected to refuse a retention below
  # one day -- confirm against the packaged version).
  retentionPeriod = lib.mkOption {
    type = lib.types.ints.positive;
    default = 1;
    description = ''
      Retention period in months.
    '';
  };

  # Appended, shell-escaped, after the flags the module generates itself.
  extraOptions = lib.mkOption {
    type = lib.types.listOf lib.types.str;
    default = [];
    description = ''
      Extra options to pass to VictoriaMetrics. See the README: <link
      xlink:href="https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md" />
      or <command>victoria-metrics -help</command> for more
      information.
    '';
  };
};
# Service definition; only applied when services.victoriametrics.enable is set.
config = lib.mkIf cfg.enable {
  systemd.services.victoriametrics = {
    description = "VictoriaMetrics time series database";
    after = [ "network.target" ];
    wantedBy = [ "multi-user.target" ];
    startLimitBurst = 5;

    serviceConfig = {
      Restart = "on-failure";
      RestartSec = 1;
      # DynamicUser together with StateDirectory: data lives in
      # /var/lib/victoriametrics, which is also passed as -storageDataPath.
      StateDirectory = "victoriametrics";
      DynamicUser = true;

      # Every argument is quoted, not only extraOptions, so that a
      # listenAddress containing whitespace or quote characters cannot be
      # split into additional arguments. The -flag=value form keeps an empty
      # value from swallowing the following flag.
      ExecStart =
        let
          daemonArgs = [
            "-storageDataPath=/var/lib/victoriametrics"
            "-httpListenAddr=${cfg.listenAddress}"
            "-retentionPeriod=${toString cfg.retentionPeriod}"
          ] ++ cfg.extraOptions;
        in
        "${cfg.package}/bin/victoria-metrics ${lib.escapeShellArgs daemonArgs}";

      # victoriametrics 1.59 with ~7GB of data seems to eventually panic when merging files and then
      # begins restart-looping forever. Set LimitNOFILE= to a large number to work around this issue.
      #
      # panic: FATAL: unrecoverable error when merging small parts in the partition "/var/lib/victoriametrics/data/small/2021_08":
      # cannot open source part for merging: cannot open values file in stream mode:
      # cannot open file "/var/lib/victoriametrics/data/small/2021_08/[...]/values.bin":
      # open /var/lib/victoriametrics/data/small/2021_08/[...]/values.bin: too many open files
      LimitNOFILE = 1048576;
    };

    # Block unit start-up until the HTTP interface answers on /ping.
    # A port-only listen address (":8428") has no host part, so the probe
    # targets 127.0.0.1 in that case. mkBefore places the wait ahead of any
    # postStart fragments contributed by other modules.
    postStart =
      let
        probeHost = lib.optionalString (lib.hasPrefix ":" cfg.listenAddress) "127.0.0.1";
        pingUrl = "http://${probeHost}${cfg.listenAddress}/ping";
      in
      lib.mkBefore ''
        until ${lib.getBin pkgs.curl}/bin/curl -s -o /dev/null ${lib.escapeShellArg pingUrl}; do
          sleep 1;
        done
      '';
  };
};
}