nixpkgs/nixos/modules/services/misc/bees.nix

{ config, lib, pkgs, ... }:

with lib;

let

  # Shorthand for this module's slice of the system configuration.
  cfg = config.services.beesd;

  # Syslog severity keywords mapped to their numeric levels. The `verbosity`
  # option accepts either form and uses this table to turn keywords into
  # numbers.
  logLevels = {
    emerg = 0;
    alert = 1;
    crit = 2;
    err = 3;
    warning = 4;
    notice = 5;
    info = 6;
    debug = 7;
  };

  # Submodule describing a single bees instance, i.e. the options available
  # under `services.beesd.filesystems.<name>`.
  fsOptions = with types; {
    # Passed verbatim as the first argument of bees-service-wrapper.
    options.spec = mkOption {
      type = str;
      description = ''
        Description of how to identify the filesystem to be deduplicated by this
        instance of bees. Note that deduplication crosses subvolumes; one must
        not configure multiple instances for subvolumes of the same filesystem
        (or block devices which are part of the same filesystem), but only for
        completely independent btrfs filesystems.
        </para>
        <para>
        This must be in a format usable by findmnt; that could be a key=value
        pair, or a bare path to a mount point.
      '';
      example = "LABEL=MyBulkDataDrive";
    };
    options.hashTableSizeMB = mkOption {
      # Require a strictly positive multiple of 16: the modulo check on its own
      # would also let 0 and negative sizes through.
      type = addCheck ints.positive (n: mod n 16 == 0);
      default = 1024; # 1GB; default from upstream beesd script
      description = ''
        Hash table size in MB; must be a multiple of 16.
        </para>
        <para>
        A larger ratio of index size to storage size means smaller blocks of
        duplicate content are recognized.
        </para>
        <para>
        If you have 1TB of data, a 4GB hash table (which is to say, a value of
        4096) will permit 4KB extents (the smallest possible size) to be
        recognized, whereas a value of 1024 -- creating a 1GB hash table --
        will recognize only aligned duplicate blocks of 16KB.
      '';
    };
    options.verbosity = mkOption {
      # Accept either a syslog keyword ("info") or its numeric level (6) ...
      type = enum (attrNames logLevels ++ attrValues logLevels);
      # ... and always hand the numeric level to consumers of the option.
      apply = v: if isString v then logLevels.${v} else v;
      default = "info";
      description = "Log verbosity (syslog keyword/level).";
    };
    options.workDir = mkOption {
      type = str;
      default = ".beeshome";
      description = ''
        Name (relative to the root of the filesystem) of the subvolume where
        the hash table will be stored.
      '';
    };
    # Appended to the daemon command line after the wrapper's `--` separator.
    options.extraOptions = mkOption {
      type = listOf str;
      default = [ ];
      description = ''
        Extra command-line options passed to the daemon. See upstream bees documentation.
      '';
      example = literalExpression ''
        [ "--thread-count" "4" ]
      '';
    };
  };

in
{

  # Attribute set of bees instances, keyed by an arbitrary instance name; each
  # value is checked against the `fsOptions` submodule.
  options.services.beesd.filesystems = mkOption {
    default = { };
    type = types.attrsOf (types.submodule fsOptions);
    description = "BTRFS filesystems to run block-level deduplication on.";
    example = literalExpression ''
      {
        root = {
          spec = "LABEL=root";
          hashTableSizeMB = 2048;
          verbosity = "crit";
          extraOptions = [ "--loadavg-target" "5.0" ];
        };
      }
    '';
  };
  config = {
    # One systemd unit per entry of services.beesd.filesystems, named
    # "beesd@<attribute name>".
    systemd.services =
      let
        wrapper = "${pkgs.bees}/bin/bees-service-wrapper";

        # Turn one filesystem entry into a (unit name, unit definition) pair.
        mkBeesUnit = name: fs:
          let
            # Arguments common to the wrapper's `run` and `cleanup` calls: the
            # filesystem spec followed by key=value settings.
            wrapperArgs = escapeShellArgs [
              fs.spec
              "verbosity=${toString fs.verbosity}"
              "idxSizeMB=${toString fs.hashTableSizeMB}"
              "workDir=${fs.workDir}"
            ];
            # User-supplied options, placed after the wrapper's `--` separator.
            daemonArgs = escapeShellArgs fs.extraOptions;
          in
          nameValuePair "beesd@${name}" {
            description = "Block-level BTRFS deduplication for %i";
            wantedBy = [ "multi-user.target" ];
            after = [ "sysinit.target" ];

            # Values from https://github.com/Zygo/bees/blob/v0.6.5/scripts/beesd@.service.in
            serviceConfig = {
              # Command lines.
              ExecStart = "${wrapper} run ${wrapperArgs} -- --no-timestamps ${daemonArgs}";
              ExecStopPost = "${wrapper} cleanup ${wrapperArgs}";

              # Lifecycle and logging.
              KillMode = "control-group";
              KillSignal = "SIGTERM";
              Restart = "on-abnormal";
              SyslogIdentifier = "beesd"; # would otherwise be "bees-service-wrapper"

              # CPU scheduling and accounting.
              CPUAccounting = true;
              CPUSchedulingPolicy = "batch";
              CPUWeight = 12;
              StartupCPUWeight = 25;
              Nice = 19;

              # I/O scheduling.
              IOSchedulingClass = "idle";
              IOSchedulingPriority = 7;
              IOWeight = 10;
              StartupIOWeight = 25;

              # Memory accounting.
              MemoryAccounting = true;
            };
          };
      in
      mapAttrs' mkBeesUnit cfg.filesystems;
  };
}
nixos/modules: services.bees init 2018-10-14 17:58:56 +02:00			`{ config, lib, pkgs, ... }:`

			`with lib;`

			`let`

			`cfg = config.services.beesd;`

			`logLevels = { emerg = 0; alert = 1; crit = 2; err = 3; warning = 4; notice = 5; info = 6; debug = 7; };`

			`fsOptions = with types; {`
			`options.spec = mkOption {`
			`type = str;`
			`description = ''`
			`Description of how to identify the filesystem to be duplicated by this`
			`instance of bees. Note that deduplication crosses subvolumes; one must`
			`not configure multiple instances for subvolumes of the same filesystem`
			`(or block devices which are part of the same filesystem), but only for`
			`completely independent btrfs filesystems.`
			`</para>`
			`<para>`
			`This must be in a format usable by findmnt; that could be a key=value`
			`pair, or a bare path to a mount point.`
			`'';`
			`example = "LABEL=MyBulkDataDrive";`
			`};`
			`options.hashTableSizeMB = mkOption {`
			`type = types.addCheck types.int (n: mod n 16 == 0);`
			`default = 1024; # 1GB; default from upstream beesd script`
			`description = ''`
			`Hash table size in MB; must be a multiple of 16.`
			`</para>`
			`<para>`
			`A larger ratio of index size to storage size means smaller blocks of`
			`duplicate content are recognized.`
			`</para>`
			`<para>`
			`If you have 1TB of data, a 4GB hash table (which is to say, a value of`
			`4096) will permit 4KB extents (the smallest possible size) to be`
			`recognized, whereas a value of 1024 -- creating a 1GB hash table --`
			`will recognize only aligned duplicate blocks of 16KB.`
			`'';`
			`};`
			`options.verbosity = mkOption {`
			`type = types.enum (attrNames logLevels ++ attrValues logLevels);`
			`apply = v: if isString v then logLevels.${v} else v;`
			`default = "info";`
			`description = "Log verbosity (syslog keyword/level).";`
			`};`
			`options.workDir = mkOption {`
			`type = str;`
			`default = ".beeshome";`
			`description = ''`
			`Name (relative to the root of the filesystem) of the subvolume where`
			`the hash table will be stored.`
			`'';`
			`};`
			`options.extraOptions = mkOption {`
			`type = listOf str;`
bees: nixpkgs-fmt Change-Id: If4e9431dad00ffade3316cf22235d8d44d12d149 2021-06-05 15:45:17 +02:00			`default = [ ];`
nixos/modules: services.bees init 2018-10-14 17:58:56 +02:00			`description = ''`
			`Extra command-line options passed to the daemon. See upstream bees documentation.`
			`'';`
nixos/doc: clean up defaults and examples 2021-10-03 18:06:03 +02:00			`example = literalExpression ''`
nixos/modules: services.bees init 2018-10-14 17:58:56 +02:00			`[ "--thread-count" "4" ]`
			`'';`
			`};`
			`};`

bees: nixpkgs-fmt Change-Id: If4e9431dad00ffade3316cf22235d8d44d12d149 2021-06-05 15:45:17 +02:00			`in`
			`{`
nixos/modules: services.bees init 2018-10-14 17:58:56 +02:00
			`options.services.beesd = {`
			`filesystems = mkOption {`
			`type = with types; attrsOf (submodule fsOptions);`
			`description = "BTRFS filesystems to run block-level deduplication on.";`
			`default = { };`
nixos/doc: clean up defaults and examples 2021-10-03 18:06:03 +02:00			`example = literalExpression ''`
nixos/modules: services.bees init 2018-10-14 17:58:56 +02:00			`{`
			`root = {`
			`spec = "LABEL=root";`
			`hashTableSizeMB = 2048;`
			`verbosity = "crit";`
			`extraOptions = [ "--loadavg-target" "5.0" ];`
			`};`
			`}`
			`'';`
			`};`
			`};`
			`config = {`
bees: nixpkgs-fmt Change-Id: If4e9431dad00ffade3316cf22235d8d44d12d149 2021-06-05 15:45:17 +02:00			`systemd.services = mapAttrs'`
			`(name: fs: nameValuePair "beesd@${name}" {`
			`description = "Block-level BTRFS deduplication for %i";`
			`after = [ "sysinit.target" ];`
nixos/modules: services.bees init 2018-10-14 17:58:56 +02:00
bees: nixpkgs-fmt Change-Id: If4e9431dad00ffade3316cf22235d8d44d12d149 2021-06-05 15:45:17 +02:00			`serviceConfig =`
			`let`
			`configOpts = [`
			`fs.spec`
			`"verbosity=${toString fs.verbosity}"`
			`"idxSizeMB=${toString fs.hashTableSizeMB}"`
			`"workDir=${fs.workDir}"`
			`];`
			`configOptsStr = escapeShellArgs configOpts;`
			`in`
			`{`
bees: 0.6.3 -> 0.6.5 Change-Id: I1866eab9c348d9c10219290ecba698121a32d128 2021-06-05 15:55:12 +02:00			`# Values from https://github.com/Zygo/bees/blob/v0.6.5/scripts/beesd@.service.in`
bees: nixpkgs-fmt Change-Id: If4e9431dad00ffade3316cf22235d8d44d12d149 2021-06-05 15:45:17 +02:00			`ExecStart = "${pkgs.bees}/bin/bees-service-wrapper run ${configOptsStr} -- --no-timestamps ${escapeShellArgs fs.extraOptions}";`
			`ExecStopPost = "${pkgs.bees}/bin/bees-service-wrapper cleanup ${configOptsStr}";`
			`CPUAccounting = true;`
bees: 0.6.3 -> 0.6.5 Change-Id: I1866eab9c348d9c10219290ecba698121a32d128 2021-06-05 15:55:12 +02:00			`CPUSchedulingPolicy = "batch";`
bees: nixpkgs-fmt Change-Id: If4e9431dad00ffade3316cf22235d8d44d12d149 2021-06-05 15:45:17 +02:00			`CPUWeight = 12;`
			`IOSchedulingClass = "idle";`
			`IOSchedulingPriority = 7;`
			`IOWeight = 10;`
			`KillMode = "control-group";`
			`KillSignal = "SIGTERM";`
			`MemoryAccounting = true;`
			`Nice = 19;`
			`Restart = "on-abnormal";`
			`StartupCPUWeight = 25;`
			`StartupIOWeight = 25;`
bees: 0.6.3 -> 0.6.5 Change-Id: I1866eab9c348d9c10219290ecba698121a32d128 2021-06-05 15:55:12 +02:00			`SyslogIdentifier = "beesd"; # would otherwise be "bees-service-wrapper"`
bees: nixpkgs-fmt Change-Id: If4e9431dad00ffade3316cf22235d8d44d12d149 2021-06-05 15:45:17 +02:00			`};`
			`wantedBy = [ "multi-user.target" ];`
			`})`
			`cfg.filesystems;`
nixos/modules: services.bees init 2018-10-14 17:58:56 +02:00			`};`
			`}`