nixpkgs/nixos/modules/services/networking/multipath.nix
Michael Brantley 121cfd1998 nixos/multipath: add multipath module
The multipath-tools package had existed in Nixpkgs for some time but
without a nixos module to configure/drive it. This module provides
attributes to drive the majority of multipath configuration options
and is being successfully used in stage-1 and stage-2 boot to mount
/nix from a multipath-serviced iSCSI volume.

Credit goes to @grahamc for early contributions to the module and
authoring the NixOS module test.
2021-10-08 09:47:38 -04:00

572 lines
20 KiB
Nix

{ config, lib, pkgs, ... }: with lib;
# See http://christophe.varoqui.free.fr/usage.html and
# https://github.com/opensvc/multipath-tools/blob/master/multipath/multipath.conf.5
let
cfg = config.services.multipath;
indentLines = n: str: concatStringsSep "\n" (
map (line: "${fixedWidthString n " " " "}${line}") (
filter ( x: x != "" ) ( splitString "\n" str )
)
);
addCheckDesc = desc: elemType: check: types.addCheck elemType check
// { description = "${elemType.description} (with check: ${desc})"; };
hexChars = stringToCharacters "0123456789abcdef";
isHexString = s: all (c: elem c hexChars) (stringToCharacters (toLower s));
hexStr = addCheckDesc "hexadecimal string" types.str isHexString;
in {
options.services.multipath = with types; {
enable = mkEnableOption "the device mapper multipath (DM-MP) daemon";
package = mkOption {
type = package;
description = "multipath-tools package to use";
default = pkgs.multipath-tools;
defaultText = "pkgs.multipath-tools";
};
devices = mkOption {
default = [ ];
example = literalExpression ''
[
{
vendor = "\"COMPELNT\"";
product = "\"Compellent Vol\"";
path_checker = "tur";
no_path_retry = "queue";
max_sectors_kb = 256;
}, ...
]
'';
description = ''
This option allows you to define arrays for use in multipath
groups.
'';
type = listOf (submodule {
options = {
vendor = mkOption {
type = str;
example = "COMPELNT";
description = "Regular expression to match the vendor name";
};
product = mkOption {
type = str;
example = "Compellent Vol";
description = "Regular expression to match the product name";
};
revision = mkOption {
type = nullOr str;
default = null;
description = "Regular expression to match the product revision";
};
product_blacklist = mkOption {
type = nullOr str;
default = null;
description = "Products with the given vendor matching this string are blacklisted";
};
alias_prefix = mkOption {
type = nullOr str;
default = null;
description = "The user_friendly_names prefix to use for this device type, instead of the default mpath";
};
vpd_vendor = mkOption {
type = nullOr str;
default = null;
description = "The vendor specific vpd page information, using the vpd page abbreviation";
};
hardware_handler = mkOption {
type = nullOr (enum [ "emc" "rdac" "hp_sw" "alua" "ana" ]);
default = null;
description = "The hardware handler to use for this device type";
};
# Optional arguments
path_grouping_policy = mkOption {
type = nullOr (enum [ "failover" "multibus" "group_by_serial" "group_by_prio" "group_by_node_name" ]);
default = null; # real default: "failover"
description = "The default path grouping policy to apply to unspecified multipaths";
};
uid_attribute = mkOption {
type = nullOr str;
default = null;
description = "The udev attribute providing a unique path identifier (WWID)";
};
getuid_callout = mkOption {
type = nullOr str;
default = null;
description = ''
(Superseded by uid_attribute) The default program and args to callout
to obtain a unique path identifier. Should be specified with an absolute path.
'';
};
path_selector = mkOption {
type = nullOr (enum [
''"round-robin 0"''
''"queue-length 0"''
''"service-time 0"''
''"historical-service-time 0"''
]);
default = null; # real default: "service-time 0"
description = "The default path selector algorithm to use; they are offered by the kernel multipath target";
};
path_checker = mkOption {
type = enum [ "readsector0" "tur" "emc_clariion" "hp_sw" "rdac" "directio" "cciss_tur" "none" ];
default = "tur";
description = "The default method used to determine the paths state";
};
prio = mkOption {
type = nullOr (enum [
"none" "const" "sysfs" "emc" "alua" "ontap" "rdac" "hp_sw" "hds"
"random" "weightedpath" "path_latency" "ana" "datacore" "iet"
]);
default = null; # real default: "const"
description = "The name of the path priority routine";
};
prio_args = mkOption {
type = nullOr str;
default = null;
description = "Arguments to pass to to the prio function";
};
features = mkOption {
type = nullOr str;
default = null;
description = "Specify any device-mapper features to be used";
};
failback = mkOption {
type = nullOr str;
default = null; # real default: "manual"
description = "Tell multipathd how to manage path group failback. Quote integers as strings";
};
rr_weight = mkOption {
type = nullOr (enum [ "priorities" "uniform" ]);
default = null; # real default: "uniform"
description = ''
If set to priorities the multipath configurator will assign path weights
as "path prio * rr_min_io".
'';
};
no_path_retry = mkOption {
type = nullOr str;
default = null; # real default: "fail"
description = "Specify what to do when all paths are down. Quote integers as strings";
};
rr_min_io = mkOption {
type = nullOr int;
default = null; # real default: 1000
description = ''
Number of I/O requests to route to a path before switching to the next in the
same path group. This is only for Block I/O (BIO) based multipath and
only apply to round-robin path_selector.
'';
};
rr_min_io_rq = mkOption {
type = nullOr int;
default = null; # real default: 1
description = ''
Number of I/O requests to route to a path before switching to the next in the
same path group. This is only for Request based multipath and
only apply to round-robin path_selector.
'';
};
fast_io_fail_tmo = mkOption {
type = nullOr str;
default = null; # real default: 5
description = ''
Specify the number of seconds the SCSI layer will wait after a problem has been
detected on a FC remote port before failing I/O to devices on that remote port.
This should be smaller than dev_loss_tmo. Setting this to "off" will disable
the timeout. Quote integers as strings.
'';
};
dev_loss_tmo = mkOption {
type = nullOr str;
default = null; # real default: 600
description = ''
Specify the number of seconds the SCSI layer will wait after a problem has
been detected on a FC remote port before removing it from the system. This
can be set to "infinity" which sets it to the max value of 2147483647
seconds, or 68 years. It will be automatically adjusted to the overall
retry interval no_path_retry * polling_interval
if a number of retries is given with no_path_retry and the
overall retry interval is longer than the specified dev_loss_tmo value.
The Linux kernel will cap this value to 600 if fast_io_fail_tmo
is not set.
'';
};
flush_on_last_del = mkOption {
type = nullOr (enum [ "yes" "no" ]);
default = null; # real default: "no"
description = ''
If set to "yes" multipathd will disable queueing when the last path to a
device has been deleted.
'';
};
user_friendly_names = mkOption {
type = nullOr (enum [ "yes" "no" ]);
default = null; # real default: "no"
description = ''
If set to "yes", using the bindings file /etc/multipath/bindings
to assign a persistent and unique alias to the multipath, in the
form of mpath. If set to "no" use the WWID as the alias. In either
case this be will be overridden by any specific aliases in the
multipaths section.
'';
};
retain_attached_hw_handler = mkOption {
type = nullOr (enum [ "yes" "no" ]);
default = null; # real default: "yes"
description = ''
(Obsolete for kernels >= 4.3) If set to "yes" and the SCSI layer has
already attached a hardware_handler to the device, multipath will not
force the device to use the hardware_handler specified by mutipath.conf.
If the SCSI layer has not attached a hardware handler, multipath will
continue to use its configured hardware handler.
Important Note: Linux kernel 4.3 or newer always behaves as if
"retain_attached_hw_handler yes" was set.
'';
};
detect_prio = mkOption {
type = nullOr (enum [ "yes" "no" ]);
default = null; # real default: "yes"
description = ''
If set to "yes", multipath will try to detect if the device supports
SCSI-3 ALUA. If so, the device will automatically use the sysfs
prioritizer if the required sysf attributes access_state and
preferred_path are supported, or the alua prioritizer if not. If set
to "no", the prioritizer will be selected as usual.
'';
};
detect_checker = mkOption {
type = nullOr (enum [ "yes" "no" ]);
default = null; # real default: "yes"
description = ''
If set to "yes", multipath will try to detect if the device supports
SCSI-3 ALUA. If so, the device will automatically use the tur checker.
If set to "no", the checker will be selected as usual.
'';
};
deferred_remove = mkOption {
type = nullOr (enum [ "yes" "no" ]);
default = null; # real default: "no"
description = ''
If set to "yes", multipathd will do a deferred remove instead of a
regular remove when the last path device has been deleted. This means
that if the multipath device is still in use, it will be freed when
the last user closes it. If path is added to the multipath device
before the last user closes it, the deferred remove will be canceled.
'';
};
san_path_err_threshold = mkOption {
type = nullOr str;
default = null;
description = ''
If set to a value greater than 0, multipathd will watch paths and check
how many times a path has been failed due to errors.If the number of
failures on a particular path is greater then the san_path_err_threshold,
then the path will not reinstate till san_path_err_recovery_time. These
path failures should occur within a san_path_err_forget_rate checks, if
not we will consider the path is good enough to reinstantate.
'';
};
san_path_err_forget_rate = mkOption {
type = nullOr str;
default = null;
description = ''
If set to a value greater than 0, multipathd will check whether the path
failures has exceeded the san_path_err_threshold within this many checks
i.e san_path_err_forget_rate. If so we will not reinstante the path till
san_path_err_recovery_time.
'';
};
san_path_err_recovery_time = mkOption {
type = nullOr str;
default = null;
description = ''
If set to a value greater than 0, multipathd will make sure that when
path failures has exceeded the san_path_err_threshold within
san_path_err_forget_rate then the path will be placed in failed state
for san_path_err_recovery_time duration. Once san_path_err_recovery_time
has timeout we will reinstante the failed path. san_path_err_recovery_time
value should be in secs.
'';
};
marginal_path_err_sample_time = mkOption {
type = nullOr int;
default = null;
description = "One of the four parameters of supporting path check based on accounting IO error such as intermittent error";
};
marginal_path_err_rate_threshold = mkOption {
type = nullOr int;
default = null;
description = "The error rate threshold as a permillage (1/1000)";
};
marginal_path_err_recheck_gap_time = mkOption {
type = nullOr str;
default = null;
description = "One of the four parameters of supporting path check based on accounting IO error such as intermittent error";
};
marginal_path_double_failed_time = mkOption {
type = nullOr str;
default = null;
description = "One of the four parameters of supporting path check based on accounting IO error such as intermittent error";
};
delay_watch_checks = mkOption {
type = nullOr str;
default = null;
description = "This option is deprecated, and mapped to san_path_err_forget_rate";
};
delay_wait_checks = mkOption {
type = nullOr str;
default = null;
description = "This option is deprecated, and mapped to san_path_err_recovery_time";
};
skip_kpartx = mkOption {
type = nullOr (enum [ "yes" "no" ]);
default = null; # real default: "no"
description = "If set to yes, kpartx will not automatically create partitions on the device";
};
max_sectors_kb = mkOption {
type = nullOr int;
default = null;
description = "Sets the max_sectors_kb device parameter on all path devices and the multipath device to the specified value";
};
ghost_delay = mkOption {
type = nullOr int;
default = null;
description = "Sets the number of seconds that multipath will wait after creating a device with only ghost paths before marking it ready for use in systemd";
};
all_tg_pt = mkOption {
type = nullOr str;
default = null;
description = "Set the 'all targets ports' flag when registering keys with mpathpersist";
};
};
});
};
defaults = mkOption {
type = nullOr str;
default = null;
description = ''
This section defines default values for attributes which are used
whenever no values are given in the appropriate device or multipath
sections.
'';
};
blacklist = mkOption {
type = nullOr str;
default = null;
description = ''
This section defines which devices should be excluded from the
multipath topology discovery.
'';
};
blacklist_exceptions = mkOption {
type = nullOr str;
default = null;
description = ''
This section defines which devices should be included in the
multipath topology discovery, despite being listed in the
blacklist section.
'';
};
overrides = mkOption {
type = nullOr str;
default = null;
description = ''
This section defines values for attributes that should override the
device-specific settings for all devices.
'';
};
extraConfig = mkOption {
type = nullOr str;
default = null;
description = "Lines to append to default multipath.conf";
};
extraConfigFile = mkOption {
type = nullOr str;
default = null;
description = "Append an additional file's contents to /etc/multipath.conf";
};
pathGroups = mkOption {
example = literalExpression ''
[
{
wwid = "360080e500043b35c0123456789abcdef";
alias = 10001234;
array = "bigarray.example.com";
fsType = "zfs"; # optional
options = "ro"; # optional
}, ...
]
'';
description = ''
This option allows you to define multipath groups as described
in http://christophe.varoqui.free.fr/usage.html.
'';
type = listOf (submodule {
options = {
alias = mkOption {
type = int;
example = 1001234;
description = "The name of the multipath device";
};
wwid = mkOption {
type = hexStr;
example = "360080e500043b35c0123456789abcdef";
description = "The identifier for the multipath device";
};
array = mkOption {
type = str;
default = null;
example = "bigarray.example.com";
description = "The DNS name of the storage array";
};
fsType = mkOption {
type = nullOr str;
default = null;
example = "zfs";
description = "Type of the filesystem";
};
options = mkOption {
type = nullOr str;
default = null;
example = "ro";
description = "Options used to mount the file system";
};
};
});
};
};
config = mkIf cfg.enable {
environment.etc."multipath.conf".text =
let
inherit (cfg) defaults blacklist blacklist_exceptions overrides;
mkDeviceBlock = cfg: let
nonNullCfg = lib.filterAttrs (k: v: v != null) cfg;
attrs = lib.mapAttrsToList (name: value: " ${name} ${toString value}") nonNullCfg;
in ''
device {
${lib.concatStringsSep "\n" attrs}
}
'';
devices = lib.concatMapStringsSep "\n" mkDeviceBlock cfg.devices;
mkMultipathBlock = m: ''
multipath {
wwid ${m.wwid}
alias ${toString m.alias}
}
'';
multipaths = lib.concatMapStringsSep "\n" mkMultipathBlock cfg.pathGroups;
in ''
devices {
${indentLines 2 devices}
}
${optionalString (!isNull defaults) ''
defaults {
${indentLines 2 defaults}
multipath_dir ${cfg.package}/lib/multipath
}
''}
${optionalString (!isNull blacklist) ''
blacklist {
${indentLines 2 blacklist}
}
''}
${optionalString (!isNull blacklist_exceptions) ''
blacklist_exceptions {
${indentLines 2 blacklist_exceptions}
}
''}
${optionalString (!isNull overrides) ''
overrides {
${indentLines 2 overrides}
}
''}
multipaths {
${indentLines 2 multipaths}
}
'';
systemd.packages = [ cfg.package ];
environment.systemPackages = [ cfg.package ];
boot.kernelModules = [ "dm-multipath" "dm-service-time" ];
# We do not have systemd in stage-1 boot so must invoke `multipathd`
# with the `-1` argument which disables systemd calls. Invoke `multipath`
# to display the multipath mappings in the output of `journalctl -b`.
boot.initrd.kernelModules = [ "dm-multipath" "dm-service-time" ];
boot.initrd.postDeviceCommands = ''
modprobe -a dm-multipath dm-service-time
multipathd -s
(set -x && sleep 1 && multipath -ll)
'';
};
}