9045a8e24c
With these changes, a container can have more than one veth pair. This makes it possible, for example, to have LAN and DMZ bridges on the host and add dedicated containers for proxies, an IPv4 firewall and an IPv6 firewall; or to have one bridge for normal WAN traffic, one bridge for administration and one bridge for customer-internal communication, so that web-server containers can be reached from outside via HTTP and from the management network via SSH, and can talk to their database over the customer network. The scripts that set up the containers are now rendered once per container instead of from a single template, and they contain per-container code to configure the extra veth interfaces. The default template without support for extra veths is still rendered for imperative containers. A test is also included to check that extra veths can be placed into host bridges or be reached via routing.
584 lines
19 KiB
Nix
584 lines
19 KiB
Nix
{ config, lib, pkgs, ... }:
|
|
|
|
with lib;
|
|
|
|
let
|
|
|
|
# Build the container's init script: a small wrapper that configures the
# container side of the network interfaces and then execs the script given
# as its first argument.
#
# `cfg` is the container configuration. Only `cfg.extraVeths` is read here
# and it may be missing entirely (the generic template passes `{}`).
containerInit = (cfg:
  let
    # Extra veth definitions, defaulting to none.
    extraVeths = cfg.extraVeths or {};

    # Shell fragment that brings one extra veth up inside the container and
    # applies whichever of its addresses and host routes are set in `veth`.
    renderExtraVeth = (name: veth:
      ''
      echo "Bringing ${name} up"
      ip link set dev ${name} up
      ${optionalString ((veth.localAddress or null) != null) ''
        echo "Setting ip for ${name}"
        ip addr add ${veth.localAddress} dev ${name}
      ''}
      ${optionalString ((veth.localAddress6 or null) != null) ''
        echo "Setting ip6 for ${name}"
        ip -6 addr add ${veth.localAddress6} dev ${name}
      ''}
      ${optionalString ((veth.hostAddress or null) != null) ''
        echo "Setting route to host for ${name}"
        ip route add ${veth.hostAddress} dev ${name}
      ''}
      ${optionalString ((veth.hostAddress6 or null) != null) ''
        echo "Setting route6 to host for ${name}"
        ip -6 route add ${veth.hostAddress6} dev ${name}
      ''}
      ''
      );

    # All per-veth fragments joined into one piece of shell code.
    extraVethSetup = concatStringsSep "\n" (mapAttrsToList renderExtraVeth extraVeths);
  in
    pkgs.writeScript "container-init"
    ''
      #! ${pkgs.stdenv.shell} -e

      # Initialise the container side of the veth pair.
      if [ "$PRIVATE_NETWORK" = 1 ]; then

        ip link set host0 name eth0
        ip link set dev eth0 up

        if [ -n "$LOCAL_ADDRESS" ]; then
          ip addr add $LOCAL_ADDRESS dev eth0
        fi
        if [ -n "$LOCAL_ADDRESS6" ]; then
          ip -6 addr add $LOCAL_ADDRESS6 dev eth0
        fi
        if [ -n "$HOST_ADDRESS" ]; then
          ip route add $HOST_ADDRESS dev eth0
          ip route add default via $HOST_ADDRESS
        fi
        if [ -n "$HOST_ADDRESS6" ]; then
          ip -6 route add $HOST_ADDRESS6 dev eth0
          ip -6 route add default via $HOST_ADDRESS6
        fi

        ${extraVethSetup}
        ip a
        ip r
      fi

      # Start the regular stage 1 script.
      exec "$1"
    ''
  );
|
|
|
|
# Turn one `extraVeths` entry into the systemd-nspawn flag that creates it.
# Only the attribute name (the interface name) is used; the entry's value
# is ignored.
nspawnExtraVethArgs = name: _veth: "--network-veth-extra=${name}";
|
|
# Shell script used as the `script` of a container unit.
#
# It prepares the container root directory and the per-container Nix
# profile/gcroot directories, collects the networking flags, and finally
# execs systemd-nspawn with `containerInit cfg` as the container's init
# wrapper.
#
# `cfg` is the container configuration. Only `cfg.extraVeths` is read
# directly; everything else comes from environment variables
# ($INSTANCE, $root, $PRIVATE_NETWORK, $HOST_BRIDGE, ...).
startScript = (cfg:
  let
    # The generic template passes `{}` (no `extraVeths` attribute at all),
    # while declarative containers default to an empty set. Both mean
    # "no extra veth pairs", so normalise to a set and test for emptiness
    # instead of testing for null.
    extraVeths = cfg.extraVeths or {};
  in
  ''
    mkdir -p -m 0755 "$root/etc" "$root/var/lib"
    mkdir -p -m 0700 "$root/var/lib/private" "$root/root" /run/containers
    if ! [ -e "$root/etc/os-release" ]; then
      touch "$root/etc/os-release"
    fi

    if ! [ -e "$root/etc/machine-id" ]; then
      touch "$root/etc/machine-id"
    fi

    mkdir -p -m 0755 \
      "/nix/var/nix/profiles/per-container/$INSTANCE" \
      "/nix/var/nix/gcroots/per-container/$INSTANCE"

    cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"

    if [ "$PRIVATE_NETWORK" = 1 ]; then
      extraFlags+=" --network-veth"
      if [ -n "$HOST_BRIDGE" ]; then
        extraFlags+=" --network-bridge=$HOST_BRIDGE"
      fi
    fi

    ${if extraVeths != {} then
      ''extraFlags+=" ${concatStringsSep " " (mapAttrsToList nspawnExtraVethArgs extraVeths)}"''
    else
      ''# No extra veth pairs to create''
    }

    for iface in $INTERFACES; do
      extraFlags+=" --network-interface=$iface"
    done

    for iface in $MACVLANS; do
      extraFlags+=" --network-macvlan=$iface"
    done

    # If the host is 64-bit and the container is 32-bit, add a
    # --personality flag.
    ${optionalString (config.nixpkgs.system == "x86_64-linux") ''
      if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then
        extraFlags+=" --personality=x86"
      fi
    ''}

    # Run systemd-nspawn without startup notification (we'll
    # wait for the container systemd to signal readiness).
    EXIT_ON_REBOOT=1 \
    exec ${config.systemd.package}/bin/systemd-nspawn \
      --keep-unit \
      -M "$INSTANCE" -D "$root" $extraFlags \
      $EXTRA_NSPAWN_FLAGS \
      --notify-ready=yes \
      --bind-ro=/nix/store \
      --bind-ro=/nix/var/nix/db \
      --bind-ro=/nix/var/nix/daemon-socket \
      --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
      --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
      --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
      --setenv HOST_BRIDGE="$HOST_BRIDGE" \
      --setenv HOST_ADDRESS="$HOST_ADDRESS" \
      --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
      --setenv HOST_ADDRESS6="$HOST_ADDRESS6" \
      --setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \
      --setenv PATH="$PATH" \
      ${containerInit cfg} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
  ''
  );
|
|
|
|
# Shell script run before the container starts. It terminates any leftover
# machined registration for $INSTANCE and deletes stale host-side
# interfaces: `ve-`/`vb-$INSTANCE` when a private network is requested, plus
# every interface named in `cfg.extraVeths` (which may be absent). All
# deletions are best-effort; failures are ignored.
preStartScript = (cfg:
  let
    # One best-effort `ip link del` line for a single extra veth.
    dropExtraVeth = name: _veth:
      ''ip link del dev ${name} 2> /dev/null || true '';

    extraVethCleanup =
      concatStringsSep "\n" (mapAttrsToList dropExtraVeth (cfg.extraVeths or {}));
  in
  ''
    # Clean up existing machined registration and interfaces.
    machinectl terminate "$INSTANCE" 2> /dev/null || true

    if [ "$PRIVATE_NETWORK" = 1 ]; then
      ip link del dev "ve-$INSTANCE" 2> /dev/null || true
      ip link del dev "vb-$INSTANCE" 2> /dev/null || true
    fi

    ${extraVethCleanup}
  ''
  );
|
|
# Shell script run after the container has started. It configures the host
# side of the container's interfaces and records the container's leader PID
# in /run/containers/$INSTANCE.pid.
#
# `cfg` is the container configuration. The generic template passes `{}`,
# in which case every address is taken from the environment at run time.
postStartScript = (cfg:
  let
    # Emit `<ipcmd> add <address> dev $ifaceHost`.
    #
    #   ipcmd     - e.g. "ip addr" or "ip -6 route"
    #   variable  - shell variable reference used when the address is not
    #               known at evaluation time, e.g. "$HOST_ADDRESS"
    #   attribute - name of the option in `cfg` holding the address
    #
    # The option must be looked up dynamically with `cfg.${attribute}`.
    # A plain `cfg.attribute` selects an attribute literally called
    # "attribute", which never exists, so the static branch below could
    # never be taken.
    ipcall = (cfg: ipcmd: variable: attribute:
      if (cfg.${attribute} or null) == null then
        ''
          if [ -n "${variable}" ]; then
            ${ipcmd} add ${variable} dev $ifaceHost
          fi
        ''
      else
        ''${ipcmd} add ${cfg.${attribute}} dev $ifaceHost''
      );

    # Host-side setup for one extra veth: either enslave it to the
    # configured bridge, or assign the host addresses and add routes to the
    # container addresses.
    renderExtraVeth = (name: veth:
      if (veth.hostBridge or null) != null then
        ''
          # Add ${name} to bridge ${veth.hostBridge}
          ip link set dev ${name} master ${veth.hostBridge} up
        ''
      else
        ''
          # Set IPs and routes for ${name}
          ${optionalString ((veth.hostAddress or null) != null) ''
            ip addr add ${veth.hostAddress} dev ${name}
          ''}
          ${optionalString ((veth.hostAddress6 or null) != null) ''
            ip -6 addr add ${veth.hostAddress6} dev ${name}
          ''}
          ${optionalString ((veth.localAddress or null) != null) ''
            ip route add ${veth.localAddress} dev ${name}
          ''}
          ${optionalString ((veth.localAddress6 or null) != null) ''
            ip -6 route add ${veth.localAddress6} dev ${name}
          ''}
        ''
      );
  in
    ''
      if [ "$PRIVATE_NETWORK" = 1 ]; then
        if [ -z "$HOST_BRIDGE" ]; then
          ifaceHost=ve-$INSTANCE
          ip link set dev $ifaceHost up

          ${ipcall cfg "ip addr" "$HOST_ADDRESS" "hostAddress"}
          ${ipcall cfg "ip -6 addr" "$HOST_ADDRESS6" "hostAddress6"}
          ${ipcall cfg "ip route" "$LOCAL_ADDRESS" "localAddress"}
          ${ipcall cfg "ip -6 route" "$LOCAL_ADDRESS6" "localAddress6"}
        fi
        ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth (cfg.extraVeths or {}))}
      fi

      # Get the leader PID so that we can signal it in
      # preStop. We can't use machinectl there because D-Bus
      # might be shutting down. FIXME: in systemd 219 we can
      # just signal systemd-nspawn to do a clean shutdown.
      machinectl show "$INSTANCE" | sed 's/Leader=\(.*\)/\1/;t;d' > "/run/containers/$INSTANCE.pid"
    ''
  );
|
|
|
|
# Platform string of the host; reused when evaluating container configurations.
inherit (config.nixpkgs) system;
|
|
|
|
# Option set describing a single bind mount of a container. `name` is the
# attribute name under which the mount was declared.
bindMountOpts = { name, config, ... }: {

  options = {
    mountPoint = mkOption {
      type = types.str;
      example = "/mnt/usb";
      description = "Mount point on the container file system.";
    };

    hostPath = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "/home/alice";
      description = "Location of the host path to be mounted.";
    };

    isReadOnly = mkOption {
      type = types.bool;
      default = true;
      example = true;
      description = "Determine whether the mounted path will be accessed in read-only mode.";
    };
  };

  # Unless set explicitly, the attribute name is used as the mount point.
  config.mountPoint = mkDefault name;

};
|
|
|
|
# Render one bind-mount definition `d` as a systemd-nspawn flag with a
# leading space: ` --bind-ro=` for read-only mounts, ` --bind=` otherwise,
# followed by `hostPath:mountPoint`, or just `mountPoint` when no host path
# is given.
mkBindFlag = d:
  (if d.isReadOnly then " --bind-ro=" else " --bind=")
  + (if d.hostPath == null
     then "${d.mountPoint}"
     else "${d.hostPath}:${d.mountPoint}");
|
|
|
|
# Concatenate the nspawn flags for every bind mount in the attribute set `bs`.
mkBindFlags = bs: concatStrings (map mkBindFlag (attrValues bs));
|
|
|
|
# Options describing one veth pair. They are used both for a container's
# primary interface and for every entry of `extraVeths`.
#
# All values use `types.str` rather than `types.string`: an address or
# bridge name defined twice must be reported as a conflict, not silently
# concatenated into a bogus value.
networkOptions = {
  hostBridge = mkOption {
    type = types.nullOr types.str;
    default = null;
    example = "br0";
    description = ''
      Put the host-side of the veth-pair into the named bridge.
      Only one of hostAddress* or hostBridge can be given.
    '';
  };

  hostAddress = mkOption {
    type = types.nullOr types.str;
    default = null;
    example = "10.231.136.1";
    description = ''
      The IPv4 address assigned to the host interface.
      (Not used when hostBridge is set.)
    '';
  };

  hostAddress6 = mkOption {
    type = types.nullOr types.str;
    default = null;
    example = "fc00::1";
    description = ''
      The IPv6 address assigned to the host interface.
      (Not used when hostBridge is set.)
    '';
  };

  localAddress = mkOption {
    type = types.nullOr types.str;
    default = null;
    example = "10.231.136.2";
    description = ''
      The IPv4 address assigned to the interface in the container.
      If a hostBridge is used, this should be given with netmask to access
      the whole network. Otherwise the default netmask is /32 and routing is
      set up from localAddress to hostAddress and back.
    '';
  };

  localAddress6 = mkOption {
    type = types.nullOr types.str;
    default = null;
    example = "fc00::2";
    description = ''
      The IPv6 address assigned to the interface in the container.
      If a hostBridge is used, this should be given with netmask to access
      the whole network. Otherwise the default netmask is /128 and routing is
      set up from localAddress6 to hostAddress6 and back.
    '';
  };

};
|
|
|
|
in
|
|
|
|
{
|
|
options = {
|
|
|
|
boot = {
  # Set on the guest side: marks a system that runs inside a container.
  isContainer = mkOption {
    type = types.bool;
    default = false;
    description = ''
      Whether this NixOS machine is a lightweight container running
      in another NixOS system.
    '';
  };

  # Set on the host side: enabled by default unless this system is itself
  # a container.
  enableContainers = mkOption {
    type = types.bool;
    default = ! config.boot.isContainer;
    description = ''
      Whether to enable support for nixos containers.
    '';
  };
};
|
|
|
|
# Declarative containers: an attribute set mapping container names to their
# configuration. Each entry also accepts the shared veth options from
# `networkOptions` for its primary interface.
containers = mkOption {
  type = types.attrsOf (types.submodule (
    { config, options, name, ... }:
    {
      options = {

        config = mkOption {
          description = ''
            A specification of the desired configuration of this
            container, as a NixOS module.
          '';
        };

        path = mkOption {
          type = types.path;
          example = "/nix/var/nix/profiles/containers/webserver";
          description = ''
            As an alternative to specifying
            <option>config</option>, you can specify the path to
            the evaluated NixOS system configuration, typically a
            symlink to a system profile.
          '';
        };

        privateNetwork = mkOption {
          type = types.bool;
          default = false;
          description = ''
            Whether to give the container its own private virtual
            Ethernet interface.  The interface is called
            <literal>eth0</literal>, and is hooked up to the interface
            <literal>ve-<replaceable>container-name</replaceable></literal>
            on the host.  If this option is not set, then the
            container shares the network interfaces of the host,
            and can bind to any port on any interface.
          '';
        };

        interfaces = mkOption {
          # `types.str`, consistent with the other string options here;
          # interface names must never be merged by concatenation.
          type = types.listOf types.str;
          default = [];
          example = [ "eth1" "eth2" ];
          description = ''
            The list of interfaces to be moved into the container.
          '';
        };

        extraVeths = mkOption {
          type = types.attrsOf types.optionSet;
          default = {};
          options = networkOptions;
          description = ''
            Extra veth-pairs to be created for the container
          '';
        };

        autoStart = mkOption {
          type = types.bool;
          default = false;
          description = ''
            Whether the container is automatically started at boot-time.
          '';
        };

        bindMounts = mkOption {
          type = types.loaOf types.optionSet;
          options = [ bindMountOpts ];
          default = {};
          example = { "/home" = { hostPath = "/home/alice";
                                  isReadOnly = false; };
                    };

          description =
            ''
              An extra list of directories that is bound to the container.
            '';
        };

      } // networkOptions;

      # When `config` is given, evaluate it as a NixOS system (with the
      # container defaults prepended) and use the resulting toplevel as
      # `path`.
      config = mkMerge
        [ (mkIf options.config.isDefined {
            path = (import ../../lib/eval-config.nix {
              inherit system;
              modules =
                let extraConfig =
                  { boot.isContainer = true;
                    networking.hostName = mkDefault name;
                    networking.useDHCP = false;
                  };
                in [ extraConfig config.config ];
              prefix = [ "containers" name ];
            }).config.system.build.toplevel;
          })
        ];
    }));

  default = {};
  example = literalExample
    ''
      { webserver =
          { path = "/nix/var/nix/profiles/webserver";
          };
        database =
          { config =
              { config, pkgs, ... }:
              { services.postgresql.enable = true;
                services.postgresql.package = pkgs.postgresql92;
              };
          };
      }
    '';
  description = ''
    A set of NixOS system configurations to be run as lightweight
    containers.  Each container appears as a service
    <literal>container-<replaceable>name</replaceable></literal>
    on the host system, allowing it to be started and stopped via
    <command>systemctl</command> .
  '';
};
|
|
|
|
};
|
|
|
|
|
|
config = mkIf (config.boot.enableContainers) (let
|
|
|
|
# Service definition shared by all containers. Used as-is it is the generic
# `container@` template (scripts rendered with an empty configuration);
# declarative containers override the three scripts with per-container
# versions.
unit = {
  description = "Container '%i'";

  unitConfig.RequiresMountsFor = [ "/var/lib/containers/%i" ];

  path = [ pkgs.iproute ];

  # Variables the start/stop scripts rely on.
  environment = {
    INSTANCE = "%i";
    root = "/var/lib/containers/%i";
  };

  preStart = preStartScript {};

  script = startScript {};

  postStart = postStartScript {};

  # Signal the PID recorded in /run/containers/$INSTANCE.pid (if any), then
  # remove the PID file.
  preStop =
    ''
      pid="$(cat /run/containers/$INSTANCE.pid)"
      if [ -n "$pid" ]; then
        kill -RTMIN+4 "$pid"
      fi
      rm -f "/run/containers/$INSTANCE.pid"
    '';

  restartIfChanged = false;

  serviceConfig = {
    # Reloading switches the running container to its current system
    # configuration instead of restarting it.
    ExecReload = pkgs.writeScript "reload-container"
      ''
        #! ${pkgs.stdenv.shell} -e
        ${pkgs.nixos-container}/bin/nixos-container run "$INSTANCE" -- \
          bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
      '';

    SyslogIdentifier = "container %i";

    # Per-container settings; the leading "-" makes the file optional.
    EnvironmentFile = "-/etc/containers/%i.conf";

    Type = "notify";

    # Note that on reboot, systemd-nspawn returns 133, so this
    # unit will be restarted.  On poweroff, it returns 0, so the
    # unit won't be restarted.
    RestartForceExitStatus = "133";
    SuccessExitStatus = "133";

    Restart = "on-failure";

    # Hack: we don't want systemd to kill systemd-nspawn itself, because
    # preStop already signals the container's leader process.  But systemd
    # requires sending a signal (at least if we want remaining processes
    # to be killed after the timeout).  So send an ignored signal.
    KillMode = "mixed";
    KillSignal = "WINCH";

    DevicePolicy = "closed";
  };
};
|
|
in {
|
|
systemd.services =
  let
    # Service for one declarative container: the shared `unit` with its
    # scripts rendered for this container's configuration, plus boot-time
    # activation settings when `autoStart` is enabled.
    declarativeUnit = name: cfg:
      nameValuePair "container@${name}" (
        unit
        // {
          preStart = preStartScript cfg;
          script = startScript cfg;
          postStart = postStartScript cfg;
        }
        // optionalAttrs cfg.autoStart {
          wantedBy = [ "multi-user.target" ];
          wants = [ "network.target" ];
          after = [ "network.target" ];
          restartTriggers = [ cfg.path ];
          reloadIfChanged = true;
        }
      );

    # The generic container template used by imperative containers
    templateUnit = nameValuePair "container@" unit;
  in
    listToAttrs (filter (svc: svc.value != null) (
      [ templateUnit ]
      # declarative containers
      ++ mapAttrsToList declarativeUnit config.containers
    ));
|
|
|
|
# Write /etc/containers/<name>.conf for every declarative container. The
# container unit loads this file as its EnvironmentFile, which is how the
# scripts learn the container's settings.
environment.etc =
  let
    # A `VAR=value` line, or the empty string when the option is unset.
    envLine = var: value: optionalString (value != null) "${var}=${value}\n";

    confFile = name: cfg: nameValuePair "containers/${name}.conf" {
      text =
        ''
          SYSTEM_PATH=${cfg.path}
          ${optionalString cfg.privateNetwork ''
            PRIVATE_NETWORK=1
            ${envLine "HOST_BRIDGE" cfg.hostBridge}
            ${envLine "HOST_ADDRESS" cfg.hostAddress}
            ${envLine "HOST_ADDRESS6" cfg.hostAddress6}
            ${envLine "LOCAL_ADDRESS" cfg.localAddress}
            ${envLine "LOCAL_ADDRESS6" cfg.localAddress6}
          ''}
          INTERFACES="${toString cfg.interfaces}"
          ${optionalString cfg.autoStart ''
            AUTO_START=1
          ''}
          EXTRA_NSPAWN_FLAGS="${mkBindFlags cfg.bindMounts}"
        '';
    };
  in
    mapAttrs' confFile config.containers;
|
|
|
|
# Generate /etc/hosts entries for the containers.
#
# `localAddress` may be given with a netmask (e.g. "10.0.0.2/24" when a
# host bridge is used). /etc/hosts needs the bare address, so everything
# from the first "/" onwards is dropped.
networking.extraHosts = concatStrings (mapAttrsToList (name: cfg: optionalString (cfg.localAddress != null)
  ''
    ${head (splitString "/" cfg.localAddress)} ${name}.containers
  '') config.containers);
|
|
|
|
# Keep dhcpcd away from the host side of container veth pairs: "ve-<name>"
# for routed containers and "vb-<name>" for containers attached to a host
# bridge (the two names cleaned up in preStartScript).
networking.dhcpcd.denyInterfaces = [ "ve-*" "vb-*" ];
|
|
|
|
# Make the nixos-container command available on the host.
environment.systemPackages = with pkgs; [ nixos-container ];
|
|
});
|
|
}
|