nixpkgs/nixos/tests/postgresql-wal-receiver.nix

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

120 lines
4.6 KiB
Nix
Raw Normal View History

{ system ? builtins.currentSystem,
config ? {},
pkgs ? import ../.. { inherit system config; }
}:
with import ../lib/testing-python.nix { inherit system pkgs; };
let
lib = pkgs.lib;
# Makes a test for a PostgreSQL package, given by name and looked up from `pkgs`.
makePostgresqlWalReceiverTest = postgresqlPackage:
{
name = postgresqlPackage;
value =
let
# Package under test, resolved from its attribute name in `pkgs`.
pkg = pkgs.${postgresqlPackage};
atLeast12 = lib.versionAtLeast pkg.version "12.0";

# Data directory path, derived from the package's `psqlSchema`.
postgresqlDataDir = "/var/lib/postgresql/${pkg.psqlSchema}";

# Scratch directories for the base backup and for the received WAL segments.
baseBackupDir = "/tmp/pg_basebackup";
walBackupDir = "/tmp/pg_wal";

# Replication role, slot and connection string shared by the machine
# configuration and the test script.
replicationUser = "wal_receiver_user";
replicationSlot = "wal_receiver_slot";
replicationConn = "postgresql://${replicationUser}@localhost";

# Directory containing the single file that the test script copies into the
# data directory before replaying WAL: an empty `recovery.signal` for
# versions >= 12, otherwise a `recovery.conf` carrying the restore command.
recoveryFile =
  let
    signalFile = pkgs.writeTextDir "recovery.signal" "";
    legacyConf = pkgs.writeTextDir "recovery.conf" "restore_command = 'cp ${walBackupDir}/%f %p'";
  in
  if atLeast12 then signalFile else legacyConf;
in makeTest {
name = "postgresql-wal-receiver-${postgresqlPackage}";
meta.maintainers = with lib.maintainers; [ pacien ];
2022-03-21 00:15:30 +01:00
nodes.machine = { ... }: {
  services = {
    postgresql = {
      enable = true;
      package = pkg;

      # Creates the replication role and the physical replication slot that
      # the WAL receiver below is configured with.
      initialScript = pkgs.writeText "init.sql" ''
        create user ${replicationUser} replication;
        select * from pg_create_physical_replication_slot('${replicationSlot}');
      '';

      # Let the replication role open replication connections without a password.
      authentication = ''
        host replication ${replicationUser} all trust
      '';

      settings = lib.mkMerge [
        {
          wal_level = "archive"; # accepted as an alias for "replica" on pg >= 9.6
          max_wal_senders = 10;
          max_replication_slots = 10;
        }
        # For versions >= 12 the restore command is set in the server settings
        # (older versions get it through the recovery.conf in `recoveryFile`);
        # `recovery_end_command` creates the `recovery.done` marker that the
        # test script waits for.
        (lib.mkIf atLeast12 {
          restore_command = "cp ${walBackupDir}/%f %p";
          recovery_end_command = "touch recovery.done";
        })
      ];
    };

    postgresqlWalReceiver.receivers.main = {
      directory = walBackupDir;
      slot = replicationSlot;
      connection = replicationConn;
      postgresqlPackage = pkg;
    };
  };

  # Purely a test speed-up, not a fix for a timing race: the service always
  # restarts automatically, and the default 60 s delay, while fine on a real
  # system, is too long for a test.
  systemd.services.postgresql-wal-receiver-main.serviceConfig = {
    RestartSec = lib.mkForce 5;
  };
};
# Python script for the test driver. It waits for PostgreSQL and the WAL
# receiver, takes a base backup, creates a 100-row table, stops both services
# and deletes the database files, restores the base backup, promotes the
# `.partial` WAL segments, installs the recovery file, restarts PostgreSQL to
# replay the WAL, and finally asserts that all 100 rows are present again.
testScript = ''
# make an initial base backup
machine.wait_for_unit("postgresql")
machine.wait_for_unit("postgresql-wal-receiver-main")
# WAL receiver healthchecks PG every 5 seconds, so let's be sure they have connected each other
# required only for 9.4
machine.sleep(5)
machine.succeed(
"${pkg}/bin/pg_basebackup --dbname=${replicationConn} --pgdata=${baseBackupDir}"
)
# create a dummy table with 100 records
machine.succeed(
"sudo -u postgres psql --command='create table dummy as select * from generate_series(1, 100) as val;'"
)
# stop postgres and destroy data
machine.systemctl("stop postgresql")
machine.systemctl("stop postgresql-wal-receiver-main")
machine.succeed("rm -r ${postgresqlDataDir}/{base,global,pg_*}")
# restore the base backup
machine.succeed(
"cp -r ${baseBackupDir}/* ${postgresqlDataDir} && chown postgres:postgres -R ${postgresqlDataDir}"
)
# prepare WAL and recovery
machine.succeed("chmod a+rX -R ${walBackupDir}")
machine.execute(
"for part in ${walBackupDir}/*.partial; do mv $part ''${part%%.*}; done"
) # make use of partial segments too
machine.succeed(
"cp ${recoveryFile}/* ${postgresqlDataDir}/ && chmod 666 ${postgresqlDataDir}/recovery*"
)
# replay WAL
machine.systemctl("start postgresql")
machine.wait_for_file("${postgresqlDataDir}/recovery.done")
machine.systemctl("restart postgresql")
machine.wait_for_unit("postgresql")
# check that our records have been restored
machine.succeed(
"test $(sudo -u postgres psql --pset='pager=off' --tuples-only --command='select count(distinct val) from dummy;') -eq 100"
)
'';
};
};
# Maps the generic function over all attributes of PostgreSQL packages
postgresql: implement opt-in JIT support Closes #150801 Note: I decided against resuming directly on #150801 because the conflict was too big (and resolving it seemed too error-prone to me). Also the `this`-refactoring could be done in an easier manner, i.e. by exposing JIT attributes with the correct configuration. More on that below. This patch creates variants of the `postgresql*`-packages with JIT[1] support. Please note that a lot of the work was derived from previous patches filed by other contributors, namely dasJ, andir and abbradar, hence the co-authored-by tags below. Effectively, the following things have changed: * For JIT variants an LLVM-backed stdenv with clang is now used as suggested by dasJ[2]. We need LLVM and CLang[3] anyways to build the JIT-part, so no need to mix this up with GCC's stdenv. Also, using the `dev`-output of LLVM and clang's stdenv for building (and adding llvm libs as build-inputs) seems more cross friendly to me (which will become useful when cross-building for JIT-variants will actually be supported). * Plugins inherit the build flags from the Makefiles in `$out/lib/pgxs/src` (e.g. `-Werror=unguarded-availability-new`). Since some of the flags are clang-specific (and stem from the use of the CLang stdenv) and don't work on gcc, the stdenv of `pkgs.postgresql` is passed to the plugins. I.e., plugins for non-JIT variants are built with a gcc stdenv on Linux and plugins for JIT variants with a clang stdenv. Since `plv8` hard-codes `gcc` as `$CC` in its Makefile[4], I marked it as broken for JIT-variants of postgresql only. * Added a test-matrix to confirm that JIT works fine on each `pkgs.postgresql_*_jit` (thanks Andi for the original test in #124804!). * For each postgresql version, a new attribute `postgresql_<version>_jit` (and a corresponding `postgresqlPackages<version>JitPackages`) are now exposed for better discoverability and prebuilt artifacts in the binary cache. 
* In #150801 the `this`-argument was replaced by an internal recursion. I decided against this approach because it'd blow up the diff even more which makes the readability way harder and also harder to revert this if necessary. Instead, it is made sure that `this` always points to the correct variant of `postgresql` and re-using that in an additional `.override {}`-expression is trivial because the JIT-variant is exposed in `all-packages.nix`. * I think the changes are sufficiently big to actually add myself as maintainer here. * Added `libxcrypt` to `buildInputs` for versions <v13. While building things with an LLVM stdenv, these versions complained that the extern `crypt()` symbol can't be found. Not sure what this is exactly about, but since we want to switch to libxcrypt for `crypt()` usage anyways[5] I decided to add it. For >=13 it's not relevant anymore anyways[6]. * JIT support doesn't work with cross-compilation. It is attempted to build LLVM-bytecode (`%.bc` is the corresponding `make(1)`-rule) for each sub-directory in `backend/` for the JIT apparently, but with a $(CLANG) that can produce binaries for the build, not the host-platform. I managed to get a cross-build with JIT support working with `depsBuildBuild = [ llvmPackages.clang ] ++ buildInputs`, but considering that the resulting LLVM IR isn't platform-independent this doesn't give you much. In fact, I tried to test the result in a VM-test, but as soon as JIT was used to optimize a query, postgres would coredump with `Illegal instruction`. A common concern of the original approach - with llvm as build input - was the massive increase of closure size. 
With the new approach of using the LLVM stdenv directly and patching out references to the clang drv in `$out` the effective closure size changes are: $ nix path-info -Sh $(nix-build -A postgresql_14) /nix/store/kssxxqycwa3c7kmwmykwxqvspxxa6r1w-postgresql-14.7 306.4M $ nix path-info -Sh $(nix-build -A postgresql_14_jit) /nix/store/xc7qmgqrn4h5yr4vmdwy56gs4bmja9ym-postgresql-14.7 689.2M Most of the increase in closure-size stems from the `lib`-output of LLVM $ nix path-info -Sh /nix/store/5r97sbs5j6mw7qnbg8nhnq1gad9973ap-llvm-11.1.0-lib /nix/store/5r97sbs5j6mw7qnbg8nhnq1gad9973ap-llvm-11.1.0-lib 349.8M which is why this shouldn't be enabled by default. While this is quite much because of LLVM, it's still a massive improvement over the simple approach of adding llvm/clang as build-inputs and building with `--with-llvm`: $ nix path-info -Sh $(nix-build -E ' with import ./. {}; postgresql.overrideAttrs ({ configureFlags ? [], buildInputs ? [], ... }: { configureFlags = configureFlags ++ [ "--with-llvm" ]; buildInputs = buildInputs ++ [ llvm clang ]; })' -j0) /nix/store/i3bd2r21c6c3428xb4gavjnplfqxn27p-postgresql-14.7 1.6G Co-authored-by: Andreas Rammhold <andreas@rammhold.de> Co-authored-by: Janne Heß <janne@hess.ooo> Co-authored-by: Nikolay Amiantov <ab@fmap.me> [1] https://www.postgresql.org/docs/current/jit-reason.html [2] https://github.com/NixOS/nixpkgs/pull/124804#issuecomment-864616931 & https://github.com/NixOS/nixpkgs/pull/150801#issuecomment-1467868321 [3] This fails with the following error otherwise: ``` configure: error: clang not found, but required when compiling --with-llvm, specify with CLANG= ``` [4] https://github.com/plv8/plv8/blob/v3.1.5/Makefile#L14 [5] https://github.com/NixOS/nixpkgs/pull/181764 [6] https://github.com/postgres/postgres/commit/c45643d618e35ec2fe91438df15abd4f3c0d85ca
2023-03-18 09:54:54 +01:00
in
let
  # Attribute names of the PostgreSQL package set; each name is later looked
  # up in `pkgs` by makePostgresqlWalReceiverTest.
  postgresqlPackageNames = builtins.attrNames (import ../../pkgs/servers/sql/postgresql pkgs);
in
# Build one test per package name, keyed by that name.
builtins.listToAttrs (map makePostgresqlWalReceiverTest postgresqlPackageNames)