Merge pull request #281192 from diogotcorreia/pgvecto.rs

postgresqlPackages.pgvecto-rs: init at 0.2.1
This commit is contained in:
Atemu 2024-03-11 07:12:52 +00:00 committed by GitHub
commit ce8ddcd321
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 3844 additions and 2 deletions

View file

@ -686,6 +686,7 @@ in {
pgbouncer = handleTest ./pgbouncer.nix {};
pgjwt = handleTest ./pgjwt.nix {};
pgmanage = handleTest ./pgmanage.nix {};
pgvecto-rs = handleTest ./pgvecto-rs.nix {};
phosh = handleTest ./phosh.nix {};
photoprism = handleTest ./photoprism.nix {};
php = handleTest ./php {};

View file

@ -0,0 +1,76 @@
# mostly copied from ./timescaledb.nix which was copied from ./postgresql.nix
# as it seemed inappropriate to test additional extensions for postgresql there.
{ system ? builtins.currentSystem
, config ? { }
, pkgs ? import ../.. { inherit system config; }
}:
with import ../lib/testing-python.nix { inherit system pkgs; };
with pkgs.lib;
let
postgresql-versions = import ../../pkgs/servers/sql/postgresql pkgs;
# Test cases from https://docs.pgvecto.rs/use-cases/hybrid-search.html
test-sql = pkgs.writeText "postgresql-test" ''
CREATE EXTENSION vectors;
CREATE TABLE items (
id bigserial PRIMARY KEY,
content text NOT NULL,
embedding vectors.vector(3) NOT NULL -- 3 dimensions
);
INSERT INTO items (content, embedding) VALUES
('a fat cat sat on a mat and ate a fat rat', '[1, 2, 3]'),
('a fat dog sat on a mat and ate a fat rat', '[4, 5, 6]'),
('a thin cat sat on a mat and ate a thin rat', '[7, 8, 9]'),
('a thin dog sat on a mat and ate a thin rat', '[10, 11, 12]');
'';
# Build one VM test for a single PostgreSQL package. `postgresql-name` is used
# as the test name and `postgresql-package` is the server package under test.
make-postgresql-test = postgresql-name: postgresql-package: makeTest {
name = postgresql-name;
meta = with pkgs.lib.maintainers; {
maintainers = [ diogotcorreia ];
};
nodes.machine = { ... }:
{
services.postgresql = {
enable = true;
package = postgresql-package;
extraPlugins = ps: with ps; [
pgvecto-rs
];
# Preload the extension's `vectors` library when the server starts.
settings.shared_preload_libraries = "vectors";
};
};
# The script checks that the `vectors` extension is listed with the packaged
# version, loads the SQL fixture, and expects exactly two rows to match the
# full-text query.
testScript = ''
def check_count(statement, lines):
return 'test $(sudo -u postgres psql postgres -tAc "{}"|wc -l) -eq {}'.format(
statement, lines
)
machine.start()
machine.wait_for_unit("postgresql")
with subtest("Postgresql with extension vectors is available just after unit start"):
machine.succeed(check_count("SELECT * FROM pg_available_extensions WHERE name = 'vectors' AND default_version = '${postgresql-package.pkgs.pgvecto-rs.version}';", 1))
machine.succeed("sudo -u postgres psql -f ${test-sql}")
machine.succeed(check_count("SELECT content, embedding FROM items WHERE to_tsvector('english', content) @@ 'cat & rat'::tsquery;", 2))
machine.shutdown()
'';
};
# Keep only the PostgreSQL packages whose version is at least 12.
applicablePostgresqlVersions = filterAttrs (_: value: versionAtLeast value.version "12") postgresql-versions;
in
# Result: one test per applicable PostgreSQL package, keyed by the package's
# attribute name.
mapAttrs'
(name: package: {
inherit name;
value = make-postgresql-test name package;
})
applicablePostgresqlVersions

View file

@ -1,4 +1,4 @@
{ lib, buildPackages, callPackage, cargo-auditable, stdenv, runCommand }@prev:
{ lib, buildPackages, callPackage, callPackages, cargo-auditable, stdenv, runCommand }@prev:
{ rustc
, cargo
@ -34,7 +34,7 @@ rec {
};
# Hooks
inherit (callPackage ../../../build-support/rust/hooks {
inherit (callPackages ../../../build-support/rust/hooks {
inherit stdenv cargo rustc;
}) cargoBuildHook cargoCheckHook cargoInstallHook cargoNextestHook cargoSetupHook maturinBuildHook bindgenHook;
}

View file

@ -0,0 +1,19 @@
diff --git a/crates/c/build.rs b/crates/c/build.rs
index 8d822e5..8b7e371 100644
--- a/crates/c/build.rs
+++ b/crates/c/build.rs
@@ -1,9 +1,13 @@
fn main() {
println!("cargo:rerun-if-changed=src/c.h");
println!("cargo:rerun-if-changed=src/c.c");
+ println!("cargo:rerun-if-env-changed=BINDGEN_EXTRA_CLANG_ARGS");
cc::Build::new()
- .compiler("clang-16")
+ .compiler("@clang@")
.file("./src/c.c")
+ // read env var set by rustPlatform.bindgenHook
+ .try_flags_from_environment("BINDGEN_EXTRA_CLANG_ARGS")
+ .expect("the BINDGEN_EXTRA_CLANG_ARGS environment variable must be specified and UTF-8")
.opt_level(3)
.debug(true)
.compile("pgvectorsc");

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,118 @@
{ lib
, buildPgrxExtension
, cargo-pgrx
, clang_16
, fetchCrate
, fetchFromGitHub
, nix-update-script
, nixosTests
, openssl
, pkg-config
, postgresql
, rustPlatform
, stdenv
, substituteAll
}:
let
# Upstream only works with clang 16, so we're pinning it here to
# avoid future incompatibility.
# See https://docs.pgvecto.rs/developers/development.html#environment, step 4
clang = clang_16;
# Same as rustPlatform, except that bindgenHook is overridden to use the
# pinned clang.
rustPlatform' = rustPlatform // {
bindgenHook = rustPlatform.bindgenHook.override { inherit clang; };
};
# Upstream only works with a fixed version of cargo-pgrx for each release,
# so we're pinning it here to avoid future incompatibility.
# See https://docs.pgvecto.rs/developers/development.html#environment, step 6
cargo-pgrx_0_11_2 = cargo-pgrx.overrideAttrs (old: rec {
pname = "cargo-pgrx";
version = "0.11.2";
src = fetchCrate {
pname = "cargo-pgrx";
inherit version;
hash = "sha256-8NlpMDFaltTIA8G4JioYm8LaPJ2RGKH5o6sd6lBHmmM=";
};
# The vendored dependencies are derived from the pinned source as well, so
# both their `src` and their output hash are replaced to match this version.
cargoDeps = old.cargoDeps.overrideAttrs (_: {
inherit src;
outputHash = "sha256-qTb3JV3u42EilaK2jP9oa5D09mkuHyRbGGRs9Rg4TzI=";
});
});
in
(buildPgrxExtension.override {
cargo-pgrx = cargo-pgrx_0_11_2;
rustPlatform = rustPlatform';
}) rec {
inherit postgresql;
pname = "pgvecto-rs";
version = "0.2.1";
buildInputs = [ openssl ];
nativeBuildInputs = [ pkg-config ];
patches = [
# Tell the `c` crate to use the flags from the rust bindgen hook
(substituteAll {
src = ./0001-read-clang-flags-from-environment.diff;
clang = lib.getExe clang;
})
];
src = fetchFromGitHub {
owner = "tensorchord";
repo = "pgvecto.rs";
rev = "v${version}";
hash = "sha256-kwaGHerEVh6Oxb9jQupSapm7CsKl5CoH6jCv+zbi4FE=";
};
# Package has git dependencies in Cargo.lock (instead of just crates.io dependencies),
# so cargoHash does not work, therefore we have to include Cargo.lock in nixpkgs.
cargoLock = {
lockFile = ./Cargo.lock;
outputHashes = {
"openai_api_rust-0.1.8" = "sha256-os5Y8KIWXJEYEcNzzT57wFPpEXdZ2Uy9W3j5+hJhhR4=";
"std_detect-0.1.5" = "sha256-RwWejfqyGOaeU9zWM4fbb/hiO1wMpxYPKEjLO0rtRmU=";
};
};
# Set appropriate version on vectors.control, otherwise it won't show up on PostgreSQL
postPatch = ''
substituteInPlace ./vectors.control --subst-var-by CARGO_VERSION ${version}
'';
# Include upgrade scripts in the final package
# https://github.com/tensorchord/pgvecto.rs/blob/v0.2.0/scripts/ci_package.sh#L6-L8
postInstall = ''
cp sql/upgrade/* $out/share/postgresql/extension/
'';
env = {
# Needed to get openssl-sys to use pkg-config.
OPENSSL_NO_VENDOR = 1;
# Bypass rust nightly features not being available on rust stable
RUSTC_BOOTSTRAP = 1;
};
passthru = {
updateScript = nix-update-script { };
tests = {
pgvecto-rs = nixosTests.pgvecto-rs;
};
};
meta = with lib; {
# The pgrx 0.11.2 dependency is broken in aarch64-linux: https://github.com/pgcentralfoundation/pgrx/issues/1429
# It is fixed in pgrx 0.11.3, but upstream is still using pgrx 0.11.2
# NOTE(review): Darwin is also marked broken below, but no reason is
# recorded here — TODO document why.
broken = (stdenv.isLinux && stdenv.isAarch64) || stdenv.isDarwin;
description = "Scalable, Low-latency and Hybrid-enabled Vector Search in Postgres";
homepage = "https://github.com/tensorchord/pgvecto.rs";
license = licenses.asl20;
maintainers = with maintainers; [ diogotcorreia esclear ];
};
}

View file

@ -44,6 +44,8 @@ self: super: {
pgsql-http = super.callPackage ./ext/pgsql-http.nix { };
pgvecto-rs = super.callPackage ./ext/pgvecto-rs { };
pgvector = super.callPackage ./ext/pgvector.nix { };
plpgsql_check = super.callPackage ./ext/plpgsql_check.nix { };