Merge pull request #171874 from cpcloud/arrow-cpp-8.0

This commit is contained in:
Sandro 2022-05-10 16:38:13 +02:00 committed by GitHub
commit f76fa41ae6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 68 additions and 16 deletions

View file

@ -19,6 +19,7 @@
, grpc
, gtest
, jemalloc
, libbacktrace
, lz4
, minio
, ninja
@ -69,21 +70,20 @@ let
in
stdenv.mkDerivation rec {
pname = "arrow-cpp";
version = "7.0.0";
version = "8.0.0";
src = fetchurl {
url =
"mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
hash = "sha256-6PSbFJoV7O9OQPz6sbh8ETxrHuGGAFwWnlzfldMamd4=";
url = "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
hash = "sha256-rZoFcFEXyYnBFrrprHBJL+AVBQ4bgPsOOP3ktdhjqqM=";
};
sourceRoot = "apache-arrow-${version}/cpp";
${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = jemalloc.src;
# versions are all taken from
# https://github.com/apache/arrow/blob/apache-arrow-8.0.0/cpp/thirdparty/versions.txt
ARROW_MIMALLOC_URL = fetchFromGitHub {
# From
# ./cpp/cmake_modules/ThirdpartyToolchain.cmake
# ./cpp/thirdparty/versions.txt
owner = "microsoft";
repo = "mimalloc";
rev = "v1.7.3";
@ -93,8 +93,15 @@ stdenv.mkDerivation rec {
ARROW_XSIMD_URL = fetchFromGitHub {
owner = "xtensor-stack";
repo = "xsimd";
rev = "aeec9c872c8b475dedd7781336710f2dd2666cb2";
hash = "sha256-vWKdJkieKhaxyAJhijXUmD7NmNvMWd79PskQojulA1w=";
rev = "7d1778c3b38d63db7cec7145d939f40bc5d859d1";
hash = "sha256-89AysBUVnTdWyMPazeJegnQ6WEH90Ns7qQInZLMSXY4=";
};
# Pre-fetched checkout of substrait-io/substrait, exported to the build as
# ARROW_SUBSTRAIT_URL and pinned to an exact commit.
# NOTE(review): presumably picked up by Arrow's CMake third-party handling
# when -DARROW_SUBSTRAIT=ON is set, so nothing is downloaded at build time —
# confirm against cpp/cmake_modules/ThirdpartyToolchain.cmake.
ARROW_SUBSTRAIT_URL = fetchFromGitHub {
owner = "substrait-io";
repo = "substrait";
# NOTE(review): rev should match the Substrait pin in Arrow's
# cpp/thirdparty/versions.txt for this release — verify on version bumps.
rev = "e1b4c04a1b518912f4c4065b16a1b2c0ac8e14cf";
hash = "sha256-56FSjDngsROSHLjMv+OYAIYqphEu3GzgIMHbgh/ZQw0=";
};
patches = [
@ -115,7 +122,10 @@ stdenv.mkDerivation rec {
gflags
glog
gtest
libbacktrace
lz4
nlohmann_json # alternative JSON parser to rapidjson
protobuf # substrait requires protobuf
rapidjson
re2
snappy
@ -150,6 +160,9 @@ stdenv.mkDerivation rec {
"-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}"
"-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
"-DARROW_BUILD_TESTS=ON"
"-DARROW_BUILD_INTEGRATION=ON"
"-DARROW_BUILD_UTILITIES=ON"
"-DARROW_EXTRA_ERROR_CONTEXT=ON"
"-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
"-DARROW_DEPENDENCY_SOURCE=SYSTEM"
"-DThrift_SOURCE=AUTO" # search for Thrift using pkg-config (ThriftConfig.cmake requires OpenSSL and libevent)
@ -168,8 +181,10 @@ stdenv.mkDerivation rec {
# Disable Python for static mode because openblas is currently broken there.
"-DARROW_PYTHON=${if enableShared then "ON" else "OFF"}"
"-DARROW_USE_GLOG=ON"
"-DARROW_WITH_BACKTRACE=ON"
"-DARROW_WITH_BROTLI=ON"
"-DARROW_WITH_LZ4=ON"
"-DARROW_WITH_NLOHMANN_JSON=ON"
"-DARROW_WITH_SNAPPY=ON"
"-DARROW_WITH_UTF8PROC=ON"
"-DARROW_WITH_ZLIB=ON"
@ -177,8 +192,10 @@ stdenv.mkDerivation rec {
"-DARROW_MIMALLOC=ON"
# Parquet options:
"-DARROW_PARQUET=ON"
"-DARROW_SUBSTRAIT=ON"
"-DPARQUET_BUILD_EXECUTABLES=ON"
"-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}"
"-DARROW_FLIGHT_TESTING=${if enableFlight then "ON" else "OFF"}"
"-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
"-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
] ++ lib.optionals (!enableShared) [

View file

@ -5,6 +5,7 @@
, dill
, fastavro
, fetchFromGitHub
, fetchpatch
, freezegun
, grpcio
, grpcio-tools
@ -51,6 +52,15 @@ buildPythonPackage rec {
sha256 = "sha256-FmfTxRLqXUHhhAZIxCRx2+phX0bmU5rIHaftBU4yBJY=";
};
# Patches applied on top of the fetched source.
patches = [
# Backport of the upstream Beam commit that fixes the argument passed to
# pyarrow.Table.to_batches(max_chunksize=...).
(fetchpatch {
url = "https://github.com/apache/beam/commit/2418a14ee99ff490d1c82944043f97f37ec97a85.patch";
sha256 = "sha256-G8ARBBf7nmF46P2ncnlteGFnPWq5iCqZDfuaosre9jY=";
# Drop two extra leading path components from the file names in the patch.
# NOTE(review): presumably because the package is built from a subdirectory
# of the upstream repository (e.g. sdks/python) — confirm against sourceRoot.
stripLen = 2;
})
];
# See https://github.com/NixOS/nixpkgs/issues/156957.
postPatch = ''
substituteInPlace setup.py \

View file

@ -1,6 +1,7 @@
{ lib
, buildPythonPackage
, fetchPypi
, fetchpatch
, fetchFromGitHub
, numpy
, packaging
, pandas
@ -12,11 +13,20 @@ buildPythonPackage rec {
pname = "db-dtypes";
version = "1.0.0";
src = fetchPypi {
inherit pname version;
sha256 = "3070d1a8d86ff0b5d9b16f15c5fab9c18893c6b3d5723cd95ee397b169049454";
src = fetchFromGitHub {
owner = "googleapis";
repo = "python-db-dtypes-pandas";
rev = "v${version}";
hash = "sha256-7u/E0ICiz7LQfuplm/mkGlWrgGEPqeMwM3CUhfH6868=";
};
# Patches applied on top of the tagged upstream source.
patches = [
# Upstream commit from googleapis/python-db-dtypes-pandas, fetched as a patch.
# NOTE(review): the purpose of this commit is not stated here; presumably a
# compatibility fix needed after the Arrow/pyarrow 8.0 bump in this PR —
# confirm, and drop once a release containing it is packaged.
(fetchpatch {
url = "https://github.com/googleapis/python-db-dtypes-pandas/commit/fb30adfd427d3df9919df00b096210ba1eb1b91d.patch";
sha256 = "sha256-39kZtYGbn3U1WXiDTczki5EM6SjUlSRXz8UMcdTU20g=";
})
];
propagatedBuildInputs = [
numpy
packaging

View file

@ -16,6 +16,7 @@
, proto-plus
, psutil
, pyarrow
, pytest-xdist
}:
buildPythonPackage rec {
@ -28,6 +29,11 @@ buildPythonPackage rec {
sha256 = "sha256-UmW6BEV44Ucdg/hUGSQk/kyDnB+Hsyx4q3AXTQe89hI=";
};
# Relax the pyarrow upper bound declared in setup.py from "< 8.0dev" to
# "< 9.0dev" so the requirement accepts pyarrow 8.x.
# NOTE(review): the replaced string must match setup.py exactly; re-check
# (or remove) this substitution when the package version is bumped.
postPatch = ''
substituteInPlace setup.py \
--replace 'pyarrow >= 3.0.0, < 8.0dev' 'pyarrow >= 3.0.0, < 9.0dev'
'';
propagatedBuildInputs = [
google-cloud-core
google-cloud-bigquery-storage
@ -47,6 +53,7 @@ buildPythonPackage rec {
google-cloud-datacatalog
google-cloud-storage
pytestCheckHook
pytest-xdist
];
# prevent google directory from shadowing google imports

View file

@ -47,8 +47,10 @@ buildPythonPackage rec {
PYARROW_WITH_DATASET = zero_or_one true;
PYARROW_WITH_FLIGHT = zero_or_one _arrow-cpp.enableFlight;
PYARROW_WITH_PARQUET = zero_or_one true;
PYARROW_WITH_HDFS = zero_or_one true;
PYARROW_WITH_PARQUET = zero_or_one true;
PYARROW_WITH_PLASMA = zero_or_one (!stdenv.isDarwin);
PYARROW_WITH_S3 = zero_or_one _arrow-cpp.enableS3;
PYARROW_CMAKE_OPTIONS = [
"-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib"
@ -73,6 +75,11 @@ buildPythonPackage rec {
# enabled in nixpkgs.
# Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393
"--deselect=pyarrow/tests/test_memory.py::test_env_var"
# These tests require access to S3 over the internet.
"--deselect=pyarrow/tests/test_fs.py::test_resolve_s3_region"
"--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws"
"--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws_region_selection"
"--deselect=pyarrow/tests/test_fs.py::test_s3_options"
] ++ lib.optionals stdenv.isDarwin [
# Requires loopback networking
"--deselect=pyarrow/tests/test_ipc.py::test_socket_"
@ -84,16 +91,17 @@ buildPythonPackage rec {
rm -r pyarrow/!(tests)
'';
pythonImportsCheck = map (module: "pyarrow.${module}") [
pythonImportsCheck = [ "pyarrow" ] ++ map (module: "pyarrow.${module}") ([
"compute"
"csv"
"dataset"
"feather"
"flight"
"fs"
"hdfs"
"json"
"parquet"
];
] ++ lib.optionals (!stdenv.isDarwin) [ "plasma" ]);
meta = with lib; {
description = "A cross-language development platform for in-memory data";