Merge pull request #171874 from cpcloud/arrow-cpp-8.0
This commit is contained in:
commit
f76fa41ae6
5 changed files with 68 additions and 16 deletions
|
@ -19,6 +19,7 @@
|
|||
, grpc
|
||||
, gtest
|
||||
, jemalloc
|
||||
, libbacktrace
|
||||
, lz4
|
||||
, minio
|
||||
, ninja
|
||||
|
@ -69,21 +70,20 @@ let
|
|||
in
|
||||
stdenv.mkDerivation rec {
|
||||
pname = "arrow-cpp";
|
||||
version = "7.0.0";
|
||||
version = "8.0.0";
|
||||
|
||||
src = fetchurl {
|
||||
url =
|
||||
"mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
|
||||
hash = "sha256-6PSbFJoV7O9OQPz6sbh8ETxrHuGGAFwWnlzfldMamd4=";
|
||||
url = "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
|
||||
hash = "sha256-rZoFcFEXyYnBFrrprHBJL+AVBQ4bgPsOOP3ktdhjqqM=";
|
||||
};
|
||||
sourceRoot = "apache-arrow-${version}/cpp";
|
||||
|
||||
${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = jemalloc.src;
|
||||
|
||||
# versions are all taken from
|
||||
# https://github.com/apache/arrow/blob/apache-arrow-8.0.0/cpp/thirdparty/versions.txt
|
||||
|
||||
ARROW_MIMALLOC_URL = fetchFromGitHub {
|
||||
# From
|
||||
# ./cpp/cmake_modules/ThirdpartyToolchain.cmake
|
||||
# ./cpp/thirdparty/versions.txt
|
||||
owner = "microsoft";
|
||||
repo = "mimalloc";
|
||||
rev = "v1.7.3";
|
||||
|
@ -93,8 +93,15 @@ stdenv.mkDerivation rec {
|
|||
ARROW_XSIMD_URL = fetchFromGitHub {
|
||||
owner = "xtensor-stack";
|
||||
repo = "xsimd";
|
||||
rev = "aeec9c872c8b475dedd7781336710f2dd2666cb2";
|
||||
hash = "sha256-vWKdJkieKhaxyAJhijXUmD7NmNvMWd79PskQojulA1w=";
|
||||
rev = "7d1778c3b38d63db7cec7145d939f40bc5d859d1";
|
||||
hash = "sha256-89AysBUVnTdWyMPazeJegnQ6WEH90Ns7qQInZLMSXY4=";
|
||||
};
|
||||
|
||||
ARROW_SUBSTRAIT_URL = fetchFromGitHub {
|
||||
owner = "substrait-io";
|
||||
repo = "substrait";
|
||||
rev = "e1b4c04a1b518912f4c4065b16a1b2c0ac8e14cf";
|
||||
hash = "sha256-56FSjDngsROSHLjMv+OYAIYqphEu3GzgIMHbgh/ZQw0=";
|
||||
};
|
||||
|
||||
patches = [
|
||||
|
@ -115,7 +122,10 @@ stdenv.mkDerivation rec {
|
|||
gflags
|
||||
glog
|
||||
gtest
|
||||
libbacktrace
|
||||
lz4
|
||||
nlohmann_json # alternative JSON parser to rapidjson
|
||||
protobuf # substrait requires protobuf
|
||||
rapidjson
|
||||
re2
|
||||
snappy
|
||||
|
@ -150,6 +160,9 @@ stdenv.mkDerivation rec {
|
|||
"-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}"
|
||||
"-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
|
||||
"-DARROW_BUILD_TESTS=ON"
|
||||
"-DARROW_BUILD_INTEGRATION=ON"
|
||||
"-DARROW_BUILD_UTILITIES=ON"
|
||||
"-DARROW_EXTRA_ERROR_CONTEXT=ON"
|
||||
"-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
|
||||
"-DARROW_DEPENDENCY_SOURCE=SYSTEM"
|
||||
"-DThrift_SOURCE=AUTO" # search for Thrift using pkg-config (ThriftConfig.cmake requires OpenSSL and libevent)
|
||||
|
@ -168,8 +181,10 @@ stdenv.mkDerivation rec {
|
|||
# Disable Python for static mode because openblas is currently broken there.
|
||||
"-DARROW_PYTHON=${if enableShared then "ON" else "OFF"}"
|
||||
"-DARROW_USE_GLOG=ON"
|
||||
"-DARROW_WITH_BACKTRACE=ON"
|
||||
"-DARROW_WITH_BROTLI=ON"
|
||||
"-DARROW_WITH_LZ4=ON"
|
||||
"-DARROW_WITH_NLOHMANN_JSON=ON"
|
||||
"-DARROW_WITH_SNAPPY=ON"
|
||||
"-DARROW_WITH_UTF8PROC=ON"
|
||||
"-DARROW_WITH_ZLIB=ON"
|
||||
|
@ -177,8 +192,10 @@ stdenv.mkDerivation rec {
|
|||
"-DARROW_MIMALLOC=ON"
|
||||
# Parquet options:
|
||||
"-DARROW_PARQUET=ON"
|
||||
"-DARROW_SUBSTRAIT=ON"
|
||||
"-DPARQUET_BUILD_EXECUTABLES=ON"
|
||||
"-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}"
|
||||
"-DARROW_FLIGHT_TESTING=${if enableFlight then "ON" else "OFF"}"
|
||||
"-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
|
||||
"-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
|
||||
] ++ lib.optionals (!enableShared) [
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
, dill
|
||||
, fastavro
|
||||
, fetchFromGitHub
|
||||
, fetchpatch
|
||||
, freezegun
|
||||
, grpcio
|
||||
, grpcio-tools
|
||||
|
@ -51,6 +52,15 @@ buildPythonPackage rec {
|
|||
sha256 = "sha256-FmfTxRLqXUHhhAZIxCRx2+phX0bmU5rIHaftBU4yBJY=";
|
||||
};
|
||||
|
||||
patches = [
|
||||
# patch in the pyarrow.Table.to_batches(max_chunksize=...) argument fix
|
||||
(fetchpatch {
|
||||
url = "https://github.com/apache/beam/commit/2418a14ee99ff490d1c82944043f97f37ec97a85.patch";
|
||||
sha256 = "sha256-G8ARBBf7nmF46P2ncnlteGFnPWq5iCqZDfuaosre9jY=";
|
||||
stripLen = 2;
|
||||
})
|
||||
];
|
||||
|
||||
# See https://github.com/NixOS/nixpkgs/issues/156957.
|
||||
postPatch = ''
|
||||
substituteInPlace setup.py \
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
{ lib
|
||||
, buildPythonPackage
|
||||
, fetchPypi
|
||||
, fetchpatch
|
||||
, fetchFromGitHub
|
||||
, numpy
|
||||
, packaging
|
||||
, pandas
|
||||
|
@ -12,11 +13,20 @@ buildPythonPackage rec {
|
|||
pname = "db-dtypes";
|
||||
version = "1.0.0";
|
||||
|
||||
src = fetchPypi {
|
||||
inherit pname version;
|
||||
sha256 = "3070d1a8d86ff0b5d9b16f15c5fab9c18893c6b3d5723cd95ee397b169049454";
|
||||
src = fetchFromGitHub {
|
||||
owner = "googleapis";
|
||||
repo = "python-db-dtypes-pandas";
|
||||
rev = "v${version}";
|
||||
hash = "sha256-7u/E0ICiz7LQfuplm/mkGlWrgGEPqeMwM3CUhfH6868=";
|
||||
};
|
||||
|
||||
patches = [
|
||||
(fetchpatch {
|
||||
url = "https://github.com/googleapis/python-db-dtypes-pandas/commit/fb30adfd427d3df9919df00b096210ba1eb1b91d.patch";
|
||||
sha256 = "sha256-39kZtYGbn3U1WXiDTczki5EM6SjUlSRXz8UMcdTU20g=";
|
||||
})
|
||||
];
|
||||
|
||||
propagatedBuildInputs = [
|
||||
numpy
|
||||
packaging
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
, proto-plus
|
||||
, psutil
|
||||
, pyarrow
|
||||
, pytest-xdist
|
||||
}:
|
||||
|
||||
buildPythonPackage rec {
|
||||
|
@ -28,6 +29,11 @@ buildPythonPackage rec {
|
|||
sha256 = "sha256-UmW6BEV44Ucdg/hUGSQk/kyDnB+Hsyx4q3AXTQe89hI=";
|
||||
};
|
||||
|
||||
postPatch = ''
|
||||
substituteInPlace setup.py \
|
||||
--replace 'pyarrow >= 3.0.0, < 8.0dev' 'pyarrow >= 3.0.0, < 9.0dev'
|
||||
'';
|
||||
|
||||
propagatedBuildInputs = [
|
||||
google-cloud-core
|
||||
google-cloud-bigquery-storage
|
||||
|
@ -47,6 +53,7 @@ buildPythonPackage rec {
|
|||
google-cloud-datacatalog
|
||||
google-cloud-storage
|
||||
pytestCheckHook
|
||||
pytest-xdist
|
||||
];
|
||||
|
||||
# prevent google directory from shadowing google imports
|
||||
|
|
|
@ -47,8 +47,10 @@ buildPythonPackage rec {
|
|||
|
||||
PYARROW_WITH_DATASET = zero_or_one true;
|
||||
PYARROW_WITH_FLIGHT = zero_or_one _arrow-cpp.enableFlight;
|
||||
PYARROW_WITH_PARQUET = zero_or_one true;
|
||||
PYARROW_WITH_HDFS = zero_or_one true;
|
||||
PYARROW_WITH_PARQUET = zero_or_one true;
|
||||
PYARROW_WITH_PLASMA = zero_or_one (!stdenv.isDarwin);
|
||||
PYARROW_WITH_S3 = zero_or_one _arrow-cpp.enableS3;
|
||||
|
||||
PYARROW_CMAKE_OPTIONS = [
|
||||
"-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib"
|
||||
|
@ -73,6 +75,11 @@ buildPythonPackage rec {
|
|||
# enabled in nixpkgs.
|
||||
# Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393
|
||||
"--deselect=pyarrow/tests/test_memory.py::test_env_var"
|
||||
# these tests require access to s3 via the internet
|
||||
"--deselect=pyarrow/tests/test_fs.py::test_resolve_s3_region"
|
||||
"--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws"
|
||||
"--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws_region_selection"
|
||||
"--deselect=pyarrow/tests/test_fs.py::test_s3_options"
|
||||
] ++ lib.optionals stdenv.isDarwin [
|
||||
# Requires loopback networking
|
||||
"--deselect=pyarrow/tests/test_ipc.py::test_socket_"
|
||||
|
@ -84,16 +91,17 @@ buildPythonPackage rec {
|
|||
rm -r pyarrow/!(tests)
|
||||
'';
|
||||
|
||||
pythonImportsCheck = map (module: "pyarrow.${module}") [
|
||||
pythonImportsCheck = [ "pyarrow" ] ++ map (module: "pyarrow.${module}") ([
|
||||
"compute"
|
||||
"csv"
|
||||
"dataset"
|
||||
"feather"
|
||||
"flight"
|
||||
"fs"
|
||||
"hdfs"
|
||||
"json"
|
||||
"parquet"
|
||||
];
|
||||
] ++ lib.optionals (!stdenv.isDarwin) [ "plasma" ]);
|
||||
|
||||
meta = with lib; {
|
||||
description = "A cross-language development platform for in-memory data";
|
||||
|
|
Loading…
Reference in a new issue