Merge pull request #187008 from pbsds/catboost-upgrade

This commit is contained in:
Sandro 2022-10-03 21:39:51 +02:00 committed by GitHub
commit 9897cfeb0f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 160 additions and 87 deletions

View file

@ -1,11 +1,11 @@
{ buildPythonPackage, fetchFromGitHub, fetchpatch, lib, pythonOlder
, clang_7, python2
{ buildPythonPackage, fetchFromGitHub, lib, pythonOlder
, clang_12, python2, python
, graphviz, matplotlib, numpy, pandas, plotly, scipy, six
, withCuda ? false, cudatoolkit }:
buildPythonPackage rec {
pname = "catboost";
version = "0.24.4";
version = "1.0.5";
disabled = pythonOlder "3.4";
@ -13,26 +13,26 @@ buildPythonPackage rec {
owner = "catboost";
repo = "catboost";
rev = "v${version}";
sha256 = "sha256-pzmwEiKziB4ldnKgeCsP2HdnisX8sOkLssAzNfcSEx8=";
sha256 = "ILemeZUBI9jPb9G6F7QX/T1HaVhQ+g6y7YmsT6DFCJk=";
};
nativeBuildInputs = [ clang_7 python2 ];
nativeBuildInputs = [ clang_12 python2 ];
# Runtime Python dependencies; cudatoolkit is appended only when withCuda is set.
# lib.optionals (plural) is required here because the conditional value is
# already a list: lib.optional would wrap it once more and yield the nested
# list [ [ cudatoolkit ] ].
propagatedBuildInputs = [ graphviz matplotlib numpy pandas scipy plotly six ]
++ lib.optionals withCuda [ cudatoolkit ];
patches = [
./nix-support.patch
(fetchpatch {
name = "format.patch";
url = "https://github.com/catboost/catboost/pull/1528/commits/a692ba42e5c0f62e5da82b2f6fccfa77deb3419c.patch";
sha256 = "sha256-fNGucHxsSDFRLk3hFH7rm+zzTdDpY9/QjRs8K+AzVvo=";
})
];
# Rewrites the Python interpreter path in the upstream makefiles
# (make/*.makefile): every match of `$(YMAKE_PYTHON3-...)/python3` is replaced,
# in place, with the interpreter path of the `python` argument.
postPatch = ''
# substituteInPlace is too slow for these large files, and the target has lots of numbers in it that change often.
sed -e 's|\$(YMAKE_PYTHON3-.*)/python3|${python.interpreter}|' -i make/*.makefile
'';
preBuild = ''
cd catboost/python-package
'';
'';
# Extra flag for the setup.py build step.
# NOTE(review): presumably makes setup.py build via the plain makefiles rather than ymake — confirm.
setupPyBuildFlags = [ "--with-ymake=no" ];
# lib.optional returns a list: [ cudatoolkit ] when withCuda is set, [ ] otherwise.
CUDA_ROOT = lib.optional withCuda cudatoolkit;
enableParallelBuilding = true;

View file

@ -1,5 +1,5 @@
diff --git a/catboost/python-package/setup.py b/catboost/python-package/setup.py
index 17f1d8ff14..07da618cd1 100644
index fe9251a21f..86b880c5d0 100644
--- a/catboost/python-package/setup.py
+++ b/catboost/python-package/setup.py
@@ -80,7 +80,7 @@ class Helper(object):
@ -15,23 +15,23 @@ index 17f1d8ff14..07da618cd1 100644
def build_with_make(self, topsrc_dir, build_dir, catboost_ext, put_dir, verbose, dry_run):
logging.info('Buildling {} with gnu make'.format(catboost_ext))
- makefile = 'python{}.{}CLANG50-LINUX-X86_64.makefile'.format(python_version()[0], 'CUDA.' if self.with_cuda else '')
+ makefile = 'python{}.{}CLANG7-LINUX-X86_64.makefile'.format(python_version()[0], 'CUDA.' if self.with_cuda else '')
- makefile = 'python{}.{}CLANG11-LINUX-X86_64.makefile'.format(python_version()[0], 'CUDA.' if self.with_cuda else '')
+ makefile = 'python{}.{}CLANG12-LINUX-X86_64.makefile'.format(python_version()[0], 'CUDA.' if self.with_cuda else '')
make_cmd = [
'make', '-f', '../../make/' + makefile,
- 'CC=clang-5.0',
- 'CXX=clang++-5.0',
- 'CC=clang-11',
- 'CXX=clang++-11',
+ 'CC=clang',
+ 'CXX=clang++',
+ 'PYTHON=python2',
+ 'PYTHON=python{}'.format(python_version()[0]),
'BUILD_ROOT=' + build_dir,
'SOURCE_ROOT=' + topsrc_dir,
]
diff --git a/make/python2.CLANG7-LINUX-X86_64.makefile b/make/python2.CLANG7-LINUX-X86_64.makefile
index e54b7078e8..fb7b208af9 100644
--- a/make/python2.CLANG7-LINUX-X86_64.makefile
+++ b/make/python2.CLANG7-LINUX-X86_64.makefile
@@ -4,33 +4,6 @@ BUILD_ROOT = $(shell pwd)
diff --git a/make/python2.CLANG12-LINUX-X86_64.makefile b/make/python2.CLANG12-LINUX-X86_64.makefile
index b49a36fb3f..33996af995 100644
--- a/make/python2.CLANG12-LINUX-X86_64.makefile
+++ b/make/python2.CLANG12-LINUX-X86_64.makefile
@@ -4,31 +4,6 @@ BUILD_ROOT = $(shell pwd)
SOURCE_ROOT = $(shell pwd)
PYTHON = $(shell which python)
@ -43,8 +43,8 @@ index e54b7078e8..fb7b208af9 100644
-_CC_VERSION = $(shell echo '$(_CC_TEST)' | $(CC) -E -P -)
-$(info _CC_VERSION = '$(_CC_VERSION)')
-
-ifneq '$(_CC_VERSION)' '7 0'
- $(error clang 7.0 is required)
-ifneq '$(_CC_VERSION)' '12 0'
- $(error clang 12.0 is required)
-endif
-endif
-
@ -56,20 +56,18 @@ index e54b7078e8..fb7b208af9 100644
-_CXX_VERSION = $(shell echo '$(_CXX_TEST)' | $(CXX) -E -P -)
-$(info _CXX_VERSION = '$(_CXX_VERSION)')
-
-ifneq '$(_CXX_VERSION)' '7 0'
- $(error clang 7.0 is required)
-ifneq '$(_CXX_VERSION)' '12 0'
- $(error clang 12.0 is required)
-endif
-endif
-
-
all\
::\
$(BUILD_ROOT)/catboost/python-package/catboost/_catboost.so\
diff --git a/make/python2.CUDA.CLANG7-LINUX-X86_64.makefile b/make/python2.CUDA.CLANG7-LINUX-X86_64.makefile
index 2a22a79b25..522fb54a7c 100644
--- a/make/python2.CUDA.CLANG7-LINUX-X86_64.makefile
+++ b/make/python2.CUDA.CLANG7-LINUX-X86_64.makefile
@@ -4,33 +4,6 @@ BUILD_ROOT = $(shell pwd)
diff --git a/make/python2.CUDA.CLANG12-LINUX-X86_64.makefile b/make/python2.CUDA.CLANG12-LINUX-X86_64.makefile
index 82935b297e..093cc86532 100644
--- a/make/python2.CUDA.CLANG12-LINUX-X86_64.makefile
+++ b/make/python2.CUDA.CLANG12-LINUX-X86_64.makefile
@@ -4,31 +4,6 @@ BUILD_ROOT = $(shell pwd)
SOURCE_ROOT = $(shell pwd)
PYTHON = $(shell which python)
@ -81,8 +79,8 @@ index 2a22a79b25..522fb54a7c 100644
-_CC_VERSION = $(shell echo '$(_CC_TEST)' | $(CC) -E -P -)
-$(info _CC_VERSION = '$(_CC_VERSION)')
-
-ifneq '$(_CC_VERSION)' '7 0'
- $(error clang 7.0 is required)
-ifneq '$(_CC_VERSION)' '12 0'
- $(error clang 12.0 is required)
-endif
-endif
-
@ -94,20 +92,18 @@ index 2a22a79b25..522fb54a7c 100644
-_CXX_VERSION = $(shell echo '$(_CXX_TEST)' | $(CXX) -E -P -)
-$(info _CXX_VERSION = '$(_CXX_VERSION)')
-
-ifneq '$(_CXX_VERSION)' '7 0'
- $(error clang 7.0 is required)
-ifneq '$(_CXX_VERSION)' '12 0'
- $(error clang 12.0 is required)
-endif
-endif
-
-
all\
::\
$(BUILD_ROOT)/catboost/python-package/catboost/_catboost.so\
diff --git a/make/python3.CLANG7-LINUX-X86_64.makefile b/make/python3.CLANG7-LINUX-X86_64.makefile
index fee6750bcb..dc55908371 100644
--- a/make/python3.CLANG7-LINUX-X86_64.makefile
+++ b/make/python3.CLANG7-LINUX-X86_64.makefile
@@ -4,33 +4,6 @@ BUILD_ROOT = $(shell pwd)
diff --git a/make/python3.CLANG12-LINUX-X86_64.makefile b/make/python3.CLANG12-LINUX-X86_64.makefile
index 1c5d646ae4..6c091fbe17 100644
--- a/make/python3.CLANG12-LINUX-X86_64.makefile
+++ b/make/python3.CLANG12-LINUX-X86_64.makefile
@@ -4,31 +4,6 @@ BUILD_ROOT = $(shell pwd)
SOURCE_ROOT = $(shell pwd)
PYTHON = $(shell which python)
@ -119,8 +115,8 @@ index fee6750bcb..dc55908371 100644
-_CC_VERSION = $(shell echo '$(_CC_TEST)' | $(CC) -E -P -)
-$(info _CC_VERSION = '$(_CC_VERSION)')
-
-ifneq '$(_CC_VERSION)' '7 0'
- $(error clang 7.0 is required)
-ifneq '$(_CC_VERSION)' '12 0'
- $(error clang 12.0 is required)
-endif
-endif
-
@ -132,20 +128,18 @@ index fee6750bcb..dc55908371 100644
-_CXX_VERSION = $(shell echo '$(_CXX_TEST)' | $(CXX) -E -P -)
-$(info _CXX_VERSION = '$(_CXX_VERSION)')
-
-ifneq '$(_CXX_VERSION)' '7 0'
- $(error clang 7.0 is required)
-ifneq '$(_CXX_VERSION)' '12 0'
- $(error clang 12.0 is required)
-endif
-endif
-
-
all\
::\
$(BUILD_ROOT)/catboost/python-package/catboost/_catboost.so\
diff --git a/make/python3.CUDA.CLANG7-LINUX-X86_64.makefile b/make/python3.CUDA.CLANG7-LINUX-X86_64.makefile
index 5146830476..ff8535b03e 100644
--- a/make/python3.CUDA.CLANG7-LINUX-X86_64.makefile
+++ b/make/python3.CUDA.CLANG7-LINUX-X86_64.makefile
@@ -4,33 +4,6 @@ BUILD_ROOT = $(shell pwd)
diff --git a/make/python3.CUDA.CLANG12-LINUX-X86_64.makefile b/make/python3.CUDA.CLANG12-LINUX-X86_64.makefile
index fcdb75a719..4e1dbc3cd7 100644
--- a/make/python3.CUDA.CLANG12-LINUX-X86_64.makefile
+++ b/make/python3.CUDA.CLANG12-LINUX-X86_64.makefile
@@ -4,31 +4,6 @@ BUILD_ROOT = $(shell pwd)
SOURCE_ROOT = $(shell pwd)
PYTHON = $(shell which python)
@ -157,8 +151,8 @@ index 5146830476..ff8535b03e 100644
-_CC_VERSION = $(shell echo '$(_CC_TEST)' | $(CC) -E -P -)
-$(info _CC_VERSION = '$(_CC_VERSION)')
-
-ifneq '$(_CC_VERSION)' '7 0'
- $(error clang 7.0 is required)
-ifneq '$(_CC_VERSION)' '12 0'
- $(error clang 12.0 is required)
-endif
-endif
-
@ -170,12 +164,10 @@ index 5146830476..ff8535b03e 100644
-_CXX_VERSION = $(shell echo '$(_CXX_TEST)' | $(CXX) -E -P -)
-$(info _CXX_VERSION = '$(_CXX_VERSION)')
-
-ifneq '$(_CXX_VERSION)' '7 0'
- $(error clang 7.0 is required)
-ifneq '$(_CXX_VERSION)' '12 0'
- $(error clang 12.0 is required)
-endif
-endif
-
-
all\
::\
$(BUILD_ROOT)/catboost/python-package/catboost/_catboost.so\

View file

@ -1,17 +1,27 @@
{ lib
, buildPythonPackage
, fetchFromGitHub
, writeText
, isPy27
, pytestCheckHook
, pytest-mpl
, numpy
, scipy
, scikit-learn
, pandas
, transformers
, opencv4
, lightgbm
, catboost
, pyspark
, sentencepiece
, tqdm
, slicer
, numba
, matplotlib
, nose
, lime
, cloudpickle
, ipython
}:
@ -35,29 +45,102 @@ buildPythonPackage rec {
tqdm
slicer
numba
cloudpickle
];
preCheck = ''
# Named groups of optional dependencies, exposed through passthru.
passthru.optional-dependencies = {
plots = [ matplotlib ipython ];
others = [ lime ];
};
# Shell snippet for the pre-check step: points HOME at $TMPDIR, removes the
# local `shap` source directory, strips the coverage flags from pytest.ini and
# appends a network-blocking pytest hook to tests/conftest.py.
preCheck = let
# conftest.py fragment that replaces requests.head, requests.get,
# urllib.request.urlopen and urllib.request.Request with a function that
# raises NetworkAccessDeniedError. Its pytest_runtest_makereport wrapper then
# reports any test that ended in that error as skipped and tags it as xfailed.
conftestSkipNetworkErrors = writeText "conftest.py" ''
from _pytest.runner import pytest_runtest_makereport as orig_pytest_runtest_makereport
import urllib, requests
class NetworkAccessDeniedError(RuntimeError): pass
def deny_network_access(*a, **kw):
raise NetworkAccessDeniedError
requests.head = deny_network_access
requests.get = deny_network_access
urllib.request.urlopen = deny_network_access
urllib.request.Request = deny_network_access
def pytest_runtest_makereport(item, call):
tr = orig_pytest_runtest_makereport(item, call)
if call.excinfo is not None and call.excinfo.type is NetworkAccessDeniedError:
tr.outcome = 'skipped'
tr.wasxfail = "reason: Requires network access."
return tr
'';
in ''
export HOME=$TMPDIR
# when importing the local copy the extension is not found
rm -r shap
# coverage testing is a waste considering how much we have to skip
substituteInPlace pytest.ini \
--replace "--cov=shap --cov-report=term-missing" ""
# Add pytest hook skipping tests that access network.
# These tests are marked as "Expected fail" (xfail)
cat ${conftestSkipNetworkErrors} >> tests/conftest.py
'';
checkInputs = [ pytestCheckHook matplotlib nose ipython ];
# Those tests access the network
# Packages made available to the test run.
checkInputs = [
pytestCheckHook
pytest-mpl
matplotlib
nose
ipython
# Optional dependencies, which only serve to enable more tests:
opencv4
#pytorch # we already skip all its tests due to slowness, so adding it does nothing
transformers
#xgboost # numerically unstable? xgboost tests randomly fail depending on the nixpkgs revision
lightgbm
catboost
pyspark
sentencepiece
];
# Whole test files excluded from the test run, grouped by reason.
disabledTestPaths = [
# Takes forever without GPU acceleration.
"tests/explainers/test_deep.py"
"tests/explainers/test_gradient.py"
# Requires a GPU. We skip here instead of having pytest repeatedly check for one.
"tests/explainers/test_gpu_tree.py"
# The resulting plots look sane, but do not match the baseline pixel-perfectly.
# Likely due to a matplotlib version mismatch, a different backend, or missing fonts.
"tests/plots/test_summary.py" # FIXME: enable
# 100% of the tests in these paths require network access.
"tests/explainers/test_explainer.py"
"tests/explainers/test_exact.py"
"tests/explainers/test_partition.py"
"tests/maskers/test_fixed_composite.py"
"tests/maskers/test_text.py"
"tests/models/test_teacher_forcing_logits.py"
"tests/models/test_text_generation.py"
];
# Individual tests excluded from the test run, by test name.
disabledTests = [
# NOTE(review): no reason is recorded next to the following entries; the
# "Those tests access the network" comment found earlier in this file may
# refer to them — confirm.
"test_kernel_shap_with_a1a_sparse_zero_background"
"test_kernel_shap_with_a1a_sparse_nonzero_background"
"test_kernel_shap_with_high_dim_sparse"
"test_sklearn_random_forest_newsgroups"
"test_sum_match_random_forest"
"test_sum_match_extra_trees"
"test_single_row_random_forest"
"test_sum_match_gradient_boosting_classifier"
"test_single_row_gradient_boosting_classifier"
"test_HistGradientBoostingClassifier_proba"
"test_HistGradientBoostingClassifier_multidim"
"test_sum_match_gradient_boosting_regressor"
"test_single_row_gradient_boosting_regressor"
# Unstable; an xgboost-enabled test. Possibly related: https://github.com/slundberg/shap/issues/2480
"test_provided_background_tree_path_dependent"
];
#pytestFlagsArray = ["-x" "-W" "ignore"]; # uncomment this to debug
# Modules that must be importable from the installed package.
# The attribute consumed by the Python import-check hook is spelled
# `pythonImportsCheck` (plural "Imports"); under the previous spelling
# `pythonImportCheck` the list was silently ignored and nothing was checked.
pythonImportsCheck = [
"shap"
"shap.explainers"
"shap.explainers.other"
"shap.plots"
"shap.plots.colors"
"shap.benchmark"
"shap.maskers"
"shap.utils"
"shap.actions"
"shap.models"
];
meta = with lib; {
@ -66,7 +149,5 @@ buildPythonPackage rec {
license = licenses.mit;
maintainers = with maintainers; [ evax ];
platforms = platforms.unix;
# ModuleNotFoundError: No module named 'sklearn.ensemble.iforest'
broken = true;
};
}