Merge pull request #109505 from danieldk/transformers-4.2.1
python3Packages.transformers: 4.1.1 -> 4.2.2
This commit is contained in:
commit
5f92e694d9
1 changed files with 9 additions and 60 deletions
|
@ -1,32 +1,28 @@
|
|||
{ buildPythonPackage
|
||||
, lib, stdenv
|
||||
, fetchFromGitHub
|
||||
, isPy39
|
||||
, pythonOlder
|
||||
, cookiecutter
|
||||
, filelock
|
||||
, importlib-metadata
|
||||
, regex
|
||||
, requests
|
||||
, numpy
|
||||
, pandas
|
||||
, parameterized
|
||||
, protobuf
|
||||
, sacremoses
|
||||
, timeout-decorator
|
||||
, tokenizers
|
||||
, tqdm
|
||||
, pytestCheckHook
|
||||
}:
|
||||
|
||||
buildPythonPackage rec {
|
||||
pname = "transformers";
|
||||
version = "4.1.1";
|
||||
disabled = isPy39;
|
||||
version = "4.2.2";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "huggingface";
|
||||
repo = pname;
|
||||
rev = "v${version}";
|
||||
sha256 = "1l1gxdsakjmzsgggypq45pnwm87brhlccjfzafs43460pz0wbd6k";
|
||||
hash = "sha256-sBMCzEgYX6HQbzoEIYnmMdpYecCCsQjTdl2mO1Veu9M=";
|
||||
};
|
||||
|
||||
propagatedBuildInputs = [
|
||||
|
@ -39,63 +35,16 @@ buildPythonPackage rec {
|
|||
sacremoses
|
||||
tokenizers
|
||||
tqdm
|
||||
];
|
||||
] ++ stdenv.lib.optionals (pythonOlder "3.8") [ importlib-metadata ];
|
||||
|
||||
checkInputs = [
|
||||
pandas
|
||||
parameterized
|
||||
pytestCheckHook
|
||||
timeout-decorator
|
||||
];
|
||||
# Many tests require internet access.
|
||||
doCheck = false;
|
||||
|
||||
postPatch = ''
|
||||
substituteInPlace setup.py \
|
||||
--replace "tokenizers == 0.9.4" "tokenizers"
|
||||
sed -ri 's/tokenizers==[0-9.]+/tokenizers/g' setup.py
|
||||
'';
|
||||
|
||||
preCheck = ''
|
||||
export HOME="$TMPDIR"
|
||||
|
||||
# This test requires the `datasets` module to download test
|
||||
# data. However, since we cannot download in the Nix sandbox
|
||||
# and `dataset` is an optional dependency for transformers
|
||||
# itself, we will just remove the tests files that import
|
||||
# `dataset`.
|
||||
rm tests/test_retrieval_rag.py
|
||||
rm tests/test_trainer.py
|
||||
'';
|
||||
|
||||
# We have to run from the main directory for the tests. However,
|
||||
# letting pytest discover tests leads to errors.
|
||||
pytestFlagsArray = [ "tests" ];
|
||||
|
||||
# Disable tests that require network access.
|
||||
disabledTests = [
|
||||
"BlenderbotSmallTokenizerTest"
|
||||
"Blenderbot3BTokenizerTests"
|
||||
"GetFromCacheTests"
|
||||
"TokenizationTest"
|
||||
"TestTokenizationBart"
|
||||
"test_all_tokenizers"
|
||||
"test_batch_encoding_is_fast"
|
||||
"test_batch_encoding_pickle"
|
||||
"test_batch_encoding_word_to_tokens"
|
||||
"test_config_from_model_shortcut"
|
||||
"test_config_model_type_from_model_identifier"
|
||||
"test_from_pretrained_use_fast_toggle"
|
||||
"test_hf_api"
|
||||
"test_outputs_can_be_shorter"
|
||||
"test_outputs_not_longer_than_maxlen"
|
||||
"test_padding_accepts_tensors"
|
||||
"test_pretokenized_tokenizers"
|
||||
"test_tokenizer_equivalence_en_de"
|
||||
"test_tokenizer_from_model_type"
|
||||
"test_tokenizer_from_model_type"
|
||||
"test_tokenizer_from_pretrained"
|
||||
"test_tokenizer_from_tokenizer_class"
|
||||
"test_tokenizer_identifier_with_correct_config"
|
||||
"test_tokenizer_identifier_non_existent"
|
||||
];
|
||||
pythonImportsCheck = [ "transformers" ];
|
||||
|
||||
meta = with lib; {
|
||||
homepage = "https://github.com/huggingface/transformers";
|
||||
|
|
Loading…
Reference in a new issue