diff --git a/pkgs/development/python-modules/tensorflow/default.nix b/pkgs/development/python-modules/tensorflow/default.nix index a8f902fdb38f..98e651a5d95c 100644 --- a/pkgs/development/python-modules/tensorflow/default.nix +++ b/pkgs/development/python-modules/tensorflow/default.nix @@ -1,9 +1,42 @@ { stdenv, buildBazelPackage, lib, fetchFromGitHub, fetchpatch, symlinkJoin +, git , buildPythonPackage, isPy3k, pythonOlder, pythonAtLeast -, which, swig, binutils, glibcLocales +, which, binutils, glibcLocales , python, jemalloc, openmpi -, numpy, six, protobuf, tensorflow-tensorboard, backports_weakref, mock, enum34, absl-py -, cudaSupport ? false, nvidia_x11 ? null, cudatoolkit ? null, cudnn ? null +, numpy, tensorflow-tensorboard, backports_weakref, mock, enum34, absl-py +, future +, keras-preprocessing +, keras-applications +, astor +, gast +, google-pasta +, termcolor +, cython +, flatbuffers +, giflib +, libjpeg +, grpc +, grpcio +, hwloc +, icu +, jsoncpp +, lmdb +, nasm +, sqlite +, pcre +, libpng +, six +, snappy +, swig +, wrapt +, zlib +, protobuf +, protobuf_cc +, curl +, tensorflow-estimator +, setuptools +, wheel +, cudaSupport ? false, nvidia_x11 ? null, cudatoolkit ? null, cudnn ? null, nccl ? null # XLA without CUDA is broken , xlaSupport ? cudaSupport # Default from ./configure script @@ -21,7 +54,6 @@ assert cudaSupport -> nvidia_x11 != null assert ! 
(stdenv.isDarwin && cudaSupport); let - withTensorboard = pythonOlder "3.6"; cudatoolkit_joined = symlinkJoin { @@ -29,40 +61,147 @@ let paths = [ cudatoolkit.out cudatoolkit.lib ]; }; + bazel_cuda_toolkit = symlinkJoin { + name = "bazel-gcc-toolkit"; + paths = [ + cudatoolkit.cc + cudatoolkit.out + cudatoolkit.lib + binutils.bintools # for ar, dwp, nm, objcopy, objdump, strip + ]; + }; + tfFeature = x: if x then "1" else "0"; - version = "1.5.0"; + version = "1.14"; + variant = if cudaSupport then "_gpu" else ""; + pname = "tensorflow${variant}"; - pkg = buildBazelPackage rec { - name = "tensorflow-build-${version}"; + bazel-wheel = buildBazelPackage rec { + # indicate which configuration of the wheel is being built + name = let + python_tag = with lib.versions; "cp${major python.version}${minor python.version}"; # cp37 + arch_platform = lib.splitString "-" stdenv.hostPlatform.system; + platform = lib.last arch_platform; # "linux" + arch = lib.head arch_platform; # "x86_64" + in + # https://www.python.org/dev/peps/pep-0427/#file-name-convention + # tensorflow_gpu-1.14.0-cp27-none-linux_x86_64.whl + "${pname}-${version}-${python_tag}-none-${platform}_${arch}.whl"; src = fetchFromGitHub { owner = "tensorflow"; repo = "tensorflow"; - rev = "v${version}"; - sha256 = "1c4djsaip901nasm7a6dsimr02bsv70a7b1g0kysb4n39qpdh22q"; + rev = "r${version}"; + sha256 = "071rndqh3xy4y5idlykqv656lggp25vy4qx1divz4id3im1zlr13"; }; patches = [ - # Fix build with Bazel >= 0.10 + # Work around https://github.com/tensorflow/tensorflow/issues/24752 + ./no-saved-proto.patch + + # https://github.com/tensorflow/tensorflow/pull/29673 (fetchpatch { - url = "https://github.com/tensorflow/tensorflow/commit/6fcfab770c2672e2250e0f5686b9545d99eb7b2b.patch"; - sha256 = "0p61za1mx3a7gj1s5lsps16fcw18iwnvq2b46v1kyqfgq77a12vb"; + name = "fix-compile-with-cuda-and-mpi.patch"; + url = "https://github.com/tensorflow/tensorflow/pull/29673/commits/498e35a3bfe38dd75cf1416a1a23c07c3b59e6af.patch"; + sha256 = 
"1m2qmwv1ysqa61z6255xggwbq6mnxbig749bdvrhnch4zydxb4di"; }) + + # https://github.com/tensorflow/tensorflow/issues/29220 (fetchpatch { - url = "https://github.com/tensorflow/tensorflow/commit/3f57956725b553d196974c9ad31badeb3eabf8bb.patch"; - sha256 = "11dja5gqy0qw27sc9b6yw9r0lfk8dznb32vrqqfcnypk2qmv26va"; + name = "bazel-0.27.patch"; + url = "https://github.com/tensorflow/tensorflow/commit/cfccbdb8c4a92dd26382419dceb4d934c2380391.patch"; + sha256 = "1l56wjia2c4685flsfkkgy471wx3c66wyv8khspv06zchj0k0liw"; }) ]; + # On update, it can be useful to steal the changes from gentoo + # https://gitweb.gentoo.org/repo/gentoo.git/tree/sci-libs/tensorflow + nativeBuildInputs = [ swig which ]; - buildInputs = [ python jemalloc openmpi glibcLocales numpy ] - ++ lib.optionals cudaSupport [ cudatoolkit cudnn nvidia_x11 ]; + buildInputs = [ + python + jemalloc + openmpi + glibcLocales + git + + # python deps needed during wheel build time + numpy + keras-preprocessing + + # libs taken from system through the TF_SYSTEM_LIBS mechanism + absl-py + + # for building the wheel + setuptools + wheel + ] ++ lib.optionals (!isPy3k) [ + future + mock + ] ++ lib.optionals cudaSupport [ + cudatoolkit + cudnn + nvidia_x11 + ]; + + # Take as many libraries from the system as possible. 
Keep in sync with + # list of valid syslibs in + # https://github.com/perfinion/tensorflow/blob/master/third_party/systemlibs/syslibs_configure.bzl + SYSLIBS= [ + "absl_py" + "astor_archive" + "boringssl" + "com_github_googleapis_googleapis" + "com_github_googlecloudplatform_google_cloud_cpp" + "com_google_protobuf" + "com_google_protobuf_cc" + "com_googlesource_code_re2" + "curl" + "cython" + "double_conversion" + "enum34_archive" + "flatbuffers" + "gast_archive" + "gif_archive" + "grpc" + "hwloc" + "icu" + "jpeg" + "jsoncpp_git" + "keras_applications_archive" + "lmdb" + "nasm" + # "nsync" # not packaged in nixpkgs + "sqlite" + "pasta" + "pcre" + "png_archive" + "protobuf_archive" + "six_archive" + "snappy" + "swig" + "termcolor_archive" + "wrapt" + "zlib_archive" + ]; preConfigure = '' patchShebangs configure + # dummy ldconfig + mkdir dummy-ldconfig + echo "#!${stdenv.shell}" > dummy-ldconfig/ldconfig + chmod +x dummy-ldconfig/ldconfig + export PATH="$PWD/dummy-ldconfig:$PATH" + + # arbitrarily set to the current latest bazel version, overly careful + export TF_IGNORE_MAX_BAZEL_VERSION=1 + + # don't rebuild the world (NOTE(review): the list is interpolated unquoted and space-separated, so the shell assigns only its first entry to TF_SYSTEM_LIBS; confirm this is intended) + export TF_SYSTEM_LIBS=${lib.concatStringsSep " " SYSLIBS} + export PYTHON_BIN_PATH="${python.interpreter}" export PYTHON_LIB_PATH="$NIX_BUILD_TOP/site-packages" export TF_NEED_GCP=1 @@ -73,26 +212,43 @@ let export TF_NEED_MPI=${tfFeature cudaSupport} export TF_NEED_CUDA=${tfFeature cudaSupport} ${lib.optionalString cudaSupport '' - export CUDA_TOOLKIT_PATH=${cudatoolkit_joined} + export TF_CUDA_PATHS="${cudatoolkit_joined},${cudnn},${nccl}" export TF_CUDA_VERSION=${cudatoolkit.majorVersion} - export CUDNN_INSTALL_PATH=${cudnn} export TF_CUDNN_VERSION=${cudnn.majorVersion} - export GCC_HOST_COMPILER_PATH=${cudatoolkit.cc}/bin/gcc + export GCC_HOST_COMPILER_PATH=${bazel_cuda_toolkit}/bin/gcc + export GCC_HOST_COMPILER_PREFIX="${bazel_cuda_toolkit}/bin" export TF_CUDA_COMPUTE_CAPABILITIES=${lib.concatStringsSep "," cudaCapabilities} ''} + # 
https://github.com/tensorflow/tensorflow/issues/20919 + sed -i '/androidndk/d' tensorflow/lite/kernels/internal/BUILD + mkdir -p "$PYTHON_LIB_PATH" ''; + # ar, cpp, gcov, strip, gcc, compat-ld, ld, objdump, dwp, nm, objcopy + configurePhase = '' + runHook preConfigure + # no flags (options provided by previously set environment variables) + export AR="${binutils.bintools}/bin/ar" + export BAZEL_TOOLS_OVERRIDE="ar=${binutils.bintools}/bin/ar" + # crosstool used with cuda + ./configure + + runHook postConfigure + ''; + + # FIXME NIX_LDFLAGS = lib.optionals cudaSupport [ "-lcublas" "-lcudnn" "-lcuda" "-lcudart" ]; hardeningDisable = [ "all" ]; - bazelFlags = [ "--config=opt" ] - ++ lib.optional sse42Support "--copt=-msse4.2" - ++ lib.optional avx2Support "--copt=-mavx2" - ++ lib.optional fmaSupport "--copt=-mfma" - ++ lib.optional cudaSupport "--config=cuda"; + bazelFlags = [ + # temporary fixes to make the build work with bazel 0.27 + "--incompatible_no_support_tools_in_action_inputs=false" + ] ++ lib.optional sse42Support "--copt=-msse4.2" + ++ lib.optional avx2Support "--copt=-mavx2" + ++ lib.optional fmaSupport "--copt=-mfma"; bazelTarget = "//tensorflow/tools/pip_package:build_pip_package"; @@ -101,44 +257,94 @@ let rm -rf $bazelOut/external/{bazel_tools,\@bazel_tools.marker,local_*,\@local_*} ''; - sha256 = "1nc98aqrp14q7llypcwaa0kdn9xi7r0p1mnd3vmmn1m299py33ca"; + # cudaSupport causes fetch of ncclArchive, resulting in different hashes + sha256 = if cudaSupport then + "0q1pmw7fzn6l554ap576r48m0zgwb7n1ljhyy1p36708z94scdh4" + else + "14n26h2r4w7wd5sddy4w0s51s2qcwf276n3hvv2505iysa8wqlc3"; }; buildAttrs = { preBuild = '' patchShebangs . - find -type f -name CROSSTOOL\* -exec sed -i \ - -e 's,/usr/bin/ar,${binutils.bintools}/bin/ar,g' \ - {} \; + + # beautiful bash to iterate over files containing a string + # https://github.com/bazelbuild/bazel/issues/5915#issuecomment-505100422 + # .. to make sure the output directory is covered + grep -lrZ '/usr/bin/ar\b' .. 
| while IFS="" read -r -d "" file; do + # patch /usr/bin/ar to the proper location + echo "File is $file" + sed -i \ + -e 's,/usr/bin/ar\b,${binutils.bintools}/bin/ar,g' \ + "$file" + done + + + # Tensorboard pulls in a bunch of dependencies, some of which may + # include security vulnerabilities. So we make it optional. + # https://github.com/tensorflow/tensorflow/issues/20280#issuecomment-400230560 + sed -i '/tensorboard >=/d' tensorflow/tools/pip_package/setup.py ''; + # Could alternatively use --src instead of --dst to output the sources + # instead of a wheel. Generating a wheel makes it easier to unify handling + # of source and binary build though. installPhase = '' - sed -i 's,.*bdist_wheel.*,cp -rL . "$out"; exit 0,' bazel-bin/tensorflow/tools/pip_package/build_pip_package - bazel-bin/tensorflow/tools/pip_package/build_pip_package $PWD/dist + # work around timestamp issues + # https://github.com/NixOS/nixpkgs/issues/270#issuecomment-467583872 + export SOURCE_DATE_EPOCH=315532800 + + # build the wheel, then move it to $out (building directly to $out + # would actually put it into a *directory* called $out, but we want the + # file itself in $out) + bazel-bin/tensorflow/tools/pip_package/build_pip_package --dst $PWD/dist + mv dist/*.whl "$out" ''; }; - - dontFixup = true; }; in buildPythonPackage rec { - pname = "tensorflow"; - inherit version; + inherit version pname; - src = pkg; + src = bazel-wheel; - installFlags = lib.optional (!withTensorboard) "--no-dependencies"; + format = "wheel"; - postPatch = lib.optionalString (pythonAtLeast "3.4") '' - sed -i '/enum34/d' setup.py + # Upstream has a pip hack that results in bin/tensorboard being in both tensorflow + # and the propagated input tensorflow-tensorboard, which causes environment collisions. 
+ # Another possibility would be to have tensorboard only in the buildInputs + # https://github.com/tensorflow/tensorflow/blob/v1.7.1/tensorflow/tools/pip_package/setup.py#L79 + postInstall = '' + rm $out/bin/tensorboard ''; - propagatedBuildInputs = [ numpy six protobuf absl-py ] - ++ lib.optional (!isPy3k) mock - ++ lib.optionals (pythonOlder "3.4") [ backports_weakref enum34 ] - ++ lib.optional withTensorboard tensorflow-tensorboard; + # tensorflow/tools/pip_package/setup.py + propagatedBuildInputs = [ + absl-py + astor + gast + google-pasta + keras-applications + keras-preprocessing + numpy + six + protobuf + tensorflow-estimator + termcolor + wrapt + grpcio + ] ++ lib.optionals (!isPy3k) [ + mock + future # FIXME + ] ++ lib.optionals (pythonOlder "3.4") [ + backports_weakref enum34 + ] ++ lib.optionals (pythonOlder "3.6") [ # was the let-bound withTensorboard, which this change removes + tensorflow-tensorboard + ]; # Actual tests are slow and impure. + # TODO try to run them anyway + # TODO better test (files in tensorflow/tools/ci_build/builds/*test) checkPhase = '' ${python.interpreter} -c "import tensorflow" ''; diff --git a/pkgs/development/python-modules/tensorflow/no-saved-proto.patch b/pkgs/development/python-modules/tensorflow/no-saved-proto.patch new file mode 100644 index 000000000000..ead112f8ddd6 --- /dev/null +++ b/pkgs/development/python-modules/tensorflow/no-saved-proto.patch @@ -0,0 +1,14 @@ +diff --git a/tensorflow/cc/saved_model/BUILD b/tensorflow/cc/saved_model/BUILD +index 8626ed0087..27deb34387 100644 +--- a/tensorflow/cc/saved_model/BUILD ++++ b/tensorflow/cc/saved_model/BUILD +@@ -49,9 +49,6 @@ cc_library( + # tf_lib depending on the build platform. + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", +- ]) + if_mobile([ +- # Mobile-friendly SavedModel proto. See go/portable-proto for more info. 
+- "//tensorflow/core:saved_model_portable_proto", + ]) + if_android([ + "//tensorflow/core:android_tensorflow_lib", + ]) + if_ios([ diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix index be6a291b7c70..c28779bf4433 100644 --- a/pkgs/top-level/python-packages.nix +++ b/pkgs/top-level/python-packages.nix @@ -5703,11 +5703,13 @@ in { tensorflow = if stdenv.isDarwin then callPackage ../development/python-modules/tensorflow/bin.nix { } - else callPackage ../development/python-modules/tensorflow/bin.nix rec { + else callPackage ../development/python-modules/tensorflow rec { cudaSupport = pkgs.config.cudaSupport or false; inherit (pkgs.linuxPackages) nvidia_x11; - cudatoolkit = pkgs.cudatoolkit_10_0; - cudnn = pkgs.cudnn_cudatoolkit_10_0; + cudatoolkit = pkgs.cudatoolkit_10; + cudnn = pkgs.cudnn_cudatoolkit_10; + nccl = pkgs.nccl_cudatoolkit_10; + protobuf_cc = pkgs.protobuf; # not the python version }; tensorflowWithoutCuda = self.tensorflow.override {