From 60199f1d26630af16a0305057ebeba3362c594af Mon Sep 17 00:00:00 2001
From: illustris <rharikrishnan95@gmail.com>
Date: Fri, 22 Sep 2023 22:40:27 +0530
Subject: [PATCH 1/7] spark: init 3.5.0, 3.4.0->3.4.1, 3.3.2->3.3.3

---
 .../networking/cluster/spark/default.nix | 14 +++++++++-----
 pkgs/top-level/all-packages.nix          |  4 ++--
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/pkgs/applications/networking/cluster/spark/default.nix b/pkgs/applications/networking/cluster/spark/default.nix
index 80142048c6fe..eed715e520b3 100644
--- a/pkgs/applications/networking/cluster/spark/default.nix
+++ b/pkgs/applications/networking/cluster/spark/default.nix
@@ -71,16 +71,20 @@ let
     };
 in
 {
+  spark_3_5 = spark rec {
+    pname = "spark";
+    version = "3.5.0";
+    hash = "sha256-f+a4a23aOM0GCDoZlZ7WNXs0Olzyh3yMtO8ZmEoYvZ4=";
+  };
   spark_3_4 = spark rec {
     pname = "spark";
-    version = "3.4.0";
-    hash = "sha256-0y80dRYzb6Ceu6MlGQHtpMdzOob/TBg6kf8dtF6KyCk=";
+    version = "3.4.1";
+    hash = "sha256-4vC9oBCycVNy3hIxFII65j7FHlrxhDURU3NmsJZPDDU=";
   };
   spark_3_3 = spark rec {
     pname = "spark";
-    version = "3.3.2";
-    hash = "sha256-AeKe2QN+mhUJgZRSIgbi/DttAWlDgwC1kl9p7syEvbo=";
-    extraMeta.knownVulnerabilities = [ "CVE-2023-22946" ];
+    version = "3.3.3";
+    hash = "sha256-YtHxRYTwrwSle3UpFjRSwKcnLFj2m9/zLBENH/HVzuM=";
   };
   spark_3_2 = spark rec {
     pname = "spark";
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index b7e1d2d32506..e79acec4e321 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -18111,8 +18111,8 @@ with pkgs;
   smiley-sans = callPackage ../data/fonts/smiley-sans { };
 
   inherit (callPackages ../applications/networking/cluster/spark { })
-    spark_3_4 spark_3_3 spark_3_2;
-  spark3 = spark_3_4;
+    spark_3_5 spark_3_4 spark_3_3 spark_3_2;
+  spark3 = spark_3_5;
   spark = spark3;
 
   sparkleshare = callPackage ../applications/version-management/sparkleshare { };
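Note: the series does not show how the new source hashes were obtained. A common workflow — an assumption here, not something these patches encode — is trust-on-first-use with `lib.fakeHash`: build once with the placeholder, then copy the correct SRI hash out of the "hash mismatch" error Nix prints for the failed fetch.

    # Hypothetical intermediate state while bumping a version; not part of the patch.
    spark_3_5 = spark rec {
      pname = "spark";
      version = "3.5.0";
      hash = lib.fakeHash;  # nix-build now fails and reports the real sha256- hash
    };
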
From fc7aceed429a24e3efc49b82691f5fac596f59e9 Mon Sep 17 00:00:00 2001
From: illustris <rharikrishnan95@gmail.com>
Date: Sun, 22 Oct 2023 18:14:40 +0530
Subject: [PATCH 2/7] spark: use finalAttrs

---
 .../networking/cluster/spark/default.nix | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pkgs/applications/networking/cluster/spark/default.nix b/pkgs/applications/networking/cluster/spark/default.nix
index eed715e520b3..366de1873557 100644
--- a/pkgs/applications/networking/cluster/spark/default.nix
+++ b/pkgs/applications/networking/cluster/spark/default.nix
@@ -14,20 +14,20 @@ let
   spark = { pname, version, hash, extraMeta ? {} }:
-    stdenv.mkDerivation rec {
-      inherit pname version;
+    stdenv.mkDerivation (finalAttrs: {
+      inherit pname version hash;
       jdk = if hadoopSupport then hadoop.jdk else jdk8;
       src = fetchzip {
-        url = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
-        inherit hash;
+        url = with finalAttrs; "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
+        inherit (finalAttrs) hash;
       };
       nativeBuildInputs = [ makeWrapper ];
-      buildInputs = [ jdk python3Packages.python ]
+      buildInputs = [ finalAttrs.jdk python3Packages.python ]
         ++ extraPythonPackages
         ++ lib.optional RSupport R;
 
-      untarDir = "${pname}-${version}";
-      installPhase = ''
+      untarDir = with finalAttrs; "${pname}-${version}";
+      installPhase = with finalAttrs; ''
         mkdir -p $out/{lib/${untarDir}/conf,bin,/share/java}
         mv * $out/lib/${untarDir}
@@ -68,7 +68,7 @@ let
       platforms = lib.platforms.all;
       maintainers = with lib.maintainers; [ thoughtpolice offline kamilchm illustris ];
     } // extraMeta;
-  };
+  });
 in
 {
   spark_3_5 = spark rec {
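Note: the value of the `finalAttrs` pattern introduced above is that attribute references keep tracking overrides, which `rec` cannot do — a `rec` binding is frozen at definition time, so `overrideAttrs` changes the attribute but not the strings already interpolated from it. A minimal sketch with a hypothetical package (not from the patch):

    demo = stdenv.mkDerivation (finalAttrs: {
      pname = "demo";
      version = "1.0";
      src = fetchurl {
        # finalAttrs.version follows overrideAttrs; a rec-style ${version} would not
        url = "https://example.org/demo-${finalAttrs.version}.tar.gz";
        hash = lib.fakeHash;
      };
    });
    # (demo.overrideAttrs (_: { version = "2.0"; })).src now fetches demo-2.0.tar.gz
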
From 392bc5422573ffce67a5ac924bc6494398150edb Mon Sep 17 00:00:00 2001
From: illustris <rharikrishnan95@gmail.com>
Date: Sun, 22 Oct 2023 18:16:20 +0530
Subject: [PATCH 3/7] spark: add passthru test

---
 nixos/tests/spark/default.nix            | 69 ++++++++++++-------
 .../networking/cluster/spark/default.nix |  5 ++
 2 files changed, 49 insertions(+), 25 deletions(-)

diff --git a/nixos/tests/spark/default.nix b/nixos/tests/spark/default.nix
index 462f0d23a403..63d6a5d44db8 100644
--- a/nixos/tests/spark/default.nix
+++ b/nixos/tests/spark/default.nix
@@ -1,28 +1,47 @@
-import ../make-test-python.nix ({...}: {
-  name = "spark";
+{ pkgs, ... }:
 
-  nodes = {
-    worker = { nodes, pkgs, ... }: {
-      services.spark.worker = {
-        enable = true;
-        master = "master:7077";
-      };
-      virtualisation.memorySize = 2048;
-    };
-    master = { config, pkgs, ... }: {
-      services.spark.master = {
-        enable = true;
-        bind = "0.0.0.0";
-      };
-      networking.firewall.allowedTCPPorts = [ 22 7077 8080 ];
-    };
+let
+  inherit (pkgs) lib;
+  tests = {
+    default = testsForPackage { sparkPackage = pkgs.spark; };
   };
 
-  testScript = ''
-    master.wait_for_unit("spark-master.service")
-    worker.wait_for_unit("spark-worker.service")
-    worker.copy_from_host( "${./spark_sample.py}", "/spark_sample.py" )
-    assert "<title>Spark Master at spark://" in worker.succeed("curl -sSfkL http://master:8080/")
-    worker.succeed("spark-submit --master spark://master:7077 --executor-memory 512m --executor-cores 1 /spark_sample.py")
-  '';
-})
+  testsForPackage = args: lib.recurseIntoAttrs {
+    sparkCluster = testSparkCluster args;
+    passthru.override = args': testsForPackage (args // args');
+  };
+  testSparkCluster = { sparkPackage, ... }: pkgs.nixosTest ({
+    name = "spark";
+
+    nodes = {
+      worker = { nodes, pkgs, ... }: {
+        services.spark = {
+          package = sparkPackage;
+          worker = {
+            enable = true;
+            master = "master:7077";
+          };
+        };
+        virtualisation.memorySize = 2048;
+      };
+      master = { config, pkgs, ... }: {
+        services.spark = {
+          package = sparkPackage;
+          master = {
+            enable = true;
+            bind = "0.0.0.0";
+          };
+        };
+        networking.firewall.allowedTCPPorts = [ 22 7077 8080 ];
+      };
+    };
+
+    testScript = ''
+      master.wait_for_unit("spark-master.service")
+      worker.wait_for_unit("spark-worker.service")
+      worker.copy_from_host( "${./spark_sample.py}", "/spark_sample.py" )
+      assert "<title>Spark Master at spark://" in worker.succeed("curl -sSfkL http://master:8080/")
+      worker.succeed("spark-submit --master spark://master:7077 --executor-memory 512m --executor-cores 1 /spark_sample.py")
+    '';
+  });
+in tests
diff --git a/pkgs/applications/networking/cluster/spark/default.nix b/pkgs/applications/networking/cluster/spark/default.nix
index 366de1873557..a95db8d005eb 100644
--- a/pkgs/applications/networking/cluster/spark/default.nix
+++ b/pkgs/applications/networking/cluster/spark/default.nix
@@ -10,6 +10,7 @@
 , hadoop
 , RSupport ? true
 , R
+, nixosTests
 }:
 
 let
@@ -60,6 +61,10 @@ let
         ln -s $out/lib/${untarDir}/lib/spark-assembly-*.jar $out/share/java
       '';
 
+      passthru.tests = nixosTests.spark.default.passthru.override {
+        sparkPackage = finalAttrs.finalPackage;
+      };
+
       meta = {
         description = "Apache Spark is a fast and general engine for large-scale data processing";
         homepage = "https://spark.apache.org/";
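Note: with this wiring, every Spark derivation carries a VM test pinned to itself — `nix-build -A spark_3_5.tests.sparkCluster` should exercise exactly that build — and the `passthru.override` hook re-instantiates the test set for any package. A hypothetical downstream use, assuming the attribute paths above:

    # Run the same cluster test against a locally patched Spark;
    # mySpark is a placeholder for any spark-compatible derivation.
    myTests = pkgs.nixosTests.spark.default.passthru.override {
      sparkPackage = mySpark;
    };
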
From ef9732e11f242a17b5b1a7e58397006a3c8f77d9 Mon Sep 17 00:00:00 2001
From: illustris <rharikrishnan95@gmail.com>
Date: Sat, 4 Nov 2023 13:01:45 +0530
Subject: [PATCH 4/7] spark: remove untarDir

---
 .../services/cluster/spark/default.nix   | 16 ++---
 nixos/tests/spark/default.nix            |  1 +
 .../networking/cluster/spark/default.nix | 69 ++++++-----
 3 files changed, 38 insertions(+), 48 deletions(-)

diff --git a/nixos/modules/services/cluster/spark/default.nix b/nixos/modules/services/cluster/spark/default.nix
index bf39c5537332..985ff24d62c8 100644
--- a/nixos/modules/services/cluster/spark/default.nix
+++ b/nixos/modules/services/cluster/spark/default.nix
@@ -69,8 +69,8 @@ with lib;
       confDir = mkOption {
         type = types.path;
         description = lib.mdDoc "Spark configuration directory. Spark will use the configuration files (spark-defaults.conf, spark-env.sh, log4j.properties, etc) from this directory.";
-        default = "${cfg.package}/lib/${cfg.package.untarDir}/conf";
-        defaultText = literalExpression ''"''${package}/lib/''${package.untarDir}/conf"'';
+        default = "${cfg.package}/conf";
+        defaultText = literalExpression ''"''${package}/conf"'';
       };
       logDir = mkOption {
         type = types.path;
@@ -113,9 +113,9 @@ with lib;
           Type = "forking";
           User = "spark";
           Group = "spark";
-          WorkingDirectory = "${cfg.package}/lib/${cfg.package.untarDir}";
-          ExecStart = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/start-master.sh";
-          ExecStop = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/stop-master.sh";
+          WorkingDirectory = "${cfg.package}/";
+          ExecStart = "${cfg.package}/sbin/start-master.sh";
+          ExecStop = "${cfg.package}/sbin/stop-master.sh";
           TimeoutSec = 300;
           StartLimitBurst=10;
           Restart = "always";
@@ -136,9 +136,9 @@ with lib;
         serviceConfig = {
           Type = "forking";
           User = "spark";
-          WorkingDirectory = "${cfg.package}/lib/${cfg.package.untarDir}";
-          ExecStart = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/start-worker.sh spark://${cfg.worker.master}";
-          ExecStop = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/stop-worker.sh";
+          WorkingDirectory = "${cfg.package}/";
+          ExecStart = "${cfg.package}/sbin/start-worker.sh spark://${cfg.worker.master}";
+          ExecStop = "${cfg.package}/sbin/stop-worker.sh";
           TimeoutSec = 300;
           StartLimitBurst=10;
           Restart = "always";
diff --git a/nixos/tests/spark/default.nix b/nixos/tests/spark/default.nix
index 63d6a5d44db8..eed7db35bf4f 100644
--- a/nixos/tests/spark/default.nix
+++ b/nixos/tests/spark/default.nix
@@ -41,6 +41,7 @@ let
       worker.wait_for_unit("spark-worker.service")
       worker.copy_from_host( "${./spark_sample.py}", "/spark_sample.py" )
      assert "<title>Spark Master at spark://" in worker.succeed("curl -sSfkL http://master:8080/")
+      worker.succeed("spark-submit --version | systemd-cat")
       worker.succeed("spark-submit --master spark://master:7077 --executor-memory 512m --executor-cores 1 /spark_sample.py")
     '';
   });
diff --git a/pkgs/applications/networking/cluster/spark/default.nix b/pkgs/applications/networking/cluster/spark/default.nix
index a95db8d005eb..0d5d2c1e4eb9 100644
--- a/pkgs/applications/networking/cluster/spark/default.nix
+++ b/pkgs/applications/networking/cluster/spark/default.nix
@@ -3,10 +3,9 @@
 , fetchzip
 , makeWrapper
 , jdk8
-, python3Packages
-, extraPythonPackages ? [ ]
+, python3
+, python310
 , coreutils
-, hadoopSupport ? true
 , hadoop
 , RSupport ? true
 , R
@@ -14,55 +13,43 @@
 }:
 
 let
-  spark = { pname, version, hash, extraMeta ? {} }:
+  spark = { pname, version, hash, extraMeta ? {}, pysparkPython ? python3 }:
     stdenv.mkDerivation (finalAttrs: {
-      inherit pname version hash;
-      jdk = if hadoopSupport then hadoop.jdk else jdk8;
+      inherit pname version hash hadoop R pysparkPython;
+      inherit (finalAttrs.hadoop) jdk;
       src = fetchzip {
         url = with finalAttrs; "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
         inherit (finalAttrs) hash;
       };
       nativeBuildInputs = [ makeWrapper ];
-      buildInputs = [ finalAttrs.jdk python3Packages.python ]
-        ++ extraPythonPackages
+      buildInputs = [ finalAttrs.jdk finalAttrs.pysparkPython ]
         ++ lib.optional RSupport R;
 
-      untarDir = with finalAttrs; "${pname}-${version}";
       installPhase = with finalAttrs; ''
-        mkdir -p $out/{lib/${untarDir}/conf,bin,/share/java}
-        mv * $out/lib/${untarDir}
-
-        cp $out/lib/${untarDir}/conf/log4j.properties{.template,} || \
-          cp $out/lib/${untarDir}/conf/log4j2.properties{.template,}
-
-        cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF
-        export JAVA_HOME="${jdk}"
-        export SPARK_HOME="$out/lib/${untarDir}"
-      '' + lib.optionalString hadoopSupport ''
-        export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath)
-      '' + ''
-        export PYSPARK_PYTHON="${python3Packages.python}/bin/${python3Packages.python.executable}"
-        export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
-        ${lib.optionalString RSupport ''
-          export SPARKR_R_SHELL="${R}/bin/R"
-          export PATH="\$PATH:${R}/bin"''}
-        EOF
-
-        for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
-          makeWrapper "$n" "$out/bin/$(basename $n)"
-          substituteInPlace "$n" --replace dirname ${coreutils.out}/bin/dirname
+        mkdir -p "$out/opt"
+        mv * $out/
+        for n in $(find $out/bin -type f -executable ! -name "find-spark-home"); do
+          wrapProgram "$n" --set JAVA_HOME "${jdk}" \
+            --run "[ -z SPARK_DIST_CLASSPATH ] && export SPARK_DIST_CLASSPATH=$(${finalAttrs.hadoop}/bin/hadoop classpath)" \
+            ${lib.optionalString RSupport ''--set SPARKR_R_SHELL "${R}/bin/R"''} \
+            --prefix PATH : "${
+              lib.makeBinPath (
+                [ pysparkPython ] ++
+                (lib.optionals RSupport [ R ])
+              )}"
         done
-        for n in $(find $out/lib/${untarDir}/sbin -type f); do
-          # Spark deprecated scripts with "slave" in the name.
-          # This line adds forward compatibility with the nixos spark module for
-          # older versions of spark that don't have the new "worker" scripts.
-          ln -s "$n" $(echo "$n" | sed -r 's/slave(s?).sh$/worker\1.sh/g') || true
-        done
-        ln -s $out/lib/${untarDir}/lib/spark-assembly-*.jar $out/share/java
+        ln -s ${finalAttrs.hadoop} "$out/opt/hadoop"
+        ${lib.optionalString RSupport ''ln -s ${finalAttrs.R} "$out/opt/R"''}
       '';
 
-      passthru.tests = nixosTests.spark.default.passthru.override {
-        sparkPackage = finalAttrs.finalPackage;
+      passthru = {
+        tests = nixosTests.spark.default.passthru.override {
+          sparkPackage = finalAttrs.finalPackage;
+        };
+        # Add python packages to PYSPARK_PYTHON
+        withPythonPackages = f: finalAttrs.finalPackage.overrideAttrs (old: {
+          pysparkPython = old.pysparkPython.withPackages f;
+        });
       };
 
       meta = {
@@ -90,11 +77,13 @@ in
     pname = "spark";
     version = "3.3.3";
     hash = "sha256-YtHxRYTwrwSle3UpFjRSwKcnLFj2m9/zLBENH/HVzuM=";
+    pysparkPython = python310;
   };
   spark_3_2 = spark rec {
     pname = "spark";
     version = "3.2.4";
     hash = "sha256-xL4W+dTWbvmmncq3/8iXmhp24rp5SftvoRfkTyxCI8E=";
+    pysparkPython = python310;
     extraMeta.knownVulnerabilities = [ "CVE-2023-22946" ];
   };
 }
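Note: two details in this patch are worth flagging. First, the new `passthru.withPythonPackages` hook lets users extend PYSPARK_PYTHON without rebuilding Spark itself; a sketch of the intended use, assuming the usual nixpkgs Python package names:

    sparkWithLibs = spark.withPythonPackages (ps: with ps; [ numpy pandas ]);

Second, the wrapper guard `[ -z SPARK_DIST_CLASSPATH ]` tests a literal string, which is never empty, so the `&&` export can never run as written; the next patch corrects it to `[ -z $SPARK_DIST_CLASSPATH ]`.
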
From a6ddad641da57aa4985777185324245c5bea11be Mon Sep 17 00:00:00 2001
From: illustris <rharikrishnan95@gmail.com>
Date: Sat, 4 Nov 2023 13:14:10 +0530
Subject: [PATCH 5/7] spark: remove 3.2.4

---
 pkgs/applications/networking/cluster/spark/default.nix | 9 +--------
 pkgs/top-level/all-packages.nix                        | 2 +-
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/pkgs/applications/networking/cluster/spark/default.nix b/pkgs/applications/networking/cluster/spark/default.nix
index 0d5d2c1e4eb9..bd10b78c1f6a 100644
--- a/pkgs/applications/networking/cluster/spark/default.nix
+++ b/pkgs/applications/networking/cluster/spark/default.nix
@@ -30,7 +30,7 @@ let
         mv * $out/
         for n in $(find $out/bin -type f -executable ! -name "find-spark-home"); do
           wrapProgram "$n" --set JAVA_HOME "${jdk}" \
-            --run "[ -z SPARK_DIST_CLASSPATH ] && export SPARK_DIST_CLASSPATH=$(${finalAttrs.hadoop}/bin/hadoop classpath)" \
+            --run "[ -z $SPARK_DIST_CLASSPATH ] && export SPARK_DIST_CLASSPATH=$(${finalAttrs.hadoop}/bin/hadoop classpath)" \
             ${lib.optionalString RSupport ''--set SPARKR_R_SHELL "${R}/bin/R"''} \
             --prefix PATH : "${
               lib.makeBinPath (
@@ -79,11 +79,4 @@ in
     hash = "sha256-YtHxRYTwrwSle3UpFjRSwKcnLFj2m9/zLBENH/HVzuM=";
     pysparkPython = python310;
   };
-  spark_3_2 = spark rec {
-    pname = "spark";
-    version = "3.2.4";
-    hash = "sha256-xL4W+dTWbvmmncq3/8iXmhp24rp5SftvoRfkTyxCI8E=";
-    pysparkPython = python310;
-    extraMeta.knownVulnerabilities = [ "CVE-2023-22946" ];
-  };
 }
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index e79acec4e321..08f7ce098e55 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -18111,7 +18111,7 @@ with pkgs;
   smiley-sans = callPackage ../data/fonts/smiley-sans { };
 
   inherit (callPackages ../applications/networking/cluster/spark { })
-    spark_3_5 spark_3_4 spark_3_3 spark_3_2;
+    spark_3_5 spark_3_4 spark_3_3;
   spark3 = spark_3_5;
   spark = spark3;
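Note: dropping `spark_3_2` (still flagged with CVE-2023-22946) means any configuration that pinned it stops evaluating. The obvious migration — a sketch assuming the `services.spark.package` option exercised in the test above — is to move to a maintained release line:

    services.spark = {
      package = pkgs.spark_3_3;  # was pkgs.spark_3_2, removed in this patch
      master.enable = true;
    };
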
From fefde6c13541500dcd85c56f3900d757808a5d06 Mon Sep 17 00:00:00 2001
From: illustris <rharikrishnan95@gmail.com>
Date: Sat, 4 Nov 2023 19:30:56 +0530
Subject: [PATCH 6/7] spark: remove with; to avoid ambiguity

---
 .../networking/cluster/spark/default.nix | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/pkgs/applications/networking/cluster/spark/default.nix b/pkgs/applications/networking/cluster/spark/default.nix
index bd10b78c1f6a..3d6c620cb938 100644
--- a/pkgs/applications/networking/cluster/spark/default.nix
+++ b/pkgs/applications/networking/cluster/spark/default.nix
@@ -22,20 +22,20 @@ let
         inherit (finalAttrs) hash;
       };
       nativeBuildInputs = [ makeWrapper ];
-      buildInputs = [ finalAttrs.jdk finalAttrs.pysparkPython ]
-        ++ lib.optional RSupport R;
+      buildInputs = with finalAttrs; [ jdk pysparkPython ]
+        ++ lib.optional RSupport finalAttrs.R;
 
-      installPhase = with finalAttrs; ''
+      installPhase = ''
         mkdir -p "$out/opt"
         mv * $out/
         for n in $(find $out/bin -type f -executable ! -name "find-spark-home"); do
-          wrapProgram "$n" --set JAVA_HOME "${jdk}" \
+          wrapProgram "$n" --set JAVA_HOME "${finalAttrs.jdk}" \
             --run "[ -z $SPARK_DIST_CLASSPATH ] && export SPARK_DIST_CLASSPATH=$(${finalAttrs.hadoop}/bin/hadoop classpath)" \
-            ${lib.optionalString RSupport ''--set SPARKR_R_SHELL "${R}/bin/R"''} \
+            ${lib.optionalString RSupport ''--set SPARKR_R_SHELL "${finalAttrs.R}/bin/R"''} \
             --prefix PATH : "${
               lib.makeBinPath (
-                [ pysparkPython ] ++
-                (lib.optionals RSupport [ R ])
+                [ finalAttrs.pysparkPython ] ++
+                (lib.optionals RSupport [ finalAttrs.R ])
               )}"
         done
         ln -s ${finalAttrs.hadoop} "$out/opt/hadoop"
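Note: the ambiguity being removed comes from Nix's scoping rules: identifiers introduced by `with` are shadowed by any lexical binding, so inside `with finalAttrs; ''...${jdk}...''` a later refactor that adds a top-level `jdk` argument would silently change what `${jdk}` resolves to. A contrived sketch of the hazard:

    let
      jdk = "lexical";                  # e.g. a newly added function argument
      attrs = { jdk = "from-attrs"; };
    in with attrs; jdk                  # evaluates to "lexical", not "from-attrs"
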
From 7697f2ef7f17a22747e1b9e4dc16120a173ed483 Mon Sep 17 00:00:00 2001
From: illustris <rharikrishnan95@gmail.com>
Date: Sun, 3 Dec 2023 23:48:19 +0530
Subject: [PATCH 7/7] spark: 3.4.1->3.4.2

---
 pkgs/applications/networking/cluster/spark/default.nix | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkgs/applications/networking/cluster/spark/default.nix b/pkgs/applications/networking/cluster/spark/default.nix
index 3d6c620cb938..19c4b280a60c 100644
--- a/pkgs/applications/networking/cluster/spark/default.nix
+++ b/pkgs/applications/networking/cluster/spark/default.nix
@@ -70,8 +70,8 @@ in
   };
   spark_3_4 = spark rec {
     pname = "spark";
-    version = "3.4.1";
-    hash = "sha256-4vC9oBCycVNy3hIxFII65j7FHlrxhDURU3NmsJZPDDU=";
+    version = "3.4.2";
+    hash = "sha256-qr0tRuzzEcarJznrQYkaQzGqI7tugp/XJpoZxL7tJwk=";
   };
   spark_3_3 = spark rec {
     pname = "spark";
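Note: thanks to the `finalAttrs`-based `src` from PATCH 2, a point release is now a two-field diff, and the same property makes downstream bumps just as small. A hypothetical overlay for testing a release before it lands in nixpkgs (3.4.3 is an assumed version, and the fake hash must be replaced after the first failing fetch):

    final: prev: {
      spark_3_4 = prev.spark_3_4.overrideAttrs (old: {
        version = "3.4.3";         # assumed future release
        hash = prev.lib.fakeHash;  # replace with the hash Nix reports
      });
    }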