Merge pull request #256737 from illustris/spark

spark: init 3.5.0, 3.4.0->3.4.2, 3.3.2->3.3.3
This commit is contained in:
Peder Bergebakken Sundt 2023-12-07 04:24:16 +01:00 committed by GitHub
commit d41449afb3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 100 additions and 89 deletions

View file

@ -69,8 +69,8 @@ with lib;
confDir = mkOption { confDir = mkOption {
type = types.path; type = types.path;
description = lib.mdDoc "Spark configuration directory. Spark will use the configuration files (spark-defaults.conf, spark-env.sh, log4j.properties, etc) from this directory."; description = lib.mdDoc "Spark configuration directory. Spark will use the configuration files (spark-defaults.conf, spark-env.sh, log4j.properties, etc) from this directory.";
default = "${cfg.package}/lib/${cfg.package.untarDir}/conf"; default = "${cfg.package}/conf";
defaultText = literalExpression ''"''${package}/lib/''${package.untarDir}/conf"''; defaultText = literalExpression ''"''${package}/conf"'';
}; };
logDir = mkOption { logDir = mkOption {
type = types.path; type = types.path;
@ -111,9 +111,9 @@ with lib;
Type = "forking"; Type = "forking";
User = "spark"; User = "spark";
Group = "spark"; Group = "spark";
WorkingDirectory = "${cfg.package}/lib/${cfg.package.untarDir}"; WorkingDirectory = "${cfg.package}/";
ExecStart = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/start-master.sh"; ExecStart = "${cfg.package}/sbin/start-master.sh";
ExecStop = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/stop-master.sh"; ExecStop = "${cfg.package}/sbin/stop-master.sh";
TimeoutSec = 300; TimeoutSec = 300;
StartLimitBurst=10; StartLimitBurst=10;
Restart = "always"; Restart = "always";
@ -134,9 +134,9 @@ with lib;
serviceConfig = { serviceConfig = {
Type = "forking"; Type = "forking";
User = "spark"; User = "spark";
WorkingDirectory = "${cfg.package}/lib/${cfg.package.untarDir}"; WorkingDirectory = "${cfg.package}/";
ExecStart = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/start-worker.sh spark://${cfg.worker.master}"; ExecStart = "${cfg.package}/sbin/start-worker.sh spark://${cfg.worker.master}";
ExecStop = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/stop-worker.sh"; ExecStop = "${cfg.package}/sbin/stop-worker.sh";
TimeoutSec = 300; TimeoutSec = 300;
StartLimitBurst=10; StartLimitBurst=10;
Restart = "always"; Restart = "always";

View file

@ -1,28 +1,48 @@
import ../make-test-python.nix ({...}: { { pkgs, ... }:
name = "spark";
nodes = { let
worker = { nodes, pkgs, ... }: { inherit (pkgs) lib;
services.spark.worker = { tests = {
enable = true; default = testsForPackage { sparkPackage = pkgs.spark; };
master = "master:7077";
};
virtualisation.memorySize = 2048;
};
master = { config, pkgs, ... }: {
services.spark.master = {
enable = true;
bind = "0.0.0.0";
};
networking.firewall.allowedTCPPorts = [ 22 7077 8080 ];
};
}; };
testScript = '' testsForPackage = args: lib.recurseIntoAttrs {
master.wait_for_unit("spark-master.service") sparkCluster = testSparkCluster args;
worker.wait_for_unit("spark-worker.service") passthru.override = args': testsForPackage (args // args');
worker.copy_from_host( "${./spark_sample.py}", "/spark_sample.py" ) };
assert "<title>Spark Master at spark://" in worker.succeed("curl -sSfkL http://master:8080/") testSparkCluster = { sparkPackage, ... }: pkgs.nixosTest ({
worker.succeed("spark-submit --master spark://master:7077 --executor-memory 512m --executor-cores 1 /spark_sample.py") name = "spark";
'';
}) nodes = {
worker = { nodes, pkgs, ... }: {
services.spark = {
package = sparkPackage;
worker = {
enable = true;
master = "master:7077";
};
};
virtualisation.memorySize = 2048;
};
master = { config, pkgs, ... }: {
services.spark = {
package = sparkPackage;
master = {
enable = true;
bind = "0.0.0.0";
};
};
networking.firewall.allowedTCPPorts = [ 22 7077 8080 ];
};
};
testScript = ''
master.wait_for_unit("spark-master.service")
worker.wait_for_unit("spark-worker.service")
worker.copy_from_host( "${./spark_sample.py}", "/spark_sample.py" )
assert "<title>Spark Master at spark://" in worker.succeed("curl -sSfkL http://master:8080/")
worker.succeed("spark-submit --version | systemd-cat")
worker.succeed("spark-submit --master spark://master:7077 --executor-memory 512m --executor-cores 1 /spark_sample.py")
'';
});
in tests

View file

@ -3,63 +3,55 @@
, fetchzip , fetchzip
, makeWrapper , makeWrapper
, jdk8 , jdk8
, python3Packages , python3
, extraPythonPackages ? [ ] , python310
, coreutils , coreutils
, hadoopSupport ? true
, hadoop , hadoop
, RSupport ? true , RSupport ? true
, R , R
, nixosTests
}: }:
let let
spark = { pname, version, hash, extraMeta ? {} }: spark = { pname, version, hash, extraMeta ? {}, pysparkPython ? python3 }:
stdenv.mkDerivation rec { stdenv.mkDerivation (finalAttrs: {
inherit pname version; inherit pname version hash hadoop R pysparkPython;
jdk = if hadoopSupport then hadoop.jdk else jdk8; inherit (finalAttrs.hadoop) jdk;
src = fetchzip { src = fetchzip {
url = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz"; url = with finalAttrs; "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
inherit hash; inherit (finalAttrs) hash;
}; };
nativeBuildInputs = [ makeWrapper ]; nativeBuildInputs = [ makeWrapper ];
buildInputs = [ jdk python3Packages.python ] buildInputs = with finalAttrs; [ jdk pysparkPython ]
++ extraPythonPackages ++ lib.optional RSupport finalAttrs.R;
++ lib.optional RSupport R;
untarDir = "${pname}-${version}";
installPhase = '' installPhase = ''
mkdir -p $out/{lib/${untarDir}/conf,bin,/share/java} mkdir -p "$out/opt"
mv * $out/lib/${untarDir} mv * $out/
for n in $(find $out/bin -type f -executable ! -name "find-spark-home"); do
cp $out/lib/${untarDir}/conf/log4j.properties{.template,} || \ wrapProgram "$n" --set JAVA_HOME "${finalAttrs.jdk}" \
cp $out/lib/${untarDir}/conf/log4j2.properties{.template,} --run "[ -z $SPARK_DIST_CLASSPATH ] && export SPARK_DIST_CLASSPATH=$(${finalAttrs.hadoop}/bin/hadoop classpath)" \
${lib.optionalString RSupport ''--set SPARKR_R_SHELL "${finalAttrs.R}/bin/R"''} \
cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF --prefix PATH : "${
export JAVA_HOME="${jdk}" lib.makeBinPath (
export SPARK_HOME="$out/lib/${untarDir}" [ finalAttrs.pysparkPython ] ++
'' + lib.optionalString hadoopSupport '' (lib.optionals RSupport [ finalAttrs.R ])
export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath) )}"
'' + ''
export PYSPARK_PYTHON="${python3Packages.python}/bin/${python3Packages.python.executable}"
export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
${lib.optionalString RSupport ''
export SPARKR_R_SHELL="${R}/bin/R"
export PATH="\$PATH:${R}/bin"''}
EOF
for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
makeWrapper "$n" "$out/bin/$(basename $n)"
substituteInPlace "$n" --replace dirname ${coreutils.out}/bin/dirname
done done
for n in $(find $out/lib/${untarDir}/sbin -type f); do ln -s ${finalAttrs.hadoop} "$out/opt/hadoop"
# Spark deprecated scripts with "slave" in the name. ${lib.optionalString RSupport ''ln -s ${finalAttrs.R} "$out/opt/R"''}
# This line adds forward compatibility with the nixos spark module for
# older versions of spark that don't have the new "worker" scripts.
ln -s "$n" $(echo "$n" | sed -r 's/slave(s?).sh$/worker\1.sh/g') || true
done
ln -s $out/lib/${untarDir}/lib/spark-assembly-*.jar $out/share/java
''; '';
passthru = {
tests = nixosTests.spark.default.passthru.override {
sparkPackage = finalAttrs.finalPackage;
};
# Add python packages to PYSPARK_PYTHON
withPythonPackages = f: finalAttrs.finalPackage.overrideAttrs (old: {
pysparkPython = old.pysparkPython.withPackages f;
});
};
meta = { meta = {
description = "Apache Spark is a fast and general engine for large-scale data processing"; description = "Apache Spark is a fast and general engine for large-scale data processing";
homepage = "https://spark.apache.org/"; homepage = "https://spark.apache.org/";
@ -68,24 +60,23 @@ let
platforms = lib.platforms.all; platforms = lib.platforms.all;
maintainers = with lib.maintainers; [ thoughtpolice offline kamilchm illustris ]; maintainers = with lib.maintainers; [ thoughtpolice offline kamilchm illustris ];
} // extraMeta; } // extraMeta;
}; });
in in
{ {
spark_3_5 = spark rec {
pname = "spark";
version = "3.5.0";
hash = "sha256-f+a4a23aOM0GCDoZlZ7WNXs0Olzyh3yMtO8ZmEoYvZ4=";
};
spark_3_4 = spark rec { spark_3_4 = spark rec {
pname = "spark"; pname = "spark";
version = "3.4.0"; version = "3.4.2";
hash = "sha256-0y80dRYzb6Ceu6MlGQHtpMdzOob/TBg6kf8dtF6KyCk="; hash = "sha256-qr0tRuzzEcarJznrQYkaQzGqI7tugp/XJpoZxL7tJwk=";
}; };
spark_3_3 = spark rec { spark_3_3 = spark rec {
pname = "spark"; pname = "spark";
version = "3.3.2"; version = "3.3.3";
hash = "sha256-AeKe2QN+mhUJgZRSIgbi/DttAWlDgwC1kl9p7syEvbo="; hash = "sha256-YtHxRYTwrwSle3UpFjRSwKcnLFj2m9/zLBENH/HVzuM=";
extraMeta.knownVulnerabilities = [ "CVE-2023-22946" ]; pysparkPython = python310;
};
spark_3_2 = spark rec {
pname = "spark";
version = "3.2.4";
hash = "sha256-xL4W+dTWbvmmncq3/8iXmhp24rp5SftvoRfkTyxCI8E=";
extraMeta.knownVulnerabilities = [ "CVE-2023-22946" ];
}; };
} }

View file

@ -18035,8 +18035,8 @@ with pkgs;
smiley-sans = callPackage ../data/fonts/smiley-sans { }; smiley-sans = callPackage ../data/fonts/smiley-sans { };
inherit (callPackages ../applications/networking/cluster/spark { }) inherit (callPackages ../applications/networking/cluster/spark { })
spark_3_4 spark_3_3 spark_3_2; spark_3_5 spark_3_4 spark_3_3;
spark3 = spark_3_4; spark3 = spark_3_5;
spark = spark3; spark = spark3;
sparkleshare = callPackage ../applications/version-management/sparkleshare { }; sparkleshare = callPackage ../applications/version-management/sparkleshare { };