Merge pull request #256737 from illustris/spark
spark: init 3.5.0, 3.4.0->3.4.2, 3.3.2->3.3.3
This commit is contained in:
commit
d41449afb3
4 changed files with 100 additions and 89 deletions
|
@ -69,8 +69,8 @@ with lib;
|
||||||
confDir = mkOption {
|
confDir = mkOption {
|
||||||
type = types.path;
|
type = types.path;
|
||||||
description = lib.mdDoc "Spark configuration directory. Spark will use the configuration files (spark-defaults.conf, spark-env.sh, log4j.properties, etc) from this directory.";
|
description = lib.mdDoc "Spark configuration directory. Spark will use the configuration files (spark-defaults.conf, spark-env.sh, log4j.properties, etc) from this directory.";
|
||||||
default = "${cfg.package}/lib/${cfg.package.untarDir}/conf";
|
default = "${cfg.package}/conf";
|
||||||
defaultText = literalExpression ''"''${package}/lib/''${package.untarDir}/conf"'';
|
defaultText = literalExpression ''"''${package}/conf"'';
|
||||||
};
|
};
|
||||||
logDir = mkOption {
|
logDir = mkOption {
|
||||||
type = types.path;
|
type = types.path;
|
||||||
|
@ -111,9 +111,9 @@ with lib;
|
||||||
Type = "forking";
|
Type = "forking";
|
||||||
User = "spark";
|
User = "spark";
|
||||||
Group = "spark";
|
Group = "spark";
|
||||||
WorkingDirectory = "${cfg.package}/lib/${cfg.package.untarDir}";
|
WorkingDirectory = "${cfg.package}/";
|
||||||
ExecStart = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/start-master.sh";
|
ExecStart = "${cfg.package}/sbin/start-master.sh";
|
||||||
ExecStop = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/stop-master.sh";
|
ExecStop = "${cfg.package}/sbin/stop-master.sh";
|
||||||
TimeoutSec = 300;
|
TimeoutSec = 300;
|
||||||
StartLimitBurst=10;
|
StartLimitBurst=10;
|
||||||
Restart = "always";
|
Restart = "always";
|
||||||
|
@ -134,9 +134,9 @@ with lib;
|
||||||
serviceConfig = {
|
serviceConfig = {
|
||||||
Type = "forking";
|
Type = "forking";
|
||||||
User = "spark";
|
User = "spark";
|
||||||
WorkingDirectory = "${cfg.package}/lib/${cfg.package.untarDir}";
|
WorkingDirectory = "${cfg.package}/";
|
||||||
ExecStart = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/start-worker.sh spark://${cfg.worker.master}";
|
ExecStart = "${cfg.package}/sbin/start-worker.sh spark://${cfg.worker.master}";
|
||||||
ExecStop = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/stop-worker.sh";
|
ExecStop = "${cfg.package}/sbin/stop-worker.sh";
|
||||||
TimeoutSec = 300;
|
TimeoutSec = 300;
|
||||||
StartLimitBurst=10;
|
StartLimitBurst=10;
|
||||||
Restart = "always";
|
Restart = "always";
|
||||||
|
|
|
@ -1,28 +1,48 @@
|
||||||
import ../make-test-python.nix ({...}: {
|
{ pkgs, ... }:
|
||||||
name = "spark";
|
|
||||||
|
|
||||||
nodes = {
|
let
|
||||||
worker = { nodes, pkgs, ... }: {
|
inherit (pkgs) lib;
|
||||||
services.spark.worker = {
|
tests = {
|
||||||
enable = true;
|
default = testsForPackage { sparkPackage = pkgs.spark; };
|
||||||
master = "master:7077";
|
|
||||||
};
|
|
||||||
virtualisation.memorySize = 2048;
|
|
||||||
};
|
|
||||||
master = { config, pkgs, ... }: {
|
|
||||||
services.spark.master = {
|
|
||||||
enable = true;
|
|
||||||
bind = "0.0.0.0";
|
|
||||||
};
|
|
||||||
networking.firewall.allowedTCPPorts = [ 22 7077 8080 ];
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
testScript = ''
|
testsForPackage = args: lib.recurseIntoAttrs {
|
||||||
master.wait_for_unit("spark-master.service")
|
sparkCluster = testSparkCluster args;
|
||||||
worker.wait_for_unit("spark-worker.service")
|
passthru.override = args': testsForPackage (args // args');
|
||||||
worker.copy_from_host( "${./spark_sample.py}", "/spark_sample.py" )
|
};
|
||||||
assert "<title>Spark Master at spark://" in worker.succeed("curl -sSfkL http://master:8080/")
|
testSparkCluster = { sparkPackage, ... }: pkgs.nixosTest ({
|
||||||
worker.succeed("spark-submit --master spark://master:7077 --executor-memory 512m --executor-cores 1 /spark_sample.py")
|
name = "spark";
|
||||||
'';
|
|
||||||
})
|
nodes = {
|
||||||
|
worker = { nodes, pkgs, ... }: {
|
||||||
|
services.spark = {
|
||||||
|
package = sparkPackage;
|
||||||
|
worker = {
|
||||||
|
enable = true;
|
||||||
|
master = "master:7077";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
virtualisation.memorySize = 2048;
|
||||||
|
};
|
||||||
|
master = { config, pkgs, ... }: {
|
||||||
|
services.spark = {
|
||||||
|
package = sparkPackage;
|
||||||
|
master = {
|
||||||
|
enable = true;
|
||||||
|
bind = "0.0.0.0";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
networking.firewall.allowedTCPPorts = [ 22 7077 8080 ];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
testScript = ''
|
||||||
|
master.wait_for_unit("spark-master.service")
|
||||||
|
worker.wait_for_unit("spark-worker.service")
|
||||||
|
worker.copy_from_host( "${./spark_sample.py}", "/spark_sample.py" )
|
||||||
|
assert "<title>Spark Master at spark://" in worker.succeed("curl -sSfkL http://master:8080/")
|
||||||
|
worker.succeed("spark-submit --version | systemd-cat")
|
||||||
|
worker.succeed("spark-submit --master spark://master:7077 --executor-memory 512m --executor-cores 1 /spark_sample.py")
|
||||||
|
'';
|
||||||
|
});
|
||||||
|
in tests
|
||||||
|
|
|
@ -3,63 +3,55 @@
|
||||||
, fetchzip
|
, fetchzip
|
||||||
, makeWrapper
|
, makeWrapper
|
||||||
, jdk8
|
, jdk8
|
||||||
, python3Packages
|
, python3
|
||||||
, extraPythonPackages ? [ ]
|
, python310
|
||||||
, coreutils
|
, coreutils
|
||||||
, hadoopSupport ? true
|
|
||||||
, hadoop
|
, hadoop
|
||||||
, RSupport ? true
|
, RSupport ? true
|
||||||
, R
|
, R
|
||||||
|
, nixosTests
|
||||||
}:
|
}:
|
||||||
|
|
||||||
let
|
let
|
||||||
spark = { pname, version, hash, extraMeta ? {} }:
|
spark = { pname, version, hash, extraMeta ? {}, pysparkPython ? python3 }:
|
||||||
stdenv.mkDerivation rec {
|
stdenv.mkDerivation (finalAttrs: {
|
||||||
inherit pname version;
|
inherit pname version hash hadoop R pysparkPython;
|
||||||
jdk = if hadoopSupport then hadoop.jdk else jdk8;
|
inherit (finalAttrs.hadoop) jdk;
|
||||||
src = fetchzip {
|
src = fetchzip {
|
||||||
url = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
|
url = with finalAttrs; "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
|
||||||
inherit hash;
|
inherit (finalAttrs) hash;
|
||||||
};
|
};
|
||||||
nativeBuildInputs = [ makeWrapper ];
|
nativeBuildInputs = [ makeWrapper ];
|
||||||
buildInputs = [ jdk python3Packages.python ]
|
buildInputs = with finalAttrs; [ jdk pysparkPython ]
|
||||||
++ extraPythonPackages
|
++ lib.optional RSupport finalAttrs.R;
|
||||||
++ lib.optional RSupport R;
|
|
||||||
|
|
||||||
untarDir = "${pname}-${version}";
|
|
||||||
installPhase = ''
|
installPhase = ''
|
||||||
mkdir -p $out/{lib/${untarDir}/conf,bin,/share/java}
|
mkdir -p "$out/opt"
|
||||||
mv * $out/lib/${untarDir}
|
mv * $out/
|
||||||
|
for n in $(find $out/bin -type f -executable ! -name "find-spark-home"); do
|
||||||
cp $out/lib/${untarDir}/conf/log4j.properties{.template,} || \
|
wrapProgram "$n" --set JAVA_HOME "${finalAttrs.jdk}" \
|
||||||
cp $out/lib/${untarDir}/conf/log4j2.properties{.template,}
|
--run "[ -z $SPARK_DIST_CLASSPATH ] && export SPARK_DIST_CLASSPATH=$(${finalAttrs.hadoop}/bin/hadoop classpath)" \
|
||||||
|
${lib.optionalString RSupport ''--set SPARKR_R_SHELL "${finalAttrs.R}/bin/R"''} \
|
||||||
cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF
|
--prefix PATH : "${
|
||||||
export JAVA_HOME="${jdk}"
|
lib.makeBinPath (
|
||||||
export SPARK_HOME="$out/lib/${untarDir}"
|
[ finalAttrs.pysparkPython ] ++
|
||||||
'' + lib.optionalString hadoopSupport ''
|
(lib.optionals RSupport [ finalAttrs.R ])
|
||||||
export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath)
|
)}"
|
||||||
'' + ''
|
|
||||||
export PYSPARK_PYTHON="${python3Packages.python}/bin/${python3Packages.python.executable}"
|
|
||||||
export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
|
|
||||||
${lib.optionalString RSupport ''
|
|
||||||
export SPARKR_R_SHELL="${R}/bin/R"
|
|
||||||
export PATH="\$PATH:${R}/bin"''}
|
|
||||||
EOF
|
|
||||||
|
|
||||||
for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
|
|
||||||
makeWrapper "$n" "$out/bin/$(basename $n)"
|
|
||||||
substituteInPlace "$n" --replace dirname ${coreutils.out}/bin/dirname
|
|
||||||
done
|
done
|
||||||
for n in $(find $out/lib/${untarDir}/sbin -type f); do
|
ln -s ${finalAttrs.hadoop} "$out/opt/hadoop"
|
||||||
# Spark deprecated scripts with "slave" in the name.
|
${lib.optionalString RSupport ''ln -s ${finalAttrs.R} "$out/opt/R"''}
|
||||||
# This line adds forward compatibility with the nixos spark module for
|
|
||||||
# older versions of spark that don't have the new "worker" scripts.
|
|
||||||
ln -s "$n" $(echo "$n" | sed -r 's/slave(s?).sh$/worker\1.sh/g') || true
|
|
||||||
done
|
|
||||||
ln -s $out/lib/${untarDir}/lib/spark-assembly-*.jar $out/share/java
|
|
||||||
'';
|
'';
|
||||||
|
|
||||||
|
passthru = {
|
||||||
|
tests = nixosTests.spark.default.passthru.override {
|
||||||
|
sparkPackage = finalAttrs.finalPackage;
|
||||||
|
};
|
||||||
|
# Add python packages to PYSPARK_PYTHON
|
||||||
|
withPythonPackages = f: finalAttrs.finalPackage.overrideAttrs (old: {
|
||||||
|
pysparkPython = old.pysparkPython.withPackages f;
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
meta = {
|
meta = {
|
||||||
description = "Apache Spark is a fast and general engine for large-scale data processing";
|
description = "Apache Spark is a fast and general engine for large-scale data processing";
|
||||||
homepage = "https://spark.apache.org/";
|
homepage = "https://spark.apache.org/";
|
||||||
|
@ -68,24 +60,23 @@ let
|
||||||
platforms = lib.platforms.all;
|
platforms = lib.platforms.all;
|
||||||
maintainers = with lib.maintainers; [ thoughtpolice offline kamilchm illustris ];
|
maintainers = with lib.maintainers; [ thoughtpolice offline kamilchm illustris ];
|
||||||
} // extraMeta;
|
} // extraMeta;
|
||||||
};
|
});
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
|
spark_3_5 = spark rec {
|
||||||
|
pname = "spark";
|
||||||
|
version = "3.5.0";
|
||||||
|
hash = "sha256-f+a4a23aOM0GCDoZlZ7WNXs0Olzyh3yMtO8ZmEoYvZ4=";
|
||||||
|
};
|
||||||
spark_3_4 = spark rec {
|
spark_3_4 = spark rec {
|
||||||
pname = "spark";
|
pname = "spark";
|
||||||
version = "3.4.0";
|
version = "3.4.2";
|
||||||
hash = "sha256-0y80dRYzb6Ceu6MlGQHtpMdzOob/TBg6kf8dtF6KyCk=";
|
hash = "sha256-qr0tRuzzEcarJznrQYkaQzGqI7tugp/XJpoZxL7tJwk=";
|
||||||
};
|
};
|
||||||
spark_3_3 = spark rec {
|
spark_3_3 = spark rec {
|
||||||
pname = "spark";
|
pname = "spark";
|
||||||
version = "3.3.2";
|
version = "3.3.3";
|
||||||
hash = "sha256-AeKe2QN+mhUJgZRSIgbi/DttAWlDgwC1kl9p7syEvbo=";
|
hash = "sha256-YtHxRYTwrwSle3UpFjRSwKcnLFj2m9/zLBENH/HVzuM=";
|
||||||
extraMeta.knownVulnerabilities = [ "CVE-2023-22946" ];
|
pysparkPython = python310;
|
||||||
};
|
|
||||||
spark_3_2 = spark rec {
|
|
||||||
pname = "spark";
|
|
||||||
version = "3.2.4";
|
|
||||||
hash = "sha256-xL4W+dTWbvmmncq3/8iXmhp24rp5SftvoRfkTyxCI8E=";
|
|
||||||
extraMeta.knownVulnerabilities = [ "CVE-2023-22946" ];
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -18035,8 +18035,8 @@ with pkgs;
|
||||||
smiley-sans = callPackage ../data/fonts/smiley-sans { };
|
smiley-sans = callPackage ../data/fonts/smiley-sans { };
|
||||||
|
|
||||||
inherit (callPackages ../applications/networking/cluster/spark { })
|
inherit (callPackages ../applications/networking/cluster/spark { })
|
||||||
spark_3_4 spark_3_3 spark_3_2;
|
spark_3_5 spark_3_4 spark_3_3;
|
||||||
spark3 = spark_3_4;
|
spark3 = spark_3_5;
|
||||||
spark = spark3;
|
spark = spark3;
|
||||||
|
|
||||||
sparkleshare = callPackage ../applications/version-management/sparkleshare { };
|
sparkleshare = callPackage ../applications/version-management/sparkleshare { };
|
||||||
|
|
Loading…
Reference in a new issue