From 799dc66cf1ea36b2dea0893734ace5606cb63433 Mon Sep 17 00:00:00 2001
From: illustris
Date: Sat, 8 Jan 2022 18:38:34 +0530
Subject: [PATCH] hadoop: add passthrough tests

---
Reviewer notes (text in this position is ignored by git-am):

* pkgs/applications/networking/cluster/hadoop/default.nix: the first hunk
  used to add a second `, openssl` formal argument right after the existing
  one. Nix rejects duplicate formal arguments, so the file would not
  evaluate. Only `, nixosTests` is added now; the hunk headers of that file
  and the diffstat below were adjusted for the one-line-smaller change.
  The `index` line is left as originally recorded.
* This patch was recovered from a copy whose newlines and indentation had
  been collapsed. Indentation and blank context lines were reconstructed
  from the hunk line counts and may not match the tree exactly; verify
  with `git apply --check` before applying.

 nixos/tests/all-tests.nix                     |   6 +-
 nixos/tests/hadoop/default.nix                |   7 +
 nixos/tests/hadoop/hadoop.nix                 | 244 +++++++++---------
 nixos/tests/hadoop/hdfs.nix                   |  12 +-
 nixos/tests/hadoop/yarn.nix                   |  51 ++--
 .../networking/cluster/hadoop/default.nix     |  54 ++--
 6 files changed, 198 insertions(+), 176 deletions(-)
 create mode 100644 nixos/tests/hadoop/default.nix

diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix
index 15b54cd9fe1d..8712407521f7 100644
--- a/nixos/tests/all-tests.nix
+++ b/nixos/tests/all-tests.nix
@@ -189,9 +189,9 @@ in
   grocy = handleTest ./grocy.nix {};
   grub = handleTest ./grub.nix {};
   gvisor = handleTest ./gvisor.nix {};
-  hadoop.all = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hadoop/hadoop.nix {};
-  hadoop.hdfs = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hadoop/hdfs.nix {};
-  hadoop.yarn = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hadoop/yarn.nix {};
+  hadoop = import ./hadoop { inherit handleTestOn; package=pkgs.hadoop; };
+  hadoop_3_2 = import ./hadoop { inherit handleTestOn; package=pkgs.hadoop_3_2; };
+  hadoop2 = import ./hadoop { inherit handleTestOn; package=pkgs.hadoop2; };
   haka = handleTest ./haka.nix {};
   haproxy = handleTest ./haproxy.nix {};
   hardened = handleTest ./hardened.nix {};
diff --git a/nixos/tests/hadoop/default.nix b/nixos/tests/hadoop/default.nix
new file mode 100644
index 000000000000..d2a97cbeffb8
--- /dev/null
+++ b/nixos/tests/hadoop/default.nix
@@ -0,0 +1,7 @@
+{ handleTestOn, package, ... }:
+
+{
+  all = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hadoop.nix { inherit package; };
+  hdfs = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hdfs.nix { inherit package; };
+  yarn = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./yarn.nix { inherit package; };
+}
diff --git a/nixos/tests/hadoop/hadoop.nix b/nixos/tests/hadoop/hadoop.nix
index adc3c9f393c2..e84a56f302f5 100644
--- a/nixos/tests/hadoop/hadoop.nix
+++ b/nixos/tests/hadoop/hadoop.nix
@@ -1,149 +1,151 @@
 # This test is very comprehensive. It tests whether all hadoop services work well with each other.
 # Run this when updating the Hadoop package or making significant changes to the hadoop module.
 # For a more basic test, see hdfs.nix and yarn.nix
-import ../make-test-python.nix ({pkgs, ...}: {
+import ../make-test-python.nix ({ package, ... }: {
+  name = "hadoop-combined";
 
-  nodes = let
-    package = pkgs.hadoop;
-    coreSite = {
-      "fs.defaultFS" = "hdfs://ns1";
-    };
-    hdfsSite = {
-      "dfs.namenode.rpc-bind-host" = "0.0.0.0";
-      "dfs.namenode.http-bind-host" = "0.0.0.0";
-      "dfs.namenode.servicerpc-bind-host" = "0.0.0.0";
+  nodes =
+    let
+      coreSite = {
+        "fs.defaultFS" = "hdfs://ns1";
+      };
+      hdfsSite = {
+        "dfs.namenode.rpc-bind-host" = "0.0.0.0";
+        "dfs.namenode.http-bind-host" = "0.0.0.0";
+        "dfs.namenode.servicerpc-bind-host" = "0.0.0.0";
 
-      # HA Quorum Journal Manager configuration
-      "dfs.nameservices" = "ns1";
-      "dfs.ha.namenodes.ns1" = "nn1,nn2";
-      "dfs.namenode.shared.edits.dir.ns1.nn1" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
-      "dfs.namenode.shared.edits.dir.ns1.nn2" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
-      "dfs.namenode.rpc-address.ns1.nn1" = "nn1:8020";
-      "dfs.namenode.rpc-address.ns1.nn2" = "nn2:8020";
-      "dfs.namenode.servicerpc-address.ns1.nn1" = "nn1:8022";
-      "dfs.namenode.servicerpc-address.ns1.nn2" = "nn2:8022";
-      "dfs.namenode.http-address.ns1.nn1" = "nn1:9870";
-      "dfs.namenode.http-address.ns1.nn2" = "nn2:9870";
+        # HA Quorum Journal Manager configuration
+        "dfs.nameservices" = "ns1";
+        "dfs.ha.namenodes.ns1" = "nn1,nn2";
+        "dfs.namenode.shared.edits.dir.ns1.nn1" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
+        "dfs.namenode.shared.edits.dir.ns1.nn2" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
+        "dfs.namenode.rpc-address.ns1.nn1" = "nn1:8020";
+        "dfs.namenode.rpc-address.ns1.nn2" = "nn2:8020";
+        "dfs.namenode.servicerpc-address.ns1.nn1" = "nn1:8022";
+        "dfs.namenode.servicerpc-address.ns1.nn2" = "nn2:8022";
+        "dfs.namenode.http-address.ns1.nn1" = "nn1:9870";
+        "dfs.namenode.http-address.ns1.nn2" = "nn2:9870";
 
-      # Automatic failover configuration
-      "dfs.client.failover.proxy.provider.ns1" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider";
-      "dfs.ha.automatic-failover.enabled.ns1" = "true";
-      "dfs.ha.fencing.methods" = "shell(true)";
-      "ha.zookeeper.quorum" = "zk1:2181";
-    };
-    yarnSiteHA = {
-      "yarn.resourcemanager.zk-address" = "zk1:2181";
-      "yarn.resourcemanager.ha.enabled" = "true";
-      "yarn.resourcemanager.ha.rm-ids" = "rm1,rm2";
-      "yarn.resourcemanager.hostname.rm1" = "rm1";
-      "yarn.resourcemanager.hostname.rm2" = "rm2";
-      "yarn.resourcemanager.ha.automatic-failover.enabled" = "true";
-      "yarn.resourcemanager.cluster-id" = "cluster1";
-      # yarn.resourcemanager.webapp.address needs to be defined even though yarn.resourcemanager.hostname is set. This shouldn't be necessary, but there's a bug in
-      # hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/amfilter/AmFilterInitializer.java:70
-      # that causes AM containers to fail otherwise.
-      "yarn.resourcemanager.webapp.address.rm1" = "rm1:8088";
-      "yarn.resourcemanager.webapp.address.rm2" = "rm2:8088";
-    };
-  in {
-    zk1 = { ... }: {
-      services.zookeeper.enable = true;
-      networking.firewall.allowedTCPPorts = [ 2181 ];
-    };
+        # Automatic failover configuration
+        "dfs.client.failover.proxy.provider.ns1" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider";
+        "dfs.ha.automatic-failover.enabled.ns1" = "true";
+        "dfs.ha.fencing.methods" = "shell(true)";
+        "ha.zookeeper.quorum" = "zk1:2181";
+      };
+      yarnSiteHA = {
+        "yarn.resourcemanager.zk-address" = "zk1:2181";
+        "yarn.resourcemanager.ha.enabled" = "true";
+        "yarn.resourcemanager.ha.rm-ids" = "rm1,rm2";
+        "yarn.resourcemanager.hostname.rm1" = "rm1";
+        "yarn.resourcemanager.hostname.rm2" = "rm2";
+        "yarn.resourcemanager.ha.automatic-failover.enabled" = "true";
+        "yarn.resourcemanager.cluster-id" = "cluster1";
+        # yarn.resourcemanager.webapp.address needs to be defined even though yarn.resourcemanager.hostname is set. This shouldn't be necessary, but there's a bug in
+        # hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/amfilter/AmFilterInitializer.java:70
+        # that causes AM containers to fail otherwise.
+        "yarn.resourcemanager.webapp.address.rm1" = "rm1:8088";
+        "yarn.resourcemanager.webapp.address.rm2" = "rm2:8088";
+      };
+    in
+    {
+      zk1 = { ... }: {
+        services.zookeeper.enable = true;
+        networking.firewall.allowedTCPPorts = [ 2181 ];
+      };
 
-    # HDFS cluster
-    nn1 = {pkgs, options, ...}: {
-      services.hadoop = {
-        inherit package coreSite hdfsSite;
-        hdfs.namenode = {
-          enable = true;
-          openFirewall = true;
+      # HDFS cluster
+      nn1 = { ... }: {
+        services.hadoop = {
+          inherit package coreSite hdfsSite;
+          hdfs.namenode = {
+            enable = true;
+            openFirewall = true;
+          };
+          hdfs.zkfc.enable = true;
         };
-        hdfs.zkfc.enable = true;
       };
-    };
-    nn2 = {pkgs, options, ...}: {
-      services.hadoop = {
-        inherit package coreSite hdfsSite;
-        hdfs.namenode = {
-          enable = true;
-          openFirewall = true;
+      nn2 = { ... }: {
+        services.hadoop = {
+          inherit package coreSite hdfsSite;
+          hdfs.namenode = {
+            enable = true;
+            openFirewall = true;
+          };
+          hdfs.zkfc.enable = true;
         };
-        hdfs.zkfc.enable = true;
       };
-    };
 
-    jn1 = {pkgs, options, ...}: {
-      services.hadoop = {
-        inherit package coreSite hdfsSite;
-        hdfs.journalnode = {
-          enable = true;
-          openFirewall = true;
+      jn1 = { ... }: {
+        services.hadoop = {
+          inherit package coreSite hdfsSite;
+          hdfs.journalnode = {
+            enable = true;
+            openFirewall = true;
+          };
         };
       };
-    };
-    jn2 = {pkgs, options, ...}: {
-      services.hadoop = {
-        inherit package coreSite hdfsSite;
-        hdfs.journalnode = {
-          enable = true;
-          openFirewall = true;
+      jn2 = { ... }: {
+        services.hadoop = {
+          inherit package coreSite hdfsSite;
+          hdfs.journalnode = {
+            enable = true;
+            openFirewall = true;
+          };
         };
       };
-    };
-    jn3 = {pkgs, options, ...}: {
-      services.hadoop = {
-        inherit package coreSite hdfsSite;
-        hdfs.journalnode = {
-          enable = true;
-          openFirewall = true;
+      jn3 = { ... }: {
+        services.hadoop = {
+          inherit package coreSite hdfsSite;
+          hdfs.journalnode = {
+            enable = true;
+            openFirewall = true;
+          };
         };
       };
-    };
 
-    dn1 = {pkgs, options, ...}: {
-      services.hadoop = {
-        inherit package coreSite hdfsSite;
-        hdfs.datanode = {
-          enable = true;
-          openFirewall = true;
+      dn1 = { ... }: {
+        services.hadoop = {
+          inherit package coreSite hdfsSite;
+          hdfs.datanode = {
+            enable = true;
+            openFirewall = true;
+          };
         };
       };
-    };
 
-    # YARN cluster
-    rm1 = {pkgs, options, ...}: {
-      services.hadoop = {
-        inherit package coreSite hdfsSite;
-        yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
-        yarn.resourcemanager = {
-          enable = true;
-          openFirewall = true;
+      # YARN cluster
+      rm1 = { options, ... }: {
+        services.hadoop = {
+          inherit package coreSite hdfsSite;
+          yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
+          yarn.resourcemanager = {
+            enable = true;
+            openFirewall = true;
+          };
+        };
+      };
+      rm2 = { options, ... }: {
+        services.hadoop = {
+          inherit package coreSite hdfsSite;
+          yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
+          yarn.resourcemanager = {
+            enable = true;
+            openFirewall = true;
+          };
+        };
+      };
+      nm1 = { options, ... }: {
+        virtualisation.memorySize = 2048;
+        services.hadoop = {
+          inherit package coreSite hdfsSite;
+          yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
+          yarn.nodemanager = {
+            enable = true;
+            openFirewall = true;
+          };
         };
       };
     };
-    rm2 = {pkgs, options, ...}: {
-      services.hadoop = {
-        inherit package coreSite hdfsSite;
-        yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
-        yarn.resourcemanager = {
-          enable = true;
-          openFirewall = true;
-        };
-      };
-    };
-    nm1 = {pkgs, options, ...}: {
-      virtualisation.memorySize = 2048;
-      services.hadoop = {
-        inherit package coreSite hdfsSite;
-        yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
-        yarn.nodemanager = {
-          enable = true;
-          openFirewall = true;
-        };
-      };
-    };
-  };
 
   testScript = ''
     start_all()
diff --git a/nixos/tests/hadoop/hdfs.nix b/nixos/tests/hadoop/hdfs.nix
index c5aee0d5ee7a..e60d14109172 100644
--- a/nixos/tests/hadoop/hdfs.nix
+++ b/nixos/tests/hadoop/hdfs.nix
@@ -1,9 +1,11 @@
 # Test a minimal HDFS cluster with no HA
-import ../make-test-python.nix ({...}: {
+import ../make-test-python.nix ({ package, ... }: {
+  name = "hadoop-hdfs";
+
   nodes = {
-    namenode = {pkgs, ...}: {
+    namenode = { pkgs, ... }: {
       services.hadoop = {
-        package = pkgs.hadoop;
+        inherit package;
         hdfs = {
           namenode = {
             enable = true;
@@ -22,9 +24,9 @@ import ../make-test-python.nix ({...}: {
         };
       };
     };
-    datanode = {pkgs, ...}: {
+    datanode = { pkgs, ... }: {
       services.hadoop = {
-        package = pkgs.hadoop;
+        inherit package;
         hdfs.datanode = {
           enable = true;
           openFirewall = true;
diff --git a/nixos/tests/hadoop/yarn.nix b/nixos/tests/hadoop/yarn.nix
index fbf05b19cd29..c121f6556d5d 100644
--- a/nixos/tests/hadoop/yarn.nix
+++ b/nixos/tests/hadoop/yarn.nix
@@ -1,28 +1,33 @@
 # This only tests if YARN is able to start its services
 
-import ../make-test-python.nix ({...}: {
-  nodes = {
-    resourcemanager = {pkgs, ...}: {
-      services.hadoop.package = pkgs.hadoop;
-      services.hadoop.yarn.resourcemanager = {
-        enable = true;
-        openFirewall = true;
-      };
-      services.hadoop.yarnSite = {
-        "yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
-      };
-    };
-    nodemanager = {pkgs, ...}: {
-      services.hadoop.package = pkgs.hadoop;
-      services.hadoop.yarn.nodemanager = {
-        enable = true;
-        openFirewall = true;
-      };
-      services.hadoop.yarnSite = {
-        "yarn.resourcemanager.hostname" = "resourcemanager";
-        "yarn.nodemanager.log-dirs" = "/tmp/userlogs";
-      };
-    };
+import ../make-test-python.nix ({ package, ... }: {
+  name = "hadoop-yarn";
+  nodes = {
+    resourcemanager = { ... }: {
+      services.hadoop = {
+        inherit package;
+        yarn.resourcemanager = {
+          enable = true;
+          openFirewall = true;
+        };
+        yarnSite = {
+          "yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
+        };
+      };
+    };
+    nodemanager = { ... }: {
+      services.hadoop = {
+        inherit package;
+        yarn.nodemanager = {
+          enable = true;
+          openFirewall = true;
+        };
+        yarnSite = {
+          "yarn.resourcemanager.hostname" = "resourcemanager";
+          "yarn.nodemanager.log-dirs" = "/tmp/userlogs";
+        };
+      };
+    };
   };
 
   testScript = ''
diff --git a/pkgs/applications/networking/cluster/hadoop/default.nix b/pkgs/applications/networking/cluster/hadoop/default.nix
index 6a48cc8ada89..a16aff58a7c2 100644
--- a/pkgs/applications/networking/cluster/hadoop/default.nix
+++ b/pkgs/applications/networking/cluster/hadoop/default.nix
@@ -15,6 +15,7 @@
 , zlib
 , zstd
 , openssl
+, nixosTests
 }:
 
 with lib;
@@ -22,7 +23,7 @@ with lib;
 assert elem stdenv.system [ "x86_64-linux" "x86_64-darwin" "aarch64-linux" "aarch64-darwin" ];
 
 let
-  common = { pname, version, untarDir ? "${pname}-${version}", sha256, jdk, openssl ? null, nativeLibs ? [ ], libPatches ? "" }:
+  common = { pname, version, untarDir ? "${pname}-${version}", sha256, jdk, openssl ? null, nativeLibs ? [ ], libPatches ? "", tests }:
     stdenv.mkDerivation rec {
       inherit pname version jdk libPatches untarDir openssl;
       src = fetchurl {
@@ -49,6 +50,8 @@ let
         done
       '' + libPatches;
 
+      passthru = { inherit tests; };
+
       meta = {
         homepage = "https://hadoop.apache.org/";
         description = "Framework for distributed processing of large data sets across clusters of computers";
@@ -73,30 +76,29 @@ in
 {
   # Different version of hadoop support different java runtime versions
   # https://cwiki.apache.org/confluence/display/HADOOP/Hadoop+Java+Versions
-  hadoop_3_3 =
-    common
-      (rec {
-        pname = "hadoop";
-        version = "3.3.1";
-        untarDir = "${pname}-${version}";
-        sha256 = rec {
-          x86_64-linux = "1b3v16ihysqaxw8za1r5jlnphy8dwhivdx2d0z64309w57ihlxxd";
-          x86_64-darwin = x86_64-linux;
-          aarch64-linux = "00ln18vpi07jq2slk3kplyhcj8ad41n0yl880q5cihilk7daclxz";
-          aarch64-darwin = aarch64-linux;
-        };
+  hadoop_3_3 = common rec {
+    pname = "hadoop";
+    version = "3.3.1";
+    untarDir = "${pname}-${version}";
+    sha256 = rec {
+      x86_64-linux = "1b3v16ihysqaxw8za1r5jlnphy8dwhivdx2d0z64309w57ihlxxd";
+      x86_64-darwin = x86_64-linux;
+      aarch64-linux = "00ln18vpi07jq2slk3kplyhcj8ad41n0yl880q5cihilk7daclxz";
+      aarch64-darwin = aarch64-linux;
+    };
 
-        inherit openssl;
-        nativeLibs = [ stdenv.cc.cc.lib protobuf3_7 zlib snappy ];
-        libPatches = ''
-          ln -s ${getLib cyrus_sasl}/lib/libsasl2.so $out/lib/${untarDir}/lib/native/libsasl2.so.2
-          ln -s ${getLib openssl}/lib/libcrypto.so $out/lib/${untarDir}/lib/native/
-          ln -s ${getLib zlib}/lib/libz.so.1 $out/lib/${untarDir}/lib/native/
-          ln -s ${getLib zstd}/lib/libzstd.so.1 $out/lib/${untarDir}/lib/native/
-          ln -s ${getLib bzip2}/lib/libbz2.so.1 $out/lib/${untarDir}/lib/native/
-        '' + optionalString stdenv.isLinux "patchelf --add-rpath ${jdk.home}/lib/server $out/lib/${untarDir}/lib/native/libnativetask.so.1.0.0";
-        jdk = jdk11_headless;
-      });
+    inherit openssl;
+    nativeLibs = [ stdenv.cc.cc.lib protobuf3_7 zlib snappy ];
+    libPatches = ''
+      ln -s ${getLib cyrus_sasl}/lib/libsasl2.so $out/lib/${untarDir}/lib/native/libsasl2.so.2
+      ln -s ${getLib openssl}/lib/libcrypto.so $out/lib/${untarDir}/lib/native/
+      ln -s ${getLib zlib}/lib/libz.so.1 $out/lib/${untarDir}/lib/native/
+      ln -s ${getLib zstd}/lib/libzstd.so.1 $out/lib/${untarDir}/lib/native/
+      ln -s ${getLib bzip2}/lib/libbz2.so.1 $out/lib/${untarDir}/lib/native/
+    '' + optionalString stdenv.isLinux "patchelf --add-rpath ${jdk.home}/lib/server $out/lib/${untarDir}/lib/native/libnativetask.so.1.0.0";
+    jdk = jdk11_headless;
+    tests = nixosTests.hadoop;
+  };
   hadoop_3_2 = common rec {
     pname = "hadoop";
     version = "3.2.2";
@@ -104,11 +106,15 @@ in
     jdk = jdk8_headless;
     # not using native libs because of broken openssl_1_0_2 dependency
     # can be manually overriden
+    # Disable tests involving HDFS till the module adds support for hadoop_3_2
+    tests = nixosTests.hadoop_3_2 // { all = null; hdfs = null; };
   };
   hadoop2 = common rec {
     pname = "hadoop";
     version = "2.10.1";
     sha256.x86_64-linux = "1w31x4bk9f2swnx8qxx0cgwfg8vbpm6cy5lvfnbbpl3rsjhmyg97";
     jdk = jdk8_headless;
+    # Disable tests involving HDFS till the module adds support for hadoop2
+    tests = nixosTests.hadoop2 // { all = null; hdfs = null; };
   };
 }