diff --git a/nixos/modules/services/cluster/hadoop/default.nix b/nixos/modules/services/cluster/hadoop/default.nix
index 9968706d5a95..636bb4067277 100644
--- a/nixos/modules/services/cluster/hadoop/default.nix
+++ b/nixos/modules/services/cluster/hadoop/default.nix
@@ -25,6 +25,7 @@ with lib;
     hdfsSite = mkOption {
       default = {
         "dfs.namenode.rpc-bind-host" = "0.0.0.0";
+        "dfs.namenode.http-address" = "0.0.0.0:9870";
       };
       type = types.attrsOf types.anything;
       example = literalExpression ''
diff --git a/nixos/modules/services/cluster/hadoop/hdfs.nix b/nixos/modules/services/cluster/hadoop/hdfs.nix
index 451e74df7120..61d9941298aa 100644
--- a/nixos/modules/services/cluster/hadoop/hdfs.nix
+++ b/nixos/modules/services/cluster/hadoop/hdfs.nix
@@ -100,7 +100,8 @@ in
       allowedTCPPorts = [
         9870 # namenode.http-address
         8020 # namenode.rpc-address
-        8022 # namenode. servicerpc-address
+        8022 # namenode.servicerpc-address
+        8019 # dfs.ha.zkfc.port
       ];
       preStart = (mkIf cfg.hdfs.namenode.formatOnInit
         "${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true"
@@ -109,10 +110,15 @@ in
 
     (hadoopServiceConfig {
       name = "DataNode";
-      allowedTCPPorts = [
+      # port numbers for datanode changed between hadoop 2 and 3
+      allowedTCPPorts = if versionAtLeast cfg.package.version "3" then [
         9864 # datanode.http.address
         9866 # datanode.address
         9867 # datanode.ipc.address
+      ] else [
+        50075 # datanode.http.address
+        50010 # datanode.address
+        50020 # datanode.ipc.address
       ];
     })
 
diff --git a/nixos/tests/hadoop/hdfs.nix b/nixos/tests/hadoop/hdfs.nix
index e60d14109172..cc70fb8ecaf8 100644
--- a/nixos/tests/hadoop/hdfs.nix
+++ b/nixos/tests/hadoop/hdfs.nix
@@ -1,8 +1,16 @@
 # Test a minimal HDFS cluster with no HA
-import ../make-test-python.nix ({ package, ... }: {
+import ../make-test-python.nix ({ package, lib, ... }:
+with lib;
+{
   name = "hadoop-hdfs";
 
-  nodes = {
+  nodes = let
+    coreSite = {
+      "fs.defaultFS" = "hdfs://namenode:8020";
+      "hadoop.proxyuser.httpfs.groups" = "*";
+      "hadoop.proxyuser.httpfs.hosts" = "*";
+    };
+  in {
     namenode = { pkgs, ... }: {
       services.hadoop = {
         inherit package;
@@ -13,15 +21,12 @@ import ../make-test-python.nix ({ package, ... }: {
            formatOnInit = true;
          };
          httpfs = {
-           enable = true;
+           # The NixOS hadoop module only supports webHDFS on 3.3 and newer
+           enable = mkIf (versionAtLeast package.version "3.3") true;
            openFirewall = true;
          };
        };
-      coreSite = {
-        "fs.defaultFS" = "hdfs://namenode:8020";
-        "hadoop.proxyuser.httpfs.groups" = "*";
-        "hadoop.proxyuser.httpfs.hosts" = "*";
-      };
+      inherit coreSite;
      };
    };
    datanode = { pkgs, ... }: {
@@ -31,11 +36,7 @@ import ../make-test-python.nix ({ package, ... }: {
          enable = true;
          openFirewall = true;
        };
-      coreSite = {
-        "fs.defaultFS" = "hdfs://namenode:8020";
-        "hadoop.proxyuser.httpfs.groups" = "*";
-        "hadoop.proxyuser.httpfs.hosts" = "*";
-      };
+      inherit coreSite;
      };
    };
  };
@@ -46,21 +47,32 @@ import ../make-test-python.nix ({ package, ... }: {
     namenode.wait_for_unit("hdfs-namenode")
     namenode.wait_for_unit("network.target")
     namenode.wait_for_open_port(8020)
+    namenode.succeed("ss -tulpne | systemd-cat")
+    namenode.succeed("cat /etc/hadoop*/hdfs-site.xml | systemd-cat")
     namenode.wait_for_open_port(9870)
 
     datanode.wait_for_unit("hdfs-datanode")
     datanode.wait_for_unit("network.target")
+  '' + ( if versionAtLeast package.version "3" then ''
     datanode.wait_for_open_port(9864)
     datanode.wait_for_open_port(9866)
     datanode.wait_for_open_port(9867)
 
-    namenode.succeed("curl -f http://namenode:9870")
     datanode.succeed("curl -f http://datanode:9864")
+  '' else ''
+    datanode.wait_for_open_port(50075)
+    datanode.wait_for_open_port(50010)
+    datanode.wait_for_open_port(50020)
+
+    datanode.succeed("curl -f http://datanode:50075")
+  '' ) + ''
+    namenode.succeed("curl -f http://namenode:9870")
 
     datanode.succeed("sudo -u hdfs hdfs dfsadmin -safemode wait")
     datanode.succeed("echo testfilecontents | sudo -u hdfs hdfs dfs -put - /testfile")
     assert "testfilecontents" in datanode.succeed("sudo -u hdfs hdfs dfs -cat /testfile")
 
+  '' + optionalString ( versionAtLeast package.version "3.3" ) ''
     namenode.wait_for_unit("hdfs-httpfs")
     namenode.wait_for_open_port(14000)
     assert "testfilecontents" in datanode.succeed("curl -f \"http://namenode:14000/webhdfs/v1/testfile?user.name=hdfs&op=OPEN\" 2>&1")
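
Not part of the patch: a minimal, evaluatable sketch of the lib.versionAtLeast comparison the DataNode port change relies on (the portsFor helper is hypothetical, for illustration only):

    let
      lib = (import <nixpkgs> { }).lib;
      # versionAtLeast v1 v2 is true when v1 >= v2 under Nix version ordering
      portsFor = version:
        if lib.versionAtLeast version "3" then [
          9864  # datanode.http.address (hadoop 3)
          9866  # datanode.address
          9867  # datanode.ipc.address
        ] else [
          50075 # datanode.http.address (hadoop 2)
          50010 # datanode.address
          50020 # datanode.ipc.address
        ];
    in {
      hadoop2 = portsFor "2.10.1"; # => [ 50075 50010 50020 ]
      hadoop3 = portsFor "3.3.1";  # => [ 9864 9866 9867 ]
    }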
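
Also worth noting, since the new "dfs.namenode.http-address" entry lives in the option default: NixOS option defaults apply only while the option is otherwise undefined, so a deployment that sets services.hadoop.hdfsSite replaces the default attrset wholesale and must restate any defaults it still wants. A hypothetical deployment snippet (the address is illustrative):

    {
      services.hadoop.hdfsSite = {
        # restate the module default, since defining hdfsSite replaces it
        "dfs.namenode.rpc-bind-host" = "0.0.0.0";
        # hypothetical override: serve the namenode web UI on one address only
        "dfs.namenode.http-address" = "192.0.2.10:9870";
      };
    }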