From 03bb8f84e0f54d9980b160785f98da8ae07196fe Mon Sep 17 00:00:00 2001
From: Tom Bereknyei <tomberek@gmail.com>
Date: Tue, 12 Oct 2021 02:14:36 -0400
Subject: [PATCH] Add compression level for NARs

Based on @dtzWill's #2276
---
 src/libstore/binary-cache-store.cc |  2 +-
 src/libstore/binary-cache-store.hh |  8 ++++++--
 src/libutil/compression.cc         | 22 +++++++++++++++-------
 src/libutil/compression.hh         |  4 ++--
 tests/compression-levels.sh        | 22 ++++++++++++++++++++++
 tests/local.mk                     |  1 +
 6 files changed, 47 insertions(+), 12 deletions(-)
 create mode 100644 tests/compression-levels.sh

diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc
index 3a6be541f..8fce94264 100644
--- a/src/libstore/binary-cache-store.cc
+++ b/src/libstore/binary-cache-store.cc
@@ -149,7 +149,7 @@ ref<const ValidPathInfo> BinaryCacheStore::addToStoreCommon(
     {
     FdSink fileSink(fdTemp.get());
     TeeSink teeSinkCompressed { fileSink, fileHashSink };
-    auto compressionSink = makeCompressionSink(compression, teeSinkCompressed);
+    auto compressionSink = makeCompressionSink(compression, teeSinkCompressed, parallelCompression, compressionLevel);
     TeeSink teeSinkUncompressed { *compressionSink, narHashSink };
     TeeSource teeSource { narSource, teeSinkUncompressed };
     narAccessor = makeNarAccessor(teeSource);
diff --git a/src/libstore/binary-cache-store.hh b/src/libstore/binary-cache-store.hh
index 657be2fcf..723f2e805 100644
--- a/src/libstore/binary-cache-store.hh
+++ b/src/libstore/binary-cache-store.hh
@@ -15,13 +15,17 @@ struct BinaryCacheStoreConfig : virtual StoreConfig
 {
     using StoreConfig::StoreConfig;
 
-    const Setting<std::string> compression{(StoreConfig*) this, "xz", "compression", "NAR compression method ('xz', 'bzip2', or 'none')"};
+    const Setting<std::string> compression{(StoreConfig*) this, "xz", "compression", "NAR compression method ('xz', 'bzip2', 'gzip', 'zstd', or 'none')"};
     const Setting<bool> writeNARListing{(StoreConfig*) this, false, "write-nar-listing", "whether to write a JSON file listing the files in each NAR"};
     const Setting<bool> writeDebugInfo{(StoreConfig*) this, false, "index-debug-info", "whether to index DWARF debug info files by build ID"};
     const Setting<Path> secretKeyFile{(StoreConfig*) this, "", "secret-key", "path to secret key used to sign the binary cache"};
     const Setting<Path> localNarCache{(StoreConfig*) this, "", "local-nar-cache", "path to a local cache of NARs"};
     const Setting<bool> parallelCompression{(StoreConfig*) this, false, "parallel-compression",
-        "enable multi-threading compression, available for xz only currently"};
+        "enable multi-threading compression for NARs, available for xz and zstd only currently"};
+    const Setting<int> compressionLevel{(StoreConfig*) this, -1, "compression-level",
+        "specify 'preset level' of compression to be used with NARs: "
+        "meaning and accepted range of values depends on compression method selected, "
+        "other than -1 which we reserve to indicate Nix defaults should be used"};
 };
 
 class BinaryCacheStore : public virtual BinaryCacheStoreConfig, public virtual Store
diff --git a/src/libutil/compression.cc b/src/libutil/compression.cc
index d26f68fde..4802edb78 100644
--- a/src/libutil/compression.cc
+++ b/src/libutil/compression.cc
@@ -16,6 +16,8 @@
 
 namespace nix {
 
+static const int COMPRESSION_LEVEL_DEFAULT = -1;
+
 // Don't feed brotli too much at once.
 struct ChunkedCompressionSink : CompressionSink
 {
@@ -65,14 +67,17 @@ struct ArchiveCompressionSink : CompressionSink
     Sink & nextSink;
     struct archive * archive;
 
-    ArchiveCompressionSink(Sink & nextSink, std::string format, bool parallel) : nextSink(nextSink) {
+    ArchiveCompressionSink(Sink & nextSink, std::string format, bool parallel, int _level = COMPRESSION_LEVEL_DEFAULT) : nextSink(nextSink) {
         archive = archive_write_new();
         if (!archive) throw Error("failed to initialize libarchive");
         check(archive_write_add_filter_by_name(archive, format.c_str()), "couldn't initialize compression (%s)");
         check(archive_write_set_format_raw(archive));
-        if (format == "xz" && parallel) {
+        if (parallel && (format == "xz" || format == "zstd")) { // multi-threading is only offered for xz and zstd
             check(archive_write_set_filter_option(archive, format.c_str(), "threads", "0"));
         }
+        // Keep the filter's own default preset unless a level was explicitly requested.
+        if (_level != COMPRESSION_LEVEL_DEFAULT)
+            check(archive_write_set_filter_option(archive, format.c_str(), "compression-level", std::to_string(_level).c_str()));
         // disable internal buffering
         check(archive_write_set_bytes_per_block(archive, 0));
         // disable output padding
@@ -126,7 +131,10 @@ private:
 struct NoneSink : CompressionSink
 {
     Sink & nextSink;
-    NoneSink(Sink & nextSink) : nextSink(nextSink) { }
+    NoneSink(Sink & nextSink, int level = COMPRESSION_LEVEL_DEFAULT) : nextSink(nextSink) {
+        if (level != COMPRESSION_LEVEL_DEFAULT)
+            printError("Warning: requested compression level '%d' not supported by compression method 'none'", level);
+    }
     void finish() override { flush(); }
     void write(std::string_view data) override { nextSink(data); }
 };
@@ -257,13 +265,13 @@ struct BrotliCompressionSink : ChunkedCompressionSink
     }
 };
 
-ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel)
+ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel, int level)
 {
     std::vector<std::string> la_supports = {
         "bzip2", "compress", "grzip", "gzip", "lrzip", "lz4", "lzip", "lzma", "lzop", "xz", "zstd"
     };
     if (std::find(la_supports.begin(), la_supports.end(), method) != la_supports.end()) {
-        return make_ref<ArchiveCompressionSink>(nextSink, method, parallel);
+        return make_ref<ArchiveCompressionSink>(nextSink, method, parallel, level);
     }
     if (method == "none")
         return make_ref<NoneSink>(nextSink);
@@ -273,10 +281,10 @@ ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & next
         throw UnknownCompressionMethod("unknown compression method '%s'", method);
 }
 
-ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel)
+ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel, int level)
 {
     StringSink ssink;
-    auto sink = makeCompressionSink(method, ssink, parallel);
+    auto sink = makeCompressionSink(method, ssink, parallel, level);
     (*sink)(in);
     sink->finish();
     return ssink.s;
diff --git a/src/libutil/compression.hh b/src/libutil/compression.hh
index 338a0d9f2..9b1e4a9d4 100644
--- a/src/libutil/compression.hh
+++ b/src/libutil/compression.hh
@@ -19,9 +19,9 @@ ref<std::string> decompress(const std::string & method, const std::string & in);
 
 std::unique_ptr<FinishSink> makeDecompressionSink(const std::string & method, Sink & nextSink);
 
-ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel = false);
+ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel = false, int level = -1);
 
-ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel = false);
+ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel = false, int level = -1);
 
 MakeError(UnknownCompressionMethod, Error);
 
diff --git a/tests/compression-levels.sh b/tests/compression-levels.sh
new file mode 100644
index 000000000..85f12974a
--- /dev/null
+++ b/tests/compression-levels.sh
@@ -0,0 +1,22 @@
+source common.sh
+
+# Check that the 'compression-level' store setting changes the size of the NARs written to a binary cache.
+clearStore
+clearCache
+
+outPath=$(nix-build dependencies.nix --no-out-link)
+
+# Copy with xz preset 0 and record the summed FileSize of all narinfos.
+cacheURI="file://$cacheDir?compression=xz&compression-level=0"
+nix copy --to "$cacheURI" "$outPath"
+FILESIZES=$(cat "${cacheDir}"/*.narinfo | awk '/FileSize: /{sum+=$2}END{print sum}')
+
+clearCache
+
+# Repeat with xz preset 5 into a fresh cache.
+cacheURI="file://$cacheDir?compression=xz&compression-level=5"
+nix copy --to "$cacheURI" "$outPath"
+FILESIZES2=$(cat "${cacheDir}"/*.narinfo | awk '/FileSize: /{sum+=$2}END{print sum}')
+
+# The preset-0 upload must be strictly larger than the preset-5 upload.
+[[ $FILESIZES -gt $FILESIZES2 ]]
diff --git a/tests/local.mk b/tests/local.mk
index b100e7f15..d88c3a875 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -33,6 +33,7 @@ nix_tests = \
   shell.sh \
   brotli.sh \
   zstd.sh \
+  compression-levels.sh \
   pure-eval.sh \
   check.sh \
   plugins.sh \