libarchive proof of concept

This commit is contained in:
Yorick van Pelt 2019-12-07 22:35:14 +07:00
parent 3b9c9d34e5
commit 9ff5f6492f
No known key found for this signature in database
GPG key ID: A36E70F9DC014A15
8 changed files with 124 additions and 27 deletions

View file

@ -18,6 +18,7 @@ SODIUM_LIBS = @SODIUM_LIBS@
LIBLZMA_LIBS = @LIBLZMA_LIBS@
SQLITE3_LIBS = @SQLITE3_LIBS@
LIBBROTLI_LIBS = @LIBBROTLI_LIBS@
LIBARCHIVE_LIBS = @LIBARCHIVE_LIBS@
EDITLINE_LIBS = @EDITLINE_LIBS@
bash = @bash@
bindir = @bindir@

View file

@ -178,6 +178,8 @@ AC_CHECK_LIB([bz2], [BZ2_bzWriteOpen], [true],
[AC_MSG_ERROR([Nix requires libbz2, which is part of bzip2. See https://web.archive.org/web/20180624184756/http://www.bzip.org/.])])
AC_CHECK_HEADERS([bzlib.h], [true],
[AC_MSG_ERROR([Nix requires libbz2, which is part of bzip2. See https://web.archive.org/web/20180624184756/http://www.bzip.org/.])])
# Checks for libarchive
PKG_CHECK_MODULES([LIBARCHIVE], [libarchive >= 3.4.0], [CXXFLAGS="$LIBARCHIVE_CFLAGS $CXXFLAGS"])
# Look for SQLite, a required dependency.
PKG_CHECK_MODULES([SQLITE3], [sqlite3 >= 3.6.19], [CXXFLAGS="$SQLITE3_CFLAGS $CXXFLAGS"])

View file

@ -49,6 +49,7 @@ rec {
[ curl
bzip2 xz brotli editline
openssl pkgconfig sqlite boehmgc
libarchive
boost
nlohmann_json
rustc cargo

View file

@ -907,7 +907,7 @@ CachedDownloadResult Downloader::downloadCached(
printInfo("unpacking '%s'...", url);
Path tmpDir = createTempDir();
AutoDelete autoDelete(tmpDir, true);
unpackTarfile(store->toRealPath(storePath), tmpDir, baseNameOf(url));
unpackTarfile(store->toRealPath(storePath), tmpDir);
auto members = readDirectory(tmpDir);
if (members.size() != 1)
throw nix::Error("tarball '%s' contains an unexpected number of top-level files", url);

View file

@ -6,6 +6,6 @@ libutil_DIR := $(d)
libutil_SOURCES := $(wildcard $(d)/*.cc)
libutil_LDFLAGS = $(LIBLZMA_LIBS) -lbz2 -pthread $(OPENSSL_LIBS) $(LIBBROTLI_LIBS) $(BOOST_LDFLAGS) -lboost_context
libutil_LDFLAGS = $(LIBLZMA_LIBS) -lbz2 -pthread $(OPENSSL_LIBS) $(LIBBROTLI_LIBS) $(LIBARCHIVE_LIBS) $(BOOST_LDFLAGS) -lboost_context
libutil_LIBS = libnixrust

View file

@ -1,5 +1,8 @@
#include "rust-ffi.hh"
#include "compression.hh"
#include <archive.h>
#include <archive_entry.h>
#include "finally.hh"
extern "C" {
rust::Result<std::tuple<>> *
@ -8,29 +11,123 @@ extern "C" {
namespace nix {
std::shared_ptr<struct archive> archive_read_ptr() {
return std::shared_ptr<struct archive>(archive_read_new(),
[](auto p) {
archive_read_close(p);
archive_read_free(p);
});
}
void archive_read_open_source(std::shared_ptr<struct archive> a, Source& s, unsigned int bufsize = 1024) {
std::shared_ptr<unsigned char> buffer((unsigned char*)malloc(bufsize), [](auto p) { free(p); });
typedef struct {
decltype(buffer) buf;
Source& src;
unsigned int bs;
} St;
St* state = new St({buffer, s, bufsize});
if (archive_read_open(a.get(), state,
NULL /* open */,
([] (struct archive*, void* sptr, const void** buf) -> long int {
St& s = *(static_cast<St*>(sptr));
*buf = s.buf.get();
try {
return s.src.read(s.buf.get(), s.bs);
} catch (EndOfFile &) {
return 0;
}
/* TODO: I don't know what happens if anything else is thrown here */
}), [] (struct archive*, void* sptr) {
delete static_cast<St*>(sptr);
return ARCHIVE_OK;
})) {
throw Error("archive is corrupt (%s)", archive_error_string(a.get()));
}
}
std::shared_ptr<struct archive> archive_write_ptr() {
return std::shared_ptr<struct archive>(archive_write_disk_new(),
[](auto p) {
archive_write_close(p);
archive_write_free(p);
});
}
static void copy_data(std::shared_ptr<struct archive> ar, std::shared_ptr<struct archive> aw)
{
int r;
const void *buff;
size_t size;
la_int64_t offset;
for (;;) {
r = archive_read_data_block(ar.get(), &buff, &size, &offset);
if (r == ARCHIVE_EOF) return;
if (r < ARCHIVE_OK) {
throw Error("archive is corrupt (%s)", archive_error_string(ar.get()));
}
r = archive_write_data_block(aw.get(), buff, size, offset);
if (r < ARCHIVE_OK) {
throw Error("could not write archive output (%s)", archive_error_string(aw.get()));
}
}
}
static void extract_archive(std::shared_ptr<struct archive> a, const Path & destDir) {
char * cwd = getcwd(0, 0);
if (!cwd) throw SysError("getting current directory");
Finally freeCwd([&]() { free(cwd); });
int r = chdir(destDir.c_str());
if (r != 0) throw SysError("setting directory to tar output path");
struct archive_entry *entry;
r = archive_read_next_header(a.get(), &entry);
if (r != ARCHIVE_OK) {
throw Error("archive is corrupt (%s)", archive_error_string(a.get()));
}
int flags = 0;
auto ext = archive_write_ptr();
flags |= ARCHIVE_EXTRACT_PERM;
flags |= ARCHIVE_EXTRACT_FFLAGS;
archive_write_disk_set_options(ext.get(), flags);
archive_write_disk_set_standard_lookup(ext.get());
for(;;) {
r = archive_read_next_header(a.get(), &entry);
if (r == ARCHIVE_EOF) break;
if (r == ARCHIVE_WARN) {
std::cerr << "warning: " << archive_error_string(a.get());
} else if (r < ARCHIVE_WARN) {
throw Error("archive is corrupt (%s)", archive_error_string(a.get()));
}
r = archive_write_header(ext.get(), entry);
if (r != ARCHIVE_OK) {
throw Error("could not write archive output (%s)", archive_error_string(ext.get()));
}
if (archive_entry_size(entry) > 0) {
copy_data(a, ext);
}
archive_write_finish_entry(ext.get());
}
r = chdir(cwd);
if (r != 0) throw SysError("resetting directory after archive extraction");
}
/* Unpack the archive read from `source` into the directory `destDir`.

   NOTE(review): this text comes from a commit view whose +/- markers were
   stripped. The two `rust::` statements are presumably the removed
   implementation and the libarchive statements the added one -- confirm
   against the real file before treating both as live code. */
void unpackTarfile(Source & source, const Path & destDir)
{
/* Rust-backed unpacking (presumably the pre-commit implementation). */
rust::Source source2(source);
rust::CBox(unpack_tarfile(source2, destDir))->unwrap();
/* libarchive-backed unpacking: enable every filter and format, attach
   `source` as the input, make sure `destDir` exists, then extract. */
auto a = archive_read_ptr();
archive_read_support_filter_all(a.get());
archive_read_support_format_all(a.get());
archive_read_open_source(a, source);
createDirs(destDir);
extract_archive(a, destDir);
}
/* Unpack the archive stored in the file `tarFile` into `destDir`.

   NOTE(review): this text comes from a commit view whose +/- markers were
   stripped, so two signatures and two bodies are interleaved below. The
   three-parameter signature and the `baseName`/decompression-sink code
   are presumably the removed version; the two-parameter signature and
   the libarchive code are presumably the added one -- confirm against
   the real file. */
void unpackTarfile(const Path & tarFile, const Path & destDir,
std::optional<std::string> baseName)
void unpackTarfile(const Path & tarFile, const Path & destDir)
{
/* Suffix-based variant: picks a decompressor from the file name
   (".bz2" -> bzip2, ".xz" -> xz, otherwise none) and forwards the
   decompressed stream to the Source-based overload. */
if (!baseName) baseName = baseNameOf(tarFile);
auto source = sinkToSource([&](Sink & sink) {
// FIXME: look at first few bytes to determine compression type.
auto decompressor =
// FIXME: add .gz support
hasSuffix(*baseName, ".bz2") ? makeDecompressionSink("bzip2", sink) :
hasSuffix(*baseName, ".xz") ? makeDecompressionSink("xz", sink) :
makeDecompressionSink("none", sink);
readFile(tarFile, *decompressor);
decompressor->finish();
});
unpackTarfile(*source, destDir);
/* libarchive variant: enable every filter and format, open `tarFile`
   directly with a 16384-byte block size, make sure `destDir` exists,
   then extract. Any result other than ARCHIVE_OK from the open call is
   raised as Error. */
auto a = archive_read_ptr();
archive_read_support_filter_all(a.get());
archive_read_support_format_all(a.get());
int r = archive_read_open_filename(a.get(), tarFile.c_str(), 16384);
if (r != ARCHIVE_OK) {
throw Error("archive is corrupt (%s)", archive_error_string(a.get()));
}
createDirs(destDir);
extract_archive(a, destDir);
}
}

View file

@ -4,7 +4,6 @@ namespace nix {
void unpackTarfile(Source & source, const Path & destDir);
void unpackTarfile(const Path & tarFile, const Path & destDir,
std::optional<std::string> baseName = {});
void unpackTarfile(const Path & tarFile, const Path & destDir);
}

View file

@ -190,10 +190,7 @@ static int _main(int argc, char * * argv)
printInfo("unpacking...");
Path unpacked = (Path) tmpDir + "/unpacked";
createDirs(unpacked);
if (hasSuffix(baseNameOf(uri), ".zip"))
runProgram("unzip", true, {"-qq", tmpFile, "-d", unpacked});
else
unpackTarfile(tmpFile, unpacked, baseNameOf(uri));
unpackTarfile(tmpFile, unpacked);
/* If the archive unpacks to a single file/directory, then use
that as the top-level. */