libarchive proof of concept
This commit is contained in:
parent
3b9c9d34e5
commit
9ff5f6492f
8 changed files with 124 additions and 27 deletions
|
@ -18,6 +18,7 @@ SODIUM_LIBS = @SODIUM_LIBS@
|
|||
LIBLZMA_LIBS = @LIBLZMA_LIBS@
|
||||
SQLITE3_LIBS = @SQLITE3_LIBS@
|
||||
LIBBROTLI_LIBS = @LIBBROTLI_LIBS@
|
||||
LIBARCHIVE_LIBS = @LIBARCHIVE_LIBS@
|
||||
EDITLINE_LIBS = @EDITLINE_LIBS@
|
||||
bash = @bash@
|
||||
bindir = @bindir@
|
||||
|
|
|
@ -178,6 +178,8 @@ AC_CHECK_LIB([bz2], [BZ2_bzWriteOpen], [true],
|
|||
[AC_MSG_ERROR([Nix requires libbz2, which is part of bzip2. See https://web.archive.org/web/20180624184756/http://www.bzip.org/.])])
|
||||
AC_CHECK_HEADERS([bzlib.h], [true],
|
||||
[AC_MSG_ERROR([Nix requires libbz2, which is part of bzip2. See https://web.archive.org/web/20180624184756/http://www.bzip.org/.])])
|
||||
# Checks for libarchive
|
||||
PKG_CHECK_MODULES([LIBARCHIVE], [libarchive >= 3.4.0], [CXXFLAGS="$LIBARCHIVE_CFLAGS $CXXFLAGS"])
|
||||
|
||||
# Look for SQLite, a required dependency.
|
||||
PKG_CHECK_MODULES([SQLITE3], [sqlite3 >= 3.6.19], [CXXFLAGS="$SQLITE3_CFLAGS $CXXFLAGS"])
|
||||
|
|
|
@ -49,6 +49,7 @@ rec {
|
|||
[ curl
|
||||
bzip2 xz brotli editline
|
||||
openssl pkgconfig sqlite boehmgc
|
||||
libarchive
|
||||
boost
|
||||
nlohmann_json
|
||||
rustc cargo
|
||||
|
|
|
@ -907,7 +907,7 @@ CachedDownloadResult Downloader::downloadCached(
|
|||
printInfo("unpacking '%s'...", url);
|
||||
Path tmpDir = createTempDir();
|
||||
AutoDelete autoDelete(tmpDir, true);
|
||||
unpackTarfile(store->toRealPath(storePath), tmpDir, baseNameOf(url));
|
||||
unpackTarfile(store->toRealPath(storePath), tmpDir);
|
||||
auto members = readDirectory(tmpDir);
|
||||
if (members.size() != 1)
|
||||
throw nix::Error("tarball '%s' contains an unexpected number of top-level files", url);
|
||||
|
|
|
@ -6,6 +6,6 @@ libutil_DIR := $(d)
|
|||
|
||||
libutil_SOURCES := $(wildcard $(d)/*.cc)
|
||||
|
||||
libutil_LDFLAGS = $(LIBLZMA_LIBS) -lbz2 -pthread $(OPENSSL_LIBS) $(LIBBROTLI_LIBS) $(BOOST_LDFLAGS) -lboost_context
|
||||
libutil_LDFLAGS = $(LIBLZMA_LIBS) -lbz2 -pthread $(OPENSSL_LIBS) $(LIBBROTLI_LIBS) $(LIBARCHIVE_LIBS) $(BOOST_LDFLAGS) -lboost_context
|
||||
|
||||
libutil_LIBS = libnixrust
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
#include "rust-ffi.hh"
|
||||
#include "compression.hh"
|
||||
#include <archive.h>
|
||||
#include <archive_entry.h>
|
||||
#include "finally.hh"
|
||||
|
||||
extern "C" {
|
||||
rust::Result<std::tuple<>> *
|
||||
|
@ -8,29 +11,123 @@ extern "C" {
|
|||
|
||||
namespace nix {
|
||||
|
||||
std::shared_ptr<struct archive> archive_read_ptr() {
|
||||
return std::shared_ptr<struct archive>(archive_read_new(),
|
||||
[](auto p) {
|
||||
archive_read_close(p);
|
||||
archive_read_free(p);
|
||||
});
|
||||
}
|
||||
void archive_read_open_source(std::shared_ptr<struct archive> a, Source& s, unsigned int bufsize = 1024) {
|
||||
std::shared_ptr<unsigned char> buffer((unsigned char*)malloc(bufsize), [](auto p) { free(p); });
|
||||
typedef struct {
|
||||
decltype(buffer) buf;
|
||||
Source& src;
|
||||
unsigned int bs;
|
||||
} St;
|
||||
St* state = new St({buffer, s, bufsize});
|
||||
if (archive_read_open(a.get(), state,
|
||||
NULL /* open */,
|
||||
([] (struct archive*, void* sptr, const void** buf) -> long int {
|
||||
St& s = *(static_cast<St*>(sptr));
|
||||
*buf = s.buf.get();
|
||||
try {
|
||||
return s.src.read(s.buf.get(), s.bs);
|
||||
} catch (EndOfFile &) {
|
||||
return 0;
|
||||
}
|
||||
/* TODO: I don't know what happens if anything else is thrown here */
|
||||
}), [] (struct archive*, void* sptr) {
|
||||
delete static_cast<St*>(sptr);
|
||||
return ARCHIVE_OK;
|
||||
})) {
|
||||
throw Error("archive is corrupt (%s)", archive_error_string(a.get()));
|
||||
}
|
||||
}
|
||||
std::shared_ptr<struct archive> archive_write_ptr() {
|
||||
return std::shared_ptr<struct archive>(archive_write_disk_new(),
|
||||
[](auto p) {
|
||||
archive_write_close(p);
|
||||
archive_write_free(p);
|
||||
});
|
||||
}
|
||||
static void copy_data(std::shared_ptr<struct archive> ar, std::shared_ptr<struct archive> aw)
|
||||
{
|
||||
int r;
|
||||
const void *buff;
|
||||
size_t size;
|
||||
la_int64_t offset;
|
||||
|
||||
for (;;) {
|
||||
r = archive_read_data_block(ar.get(), &buff, &size, &offset);
|
||||
if (r == ARCHIVE_EOF) return;
|
||||
if (r < ARCHIVE_OK) {
|
||||
throw Error("archive is corrupt (%s)", archive_error_string(ar.get()));
|
||||
}
|
||||
r = archive_write_data_block(aw.get(), buff, size, offset);
|
||||
if (r < ARCHIVE_OK) {
|
||||
throw Error("could not write archive output (%s)", archive_error_string(aw.get()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void extract_archive(std::shared_ptr<struct archive> a, const Path & destDir) {
|
||||
char * cwd = getcwd(0, 0);
|
||||
if (!cwd) throw SysError("getting current directory");
|
||||
Finally freeCwd([&]() { free(cwd); });
|
||||
int r = chdir(destDir.c_str());
|
||||
if (r != 0) throw SysError("setting directory to tar output path");
|
||||
struct archive_entry *entry;
|
||||
r = archive_read_next_header(a.get(), &entry);
|
||||
if (r != ARCHIVE_OK) {
|
||||
throw Error("archive is corrupt (%s)", archive_error_string(a.get()));
|
||||
}
|
||||
int flags = 0;
|
||||
auto ext = archive_write_ptr();
|
||||
flags |= ARCHIVE_EXTRACT_PERM;
|
||||
flags |= ARCHIVE_EXTRACT_FFLAGS;
|
||||
archive_write_disk_set_options(ext.get(), flags);
|
||||
archive_write_disk_set_standard_lookup(ext.get());
|
||||
for(;;) {
|
||||
r = archive_read_next_header(a.get(), &entry);
|
||||
if (r == ARCHIVE_EOF) break;
|
||||
if (r == ARCHIVE_WARN) {
|
||||
std::cerr << "warning: " << archive_error_string(a.get());
|
||||
} else if (r < ARCHIVE_WARN) {
|
||||
throw Error("archive is corrupt (%s)", archive_error_string(a.get()));
|
||||
}
|
||||
r = archive_write_header(ext.get(), entry);
|
||||
if (r != ARCHIVE_OK) {
|
||||
throw Error("could not write archive output (%s)", archive_error_string(ext.get()));
|
||||
}
|
||||
if (archive_entry_size(entry) > 0) {
|
||||
copy_data(a, ext);
|
||||
}
|
||||
archive_write_finish_entry(ext.get());
|
||||
}
|
||||
r = chdir(cwd);
|
||||
if (r != 0) throw SysError("resetting directory after archive extraction");
|
||||
}
|
||||
void unpackTarfile(Source & source, const Path & destDir)
|
||||
{
|
||||
rust::Source source2(source);
|
||||
rust::CBox(unpack_tarfile(source2, destDir))->unwrap();
|
||||
auto a = archive_read_ptr();
|
||||
archive_read_support_filter_all(a.get());
|
||||
archive_read_support_format_all(a.get());
|
||||
archive_read_open_source(a, source);
|
||||
createDirs(destDir);
|
||||
extract_archive(a, destDir);
|
||||
}
|
||||
|
||||
void unpackTarfile(const Path & tarFile, const Path & destDir,
|
||||
std::optional<std::string> baseName)
|
||||
void unpackTarfile(const Path & tarFile, const Path & destDir)
|
||||
{
|
||||
if (!baseName) baseName = baseNameOf(tarFile);
|
||||
|
||||
auto source = sinkToSource([&](Sink & sink) {
|
||||
// FIXME: look at first few bytes to determine compression type.
|
||||
auto decompressor =
|
||||
// FIXME: add .gz support
|
||||
hasSuffix(*baseName, ".bz2") ? makeDecompressionSink("bzip2", sink) :
|
||||
hasSuffix(*baseName, ".xz") ? makeDecompressionSink("xz", sink) :
|
||||
makeDecompressionSink("none", sink);
|
||||
readFile(tarFile, *decompressor);
|
||||
decompressor->finish();
|
||||
});
|
||||
|
||||
unpackTarfile(*source, destDir);
|
||||
auto a = archive_read_ptr();
|
||||
archive_read_support_filter_all(a.get());
|
||||
archive_read_support_format_all(a.get());
|
||||
int r = archive_read_open_filename(a.get(), tarFile.c_str(), 16384);
|
||||
if (r != ARCHIVE_OK) {
|
||||
throw Error("archive is corrupt (%s)", archive_error_string(a.get()));
|
||||
}
|
||||
createDirs(destDir);
|
||||
extract_archive(a, destDir);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -4,7 +4,6 @@ namespace nix {
|
|||
|
||||
void unpackTarfile(Source & source, const Path & destDir);
|
||||
|
||||
void unpackTarfile(const Path & tarFile, const Path & destDir,
|
||||
std::optional<std::string> baseName = {});
|
||||
void unpackTarfile(const Path & tarFile, const Path & destDir);
|
||||
|
||||
}
|
||||
|
|
|
@ -190,10 +190,7 @@ static int _main(int argc, char * * argv)
|
|||
printInfo("unpacking...");
|
||||
Path unpacked = (Path) tmpDir + "/unpacked";
|
||||
createDirs(unpacked);
|
||||
if (hasSuffix(baseNameOf(uri), ".zip"))
|
||||
runProgram("unzip", true, {"-qq", tmpFile, "-d", unpacked});
|
||||
else
|
||||
unpackTarfile(tmpFile, unpacked, baseNameOf(uri));
|
||||
unpackTarfile(tmpFile, unpacked);
|
||||
|
||||
/* If the archive unpacks to a single file/directory, then use
|
||||
that as the top-level. */
|
||||
|
|
Loading…
Reference in a new issue