decompress(): Use a Source and Sink
This allows decompression to happen in O(1) memory.
This commit is contained in:
parent
64441f0551
commit
3e6b194d78
6 changed files with 210 additions and 91 deletions
|
@ -203,22 +203,18 @@ void BinaryCacheStore::narFromPath(const Path & storePath, Sink & sink)
|
|||
stats.narRead++;
|
||||
stats.narReadCompressedBytes += nar->size();
|
||||
|
||||
/* Decompress the NAR. FIXME: would be nice to have the remote
|
||||
side do this. */
|
||||
try {
|
||||
nar = decompress(info->compression, *nar);
|
||||
} catch (UnknownCompressionMethod &) {
|
||||
throw Error(format("binary cache path '%s' uses unknown compression method '%s'")
|
||||
% storePath % info->compression);
|
||||
}
|
||||
uint64_t narSize = 0;
|
||||
|
||||
stats.narReadBytes += nar->size();
|
||||
StringSource source(*nar);
|
||||
|
||||
printMsg(lvlTalkative, format("exporting path '%1%' (%2% bytes)") % storePath % nar->size());
|
||||
LambdaSink wrapperSink([&](const unsigned char * data, size_t len) {
|
||||
sink(data, len);
|
||||
narSize += len;
|
||||
});
|
||||
|
||||
assert(nar->size() % 8 == 0);
|
||||
decompress(info->compression, source, wrapperSink);
|
||||
|
||||
sink((unsigned char *) nar->c_str(), nar->size());
|
||||
stats.narReadBytes += narSize;
|
||||
}
|
||||
|
||||
void BinaryCacheStore::queryPathInfoUncached(const Path & storePath,
|
||||
|
|
|
@ -17,7 +17,23 @@
|
|||
|
||||
namespace nix {
|
||||
|
||||
static ref<std::string> decompressXZ(const std::string & in)
|
||||
static const size_t bufSize = 32 * 1024;
|
||||
|
||||
static void decompressNone(Source & source, Sink & sink)
|
||||
{
|
||||
std::vector<unsigned char> buf(bufSize);
|
||||
while (true) {
|
||||
size_t n;
|
||||
try {
|
||||
n = source.read(buf.data(), buf.size());
|
||||
} catch (EndOfFile &) {
|
||||
break;
|
||||
}
|
||||
sink(buf.data(), n);
|
||||
}
|
||||
}
|
||||
|
||||
static void decompressXZ(Source & source, Sink & sink)
|
||||
{
|
||||
lzma_stream strm(LZMA_STREAM_INIT);
|
||||
|
||||
|
@ -29,36 +45,44 @@ static ref<std::string> decompressXZ(const std::string & in)
|
|||
Finally free([&]() { lzma_end(&strm); });
|
||||
|
||||
lzma_action action = LZMA_RUN;
|
||||
uint8_t outbuf[BUFSIZ];
|
||||
ref<std::string> res = make_ref<std::string>();
|
||||
strm.next_in = (uint8_t *) in.c_str();
|
||||
strm.avail_in = in.size();
|
||||
strm.next_out = outbuf;
|
||||
strm.avail_out = sizeof(outbuf);
|
||||
std::vector<uint8_t> inbuf(bufSize), outbuf(bufSize);
|
||||
strm.next_in = nullptr;
|
||||
strm.avail_in = 0;
|
||||
strm.next_out = outbuf.data();
|
||||
strm.avail_out = outbuf.size();
|
||||
bool eof = false;
|
||||
|
||||
while (true) {
|
||||
checkInterrupt();
|
||||
|
||||
if (strm.avail_in == 0 && !eof) {
|
||||
strm.next_in = inbuf.data();
|
||||
try {
|
||||
strm.avail_in = source.read((unsigned char *) strm.next_in, inbuf.size());
|
||||
} catch (EndOfFile &) {
|
||||
eof = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (strm.avail_in == 0)
|
||||
action = LZMA_FINISH;
|
||||
|
||||
lzma_ret ret = lzma_code(&strm, action);
|
||||
|
||||
if (strm.avail_out == 0 || ret == LZMA_STREAM_END) {
|
||||
res->append((char *) outbuf, sizeof(outbuf) - strm.avail_out);
|
||||
strm.next_out = outbuf;
|
||||
strm.avail_out = sizeof(outbuf);
|
||||
if (strm.avail_out < outbuf.size()) {
|
||||
sink((unsigned char *) outbuf.data(), outbuf.size() - strm.avail_out);
|
||||
strm.next_out = outbuf.data();
|
||||
strm.avail_out = outbuf.size();
|
||||
}
|
||||
|
||||
if (ret == LZMA_STREAM_END)
|
||||
return res;
|
||||
if (ret == LZMA_STREAM_END) return;
|
||||
|
||||
if (ret != LZMA_OK)
|
||||
throw CompressionError("error %d while decompressing xz file", ret);
|
||||
}
|
||||
}
|
||||
|
||||
static ref<std::string> decompressBzip2(const std::string & in)
|
||||
static void decompressBzip2(Source & source, Sink & sink)
|
||||
{
|
||||
bz_stream strm;
|
||||
memset(&strm, 0, sizeof(strm));
|
||||
|
@ -69,39 +93,50 @@ static ref<std::string> decompressBzip2(const std::string & in)
|
|||
|
||||
Finally free([&]() { BZ2_bzDecompressEnd(&strm); });
|
||||
|
||||
char outbuf[BUFSIZ];
|
||||
ref<std::string> res = make_ref<std::string>();
|
||||
strm.next_in = (char *) in.c_str();
|
||||
strm.avail_in = in.size();
|
||||
strm.next_out = outbuf;
|
||||
strm.avail_out = sizeof(outbuf);
|
||||
std::vector<char> inbuf(bufSize), outbuf(bufSize);
|
||||
strm.next_in = nullptr;
|
||||
strm.avail_in = 0;
|
||||
strm.next_out = outbuf.data();
|
||||
strm.avail_out = outbuf.size();
|
||||
bool eof = false;
|
||||
|
||||
while (true) {
|
||||
checkInterrupt();
|
||||
|
||||
int ret = BZ2_bzDecompress(&strm);
|
||||
|
||||
if (strm.avail_out == 0 || ret == BZ_STREAM_END) {
|
||||
res->append(outbuf, sizeof(outbuf) - strm.avail_out);
|
||||
strm.next_out = outbuf;
|
||||
strm.avail_out = sizeof(outbuf);
|
||||
if (strm.avail_in == 0 && !eof) {
|
||||
strm.next_in = inbuf.data();
|
||||
try {
|
||||
strm.avail_in = source.read((unsigned char *) strm.next_in, inbuf.size());
|
||||
} catch (EndOfFile &) {
|
||||
eof = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (ret == BZ_STREAM_END)
|
||||
return res;
|
||||
int ret = BZ2_bzDecompress(&strm);
|
||||
|
||||
if (strm.avail_in == 0 && strm.avail_out == outbuf.size() && eof)
|
||||
throw CompressionError("bzip2 data ends prematurely");
|
||||
|
||||
if (strm.avail_out < outbuf.size()) {
|
||||
sink((unsigned char *) outbuf.data(), outbuf.size() - strm.avail_out);
|
||||
strm.next_out = outbuf.data();
|
||||
strm.avail_out = outbuf.size();
|
||||
}
|
||||
|
||||
if (ret == BZ_STREAM_END) return;
|
||||
|
||||
if (ret != BZ_OK)
|
||||
throw CompressionError("error while decompressing bzip2 file");
|
||||
|
||||
if (strm.avail_in == 0)
|
||||
throw CompressionError("bzip2 data ends prematurely");
|
||||
}
|
||||
}
|
||||
|
||||
static ref<std::string> decompressBrotli(const std::string & in)
|
||||
static void decompressBrotli(Source & source, Sink & sink)
|
||||
{
|
||||
#if !HAVE_BROTLI
|
||||
return make_ref<std::string>(runProgram(BROTLI, true, {"-d"}, {in}));
|
||||
RunOptions options(BROTLI, {"-d"});
|
||||
options.stdin = &source;
|
||||
options.stdout = &sink;
|
||||
runProgram2(options);
|
||||
#else
|
||||
auto *s = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
|
||||
if (!s)
|
||||
|
@ -109,16 +144,26 @@ static ref<std::string> decompressBrotli(const std::string & in)
|
|||
|
||||
Finally free([s]() { BrotliDecoderDestroyInstance(s); });
|
||||
|
||||
uint8_t outbuf[BUFSIZ];
|
||||
ref<std::string> res = make_ref<std::string>();
|
||||
const uint8_t *next_in = (uint8_t *)in.c_str();
|
||||
size_t avail_in = in.size();
|
||||
uint8_t *next_out = outbuf;
|
||||
size_t avail_out = sizeof(outbuf);
|
||||
std::vector<uint8_t> inbuf(bufSize), outbuf(bufSize);
|
||||
const uint8_t * next_in = nullptr;
|
||||
size_t avail_in = 0;
|
||||
bool eof = false;
|
||||
|
||||
while (true) {
|
||||
checkInterrupt();
|
||||
|
||||
if (avail_in == 0 && !eof) {
|
||||
next_in = inbuf.data();
|
||||
try {
|
||||
avail_in = source.read((unsigned char *) next_in, inbuf.size());
|
||||
} catch (EndOfFile &) {
|
||||
eof = true;
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t * next_out = outbuf.data();
|
||||
size_t avail_out = outbuf.size();
|
||||
|
||||
auto ret = BrotliDecoderDecompressStream(s,
|
||||
&avail_in, &next_in,
|
||||
&avail_out, &next_out,
|
||||
|
@ -128,51 +173,49 @@ static ref<std::string> decompressBrotli(const std::string & in)
|
|||
case BROTLI_DECODER_RESULT_ERROR:
|
||||
throw CompressionError("error while decompressing brotli file");
|
||||
case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
|
||||
throw CompressionError("incomplete or corrupt brotli file");
|
||||
if (eof)
|
||||
throw CompressionError("incomplete or corrupt brotli file");
|
||||
break;
|
||||
case BROTLI_DECODER_RESULT_SUCCESS:
|
||||
if (avail_in != 0)
|
||||
throw CompressionError("unexpected input after brotli decompression");
|
||||
break;
|
||||
case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT:
|
||||
// I'm not sure if this can happen, but abort if this happens with empty buffer
|
||||
if (avail_out == sizeof(outbuf))
|
||||
if (avail_out == outbuf.size())
|
||||
throw CompressionError("brotli decompression requires larger buffer");
|
||||
break;
|
||||
}
|
||||
|
||||
// Always ensure we have full buffer for next invocation
|
||||
if (avail_out < sizeof(outbuf)) {
|
||||
res->append((char*)outbuf, sizeof(outbuf) - avail_out);
|
||||
next_out = outbuf;
|
||||
avail_out = sizeof(outbuf);
|
||||
}
|
||||
if (avail_out < outbuf.size())
|
||||
sink((unsigned char *) outbuf.data(), outbuf.size() - avail_out);
|
||||
|
||||
if (ret == BROTLI_DECODER_RESULT_SUCCESS) return res;
|
||||
if (ret == BROTLI_DECODER_RESULT_SUCCESS) return;
|
||||
}
|
||||
#endif // HAVE_BROTLI
|
||||
}
|
||||
|
||||
ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel)
|
||||
{
|
||||
StringSink ssink;
|
||||
auto sink = makeCompressionSink(method, ssink, parallel);
|
||||
(*sink)(in);
|
||||
sink->finish();
|
||||
return ssink.s;
|
||||
}
|
||||
|
||||
ref<std::string> decompress(const std::string & method, const std::string & in)
|
||||
{
|
||||
StringSource source(in);
|
||||
StringSink sink;
|
||||
decompress(method, source, sink);
|
||||
return sink.s;
|
||||
}
|
||||
|
||||
void decompress(const std::string & method, Source & source, Sink & sink)
|
||||
{
|
||||
if (method == "none")
|
||||
return make_ref<std::string>(in);
|
||||
return decompressNone(source, sink);
|
||||
else if (method == "xz")
|
||||
return decompressXZ(in);
|
||||
return decompressXZ(source, sink);
|
||||
else if (method == "bzip2")
|
||||
return decompressBzip2(in);
|
||||
return decompressBzip2(source, sink);
|
||||
else if (method == "br")
|
||||
return decompressBrotli(in);
|
||||
return decompressBrotli(source, sink);
|
||||
else
|
||||
throw UnknownCompressionMethod(format("unknown compression method '%s'") % method);
|
||||
throw UnknownCompressionMethod("unknown compression method '%s'", method);
|
||||
}
|
||||
|
||||
struct NoneSink : CompressionSink
|
||||
|
@ -499,4 +542,13 @@ ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & next
|
|||
throw UnknownCompressionMethod(format("unknown compression method '%s'") % method);
|
||||
}
|
||||
|
||||
ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel)
|
||||
{
|
||||
StringSink ssink;
|
||||
auto sink = makeCompressionSink(method, ssink, parallel);
|
||||
(*sink)(in);
|
||||
sink->finish();
|
||||
return ssink.s;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -8,10 +8,12 @@
|
|||
|
||||
namespace nix {
|
||||
|
||||
ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel = false);
|
||||
|
||||
ref<std::string> decompress(const std::string & method, const std::string & in);
|
||||
|
||||
void decompress(const std::string & method, Source & source, Sink & sink);
|
||||
|
||||
ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel = false);
|
||||
|
||||
struct CompressionSink : BufferedSink
|
||||
{
|
||||
virtual void finish() = 0;
|
||||
|
|
|
@ -56,7 +56,7 @@ struct Source
|
|||
void operator () (unsigned char * data, size_t len);
|
||||
|
||||
/* Store up to ‘len’ in the buffer pointed to by ‘data’, and
|
||||
return the number of bytes stored. If blocks until at least
|
||||
return the number of bytes stored. It blocks until at least
|
||||
one byte is available. */
|
||||
virtual size_t read(unsigned char * data, size_t len) = 0;
|
||||
|
||||
|
@ -175,6 +175,22 @@ struct TeeSource : Source
|
|||
};
|
||||
|
||||
|
||||
/* Convert a function into a sink. */
|
||||
struct LambdaSink : Sink
|
||||
{
|
||||
typedef std::function<void(const unsigned char *, size_t)> lambda_t;
|
||||
|
||||
lambda_t lambda;
|
||||
|
||||
LambdaSink(const lambda_t & lambda) : lambda(lambda) { }
|
||||
|
||||
virtual void operator () (const unsigned char * data, size_t len)
|
||||
{
|
||||
lambda(data, len);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void writePadding(size_t len, Sink & sink);
|
||||
void writeString(const unsigned char * buf, size_t len, Sink & sink);
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include "affinity.hh"
|
||||
#include "sync.hh"
|
||||
#include "finally.hh"
|
||||
#include "serialise.hh"
|
||||
|
||||
#include <cctype>
|
||||
#include <cerrno>
|
||||
|
@ -568,19 +569,25 @@ void writeFull(int fd, const string & s, bool allowInterrupts)
|
|||
|
||||
string drainFD(int fd)
|
||||
{
|
||||
string result;
|
||||
unsigned char buffer[4096];
|
||||
StringSink sink;
|
||||
drainFD(fd, sink);
|
||||
return std::move(*sink.s);
|
||||
}
|
||||
|
||||
|
||||
void drainFD(int fd, Sink & sink)
|
||||
{
|
||||
std::vector<unsigned char> buf(4096);
|
||||
while (1) {
|
||||
checkInterrupt();
|
||||
ssize_t rd = read(fd, buffer, sizeof buffer);
|
||||
ssize_t rd = read(fd, buf.data(), buf.size());
|
||||
if (rd == -1) {
|
||||
if (errno != EINTR)
|
||||
throw SysError("reading from file");
|
||||
}
|
||||
else if (rd == 0) break;
|
||||
else result.append((char *) buffer, rd);
|
||||
else sink(buf.data(), rd);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
@ -920,20 +927,47 @@ string runProgram(Path program, bool searchPath, const Strings & args,
|
|||
return res.second;
|
||||
}
|
||||
|
||||
std::pair<int, std::string> runProgram(const RunOptions & options)
|
||||
std::pair<int, std::string> runProgram(const RunOptions & options_)
|
||||
{
|
||||
RunOptions options(options_);
|
||||
StringSink sink;
|
||||
options.stdout = &sink;
|
||||
|
||||
int status = 0;
|
||||
|
||||
try {
|
||||
runProgram2(options);
|
||||
} catch (ExecError & e) {
|
||||
status = e.status;
|
||||
}
|
||||
|
||||
return {status, std::move(*sink.s)};
|
||||
}
|
||||
|
||||
void runProgram2(const RunOptions & options)
|
||||
{
|
||||
checkInterrupt();
|
||||
|
||||
assert(!(options.stdin && options.input));
|
||||
|
||||
std::unique_ptr<Source> source_;
|
||||
Source * source = options.stdin;
|
||||
|
||||
if (options.input) {
|
||||
source_ = std::make_unique<StringSource>(*options.input);
|
||||
source = source_.get();
|
||||
}
|
||||
|
||||
/* Create a pipe. */
|
||||
Pipe out, in;
|
||||
out.create();
|
||||
if (options.input) in.create();
|
||||
if (options.stdout) out.create();
|
||||
if (source) in.create();
|
||||
|
||||
/* Fork. */
|
||||
Pid pid = startProcess([&]() {
|
||||
if (dup2(out.writeSide.get(), STDOUT_FILENO) == -1)
|
||||
if (options.stdout && dup2(out.writeSide.get(), STDOUT_FILENO) == -1)
|
||||
throw SysError("dupping stdout");
|
||||
if (options.input && dup2(in.readSide.get(), STDIN_FILENO) == -1)
|
||||
if (source && dup2(in.readSide.get(), STDIN_FILENO) == -1)
|
||||
throw SysError("dupping stdin");
|
||||
|
||||
Strings args_(options.args);
|
||||
|
@ -961,11 +995,20 @@ std::pair<int, std::string> runProgram(const RunOptions & options)
|
|||
});
|
||||
|
||||
|
||||
if (options.input) {
|
||||
if (source) {
|
||||
in.readSide = -1;
|
||||
writerThread = std::thread([&]() {
|
||||
try {
|
||||
writeFull(in.writeSide.get(), *options.input);
|
||||
std::vector<unsigned char> buf(8 * 1024);
|
||||
while (true) {
|
||||
size_t n;
|
||||
try {
|
||||
n = source->read(buf.data(), buf.size());
|
||||
} catch (EndOfFile &) {
|
||||
break;
|
||||
}
|
||||
writeFull(in.writeSide.get(), buf.data(), n);
|
||||
}
|
||||
promise.set_value();
|
||||
} catch (...) {
|
||||
promise.set_exception(std::current_exception());
|
||||
|
@ -974,15 +1017,17 @@ std::pair<int, std::string> runProgram(const RunOptions & options)
|
|||
});
|
||||
}
|
||||
|
||||
string result = drainFD(out.readSide.get());
|
||||
if (options.stdout)
|
||||
drainFD(out.readSide.get(), *options.stdout);
|
||||
|
||||
/* Wait for the child to finish. */
|
||||
int status = pid.wait();
|
||||
|
||||
/* Wait for the writer thread to finish. */
|
||||
if (options.input) promise.get_future().get();
|
||||
if (source) promise.get_future().get();
|
||||
|
||||
return {status, result};
|
||||
if (status)
|
||||
throw ExecError(status, fmt("program '%1%' %2%", options.program, statusToString(status)));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -25,6 +25,9 @@
|
|||
|
||||
namespace nix {
|
||||
|
||||
struct Sink;
|
||||
struct Source;
|
||||
|
||||
|
||||
/* Return an environment variable. */
|
||||
string getEnv(const string & key, const string & def = "");
|
||||
|
@ -150,6 +153,7 @@ MakeError(EndOfFile, Error)
|
|||
/* Read a file descriptor until EOF occurs. */
|
||||
string drainFD(int fd);
|
||||
|
||||
void drainFD(int fd, Sink & sink);
|
||||
|
||||
|
||||
/* Automatic cleanup of resources. */
|
||||
|
@ -256,6 +260,8 @@ struct RunOptions
|
|||
bool searchPath = true;
|
||||
Strings args;
|
||||
std::experimental::optional<std::string> input;
|
||||
Source * stdin = nullptr;
|
||||
Sink * stdout = nullptr;
|
||||
bool _killStderr = false;
|
||||
|
||||
RunOptions(const Path & program, const Strings & args)
|
||||
|
@ -266,6 +272,8 @@ struct RunOptions
|
|||
|
||||
std::pair<int, std::string> runProgram(const RunOptions & options);
|
||||
|
||||
void runProgram2(const RunOptions & options);
|
||||
|
||||
|
||||
class ExecError : public Error
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue