libexpr: store lazy string lengths

Splitup the internal rep of string to allow for storing string lengths
without hurting overall eval performance. If the length is not known at
the time the value is created, it will be cached after the first call to
str(). This fixes O(n) substring, O(n^2) stringToCharacters, etc.

Change-Id: I10a6f16544f6619b0726f77b617acd517ee87a00
This commit is contained in:
Mel Zuser 2024-05-15 20:38:02 -07:00
parent 1eff2adc93
commit a9f026f8b1
3 changed files with 127 additions and 35 deletions

View file

@ -94,6 +94,14 @@ static const char * makeImmutableString(std::string_view s)
return t; return t;
} }
static Value::StringMeta * makeStringMeta(size_t size, const char * * context)
{
auto meta = (Value::StringMeta *) allocBytes(sizeof(Value::StringMeta));
meta->size = size;
meta->context = context;
return meta;
}
RootValue allocRootValue(Value * v) RootValue allocRootValue(Value * v)
{ {
@ -157,7 +165,10 @@ std::string showType(const Value & v)
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wswitch-enum" #pragma GCC diagnostic ignored "-Wswitch-enum"
switch (v.internalType) { switch (v.internalType) {
case tString: return v.string.context ? "a string with context" : "a string"; case tStringEmpty: return v.stringContext() ? "a string with context" : "a string";
case tStringUnknownSize: return "a string";
case tStringKnownSize: return "a string";
case tStringWithContext: return "a string with context";
case tPrimOp: case tPrimOp:
return fmt("the built-in function '%s'", std::string(v.primOp->name)); return fmt("the built-in function '%s'", std::string(v.primOp->name));
case tPrimOpApp: case tPrimOpApp:
@ -877,34 +888,41 @@ DebugTraceStacker::DebugTraceStacker(EvalState & evalState, DebugTrace t)
evalState.runDebugRepl(nullptr, trace.env, trace.expr); evalState.runDebugRepl(nullptr, trace.env, trace.expr);
} }
void Value::mkString(std::string_view s) void Value::mkString(const char * s, size_t size, const char * * context)
{ {
mkString(makeImmutableString(s));
if (size == 0) {
mkEmptyString(context);
} else if (! context) {
mkStringKnownSize(s, size);
} else {
mkStringWithContext(s, makeStringMeta(size, context));
}
} }
void Value::mkString(std::string_view s)
static void copyContextToValue(Value & v, const NixStringContext & context)
{ {
if (!context.empty()) { mkString(makeImmutableString(s), s.size());
size_t n = 0;
v.string.context = (const char * *)
allocBytes((context.size() + 1) * sizeof(char *));
for (auto & i : context)
v.string.context[n++] = dupString(i.to_string().c_str());
v.string.context[n] = 0;
}
} }
void Value::mkString(std::string_view s, const NixStringContext & context) void Value::mkString(std::string_view s, const NixStringContext & context)
{ {
mkString(s); mkStringMove(makeImmutableString(s), s.size(), context);
copyContextToValue(*this, context);
} }
void Value::mkStringMove(const char * s, const NixStringContext & context) void Value::mkStringMove(const char * s, size_t size, const NixStringContext & context)
{ {
mkString(s); if (context.empty())
copyContextToValue(*this, context); mkString(s, size);
else {
auto vContext = (const char * *) allocBytes((context.size() + 1) * sizeof(char *));
mkString(s, size, vContext);
size_t n = 0;
for (auto & i : context)
vContext[n++] = dupString(i.to_string().c_str());
vContext[n] = 0;
}
} }
@ -2091,8 +2109,9 @@ void ExprConcatStrings::eval(EvalState & state, Env & env, Value & v)
if (!context.empty()) if (!context.empty())
state.error<EvalError>("a string that refers to a store path cannot be appended to a path").atPos(pos).withFrame(env, *this).debugThrow(); state.error<EvalError>("a string that refers to a store path cannot be appended to a path").atPos(pos).withFrame(env, *this).debugThrow();
v.mkPath(CanonPath(canonPath(str()))); v.mkPath(CanonPath(canonPath(str())));
} else } else {
v.mkStringMove(c_str(), context); v.mkStringMove(c_str(), sSize, context);
}
} }

View file

@ -3740,7 +3740,7 @@ static void prim_substring(EvalState & state, const PosIdx pos, Value * * args,
if (len == 0) { if (len == 0) {
state.forceValue(*args[2], pos); state.forceValue(*args[2], pos);
if (args[2]->type() == nString) { if (args[2]->type() == nString) {
v.mkString("", args[2]->stringContext()); v.mkEmptyString(args[2]->stringContext());
return; return;
} }
} }

View file

@ -23,7 +23,10 @@ class BindingsBuilder;
typedef enum { typedef enum {
tInt = 1, tInt = 1,
tBool, tBool,
tString, tStringEmpty,
tStringUnknownSize,
tStringKnownSize,
tStringWithContext,
tPath, tPath,
tNull, tNull,
tAttrs, tAttrs,
@ -159,6 +162,13 @@ public:
inline bool isPrimOp() const { return internalType == tPrimOp; }; inline bool isPrimOp() const { return internalType == tPrimOp; };
inline bool isPrimOpApp() const { return internalType == tPrimOpApp; }; inline bool isPrimOpApp() const { return internalType == tPrimOpApp; };
// Widening Value kills eval performace so we use an extra indirection
// to carry more metadata.
struct StringMeta {
size_t size;
const char * * context; // must be in sorted order, see note below
};
union union
{ {
NixInt integer; NixInt integer;
@ -186,10 +196,29 @@ public:
* For canonicity, the store paths should be in sorted order. * For canonicity, the store paths should be in sorted order.
*/ */
// When a string is empty we can store the context directly.
struct {
const char * * context;
} emptyString;
// When the context is empty, we can use the InternalType
// to be lazy about calculating the size of the string.
struct { struct {
const char * s; const char * s;
const char * * context; // must be in sorted order } stringUnknownSize;
} string; struct {
const char * s;
size_t size;
} stringKnownSize;
// We happen to always have a size available when we're
// constucting a string with context. If this changes
// use the same trick as for strings without context.
struct {
const char * s;
const StringMeta * meta;
} stringWithContext;
const char * _path; const char * _path;
Bindings * attrs; Bindings * attrs;
@ -229,7 +258,7 @@ public:
switch (internalType) { switch (internalType) {
case tInt: return nInt; case tInt: return nInt;
case tBool: return nBool; case tBool: return nBool;
case tString: return nString; case tStringEmpty: case tStringUnknownSize: case tStringKnownSize: case tStringWithContext: return nString;
case tPath: return nPath; case tPath: return nPath;
case tNull: return nNull; case tNull: return nNull;
case tAttrs: return nAttrs; case tAttrs: return nAttrs;
@ -268,22 +297,55 @@ public:
boolean = b; boolean = b;
} }
inline void mkString(const char * s, const char * * context = 0) inline void mkEmptyString(const char * * context = 0)
{ {
internalType = tString; clearValue();
string.s = s; internalType = tStringEmpty;
string.context = context; emptyString.context = context;
}
inline void mkStringUnknownSize(const char * s)
{
clearValue();
internalType = tStringUnknownSize;
stringUnknownSize.s = s;
}
inline void mkStringKnownSize(const char * s, size_t size)
{
internalType = tStringKnownSize;
stringKnownSize.s = s;
stringKnownSize.size = size;
}
inline void mkStringWithContext(const char * s, StringMeta * meta)
{
internalType = tStringWithContext;
stringWithContext.s = s;
stringWithContext.meta = meta;
}
void mkString(const char * s, size_t size, const char * * context = 0);
// Don't change this proto. You'll get upcast to string_view and kill the gc.
inline void mkString(const char * s)
{
if (s[0] == '\0')
mkEmptyString();
else
mkStringUnknownSize(s);
} }
void mkString(std::string_view s); void mkString(std::string_view s);
void mkString(std::string_view s, const NixStringContext & context); void mkString(std::string_view s, const NixStringContext & context);
void mkStringMove(const char * s, const NixStringContext & context); void mkStringMove(const char * s, size_t size, const NixStringContext & context);
inline void mkString(const Symbol & s) inline void mkString(const Symbol & sym)
{ {
mkString(((const std::string &) s).c_str()); auto s = (const std::string &) sym;
mkString(s.c_str(), s.size());
} }
void mkPath(const SourcePath & path); void mkPath(const SourcePath & path);
@ -444,7 +506,10 @@ public:
const char * c_str() const const char * c_str() const
{ {
switch(internalType) { switch(internalType) {
case tString: return string.s; case tStringEmpty: return "";
case tStringUnknownSize: return stringUnknownSize.s;
case tStringKnownSize: return stringKnownSize.s;
case tStringWithContext: return stringWithContext.s;
default: abort(); default: abort();
} }
} }
@ -452,7 +517,12 @@ public:
std::string_view str() std::string_view str()
{ {
switch(internalType) { switch(internalType) {
case tString: return std::string_view(string.s); case tStringEmpty: return std::string_view("");
case tStringUnknownSize:
mkStringKnownSize(stringUnknownSize.s, strlen(stringUnknownSize.s));
return std::string_view(stringKnownSize.s, stringKnownSize.size);
case tStringKnownSize: return std::string_view(stringKnownSize.s, stringKnownSize.size);
case tStringWithContext: return std::string_view(stringWithContext.s, stringWithContext.meta->size);
default: abort(); default: abort();
} }
} }
@ -460,7 +530,10 @@ public:
const char * * stringContext() const const char * * stringContext() const
{ {
switch(internalType) { switch(internalType) {
case tString: return string.context; case tStringEmpty: return emptyString.context;
case tStringUnknownSize: return 0;
case tStringKnownSize: return 0;
case tStringWithContext: return stringWithContext.meta->context;
default: abort(); default: abort();
} }
} }