afc60d017b
This avoids putting a large disk image in the store (and possibly in a binary cache), while improving runtime performance. Assuming you're running an SSD, and/or with plenty of cache (?) it is feasible to preempt the virtualization overhead before VM start, in single-digit seconds. For some tests that perform many reads on the store, the improved performance of EROFS is sufficient that not only the image creation overhead is compensated for, but is actually faster. Stats for nixosTests.gitlab: Baseline without useNixStoreImage: >1000s Baseline with useNixStoreImage without writableStore = false ext4 image in store: 277 seconds + significant image build time and/or disk space Disposable erofs image: 249 seconds _including_ image build time Custom erofs overlay on 9p host store: 391 seconds; presumably because the overlay still performs too many 9p accesses, or perhaps some other overhead. This solution had no obvious performance advantage, while requiring extra options to work, so it was discarded.
86 lines
2.4 KiB
Python
86 lines
2.4 KiB
Python
|
|
# Convert a list of strings to a regex that matches everything but those strings
|
|
# ... and it had to be a POSIX regex; no negative lookahead :(
|
|
# This is a workaround for erofs supporting only exclude regex, not an include list
|
|
|
|
import sys
|
|
import re
|
|
from collections import defaultdict
|
|
|
|
# We can configure this script to match in different ways if we need to.
|
|
# The regex got too long for the argument list, so we had to truncate the
|
|
# hashes and use MATCH_STRING_PREFIX. That's less accurate, and might pick up some
|
|
# garbage like .lock files, but only if the sandbox doesn't hide those. Even
|
|
# then it should be harmless.
|
|
|
|
# Tail patterns emitted once an included string has been matched in full.
# Whichever one is assigned to match_end decides how strictly the input
# entries are compared against candidate paths.

# Negation of ^a$ : any further text after the entry makes the pattern match.
MATCH_EXACTLY = ".+"

# Negation of ^a : "//X" is a placeholder here. The intent was an epsilon
# regex instead, which did not seem to be supported; presumably "//X" was
# picked because it is not expected to occur in a path.
MATCH_STRING_PREFIX = "//X"

# Negation of ^a/? : the pattern matches when the entry is followed by any
# character other than "/".
MATCH_SUBPATHS = "[^/].*$"

# Active mode. The alternatives are MATCH_SUBPATHS and MATCH_EXACTLY.
match_end = MATCH_STRING_PREFIX
|
|
|
|
def chars_to_inverted_class(letters):
    """Return a negated bracket expression ``[^...]`` for *letters*.

    Args:
        letters: Non-empty iterable of single characters (a string, a list,
            or a dict whose keys are the characters).

    Returns:
        A string such as ``"[^abc]"``. No character is escaped; instead a
        ``]`` is moved to the very front and a ``-`` to the very end of the
        bracket contents, the positions where a bracket expression takes
        them literally.
    """
    assert len(letters) > 0
    remaining = list(letters)

    # "]" has to come directly after "[^" so it does not close the bracket.
    leading = ""
    if "]" in remaining:
        remaining.remove("]")
        leading = "]"

    # "-" has to come last so it cannot be read as a range operator.
    trailing = ""
    if "-" in remaining:
        remaining.remove("-")
        trailing = "-"

    return "[^" + leading + "".join(remaining) + trailing + "]"
|
|
|
|
# There's probably at least one bug in here, but it seems to work well enough
# for filtering store paths.
def strings_to_inverted_regex(strings):
    """Return a regex fragment that matches everything except *strings*.

    The fragment is a parenthesised alternation built one character at a
    time. At each position it contains

    * a negated bracket expression for "the next character is not the first
      character of any remaining entry", and
    * one ``(<char><nested fragment>)`` group per distinct first character.

    Once an entry has been consumed completely, the module-level ``match_end``
    pattern decides what is still matched after it. The caller is expected to
    anchor the result itself (the script prefixes it with ``^``).

    Args:
        strings: Iterable of entries to keep. Empty entries are ignored.

    Returns:
        The regex fragment as a ``str``.
    """
    # Blank entries are dropped here so that, inside the recursion, an empty
    # suffix can only mean "an entry ends at this position".
    return _invert_suffixes([item for item in strings if item != ""])


def _invert_suffixes(suffixes):
    """Build the fragment for the remaining *suffixes* of the entries.

    An empty suffix marks the end of an entry. With ``MATCH_STRING_PREFIX``
    that entry already covers every continuation, so nothing below this point
    may be matched, even if longer entries share the same prefix (``ab`` next
    to ``abc`` must not cause ``abd`` to be matched). The other modes keep
    the plain behaviour of skipping the empty suffix.
    """
    terminal = "(" + match_end + ")"

    if match_end == MATCH_STRING_PREFIX and "" in suffixes:
        return terminal

    # Group what is left of each entry by its first character; dict insertion
    # order keeps the output deterministic for a given input order.
    tails_by_head = defaultdict(list)
    for suffix in suffixes:
        if suffix != "":
            tails_by_head[suffix[0]].append(suffix[1:])

    if not tails_by_head:
        return terminal

    # First alternative: anything that starts with the wrong character.
    alternatives = [chars_to_inverted_class(tails_by_head)]

    # Remaining alternatives: the right character, but then it goes wrong.
    for head, tails in tails_by_head.items():
        alternatives.append(
            "(" + re.escape(head) + _invert_suffixes(tails) + ")"
        )

    return "(" + "|".join(alternatives) + ")"
|
|
|
|
if __name__ == "__main__":
    # One entry per stdin line; surrounding whitespace is stripped and lines
    # that end up empty are skipped.
    stripped_lines = (raw.strip() for raw in sys.stdin)
    stdin_lines = [entry for entry in stripped_lines if entry != ""]

    # Anchor the generated fragment at the start of the string.
    print("^" + strings_to_inverted_regex(stdin_lines))
|
|
|
|
# Test:
|
|
# (echo foo; echo fo/; echo foo/; echo foo/ba/r; echo b; echo az; echo az/; echo az/a; echo ab; echo ab/a; echo ab/; echo abc; echo abcde; echo abb; echo ac; echo b) | grep -vE "$((echo ab; echo az; echo foo;) | python includes-to-excludes.py | tee /dev/stderr )"
|
|
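# should print ab, az, foo and their subpaths when match_end = MATCH_SUBPATHS;
# with match_end = MATCH_STRING_PREFIX every line starting with ab, az or foo
# is printed (so abb, abc and abcde show up as well)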
|