87 lines
2.4 KiB
Python
87 lines
2.4 KiB
Python
|
|
||
|
# Convert a list of strings to a regex that matches everything but those strings
|
||
|
# ... and it had to be a POSIX regex; no negative lookahead :(
|
||
|
# This is a workaround for erofs supporting only exclude regex, not an include list
|
||
|
|
||
|
import sys
|
||
|
import re
|
||
|
from collections import defaultdict
|
||
|
|
||
|
# We can configure this script to match in different ways if we need to.
|
||
|
# The regex got too long for the argument list, so we had to truncate the
|
||
|
# hashes and use MATCH_STRING_PREFIX. That's less accurate, and might pick up some
|
||
|
# garbage like .lock files, but only if the sandbox doesn't hide those. Even
|
||
|
# then it should be harmless.
|
||
|
|
||
|
# Each constant below is a candidate value for match_end: the regex fragment
# emitted once no non-empty wanted strings remain at a given position.
# In the comments, "a" stands for one such wanted string.

# Produce the negation of ^a$ (at least one more character follows "a")
MATCH_EXACTLY = ".+"
# Produce the negation of ^a
# NOTE(review): "//X" is presumably a stand-in for "can never match" (a
# sequence not expected in real paths) - confirm. The original note says an
# epsilon regex was wanted here but apparently was not supported.
MATCH_STRING_PREFIX = "//X" # //X should be epsilon regex instead. Not supported??
# Produce the negation of ^a/? (a character other than "/" follows "a")
MATCH_SUBPATHS = "[^/].*$"

# The active mode. To switch, change which of these lines is uncommented.
# match_end = MATCH_SUBPATHS
match_end = MATCH_STRING_PREFIX
# match_end = MATCH_EXACTLY
|
||
|
|
||
|
def chars_to_inverted_class(letters):
    """Build a POSIX bracket expression matching any character NOT in *letters*.

    POSIX bracket expressions have no escape character, so the characters
    that are special inside brackets are made literal purely by position.

    Args:
        letters: Non-empty iterable of single-character strings (for example
            the keys of a dict). Duplicates are ignored.

    Returns:
        A string of the form ``[^...]``.

    Raises:
        AssertionError: If *letters* contains no characters.
    """
    # De-duplicate (keeping first-seen order) so that a repeated "]" or "-"
    # cannot be left behind in the middle of the expression.
    members = list(dict.fromkeys(letters))
    assert len(members) > 0, "need at least one character to exclude"

    # "]" is only literal when it is the very first member after "[^".
    leading = ""
    if "]" in members:
        leading = "]"
        members.remove("]")

    trailing = ""
    # "[" followed by ".", ":" or "=" would open a collating symbol, a
    # character class or an equivalence class. Putting it at the tail means
    # only "-" or the closing "]" can follow it.
    if "[" in members:
        trailing += "["
        members.remove("[")
    # "-" is only guaranteed literal when it is the last member; anywhere
    # else it could form a range.
    if "-" in members:
        trailing += "-"
        members.remove("-")

    return "[^" + leading + "".join(members) + trailing + "]"
|
||
|
|
||
|
# There's probably at least one bug in here, but it seems to work well enough
|
||
|
# for filtering store paths.
|
||
|
def strings_to_inverted_regex(strings):
    """Return a regex group matching text that diverges from every string.

    The result is an unanchored, parenthesised group; the caller is expected
    to anchor it (the script entry point prepends "^"). It is built
    recursively, one character position per call:

    * a negated bracket expression covers "the next character is one that no
      wanted string has here";
    * for each character that some wanted string does have here, a nested
      group covers "that character, then something that goes wrong later".

    When no non-empty strings remain, the module-level ``match_end`` fragment
    is emitted instead.

    Args:
        strings: Iterable of the remaining tails of the wanted strings at the
            current position. Empty strings mark wanted strings that end
            exactly here.

    Returns:
        The regex fragment as a string.

    NOTE(review): in MATCH_SUBPATHS mode a wanted string that is a proper
    prefix of another one (e.g. "a" and "ab") still looks mishandled: the
    shorter entry is ignored at that level, so "a/" would be excluded.
    Confirm before switching to that mode.
    """
    # Group the tails of the non-empty strings by their first character, and
    # remember whether any wanted string ends at this position.
    tails_by_head = defaultdict(list)
    ends_here = False
    for item in strings:
        if item == "":
            ends_here = True
        else:
            tails_by_head[item[0]].append(item[1:])

    # In prefix mode a wanted string ending here means everything that
    # follows is wanted, whatever longer siblings exist. Without this check
    # "a" + "ab" would wrongly exclude "ac".
    if ends_here and match_end == MATCH_STRING_PREFIX:
        return "(" + match_end + ")"

    # Nothing left to diverge from: emit the configured terminal fragment.
    if len(tails_by_head) == 0:
        return "(" + match_end + ")"

    # Match anything that starts with the wrong character
    alternatives = [chars_to_inverted_class(tails_by_head)]

    # Now match anything that starts with the right char, but then goes wrong
    for head, tails in tails_by_head.items():
        alternatives.append(
            "(" + re.escape(head) + strings_to_inverted_regex(tails) + ")"
        )

    return "(" + "|".join(alternatives) + ")"
|
||
|
|
||
|
if __name__ == "__main__":
    # One wanted string per stdin line. Surrounding whitespace is stripped
    # and lines that are empty after stripping are dropped.
    stripped = (raw.strip() for raw in sys.stdin)
    wanted = [entry for entry in stripped if entry != ""]

    # Anchor the generated group at the start of the string.
    print("^" + strings_to_inverted_regex(wanted))
|
||
|
|
||
|
# Test:
|
||
|
# (echo foo; echo fo/; echo foo/; echo foo/ba/r; echo b; echo az; echo az/; echo az/a; echo ab; echo ab/a; echo ab/; echo abc; echo abcde; echo abb; echo ac; echo b) | grep -vE "$((echo ab; echo az; echo foo;) | python includes-to-excludes.py | tee /dev/stderr )"
|
||
|
# should print ab, az, foo and their subpaths
|