tree-sitter/update: Fetch repositories in parallel

This was the main annoyance with the existing script: it fetched
one repository after the other instead of fetching several in parallel.
The `xe` tool is a simple `xargs`-like utility that can run commands in parallel.

We put the JSON information for all repositories into a file with one
object per line, then pass that file to `xe` on stdin.
This commit is contained in:
Profpatsch 2022-09-07 02:55:18 +02:00
parent a64a9d5552
commit 2de554d512
2 changed files with 34 additions and 18 deletions

View file

@ -419,29 +419,43 @@ let
lib.concatMapStringsSep "\n" f
(lib.mapAttrsToList (k: v: { name = k; } // v) attrs);
# Serialize every element of a list with `lib.generators.toJSON {}` and join the
# results with newlines, i.e. one JSON value per list element, separated by "\n".
jsonNewlines = lib.concatMapStringsSep "\n" (lib.generators.toJSON {});
# Build a shell script that runs `script` once for every element of `listOfAttrs`.
# Each element is serialized to JSON (via `jsonNewlines`, newline-separated) into a
# store file named "${name}.json"; that file is fed to `xe` on stdin, and the
# element's JSON is handed to `script` in place of `{}`.
# `-j5` lets xe run up to 5 invocations at once; `-F` makes xe stop and exit when
# an invocation fails (flag meanings per the xe manual).
forEachParallel = name: script: listOfAttrs: writeShellScript "for-each-parallel.sh" ''
< ${writeText "${name}.json" (jsonNewlines listOfAttrs)} \
${xe}/bin/xe -F -j5 ${script} {}
'';
# Path (as a string) of the `grammars` directory next to this file; the update
# script below creates it and writes its output files there.
outputDir = "${toString ./.}/grammars";
# Shell script that updates the files in the grammars output directory:
#  1. asks fetchImpl (`fetch-orga-latest-repos`) for the repos of the "tree-sitter" orga,
#  2. pipes that list through checkTreeSitterRepos to compare it with the grammars known here,
#  3. writes one <name>.json per entry of allGrammars (via `fetchImpl fetch-repo`),
#  4. writes a default.nix that maps each grammar name to `lib.importJSON ./<name>.json`.
# NOTE(review): this span appears to contain both a sequential (foreachSh) and a
# parallel (forEachParallel) variant of steps 3-4, as if diff markers were lost —
# confirm which lines are live before editing.
update-all-grammars = writeShellScript "update-all-grammars.sh" ''
set -euo pipefail
echo "fetching list of grammars" 1>&2
treeSitterRepos=$(${fetchImpl} fetch-orga-latest-repos '{"orga": "tree-sitter"}')
echo "checking the tree-sitter repo list against the grammars we know" 1>&2
printf '%s' "$treeSitterRepos" | ${checkTreeSitterRepos}
outputDir="${toString ./.}/grammars"
echo "writing files to $outputDir" 1>&2
mkdir -p "$outputDir"
${foreachSh allGrammars
({name, orga, repo}: ''
${atomically-write} \
$outputDir/${name}.json \
${fetchImpl} fetch-repo '${lib.generators.toJSON {} {inherit orga repo;}}'
'')}
( echo "{ lib }:"
echo "{"
${foreachSh allGrammars
({name, ...}: ''
# indentation hack
printf " %s = lib.importJSON ./%s.json;\n" "${name}" "${name}"'')}
echo "}" ) \
> "$outputDir/default.nix"
echo "writing files to ${outputDir}" 1>&2
mkdir -p "${outputDir}"
${forEachParallel
"repos-to-fetch"
(writeShellScript "fetch-repo" ''
${atomically-write} \
"${outputDir}/$(jq --raw-output --null-input '$ARGS.positional[0].name' --jsonargs "$1").json" \
${fetchImpl} fetch-repo "$1"
'')
(lib.mapAttrsToList (name: attrs: attrs // { inherit name; }) allGrammars)
}
${atomically-write} \
"${outputDir}/default.nix" \
${writeShellScript "print-all-grammars" ''
echo "{ lib }:"
echo "{"
${foreachSh allGrammars
({name, ...}: ''
printf " %s = lib.importJSON ./%s.json;\n" "${name}" "${name}"'')}
echo "}"
''}
'';
# Atomically write a file (just `>` redirection in bash

View file

@ -19,6 +19,8 @@ def curl_github_args(token: str | None, url: str) -> Args:
"""Query the github API via curl"""
if not debug:
yield "--silent"
# follow redirects
yield "--location"
if token:
yield "-H"
yield f"Authorization: token {token}"
@ -79,7 +81,7 @@ def fetchRepo() -> None:
case {"tag_name": tag_name}:
release = tag_name
case _:
sys.exit("git result did not have a `tag_name` field")
sys.exit(f"git result for {orga}/{repo} did not have a `tag_name` field")
print(f"Fetching latest release ({release}) of {orga}/{repo}", file=sys.stderr)
res = run_bin(