nixpkgs/pkgs/build-support/fetchgit/nix-prefetch-git
Bjørn Forsman 53614cf1a7 nix-prefetch-git: fix determinism with leaveDotGit
Add more files to the delete list:

 * .git/FETCH_HEAD
 * .git/ORIG_HEAD
 * .git/refs/remotes/origin/HEAD
 * .git/config

Further, remove all remote branches, remove tags not reachable from the
given 'rev', do a full repack and then garbage collect unreferenced
objects.

According to my testing, the result is fully deterministic. As in "any
change done to the upstream repo, ahead of 'rev', will not affect the
hash of the resulting 'clone'". Even changing the clone URL will not
change the output hash, because .git/config is removed.

A new version of git can of course change store format, but that's
unavoidable.

For big repositories, the repack operation may be a bit heavy. But as
far as I can see there is no cheaper way to determinism.
2014-11-02 13:15:33 +01:00

325 lines
8.7 KiB
Bash
Executable file

#! /bin/sh -e
# Values normally supplied on the command line; they start out empty and are
# filled in by the argument parsing that follows.
url=
rev=
expHash=
fetchSubmodules=
builder=

# The hash algorithm defaults to whatever the environment provides.
hashType=$NIX_HASH_ALGO

# deepClone is normalised to the literal word "true" or "false": any
# non-empty NIX_PREFETCH_GIT_DEEP_CLONE switches it on.
case "$NIX_PREFETCH_GIT_DEEP_CLONE" in
    "") deepClone=false ;;
    *)  deepClone=true ;;
esac

# leaveDotGit is either empty (off) or "true" (on); only the exact value 1
# in NIX_PREFETCH_GIT_LEAVE_DOT_GIT switches it on.
case "$NIX_PREFETCH_GIT_LEAVE_DOT_GIT" in
    1) leaveDotGit=true ;;
    *) leaveDotGit= ;;
esac
# Parse the command line into the global option variables.
#
# Flags (--deepClone, --no-deepClone, --leave-dotGit, --fetch-submodules,
# --builder) set their variable directly.  Options that take a value (--out,
# --url, --rev, --hash) remember a "set_<variable>" marker in $argfun and the
# *next* argument is stored into that variable.  The remaining words are, in
# order, URL, REVISION and EXPECTED-HASH; a fourth one terminates the script
# with exit status 1.
parse_args(){
    argi=0
    argfun=""
    for arg; do
        if test -z "$argfun"; then
            case $arg in
                --out) argfun=set_out;;
                --url) argfun=set_url;;
                --rev) argfun=set_rev;;
                --hash) argfun=set_hashType;;
                --deepClone) deepClone=true;;
                --no-deepClone) deepClone=false;;
                --leave-dotGit) leaveDotGit=true;;
                --fetch-submodules) fetchSubmodules=true;;
                --builder) builder=true;;
                *)
                    argi=$((argi + 1))
                    case $argi in
                        1) url=$arg;;
                        2) rev=$arg;;
                        3) expHash=$arg;;
                        *) exit 1;;
                    esac
                    ;;
            esac
        else
            case $argfun in
                set_*)
                    var=${argfun#set_}
                    # Only the variable *name* is expanded before eval runs;
                    # the value is expanded by eval itself, so whitespace and
                    # shell metacharacters in it are stored verbatim instead
                    # of being split or executed.
                    eval "$var=\$arg"
                    ;;
            esac
            argfun=""
        fi
    done
}
parse_args "$@"
# Print the command-line synopsis on stderr and terminate the script with
# exit status 1.
#
# The option descriptions follow what the script actually does: --hash selects
# the hash algorithm (it is stored in hashType), and --deepClone/--no-deepClone
# choose between a full fetch and a shallow single-ref fetch.
usage(){
    echo >&2 "syntax: nix-prefetch-git [options] [URL [REVISION [EXPECTED-HASH]]]
Options:
      --out path          Path where the output would be stored.
      --url url           Any url understood by 'git clone'.
      --rev ref           Any sha1 or references (such as refs/heads/master)
      --hash h            Hash algorithm for the output hash (e.g. sha256).
      --deepClone         Fetch the complete history of the repository.
      --no-deepClone      Prefer a shallow fetch of only the requested ref.
      --leave-dotGit      Keep the .git directories.
      --fetch-submodules  Fetch submodules.
      --builder           Clone as fetchgit does, but url, rev, and out option are mandatory.
"
    exit 1
}
# A repository URL is mandatory: without one, print the synopsis (usage also
# ends the script).
test -n "$url" || usage
# Create an empty git repository in the current directory and register the
# given URL as its "origin" remote.
#   $1 - repository URL (or local path)
init_remote(){
    local url="$1"
    git init
    # Quoted so that a URL or path containing whitespace or glob characters
    # reaches git as a single, unmodified argument.
    git remote add origin "$url"
}
# Return the reference of an hash if it exists on the remote repository.
#   $1 - object name to look for
# Prints the first ref advertised by "origin" whose object name is exactly $1
# and nothing when there is none.  The comparison is an exact string match on
# the first tab-separated column, so an abbreviated hash can never select a
# ref whose object name merely ends with the same digits.
ref_from_hash(){
    local hash="$1"
    # Appending "" forces awk to compare as strings even when both sides
    # happen to look like numbers (an all-digit hash, for instance).
    git ls-remote origin |
        awk -F'\t' -v want="$hash" '($1 "") == (want "") { print $2; exit }'
}
# Return the hash of a reference if it exists on the remote repository.
#   $1 - full ref name as advertised by the remote (e.g. HEAD or
#        refs/heads/master)
# Prints the object name of the first ref advertised by "origin" whose name is
# exactly $1 and nothing when there is none.  The name is compared literally,
# so "." is not a wildcard, a prefix of another ref does not match, and
# characters such as "," cannot break the lookup.
hash_from_ref(){
    local ref="$1"
    # Appending "" forces a string comparison in awk.
    git ls-remote origin |
        awk -F'\t' -v want="$ref" '($2 "") == (want "") { print $1; exit }'
}
# Fetch everything and checkout the right sha1
#   $1 - commit hash; when empty it is looked up on the remote from $2
#   $2 - ref name, only used when $1 is empty
# Creates the local branch "fetchgit" at the requested commit.
# Returns 1 when no hash can be determined or when the fetch or the checkout
# fails.
checkout_hash(){
    local hash="$1"
    local ref="$2"

    if test -z "$hash"; then
        hash=$(hash_from_ref "$ref")
    fi

    # With an empty start point "git checkout -b" would branch from the
    # current HEAD instead of the requested revision, so an unresolved ref
    # must be reported as a failure here.
    test -n "$hash" || return 1

    # ${builder:+--progress} is intentionally unquoted: it must vanish
    # completely when builder is empty.
    git fetch ${builder:+--progress} origin || return 1
    git checkout -b fetchgit "$hash" || return 1
}
# Shallow path: fetch exactly one branch or tag from "origin" and check it out
# as the local branch "fetchgit".
#   $1 - commit hash; used to look the ref up on the remote when $2 is empty
#   $2 - ref name; may be empty
# Returns 1 when a deep clone was requested, when no ref name can be
# determined, or when the fetch or checkout fails, so that the caller can fall
# back to a full fetch.
checkout_ref(){
    local want_hash="$1"
    local want_ref="$2"

    # $deepClone holds the word "true" or "false" and is run as a command.
    # A deep clone was asked for explicitly; it is what lets "git describe"
    # and similar tools work (discussion:
    # http://thread.gmane.org/gmane.linux.distributions.nixos/3569), so the
    # shallow path declines.
    if "$deepClone"; then
        return 1
    fi

    # No ref given: ask the remote which ref carries the hash.
    test -n "$want_ref" || want_ref=$(ref_from_hash $want_hash)

    # Still no name to fetch by.
    test -n "$want_ref" || return 1

    # --depth option is ignored on http repository.
    git fetch ${builder:+--progress} --depth 1 origin +"$want_ref" || return 1
    git checkout -b fetchgit FETCH_HEAD || return 1
}
# Update submodules
#
# Registers the submodules of the repository in the current directory and
# clones each of them, at the commit recorded by the superproject, into its
# path by calling clone().  A submodule whose URL cannot be found in
# .gitmodules aborts the loop with an error message.
init_submodules(){
    # Add urls into .git/config file
    git submodule init

    # list submodule directories and their hashes
    git submodule status |
    while read -r l; do
        local hash dir name url

        # checkout each submodule
        # The first field is a one-character status flag glued to the sha1;
        # the second field is the submodule path.
        hash=$(printf '%s\n' "$l" | awk '{print substr($1,2)}')
        dir=$(printf '%s\n' "$l" | awk '{print $2}')

        # Find the "submodule.<name>" section whose path is $dir.  The path is
        # compared literally (no regexp), and <name> may itself contain dots.
        name=$(
            git config -f .gitmodules --get-regexp '^submodule\..*\.path$' |
            awk -v want="$dir" '
                {
                    suffix = ".path " want
                    n = length($0) - length(suffix)
                    if (n > 0 && substr($0, n + 1) == suffix) {
                        print substr($0, 1, n)
                        exit
                    }
                }')
        url=$(git config -f .gitmodules --get "${name}.url") || url=

        if test -z "$url"; then
            echo 1>&2 "Unable to determine the URL of submodule $dir."
            exit 1
        fi

        # Get Absolute URL if we have a relative URL
        # Only URLs starting with ./ or ../ are relative to the superproject's
        # origin; anything else (scheme://, user@host:path, /abs/path) is
        # already absolute and must be used unchanged.
        case $url in
            ./*|../*)
                url="$(git config --get remote.origin.url)/$url";;
        esac

        clone "$dir" "$url" "$hash" ""
    done
}
# Turn an existing directory into a checkout of one revision of a repository.
#   $1 - directory to populate (must already exist)
#   $2 - repository URL
#   $3 - commit hash (may be empty when $4 is given)
#   $4 - ref name (may be empty when $3 is given)
# Reads the globals fetchSubmodules and builder.  The working directory is
# restored before returning.  Returns 1 if $1 cannot be entered; terminates
# the shell with status 1 if neither checkout strategy succeeds.
clone(){
    local top
    top=$(pwd)
    local dir="$1"
    local url="$2"
    local hash="$3"
    local ref="$4"

    cd "$dir" || return 1

    # Initialize the repository.
    init_remote "$url"

    # Download data from the repository.
    # Try the single-ref fetch first and fall back to fetching everything.
    # A brace group (not a subshell) is used so that "exit" really ends the
    # shell even when the script is not running with -e.
    checkout_ref "$hash" "$ref" ||
    checkout_hash "$hash" "$ref" || {
        echo 1>&2 "Unable to checkout $hash$ref from $url."
        exit 1
    }

    # Checkout linked sources.
    if test -n "$fetchSubmodules"; then
        init_submodules
    fi

    if [ -z "$builder" ] && [ -f .topdeps ]; then
        # Probe for TopGit quietly: both stdout and stderr are discarded.
        if tg help > /dev/null 2>&1
        then
            echo "populating TopGit branches..."
            tg remote --populate origin
        else
            echo "WARNING: would populate TopGit branches but TopGit is not available" >&2
            echo "WARNING: install TopGit to fix the problem" >&2
        fi
    fi

    cd "$top"
}
# Strip a repository down to a reproducible state: remove volatile files and
# all remote branches, delete every tag that contains HEAD except the tag(s)
# pointing exactly at HEAD, do a full repack and then garbage collect
# unreferenced objects.
#   $1 - top-level directory of the repository (the one holding .git)
make_deterministic_repo(){
    local repo="$1"

    # run in sub-shell to not touch current working directory
    (
    # Never run the rm below from the wrong directory.
    cd "$repo" || exit 1

    # Remove files that contain timestamps or otherwise have non-deterministic
    # properties.
    rm -rf .git/logs/ .git/hooks/ .git/index .git/FETCH_HEAD .git/ORIG_HEAD \
        .git/refs/remotes/origin/HEAD .git/config

    # Remove all remote branches.
    git branch -r | while read -r branch; do
        git branch -rD "$branch" >&2
    done

    # Among the tags that contain HEAD, keep those pointing exactly at HEAD
    # and delete the rest.  Several tags may point at HEAD, so each candidate
    # is matched against the whole list line by line rather than compared
    # with the list as one string.
    tags_at_head=$(git tag --points-at HEAD)
    git tag --contains HEAD | while read -r tag; do
        if ! printf '%s\n' "$tags_at_head" | grep -qxF -e "$tag"; then
            git tag -d "$tag" >&2
        fi
    done

    # Do a full repack, for determinism.
    # Repack does not add unreferenced objects to a pack file.
    git repack -A -d -f

    # Garbage collect unreferenced objects.
    git gc --prune=all
    )
}
# Check out the revision requested by the user and post-process the result.
#   $1 - directory to populate (must already exist)
#   $2 - repository URL
#   $3 - revision: HEAD, a refs/* name, or a hexadecimal commit hash
#        (defaults to HEAD)
# Prints "git revision is <sha1>" on stdout; every other message goes to
# stderr.  Afterwards either removes all .git* entries below $1 or, when
# leaveDotGit is set, makes each contained repository deterministic.
# Terminates the script with status 1 for a revision that is neither a ref
# nor hexadecimal.
clone_user_rev() {
    local dir="$1"
    local url="$2"
    local rev="${3:-HEAD}"

    # Perform the checkout.
    case "$rev" in
        HEAD|refs/*)
            clone "$dir" "$url" "" "$rev" 1>&2;;
        *)
            # printf (not echo) so that a value such as "-n" is validated as
            # data instead of being swallowed as an echo option.
            if test -z "$(printf '%s' "$rev" | tr -d 0123456789abcdef)"; then
                clone "$dir" "$url" "$rev" "" 1>&2
            else
                echo 1>&2 "Bad commit hash or bad reference."
                exit 1
            fi;;
    esac

    local full_revision
    full_revision=$(cd "$dir" && (git rev-parse "$rev" 2> /dev/null || git rev-parse refs/heads/fetchgit) | tail -n1)
    echo "git revision is $full_revision"
    echo "git human-readable version is $(cd "$dir" && (git describe "$full_revision" 2> /dev/null || git describe --tags "$full_revision" 2> /dev/null || echo -- none --))" >&2

    # Allow doing additional processing before .git removal
    eval "$NIX_PREFETCH_GIT_CHECKOUT_HOOK"
    if test -z "$leaveDotGit"; then
        echo "removing \`.git'..." >&2
        # Let find hand the paths to rm itself: piping them through xargs
        # splits names on whitespace and could delete unrelated files.
        # -prune stops find from descending into what is about to be removed.
        find "$dir" -name '.git*' -prune -exec rm -rf {} +
    else
        find "$dir" -name .git | while read -r gitdir; do
            make_deterministic_repo "$(readlink -f "$gitdir/..")"
        done
    fi
}
# Main program.
#
# Builder mode (--builder): check out straight into $out; out, url and rev are
# all mandatory.
# Prefetch mode: check out into a temporary directory, hash it, add it to the
# Nix store and print the hash (plus the store path when PRINT_PATH is set) on
# stdout.  Diagnostics go to stderr.
if test -n "$builder"; then
    { test -n "$out" && test -n "$url" && test -n "$rev"; } || usage
    mkdir "$out"
    clone_user_rev "$out" "$url" "$rev"
else
    if test -z "$hashType"; then
        hashType=sha256
    fi

    # Start from a known state instead of trusting a finalPath inherited from
    # the environment.
    finalPath=

    # If the hash was given, a file with that hash may already be in the
    # store.
    if test -n "$expHash"; then
        finalPath=$(nix-store --print-fixed-path --recursive "$hashType" "$expHash" git-export)
        if ! nix-store --check-validity "$finalPath" 2> /dev/null; then
            finalPath=
        fi
        hash=$expHash
    fi

    # If we don't know the hash or a path with that hash doesn't exist,
    # download the file and add it to the store.
    if test -z "$finalPath"; then

        tmpPath="$(mktemp -d "${TMPDIR:-/tmp}/git-checkout-tmp-XXXXXXXX")"
        # Single quotes: $tmpPath is expanded when the trap fires, so special
        # characters in the path cannot corrupt the trap command.
        trap 'rm -rf "$tmpPath"' EXIT

        tmpFile="$tmpPath/git-export"
        mkdir "$tmpFile"

        # Perform the checkout.
        clone_user_rev "$tmpFile" "$url" "$rev"

        # Compute the hash.
        # NOTE(review): hashFormat is not assigned anywhere in this script;
        # presumably it comes from the environment.  It is left unquoted on
        # purpose so that an empty value adds no argument.
        hash=$(nix-hash --type "$hashType" $hashFormat "$tmpFile")
        if ! test -n "$QUIET"; then echo "hash is $hash" >&2; fi

        # Add the downloaded file to the Nix store.
        finalPath=$(nix-store --add-fixed --recursive "$hashType" "$tmpFile")

        if test -n "$expHash" && test "$expHash" != "$hash"; then
            # stdout is reserved for the hash / store path, so the error is
            # reported on stderr like every other diagnostic.
            echo "hash mismatch for URL \`$url'" >&2
            exit 1
        fi
    fi

    if ! test -n "$QUIET"; then echo "path is $finalPath" >&2; fi

    echo "$hash"

    if test -n "$PRINT_PATH"; then
        echo "$finalPath"
    fi
fi