mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2024-11-14 14:49:32 +01:00
523efa433b
* Extract out the common cat-file batch calls Signed-off-by: Andrew Thornton <art27@cantab.net> * Move bleve and elastic indexers to use a common cat-file --batch when indexing Signed-off-by: Andrew Thornton <art27@cantab.net> * move catfilebatch to batch_reader and rename to batch_reader.go Signed-off-by: Andrew Thornton <art27@cantab.net> Co-authored-by: 6543 <6543@obermui.de> Co-authored-by: Lauris BH <lauris@nix.lv>
247 lines
6 KiB
Go
247 lines
6 KiB
Go
// Copyright 2020 The Gitea Authors. All rights reserved.
|
|
// Use of this source code is governed by a MIT-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// +build !gogit
|
|
|
|
package pipeline
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"fmt"
|
|
"io"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"code.gitea.io/gitea/modules/git"
|
|
)
|
|
|
|
// LFSResult represents commits found using a provided pointer file hash
|
|
type LFSResult struct {
|
|
Name string
|
|
SHA string
|
|
Summary string
|
|
When time.Time
|
|
ParentHashes []git.SHA1
|
|
BranchName string
|
|
FullCommitName string
|
|
}
|
|
|
|
type lfsResultSlice []*LFSResult
|
|
|
|
func (a lfsResultSlice) Len() int { return len(a) }
|
|
func (a lfsResultSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
|
func (a lfsResultSlice) Less(i, j int) bool { return a[j].When.After(a[i].When) }
|
|
|
|
// FindLFSFile finds commits that contain a provided pointer file hash
|
|
func FindLFSFile(repo *git.Repository, hash git.SHA1) ([]*LFSResult, error) {
|
|
resultsMap := map[string]*LFSResult{}
|
|
results := make([]*LFSResult, 0)
|
|
|
|
basePath := repo.Path
|
|
|
|
hashStr := hash.String()
|
|
|
|
// Use rev-list to provide us with all commits in order
|
|
revListReader, revListWriter := io.Pipe()
|
|
defer func() {
|
|
_ = revListWriter.Close()
|
|
_ = revListReader.Close()
|
|
}()
|
|
|
|
go func() {
|
|
stderr := strings.Builder{}
|
|
err := git.NewCommand("rev-list", "--all").RunInDirPipeline(repo.Path, revListWriter, &stderr)
|
|
if err != nil {
|
|
_ = revListWriter.CloseWithError(git.ConcatenateError(err, (&stderr).String()))
|
|
} else {
|
|
_ = revListWriter.Close()
|
|
}
|
|
}()
|
|
|
|
// Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
|
|
// so let's create a batch stdin and stdout
|
|
batchStdinWriter, batchReader, cancel := git.CatFileBatch(repo.Path)
|
|
defer cancel()
|
|
|
|
// We'll use a scanner for the revList because it's simpler than a bufio.Reader
|
|
scan := bufio.NewScanner(revListReader)
|
|
trees := [][]byte{}
|
|
paths := []string{}
|
|
|
|
fnameBuf := make([]byte, 4096)
|
|
modeBuf := make([]byte, 40)
|
|
workingShaBuf := make([]byte, 40)
|
|
|
|
for scan.Scan() {
|
|
// Get the next commit ID
|
|
commitID := scan.Bytes()
|
|
|
|
// push the commit to the cat-file --batch process
|
|
_, err := batchStdinWriter.Write(commitID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
_, err = batchStdinWriter.Write([]byte{'\n'})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var curCommit *git.Commit
|
|
curPath := ""
|
|
|
|
commitReadingLoop:
|
|
for {
|
|
_, typ, size, err := git.ReadBatchLine(batchReader)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
switch typ {
|
|
case "tag":
|
|
// This shouldn't happen but if it does well just get the commit and try again
|
|
id, err := git.ReadTagObjectID(batchReader, size)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
_, err = batchStdinWriter.Write([]byte(id + "\n"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
continue
|
|
case "commit":
|
|
// Read in the commit to get its tree and in case this is one of the last used commits
|
|
curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(string(commitID)), io.LimitReader(batchReader, int64(size)))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
_, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
curPath = ""
|
|
case "tree":
|
|
var n int64
|
|
for n < size {
|
|
mode, fname, sha, count, err := git.ParseTreeLine(batchReader, modeBuf, fnameBuf, workingShaBuf)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
n += int64(count)
|
|
if bytes.Equal(sha, []byte(hashStr)) {
|
|
result := LFSResult{
|
|
Name: curPath + string(fname),
|
|
SHA: curCommit.ID.String(),
|
|
Summary: strings.Split(strings.TrimSpace(curCommit.CommitMessage), "\n")[0],
|
|
When: curCommit.Author.When,
|
|
ParentHashes: curCommit.Parents,
|
|
}
|
|
resultsMap[curCommit.ID.String()+":"+curPath+string(fname)] = &result
|
|
} else if string(mode) == git.EntryModeTree.String() {
|
|
trees = append(trees, sha)
|
|
paths = append(paths, curPath+string(fname)+"/")
|
|
}
|
|
}
|
|
if len(trees) > 0 {
|
|
_, err := batchStdinWriter.Write(trees[len(trees)-1])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
_, err = batchStdinWriter.Write([]byte("\n"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
curPath = paths[len(paths)-1]
|
|
trees = trees[:len(trees)-1]
|
|
paths = paths[:len(paths)-1]
|
|
} else {
|
|
break commitReadingLoop
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if err := scan.Err(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
for _, result := range resultsMap {
|
|
hasParent := false
|
|
for _, parentHash := range result.ParentHashes {
|
|
if _, hasParent = resultsMap[parentHash.String()+":"+result.Name]; hasParent {
|
|
break
|
|
}
|
|
}
|
|
if !hasParent {
|
|
results = append(results, result)
|
|
}
|
|
}
|
|
|
|
sort.Sort(lfsResultSlice(results))
|
|
|
|
// Should really use a go-git function here but name-rev is not completed and recapitulating it is not simple
|
|
shasToNameReader, shasToNameWriter := io.Pipe()
|
|
nameRevStdinReader, nameRevStdinWriter := io.Pipe()
|
|
errChan := make(chan error, 1)
|
|
wg := sync.WaitGroup{}
|
|
wg.Add(3)
|
|
|
|
go func() {
|
|
defer wg.Done()
|
|
scanner := bufio.NewScanner(nameRevStdinReader)
|
|
i := 0
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
if len(line) == 0 {
|
|
continue
|
|
}
|
|
result := results[i]
|
|
result.FullCommitName = line
|
|
result.BranchName = strings.Split(line, "~")[0]
|
|
i++
|
|
}
|
|
}()
|
|
go NameRevStdin(shasToNameReader, nameRevStdinWriter, &wg, basePath)
|
|
go func() {
|
|
defer wg.Done()
|
|
defer shasToNameWriter.Close()
|
|
for _, result := range results {
|
|
i := 0
|
|
if i < len(result.SHA) {
|
|
n, err := shasToNameWriter.Write([]byte(result.SHA)[i:])
|
|
if err != nil {
|
|
errChan <- err
|
|
break
|
|
}
|
|
i += n
|
|
}
|
|
var err error
|
|
n := 0
|
|
for n < 1 {
|
|
n, err = shasToNameWriter.Write([]byte{'\n'})
|
|
if err != nil {
|
|
errChan <- err
|
|
break
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
}()
|
|
|
|
wg.Wait()
|
|
|
|
select {
|
|
case err, has := <-errChan:
|
|
if has {
|
|
return nil, fmt.Errorf("Unable to obtain name for LFS files. Error: %w", err)
|
|
}
|
|
default:
|
|
}
|
|
|
|
return results, nil
|
|
}
|