forgejo/modules/git/blob.go
Lunny Xiao 0ca13c5eae
[PORT] Refactor the usage of batch catfile (gitea#31754)
When opening a repository, it will call `ensureValidRepository` and also
`CatFileBatch`. But sometimes these will not be used until repository
closed. So it's a waste of CPU to invoke 3 times git command for every
open repository.

This PR removed all of these from `OpenRepository` but only kept
checking whether the folder exists. When a batch is necessary, the
necessary functions will be invoked.

---
Conflict resolution: Because of the removal of go-git in (#4941)
`_nogogit.go` files were either renamed or merged into the 'common'
file. Git does handle the renames correctly, but for those that were
merged has to be manually copied pasted over. The patch looks the same,
201 additions 90 deletions as the original patch.

(cherry picked from commit c03baab678ba5b2e9d974aea147e660417f5d3f7)
2024-08-26 03:48:51 +02:00

228 lines
4.5 KiB
Go

// Copyright 2015 The Gogs Authors. All rights reserved.
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
"bufio"
"bytes"
"encoding/base64"
"io"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/typesniffer"
"code.gitea.io/gitea/modules/util"
)
// Blob represents a Git object.
type Blob struct {
ID ObjectID
gotSize bool
size int64
name string
repo *Repository
}
// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
// Calling the Close function on the result will discard all unread output.
func (b *Blob) DataAsync() (io.ReadCloser, error) {
wr, rd, cancel, err := b.repo.CatFileBatch(b.repo.Ctx)
if err != nil {
return nil, err
}
_, err = wr.Write([]byte(b.ID.String() + "\n"))
if err != nil {
cancel()
return nil, err
}
_, _, size, err := ReadBatchLine(rd)
if err != nil {
cancel()
return nil, err
}
b.gotSize = true
b.size = size
if size < 4096 {
bs, err := io.ReadAll(io.LimitReader(rd, size))
defer cancel()
if err != nil {
return nil, err
}
_, err = rd.Discard(1)
return io.NopCloser(bytes.NewReader(bs)), err
}
return &blobReader{
rd: rd,
n: size,
cancel: cancel,
}, nil
}
// Size returns the uncompressed size of the blob
func (b *Blob) Size() int64 {
if b.gotSize {
return b.size
}
wr, rd, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx)
if err != nil {
log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
}
defer cancel()
_, err = wr.Write([]byte(b.ID.String() + "\n"))
if err != nil {
log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
}
_, _, b.size, err = ReadBatchLine(rd)
if err != nil {
log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
}
b.gotSize = true
return b.size
}
type blobReader struct {
rd *bufio.Reader
n int64
cancel func()
}
func (b *blobReader) Read(p []byte) (n int, err error) {
if b.n <= 0 {
return 0, io.EOF
}
if int64(len(p)) > b.n {
p = p[0:b.n]
}
n, err = b.rd.Read(p)
b.n -= int64(n)
return n, err
}
// Close implements io.Closer
func (b *blobReader) Close() error {
if b.rd == nil {
return nil
}
defer b.cancel()
if err := DiscardFull(b.rd, b.n+1); err != nil {
return err
}
b.rd = nil
return nil
}
// Name returns name of the tree entry this blob object was created from (or empty string)
func (b *Blob) Name() string {
return b.name
}
// GetBlobContent Gets the limited content of the blob as raw text
func (b *Blob) GetBlobContent(limit int64) (string, error) {
if limit <= 0 {
return "", nil
}
dataRc, err := b.DataAsync()
if err != nil {
return "", err
}
defer dataRc.Close()
buf, err := util.ReadWithLimit(dataRc, int(limit))
return string(buf), err
}
// GetBlobLineCount gets line count of the blob
func (b *Blob) GetBlobLineCount() (int, error) {
reader, err := b.DataAsync()
if err != nil {
return 0, err
}
defer reader.Close()
buf := make([]byte, 32*1024)
count := 1
lineSep := []byte{'\n'}
c, err := reader.Read(buf)
if c == 0 && err == io.EOF {
return 0, nil
}
for {
count += bytes.Count(buf[:c], lineSep)
switch {
case err == io.EOF:
return count, nil
case err != nil:
return count, err
}
c, err = reader.Read(buf)
}
}
// GetBlobContentBase64 Reads the content of the blob with a base64 encode and returns the encoded string
func (b *Blob) GetBlobContentBase64() (string, error) {
dataRc, err := b.DataAsync()
if err != nil {
return "", err
}
defer dataRc.Close()
pr, pw := io.Pipe()
encoder := base64.NewEncoder(base64.StdEncoding, pw)
go func() {
_, err := io.Copy(encoder, dataRc)
_ = encoder.Close()
if err != nil {
_ = pw.CloseWithError(err)
} else {
_ = pw.Close()
}
}()
out, err := io.ReadAll(pr)
if err != nil {
return "", err
}
return string(out), nil
}
// GuessContentType guesses the content type of the blob.
func (b *Blob) GuessContentType() (typesniffer.SniffedType, error) {
r, err := b.DataAsync()
if err != nil {
return typesniffer.SniffedType{}, err
}
defer r.Close()
return typesniffer.DetectContentTypeFromReader(r)
}
// GetBlob finds the blob object in the repository.
func (repo *Repository) GetBlob(idStr string) (*Blob, error) {
id, err := NewIDFromString(idStr)
if err != nil {
return nil, err
}
if id.IsZero() {
return nil, ErrNotExist{id.String(), ""}
}
return &Blob{
ID: id,
repo: repo,
}, nil
}