mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2024-11-24 11:31:54 +01:00
188 lines
4.8 KiB
Go
188 lines
4.8 KiB
Go
|
package certmagic
|
||
|
|
||
|
import (
|
||
|
"context"
|
||
|
"errors"
|
||
|
"log"
|
||
|
"runtime"
|
||
|
"sync"
|
||
|
"time"
|
||
|
|
||
|
"go.uber.org/zap"
|
||
|
)
|
||
|
|
||
|
var jm = &jobManager{maxConcurrentJobs: 1000}
|
||
|
|
||
|
type jobManager struct {
|
||
|
mu sync.Mutex
|
||
|
maxConcurrentJobs int
|
||
|
activeWorkers int
|
||
|
queue []namedJob
|
||
|
names map[string]struct{}
|
||
|
}
|
||
|
|
||
|
type namedJob struct {
|
||
|
name string
|
||
|
job func() error
|
||
|
logger *zap.Logger
|
||
|
}
|
||
|
|
||
|
// Submit enqueues the given job with the given name. If name is non-empty
|
||
|
// and a job with the same name is already enqueued or running, this is a
|
||
|
// no-op. If name is empty, no duplicate prevention will occur. The job
|
||
|
// manager will then run this job as soon as it is able.
|
||
|
func (jm *jobManager) Submit(logger *zap.Logger, name string, job func() error) {
|
||
|
jm.mu.Lock()
|
||
|
defer jm.mu.Unlock()
|
||
|
if jm.names == nil {
|
||
|
jm.names = make(map[string]struct{})
|
||
|
}
|
||
|
if name != "" {
|
||
|
// prevent duplicate jobs
|
||
|
if _, ok := jm.names[name]; ok {
|
||
|
return
|
||
|
}
|
||
|
jm.names[name] = struct{}{}
|
||
|
}
|
||
|
jm.queue = append(jm.queue, namedJob{name, job, logger})
|
||
|
if jm.activeWorkers < jm.maxConcurrentJobs {
|
||
|
jm.activeWorkers++
|
||
|
go jm.worker()
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (jm *jobManager) worker() {
|
||
|
defer func() {
|
||
|
if err := recover(); err != nil {
|
||
|
buf := make([]byte, stackTraceBufferSize)
|
||
|
buf = buf[:runtime.Stack(buf, false)]
|
||
|
log.Printf("panic: certificate worker: %v\n%s", err, buf)
|
||
|
}
|
||
|
}()
|
||
|
|
||
|
for {
|
||
|
jm.mu.Lock()
|
||
|
if len(jm.queue) == 0 {
|
||
|
jm.activeWorkers--
|
||
|
jm.mu.Unlock()
|
||
|
return
|
||
|
}
|
||
|
next := jm.queue[0]
|
||
|
jm.queue = jm.queue[1:]
|
||
|
jm.mu.Unlock()
|
||
|
if err := next.job(); err != nil {
|
||
|
if next.logger != nil {
|
||
|
next.logger.Error("job failed", zap.Error(err))
|
||
|
}
|
||
|
}
|
||
|
if next.name != "" {
|
||
|
jm.mu.Lock()
|
||
|
delete(jm.names, next.name)
|
||
|
jm.mu.Unlock()
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func doWithRetry(ctx context.Context, log *zap.Logger, f func(context.Context) error) error {
|
||
|
var attempts int
|
||
|
ctx = context.WithValue(ctx, AttemptsCtxKey, &attempts)
|
||
|
|
||
|
// the initial intervalIndex is -1, signaling
|
||
|
// that we should not wait for the first attempt
|
||
|
start, intervalIndex := time.Now(), -1
|
||
|
var err error
|
||
|
|
||
|
for time.Since(start) < maxRetryDuration {
|
||
|
var wait time.Duration
|
||
|
if intervalIndex >= 0 {
|
||
|
wait = retryIntervals[intervalIndex]
|
||
|
}
|
||
|
timer := time.NewTimer(wait)
|
||
|
select {
|
||
|
case <-ctx.Done():
|
||
|
timer.Stop()
|
||
|
return context.Canceled
|
||
|
case <-timer.C:
|
||
|
err = f(ctx)
|
||
|
attempts++
|
||
|
if err == nil || errors.Is(err, context.Canceled) {
|
||
|
return err
|
||
|
}
|
||
|
var errNoRetry ErrNoRetry
|
||
|
if errors.As(err, &errNoRetry) {
|
||
|
return err
|
||
|
}
|
||
|
if intervalIndex < len(retryIntervals)-1 {
|
||
|
intervalIndex++
|
||
|
}
|
||
|
if time.Since(start) < maxRetryDuration {
|
||
|
if log != nil {
|
||
|
log.Error("will retry",
|
||
|
zap.Error(err),
|
||
|
zap.Int("attempt", attempts),
|
||
|
zap.Duration("retrying_in", retryIntervals[intervalIndex]),
|
||
|
zap.Duration("elapsed", time.Since(start)),
|
||
|
zap.Duration("max_duration", maxRetryDuration))
|
||
|
}
|
||
|
} else {
|
||
|
if log != nil {
|
||
|
log.Error("final attempt; giving up",
|
||
|
zap.Error(err),
|
||
|
zap.Int("attempt", attempts),
|
||
|
zap.Duration("elapsed", time.Since(start)),
|
||
|
zap.Duration("max_duration", maxRetryDuration))
|
||
|
}
|
||
|
return nil
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
// ErrNoRetry is an error type which signals
|
||
|
// to stop retries early.
|
||
|
type ErrNoRetry struct{ Err error }
|
||
|
|
||
|
// Unwrap makes it so that e wraps e.Err.
|
||
|
func (e ErrNoRetry) Unwrap() error { return e.Err }
|
||
|
func (e ErrNoRetry) Error() string { return e.Err.Error() }
|
||
|
|
||
|
type retryStateCtxKey struct{}
|
||
|
|
||
|
// AttemptsCtxKey is the context key for the value
|
||
|
// that holds the attempt counter. The value counts
|
||
|
// how many times the operation has been attempted.
|
||
|
// A value of 0 means first attempt.
|
||
|
var AttemptsCtxKey retryStateCtxKey
|
||
|
|
||
|
// retryIntervals are based on the idea of exponential
|
||
|
// backoff, but weighed a little more heavily to the
|
||
|
// front. We figure that intermittent errors would be
|
||
|
// resolved after the first retry, but any errors after
|
||
|
// that would probably require at least a few minutes
|
||
|
// to clear up: either for DNS to propagate, for the
|
||
|
// administrator to fix their DNS or network properties,
|
||
|
// or some other external factor needs to change. We
|
||
|
// chose intervals that we think will be most useful
|
||
|
// without introducing unnecessary delay. The last
|
||
|
// interval in this list will be used until the time
|
||
|
// of maxRetryDuration has elapsed.
|
||
|
var retryIntervals = []time.Duration{
|
||
|
1 * time.Minute,
|
||
|
2 * time.Minute,
|
||
|
2 * time.Minute,
|
||
|
5 * time.Minute, // elapsed: 10 min
|
||
|
10 * time.Minute,
|
||
|
20 * time.Minute,
|
||
|
20 * time.Minute, // elapsed: 1 hr
|
||
|
30 * time.Minute,
|
||
|
30 * time.Minute, // elapsed: 2 hr
|
||
|
1 * time.Hour,
|
||
|
3 * time.Hour, // elapsed: 6 hr
|
||
|
6 * time.Hour, // for up to maxRetryDuration
|
||
|
}
|
||
|
|
||
|
// maxRetryDuration is the maximum duration to try
|
||
|
// doing retries using the above intervals.
|
||
|
const maxRetryDuration = 24 * time.Hour * 30
|