Merge pull request 'feat: add setting to block disposable emails' (#5787) from Ironfractal/forgejo:forgejo into forgejo

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/5787
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
Reviewed-by: jerger <jerger@noreply.codeberg.org>
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Reviewed-by: Michael Kriese <michael.kriese@gmx.de>
This commit is contained in:
Earl Warren 2024-11-21 12:22:41 +00:00
commit ce6cdd8f20
5 changed files with 4173 additions and 11 deletions

View file

@ -51,6 +51,9 @@ GOMOCK_PACKAGE ?= go.uber.org/mock/mockgen@v0.4.0 # renovate: datasource=go
GOPLS_PACKAGE ?= golang.org/x/tools/gopls@v0.16.2 # renovate: datasource=go GOPLS_PACKAGE ?= golang.org/x/tools/gopls@v0.16.2 # renovate: datasource=go
RENOVATE_NPM_PACKAGE ?= renovate@39.19.1 # renovate: datasource=docker packageName=code.forgejo.org/forgejo-contrib/renovate RENOVATE_NPM_PACKAGE ?= renovate@39.19.1 # renovate: datasource=docker packageName=code.forgejo.org/forgejo-contrib/renovate
# https://github.com/disposable-email-domains/disposable-email-domains/commits/main/
DISPOSABLE_EMAILS_SHA ?= 0c27e671231d27cf66370034d7f6818037416989 # renovate: ...
ifeq ($(HAS_GO), yes) ifeq ($(HAS_GO), yes)
CGO_EXTRA_CFLAGS := -DSQLITE_MAX_VARIABLE_NUMBER=32766 CGO_EXTRA_CFLAGS := -DSQLITE_MAX_VARIABLE_NUMBER=32766
CGO_CFLAGS ?= $(shell $(GO) env CGO_CFLAGS) $(CGO_EXTRA_CFLAGS) CGO_CFLAGS ?= $(shell $(GO) env CGO_CFLAGS) $(CGO_EXTRA_CFLAGS)
@ -417,10 +420,10 @@ lint-frontend: lint-js lint-css
lint-frontend-fix: lint-js-fix lint-css-fix lint-frontend-fix: lint-js-fix lint-css-fix
.PHONY: lint-backend .PHONY: lint-backend
lint-backend: lint-go lint-go-vet lint-editorconfig lint-renovate lint-locale lint-backend: lint-go lint-go-vet lint-editorconfig lint-renovate lint-locale lint-disposable-emails
.PHONY: lint-backend-fix .PHONY: lint-backend-fix
lint-backend-fix: lint-go-fix lint-go-vet lint-editorconfig lint-backend-fix: lint-go-fix lint-go-vet lint-editorconfig lint-disposable-emails-fix
.PHONY: lint-codespell .PHONY: lint-codespell
lint-codespell: lint-codespell:
@ -511,6 +514,14 @@ lint-go-gopls:
lint-editorconfig: lint-editorconfig:
$(GO) run $(EDITORCONFIG_CHECKER_PACKAGE) templates .forgejo/workflows $(GO) run $(EDITORCONFIG_CHECKER_PACKAGE) templates .forgejo/workflows
.PHONY: lint-disposable-emails
lint-disposable-emails:
$(GO) run build/generate-disposable-email.go -check -r $(DISPOSABLE_EMAILS_SHA)
.PHONY: lint-disposable-emails-fix
lint-disposable-emails-fix:
$(GO) run build/generate-disposable-email.go -r $(DISPOSABLE_EMAILS_SHA)
.PHONY: lint-templates .PHONY: lint-templates
lint-templates: .venv node_modules lint-templates: .venv node_modules
@node tools/lint-templates-svg.js @node tools/lint-templates-svg.js

View file

@ -0,0 +1,203 @@
// Copyright 2024 James Hatfield
// SPDX-License-Identifier: MIT
//go:build ignore
package main
import (
"bufio"
"bytes"
"crypto"
"flag"
"fmt"
"go/format"
"io"
"log"
"net/http"
"os"
"regexp"
"strings"
)
// disposableEmailListURL is the raw download location of the upstream
// blocklist. The single %s verb is replaced with the git reference given by -r.
const disposableEmailListURL = "https://raw.githubusercontent.com/disposable-email-domains/disposable-email-domains/%s/disposable_email_blocklist.conf"

// Command-line flags of the generator.
var (
	// gitRef selects which upstream revision of the list is downloaded.
	// NOTE(review): the default is "master" while the Makefile comment points
	// at the upstream "main" branch - confirm the default still resolves.
	gitRef = flag.String("r", "master", "Git reference of the domain list version")
	// outPat is the path of the generated Go source file.
	outPat = flag.String("o", "modules/setting/disposable_email_domain_data.go", "Output path")
	// check switches from "write the file" to "compare the file with upstream".
	check = flag.Bool("check", false, "Check if the current output file matches the current upstream list")
)
// main either regenerates the disposable email domain source file or, when
// -check is given, verifies that the file on disk still matches what would be
// generated from the upstream list. Every failure terminates the process
// through log.Fatalf.
func main() {
	flag.Parse()

	// Default mode: generate the source code and write it to the output path.
	if !*check {
		generated, err := generate()
		if err != nil {
			log.Fatalf("Generation Error: %v", err)
		}
		if err = os.WriteFile(*outPat, generated, 0o644); err != nil {
			log.Fatalf("File Write Error: %v", err)
		}
		return
	}

	// Check mode: load the committed copy first ...
	onDisk, err := get_local_file()
	if err != nil {
		log.Fatalf("File Read Error: %v", err)
	}

	// ... then build what the upstream list would produce right now.
	upstream, err := generate()
	if err != nil {
		log.Fatalf("Generation Error: %v", err)
	}

	// Comments are dropped from both sides so that a mere difference in the
	// git reference recorded in the header does not count as a mismatch.
	onDisk = strip_comments(onDisk)
	upstream = strip_comments(upstream)

	onDiskSum, err := hash(onDisk)
	if err != nil {
		log.Fatalf("Local Hash Generation Error: %v", err)
	}
	upstreamSum, err := hash(upstream)
	if err != nil {
		log.Fatalf("Remote Hash Generation Error: %v", err)
	}

	// Differing digests mean the committed copy is stale.
	if onDiskSum != upstreamSum {
		log.Fatalf("Disposable email domain list needs to be updated!! \"make lint-disposable-emails-fix\"")
	}
}
// commentLine matches a line that holds nothing but a // comment, optionally
// preceded by whitespace. It is compiled once instead of on every call.
var commentLine = regexp.MustCompile(`^\s*//.*$`)

// strip_comments returns data with every comment-only line removed.
//
// The remaining lines are joined with "\n" again, so line boundaries survive
// and two inputs that differ only in where their lines break cannot collapse
// into the same output. Only leading whitespace may precede the "//": a line
// whose "//" follows other characters (for example inside a quoted string) is
// kept as data.
func strip_comments(data []byte) []byte {
	lines := bytes.Split(data, []byte("\n"))

	// Filter in place; kept never gets ahead of the read position.
	kept := lines[:0]
	for _, line := range lines {
		if !commentLine.Match(line) {
			kept = append(kept, line)
		}
	}
	return bytes.Join(kept, []byte("\n"))
}
// hash returns the lowercase hexadecimal SHA3-256 digest of data.
//
// crypto.SHA3_256 can only be instantiated when a SHA-3 implementation has
// been linked into the binary and registered itself; this file imports only
// the "crypto" package, not an implementation. Calling New on an unregistered
// hash panics, so availability is checked first and reported as an ordinary
// error that the caller can log.
func hash(data []byte) (string, error) {
	if !crypto.SHA3_256.Available() {
		return "", fmt.Errorf("hash function %v is not linked into the binary", crypto.SHA3_256)
	}

	digest := crypto.SHA3_256.New()
	if _, err := digest.Write(data); err != nil {
		return "", err
	}
	return fmt.Sprintf("%x", digest.Sum(nil)), nil
}
// get_local_file returns the full contents of the generated file located at
// the path given by the -o flag. On any error the returned slice is nil.
func get_local_file() ([]byte, error) {
	contents, err := os.ReadFile(*outPat)
	if err != nil {
		return nil, err
	}
	return contents, nil
}
// get_remote downloads the upstream blocklist at the git reference given by
// the -r flag and returns its lines, one entry per line of the response.
//
// A response with any status other than 200 OK is reported as an error, so the
// body of an error page (for example for an unknown git reference) is never
// mistaken for a list of domains. Scanner failures are reported as well.
func get_remote() ([]string, error) {
	url := fmt.Sprintf(disposableEmailListURL, *gitRef)

	// download the domain list
	res, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("downloading %s: unexpected status %s", url, res.Status)
	}

	body, err := io.ReadAll(res.Body)
	if err != nil {
		return nil, err
	}

	// go through all entries (1 domain per line)
	var arrDomains []string
	scanner := bufio.NewScanner(bytes.NewReader(body))
	for scanner.Scan() {
		arrDomains = append(arrDomains, scanner.Text())
	}
	// Scan returns false both at end of input and on failure; tell them apart.
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return arrDomains, nil
}
func generate() ([]byte, error) {
var err error
var url string = fmt.Sprintf(disposableEmailListURL, *gitRef)
// download the domains list
arrDomains, err := get_remote()
if err != nil {
return nil, err
}
// build the string in a readable way
var sb strings.Builder
_, err = sb.WriteString("[]string{\n")
if err != nil {
return nil, err
}
for _, item := range arrDomains {
_, err = sb.WriteString(fmt.Sprintf("\t%q,\n", item))
if err != nil {
return nil, err
}
}
_, err = sb.WriteString("}")
if err != nil {
return nil, err
}
// insert the values into file
final := fmt.Sprintf(hdr, url, sb.String())
return format.Source([]byte(final))
}
// hdr is the template of the generated file. Its first %s receives the URL the
// list was downloaded from and its second %s the []string literal. The
// "Code generated ... DO NOT EDIT." line ends with a period because the Go
// convention for marking generated files requires that exact form.
const hdr = `
// Copyright 2024 James Hatfield
// SPDX-License-Identifier: MIT
//
// Code generated by build/generate-disposable-email.go. DO NOT EDIT.
// Sourced from %s
package setting
import "sync"
var DisposableEmailDomains = sync.OnceValue(func() []string {
return %s
})
`

File diff suppressed because it is too large Load diff

View file

@ -5,6 +5,7 @@ package setting
import ( import (
"regexp" "regexp"
"slices"
"strings" "strings"
"time" "time"
@ -37,6 +38,7 @@ var Service = struct {
RegisterManualConfirm bool RegisterManualConfirm bool
EmailDomainAllowList []glob.Glob EmailDomainAllowList []glob.Glob
EmailDomainBlockList []glob.Glob EmailDomainBlockList []glob.Glob
EmailDomainBlockDisposable bool
DisableRegistration bool DisableRegistration bool
AllowOnlyInternalRegistration bool AllowOnlyInternalRegistration bool
AllowOnlyExternalRegistration bool AllowOnlyExternalRegistration bool
@ -156,6 +158,22 @@ func loadServiceFrom(rootCfg ConfigProvider) {
} }
Service.EmailDomainAllowList = CompileEmailGlobList(sec, "EMAIL_DOMAIN_WHITELIST", "EMAIL_DOMAIN_ALLOWLIST") Service.EmailDomainAllowList = CompileEmailGlobList(sec, "EMAIL_DOMAIN_WHITELIST", "EMAIL_DOMAIN_ALLOWLIST")
Service.EmailDomainBlockList = CompileEmailGlobList(sec, "EMAIL_DOMAIN_BLOCKLIST") Service.EmailDomainBlockList = CompileEmailGlobList(sec, "EMAIL_DOMAIN_BLOCKLIST")
Service.EmailDomainBlockDisposable = sec.Key("EMAIL_DOMAIN_BLOCK_DISPOSABLE").MustBool(false)
if Service.EmailDomainBlockDisposable {
toAdd := make([]glob.Glob, 0, len(DisposableEmailDomains()))
for _, domain := range DisposableEmailDomains() {
domain = strings.ToLower(domain)
// Only add domains that aren't blocked yet.
if !slices.ContainsFunc(Service.EmailDomainBlockList, func(g glob.Glob) bool { return g.Match(domain) }) {
if g, err := glob.Compile(domain); err != nil {
log.Error("Error in disposable domain %s: %v", domain, err)
} else {
toAdd = append(toAdd, g)
}
}
}
Service.EmailDomainBlockList = append(Service.EmailDomainBlockList, toAdd...)
}
Service.ShowRegistrationButton = sec.Key("SHOW_REGISTRATION_BUTTON").MustBool(!(Service.DisableRegistration || Service.AllowOnlyExternalRegistration)) Service.ShowRegistrationButton = sec.Key("SHOW_REGISTRATION_BUTTON").MustBool(!(Service.DisableRegistration || Service.AllowOnlyExternalRegistration))
Service.ShowMilestonesDashboardPage = sec.Key("SHOW_MILESTONES_DASHBOARD_PAGE").MustBool(true) Service.ShowMilestonesDashboardPage = sec.Key("SHOW_MILESTONES_DASHBOARD_PAGE").MustBool(true)
Service.RequireSignInView = sec.Key("REQUIRE_SIGNIN_VIEW").MustBool() Service.RequireSignInView = sec.Key("REQUIRE_SIGNIN_VIEW").MustBool()

View file

@ -4,6 +4,9 @@
package setting package setting
import ( import (
"fmt"
"sort"
"strings"
"testing" "testing"
"code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/modules/structs"
@ -11,8 +14,18 @@ import (
"github.com/gobwas/glob" "github.com/gobwas/glob"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"golang.org/x/net/publicsuffix"
) )
// match reports whether s is matched by at least one of the given globs.
func match(globs []glob.Glob, s string) bool {
	for i := range globs {
		if !globs[i].Match(s) {
			continue
		}
		return true
	}
	return false
}
func TestLoadServices(t *testing.T) { func TestLoadServices(t *testing.T) {
oldService := Service oldService := Service
defer func() { defer func() {
@ -28,15 +41,6 @@ EMAIL_DOMAIN_BLOCKLIST = d3, *.b
require.NoError(t, err) require.NoError(t, err)
loadServiceFrom(cfg) loadServiceFrom(cfg)
match := func(globs []glob.Glob, s string) bool {
for _, g := range globs {
if g.Match(s) {
return true
}
}
return false
}
assert.True(t, match(Service.EmailDomainAllowList, "d1")) assert.True(t, match(Service.EmailDomainAllowList, "d1"))
assert.True(t, match(Service.EmailDomainAllowList, "foo.w")) assert.True(t, match(Service.EmailDomainAllowList, "foo.w"))
assert.True(t, match(Service.EmailDomainAllowList, "d2")) assert.True(t, match(Service.EmailDomainAllowList, "d2"))
@ -48,6 +52,121 @@ EMAIL_DOMAIN_BLOCKLIST = d3, *.b
assert.False(t, match(Service.EmailDomainBlockList, "d1")) assert.False(t, match(Service.EmailDomainBlockList, "d1"))
} }
// TestLoadServiceBlockDisposable loads a configuration that only enables
// EMAIL_DOMAIN_BLOCK_DISPOSABLE and checks that every bundled disposable
// domain is matched by the block list, that the block list has exactly one
// entry per bundled domain, and that a set of common mail providers is not
// matched.
func TestLoadServiceBlockDisposable(t *testing.T) {
	// Put the global Service back once the test is over.
	saved := Service
	defer func() { Service = saved }()

	const ini = `
[service]
EMAIL_DOMAIN_BLOCK_DISPOSABLE = true
`
	cfg, err := NewConfigProviderFromData(ini)
	require.NoError(t, err)
	loadServiceFrom(cfg)

	disposable := DisposableEmailDomains()
	for i := range disposable {
		require.True(t, match(Service.EmailDomainBlockList, disposable[i]))
	}
	// No other block list entry is configured here.
	require.Len(t, Service.EmailDomainBlockList, len(disposable))

	// Well-known providers that must stay usable.
	for _, provider := range []string{
		"aol.com",
		"gmx.com",
		"mail.com",
		"zoho.com",
		"proton.me",
		"gmail.com",
		"yahoo.com",
		"icloud.com",
		"outlook.com",
		"protonmail.com",
	} {
		require.False(t, match(Service.EmailDomainBlockList, provider))
	}
}
// TestLoadServiceBlockDisposableWithExistingGlobs configures the block list
// with wildcard globs for the two public suffixes that occur most often among
// the bundled disposable domains, and checks that loading the disposable list
// on top still matches every bundled domain while adding no entry for a domain
// that one of the two globs already covers.
func TestLoadServiceBlockDisposableWithExistingGlobs(t *testing.T) {
	oldService := Service
	defer func() {
		Service = oldService
	}()

	// Count the bundled domains per public suffix.
	tldCounts := make(map[string]int)
	for _, domain := range DisposableEmailDomains() {
		tld, _ := publicsuffix.PublicSuffix(domain)
		tldCounts[tld]++
	}

	type tldkv struct {
		Tld   string
		Count int
	}

	// Order the suffixes by descending frequency.
	sortedTldCounts := make([]tldkv, 0, len(tldCounts))
	for tld, count := range tldCounts {
		sortedTldCounts = append(sortedTldCounts, tldkv{tld, count})
	}
	sort.Slice(sortedTldCounts, func(i, j int) bool {
		return sortedTldCounts[i].Count > sortedTldCounts[j].Count
	})
	require.GreaterOrEqual(t, len(sortedTldCounts), 2)

	// The dot belongs to the suffix: the globs below are "*.<suffix>", so the
	// redundancy count further down has to use the very same ".<suffix>" or it
	// would also count domains that merely end in the same letters.
	firstSuffix := "." + sortedTldCounts[0].Tld
	secondSuffix := "." + sortedTldCounts[1].Tld

	blockString := fmt.Sprintf("*%s,*%s", firstSuffix, secondSuffix)
	cfg, err := NewConfigProviderFromData(fmt.Sprintf(`
[service]
EMAIL_DOMAIN_BLOCKLIST = %s
EMAIL_DOMAIN_BLOCK_DISPOSABLE = true
`, blockString))
	require.NoError(t, err)
	loadServiceFrom(cfg)

	for _, domain := range DisposableEmailDomains() {
		require.True(t, match(Service.EmailDomainBlockList, domain))
	}

	// Domains already covered by one of the two globs must not be added again.
	redundant := 0
	for _, val := range DisposableEmailDomains() {
		if strings.HasSuffix(val, firstSuffix) || strings.HasSuffix(val, secondSuffix) {
			redundant++
		}
	}

	// Remaining domains plus the two configured globs.
	expected := len(DisposableEmailDomains()) - redundant + 2
	require.Len(t, Service.EmailDomainBlockList, expected)
}
// TestLoadServiceBlockDisposableWithComplementGlobs combines one configured
// block list glob ("*.random") with EMAIL_DOMAIN_BLOCK_DISPOSABLE and expects
// the resulting block list to match every bundled disposable domain and to
// hold one entry per bundled domain plus the configured glob.
func TestLoadServiceBlockDisposableWithComplementGlobs(t *testing.T) {
	// Put the global Service back once the test is over.
	saved := Service
	defer func() { Service = saved }()

	const ini = `
[service]
EMAIL_DOMAIN_BLOCKLIST = *.random
EMAIL_DOMAIN_BLOCK_DISPOSABLE = true
`
	cfg, err := NewConfigProviderFromData(ini)
	require.NoError(t, err)
	loadServiceFrom(cfg)

	disposable := DisposableEmailDomains()
	for i := range disposable {
		require.True(t, match(Service.EmailDomainBlockList, disposable[i]))
	}

	// One extra entry for the configured glob.
	require.Len(t, Service.EmailDomainBlockList, len(disposable)+1)
}
func TestLoadServiceVisibilityModes(t *testing.T) { func TestLoadServiceVisibilityModes(t *testing.T) {
oldService := Service oldService := Service
defer func() { defer func() {