cron task to cleanup dangling container images with version sha256:*

Fixes: https://codeberg.org/forgejo/forgejo/issues/4378
This commit is contained in:
Earl Warren 2024-07-26 19:39:19 +02:00
parent d30be160c9
commit f19f31ac73
No known key found for this signature in database
GPG key ID: 0579CB2928A78A00
3 changed files with 383 additions and 0 deletions

View file

@ -21,6 +21,9 @@ func Cleanup(ctx context.Context, olderThan time.Duration) error {
if err := cleanupExpiredBlobUploads(ctx, olderThan); err != nil {
return err
}
if err := CleanupSHA256(ctx, olderThan); err != nil {
return err
}
return cleanupExpiredUploadedBlobs(ctx, olderThan)
}

View file

@ -0,0 +1,142 @@
// Copyright 2024 The Forgejo Authors. All rights reserved.
// SPDX-License-Identifier: GPL-3.0-or-later
package container
import (
"context"
"strings"
"time"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/packages"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/log"
container_module "code.gitea.io/gitea/modules/packages/container"
)
var (
SHA256BatchSize = 500
SHA256Log = "cleanup dangling images with a sha256:* version"
SHA256LogStart = "Start to " + SHA256Log
SHA256LogFinish = "Finished to " + SHA256Log
)
func CleanupSHA256(ctx context.Context, olderThan time.Duration) error {
log.Info(SHA256LogStart)
err := cleanupSHA256(ctx, olderThan)
log.Info(SHA256LogFinish)
return err
}
func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error {
ctx, committer, err := db.TxContext(outerCtx)
if err != nil {
return err
}
defer committer.Close()
foundAtLeastOneSHA256 := false
shaToVersionID := make(map[string]int64, 100)
knownSHA := make(map[string]any, 100)
log.Debug("Look for all package_version.version that start with sha256:")
old := time.Now().Add(-olderThan).Unix()
// Iterate over all container versions in ascending order and store
// in shaToVersionID all versions with a sha256: prefix. If an index
// manifest is found, the sha256: digest it references are removed
// from shaToVersionID. If the sha256: digest found in an index
// manifest is not already in shaToVersionID, it is stored in
// knownSHA to be dealt with later.
//
// Although it is theoretically possible that a sha256: is uploaded
// after the index manifest that references it, this is not the
// normal order of operations. First the sha256: version is uploaded
// and then the index manifest. When the iteration completes,
// knownSHA will therefore be empty most of the time and
// shaToVersionID will only contain unreferenced sha256: versions.
if err := db.GetEngine(ctx).
Select("`package_version`.`id`, `package_version`.`lower_version`, `package_version`.`metadata_json`").
Join("INNER", "`package`", "`package`.`id` = `package_version`.`package_id`").
Where("`package`.`type` = ? AND `package_version`.`created_unix` < ?", packages.TypeContainer, old).
OrderBy("`package_version`.`id` ASC").
Iterate(new(packages.PackageVersion), func(_ int, bean any) error {
v := bean.(*packages.PackageVersion)
if strings.HasPrefix(v.LowerVersion, "sha256:") {
shaToVersionID[v.LowerVersion] = v.ID
foundAtLeastOneSHA256 = true
} else if strings.Contains(v.MetadataJSON, `"manifests":[{`) {
var metadata container_module.Metadata
if err := json.Unmarshal([]byte(v.MetadataJSON), &metadata); err != nil {
log.Error("package_version.id = %d package_version.metadata_json %s is not a JSON string containing valid metadata. It was ignored but it is an inconsistency in the database that should be looked at. %v", v.ID, v.MetadataJSON, err)
return nil
}
for _, manifest := range metadata.Manifests {
if _, ok := shaToVersionID[manifest.Digest]; ok {
delete(shaToVersionID, manifest.Digest)
} else {
knownSHA[manifest.Digest] = true
}
}
}
return nil
}); err != nil {
return err
}
for sha := range knownSHA {
delete(shaToVersionID, sha)
}
if len(shaToVersionID) == 0 {
if foundAtLeastOneSHA256 {
log.Debug("All container images with a version matching sha256:* are referenced by an index manifest")
} else {
log.Debug("There are no container images with a version matching sha256:*")
}
log.Info("Nothing to cleanup")
return nil
}
found := len(shaToVersionID)
log.Warn("%d container image(s) with a version matching sha256:* are not referenced by an index manifest", found)
log.Debug("Deleting unreferenced image versions from `package_version`, `package_file` and `package_property` (%d at a time)", SHA256BatchSize)
packageVersionIDs := make([]int64, 0, SHA256BatchSize)
for _, id := range shaToVersionID {
packageVersionIDs = append(packageVersionIDs, id)
}
for len(packageVersionIDs) > 0 {
upper := min(len(packageVersionIDs), SHA256BatchSize)
versionIDs := packageVersionIDs[0:upper]
var packageFileIDs []int64
if err := db.GetEngine(ctx).Select("id").Table("package_file").In("version_id", versionIDs).Find(&packageFileIDs); err != nil {
return err
}
log.Info("Removing %d entries from `package_file` and `package_property`", len(packageFileIDs))
if _, err := db.GetEngine(ctx).In("id", packageFileIDs).Delete(&packages.PackageFile{}); err != nil {
return err
}
if _, err := db.GetEngine(ctx).In("ref_id", packageFileIDs).And("ref_type = ?", packages.PropertyTypeFile).Delete(&packages.PackageProperty{}); err != nil {
return err
}
log.Info("Removing %d entries from `package_version` and `package_property`", upper)
if _, err := db.GetEngine(ctx).In("id", versionIDs).Delete(&packages.PackageVersion{}); err != nil {
return err
}
if _, err := db.GetEngine(ctx).In("ref_id", versionIDs).And("ref_type = ?", packages.PropertyTypeVersion).Delete(&packages.PackageProperty{}); err != nil {
return err
}
packageVersionIDs = packageVersionIDs[upper:]
}
return committer.Commit()
}

View file

@ -0,0 +1,238 @@
// Copyright 2024 The Forgejo Authors. All rights reserved.
// SPDX-License-Identifier: GPL-3.0-or-later
package integration
import (
"bytes"
"encoding/base64"
"fmt"
"net/http"
"strings"
"testing"
"time"
"code.gitea.io/gitea/models/db"
packages_model "code.gitea.io/gitea/models/packages"
"code.gitea.io/gitea/models/unittest"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/log"
packages_module "code.gitea.io/gitea/modules/packages"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"
packages_cleanup "code.gitea.io/gitea/services/packages/cleanup"
packages_container "code.gitea.io/gitea/services/packages/container"
"code.gitea.io/gitea/tests"
oci "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestPackagesContainerCleanupSHA256(t *testing.T) {
defer tests.PrepareTestEnv(t, 1)()
defer test.MockVariableValue(&setting.Packages.Storage.Type, setting.LocalStorageType)()
defer test.MockVariableValue(&packages_container.SHA256BatchSize, 1)()
ctx := db.DefaultContext
user := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2})
cleanupAndCheckLogs := func(t *testing.T, expected ...string) {
t.Helper()
logChecker, cleanup := test.NewLogChecker(log.DEFAULT, log.TRACE)
logChecker.Filter(expected...)
logChecker.StopMark(packages_container.SHA256LogFinish)
defer cleanup()
require.NoError(t, packages_cleanup.CleanupExpiredData(ctx, -1*time.Hour))
logFiltered, logStopped := logChecker.Check(5 * time.Second)
assert.True(t, logStopped)
filtered := make([]bool, 0, len(expected))
for range expected {
filtered = append(filtered, true)
}
assert.EqualValues(t, filtered, logFiltered, expected)
}
userToken := ""
t.Run("Authenticate", func(t *testing.T) {
type TokenResponse struct {
Token string `json:"token"`
}
authenticate := []string{`Bearer realm="` + setting.AppURL + `v2/token",service="container_registry",scope="*"`}
t.Run("User", func(t *testing.T) {
req := NewRequest(t, "GET", fmt.Sprintf("%sv2", setting.AppURL))
resp := MakeRequest(t, req, http.StatusUnauthorized)
assert.ElementsMatch(t, authenticate, resp.Header().Values("WWW-Authenticate"))
req = NewRequest(t, "GET", fmt.Sprintf("%sv2/token", setting.AppURL)).
AddBasicAuth(user.Name)
resp = MakeRequest(t, req, http.StatusOK)
tokenResponse := &TokenResponse{}
DecodeJSON(t, resp, &tokenResponse)
assert.NotEmpty(t, tokenResponse.Token)
userToken = fmt.Sprintf("Bearer %s", tokenResponse.Token)
req = NewRequest(t, "GET", fmt.Sprintf("%sv2", setting.AppURL)).
AddTokenAuth(userToken)
MakeRequest(t, req, http.StatusOK)
})
})
image := "test"
multiTag := "multi"
url := fmt.Sprintf("%sv2/%s/%s", setting.AppURL, user.Name, image)
blobDigest := "sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4"
sha256ManifestDigest := "sha256:4305f5f5572b9a426b88909b036e52ee3cf3d7b9c1b01fac840e90747f56623d"
indexManifestDigest := "sha256:b992f98104ab25f60d78368a674ce6f6a49741f4e32729e8496067ed06174e9b"
uploadSHA256Version := func(t *testing.T) {
t.Helper()
blobContent, _ := base64.StdEncoding.DecodeString(`H4sIAAAJbogA/2IYBaNgFIxYAAgAAP//Lq+17wAEAAA=`)
req := NewRequestWithBody(t, "POST", fmt.Sprintf("%s/blobs/uploads?digest=%s", url, blobDigest), bytes.NewReader(blobContent)).
AddTokenAuth(userToken)
resp := MakeRequest(t, req, http.StatusCreated)
assert.Equal(t, fmt.Sprintf("/v2/%s/%s/blobs/%s", user.Name, image, blobDigest), resp.Header().Get("Location"))
assert.Equal(t, blobDigest, resp.Header().Get("Docker-Content-Digest"))
configDigest := "sha256:4607e093bec406eaadb6f3a340f63400c9d3a7038680744c406903766b938f0d"
configContent := `{"architecture":"amd64","config":{"Env":["PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"],"Cmd":["/true"],"ArgsEscaped":true,"Image":"sha256:9bd8b88dc68b80cffe126cc820e4b52c6e558eb3b37680bfee8e5f3ed7b8c257"},"container":"b89fe92a887d55c0961f02bdfbfd8ac3ddf66167db374770d2d9e9fab3311510","container_config":{"Hostname":"b89fe92a887d","Env":["PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"],"Cmd":["/bin/sh","-c","#(nop) ","CMD [\"/true\"]"],"ArgsEscaped":true,"Image":"sha256:9bd8b88dc68b80cffe126cc820e4b52c6e558eb3b37680bfee8e5f3ed7b8c257"},"created":"2022-01-01T00:00:00.000000000Z","docker_version":"20.10.12","history":[{"created":"2022-01-01T00:00:00.000000000Z","created_by":"/bin/sh -c #(nop) COPY file:0e7589b0c800daaf6fa460d2677101e4676dd9491980210cb345480e513f3602 in /true "},{"created":"2022-01-01T00:00:00.000000001Z","created_by":"/bin/sh -c #(nop) CMD [\"/true\"]","empty_layer":true}],"os":"linux","rootfs":{"type":"layers","diff_ids":["sha256:0ff3b91bdf21ecdf2f2f3d4372c2098a14dbe06cd678e8f0a85fd4902d00e2e2"]}}`
req = NewRequestWithBody(t, "POST", fmt.Sprintf("%s/blobs/uploads?digest=%s", url, configDigest), strings.NewReader(configContent)).
AddTokenAuth(userToken)
MakeRequest(t, req, http.StatusCreated)
sha256ManifestContent := `{"schemaVersion":2,"mediaType":"` + oci.MediaTypeImageManifest + `","config":{"mediaType":"application/vnd.docker.container.image.v1+json","digest":"sha256:4607e093bec406eaadb6f3a340f63400c9d3a7038680744c406903766b938f0d","size":1069},"layers":[{"mediaType":"application/vnd.docker.image.rootfs.diff.tar.gzip","digest":"sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4","size":32}]}`
req = NewRequestWithBody(t, "PUT", fmt.Sprintf("%s/manifests/%s", url, sha256ManifestDigest), strings.NewReader(sha256ManifestContent)).
AddTokenAuth(userToken).
SetHeader("Content-Type", oci.MediaTypeImageManifest)
resp = MakeRequest(t, req, http.StatusCreated)
assert.Equal(t, sha256ManifestDigest, resp.Header().Get("Docker-Content-Digest"))
req = NewRequest(t, "HEAD", fmt.Sprintf("%s/manifests/%s", url, sha256ManifestDigest)).
AddTokenAuth(userToken)
resp = MakeRequest(t, req, http.StatusOK)
assert.Equal(t, fmt.Sprintf("%d", len(sha256ManifestContent)), resp.Header().Get("Content-Length"))
assert.Equal(t, sha256ManifestDigest, resp.Header().Get("Docker-Content-Digest"))
}
uploadIndexManifest := func(t *testing.T) {
indexManifestContent := `{"schemaVersion":2,"mediaType":"` + oci.MediaTypeImageIndex + `","manifests":[{"mediaType":"application/vnd.docker.distribution.manifest.v2+json","digest":"` + sha256ManifestDigest + `","platform":{"os":"linux","architecture":"arm","variant":"v7"}}]}`
req := NewRequestWithBody(t, "PUT", fmt.Sprintf("%s/manifests/%s", url, multiTag), strings.NewReader(indexManifestContent)).
AddTokenAuth(userToken).
SetHeader("Content-Type", oci.MediaTypeImageIndex)
resp := MakeRequest(t, req, http.StatusCreated)
assert.Equal(t, indexManifestDigest, resp.Header().Get("Docker-Content-Digest"))
}
assertImageExists := func(t *testing.T, manifestDigest, blobDigest string) {
req := NewRequest(t, "HEAD", fmt.Sprintf("%s/manifests/%s", url, manifestDigest)).
AddTokenAuth(userToken)
MakeRequest(t, req, http.StatusOK)
req = NewRequest(t, "HEAD", fmt.Sprintf("%s/blobs/%s", url, blobDigest)).
AddTokenAuth(userToken)
MakeRequest(t, req, http.StatusOK)
}
assertImageNotExists := func(t *testing.T, manifestDigest, blobDigest string) {
req := NewRequest(t, "HEAD", fmt.Sprintf("%s/manifests/%s", url, manifestDigest)).
AddTokenAuth(userToken)
MakeRequest(t, req, http.StatusNotFound)
req = NewRequest(t, "HEAD", fmt.Sprintf("%s/blobs/%s", url, blobDigest)).
AddTokenAuth(userToken)
MakeRequest(t, req, http.StatusNotFound)
}
assertImageDeleted := func(t *testing.T, image, manifestDigest, blobDigest string, cleanup func()) {
t.Helper()
packageVersion := unittest.AssertExistsAndLoadBean(t, &packages_model.PackageVersion{Version: manifestDigest})
packageFile := unittest.AssertExistsAndLoadBean(t, &packages_model.PackageFile{VersionID: packageVersion.ID})
unittest.AssertExistsAndLoadBean(t, &packages_model.PackageProperty{RefID: packageFile.ID, RefType: packages_model.PropertyTypeVersion})
packageBlob := unittest.AssertExistsAndLoadBean(t, &packages_model.PackageBlob{ID: packageFile.BlobID})
contentStore := packages_module.NewContentStore()
require.NoError(t, contentStore.Has(packages_module.BlobHash256Key(packageBlob.HashSHA256)))
assertImageExists(t, manifestDigest, blobDigest)
cleanup()
assertImageNotExists(t, manifestDigest, blobDigest)
unittest.AssertNotExistsBean(t, &packages_model.PackageVersion{Version: manifestDigest})
unittest.AssertNotExistsBean(t, &packages_model.PackageFile{VersionID: packageVersion.ID})
unittest.AssertNotExistsBean(t, &packages_model.PackageProperty{RefID: packageFile.ID, RefType: packages_model.PropertyTypeVersion})
unittest.AssertNotExistsBean(t, &packages_model.PackageBlob{ID: packageFile.BlobID})
assert.Error(t, contentStore.Has(packages_module.BlobHash256Key(packageBlob.HashSHA256)))
}
assertImageAndPackageDeleted := func(t *testing.T, image, manifestDigest, blobDigest string, cleanup func()) {
t.Helper()
unittest.AssertExistsAndLoadBean(t, &packages_model.Package{Name: image})
assertImageDeleted(t, image, manifestDigest, blobDigest, cleanup)
unittest.AssertNotExistsBean(t, &packages_model.Package{Name: image})
}
t.Run("Nothing to look at", func(t *testing.T) {
cleanupAndCheckLogs(t, "There are no container images with a version matching sha256:*")
})
uploadSHA256Version(t)
t.Run("Dangling image found", func(t *testing.T) {
assertImageAndPackageDeleted(t, image, sha256ManifestDigest, blobDigest, func() {
cleanupAndCheckLogs(t,
"Removing 3 entries from `package_file` and `package_property`",
"Removing 1 entries from `package_version` and `package_property`",
)
})
})
uploadSHA256Version(t)
uploadIndexManifest(t)
t.Run("Corrupted index manifest metadata is ignored", func(t *testing.T) {
assertImageExists(t, sha256ManifestDigest, blobDigest)
_, err := db.GetEngine(ctx).Table("package_version").Where("version = ?", multiTag).Update(&packages_model.PackageVersion{MetadataJSON: `corrupted "manifests":[{ bad`})
require.NoError(t, err)
// do not expect the package to be deleted because it contains
// corrupted metadata that prevents that from happening
assertImageDeleted(t, image, sha256ManifestDigest, blobDigest, func() {
cleanupAndCheckLogs(t,
"Removing 3 entries from `package_file` and `package_property`",
"Removing 1 entries from `package_version` and `package_property`",
"is not a JSON string containing valid metadata",
)
})
})
uploadSHA256Version(t)
uploadIndexManifest(t)
t.Run("Image found but referenced", func(t *testing.T) {
assertImageExists(t, sha256ManifestDigest, blobDigest)
cleanupAndCheckLogs(t,
"All container images with a version matching sha256:* are referenced by an index manifest",
)
assertImageExists(t, sha256ManifestDigest, blobDigest)
})
}