From f19f31ac738637b03aa61d8868befb6138608528 Mon Sep 17 00:00:00 2001 From: Earl Warren Date: Fri, 26 Jul 2024 19:39:19 +0200 Subject: [PATCH] cron task to cleanup dangling container images with version sha256:* Fixes: https://codeberg.org/forgejo/forgejo/issues/4378 --- services/packages/container/cleanup.go | 3 + services/packages/container/cleanup_sha256.go | 142 +++++++++++ ..._packages_container_cleanup_sha256_test.go | 238 ++++++++++++++++++ 3 files changed, 383 insertions(+) create mode 100644 services/packages/container/cleanup_sha256.go create mode 100644 tests/integration/api_packages_container_cleanup_sha256_test.go diff --git a/services/packages/container/cleanup.go b/services/packages/container/cleanup.go index 3f5f43bbc0..b5563c688f 100644 --- a/services/packages/container/cleanup.go +++ b/services/packages/container/cleanup.go @@ -21,6 +21,9 @@ func Cleanup(ctx context.Context, olderThan time.Duration) error { if err := cleanupExpiredBlobUploads(ctx, olderThan); err != nil { return err } + if err := CleanupSHA256(ctx, olderThan); err != nil { + return err + } return cleanupExpiredUploadedBlobs(ctx, olderThan) } diff --git a/services/packages/container/cleanup_sha256.go b/services/packages/container/cleanup_sha256.go new file mode 100644 index 0000000000..558aea3a55 --- /dev/null +++ b/services/packages/container/cleanup_sha256.go @@ -0,0 +1,142 @@ +// Copyright 2024 The Forgejo Authors. All rights reserved. 
+// SPDX-License-Identifier: GPL-3.0-or-later + +package container + +import ( + "context" + "strings" + "time" + + "code.gitea.io/gitea/models/db" + "code.gitea.io/gitea/models/packages" + "code.gitea.io/gitea/modules/json" + "code.gitea.io/gitea/modules/log" + container_module "code.gitea.io/gitea/modules/packages/container" +) + +var ( + SHA256BatchSize = 500 + SHA256Log = "cleanup dangling images with a sha256:* version" + SHA256LogStart = "Start to " + SHA256Log + SHA256LogFinish = "Finished to " + SHA256Log +) + +func CleanupSHA256(ctx context.Context, olderThan time.Duration) error { + log.Info(SHA256LogStart) + err := cleanupSHA256(ctx, olderThan) + log.Info(SHA256LogFinish) + return err +} + +func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error { + ctx, committer, err := db.TxContext(outerCtx) + if err != nil { + return err + } + defer committer.Close() + + foundAtLeastOneSHA256 := false + shaToVersionID := make(map[string]int64, 100) + knownSHA := make(map[string]any, 100) + + log.Debug("Look for all package_version.version that start with sha256:") + + old := time.Now().Add(-olderThan).Unix() + + // Iterate over all container versions in ascending order and store + // in shaToVersionID all versions with a sha256: prefix. If an index + // manifest is found, the sha256: digest it references are removed + // from shaToVersionID. If the sha256: digest found in an index + // manifest is not already in shaToVersionID, it is stored in + // knownSHA to be dealt with later. + // + // Although it is theoretically possible that a sha256: is uploaded + // after the index manifest that references it, this is not the + // normal order of operations. First the sha256: version is uploaded + // and then the index manifest. When the iteration completes, + // knownSHA will therefore be empty most of the time and + // shaToVersionID will only contain unreferenced sha256: versions. + if err := db.GetEngine(ctx). 
+ Select("`package_version`.`id`, `package_version`.`lower_version`, `package_version`.`metadata_json`"). + Join("INNER", "`package`", "`package`.`id` = `package_version`.`package_id`"). + Where("`package`.`type` = ? AND `package_version`.`created_unix` < ?", packages.TypeContainer, old). + OrderBy("`package_version`.`id` ASC"). + Iterate(new(packages.PackageVersion), func(_ int, bean any) error { + v := bean.(*packages.PackageVersion) + if strings.HasPrefix(v.LowerVersion, "sha256:") { + shaToVersionID[v.LowerVersion] = v.ID + foundAtLeastOneSHA256 = true + } else if strings.Contains(v.MetadataJSON, `"manifests":[{`) { + var metadata container_module.Metadata + if err := json.Unmarshal([]byte(v.MetadataJSON), &metadata); err != nil { + log.Error("package_version.id = %d package_version.metadata_json %s is not a JSON string containing valid metadata. It was ignored but it is an inconsistency in the database that should be looked at. %v", v.ID, v.MetadataJSON, err) + return nil + } + for _, manifest := range metadata.Manifests { + if _, ok := shaToVersionID[manifest.Digest]; ok { + delete(shaToVersionID, manifest.Digest) + } else { + knownSHA[manifest.Digest] = true + } + } + } + return nil + }); err != nil { + return err + } + + for sha := range knownSHA { + delete(shaToVersionID, sha) + } + + if len(shaToVersionID) == 0 { + if foundAtLeastOneSHA256 { + log.Debug("All container images with a version matching sha256:* are referenced by an index manifest") + } else { + log.Debug("There are no container images with a version matching sha256:*") + } + log.Info("Nothing to cleanup") + return nil + } + + found := len(shaToVersionID) + + log.Warn("%d container image(s) with a version matching sha256:* are not referenced by an index manifest", found) + + log.Debug("Deleting unreferenced image versions from `package_version`, `package_file` and `package_property` (%d at a time)", SHA256BatchSize) + + packageVersionIDs := make([]int64, 0, SHA256BatchSize) + for _, id := 
range shaToVersionID { + packageVersionIDs = append(packageVersionIDs, id) + } + + for len(packageVersionIDs) > 0 { + upper := min(len(packageVersionIDs), SHA256BatchSize) + versionIDs := packageVersionIDs[0:upper] + + var packageFileIDs []int64 + if err := db.GetEngine(ctx).Select("id").Table("package_file").In("version_id", versionIDs).Find(&packageFileIDs); err != nil { + return err + } + log.Info("Removing %d entries from `package_file` and `package_property`", len(packageFileIDs)) + if _, err := db.GetEngine(ctx).In("id", packageFileIDs).Delete(&packages.PackageFile{}); err != nil { + return err + } + if _, err := db.GetEngine(ctx).In("ref_id", packageFileIDs).And("ref_type = ?", packages.PropertyTypeFile).Delete(&packages.PackageProperty{}); err != nil { + return err + } + + log.Info("Removing %d entries from `package_version` and `package_property`", upper) + if _, err := db.GetEngine(ctx).In("id", versionIDs).Delete(&packages.PackageVersion{}); err != nil { + return err + } + if _, err := db.GetEngine(ctx).In("ref_id", versionIDs).And("ref_type = ?", packages.PropertyTypeVersion).Delete(&packages.PackageProperty{}); err != nil { + return err + } + + packageVersionIDs = packageVersionIDs[upper:] + } + + return committer.Commit() +} diff --git a/tests/integration/api_packages_container_cleanup_sha256_test.go b/tests/integration/api_packages_container_cleanup_sha256_test.go new file mode 100644 index 0000000000..eb63eff720 --- /dev/null +++ b/tests/integration/api_packages_container_cleanup_sha256_test.go @@ -0,0 +1,238 @@ +// Copyright 2024 The Forgejo Authors. All rights reserved. 
// SPDX-License-Identifier: GPL-3.0-or-later

package integration

import (
	"bytes"
	"encoding/base64"
	"fmt"
	"net/http"
	"strings"
	"testing"
	"time"

	"code.gitea.io/gitea/models/db"
	packages_model "code.gitea.io/gitea/models/packages"
	"code.gitea.io/gitea/models/unittest"
	user_model "code.gitea.io/gitea/models/user"
	"code.gitea.io/gitea/modules/log"
	packages_module "code.gitea.io/gitea/modules/packages"
	"code.gitea.io/gitea/modules/setting"
	"code.gitea.io/gitea/modules/test"
	packages_cleanup "code.gitea.io/gitea/services/packages/cleanup"
	packages_container "code.gitea.io/gitea/services/packages/container"
	"code.gitea.io/gitea/tests"

	oci "github.com/opencontainers/image-spec/specs-go/v1"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestPackagesContainerCleanupSHA256 exercises the sha256:* dangling-image
// cleanup end to end: it uploads container blobs/manifests through the OCI
// registry HTTP API, runs the package cleanup service, and verifies the
// database rows, content store, and log output.
func TestPackagesContainerCleanupSHA256(t *testing.T) {
	defer tests.PrepareTestEnv(t, 1)()
	defer test.MockVariableValue(&setting.Packages.Storage.Type, setting.LocalStorageType)()
	// Batch size 1 forces the cleanup to loop once per version, exercising
	// the batching logic even with a single dangling image.
	defer test.MockVariableValue(&packages_container.SHA256BatchSize, 1)()

	ctx := db.DefaultContext

	user := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2})

	// cleanupAndCheckLogs runs the full cleanup service and asserts that
	// every `expected` string appeared in the logs before the cleanup's
	// finish marker was logged.
	cleanupAndCheckLogs := func(t *testing.T, expected ...string) {
		t.Helper()
		logChecker, cleanup := test.NewLogChecker(log.DEFAULT, log.TRACE)
		logChecker.Filter(expected...)
		logChecker.StopMark(packages_container.SHA256LogFinish)
		defer cleanup()

		// olderThan is -1h so even freshly-created versions qualify.
		require.NoError(t, packages_cleanup.CleanupExpiredData(ctx, -1*time.Hour))

		logFiltered, logStopped := logChecker.Check(5 * time.Second)
		assert.True(t, logStopped)
		// Expect every filter string to have matched: a slice of all-true.
		filtered := make([]bool, 0, len(expected))
		for range expected {
			filtered = append(filtered, true)
		}
		assert.EqualValues(t, filtered, logFiltered, expected)
	}

	userToken := ""

	// Obtain a registry bearer token for subsequent OCI API requests.
	t.Run("Authenticate", func(t *testing.T) {
		type TokenResponse struct {
			Token string `json:"token"`
		}

		authenticate := []string{`Bearer realm="` + setting.AppURL + `v2/token",service="container_registry",scope="*"`}

		t.Run("User", func(t *testing.T) {
			// Unauthenticated /v2 must return 401 with a WWW-Authenticate
			// challenge pointing at the token endpoint.
			req := NewRequest(t, "GET", fmt.Sprintf("%sv2", setting.AppURL))
			resp := MakeRequest(t, req, http.StatusUnauthorized)

			assert.ElementsMatch(t, authenticate, resp.Header().Values("WWW-Authenticate"))

			req = NewRequest(t, "GET", fmt.Sprintf("%sv2/token", setting.AppURL)).
				AddBasicAuth(user.Name)
			resp = MakeRequest(t, req, http.StatusOK)

			tokenResponse := &TokenResponse{}
			DecodeJSON(t, resp, &tokenResponse)

			assert.NotEmpty(t, tokenResponse.Token)

			userToken = fmt.Sprintf("Bearer %s", tokenResponse.Token)

			// The token must grant access to /v2.
			req = NewRequest(t, "GET", fmt.Sprintf("%sv2", setting.AppURL)).
				AddTokenAuth(userToken)
			MakeRequest(t, req, http.StatusOK)
		})
	})

	image := "test"
	multiTag := "multi"

	url := fmt.Sprintf("%sv2/%s/%s", setting.AppURL, user.Name, image)

	// Fixed digests matching the fixed blob/config/manifest contents below.
	blobDigest := "sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4"
	sha256ManifestDigest := "sha256:4305f5f5572b9a426b88909b036e52ee3cf3d7b9c1b01fac840e90747f56623d"
	indexManifestDigest := "sha256:b992f98104ab25f60d78368a674ce6f6a49741f4e32729e8496067ed06174e9b"

	// uploadSHA256Version pushes a layer blob, a config blob and an image
	// manifest addressed only by its sha256: digest (no tag) — i.e. the
	// kind of version the cleanup targets when unreferenced.
	uploadSHA256Version := func(t *testing.T) {
		t.Helper()

		// Gzipped empty tar, base64-encoded; matches blobDigest.
		blobContent, _ := base64.StdEncoding.DecodeString(`H4sIAAAJbogA/2IYBaNgFIxYAAgAAP//Lq+17wAEAAA=`)

		req := NewRequestWithBody(t, "POST", fmt.Sprintf("%s/blobs/uploads?digest=%s", url, blobDigest), bytes.NewReader(blobContent)).
			AddTokenAuth(userToken)
		resp := MakeRequest(t, req, http.StatusCreated)

		assert.Equal(t, fmt.Sprintf("/v2/%s/%s/blobs/%s", user.Name, image, blobDigest), resp.Header().Get("Location"))
		assert.Equal(t, blobDigest, resp.Header().Get("Docker-Content-Digest"))

		configDigest := "sha256:4607e093bec406eaadb6f3a340f63400c9d3a7038680744c406903766b938f0d"
		configContent := `{"architecture":"amd64","config":{"Env":["PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"],"Cmd":["/true"],"ArgsEscaped":true,"Image":"sha256:9bd8b88dc68b80cffe126cc820e4b52c6e558eb3b37680bfee8e5f3ed7b8c257"},"container":"b89fe92a887d55c0961f02bdfbfd8ac3ddf66167db374770d2d9e9fab3311510","container_config":{"Hostname":"b89fe92a887d","Env":["PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"],"Cmd":["/bin/sh","-c","#(nop) ","CMD [\"/true\"]"],"ArgsEscaped":true,"Image":"sha256:9bd8b88dc68b80cffe126cc820e4b52c6e558eb3b37680bfee8e5f3ed7b8c257"},"created":"2022-01-01T00:00:00.000000000Z","docker_version":"20.10.12","history":[{"created":"2022-01-01T00:00:00.000000000Z","created_by":"/bin/sh -c #(nop) COPY file:0e7589b0c800daaf6fa460d2677101e4676dd9491980210cb345480e513f3602 in /true "},{"created":"2022-01-01T00:00:00.000000001Z","created_by":"/bin/sh -c #(nop) CMD [\"/true\"]","empty_layer":true}],"os":"linux","rootfs":{"type":"layers","diff_ids":["sha256:0ff3b91bdf21ecdf2f2f3d4372c2098a14dbe06cd678e8f0a85fd4902d00e2e2"]}}`

		req = NewRequestWithBody(t, "POST", fmt.Sprintf("%s/blobs/uploads?digest=%s", url, configDigest), strings.NewReader(configContent)).
			AddTokenAuth(userToken)
		MakeRequest(t, req, http.StatusCreated)

		sha256ManifestContent := `{"schemaVersion":2,"mediaType":"` + oci.MediaTypeImageManifest + `","config":{"mediaType":"application/vnd.docker.container.image.v1+json","digest":"sha256:4607e093bec406eaadb6f3a340f63400c9d3a7038680744c406903766b938f0d","size":1069},"layers":[{"mediaType":"application/vnd.docker.image.rootfs.diff.tar.gzip","digest":"sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4","size":32}]}`
		// PUT the manifest by digest (not by tag): this creates the
		// package_version with a sha256:* version string.
		req = NewRequestWithBody(t, "PUT", fmt.Sprintf("%s/manifests/%s", url, sha256ManifestDigest), strings.NewReader(sha256ManifestContent)).
			AddTokenAuth(userToken).
			SetHeader("Content-Type", oci.MediaTypeImageManifest)
		resp = MakeRequest(t, req, http.StatusCreated)

		assert.Equal(t, sha256ManifestDigest, resp.Header().Get("Docker-Content-Digest"))

		req = NewRequest(t, "HEAD", fmt.Sprintf("%s/manifests/%s", url, sha256ManifestDigest)).
			AddTokenAuth(userToken)
		resp = MakeRequest(t, req, http.StatusOK)

		assert.Equal(t, fmt.Sprintf("%d", len(sha256ManifestContent)), resp.Header().Get("Content-Length"))
		assert.Equal(t, sha256ManifestDigest, resp.Header().Get("Docker-Content-Digest"))
	}

	// uploadIndexManifest pushes an index manifest under the `multi` tag
	// that references sha256ManifestDigest, making that version non-dangling.
	uploadIndexManifest := func(t *testing.T) {
		indexManifestContent := `{"schemaVersion":2,"mediaType":"` + oci.MediaTypeImageIndex + `","manifests":[{"mediaType":"application/vnd.docker.distribution.manifest.v2+json","digest":"` + sha256ManifestDigest + `","platform":{"os":"linux","architecture":"arm","variant":"v7"}}]}`
		req := NewRequestWithBody(t, "PUT", fmt.Sprintf("%s/manifests/%s", url, multiTag), strings.NewReader(indexManifestContent)).
			AddTokenAuth(userToken).
			SetHeader("Content-Type", oci.MediaTypeImageIndex)
		resp := MakeRequest(t, req, http.StatusCreated)

		assert.Equal(t, indexManifestDigest, resp.Header().Get("Docker-Content-Digest"))
	}

	// assertImageExists checks the manifest and blob are HEAD-able via the
	// registry API.
	assertImageExists := func(t *testing.T, manifestDigest, blobDigest string) {
		req := NewRequest(t, "HEAD", fmt.Sprintf("%s/manifests/%s", url, manifestDigest)).
			AddTokenAuth(userToken)
		MakeRequest(t, req, http.StatusOK)

		req = NewRequest(t, "HEAD", fmt.Sprintf("%s/blobs/%s", url, blobDigest)).
			AddTokenAuth(userToken)
		MakeRequest(t, req, http.StatusOK)
	}

	// assertImageNotExists is the 404 counterpart of assertImageExists.
	assertImageNotExists := func(t *testing.T, manifestDigest, blobDigest string) {
		req := NewRequest(t, "HEAD", fmt.Sprintf("%s/manifests/%s", url, manifestDigest)).
			AddTokenAuth(userToken)
		MakeRequest(t, req, http.StatusNotFound)

		req = NewRequest(t, "HEAD", fmt.Sprintf("%s/blobs/%s", url, blobDigest)).
			AddTokenAuth(userToken)
		MakeRequest(t, req, http.StatusNotFound)
	}

	// assertImageDeleted verifies all rows and content-store entries for the
	// version exist before cleanup() runs and are gone afterwards.
	// NOTE: the `image` parameter is not used here; it is kept for call-site
	// symmetry with assertImageAndPackageDeleted.
	assertImageDeleted := func(t *testing.T, image, manifestDigest, blobDigest string, cleanup func()) {
		t.Helper()
		packageVersion := unittest.AssertExistsAndLoadBean(t, &packages_model.PackageVersion{Version: manifestDigest})
		packageFile := unittest.AssertExistsAndLoadBean(t, &packages_model.PackageFile{VersionID: packageVersion.ID})
		// NOTE(review): RefID is a package_file ID but RefType is
		// PropertyTypeVersion — this pairing looks inconsistent
		// (PropertyTypeFile expected for a file ref?); confirm against the
		// package_property model before relying on this assertion.
		unittest.AssertExistsAndLoadBean(t, &packages_model.PackageProperty{RefID: packageFile.ID, RefType: packages_model.PropertyTypeVersion})
		packageBlob := unittest.AssertExistsAndLoadBean(t, &packages_model.PackageBlob{ID: packageFile.BlobID})
		contentStore := packages_module.NewContentStore()
		require.NoError(t, contentStore.Has(packages_module.BlobHash256Key(packageBlob.HashSHA256)))

		assertImageExists(t, manifestDigest, blobDigest)

		cleanup()

		assertImageNotExists(t, manifestDigest, blobDigest)

		unittest.AssertNotExistsBean(t, &packages_model.PackageVersion{Version: manifestDigest})
		unittest.AssertNotExistsBean(t, &packages_model.PackageFile{VersionID: packageVersion.ID})
		unittest.AssertNotExistsBean(t, &packages_model.PackageProperty{RefID: packageFile.ID, RefType: packages_model.PropertyTypeVersion})
		unittest.AssertNotExistsBean(t, &packages_model.PackageBlob{ID: packageFile.BlobID})
		assert.Error(t, contentStore.Has(packages_module.BlobHash256Key(packageBlob.HashSHA256)))
	}

	// assertImageAndPackageDeleted additionally verifies the package row
	// itself is removed once its last version is cleaned up.
	assertImageAndPackageDeleted := func(t *testing.T, image, manifestDigest, blobDigest string, cleanup func()) {
		t.Helper()
		unittest.AssertExistsAndLoadBean(t, &packages_model.Package{Name: image})
		assertImageDeleted(t, image, manifestDigest, blobDigest, cleanup)
		unittest.AssertNotExistsBean(t, &packages_model.Package{Name: image})
	}

	// No container versions exist yet: cleanup must log and do nothing.
	t.Run("Nothing to look at", func(t *testing.T) {
		cleanupAndCheckLogs(t, "There are no container images with a version matching sha256:*")
	})

	uploadSHA256Version(t)

	// A sha256:* version without any index manifest referencing it must be
	// deleted, along with its package (no other versions remain).
	t.Run("Dangling image found", func(t *testing.T) {
		assertImageAndPackageDeleted(t, image, sha256ManifestDigest, blobDigest, func() {
			cleanupAndCheckLogs(t,
				"Removing 3 entries from `package_file` and `package_property`",
				"Removing 1 entries from `package_version` and `package_property`",
			)
		})
	})

	uploadSHA256Version(t)
	uploadIndexManifest(t)

	// Corrupt the index manifest's metadata: cleanup must log the parse
	// error, ignore the index, and therefore treat the sha256:* version as
	// dangling and delete it.
	t.Run("Corrupted index manifest metadata is ignored", func(t *testing.T) {
		assertImageExists(t, sha256ManifestDigest, blobDigest)
		_, err := db.GetEngine(ctx).Table("package_version").Where("version = ?", multiTag).Update(&packages_model.PackageVersion{MetadataJSON: `corrupted "manifests":[{ bad`})
		require.NoError(t, err)

		// do not expect the package to be deleted because it contains
		// corrupted metadata that prevents that from happening
		assertImageDeleted(t, image, sha256ManifestDigest, blobDigest, func() {
			cleanupAndCheckLogs(t,
				"Removing 3 entries from `package_file` and `package_property`",
				"Removing 1 entries from `package_version` and `package_property`",
				"is not a JSON string containing valid metadata",
			)
		})
	})

	uploadSHA256Version(t)
	uploadIndexManifest(t)

	// A sha256:* version referenced by a valid index manifest must survive.
	t.Run("Image found but referenced", func(t *testing.T) {
		assertImageExists(t, sha256ManifestDigest, blobDigest)
		cleanupAndCheckLogs(t,
			"All container images with a version matching sha256:* are referenced by an index manifest",
		)
		assertImageExists(t, sha256ManifestDigest, blobDigest)
	})
}