[ENHANCEMENT] Improve caching of contributor stats

- It was noticed [in the Forgejo Matrix channel](https://matrix.to/#/!qjPHwFPdxhpLkXMkyP:matrix.org/$vk78UR0eFCwQMDMTZ7-DWjMVB_LIAwHW6SkjhEcGkQQ?via=matrix.org) that generating the contributor stats for the Forgejo project was taking quite a while on codeberg.org. This was compounded by the fact that a few moments later the stats were being generated yet again; it looked as if they weren't being cached at all, even though they were.
- The problem was that the cache TTL was hardcoded to ten minutes instead of using the configured TTL. This patch changes that by using the configured TTL for the contributor cache, as this is a computationally heavy operation and should be cached for as long as possible for a good user experience. This doesn't impact the accuracy of the feature, because the commit ID of the default branch is used in the cache key (see the sketch after this list).
- Also changed in this patch: errors are no longer cached and are instead logged, which is more helpful to the administrator. For the user essentially nothing changes; on an error the contributor stats simply look like they are loading indefinitely.
- Realistically, testing this isn't possible, as the cache library Forgejo currently uses doesn't expose the TTL or expiration time of a key. Manually testing this behavior would also be quite lengthy: one of the steps would have to be "wait ten minutes", followed by a description of how to tell whether the data was served from the cache or freshly generated. And because several cache backends can be configured, it would be hard to write down how to check the TTL of a key for each of them (for some it may not even be possible).
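
To make the core of the change concrete, here is a condensed sketch assembled from the hunks below. It shows only the tail end of the generation logic; the function name and the `stats` parameter type are placeholders rather than the real signature, but the cache call and log call mirror the patch: the JSON string is stored with the configured TTL (`setting.CacheService.TTLSeconds()`), and errors are logged instead of cached.

```go
package repository

import (
	"fmt"

	repo_model "code.gitea.io/gitea/models/repo"
	"code.gitea.io/gitea/modules/json"
	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/setting"

	"gitea.com/go-chi/cache"
)

// storeContributorStats is an illustrative sketch (not the real function) of
// how the result is cached after this patch.
func storeContributorStats(c cache.Cache, repo *repo_model.Repository, revision string, stats any) {
	cacheKey := fmt.Sprintf("GetContributorStats/%s/%s", repo.FullName(), revision)

	data, err := json.Marshal(stats)
	if err != nil {
		// Errors are no longer written to the cache; the key simply stays
		// absent, so the user keeps seeing the loading state while the
		// administrator gets a log entry to act on.
		log.Error("json.Marshal[repo=%q revision=%q]: %v", repo.FullName(), revision, err)
		return
	}

	// Store the data as a string so the type returned from the cache is
	// uniform. TTLSeconds() reflects the configured `[cache].ITEM_TTL`
	// instead of the previously hardcoded 60 * 10 seconds.
	_ = c.Put(cacheKey, string(data), setting.CacheService.TTLSeconds())
}
```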
Gusted 2024-07-06 13:08:26 +02:00
parent 3c7a830fda
commit 70a7d6a0de
No known key found for this signature in database
GPG key ID: FD821B732837125F
3 changed files with 18 additions and 17 deletions

View file

@@ -0,0 +1 @@
The caching of contributor stats (the data used by `/<user>/<repo>/activity/recent-commits`) was improved to use the cache TTL from the configuration (`[cache].ITEM_TTL`) instead of a hardcoded TTL of ten minutes. Generating these stats is computationally heavy and makes a lot of requests to the database and to Git on repositories with many commits, so the result should be cached for longer than the previously hardcoded ten minutes.
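
For reference, the setting mentioned above lives in the `[cache]` section of `app.ini`. The snippet below only illustrates where the TTL comes from; the value shown is an example, not a recommendation (check the Forgejo documentation of `[cache].ITEM_TTL` for the actual default).

```ini
; app.ini (illustrative excerpt)
[cache]
; TTL for cached items; after this patch the contributor stats cache
; honours this value instead of a hardcoded ten minutes.
ITEM_TTL = 16h
```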

View file

@@ -22,15 +22,13 @@ import (
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
api "code.gitea.io/gitea/modules/structs"
"gitea.com/go-chi/cache"
)
const (
contributorStatsCacheKey = "GetContributorStats/%s/%s"
contributorStatsCacheTimeout int64 = 60 * 10
)
const contributorStatsCacheKey = "GetContributorStats/%s/%s"
var (
ErrAwaitGeneration = errors.New("generation took longer than ")
@@ -211,8 +209,7 @@ func generateContributorStats(genDone chan struct{}, cache cache.Cache, cacheKey
gitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, repo)
if err != nil {
err := fmt.Errorf("OpenRepository: %w", err)
_ = cache.Put(cacheKey, err, contributorStatsCacheTimeout)
log.Error("OpenRepository[repo=%q]: %v", repo.FullName(), err)
return
}
defer closer.Close()
@@ -222,13 +219,11 @@ func generateContributorStats(genDone chan struct{}, cache cache.Cache, cacheKey
}
extendedCommitStats, err := getExtendedCommitStats(gitRepo, revision)
if err != nil {
err := fmt.Errorf("ExtendedCommitStats: %w", err)
_ = cache.Put(cacheKey, err, contributorStatsCacheTimeout)
log.Error("getExtendedCommitStats[repo=%q revision=%q]: %v", repo.FullName(), revision, err)
return
}
if len(extendedCommitStats) == 0 {
err := fmt.Errorf("no commit stats returned for revision '%s'", revision)
_ = cache.Put(cacheKey, err, contributorStatsCacheTimeout)
log.Error("No commit stats were returned [repo=%q revision=%q]", repo.FullName(), revision)
return
}
@@ -312,14 +307,13 @@ func generateContributorStats(genDone chan struct{}, cache cache.Cache, cacheKey
data, err := json.Marshal(contributorsCommitStats)
if err != nil {
err := fmt.Errorf("couldn't marshal the data: %w", err)
_ = cache.Put(cacheKey, err, contributorStatsCacheTimeout)
log.Error("json.Marshal[repo=%q revision=%q]: %v", repo.FullName(), revision, err)
return
}
// Store the data as a string, to make it uniform what data type is returned
// from caches.
_ = cache.Put(cacheKey, string(data), contributorStatsCacheTimeout)
_ = cache.Put(cacheKey, string(data), setting.CacheService.TTLSeconds())
generateLock.Delete(cacheKey)
if genDone != nil {
genDone <- struct{}{}

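As a complement to the diff above, here is a rough, hypothetical sketch of the caller side. It is not the actual `GetContributorStats` implementation; it only illustrates the behavioral consequence described in the pull request: since errors are no longer cached, a failed generation leaves the key absent, so the caller keeps reporting "not ready yet" and the UI shows an indefinite loading state.

```go
package repository

import "gitea.com/go-chi/cache"

// readContributorStats is a hypothetical helper, sketched for illustration.
// It returns the cached JSON string and whether it was available.
func readContributorStats(c cache.Cache, cacheKey string) (string, bool) {
	if !c.IsExist(cacheKey) {
		// Before this patch an error value could have been cached under this
		// key; now the key is simply missing until generation succeeds, and
		// the failure is visible only in the logs.
		return "", false
	}

	data, ok := c.Get(cacheKey).(string)
	if !ok {
		// Only JSON strings are stored after this patch, so anything else is
		// treated as "not available".
		return "", false
	}
	return data, true
}
```
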
View file

@@ -6,12 +6,14 @@ package repository
import (
"slices"
"testing"
"time"
"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/models/unittest"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/test"
"gitea.com/go-chi/cache"
"github.com/stretchr/testify/assert"
@@ -27,10 +29,14 @@ func TestRepository_ContributorsGraph(t *testing.T) {
})
assert.NoError(t, err)
lc, cleanup := test.NewLogChecker(log.DEFAULT, log.INFO)
lc.StopMark(`getExtendedCommitStats[repo="user2/repo2" revision="404ref"]: object does not exist [id: 404ref, rel_path: ]`)
defer cleanup()
generateContributorStats(nil, mockCache, "key", repo, "404ref")
err, isErr := mockCache.Get("key").(error)
assert.True(t, isErr)
assert.ErrorAs(t, err, &git.ErrNotExist{})
assert.False(t, mockCache.IsExist("key"))
_, stopped := lc.Check(100 * time.Millisecond)
assert.True(t, stopped)
generateContributorStats(nil, mockCache, "key2", repo, "master")
dataString, isData := mockCache.Get("key2").(string)