From a9ba7379fe97fd8a1157ed076b268a3b4f2454b2 Mon Sep 17 00:00:00 2001 From: zeripath Date: Sat, 28 Jan 2023 17:56:16 +0000 Subject: [PATCH] Improve checkIfPRContentChanged (#22611) (#22644) Backport #22611 The code for checking if a commit has caused a change in a PR is extremely inefficient and affects the head repository instead of using a temporary repository. This PR therefore makes several significant improvements: * A temporary repo like that used in merging. * The diff code is then significant improved to use a three-way diff instead of comparing diffs (possibly binary) line-by-line - in memory... Ref #22578 Signed-off-by: Andrew Thornton --- modules/util/io.go | 22 +++++++++++ services/pull/pull.go | 88 ++++++++++++++++++------------------------- 2 files changed, 59 insertions(+), 51 deletions(-) diff --git a/modules/util/io.go b/modules/util/io.go index d765e27733..f3a24736a8 100644 --- a/modules/util/io.go +++ b/modules/util/io.go @@ -5,6 +5,7 @@ package util import ( + "errors" "io" ) @@ -18,3 +19,24 @@ func ReadAtMost(r io.Reader, buf []byte) (n int, err error) { } return n, err } + +// ErrNotEmpty is an error reported when there is a non-empty reader +var ErrNotEmpty = errors.New("not-empty") + +// IsEmptyReader reads a reader and ensures it is empty +func IsEmptyReader(r io.Reader) (err error) { + var buf [1]byte + + for { + n, err := r.Read(buf[:]) + if err != nil { + if err == io.EOF { + return nil + } + return err + } + if n > 0 { + return ErrNotEmpty + } + } +} diff --git a/services/pull/pull.go b/services/pull/pull.go index 5f8bd6b671..fbfc31d4e7 100644 --- a/services/pull/pull.go +++ b/services/pull/pull.go @@ -5,14 +5,12 @@ package pull import ( - "bufio" - "bytes" "context" "fmt" "io" + "os" "regexp" "strings" - "time" "code.gitea.io/gitea/models" "code.gitea.io/gitea/models/db" @@ -30,6 +28,7 @@ import ( repo_module "code.gitea.io/gitea/modules/repository" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/sync" + "code.gitea.io/gitea/modules/util" issue_service "code.gitea.io/gitea/services/issue" ) @@ -352,69 +351,56 @@ func AddTestPullRequestTask(doer *user_model.User, repoID int64, branch string, // checkIfPRContentChanged checks if diff to target branch has changed by push // A commit can be considered to leave the PR untouched if the patch/diff with its merge base is unchanged func checkIfPRContentChanged(ctx context.Context, pr *issues_model.PullRequest, oldCommitID, newCommitID string) (hasChanged bool, err error) { - if err = pr.LoadHeadRepoCtx(ctx); err != nil { - return false, fmt.Errorf("LoadHeadRepo: %w", err) - } else if pr.HeadRepo == nil { - // corrupt data assumed changed - return true, nil + tmpBasePath, err := createTemporaryRepo(ctx, pr) + if err != nil { + log.Error("CreateTemporaryRepo: %v", err) + return false, err } + defer func() { + if err := repo_module.RemoveTemporaryPath(tmpBasePath); err != nil { + log.Error("checkIfPRContentChanged: RemoveTemporaryPath: %s", err) + } + }() - if err = pr.LoadBaseRepoCtx(ctx); err != nil { - return false, fmt.Errorf("LoadBaseRepo: %w", err) - } - - headGitRepo, err := git.OpenRepository(ctx, pr.HeadRepo.RepoPath()) + tmpRepo, err := git.OpenRepository(ctx, tmpBasePath) if err != nil { return false, fmt.Errorf("OpenRepository: %w", err) } - defer headGitRepo.Close() + defer tmpRepo.Close() - // Add a temporary remote. - tmpRemote := "checkIfPRContentChanged-" + fmt.Sprint(time.Now().UnixNano()) - if err = headGitRepo.AddRemote(tmpRemote, pr.BaseRepo.RepoPath(), true); err != nil { - return false, fmt.Errorf("AddRemote: %s/%s-%s: %w", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err) - } - defer func() { - if err := headGitRepo.RemoveRemote(tmpRemote); err != nil { - log.Error("checkIfPRContentChanged: RemoveRemote: %s/%s-%s: %v", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err) - } - }() - // To synchronize repo and get a base ref - _, base, err := headGitRepo.GetMergeBase(tmpRemote, pr.BaseBranch, pr.HeadBranch) + // Find the merge-base + _, base, err := tmpRepo.GetMergeBase("", "base", "tracking") if err != nil { return false, fmt.Errorf("GetMergeBase: %w", err) } - diffBefore := &bytes.Buffer{} - diffAfter := &bytes.Buffer{} - if err := headGitRepo.GetDiffFromMergeBase(base, oldCommitID, diffBefore); err != nil { - // If old commit not found, assume changed. - log.Debug("GetDiffFromMergeBase: %v", err) - return true, nil - } - if err := headGitRepo.GetDiffFromMergeBase(base, newCommitID, diffAfter); err != nil { - // New commit should be found - return false, fmt.Errorf("GetDiffFromMergeBase: %w", err) + cmd := git.NewCommand(ctx, "diff", "--name-only", "-z").AddDynamicArguments(newCommitID, oldCommitID, base) + stdoutReader, stdoutWriter, err := os.Pipe() + if err != nil { + return false, fmt.Errorf("unable to open pipe for to run diff: %w", err) } - diffBeforeLines := bufio.NewScanner(diffBefore) - diffAfterLines := bufio.NewScanner(diffAfter) - - for diffBeforeLines.Scan() && diffAfterLines.Scan() { - if strings.HasPrefix(diffBeforeLines.Text(), "index") && strings.HasPrefix(diffAfterLines.Text(), "index") { - // file hashes can change without the diff changing - continue - } else if strings.HasPrefix(diffBeforeLines.Text(), "@@") && strings.HasPrefix(diffAfterLines.Text(), "@@") { - // the location of the difference may change - continue - } else if !bytes.Equal(diffBeforeLines.Bytes(), diffAfterLines.Bytes()) { + if err := cmd.Run(&git.RunOpts{ + Dir: tmpBasePath, + Stdout: stdoutWriter, + PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error { + _ = stdoutWriter.Close() + defer func() { + _ = stdoutReader.Close() + }() + return util.IsEmptyReader(stdoutReader) + }, + }); err != nil { + if err == util.ErrNotEmpty { return true, nil } - } - if diffBeforeLines.Scan() || diffAfterLines.Scan() { - // Diffs not of equal length - return true, nil + log.Error("Unable to run diff on %s %s %s in tempRepo for PR[%d]%s/%s...%s/%s: Error: %v", + newCommitID, oldCommitID, base, + pr.ID, pr.BaseRepo.FullName(), pr.BaseBranch, pr.HeadRepo.FullName(), pr.HeadBranch, + err) + + return false, fmt.Errorf("Unable to run git diff --name-only -z %s %s %s: %w", newCommitID, oldCommitID, base, err) } return false, nil