2019-12-14 06:21:06 +08:00
// Copyright 2019 The Gitea Authors.
// All rights reserved.
2022-11-28 02:20:29 +08:00
// SPDX-License-Identifier: MIT
2019-12-14 06:21:06 +08:00
package pull
import (
"bufio"
"context"
"fmt"
"io"
"os"
2021-12-19 12:19:25 +08:00
"path/filepath"
2019-12-14 06:21:06 +08:00
"strings"
"code.gitea.io/gitea/models"
2023-01-16 16:00:22 +08:00
git_model "code.gitea.io/gitea/models/git"
2022-06-13 17:37:59 +08:00
issues_model "code.gitea.io/gitea/models/issues"
2021-11-10 03:57:58 +08:00
"code.gitea.io/gitea/models/unit"
2022-10-12 13:18:26 +08:00
"code.gitea.io/gitea/modules/container"
2019-12-14 06:21:06 +08:00
"code.gitea.io/gitea/modules/git"
Simplify how git repositories are opened (#28937)
## Purpose
This is a refactor toward building an abstraction over managing git
repositories.
Afterwards, it does not matter anymore if they are stored on the local
disk or somewhere remote.
## What this PR changes
We used `git.OpenRepository` everywhere previously.
Now, we should split them into two distinct functions:
Firstly, there are temporary repositories which do not change:
```go
git.OpenRepository(ctx, diskPath)
```
Gitea managed repositories having a record in the database in the
`repository` table are moved into the new package `gitrepo`:
```go
gitrepo.OpenRepository(ctx, repo_model.Repo)
```
Why is `repo_model.Repository` the second parameter instead of file
path?
Because then we can easily adapt our repository storage strategy.
The repositories can be stored locally, however, they could just as well
be stored on a remote server.
## Further changes in other PRs
- A Git Command wrapper on package `gitrepo` could be created. i.e.
`NewCommand(ctx, repo_model.Repository, commands...)`. `git.RunOpts{Dir:
repo.RepoPath()}`, the directory should be empty before invoking this
method and it can be filled in the function only. #28940
- Remove the `RepoPath()`/`WikiPath()` functions to reduce the
possibility of mistakes.
---------
Co-authored-by: delvh <dev.lh@web.de>
2024-01-28 04:09:51 +08:00
"code.gitea.io/gitea/modules/gitrepo"
2021-12-19 12:19:25 +08:00
"code.gitea.io/gitea/modules/graceful"
2019-12-14 06:21:06 +08:00
"code.gitea.io/gitea/modules/log"
2021-12-19 12:19:25 +08:00
"code.gitea.io/gitea/modules/process"
2022-12-19 19:37:15 +08:00
"code.gitea.io/gitea/modules/setting"
2020-08-12 04:05:34 +08:00
"code.gitea.io/gitea/modules/util"
2020-10-14 02:50:57 +08:00
"github.com/gobwas/glob"
2019-12-14 06:21:06 +08:00
)
// DownloadDiffOrPatch will write the patch for the pr to the writer
2022-06-13 17:37:59 +08:00
func DownloadDiffOrPatch ( ctx context . Context , pr * issues_model . PullRequest , w io . Writer , patch , binary bool ) error {
2022-11-19 16:12:33 +08:00
if err := pr . LoadBaseRepo ( ctx ) ; err != nil {
2020-04-03 21:21:41 +08:00
log . Error ( "Unable to load base repository ID %d for pr #%d [%d]" , pr . BaseRepoID , pr . Index , pr . ID )
2019-12-14 06:21:06 +08:00
return err
}
Simplify how git repositories are opened (#28937)
## Purpose
This is a refactor toward building an abstraction over managing git
repositories.
Afterwards, it does not matter anymore if they are stored on the local
disk or somewhere remote.
## What this PR changes
We used `git.OpenRepository` everywhere previously.
Now, we should split them into two distinct functions:
Firstly, there are temporary repositories which do not change:
```go
git.OpenRepository(ctx, diskPath)
```
Gitea managed repositories having a record in the database in the
`repository` table are moved into the new package `gitrepo`:
```go
gitrepo.OpenRepository(ctx, repo_model.Repo)
```
Why is `repo_model.Repository` the second parameter instead of file
path?
Because then we can easily adapt our repository storage strategy.
The repositories can be stored locally, however, they could just as well
be stored on a remote server.
## Further changes in other PRs
- A Git Command wrapper on package `gitrepo` could be created. i.e.
`NewCommand(ctx, repo_model.Repository, commands...)`. `git.RunOpts{Dir:
repo.RepoPath()}`, the directory should be empty before invoking this
method and it can be filled in the function only. #28940
- Remove the `RepoPath()`/`WikiPath()` functions to reduce the
possibility of mistakes.
---------
Co-authored-by: delvh <dev.lh@web.de>
2024-01-28 04:09:51 +08:00
gitRepo , closer , err := gitrepo . RepositoryFromContextOrOpen ( ctx , pr . BaseRepo )
2019-12-14 06:21:06 +08:00
if err != nil {
2022-10-25 03:29:17 +08:00
return fmt . Errorf ( "OpenRepository: %w" , err )
2019-12-14 06:21:06 +08:00
}
2022-01-20 07:26:57 +08:00
defer closer . Close ( )
2021-09-28 05:09:49 +08:00
if err := gitRepo . GetDiffOrPatch ( pr . MergeBase , pr . GetGitRefName ( ) , w , patch , binary ) ; err != nil {
2020-01-12 17:36:21 +08:00
log . Error ( "Unable to get patch file from %s to %s in %s Error: %v" , pr . MergeBase , pr . HeadBranch , pr . BaseRepo . FullName ( ) , err )
2022-10-25 03:29:17 +08:00
return fmt . Errorf ( "Unable to get patch file from %s to %s in %s Error: %w" , pr . MergeBase , pr . HeadBranch , pr . BaseRepo . FullName ( ) , err )
2019-12-14 06:21:06 +08:00
}
return nil
}
var patchErrorSuffices = [ ] string {
": already exists in index" ,
": patch does not apply" ,
": already exists in working directory" ,
"unrecognized input" ,
2022-12-14 21:45:33 +08:00
": No such file or directory" ,
": does not exist in index" ,
2019-12-14 06:21:06 +08:00
}
// TestPatch will test whether a simple patch will apply
2022-06-13 17:37:59 +08:00
func TestPatch ( pr * issues_model . PullRequest ) error {
2023-02-04 07:11:48 +08:00
ctx , _ , finished := process . GetManager ( ) . AddContext ( graceful . GetManager ( ) . HammerContext ( ) , fmt . Sprintf ( "TestPatch: %s" , pr ) )
2022-01-20 07:26:57 +08:00
defer finished ( )
2023-03-08 04:07:35 +08:00
prCtx , cancel , err := createTemporaryRepoForPR ( ctx , pr )
2019-12-14 06:21:06 +08:00
if err != nil {
2023-08-10 10:39:21 +08:00
if ! git_model . IsErrBranchNotExist ( err ) {
log . Error ( "CreateTemporaryRepoForPR %-v: %v" , pr , err )
}
2019-12-14 06:21:06 +08:00
return err
}
2023-03-08 04:07:35 +08:00
defer cancel ( )
2019-12-14 06:21:06 +08:00
2023-08-10 10:39:21 +08:00
return testPatch ( ctx , prCtx , pr )
}
func testPatch ( ctx context . Context , prCtx * prContext , pr * issues_model . PullRequest ) error {
2023-03-08 04:07:35 +08:00
gitRepo , err := git . OpenRepository ( ctx , prCtx . tmpBasePath )
2019-12-14 06:21:06 +08:00
if err != nil {
2022-10-25 03:29:17 +08:00
return fmt . Errorf ( "OpenRepository: %w" , err )
2019-12-14 06:21:06 +08:00
}
defer gitRepo . Close ( )
2020-10-14 02:50:57 +08:00
// 1. update merge base
2023-03-08 04:07:35 +08:00
pr . MergeBase , _ , err = git . NewCommand ( ctx , "merge-base" , "--" , "base" , "tracking" ) . RunStdString ( & git . RunOpts { Dir : prCtx . tmpBasePath } )
2019-12-14 06:21:06 +08:00
if err != nil {
var err2 error
pr . MergeBase , err2 = gitRepo . GetRefCommitID ( git . BranchPrefix + "base" )
if err2 != nil {
2022-10-25 03:29:17 +08:00
return fmt . Errorf ( "GetMergeBase: %v and can't find commit ID for base: %w" , err , err2 )
2019-12-14 06:21:06 +08:00
}
}
pr . MergeBase = strings . TrimSpace ( pr . MergeBase )
2022-07-13 16:22:51 +08:00
if pr . HeadCommitID , err = gitRepo . GetRefCommitID ( git . BranchPrefix + "tracking" ) ; err != nil {
return fmt . Errorf ( "GetBranchCommitID: can't find commit ID for head: %w" , err )
}
if pr . HeadCommitID == pr . MergeBase {
pr . Status = issues_model . PullRequestStatusAncestor
return nil
}
2020-10-14 02:50:57 +08:00
// 2. Check for conflicts
2023-03-08 04:07:35 +08:00
if conflicts , err := checkConflicts ( ctx , pr , gitRepo , prCtx . tmpBasePath ) ; err != nil || conflicts || pr . Status == issues_model . PullRequestStatusEmpty {
2020-10-14 02:50:57 +08:00
return err
}
// 3. Check for protected files changes
2023-01-16 16:00:22 +08:00
if err = checkPullFilesProtection ( ctx , pr , gitRepo ) ; err != nil {
return fmt . Errorf ( "pr.CheckPullFilesProtection(): %v" , err )
2020-10-14 02:50:57 +08:00
}
if len ( pr . ChangedProtectedFiles ) > 0 {
log . Trace ( "Found %d protected files changed" , len ( pr . ChangedProtectedFiles ) )
}
2022-06-13 17:37:59 +08:00
pr . Status = issues_model . PullRequestStatusMergeable
2020-10-14 02:50:57 +08:00
return nil
}
2021-12-19 12:19:25 +08:00
type errMergeConflict struct {
filename string
}
func ( e * errMergeConflict ) Error ( ) string {
return fmt . Sprintf ( "conflict detected at: %s" , e . filename )
}
2024-07-05 17:24:01 +08:00
func attemptMerge ( ctx context . Context , file * unmergedFile , tmpBasePath string , filesToRemove * [ ] string , filesToAdd * [ ] git . IndexObjectInfo ) error {
2022-07-29 07:19:55 +08:00
log . Trace ( "Attempt to merge:\n%v" , file )
2023-12-14 05:02:00 +08:00
2021-12-19 12:19:25 +08:00
switch {
case file . stage1 != nil && ( file . stage2 == nil || file . stage3 == nil ) :
// 1. Deleted in one or both:
//
// Conflict <==> the stage1 !SameAs to the undeleted one
if ( file . stage2 != nil && ! file . stage1 . SameAs ( file . stage2 ) ) || ( file . stage3 != nil && ! file . stage1 . SameAs ( file . stage3 ) ) {
// Conflict!
return & errMergeConflict { file . stage1 . path }
}
// Not a genuine conflict and we can simply remove the file from the index
2024-07-05 17:24:01 +08:00
* filesToRemove = append ( * filesToRemove , file . stage1 . path )
return nil
2021-12-19 12:19:25 +08:00
case file . stage1 == nil && file . stage2 != nil && ( file . stage3 == nil || file . stage2 . SameAs ( file . stage3 ) ) :
// 2. Added in ours but not in theirs or identical in both
//
// Not a genuine conflict just add to the index
2024-07-05 17:24:01 +08:00
* filesToAdd = append ( * filesToAdd , git . IndexObjectInfo { Mode : file . stage2 . mode , Object : git . MustIDFromString ( file . stage2 . sha ) , Filename : file . stage2 . path } )
2021-12-19 12:19:25 +08:00
return nil
case file . stage1 == nil && file . stage2 != nil && file . stage3 != nil && file . stage2 . sha == file . stage3 . sha && file . stage2 . mode != file . stage3 . mode :
// 3. Added in both with the same sha but the modes are different
//
// Conflict! (Not sure that this can actually happen but we should handle)
return & errMergeConflict { file . stage2 . path }
case file . stage1 == nil && file . stage2 == nil && file . stage3 != nil :
// 4. Added in theirs but not ours:
//
// Not a genuine conflict just add to the index
2024-07-05 17:24:01 +08:00
* filesToAdd = append ( * filesToAdd , git . IndexObjectInfo { Mode : file . stage3 . mode , Object : git . MustIDFromString ( file . stage3 . sha ) , Filename : file . stage3 . path } )
return nil
2021-12-19 12:19:25 +08:00
case file . stage1 == nil :
// 5. Created by new in both
//
// Conflict!
return & errMergeConflict { file . stage2 . path }
case file . stage2 != nil && file . stage3 != nil :
// 5. Modified in both - we should try to merge in the changes but first:
//
if file . stage2 . mode == "120000" || file . stage3 . mode == "120000" {
// 5a. Conflicting symbolic link change
return & errMergeConflict { file . stage2 . path }
}
if file . stage2 . mode == "160000" || file . stage3 . mode == "160000" {
// 5b. Conflicting submodule change
return & errMergeConflict { file . stage2 . path }
}
if file . stage2 . mode != file . stage3 . mode {
// 5c. Conflicting mode change
return & errMergeConflict { file . stage2 . path }
}
// Need to get the objects from the object db to attempt to merge
2022-10-23 22:44:45 +08:00
root , _ , err := git . NewCommand ( ctx , "unpack-file" ) . AddDynamicArguments ( file . stage1 . sha ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-19 12:19:25 +08:00
if err != nil {
return fmt . Errorf ( "unable to get root object: %s at path: %s for merging. Error: %w" , file . stage1 . sha , file . stage1 . path , err )
}
root = strings . TrimSpace ( root )
defer func ( ) {
_ = util . Remove ( filepath . Join ( tmpBasePath , root ) )
} ( )
2022-10-23 22:44:45 +08:00
base , _ , err := git . NewCommand ( ctx , "unpack-file" ) . AddDynamicArguments ( file . stage2 . sha ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-19 12:19:25 +08:00
if err != nil {
return fmt . Errorf ( "unable to get base object: %s at path: %s for merging. Error: %w" , file . stage2 . sha , file . stage2 . path , err )
}
base = strings . TrimSpace ( filepath . Join ( tmpBasePath , base ) )
defer func ( ) {
_ = util . Remove ( base )
} ( )
2022-10-23 22:44:45 +08:00
head , _ , err := git . NewCommand ( ctx , "unpack-file" ) . AddDynamicArguments ( file . stage3 . sha ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-19 12:19:25 +08:00
if err != nil {
return fmt . Errorf ( "unable to get head object:%s at path: %s for merging. Error: %w" , file . stage3 . sha , file . stage3 . path , err )
}
head = strings . TrimSpace ( head )
defer func ( ) {
_ = util . Remove ( filepath . Join ( tmpBasePath , head ) )
} ( )
// now git merge-file annoyingly takes a different order to the merge-tree ...
2022-10-23 22:44:45 +08:00
_ , _ , conflictErr := git . NewCommand ( ctx , "merge-file" ) . AddDynamicArguments ( base , root , head ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-19 12:19:25 +08:00
if conflictErr != nil {
return & errMergeConflict { file . stage2 . path }
}
// base now contains the merged data
2022-10-23 22:44:45 +08:00
hash , _ , err := git . NewCommand ( ctx , "hash-object" , "-w" , "--path" ) . AddDynamicArguments ( file . stage2 . path , base ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-19 12:19:25 +08:00
if err != nil {
return err
}
hash = strings . TrimSpace ( hash )
2024-07-05 17:24:01 +08:00
* filesToAdd = append ( * filesToAdd , git . IndexObjectInfo { Mode : file . stage2 . mode , Object : git . MustIDFromString ( hash ) , Filename : file . stage2 . path } )
return nil
2021-12-19 12:19:25 +08:00
default :
if file . stage1 != nil {
return & errMergeConflict { file . stage1 . path }
} else if file . stage2 != nil {
return & errMergeConflict { file . stage2 . path }
} else if file . stage3 != nil {
return & errMergeConflict { file . stage3 . path }
}
}
return nil
}
2022-02-10 04:28:55 +08:00
// AttemptThreeWayMerge will attempt to three way merge using git read-tree and then follow the git merge-one-file algorithm to attempt to resolve basic conflicts
func AttemptThreeWayMerge ( ctx context . Context , gitPath string , gitRepo * git . Repository , base , ours , theirs , description string ) ( bool , [ ] string , error ) {
ctx , cancel := context . WithCancel ( ctx )
defer cancel ( )
2021-12-19 12:19:25 +08:00
// First we use read-tree to do a simple three-way merge
2022-10-23 22:44:45 +08:00
if _ , _ , err := git . NewCommand ( ctx , "read-tree" , "-m" ) . AddDynamicArguments ( base , ours , theirs ) . RunStdString ( & git . RunOpts { Dir : gitPath } ) ; err != nil {
2021-12-19 12:19:25 +08:00
log . Error ( "Unable to run read-tree -m! Error: %v" , err )
2022-10-25 03:29:17 +08:00
return false , nil , fmt . Errorf ( "unable to run read-tree -m! Error: %w" , err )
2021-12-19 12:19:25 +08:00
}
2024-07-05 17:24:01 +08:00
var filesToRemove [ ] string
var filesToAdd [ ] git . IndexObjectInfo
2021-12-19 12:19:25 +08:00
// Then we use git ls-files -u to list the unmerged files and collate the triples in unmergedfiles
unmerged := make ( chan * unmergedFile )
2022-02-10 04:28:55 +08:00
go unmergedFiles ( ctx , gitPath , unmerged )
2021-12-19 12:19:25 +08:00
defer func ( ) {
cancel ( )
for range unmerged {
// empty the unmerged channel
}
} ( )
numberOfConflicts := 0
conflict := false
2022-02-10 04:28:55 +08:00
conflictedFiles := make ( [ ] string , 0 , 5 )
2021-12-19 12:19:25 +08:00
for file := range unmerged {
if file == nil {
break
}
if file . err != nil {
cancel ( )
2022-02-10 04:28:55 +08:00
return false , nil , file . err
2021-12-19 12:19:25 +08:00
}
// OK now we have the unmerged file triplet attempt to merge it
2024-07-05 17:24:01 +08:00
if err := attemptMerge ( ctx , file , gitPath , & filesToRemove , & filesToAdd ) ; err != nil {
2021-12-19 12:19:25 +08:00
if conflictErr , ok := err . ( * errMergeConflict ) ; ok {
2022-02-10 04:28:55 +08:00
log . Trace ( "Conflict: %s in %s" , conflictErr . filename , description )
2021-12-19 12:19:25 +08:00
conflict = true
if numberOfConflicts < 10 {
2022-02-10 04:28:55 +08:00
conflictedFiles = append ( conflictedFiles , conflictErr . filename )
2021-12-19 12:19:25 +08:00
}
numberOfConflicts ++
continue
}
2022-02-10 04:28:55 +08:00
return false , nil , err
2021-12-19 12:19:25 +08:00
}
}
2024-07-05 17:24:01 +08:00
// Add and remove files in one command, as this is slow with many files otherwise
if err := gitRepo . RemoveFilesFromIndex ( filesToRemove ... ) ; err != nil {
return false , nil , err
}
if err := gitRepo . AddObjectsToIndex ( filesToAdd ... ) ; err != nil {
return false , nil , err
}
2022-02-10 04:28:55 +08:00
return conflict , conflictedFiles , nil
}
2022-06-13 17:37:59 +08:00
func checkConflicts ( ctx context . Context , pr * issues_model . PullRequest , gitRepo * git . Repository , tmpBasePath string ) ( bool , error ) {
2022-03-30 00:42:34 +08:00
// 1. checkConflicts resets the conflict status - therefore - reset the conflict status
pr . ConflictedFiles = nil
// 2. AttemptThreeWayMerge first - this is much quicker than plain patch to base
2022-02-10 04:28:55 +08:00
description := fmt . Sprintf ( "PR[%d] %s/%s#%d" , pr . ID , pr . BaseRepo . OwnerName , pr . BaseRepo . Name , pr . Index )
2022-12-19 19:37:15 +08:00
conflict , conflictFiles , err := AttemptThreeWayMerge ( ctx ,
2022-02-10 04:28:55 +08:00
tmpBasePath , gitRepo , pr . MergeBase , "base" , "tracking" , description )
if err != nil {
return false , err
}
2021-12-19 12:19:25 +08:00
if ! conflict {
2022-12-19 19:37:15 +08:00
// No conflicts detected so we need to check if the patch is empty...
// a. Write the newly merged tree and check the new tree-hash
2022-04-01 10:55:30 +08:00
var treeHash string
treeHash , _ , err = git . NewCommand ( ctx , "write-tree" ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2021-12-19 12:19:25 +08:00
if err != nil {
2022-07-29 07:19:55 +08:00
lsfiles , _ , _ := git . NewCommand ( ctx , "ls-files" , "-u" ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
return false , fmt . Errorf ( "unable to write unconflicted tree: %w\n`git ls-files -u`:\n%s" , err , lsfiles )
2021-12-19 12:19:25 +08:00
}
treeHash = strings . TrimSpace ( treeHash )
baseTree , err := gitRepo . GetTree ( "base" )
if err != nil {
return false , err
}
2022-12-19 19:37:15 +08:00
// b. compare the new tree-hash with the base tree hash
2021-12-19 12:19:25 +08:00
if treeHash == baseTree . ID . String ( ) {
log . Debug ( "PullRequest[%d]: Patch is empty - ignoring" , pr . ID )
2022-06-13 17:37:59 +08:00
pr . Status = issues_model . PullRequestStatusEmpty
2021-12-19 12:19:25 +08:00
}
return false , nil
}
2022-12-19 19:37:15 +08:00
// 3. OK the three-way merge method has detected conflicts
// 3a. Are still testing with GitApply? If not set the conflict status and move on
if ! setting . Repository . PullRequest . TestConflictingPatchesWithGitApply {
pr . Status = issues_model . PullRequestStatusConflict
pr . ConflictedFiles = conflictFiles
log . Trace ( "Found %d files conflicted: %v" , len ( pr . ConflictedFiles ) , pr . ConflictedFiles )
return true , nil
}
2021-12-19 12:19:25 +08:00
2022-12-19 19:37:15 +08:00
// 3b. Create a plain patch from head to base
2021-09-22 13:38:34 +08:00
tmpPatchFile , err := os . CreateTemp ( "" , "patch" )
2019-12-14 06:21:06 +08:00
if err != nil {
log . Error ( "Unable to create temporary patch file! Error: %v" , err )
2022-10-25 03:29:17 +08:00
return false , fmt . Errorf ( "unable to create temporary patch file! Error: %w" , err )
2019-12-14 06:21:06 +08:00
}
defer func ( ) {
2020-08-12 04:05:34 +08:00
_ = util . Remove ( tmpPatchFile . Name ( ) )
2019-12-14 06:21:06 +08:00
} ( )
2021-09-28 05:09:49 +08:00
if err := gitRepo . GetDiffBinary ( pr . MergeBase , "tracking" , tmpPatchFile ) ; err != nil {
2019-12-14 06:21:06 +08:00
tmpPatchFile . Close ( )
2020-01-12 17:36:21 +08:00
log . Error ( "Unable to get patch file from %s to %s in %s Error: %v" , pr . MergeBase , pr . HeadBranch , pr . BaseRepo . FullName ( ) , err )
2022-10-25 03:29:17 +08:00
return false , fmt . Errorf ( "unable to get patch file from %s to %s in %s Error: %w" , pr . MergeBase , pr . HeadBranch , pr . BaseRepo . FullName ( ) , err )
2019-12-14 06:21:06 +08:00
}
stat , err := tmpPatchFile . Stat ( )
if err != nil {
tmpPatchFile . Close ( )
2022-10-25 03:29:17 +08:00
return false , fmt . Errorf ( "unable to stat patch file: %w" , err )
2019-12-14 06:21:06 +08:00
}
patchPath := tmpPatchFile . Name ( )
tmpPatchFile . Close ( )
2022-12-19 19:37:15 +08:00
// 3c. if the size of that patch is 0 - there can be no conflicts!
2019-12-14 06:21:06 +08:00
if stat . Size ( ) == 0 {
log . Debug ( "PullRequest[%d]: Patch is empty - ignoring" , pr . ID )
2022-06-13 17:37:59 +08:00
pr . Status = issues_model . PullRequestStatusEmpty
2020-10-14 02:50:57 +08:00
return false , nil
2019-12-14 06:21:06 +08:00
}
log . Trace ( "PullRequest[%d].testPatch (patchPath): %s" , pr . ID , patchPath )
2022-03-30 00:42:34 +08:00
// 4. Read the base branch in to the index of the temporary repository
2022-04-01 10:55:30 +08:00
_ , _ , err = git . NewCommand ( gitRepo . Ctx , "read-tree" , "base" ) . RunStdString ( & git . RunOpts { Dir : tmpBasePath } )
2019-12-14 06:21:06 +08:00
if err != nil {
2022-10-25 03:29:17 +08:00
return false , fmt . Errorf ( "git read-tree %s: %w" , pr . BaseBranch , err )
2019-12-14 06:21:06 +08:00
}
2022-03-30 00:42:34 +08:00
// 5. Now get the pull request configuration to check if we need to ignore whitespace
2022-12-10 10:46:31 +08:00
prUnit , err := pr . BaseRepo . GetUnit ( ctx , unit . TypePullRequests )
2019-12-14 06:21:06 +08:00
if err != nil {
2020-10-14 02:50:57 +08:00
return false , err
2019-12-14 06:21:06 +08:00
}
prConfig := prUnit . PullRequestsConfig ( )
2022-03-30 00:42:34 +08:00
// 6. Prepare the arguments to apply the patch against the index
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00
cmdApply := git . NewCommand ( gitRepo . Ctx , "apply" , "--check" , "--cached" )
2019-12-14 06:21:06 +08:00
if prConfig . IgnoreWhitespaceConflicts {
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00
cmdApply . AddArguments ( "--ignore-whitespace" )
2019-12-14 06:21:06 +08:00
}
2022-02-02 12:46:10 +08:00
is3way := false
2024-05-07 10:07:33 +08:00
if git . DefaultFeatures ( ) . CheckVersionAtLeast ( "2.32.0" ) {
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00
cmdApply . AddArguments ( "--3way" )
2022-02-02 12:46:10 +08:00
is3way = true
2021-12-19 12:19:25 +08:00
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00
cmdApply . AddDynamicArguments ( patchPath )
2019-12-14 06:21:06 +08:00
2022-03-30 00:42:34 +08:00
// 7. Prep the pipe:
2020-10-14 02:50:57 +08:00
// - Here we could do the equivalent of:
// `git apply --check --cached patch_file > conflicts`
// Then iterate through the conflicts. However, that means storing all the conflicts
// in memory - which is very wasteful.
// - alternatively we can do the equivalent of:
// `git apply --check ... | grep ...`
// meaning we don't store all of the conflicts unnecessarily.
2019-12-14 06:21:06 +08:00
stderrReader , stderrWriter , err := os . Pipe ( )
if err != nil {
log . Error ( "Unable to open stderr pipe: %v" , err )
2022-10-25 03:29:17 +08:00
return false , fmt . Errorf ( "unable to open stderr pipe: %w" , err )
2019-12-14 06:21:06 +08:00
}
defer func ( ) {
_ = stderrReader . Close ( )
_ = stderrWriter . Close ( )
} ( )
2020-10-14 02:50:57 +08:00
2022-03-30 00:42:34 +08:00
// 8. Run the check command
2021-12-19 12:19:25 +08:00
conflict = false
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00
err = cmdApply . Run ( & git . RunOpts {
Dir : tmpBasePath ,
Stderr : stderrWriter ,
PipelineFunc : func ( ctx context . Context , cancel context . CancelFunc ) error {
// Close the writer end of the pipe to begin processing
_ = stderrWriter . Close ( )
defer func ( ) {
// Close the reader on return to terminate the git command if necessary
_ = stderrReader . Close ( )
} ( )
const prefix = "error: patch failed:"
const errorPrefix = "error: "
const threewayFailed = "Failed to perform three-way merge..."
const appliedPatchPrefix = "Applied patch to '"
const withConflicts = "' with conflicts."
conflicts := make ( container . Set [ string ] )
// Now scan the output from the command
scanner := bufio . NewScanner ( stderrReader )
for scanner . Scan ( ) {
line := scanner . Text ( )
log . Trace ( "PullRequest[%d].testPatch: stderr: %s" , pr . ID , line )
if strings . HasPrefix ( line , prefix ) {
conflict = true
filepath := strings . TrimSpace ( strings . Split ( line [ len ( prefix ) : ] , ":" ) [ 0 ] )
conflicts . Add ( filepath )
} else if is3way && line == threewayFailed {
conflict = true
} else if strings . HasPrefix ( line , errorPrefix ) {
conflict = true
for _ , suffix := range patchErrorSuffices {
if strings . HasSuffix ( line , suffix ) {
filepath := strings . TrimSpace ( strings . TrimSuffix ( line [ len ( errorPrefix ) : ] , suffix ) )
if filepath != "" {
conflicts . Add ( filepath )
2019-12-14 06:21:06 +08:00
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00
break
2022-02-02 12:46:10 +08:00
}
2019-12-14 06:21:06 +08:00
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00
} else if is3way && strings . HasPrefix ( line , appliedPatchPrefix ) && strings . HasSuffix ( line , withConflicts ) {
conflict = true
filepath := strings . TrimPrefix ( strings . TrimSuffix ( line , withConflicts ) , appliedPatchPrefix )
if filepath != "" {
conflicts . Add ( filepath )
2019-12-14 06:21:06 +08:00
}
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00
// only list 10 conflicted files
if len ( conflicts ) >= 10 {
break
}
}
2020-10-14 02:50:57 +08:00
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00
if len ( conflicts ) > 0 {
pr . ConflictedFiles = make ( [ ] string , 0 , len ( conflicts ) )
for key := range conflicts {
pr . ConflictedFiles = append ( pr . ConflictedFiles , key )
2019-12-14 06:21:06 +08:00
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00
}
2020-10-14 02:50:57 +08:00
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00
return nil
} ,
} )
2019-12-14 06:21:06 +08:00
2022-04-22 05:55:45 +08:00
// 9. Check if the found conflictedfiles is non-zero, "err" could be non-nil, so we should ignore it if we found conflicts.
// Note: `"err" could be non-nil` is due that if enable 3-way merge, it doesn't return any error on found conflicts.
if len ( pr . ConflictedFiles ) > 0 {
2019-12-14 06:21:06 +08:00
if conflict {
2022-06-13 17:37:59 +08:00
pr . Status = issues_model . PullRequestStatusConflict
2019-12-14 06:21:06 +08:00
log . Trace ( "Found %d files conflicted: %v" , len ( pr . ConflictedFiles ) , pr . ConflictedFiles )
2020-10-14 02:50:57 +08:00
return true , nil
2019-12-14 06:21:06 +08:00
}
2022-04-22 05:55:45 +08:00
} else if err != nil {
2022-10-25 03:29:17 +08:00
return false , fmt . Errorf ( "git apply --check: %w" , err )
2019-12-14 06:21:06 +08:00
}
2020-10-14 02:50:57 +08:00
return false , nil
}
// CheckFileProtection check file Protection
2022-01-20 07:26:57 +08:00
func CheckFileProtection ( repo * git . Repository , oldCommitID , newCommitID string , patterns [ ] glob . Glob , limit int , env [ ] string ) ( [ ] string , error ) {
2020-10-14 02:50:57 +08:00
if len ( patterns ) == 0 {
return nil , nil
}
2022-01-20 07:26:57 +08:00
affectedFiles , err := git . GetAffectedFiles ( repo , oldCommitID , newCommitID , env )
2020-10-14 02:50:57 +08:00
if err != nil {
return nil , err
}
changedProtectedFiles := make ( [ ] string , 0 , limit )
2021-09-11 22:21:17 +08:00
for _ , affectedFile := range affectedFiles {
lpath := strings . ToLower ( affectedFile )
for _ , pat := range patterns {
if pat . Match ( lpath ) {
changedProtectedFiles = append ( changedProtectedFiles , lpath )
break
}
}
if len ( changedProtectedFiles ) >= limit {
break
}
}
if len ( changedProtectedFiles ) > 0 {
err = models . ErrFilePathProtected {
Path : changedProtectedFiles [ 0 ] ,
}
2020-10-14 02:50:57 +08:00
}
return changedProtectedFiles , err
}
2021-09-11 22:21:17 +08:00
// CheckUnprotectedFiles check if the commit only touches unprotected files
2022-01-20 07:26:57 +08:00
func CheckUnprotectedFiles ( repo * git . Repository , oldCommitID , newCommitID string , patterns [ ] glob . Glob , env [ ] string ) ( bool , error ) {
2021-09-11 22:21:17 +08:00
if len ( patterns ) == 0 {
return false , nil
}
2022-01-20 07:26:57 +08:00
affectedFiles , err := git . GetAffectedFiles ( repo , oldCommitID , newCommitID , env )
2021-09-11 22:21:17 +08:00
if err != nil {
return false , err
}
for _ , affectedFile := range affectedFiles {
lpath := strings . ToLower ( affectedFile )
unprotected := false
for _ , pat := range patterns {
if pat . Match ( lpath ) {
unprotected = true
break
}
}
if ! unprotected {
return false , nil
}
}
return true , nil
}
2020-10-14 02:50:57 +08:00
// checkPullFilesProtection check if pr changed protected files and save results
2023-01-16 16:00:22 +08:00
func checkPullFilesProtection ( ctx context . Context , pr * issues_model . PullRequest , gitRepo * git . Repository ) error {
2022-06-13 17:37:59 +08:00
if pr . Status == issues_model . PullRequestStatusEmpty {
2022-03-30 00:42:34 +08:00
pr . ChangedProtectedFiles = nil
return nil
}
2023-01-16 16:00:22 +08:00
pb , err := git_model . GetFirstMatchProtectedBranchRule ( ctx , pr . BaseRepoID , pr . BaseBranch )
if err != nil {
2020-10-14 02:50:57 +08:00
return err
}
2023-01-16 16:00:22 +08:00
if pb == nil {
2020-10-14 02:50:57 +08:00
pr . ChangedProtectedFiles = nil
return nil
}
2023-01-16 16:00:22 +08:00
pr . ChangedProtectedFiles , err = CheckFileProtection ( gitRepo , pr . MergeBase , "tracking" , pb . GetProtectedFilePatterns ( ) , 10 , os . Environ ( ) )
2020-10-14 02:50:57 +08:00
if err != nil && ! models . IsErrFilePathProtected ( err ) {
return err
}
2019-12-14 06:21:06 +08:00
return nil
}