// seaweedfs/weed/s3api/filer_multipart.go
package s3api
import (
	"cmp"
	"encoding/hex"
	"encoding/xml"
	"fmt"
	"math"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/service/s3"
	"github.com/google/uuid"
	"golang.org/x/exp/slices"

	"github.com/seaweedfs/seaweedfs/weed/filer"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/s3api/s3err"
	"github.com/seaweedfs/seaweedfs/weed/stats"
)
const multipartExt = ".part"
2018-09-12 15:46:12 +08:00
type InitiateMultipartUploadResult struct {
2019-02-27 16:21:37 +08:00
XMLName xml.Name `xml:"http://s3.amazonaws.com/doc/2006-03-01/ InitiateMultipartUploadResult"`
2018-09-12 15:46:12 +08:00
s3.CreateMultipartUploadOutput
}
2020-09-20 05:09:58 +08:00
func (s3a *S3ApiServer) createMultipartUpload(input *s3.CreateMultipartUploadInput) (output *InitiateMultipartUploadResult, code s3err.ErrorCode) {
2020-09-22 01:51:24 +08:00
glog.V(2).Infof("createMultipartUpload input %v", input)
uploadIdString := s3a.generateUploadID(*input.Key)
uploadIdString = uploadIdString + "_" + strings.ReplaceAll(uuid.New().String(), "-", "")
2022-11-10 23:17:29 +08:00
2020-02-26 14:23:59 +08:00
if err := s3a.mkdir(s3a.genUploadsFolder(*input.Bucket), uploadIdString, func(entry *filer_pb.Entry) {
if entry.Extended == nil {
2018-09-08 04:12:52 +08:00
entry.Extended = make(map[string][]byte)
}
2018-09-08 04:12:52 +08:00
entry.Extended["key"] = []byte(*input.Key)
for k, v := range input.Metadata {
entry.Extended[k] = []byte(*v)
}
if input.ContentType != nil {
entry.Attributes.Mime = *input.ContentType
}
}); err != nil {
glog.Errorf("NewMultipartUpload error: %v", err)
2020-09-20 05:09:58 +08:00
return nil, s3err.ErrInternalError
}
2018-09-12 15:46:12 +08:00
output = &InitiateMultipartUploadResult{
2019-02-27 16:21:37 +08:00
CreateMultipartUploadOutput: s3.CreateMultipartUploadOutput{
2018-09-12 15:46:12 +08:00
Bucket: input.Bucket,
2019-07-09 03:37:20 +08:00
Key: objectKey(input.Key),
2018-09-12 15:46:12 +08:00
UploadId: aws.String(uploadIdString),
},
}
return
}
2018-09-12 15:46:12 +08:00
type CompleteMultipartUploadResult struct {
2019-02-27 16:21:37 +08:00
XMLName xml.Name `xml:"http://s3.amazonaws.com/doc/2006-03-01/ CompleteMultipartUploadResult"`
2018-09-12 15:46:12 +08:00
s3.CompleteMultipartUploadOutput
}
func (s3a *S3ApiServer) completeMultipartUpload(input *s3.CompleteMultipartUploadInput, parts *CompleteMultipartUpload) (output *CompleteMultipartUploadResult, code s3err.ErrorCode) {
2018-09-10 07:25:43 +08:00
2020-09-22 01:51:24 +08:00
glog.V(2).Infof("completeMultipartUpload input %v", input)
completedPartNumbers := []int{}
completedPartMap := make(map[int][]string)
for _, part := range parts.Parts {
if _, ok := completedPartMap[part.PartNumber]; !ok {
completedPartNumbers = append(completedPartNumbers, part.PartNumber)
}
completedPartMap[part.PartNumber] = append(completedPartMap[part.PartNumber], part.ETag)
}
2018-09-10 07:25:43 +08:00
uploadDirectory := s3a.genUploadsFolder(*input.Bucket) + "/" + *input.UploadId
entries, _, err := s3a.list(uploadDirectory, "", "", false, maxPartsList)
if err != nil || len(entries) == 0 {
glog.Errorf("completeMultipartUpload %s %s error: %v, entries:%d", *input.Bucket, *input.UploadId, err, len(entries))
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedNoSuchUpload).Inc()
2020-09-20 05:09:58 +08:00
return nil, s3err.ErrNoSuchUpload
2018-09-10 07:25:43 +08:00
}
2021-07-22 05:38:12 +08:00
pentry, err := s3a.getEntry(s3a.genUploadsFolder(*input.Bucket), *input.UploadId)
if err != nil {
glog.Errorf("completeMultipartUpload %s %s error: %v", *input.Bucket, *input.UploadId, err)
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedNoSuchUpload).Inc()
return nil, s3err.ErrNoSuchUpload
}
deleteEntries := []*filer_pb.Entry{}
partEntries := make(map[int][]*filer_pb.Entry, len(entries))
for _, entry := range entries {
foundEntry := false
glog.V(4).Infof("completeMultipartUpload part entries %s", entry.Name)
if entry.IsDirectory || !strings.HasSuffix(entry.Name, multipartExt) {
continue
}
partNumber, err := parsePartNumber(entry.Name)
if err != nil {
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedPartNumber).Inc()
glog.Errorf("completeMultipartUpload failed to pasre partNumber %s:%s", entry.Name, err)
continue
}
completedPartsByNumber, ok := completedPartMap[partNumber]
if !ok {
continue
}
for _, partETag := range completedPartsByNumber {
partETag = strings.Trim(partETag, `"`)
entryETag := hex.EncodeToString(entry.Attributes.GetMd5())
if partETag != "" && len(partETag) == 32 && entryETag != "" {
if entryETag != partETag {
glog.Errorf("completeMultipartUpload %s ETag mismatch chunk: %s part: %s", entry.Name, entryETag, partETag)
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedEtagMismatch).Inc()
continue
}
} else {
glog.Warningf("invalid complete etag %s, partEtag %s", partETag, entryETag)
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedEtagInvalid).Inc()
}
if len(entry.Chunks) == 0 {
glog.Warningf("completeMultipartUpload %s empty chunks", entry.Name)
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedPartEmpty).Inc()
continue
}
//there maybe multi same part, because of client retry
partEntries[partNumber] = append(partEntries[partNumber], entry)
foundEntry = true
}
if !foundEntry {
deleteEntries = append(deleteEntries, entry)
}
}
mime := pentry.Attributes.Mime
2018-09-10 07:25:43 +08:00
var finalParts []*filer_pb.FileChunk
var offset int64
sort.Ints(completedPartNumbers)
for _, partNumber := range completedPartNumbers {
partEntriesByNumber, ok := partEntries[partNumber]
if !ok {
glog.Errorf("part %d has no entry", partNumber)
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedPartNotFound).Inc()
return nil, s3err.ErrInvalidPart
}
found := false
if len(partEntriesByNumber) > 1 {
slices.SortFunc(partEntriesByNumber, func(a, b *filer_pb.Entry) int {
return cmp.Compare(b.Chunks[0].ModifiedTsNs, a.Chunks[0].ModifiedTsNs)
})
}
for _, entry := range partEntriesByNumber {
if found {
deleteEntries = append(deleteEntries, entry)
stats.S3HandlerCounter.WithLabelValues(stats.ErrorCompletedPartEntryMismatch).Inc()
continue
}
for _, chunk := range entry.GetChunks() {
2018-09-12 15:46:12 +08:00
p := &filer_pb.FileChunk{
FileId: chunk.GetFileIdString(),
Offset: offset,
Size: chunk.Size,
ModifiedTsNs: chunk.ModifiedTsNs,
CipherKey: chunk.CipherKey,
ETag: chunk.ETag,
2018-09-12 15:46:12 +08:00
}
finalParts = append(finalParts, p)
2018-09-10 07:25:43 +08:00
offset += int64(chunk.Size)
}
found = true
}
2018-09-10 07:25:43 +08:00
}
2018-09-10 07:25:43 +08:00
entryName := filepath.Base(*input.Key)
dirName := filepath.ToSlash(filepath.Dir(*input.Key))
2018-09-10 07:25:43 +08:00
if dirName == "." {
dirName = ""
}
2018-09-12 15:46:12 +08:00
if strings.HasPrefix(dirName, "/") {
dirName = dirName[1:]
}
2018-09-10 07:25:43 +08:00
dirName = fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, *input.Bucket, dirName)
// remove suffix '/'
if strings.HasSuffix(dirName, "/") {
dirName = dirName[:len(dirName)-1]
}
2021-07-22 05:38:12 +08:00
err = s3a.mkFile(dirName, entryName, finalParts, func(entry *filer_pb.Entry) {
if entry.Extended == nil {
entry.Extended = make(map[string][]byte)
}
2021-07-22 05:38:12 +08:00
for k, v := range pentry.Extended {
if k != "key" {
entry.Extended[k] = v
}
}
if pentry.Attributes.Mime != "" {
entry.Attributes.Mime = pentry.Attributes.Mime
2021-10-14 18:03:11 +08:00
} else if mime != "" {
entry.Attributes.Mime = mime
}
2022-09-15 16:27:02 +08:00
entry.Attributes.FileSize = uint64(offset)
})
2018-09-10 07:25:43 +08:00
if err != nil {
glog.Errorf("completeMultipartUpload %s/%s error: %v", dirName, entryName, err)
2020-09-20 05:09:58 +08:00
return nil, s3err.ErrInternalError
2018-09-10 07:25:43 +08:00
}
2018-09-12 15:46:12 +08:00
output = &CompleteMultipartUploadResult{
2019-02-27 16:21:37 +08:00
CompleteMultipartUploadOutput: s3.CompleteMultipartUploadOutput{
Location: aws.String(fmt.Sprintf("http://%s%s/%s", s3a.option.Filer.ToHttpAddress(), urlEscapeObject(dirName), urlPathEscape(entryName))),
2019-07-22 12:51:38 +08:00
Bucket: input.Bucket,
2020-09-01 15:21:19 +08:00
ETag: aws.String("\"" + filer.ETagChunks(finalParts) + "\""),
2019-07-22 12:51:38 +08:00
Key: objectKey(input.Key),
2018-09-12 15:46:12 +08:00
},
2018-09-10 07:25:43 +08:00
}
for _, deleteEntry := range deleteEntries {
//delete unused part data
glog.Infof("completeMultipartUpload cleanup %s upload %s unused %s", *input.Bucket, *input.UploadId, deleteEntry.Name)
if err = s3a.rm(uploadDirectory, deleteEntry.Name, true, true); err != nil {
glog.Warningf("completeMultipartUpload cleanup %s upload %s unused %s : %v", *input.Bucket, *input.UploadId, deleteEntry.Name, err)
}
}
if err = s3a.rm(s3a.genUploadsFolder(*input.Bucket), *input.UploadId, false, true); err != nil {
2018-09-12 16:00:51 +08:00
glog.V(1).Infof("completeMultipartUpload cleanup %s upload %s: %v", *input.Bucket, *input.UploadId, err)
}
return
}
func parsePartNumber(fileName string) (int, error) {
var partNumberString string
index := strings.Index(fileName, "_")
if index != -1 {
partNumberString = fileName[:index]
} else {
partNumberString = fileName[:len(fileName)-len(multipartExt)]
2022-03-29 22:53:12 +08:00
}
return strconv.Atoi(partNumberString)
}
2020-09-20 05:09:58 +08:00
func (s3a *S3ApiServer) abortMultipartUpload(input *s3.AbortMultipartUploadInput) (output *s3.AbortMultipartUploadOutput, code s3err.ErrorCode) {
2020-09-22 01:51:24 +08:00
glog.V(2).Infof("abortMultipartUpload input %v", input)
2020-02-26 14:23:59 +08:00
exists, err := s3a.exists(s3a.genUploadsFolder(*input.Bucket), *input.UploadId, true)
if err != nil {
2018-09-10 07:25:43 +08:00
glog.V(1).Infof("bucket %s abort upload %s: %v", *input.Bucket, *input.UploadId, err)
2020-09-20 05:09:58 +08:00
return nil, s3err.ErrNoSuchUpload
}
2018-09-10 07:25:43 +08:00
if exists {
err = s3a.rm(s3a.genUploadsFolder(*input.Bucket), *input.UploadId, true, true)
2018-09-10 07:25:43 +08:00
}
if err != nil {
glog.V(1).Infof("bucket %s remove upload %s: %v", *input.Bucket, *input.UploadId, err)
2020-09-20 05:09:58 +08:00
return nil, s3err.ErrInternalError
2018-09-10 07:25:43 +08:00
}
2020-09-20 05:09:58 +08:00
return &s3.AbortMultipartUploadOutput{}, s3err.ErrNone
2018-09-10 07:25:43 +08:00
}
2018-09-12 15:46:12 +08:00
type ListMultipartUploadsResult struct {
2019-02-27 16:21:37 +08:00
XMLName xml.Name `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListMultipartUploadsResult"`
// copied from s3.ListMultipartUploadsOutput, the Uploads is not converting to <Upload></Upload>
Bucket *string `type:"string"`
Delimiter *string `type:"string"`
EncodingType *string `type:"string" enum:"EncodingType"`
IsTruncated *bool `type:"boolean"`
KeyMarker *string `type:"string"`
MaxUploads *int64 `type:"integer"`
NextKeyMarker *string `type:"string"`
NextUploadIdMarker *string `type:"string"`
Prefix *string `type:"string"`
UploadIdMarker *string `type:"string"`
Upload []*s3.MultipartUpload `locationName:"Upload" type:"list" flattened:"true"`
2018-09-12 15:46:12 +08:00
}
2020-09-20 05:09:58 +08:00
func (s3a *S3ApiServer) listMultipartUploads(input *s3.ListMultipartUploadsInput) (output *ListMultipartUploadsResult, code s3err.ErrorCode) {
// https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListMultipartUploads.html
2018-09-10 07:25:43 +08:00
2020-09-22 01:51:24 +08:00
glog.V(2).Infof("listMultipartUploads input %v", input)
2018-09-12 15:46:12 +08:00
output = &ListMultipartUploadsResult{
Bucket: input.Bucket,
Delimiter: input.Delimiter,
EncodingType: input.EncodingType,
KeyMarker: input.KeyMarker,
MaxUploads: input.MaxUploads,
Prefix: input.Prefix,
IsTruncated: aws.Bool(false),
}
2018-09-10 07:25:43 +08:00
entries, _, err := s3a.list(s3a.genUploadsFolder(*input.Bucket), "", *input.UploadIdMarker, false, math.MaxInt32)
2018-09-10 07:25:43 +08:00
if err != nil {
glog.Errorf("listMultipartUploads %s error: %v", *input.Bucket, err)
return
}
uploadsCount := int64(0)
for _, entry := range entries {
if entry.Extended != nil {
2020-09-22 01:51:24 +08:00
key := string(entry.Extended["key"])
if *input.KeyMarker != "" && *input.KeyMarker != key {
continue
}
if *input.Prefix != "" && !strings.HasPrefix(key, *input.Prefix) {
continue
}
output.Upload = append(output.Upload, &s3.MultipartUpload{
2020-09-22 01:51:24 +08:00
Key: objectKey(aws.String(key)),
UploadId: aws.String(entry.Name),
})
uploadsCount += 1
}
if uploadsCount >= *input.MaxUploads {
output.IsTruncated = aws.Bool(true)
output.NextUploadIdMarker = aws.String(entry.Name)
break
}
}
2018-09-12 16:00:51 +08:00
return
}
2018-09-12 15:46:12 +08:00
type ListPartsResult struct {
2019-02-27 16:21:37 +08:00
XMLName xml.Name `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListPartsResult"`
// copied from s3.ListPartsOutput, the Parts is not converting to <Part></Part>
Bucket *string `type:"string"`
IsTruncated *bool `type:"boolean"`
Key *string `min:"1" type:"string"`
MaxParts *int64 `type:"integer"`
NextPartNumberMarker *int64 `type:"integer"`
PartNumberMarker *int64 `type:"integer"`
Part []*s3.Part `locationName:"Part" type:"list" flattened:"true"`
StorageClass *string `type:"string" enum:"StorageClass"`
UploadId *string `type:"string"`
2018-09-12 15:46:12 +08:00
}
2020-09-20 05:09:58 +08:00
func (s3a *S3ApiServer) listObjectParts(input *s3.ListPartsInput) (output *ListPartsResult, code s3err.ErrorCode) {
2020-09-12 06:07:19 +08:00
// https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListParts.html
2020-09-22 01:51:24 +08:00
glog.V(2).Infof("listObjectParts input %v", input)
2018-09-12 15:46:12 +08:00
output = &ListPartsResult{
Bucket: input.Bucket,
Key: objectKey(input.Key),
UploadId: input.UploadId,
MaxParts: input.MaxParts, // the maximum number of parts to return.
PartNumberMarker: input.PartNumberMarker, // the part number starts after this, exclusive
StorageClass: aws.String("STANDARD"),
2018-09-10 07:25:43 +08:00
}
entries, isLast, err := s3a.list(s3a.genUploadsFolder(*input.Bucket)+"/"+*input.UploadId, "", fmt.Sprintf("%04d%s", *input.PartNumberMarker, multipartExt), false, uint32(*input.MaxParts))
2018-09-10 07:25:43 +08:00
if err != nil {
2018-09-12 04:01:51 +08:00
glog.Errorf("listObjectParts %s %s error: %v", *input.Bucket, *input.UploadId, err)
2020-09-20 05:09:58 +08:00
return nil, s3err.ErrNoSuchUpload
2018-09-10 07:25:43 +08:00
}
2022-02-19 14:14:40 +08:00
// Note: The upload directory is sort of a marker of the existence of an multipart upload request.
// So can not just delete empty upload folders.
output.IsTruncated = aws.Bool(!isLast)
2018-09-10 07:25:43 +08:00
for _, entry := range entries {
if strings.HasSuffix(entry.Name, multipartExt) && !entry.IsDirectory {
partNumber, err := parsePartNumber(entry.Name)
2018-09-10 07:25:43 +08:00
if err != nil {
2018-09-12 04:01:51 +08:00
glog.Errorf("listObjectParts %s %s parse %s: %v", *input.Bucket, *input.UploadId, entry.Name, err)
2018-09-10 07:25:43 +08:00
continue
}
output.Part = append(output.Part, &s3.Part{
2018-09-10 07:25:43 +08:00
PartNumber: aws.Int64(int64(partNumber)),
LastModified: aws.Time(time.Unix(entry.Attributes.Mtime, 0).UTC()),
2020-09-01 15:21:19 +08:00
Size: aws.Int64(int64(filer.FileSize(entry))),
ETag: aws.String("\"" + filer.ETag(entry) + "\""),
2018-09-10 07:25:43 +08:00
})
if !isLast {
output.NextPartNumberMarker = aws.Int64(int64(partNumber))
}
2018-09-10 07:25:43 +08:00
}
}
return
}