seaweedfs/weed/operation/upload_content.go

374 lines
11 KiB
Go
Raw Normal View History

2012-09-21 08:58:29 +08:00
package operation
import (
2012-09-26 18:27:10 +08:00
"bytes"
2022-08-21 09:50:57 +08:00
"context"
2012-09-26 18:27:10 +08:00
"encoding/json"
"fmt"
2012-09-26 18:27:10 +08:00
"io"
"mime"
2012-09-26 18:27:10 +08:00
"mime/multipart"
"net"
2012-09-26 18:27:10 +08:00
"net/http"
"net/textproto"
"path/filepath"
"strings"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/security"
"github.com/seaweedfs/seaweedfs/weed/stats"
"github.com/seaweedfs/seaweedfs/weed/util"
2012-09-21 08:58:29 +08:00
)
2021-09-07 07:20:49 +08:00
type UploadOption struct {
UploadUrl string
Filename string
Cipher bool
IsInputCompressed bool
MimeType string
PairMap map[string]string
Jwt security.EncodedJwt
2022-08-21 09:50:57 +08:00
RetryForever bool
Md5 string
2021-09-07 07:20:49 +08:00
}
2012-09-21 08:58:29 +08:00
// UploadResult is the outcome of an upload. Most fields are decoded from the
// JSON body returned by the volume server; a few are filled in by the client
// after the request completes.
type UploadResult struct {
	// Name is the stored file name.
	Name string `json:"name,omitempty"`
	// Size is the length of the uncompressed, unencrypted content in bytes.
	Size uint32 `json:"size,omitempty"`
	// Error is the error message reported by the server, if any.
	Error string `json:"error,omitempty"`
	// ETag is taken from the response ETag header with surrounding quotes removed.
	ETag string `json:"eTag,omitempty"`
	// CipherKey is the key used to encrypt the content when encryption was requested.
	CipherKey []byte `json:"cipherKey,omitempty"`
	// Mime is the content type of the uploaded data.
	Mime string `json:"mime,omitempty"`
	// Gzip is non-zero when the stored content is gzip compressed.
	Gzip uint32 `json:"gzip,omitempty"`
	// ContentMd5 is taken from the response Content-MD5 header.
	ContentMd5 string `json:"contentMd5,omitempty"`
	// RetryCount is the number of failed attempts before the successful one.
	// It is never serialized.
	RetryCount int `json:"-"`
}
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add 
insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2023-01-03 15:20:45 +08:00
func (uploadResult *UploadResult) ToPbFileChunk(fileId string, offset int64, tsNs int64) *filer_pb.FileChunk {
2020-08-16 10:55:28 +08:00
fid, _ := filer_pb.ToFileIdObject(fileId)
return &filer_pb.FileChunk{
FileId: fileId,
Offset: offset,
Size: uint64(uploadResult.Size),
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add 
insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2023-01-03 15:20:45 +08:00
ModifiedTsNs: tsNs,
2021-04-29 01:28:05 +08:00
ETag: uploadResult.ContentMd5,
CipherKey: uploadResult.CipherKey,
IsCompressed: uploadResult.Gzip > 0,
2020-08-16 10:55:28 +08:00
Fid: fid,
}
}
// HTTPClient is the minimal HTTP client behavior needed for uploads.
// *http.Client satisfies it; tests can substitute their own implementation.
type HTTPClient interface {
	// Do sends the request and returns the server's response.
	Do(request *http.Request) (*http.Response, error)
}
2014-03-13 05:07:01 +08:00
var (
HttpClient HTTPClient
2014-03-13 05:07:01 +08:00
)
func init() {
HttpClient = &http.Client{Transport: &http.Transport{
DialContext: (&net.Dialer{
Timeout: 10 * time.Second,
KeepAlive: 10 * time.Second,
}).DialContext,
MaxIdleConns: 1024,
MaxIdleConnsPerHost: 1024,
}}
2014-03-13 05:07:01 +08:00
}
2022-08-21 09:50:57 +08:00
// UploadWithRetry will retry both assigning volume request and uploading content
// The option parameter does not need to specify UploadUrl and Jwt, which will come from assigning volume.
func UploadWithRetry(filerClient filer_pb.FilerClient, assignRequest *filer_pb.AssignVolumeRequest, uploadOption *UploadOption, genFileUrlFn func(host, fileId string) string, reader io.Reader) (fileId string, uploadResult *UploadResult, err error, data []byte) {
2022-08-21 09:50:57 +08:00
doUploadFunc := func() error {
var host string
2022-08-21 09:50:57 +08:00
var auth security.EncodedJwt
// grpc assign volume
if grpcAssignErr := filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
resp, assignErr := client.AssignVolume(context.Background(), assignRequest)
if assignErr != nil {
glog.V(0).Infof("assign volume failure %v: %v", assignRequest, assignErr)
return assignErr
}
if resp.Error != "" {
return fmt.Errorf("assign volume failure %v: %v", assignRequest, resp.Error)
}
fileId, auth = resp.FileId, security.EncodedJwt(resp.Auth)
loc := resp.Location
host = filerClient.AdjustedUrl(loc)
return nil
}); grpcAssignErr != nil {
return fmt.Errorf("filerGrpcAddress assign volume: %v", grpcAssignErr)
}
uploadOption.UploadUrl = genFileUrlFn(host, fileId)
uploadOption.Jwt = auth
var uploadErr error
uploadResult, uploadErr, data = doUpload(reader, uploadOption)
return uploadErr
}
if uploadOption.RetryForever {
2023-10-02 02:33:56 +08:00
util.RetryUntil("uploadWithRetryForever", doUploadFunc, func(err error) (shouldContinue bool) {
2022-08-21 09:50:57 +08:00
glog.V(0).Infof("upload content: %v", err)
return true
})
} else {
uploadErrList := []string{"transport", "is read only"}
err = util.MultiRetry("uploadWithRetry", uploadErrList, doUploadFunc)
2022-08-21 09:50:57 +08:00
}
return
}
// fileNameEscaper makes a file name safe to embed in a quoted
// Content-Disposition parameter: backslashes and double quotes are
// backslash-escaped and newlines are removed.
var fileNameEscaper = strings.NewReplacer(
	`\`, `\\`,
	`"`, `\"`,
	"\n", "",
)
// Upload sends a POST request to a volume server to upload the content with adjustable compression level
2021-09-07 07:20:49 +08:00
func UploadData(data []byte, option *UploadOption) (uploadResult *UploadResult, err error) {
uploadResult, err = retriedUploadData(data, option)
return
}
// Upload sends a POST request to a volume server to upload the content with fast compression
2021-09-07 07:20:49 +08:00
func Upload(reader io.Reader, option *UploadOption) (uploadResult *UploadResult, err error, data []byte) {
uploadResult, err, data = doUpload(reader, option)
return
}
2021-09-07 07:20:49 +08:00
func doUpload(reader io.Reader, option *UploadOption) (uploadResult *UploadResult, err error, data []byte) {
bytesReader, ok := reader.(*util.BytesReader)
if ok {
data = bytesReader.Bytes
} else {
data, err = io.ReadAll(reader)
if err != nil {
err = fmt.Errorf("read input: %v", err)
return
}
2020-03-15 04:55:32 +08:00
}
2021-09-07 07:20:49 +08:00
uploadResult, uploadErr := retriedUploadData(data, option)
2020-03-29 04:41:58 +08:00
return uploadResult, uploadErr, data
}
2021-09-07 07:20:49 +08:00
func retriedUploadData(data []byte, option *UploadOption) (uploadResult *UploadResult, err error) {
2020-10-30 17:16:34 +08:00
for i := 0; i < 3; i++ {
2022-09-11 07:56:03 +08:00
if i > 0 {
time.Sleep(time.Millisecond * time.Duration(237*(i+1)))
}
2021-09-07 07:20:49 +08:00
uploadResult, err = doUploadData(data, option)
2020-08-12 11:30:11 +08:00
if err == nil {
2021-03-08 03:45:35 +08:00
uploadResult.RetryCount = i
2020-08-12 11:30:11 +08:00
return
}
2022-09-11 07:56:03 +08:00
glog.Warningf("uploading %d to %s: %v", i, option.UploadUrl, err)
2020-08-12 11:30:11 +08:00
}
return
}
2021-09-07 07:20:49 +08:00
func doUploadData(data []byte, option *UploadOption) (uploadResult *UploadResult, err error) {
contentIsGzipped := option.IsInputCompressed
shouldGzipNow := false
2021-09-07 07:20:49 +08:00
if !option.IsInputCompressed {
if option.MimeType == "" {
option.MimeType = http.DetectContentType(data)
// println("detect1 mimetype to", MimeType)
if option.MimeType == "application/octet-stream" {
option.MimeType = ""
2020-04-15 02:32:31 +08:00
}
}
2021-09-07 07:20:49 +08:00
if shouldBeCompressed, iAmSure := util.IsCompressableFileType(filepath.Base(option.Filename), option.MimeType); iAmSure && shouldBeCompressed {
shouldGzipNow = true
2021-09-07 07:20:49 +08:00
} else if !iAmSure && option.MimeType == "" && len(data) > 16*1024 {
var compressed []byte
compressed, err = util.GzipData(data[0:128])
if err != nil {
return
}
shouldGzipNow = len(compressed)*10 < 128*9 // can not compress to less than 90%
}
}
var clearDataLen int
// gzip if possible
// this could be double copying
clearDataLen = len(data)
clearData := data
2021-09-07 07:20:49 +08:00
if shouldGzipNow && !option.Cipher {
compressed, compressErr := util.GzipData(data)
// fmt.Printf("data is compressed from %d ==> %d\n", len(data), len(compressed))
if compressErr == nil {
data = compressed
contentIsGzipped = true
}
2021-09-07 07:20:49 +08:00
} else if option.IsInputCompressed {
// just to get the clear data length
clearData, err = util.DecompressData(data)
if err == nil {
clearDataLen = len(clearData)
}
}
2021-09-07 07:20:49 +08:00
if option.Cipher {
// encrypt(gzip(data))
// encrypt
cipherKey := util.GenCipherKey()
encryptedData, encryptionErr := util.Encrypt(clearData, cipherKey)
if encryptionErr != nil {
err = fmt.Errorf("encrypt input: %v", encryptionErr)
return
}
// upload data
2021-09-07 07:35:55 +08:00
uploadResult, err = upload_content(func(w io.Writer) (err error) {
2020-03-09 12:54:47 +08:00
_, err = w.Write(encryptedData)
return
2021-09-07 07:35:55 +08:00
}, len(encryptedData), &UploadOption{
UploadUrl: option.UploadUrl,
Filename: "",
Cipher: false,
IsInputCompressed: false,
MimeType: "",
PairMap: nil,
Jwt: option.Jwt,
})
if uploadResult == nil {
return
}
2021-09-07 07:20:49 +08:00
uploadResult.Name = option.Filename
uploadResult.Mime = option.MimeType
uploadResult.CipherKey = cipherKey
uploadResult.Size = uint32(clearDataLen)
} else {
// upload data
2021-09-07 07:35:55 +08:00
uploadResult, err = upload_content(func(w io.Writer) (err error) {
2020-03-09 12:54:47 +08:00
_, err = w.Write(data)
return
2021-09-07 07:35:55 +08:00
}, len(data), &UploadOption{
UploadUrl: option.UploadUrl,
Filename: option.Filename,
Cipher: false,
IsInputCompressed: contentIsGzipped,
MimeType: option.MimeType,
PairMap: option.PairMap,
Jwt: option.Jwt,
Md5: option.Md5,
2021-09-07 07:35:55 +08:00
})
if uploadResult == nil {
return
}
uploadResult.Size = uint32(clearDataLen)
if contentIsGzipped {
uploadResult.Gzip = 1
}
}
return uploadResult, err
}
2021-09-07 07:35:55 +08:00
func upload_content(fillBufferFunction func(w io.Writer) error, originalDataSize int, option *UploadOption) (*UploadResult, error) {
2021-04-01 17:20:00 +08:00
buf := GetBuffer()
defer PutBuffer(buf)
body_writer := multipart.NewWriter(buf)
h := make(textproto.MIMEHeader)
filename := fileNameEscaper.Replace(option.Filename)
h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="file"; filename="%s"`, filename))
2021-09-07 07:35:55 +08:00
h.Set("Idempotency-Key", option.UploadUrl)
if option.MimeType == "" {
option.MimeType = mime.TypeByExtension(strings.ToLower(filepath.Ext(option.Filename)))
2013-07-30 01:09:36 +08:00
}
2021-09-07 07:35:55 +08:00
if option.MimeType != "" {
h.Set("Content-Type", option.MimeType)
}
2021-09-07 07:35:55 +08:00
if option.IsInputCompressed {
h.Set("Content-Encoding", "gzip")
}
if option.Md5 != "" {
h.Set("Content-MD5", option.Md5)
}
2014-03-13 06:17:23 +08:00
file_writer, cp_err := body_writer.CreatePart(h)
if cp_err != nil {
glog.V(0).Infoln("error creating form file", cp_err.Error())
return nil, cp_err
2013-02-27 14:54:22 +08:00
}
2014-03-13 06:17:23 +08:00
if err := fillBufferFunction(file_writer); err != nil {
glog.V(0).Infoln("error copying data", err)
2013-02-27 14:54:22 +08:00
return nil, err
}
2013-07-10 15:27:01 +08:00
content_type := body_writer.FormDataContentType()
2014-03-13 06:17:23 +08:00
if err := body_writer.Close(); err != nil {
glog.V(0).Infoln("error closing body", err)
2013-02-27 14:54:22 +08:00
return nil, err
}
2021-09-07 07:35:55 +08:00
req, postErr := http.NewRequest("POST", option.UploadUrl, bytes.NewReader(buf.Bytes()))
if postErr != nil {
2021-09-07 07:35:55 +08:00
glog.V(1).Infof("create upload request %s: %v", option.UploadUrl, postErr)
return nil, fmt.Errorf("create upload request %s: %v", option.UploadUrl, postErr)
}
req.Header.Set("Content-Type", content_type)
2021-09-07 07:35:55 +08:00
for k, v := range option.PairMap {
req.Header.Set(k, v)
}
2021-09-07 07:35:55 +08:00
if option.Jwt != "" {
req.Header.Set("Authorization", "BEARER "+string(option.Jwt))
2019-02-15 16:09:19 +08:00
}
2020-10-24 15:12:02 +08:00
// print("+")
resp, post_err := HttpClient.Do(req)
defer util.CloseResponse(resp)
2014-03-13 06:17:23 +08:00
if post_err != nil {
if strings.Contains(post_err.Error(), "connection reset by peer") ||
strings.Contains(post_err.Error(), "use of closed network connection") {
glog.V(1).Infof("repeat error upload request %s: %v", option.UploadUrl, postErr)
stats.FilerHandlerCounter.WithLabelValues(stats.RepeatErrorUploadContent).Inc()
resp, post_err = HttpClient.Do(req)
defer util.CloseResponse(resp)
}
}
if post_err != nil {
2021-09-07 07:35:55 +08:00
return nil, fmt.Errorf("upload %s %d bytes to %v: %v", option.Filename, originalDataSize, option.UploadUrl, post_err)
2012-09-26 18:27:10 +08:00
}
2020-10-24 15:12:02 +08:00
// print("-")
var ret UploadResult
2018-09-23 13:12:21 +08:00
etag := getEtag(resp)
if resp.StatusCode == http.StatusNoContent {
ret.ETag = etag
return &ret, nil
}
resp_body, ra_err := io.ReadAll(resp.Body)
2020-08-24 15:32:44 +08:00
if ra_err != nil {
2021-09-07 07:35:55 +08:00
return nil, fmt.Errorf("read response body %v: %v", option.UploadUrl, ra_err)
2020-08-24 15:32:44 +08:00
}
2014-03-13 06:17:23 +08:00
unmarshal_err := json.Unmarshal(resp_body, &ret)
if unmarshal_err != nil {
2021-09-07 07:35:55 +08:00
glog.Errorf("unmarshal %s: %v", option.UploadUrl, string(resp_body))
return nil, fmt.Errorf("unmarshal %v: %v", option.UploadUrl, unmarshal_err)
2012-09-26 18:27:10 +08:00
}
2013-01-17 16:56:56 +08:00
if ret.Error != "" {
2021-09-07 07:35:55 +08:00
return nil, fmt.Errorf("unmarshalled error %v: %v", option.UploadUrl, ret.Error)
2012-09-27 05:28:46 +08:00
}
2018-09-23 13:12:21 +08:00
ret.ETag = etag
ret.ContentMd5 = resp.Header.Get("Content-MD5")
2012-09-26 18:27:10 +08:00
return &ret, nil
2012-09-21 08:58:29 +08:00
}
2018-09-23 13:12:21 +08:00
// getEtag returns the value of the response's ETag header with one pair of
// surrounding double quotes removed, if present. It returns "" when the header
// is absent.
func getEtag(r *http.Response) (etag string) {
	etag = r.Header.Get("ETag")
	// Require at least two characters: a lone `"` is both prefix and suffix
	// and must not be sliced to etag[1:0].
	if len(etag) >= 2 && etag[0] == '"' && etag[len(etag)-1] == '"' {
		etag = etag[1 : len(etag)-1]
	}
	return etag
}