seaweedfs/weed/server/filer_server_handlers_write_upload.go

162 lines
4.8 KiB
Go
Raw Normal View History

2021-03-31 12:07:34 +08:00
package weed_server
import (
2021-04-29 01:28:05 +08:00
"bytes"
2021-03-31 12:07:34 +08:00
"crypto/md5"
"hash"
"io"
"io/ioutil"
"net/http"
2021-06-07 11:23:36 +08:00
"sort"
2021-03-31 12:07:34 +08:00
"strings"
"sync"
2021-03-31 12:07:34 +08:00
"time"
"github.com/chrislusf/seaweedfs/weed/filer"
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/operation"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
"github.com/chrislusf/seaweedfs/weed/security"
"github.com/chrislusf/seaweedfs/weed/stats"
"github.com/chrislusf/seaweedfs/weed/util"
)
// bufPool hands out reusable *bytes.Buffer values so chunk-sized buffers
// are not reallocated for every chunk read. A buffer taken from the pool
// may still hold old content; callers must Reset it before use.
var bufPool = sync.Pool{
	New: func() interface{} {
		return &bytes.Buffer{}
	},
}
func (fs *FilerServer) uploadReaderToChunks(w http.ResponseWriter, r *http.Request, reader io.Reader, chunkSize int32, fileName, contentType string, contentLength int64, so *operation.StorageOption) ([]*filer_pb.FileChunk, hash.Hash, int64, error, []byte) {
var fileChunks []*filer_pb.FileChunk
2021-03-31 12:07:34 +08:00
md5Hash := md5.New()
2021-03-31 12:07:34 +08:00
var partReader = ioutil.NopCloser(io.TeeReader(reader, md5Hash))
chunkOffset := int64(0)
var smallContent []byte
2021-06-07 11:23:36 +08:00
var uploadErr error
2021-03-31 12:07:34 +08:00
2021-06-07 11:23:36 +08:00
var wg sync.WaitGroup
for {
2021-06-07 11:23:36 +08:00
// need to throttle this for large files
bytesBuffer := bufPool.Get().(*bytes.Buffer)
defer bufPool.Put(bytesBuffer)
limitedReader := io.LimitReader(partReader, int64(chunkSize))
2021-03-31 12:07:34 +08:00
bytesBuffer.Reset()
dataSize, err := bytesBuffer.ReadFrom(limitedReader)
// data, err := ioutil.ReadAll(limitedReader)
2021-06-07 11:23:36 +08:00
if err != nil || dataSize == 0 {
return nil, md5Hash, 0, err, nil
2021-03-31 12:07:34 +08:00
}
if chunkOffset == 0 && !isAppend(r) {
if dataSize < fs.option.SaveToFilerLimit || strings.HasPrefix(r.URL.Path, filer.DirectoryEtcRoot) && dataSize < 4*1024 {
chunkOffset += dataSize
smallContent = make([]byte, dataSize)
bytesBuffer.Write(smallContent)
break
}
}
2021-03-31 12:07:34 +08:00
2021-06-07 11:23:36 +08:00
wg.Add(1)
go func(offset int64) {
defer wg.Done()
2021-03-31 12:07:34 +08:00
2021-06-07 11:23:36 +08:00
chunk, toChunkErr := fs.dataToChunk(fileName, contentType, bytesBuffer.Bytes(), offset, so, md5Hash)
if toChunkErr != nil {
uploadErr = toChunkErr
}
if chunk != nil {
fileChunks = append(fileChunks, chunk)
glog.V(4).Infof("uploaded %s chunk %d to %s [%d,%d)", fileName, len(fileChunks), chunk.FileId, offset, offset+int64(chunk.Size))
}
}(chunkOffset)
2021-03-31 12:07:34 +08:00
// reset variables for the next chunk
2021-06-07 11:23:36 +08:00
chunkOffset = chunkOffset + dataSize
2021-03-31 12:07:34 +08:00
// if last chunk was not at full chunk size, but already exhausted the reader
2021-06-07 11:23:36 +08:00
if dataSize < int64(chunkSize) {
break
}
}
2021-06-07 11:23:36 +08:00
wg.Wait()
if uploadErr != nil {
return nil, md5Hash, 0, uploadErr, nil
}
sort.Slice(fileChunks, func(i, j int) bool {
return fileChunks[i].Offset < fileChunks[j].Offset
})
return fileChunks, md5Hash, chunkOffset, nil, smallContent
2021-03-31 12:07:34 +08:00
}
2021-06-07 09:43:04 +08:00
func (fs *FilerServer) doUpload(urlLocation string, limitedReader io.Reader, fileName string, contentType string, pairMap map[string]string, auth security.EncodedJwt) (*operation.UploadResult, error, []byte) {
2021-03-31 12:07:34 +08:00
stats.FilerRequestCounter.WithLabelValues("chunkUpload").Inc()
start := time.Now()
defer func() {
stats.FilerRequestHistogram.WithLabelValues("chunkUpload").Observe(time.Since(start).Seconds())
}()
uploadResult, err, data := operation.Upload(urlLocation, fileName, fs.option.Cipher, limitedReader, false, contentType, pairMap, auth)
if uploadResult != nil && uploadResult.RetryCount > 0 {
stats.FilerRequestCounter.WithLabelValues("chunkUploadRetry").Add(float64(uploadResult.RetryCount))
}
return uploadResult, err, data
}
2021-06-07 09:43:04 +08:00
// dataToChunk uploads data as one chunk and returns the chunk record placed
// at chunkOffset.
//
// A new file id is assigned for every attempt. Up to three attempts are
// made, with a growing pause between them; the error of the last attempt is
// returned if none succeeds. When the upload result reports a size of zero,
// (nil, nil) is returned and no chunk is produced.
//
// For the chunk at offset 0 the MD5 reported by the upload result is compared
// with the MD5 of data; a mismatch is only logged. md5Hash is kept for
// interface compatibility and is deliberately not read: the caller may still
// be writing to it from another goroutine, which would make reading it here
// both racy and a hash of more than this chunk.
func (fs *FilerServer) dataToChunk(fileName, contentType string, data []byte, chunkOffset int64, so *operation.StorageOption, md5Hash hash.Hash) (*filer_pb.FileChunk, error) {
	const maxAttempts = 3

	var fileId, urlLocation string
	var auth security.EncodedJwt
	var uploadErr error
	var uploadResult *operation.UploadResult

	for i := 0; i < maxAttempts; i++ {
		if i > 0 {
			// Back off before a retry only; no pause after the final failure.
			time.Sleep(time.Duration(i) * 251 * time.Millisecond)
		}

		// assign one file id for one chunk
		fileId, urlLocation, auth, uploadErr = fs.assignNewFileInfo(so)
		if uploadErr != nil {
			glog.V(4).Infof("retry later due to assign error: %v", uploadErr)
			continue
		}

		// Upload the chunk to the volume server. A fresh reader per attempt
		// ensures a retry never starts from a partially consumed reader.
		uploadResult, uploadErr, _ = fs.doUpload(urlLocation, util.NewBytesReader(data), fileName, contentType, nil, auth)
		if uploadErr != nil {
			glog.V(4).Infof("retry later due to upload error: %v", uploadErr)
			continue
		}
		break
	}
	if uploadErr != nil {
		glog.Errorf("upload error: %v", uploadErr)
		return nil, uploadErr
	}

	// if last chunk exhausted the reader exactly at the border
	if uploadResult.Size == 0 {
		return nil, nil
	}

	if chunkOffset == 0 {
		uploadedMd5 := util.Base64Md5ToBytes(uploadResult.ContentMd5)
		sum := md5.Sum(data)
		readedMd5 := sum[:]
		if !bytes.Equal(uploadedMd5, readedMd5) {
			glog.Errorf("md5 %x does not match %x uploaded chunk %s to the volume server", readedMd5, uploadedMd5, uploadResult.Name)
		}
	}

	return uploadResult.ToPbFileChunk(fileId, chunkOffset), nil
}