seaweedfs/weed/server/filer_server_handlers_write_autochunk.go

353 lines
10 KiB
Go
Raw Normal View History

2018-05-28 14:53:10 +08:00
package weed_server
import (
2019-03-16 06:55:34 +08:00
"context"
"crypto/md5"
"fmt"
2020-08-09 01:45:37 +08:00
"hash"
2018-05-28 14:53:10 +08:00
"io"
"io/ioutil"
2018-05-28 14:53:10 +08:00
"net/http"
"os"
2018-05-28 14:53:10 +08:00
"path"
"strconv"
"strings"
2018-05-28 14:59:49 +08:00
"time"
2020-09-01 15:21:19 +08:00
"github.com/chrislusf/seaweedfs/weed/filer"
2018-05-28 14:53:10 +08:00
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/operation"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
2020-10-29 16:05:40 +08:00
xhttp "github.com/chrislusf/seaweedfs/weed/s3api/http"
2019-02-15 16:09:19 +08:00
"github.com/chrislusf/seaweedfs/weed/security"
2019-06-23 13:53:52 +08:00
"github.com/chrislusf/seaweedfs/weed/stats"
"github.com/chrislusf/seaweedfs/weed/storage/needle"
2020-03-23 15:01:34 +08:00
"github.com/chrislusf/seaweedfs/weed/util"
2018-05-28 14:53:10 +08:00
)
2020-11-16 08:58:48 +08:00
func (fs *FilerServer) autoChunk(ctx context.Context, w http.ResponseWriter, r *http.Request, so *operation.StorageOption) {
2018-05-28 14:53:10 +08:00
// autoChunking can be set at the command-line level or as a query param. Query param overrides command-line
query := r.URL.Query()
parsedMaxMB, _ := strconv.ParseInt(query.Get("maxMB"), 10, 32)
maxMB := int32(parsedMaxMB)
2018-07-07 17:18:47 +08:00
if maxMB <= 0 && fs.option.MaxMB > 0 {
maxMB = int32(fs.option.MaxMB)
2018-05-28 14:53:10 +08:00
}
chunkSize := 1024 * 1024 * maxMB
2020-08-09 01:45:37 +08:00
stats.FilerRequestCounter.WithLabelValues("postAutoChunk").Inc()
start := time.Now()
defer func() {
stats.FilerRequestHistogram.WithLabelValues("postAutoChunk").Observe(time.Since(start).Seconds())
}()
var reply *FilerPostResult
var err error
var md5bytes []byte
2020-08-09 01:45:37 +08:00
if r.Method == "POST" {
if r.Header.Get("Content-Type") == "" && strings.HasSuffix(r.URL.Path, "/") {
reply, err = fs.mkdir(ctx, w, r)
} else {
2020-11-16 06:41:56 +08:00
reply, md5bytes, err = fs.doPostAutoChunk(ctx, w, r, chunkSize, so)
}
2020-08-09 01:45:37 +08:00
} else {
2020-11-16 06:41:56 +08:00
reply, md5bytes, err = fs.doPutAutoChunk(ctx, w, r, chunkSize, so)
2020-08-09 01:45:37 +08:00
}
2018-05-28 14:53:10 +08:00
if err != nil {
if strings.HasPrefix(err.Error(), "read input:") {
writeJsonError(w, r, 499, err)
} else {
writeJsonError(w, r, http.StatusInternalServerError, err)
}
2018-05-28 14:53:10 +08:00
} else if reply != nil {
if len(md5bytes) > 0 {
w.Header().Set("Content-MD5", util.Base64Encode(md5bytes))
}
2018-05-28 14:53:10 +08:00
writeJsonQuiet(w, r, http.StatusCreated, reply)
}
}
2020-11-16 08:58:48 +08:00
func (fs *FilerServer) doPostAutoChunk(ctx context.Context, w http.ResponseWriter, r *http.Request, chunkSize int32, so *operation.StorageOption) (filerResult *FilerPostResult, md5bytes []byte, replyerr error) {
2018-05-28 14:53:10 +08:00
multipartReader, multipartReaderErr := r.MultipartReader()
if multipartReaderErr != nil {
return nil, nil, multipartReaderErr
2018-05-28 14:53:10 +08:00
}
part1, part1Err := multipartReader.NextPart()
if part1Err != nil {
return nil, nil, part1Err
2018-05-28 14:53:10 +08:00
}
fileName := part1.FileName()
if fileName != "" {
fileName = path.Base(fileName)
}
contentType := part1.Header.Get("Content-Type")
2020-08-09 01:45:37 +08:00
if contentType == "application/octet-stream" {
contentType = ""
}
2020-11-30 20:34:04 +08:00
fileChunks, md5Hash, chunkOffset, err, smallContent := fs.uploadReaderToChunks(w, r, part1, chunkSize, fileName, contentType, so)
2020-08-09 01:45:37 +08:00
if err != nil {
return nil, nil, err
2020-08-09 01:45:37 +08:00
}
2018-05-28 14:53:10 +08:00
2020-11-16 06:41:56 +08:00
fileChunks, replyerr = filer.MaybeManifestize(fs.saveAsChunk(so), fileChunks)
2020-08-09 01:45:37 +08:00
if replyerr != nil {
glog.V(0).Infof("manifestize %s: %v", r.RequestURI, replyerr)
return
}
2018-05-28 14:53:10 +08:00
md5bytes = md5Hash.Sum(nil)
2020-11-30 20:34:04 +08:00
filerResult, replyerr = fs.saveMetaData(ctx, r, fileName, contentType, so, md5bytes, fileChunks, chunkOffset, smallContent)
2018-05-28 14:53:10 +08:00
2020-08-09 01:45:37 +08:00
return
}
2018-05-28 14:53:10 +08:00
2020-11-16 08:58:48 +08:00
func (fs *FilerServer) doPutAutoChunk(ctx context.Context, w http.ResponseWriter, r *http.Request, chunkSize int32, so *operation.StorageOption) (filerResult *FilerPostResult, md5bytes []byte, replyerr error) {
2020-08-09 01:45:37 +08:00
fileName := ""
contentType := ""
2020-11-30 20:34:04 +08:00
fileChunks, md5Hash, chunkOffset, err, smallContent := fs.uploadReaderToChunks(w, r, r.Body, chunkSize, fileName, contentType, so)
2020-08-09 01:45:37 +08:00
if err != nil {
return nil, nil, err
2018-05-28 14:53:10 +08:00
}
2020-11-16 06:41:56 +08:00
fileChunks, replyerr = filer.MaybeManifestize(fs.saveAsChunk(so), fileChunks)
if replyerr != nil {
glog.V(0).Infof("manifestize %s: %v", r.RequestURI, replyerr)
return
}
md5bytes = md5Hash.Sum(nil)
2020-11-30 20:34:04 +08:00
filerResult, replyerr = fs.saveMetaData(ctx, r, fileName, contentType, so, md5bytes, fileChunks, chunkOffset, smallContent)
2020-08-09 01:45:37 +08:00
return
}
2020-11-30 20:34:04 +08:00
func (fs *FilerServer) saveMetaData(ctx context.Context, r *http.Request, fileName string, contentType string, so *operation.StorageOption, md5bytes []byte, fileChunks []*filer_pb.FileChunk, chunkOffset int64, content []byte) (filerResult *FilerPostResult, replyerr error) {
// detect file mode
modeStr := r.URL.Query().Get("mode")
if modeStr == "" {
modeStr = "0660"
}
mode, err := strconv.ParseUint(modeStr, 8, 32)
if err != nil {
glog.Errorf("Invalid mode format: %s, use 0660 by default", modeStr)
mode = 0660
}
// fix the path
2018-05-28 14:53:10 +08:00
path := r.URL.Path
if strings.HasSuffix(path, "/") {
if fileName != "" {
path += fileName
2018-05-28 14:53:10 +08:00
}
}
glog.V(4).Infoln("saving", path)
2020-09-01 15:21:19 +08:00
entry := &filer.Entry{
2020-03-23 15:01:34 +08:00
FullPath: util.FullPath(path),
2020-09-01 15:21:19 +08:00
Attr: filer.Attr{
Mtime: time.Now(),
2020-11-30 19:11:52 +08:00
Crtime: time.Now(),
Mode: os.FileMode(mode),
Uid: OS_UID,
Gid: OS_GID,
2020-11-16 06:41:56 +08:00
Replication: so.Replication,
Collection: so.Collection,
TtlSec: so.TtlSeconds,
Mime: contentType,
Md5: md5bytes,
FileSize: uint64(chunkOffset),
2018-05-28 14:53:10 +08:00
},
2020-11-30 20:34:04 +08:00
Chunks: fileChunks,
Content: content,
2018-05-28 14:53:10 +08:00
}
filerResult = &FilerPostResult{
Name: fileName,
Size: chunkOffset,
}
if entry.Extended == nil {
entry.Extended = make(map[string][]byte)
}
2020-10-28 18:16:05 +08:00
fs.saveAmzMetaData(r, entry)
for k, v := range r.Header {
if len(v) > 0 && strings.HasPrefix(k, needle.PairNamePrefix) {
2020-11-03 16:21:10 +08:00
entry.Extended[k] = []byte(v[0])
}
}
if dbErr := fs.filer.CreateEntry(ctx, entry, false, false, nil); dbErr != nil {
fs.filer.DeleteChunks(entry.Chunks)
2019-06-23 13:53:52 +08:00
replyerr = dbErr
filerResult.Error = dbErr.Error()
glog.V(0).Infof("failing to write %s to filer server : %v", path, dbErr)
2018-05-28 14:53:10 +08:00
}
2020-08-09 01:45:37 +08:00
return filerResult, replyerr
}
2018-05-28 14:53:10 +08:00
2020-11-30 20:34:04 +08:00
func (fs *FilerServer) uploadReaderToChunks(w http.ResponseWriter, r *http.Request, reader io.Reader, chunkSize int32, fileName, contentType string, so *operation.StorageOption) ([]*filer_pb.FileChunk, hash.Hash, int64, error, []byte) {
2020-08-09 01:45:37 +08:00
var fileChunks []*filer_pb.FileChunk
md5Hash := md5.New()
var partReader = ioutil.NopCloser(io.TeeReader(reader, md5Hash))
chunkOffset := int64(0)
2020-11-30 20:34:04 +08:00
var smallContent, content []byte
2020-08-09 01:45:37 +08:00
for {
2020-08-09 01:45:37 +08:00
limitedReader := io.LimitReader(partReader, int64(chunkSize))
// assign one file id for one chunk
2020-11-16 06:41:56 +08:00
fileId, urlLocation, auth, assignErr := fs.assignNewFileInfo(so)
2020-08-09 01:45:37 +08:00
if assignErr != nil {
2020-11-30 20:34:04 +08:00
return nil, nil, 0, assignErr, nil
2020-08-09 01:45:37 +08:00
}
// upload the chunk to the volume server
2020-11-30 20:34:04 +08:00
uploadResult, uploadErr, data := fs.doUpload(urlLocation, w, r, limitedReader, fileName, contentType, nil, auth)
2020-08-09 01:45:37 +08:00
if uploadErr != nil {
2020-11-30 20:34:04 +08:00
return nil, nil, 0, uploadErr, nil
2020-08-09 01:45:37 +08:00
}
2020-11-30 20:34:04 +08:00
content = data
2020-08-09 01:45:37 +08:00
// if last chunk exhausted the reader exactly at the border
if uploadResult.Size == 0 {
break
}
// Save to chunk manifest structure
fileChunks = append(fileChunks, uploadResult.ToPbFileChunk(fileId, chunkOffset))
glog.V(4).Infof("uploaded %s chunk %d to %s [%d,%d)", fileName, len(fileChunks), fileId, chunkOffset, chunkOffset+int64(uploadResult.Size))
2020-08-09 01:45:37 +08:00
// reset variables for the next chunk
chunkOffset = chunkOffset + int64(uploadResult.Size)
// if last chunk was not at full chunk size, but already exhausted the reader
if int64(uploadResult.Size) < int64(chunkSize) {
break
}
}
if chunkOffset < fs.option.CacheToFilerLimit || strings.HasPrefix(r.URL.Path, filer.DirectoryEtcRoot) && chunkOffset < 4*1024 {
2020-11-30 20:34:04 +08:00
smallContent = content
}
return fileChunks, md5Hash, chunkOffset, nil, smallContent
2018-05-28 14:53:10 +08:00
}
2020-11-30 20:34:04 +08:00
func (fs *FilerServer) doUpload(urlLocation string, w http.ResponseWriter, r *http.Request, limitedReader io.Reader, fileName string, contentType string, pairMap map[string]string, auth security.EncodedJwt) (*operation.UploadResult, error, []byte) {
2018-05-28 14:53:10 +08:00
2019-06-23 13:53:52 +08:00
stats.FilerRequestCounter.WithLabelValues("postAutoChunkUpload").Inc()
start := time.Now()
2019-06-23 16:57:51 +08:00
defer func() {
stats.FilerRequestHistogram.WithLabelValues("postAutoChunkUpload").Observe(time.Since(start).Seconds())
}()
2019-06-23 13:53:52 +08:00
2020-11-30 20:34:04 +08:00
uploadResult, err, data := operation.Upload(urlLocation, fileName, fs.option.Cipher, limitedReader, false, contentType, pairMap, auth)
return uploadResult, err, data
2018-05-28 14:53:10 +08:00
}
2020-11-16 08:58:48 +08:00
func (fs *FilerServer) saveAsChunk(so *operation.StorageOption) filer.SaveDataAsChunkFunctionType {
return func(reader io.Reader, name string, offset int64) (*filer_pb.FileChunk, string, string, error) {
// assign one file id for one chunk
2020-11-16 06:41:56 +08:00
fileId, urlLocation, auth, assignErr := fs.assignNewFileInfo(so)
if assignErr != nil {
return nil, "", "", assignErr
}
// upload the chunk to the volume server
uploadResult, uploadErr, _ := operation.Upload(urlLocation, name, fs.option.Cipher, reader, false, "", nil, auth)
if uploadErr != nil {
return nil, "", "", uploadErr
}
2020-11-16 06:41:56 +08:00
return uploadResult.ToPbFileChunk(fileId, offset), so.Collection, so.Replication, nil
}
}
func (fs *FilerServer) mkdir(ctx context.Context, w http.ResponseWriter, r *http.Request) (filerResult *FilerPostResult, replyerr error) {
// detect file mode
modeStr := r.URL.Query().Get("mode")
if modeStr == "" {
modeStr = "0660"
}
mode, err := strconv.ParseUint(modeStr, 8, 32)
if err != nil {
glog.Errorf("Invalid mode format: %s, use 0660 by default", modeStr)
mode = 0660
}
// fix the path
path := r.URL.Path
if strings.HasSuffix(path, "/") {
path = path[:len(path)-1]
}
existingEntry, err := fs.filer.FindEntry(ctx, util.FullPath(path))
if err == nil && existingEntry != nil {
replyerr = fmt.Errorf("dir %s already exists", path)
return
}
glog.V(4).Infoln("mkdir", path)
entry := &filer.Entry{
FullPath: util.FullPath(path),
Attr: filer.Attr{
2020-09-16 16:27:24 +08:00
Mtime: time.Now(),
Crtime: time.Now(),
Mode: os.FileMode(mode) | os.ModeDir,
Uid: OS_UID,
Gid: OS_GID,
},
}
filerResult = &FilerPostResult{
Name: util.FullPath(path).Name(),
}
if dbErr := fs.filer.CreateEntry(ctx, entry, false, false, nil); dbErr != nil {
replyerr = dbErr
filerResult.Error = dbErr.Error()
glog.V(0).Infof("failing to create dir %s on filer server : %v", path, dbErr)
}
return filerResult, replyerr
}
2020-10-28 18:16:05 +08:00
func (fs *FilerServer) saveAmzMetaData(r *http.Request, entry *filer.Entry) {
2020-10-29 16:05:40 +08:00
if sc := r.Header.Get(xhttp.AmzStorageClass); sc != "" {
entry.Extended[xhttp.AmzStorageClass] = []byte(sc)
2020-10-28 18:16:05 +08:00
}
2020-10-29 16:05:40 +08:00
if tags := r.Header.Get(xhttp.AmzObjectTagging); tags != "" {
2020-10-28 18:16:05 +08:00
for _, v := range strings.Split(tags, "&") {
tag := strings.Split(v, "=")
if len(tag) == 2 {
2020-10-29 16:05:40 +08:00
entry.Extended[xhttp.AmzObjectTagging+"-"+tag[0]] = []byte(tag[1])
2020-10-28 18:16:05 +08:00
}
}
}
for header, values := range r.Header {
2020-10-29 16:05:40 +08:00
if strings.HasPrefix(header, xhttp.AmzUserMetaPrefix) {
2020-10-28 18:16:05 +08:00
for _, value := range values {
entry.Extended[header] = []byte(value)
}
}
}
}