seaweedfs/weed/server/filer_server_handlers_write_autochunk.go

407 lines
11 KiB
Go
Raw Normal View History

2018-05-28 14:53:10 +08:00
package weed_server
import (
"bytes"
2019-03-16 06:55:34 +08:00
"context"
"fmt"
//"github.com/seaweedfs/seaweedfs/weed/s3api"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
2018-05-28 14:53:10 +08:00
"io"
"net/http"
"os"
2018-05-28 14:53:10 +08:00
"path"
"strconv"
"strings"
2018-05-28 14:59:49 +08:00
"time"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/operation"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
"github.com/seaweedfs/seaweedfs/weed/util"
2018-05-28 14:53:10 +08:00
)
2021-03-31 12:07:34 +08:00
func (fs *FilerServer) autoChunk(ctx context.Context, w http.ResponseWriter, r *http.Request, contentLength int64, so *operation.StorageOption) {
2018-05-28 14:53:10 +08:00
// autoChunking can be set at the command-line level or as a query param. Query param overrides command-line
query := r.URL.Query()
parsedMaxMB, _ := strconv.ParseInt(query.Get("maxMB"), 10, 32)
maxMB := int32(parsedMaxMB)
2018-07-07 17:18:47 +08:00
if maxMB <= 0 && fs.option.MaxMB > 0 {
maxMB = int32(fs.option.MaxMB)
2018-05-28 14:53:10 +08:00
}
chunkSize := 1024 * 1024 * maxMB
2020-08-09 01:45:37 +08:00
var reply *FilerPostResult
var err error
var md5bytes []byte
2020-08-09 01:45:37 +08:00
if r.Method == "POST" {
if r.Header.Get("Content-Type") == "" && strings.HasSuffix(r.URL.Path, "/") {
reply, err = fs.mkdir(ctx, w, r, so)
} else {
2021-03-31 12:07:34 +08:00
reply, md5bytes, err = fs.doPostAutoChunk(ctx, w, r, chunkSize, contentLength, so)
}
2020-08-09 01:45:37 +08:00
} else {
2021-03-31 12:07:34 +08:00
reply, md5bytes, err = fs.doPutAutoChunk(ctx, w, r, chunkSize, contentLength, so)
2020-08-09 01:45:37 +08:00
}
2018-05-28 14:53:10 +08:00
if err != nil {
if strings.HasPrefix(err.Error(), "read input:") || err.Error() == io.ErrUnexpectedEOF.Error() {
writeJsonError(w, r, 499, err)
} else if strings.HasSuffix(err.Error(), "is a file") || strings.HasSuffix(err.Error(), "already exists") {
writeJsonError(w, r, http.StatusConflict, err)
} else {
writeJsonError(w, r, http.StatusInternalServerError, err)
}
2018-05-28 14:53:10 +08:00
} else if reply != nil {
if len(md5bytes) > 0 {
2021-09-13 15:31:06 +08:00
md5InBase64 := util.Base64Encode(md5bytes)
w.Header().Set("Content-MD5", md5InBase64)
}
2018-05-28 14:53:10 +08:00
writeJsonQuiet(w, r, http.StatusCreated, reply)
}
}
2021-03-31 12:07:34 +08:00
func (fs *FilerServer) doPostAutoChunk(ctx context.Context, w http.ResponseWriter, r *http.Request, chunkSize int32, contentLength int64, so *operation.StorageOption) (filerResult *FilerPostResult, md5bytes []byte, replyerr error) {
2018-05-28 14:53:10 +08:00
multipartReader, multipartReaderErr := r.MultipartReader()
if multipartReaderErr != nil {
return nil, nil, multipartReaderErr
2018-05-28 14:53:10 +08:00
}
part1, part1Err := multipartReader.NextPart()
if part1Err != nil {
return nil, nil, part1Err
2018-05-28 14:53:10 +08:00
}
fileName := part1.FileName()
if fileName != "" {
fileName = path.Base(fileName)
}
contentType := part1.Header.Get("Content-Type")
2020-08-09 01:45:37 +08:00
if contentType == "application/octet-stream" {
contentType = ""
}
if so.SaveInside {
buf := bufPool.Get().(*bytes.Buffer)
buf.Reset()
buf.ReadFrom(part1)
filerResult, replyerr = fs.saveMetaData(ctx, r, fileName, contentType, so, nil, nil, 0, buf.Bytes())
bufPool.Put(buf)
return
}
2021-03-31 12:07:34 +08:00
fileChunks, md5Hash, chunkOffset, err, smallContent := fs.uploadReaderToChunks(w, r, part1, chunkSize, fileName, contentType, contentLength, so)
2020-08-09 01:45:37 +08:00
if err != nil {
return nil, nil, err
2020-08-09 01:45:37 +08:00
}
2018-05-28 14:53:10 +08:00
md5bytes = md5Hash.Sum(nil)
2020-11-30 20:34:04 +08:00
filerResult, replyerr = fs.saveMetaData(ctx, r, fileName, contentType, so, md5bytes, fileChunks, chunkOffset, smallContent)
if replyerr != nil {
fs.filer.DeleteChunks(fileChunks)
}
2018-05-28 14:53:10 +08:00
2020-08-09 01:45:37 +08:00
return
}
2018-05-28 14:53:10 +08:00
2021-03-31 12:07:34 +08:00
func (fs *FilerServer) doPutAutoChunk(ctx context.Context, w http.ResponseWriter, r *http.Request, chunkSize int32, contentLength int64, so *operation.StorageOption) (filerResult *FilerPostResult, md5bytes []byte, replyerr error) {
fileName := path.Base(r.URL.Path)
contentType := r.Header.Get("Content-Type")
if contentType == "application/octet-stream" {
contentType = ""
}
2021-03-31 12:07:34 +08:00
fileChunks, md5Hash, chunkOffset, err, smallContent := fs.uploadReaderToChunks(w, r, r.Body, chunkSize, fileName, contentType, contentLength, so)
2020-08-09 01:45:37 +08:00
if err != nil {
return nil, nil, err
2018-05-28 14:53:10 +08:00
}
md5bytes = md5Hash.Sum(nil)
2020-11-30 20:34:04 +08:00
filerResult, replyerr = fs.saveMetaData(ctx, r, fileName, contentType, so, md5bytes, fileChunks, chunkOffset, smallContent)
if replyerr != nil {
fs.filer.DeleteChunks(fileChunks)
}
2020-08-09 01:45:37 +08:00
return
}
2022-03-07 15:41:07 +08:00
// isAppend reports whether the request's first "op" query parameter is
// exactly "append".
func isAppend(r *http.Request) bool {
	op := r.URL.Query().Get("op")
	return op == "append"
}
// skipCheckParentDirEntry reports whether the request's first
// "skipCheckParentDir" query parameter is exactly "true".
func skipCheckParentDirEntry(r *http.Request) bool {
	flag := r.URL.Query().Get("skipCheckParentDir")
	return flag == "true"
}
2020-11-30 20:34:04 +08:00
func (fs *FilerServer) saveMetaData(ctx context.Context, r *http.Request, fileName string, contentType string, so *operation.StorageOption, md5bytes []byte, fileChunks []*filer_pb.FileChunk, chunkOffset int64, content []byte) (filerResult *FilerPostResult, replyerr error) {
// detect file mode
modeStr := r.URL.Query().Get("mode")
if modeStr == "" {
modeStr = "0660"
}
mode, err := strconv.ParseUint(modeStr, 8, 32)
if err != nil {
glog.Errorf("Invalid mode format: %s, use 0660 by default", modeStr)
mode = 0660
}
// fix the path
2018-05-28 14:53:10 +08:00
path := r.URL.Path
if strings.HasSuffix(path, "/") {
if fileName != "" {
path += fileName
2018-05-28 14:53:10 +08:00
}
} else {
if fileName != "" {
if possibleDirEntry, findDirErr := fs.filer.FindEntry(ctx, util.FullPath(path)); findDirErr == nil {
if possibleDirEntry.IsDirectory() {
path += "/" + fileName
}
}
}
2018-05-28 14:53:10 +08:00
}
2021-01-21 05:40:32 +08:00
var entry *filer.Entry
var newChunks []*filer_pb.FileChunk
2021-01-21 05:40:32 +08:00
var mergedChunks []*filer_pb.FileChunk
2022-03-07 16:04:59 +08:00
isAppend := isAppend(r)
isOffsetWrite := len(fileChunks) > 0 && fileChunks[0].Offset > 0
2021-01-21 05:40:32 +08:00
// when it is an append
2022-03-07 16:04:59 +08:00
if isAppend || isOffsetWrite {
2021-01-21 05:40:32 +08:00
existingEntry, findErr := fs.filer.FindEntry(ctx, util.FullPath(path))
if findErr != nil && findErr != filer_pb.ErrNotFound {
glog.V(0).Infof("failing to find %s: %v", path, findErr)
}
entry = existingEntry
2018-05-28 14:53:10 +08:00
}
2021-01-21 05:40:32 +08:00
if entry != nil {
entry.Mtime = time.Now()
entry.Md5 = nil
// adjust chunk offsets
2022-03-07 16:04:59 +08:00
if isAppend {
for _, chunk := range fileChunks {
chunk.Offset += int64(entry.FileSize)
}
entry.FileSize += uint64(chunkOffset)
2021-01-21 05:40:32 +08:00
}
newChunks = append(entry.GetChunks(), fileChunks...)
2021-01-21 05:40:32 +08:00
// TODO
if len(entry.Content) > 0 {
replyerr = fmt.Errorf("append to small file is not supported yet")
return
}
} else {
glog.V(4).Infoln("saving", path)
newChunks = fileChunks
2021-01-21 05:40:32 +08:00
entry = &filer.Entry{
FullPath: util.FullPath(path),
Attr: filer.Attr{
Mtime: time.Now(),
Crtime: time.Now(),
Mode: os.FileMode(mode),
Uid: OS_UID,
Gid: OS_GID,
TtlSec: so.TtlSeconds,
Mime: contentType,
Md5: md5bytes,
FileSize: uint64(chunkOffset),
2021-01-21 05:40:32 +08:00
},
Content: content,
}
}
// maybe concatenate small chunks into one whole chunk
mergedChunks, replyerr = fs.maybeMergeChunks(so, newChunks)
if replyerr != nil {
glog.V(0).Infof("merge chunks %s: %v", r.RequestURI, replyerr)
mergedChunks = newChunks
2021-01-21 05:40:32 +08:00
}
// maybe compact entry chunks
mergedChunks, replyerr = filer.MaybeManifestize(fs.saveAsChunk(so), mergedChunks)
if replyerr != nil {
glog.V(0).Infof("manifestize %s: %v", r.RequestURI, replyerr)
return
2018-05-28 14:53:10 +08:00
}
2021-01-21 05:40:32 +08:00
entry.Chunks = mergedChunks
if isOffsetWrite {
entry.Md5 = nil
entry.FileSize = entry.Size()
}
filerResult = &FilerPostResult{
Name: fileName,
Size: int64(entry.FileSize),
}
entry.Extended = SaveAmzMetaData(r, entry.Extended, false)
2020-10-28 18:16:05 +08:00
for k, v := range r.Header {
if len(v) > 0 && len(v[0]) > 0 {
if strings.HasPrefix(k, needle.PairNamePrefix) || k == "Cache-Control" || k == "Expires" || k == "Content-Disposition" {
entry.Extended[k] = []byte(v[0])
}
if k == "Response-Content-Disposition" {
entry.Extended["Content-Disposition"] = []byte(v[0])
}
}
}
if dbErr := fs.filer.CreateEntry(ctx, entry, false, false, nil, skipCheckParentDirEntry(r)); dbErr != nil {
2019-06-23 13:53:52 +08:00
replyerr = dbErr
filerResult.Error = dbErr.Error()
glog.V(0).Infof("failing to write %s to filer server : %v", path, dbErr)
2018-05-28 14:53:10 +08:00
}
2020-08-09 01:45:37 +08:00
return filerResult, replyerr
}
2018-05-28 14:53:10 +08:00
2020-11-16 08:58:48 +08:00
func (fs *FilerServer) saveAsChunk(so *operation.StorageOption) filer.SaveDataAsChunkFunctionType {
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add 
insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2023-01-03 15:20:45 +08:00
return func(reader io.Reader, name string, offset int64, tsNs int64) (*filer_pb.FileChunk, error) {
2022-08-21 10:15:44 +08:00
var fileId string
var uploadResult *operation.UploadResult
err := util.Retry("saveAsChunk", func() error {
// assign one file id for one chunk
assignedFileId, urlLocation, auth, assignErr := fs.assignNewFileInfo(so)
if assignErr != nil {
return assignErr
}
2022-08-21 10:15:44 +08:00
fileId = assignedFileId
// upload the chunk to the volume server
uploadOption := &operation.UploadOption{
UploadUrl: urlLocation,
Filename: name,
Cipher: fs.option.Cipher,
IsInputCompressed: false,
MimeType: "",
PairMap: nil,
Jwt: auth,
}
var uploadErr error
uploadResult, uploadErr, _ = operation.Upload(reader, uploadOption)
if uploadErr != nil {
return uploadErr
}
return nil
})
if err != nil {
return nil, err
}
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as *semaphore.Weighted not found impactful * optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to *list.List * refactor chunkViews to *list.List * add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to *IntervalList[*ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add 
insert interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation
2023-01-03 15:20:45 +08:00
return uploadResult.ToPbFileChunk(fileId, offset, tsNs), nil
}
}
func (fs *FilerServer) mkdir(ctx context.Context, w http.ResponseWriter, r *http.Request, so *operation.StorageOption) (filerResult *FilerPostResult, replyerr error) {
// detect file mode
modeStr := r.URL.Query().Get("mode")
if modeStr == "" {
modeStr = "0660"
}
mode, err := strconv.ParseUint(modeStr, 8, 32)
if err != nil {
glog.Errorf("Invalid mode format: %s, use 0660 by default", modeStr)
mode = 0660
}
// fix the path
path := r.URL.Path
if strings.HasSuffix(path, "/") {
path = path[:len(path)-1]
}
existingEntry, err := fs.filer.FindEntry(ctx, util.FullPath(path))
if err == nil && existingEntry != nil {
replyerr = fmt.Errorf("dir %s already exists", path)
return
}
glog.V(4).Infoln("mkdir", path)
entry := &filer.Entry{
FullPath: util.FullPath(path),
Attr: filer.Attr{
2020-09-16 16:27:24 +08:00
Mtime: time.Now(),
Crtime: time.Now(),
Mode: os.FileMode(mode) | os.ModeDir,
Uid: OS_UID,
Gid: OS_GID,
TtlSec: so.TtlSeconds,
},
}
filerResult = &FilerPostResult{
Name: util.FullPath(path).Name(),
}
if dbErr := fs.filer.CreateEntry(ctx, entry, false, false, nil, false); dbErr != nil {
replyerr = dbErr
filerResult.Error = dbErr.Error()
glog.V(0).Infof("failing to create dir %s on filer server : %v", path, dbErr)
}
return filerResult, replyerr
}
2020-10-28 18:16:05 +08:00
func SaveAmzMetaData(r *http.Request, existing map[string][]byte, isReplace bool) (metadata map[string][]byte) {
metadata = make(map[string][]byte)
if !isReplace {
for k, v := range existing {
metadata[k] = v
}
}
2020-10-28 18:16:05 +08:00
if sc := r.Header.Get(s3_constants.AmzStorageClass); sc != "" {
metadata[s3_constants.AmzStorageClass] = []byte(sc)
2020-10-28 18:16:05 +08:00
}
if ce := r.Header.Get("Content-Encoding"); ce != "" {
metadata["Content-Encoding"] = []byte(ce)
}
if tags := r.Header.Get(s3_constants.AmzObjectTagging); tags != "" {
2020-10-28 18:16:05 +08:00
for _, v := range strings.Split(tags, "&") {
tag := strings.Split(v, "=")
if len(tag) == 2 {
metadata[s3_constants.AmzObjectTagging+"-"+tag[0]] = []byte(tag[1])
} else if len(tag) == 1 {
metadata[s3_constants.AmzObjectTagging+"-"+tag[0]] = nil
2020-10-28 18:16:05 +08:00
}
}
}
for header, values := range r.Header {
if strings.HasPrefix(header, s3_constants.AmzUserMetaPrefix) {
2020-10-28 18:16:05 +08:00
for _, value := range values {
metadata[header] = []byte(value)
2020-10-28 18:16:05 +08:00
}
}
}
//acp-owner
acpOwner := r.Header.Get(s3_constants.ExtAmzOwnerKey)
if len(acpOwner) > 0 {
metadata[s3_constants.ExtAmzOwnerKey] = []byte(acpOwner)
}
//acp-grants
acpGrants := r.Header.Get(s3_constants.ExtAmzAclKey)
if len(acpOwner) > 0 {
metadata[s3_constants.ExtAmzAclKey] = []byte(acpGrants)
}
return
2020-10-28 18:16:05 +08:00
}