2020-09-01 15:21:19 +08:00
|
|
|
package filer
|
2020-07-20 08:59:43 +08:00
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"math"
|
2021-09-08 10:29:42 +08:00
|
|
|
"net/url"
|
|
|
|
"strings"
|
2022-03-03 05:50:46 +08:00
|
|
|
"sync"
|
2020-10-09 14:31:26 +08:00
|
|
|
"time"
|
2020-07-20 08:59:43 +08:00
|
|
|
|
2022-07-29 15:17:28 +08:00
|
|
|
"github.com/seaweedfs/seaweedfs/weed/wdclient"
|
2022-06-19 16:54:04 +08:00
|
|
|
|
2022-08-18 03:05:07 +08:00
|
|
|
"google.golang.org/protobuf/proto"
|
2020-07-20 08:59:43 +08:00
|
|
|
|
2022-07-29 15:17:28 +08:00
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
2020-07-20 08:59:43 +08:00
|
|
|
)
|
|
|
|
|
2020-07-20 18:34:06 +08:00
|
|
|
// ManifestBatch is the number of data chunks that MaybeManifestize merges
// into one manifest chunk.
const ManifestBatch = 10000
|
|
|
|
|
2022-03-03 05:50:46 +08:00
|
|
|
// bytesBufferPool recycles the scratch buffers used while downloading
// manifest chunks. A buffer taken from the pool may still hold the bytes of
// its previous user, so callers Reset it after Get.
var bytesBufferPool = sync.Pool{
	New: func() interface{} {
		return &bytes.Buffer{}
	},
}
|
|
|
|
|
2020-07-20 08:59:43 +08:00
|
|
|
func HasChunkManifest(chunks []*filer_pb.FileChunk) bool {
|
|
|
|
for _, chunk := range chunks {
|
|
|
|
if chunk.IsChunkManifest {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2020-08-24 09:30:11 +08:00
|
|
|
func SeparateManifestChunks(chunks []*filer_pb.FileChunk) (manifestChunks, nonManifestChunks []*filer_pb.FileChunk) {
|
|
|
|
for _, c := range chunks {
|
2020-08-30 17:07:14 +08:00
|
|
|
if c.IsChunkManifest {
|
2020-08-24 09:30:11 +08:00
|
|
|
manifestChunks = append(manifestChunks, c)
|
|
|
|
} else {
|
|
|
|
nonManifestChunks = append(nonManifestChunks, c)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2021-07-20 14:07:22 +08:00
|
|
|
func ResolveChunkManifest(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, startOffset, stopOffset int64) (dataChunks, manifestChunks []*filer_pb.FileChunk, manifestResolveErr error) {
|
2020-07-20 08:59:43 +08:00
|
|
|
// TODO maybe parallel this
|
|
|
|
for _, chunk := range chunks {
|
2021-07-20 14:07:22 +08:00
|
|
|
|
|
|
|
if max(chunk.Offset, startOffset) >= min(chunk.Offset+int64(chunk.Size), stopOffset) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-07-20 08:59:43 +08:00
|
|
|
if !chunk.IsChunkManifest {
|
|
|
|
dataChunks = append(dataChunks, chunk)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-08-30 17:07:14 +08:00
|
|
|
resolvedChunks, err := ResolveOneChunkManifest(lookupFileIdFn, chunk)
|
2020-07-20 08:59:43 +08:00
|
|
|
if err != nil {
|
2022-06-19 16:54:04 +08:00
|
|
|
return dataChunks, nil, err
|
2020-07-20 08:59:43 +08:00
|
|
|
}
|
2020-08-30 17:07:14 +08:00
|
|
|
|
2020-07-20 08:59:43 +08:00
|
|
|
manifestChunks = append(manifestChunks, chunk)
|
|
|
|
// recursive
|
2022-05-23 16:16:10 +08:00
|
|
|
subDataChunks, subManifestChunks, subErr := ResolveChunkManifest(lookupFileIdFn, resolvedChunks, startOffset, stopOffset)
|
2020-07-20 08:59:43 +08:00
|
|
|
if subErr != nil {
|
2022-06-19 16:54:04 +08:00
|
|
|
return dataChunks, nil, subErr
|
2020-07-20 08:59:43 +08:00
|
|
|
}
|
2022-05-23 16:14:56 +08:00
|
|
|
dataChunks = append(dataChunks, subDataChunks...)
|
|
|
|
manifestChunks = append(manifestChunks, subManifestChunks...)
|
2020-07-20 08:59:43 +08:00
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2021-01-06 20:21:34 +08:00
|
|
|
func ResolveOneChunkManifest(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunk *filer_pb.FileChunk) (dataChunks []*filer_pb.FileChunk, manifestResolveErr error) {
|
2020-08-30 17:07:14 +08:00
|
|
|
if !chunk.IsChunkManifest {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// IsChunkManifest
|
2022-03-03 05:50:46 +08:00
|
|
|
bytesBuffer := bytesBufferPool.Get().(*bytes.Buffer)
|
2022-03-06 21:06:04 +08:00
|
|
|
bytesBuffer.Reset()
|
2022-03-03 05:50:46 +08:00
|
|
|
defer bytesBufferPool.Put(bytesBuffer)
|
|
|
|
err := fetchWholeChunk(bytesBuffer, lookupFileIdFn, chunk.GetFileIdString(), chunk.CipherKey, chunk.IsCompressed)
|
2020-08-30 17:07:14 +08:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("fail to read manifest %s: %v", chunk.GetFileIdString(), err)
|
|
|
|
}
|
|
|
|
m := &filer_pb.FileChunkManifest{}
|
2022-03-03 05:50:46 +08:00
|
|
|
if err := proto.Unmarshal(bytesBuffer.Bytes(), m); err != nil {
|
2020-08-30 17:07:14 +08:00
|
|
|
return nil, fmt.Errorf("fail to unmarshal manifest %s: %v", chunk.GetFileIdString(), err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// recursive
|
|
|
|
filer_pb.AfterEntryDeserialization(m.Chunks)
|
|
|
|
return m.Chunks, nil
|
|
|
|
}
|
|
|
|
|
2020-07-20 18:34:06 +08:00
|
|
|
// TODO fetch from cache for weed mount?
|
2022-03-03 05:50:46 +08:00
|
|
|
func fetchWholeChunk(bytesBuffer *bytes.Buffer, lookupFileIdFn wdclient.LookupFileIdFunctionType, fileId string, cipherKey []byte, isGzipped bool) error {
|
2020-10-08 13:49:04 +08:00
|
|
|
urlStrings, err := lookupFileIdFn(fileId)
|
2020-07-20 08:59:43 +08:00
|
|
|
if err != nil {
|
2020-07-20 18:34:06 +08:00
|
|
|
glog.Errorf("operation LookupFileId %s failed, err: %v", fileId, err)
|
2022-03-03 05:50:46 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
err = retriedStreamFetchChunkData(bytesBuffer, urlStrings, cipherKey, isGzipped, true, 0, 0)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2020-07-20 08:59:43 +08:00
|
|
|
}
|
2022-03-03 05:50:46 +08:00
|
|
|
return nil
|
2020-10-09 14:19:42 +08:00
|
|
|
}
|
2020-10-08 13:49:04 +08:00
|
|
|
|
2022-03-13 17:38:52 +08:00
|
|
|
func fetchChunkRange(buffer []byte, lookupFileIdFn wdclient.LookupFileIdFunctionType, fileId string, cipherKey []byte, isGzipped bool, offset int64) (int, error) {
|
|
|
|
urlStrings, err := lookupFileIdFn(fileId)
|
|
|
|
if err != nil {
|
|
|
|
glog.Errorf("operation LookupFileId %s failed, err: %v", fileId, err)
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
return retriedFetchChunkData(buffer, urlStrings, cipherKey, isGzipped, false, offset)
|
|
|
|
}
|
|
|
|
|
2022-02-26 18:16:47 +08:00
|
|
|
func retriedFetchChunkData(buffer []byte, urlStrings []string, cipherKey []byte, isGzipped bool, isFullChunk bool, offset int64) (n int, err error) {
|
2020-10-09 14:19:42 +08:00
|
|
|
|
2020-10-13 15:29:46 +08:00
|
|
|
var shouldRetry bool
|
2020-10-09 14:31:26 +08:00
|
|
|
|
2020-11-01 18:36:43 +08:00
|
|
|
for waitTime := time.Second; waitTime < util.RetryWaitTime; waitTime += waitTime / 2 {
|
2020-10-09 14:31:26 +08:00
|
|
|
for _, urlString := range urlStrings {
|
2022-02-26 18:16:47 +08:00
|
|
|
n = 0
|
2021-09-08 10:29:42 +08:00
|
|
|
if strings.Contains(urlString, "%") {
|
|
|
|
urlString = url.PathEscape(urlString)
|
|
|
|
}
|
2022-02-26 18:16:47 +08:00
|
|
|
shouldRetry, err = util.ReadUrlAsStream(urlString+"?readDeleted=true", cipherKey, isGzipped, isFullChunk, offset, len(buffer), func(data []byte) {
|
2022-02-26 19:23:06 +08:00
|
|
|
if n < len(buffer) {
|
|
|
|
x := copy(buffer[n:], data)
|
|
|
|
n += x
|
|
|
|
}
|
2020-10-09 14:31:26 +08:00
|
|
|
})
|
2020-10-13 15:29:46 +08:00
|
|
|
if !shouldRetry {
|
|
|
|
break
|
|
|
|
}
|
2020-10-09 14:31:26 +08:00
|
|
|
if err != nil {
|
|
|
|
glog.V(0).Infof("read %s failed, err: %v", urlString, err)
|
|
|
|
} else {
|
|
|
|
break
|
|
|
|
}
|
2020-10-08 13:49:04 +08:00
|
|
|
}
|
2020-10-14 10:50:46 +08:00
|
|
|
if err != nil && shouldRetry {
|
2020-10-14 10:50:22 +08:00
|
|
|
glog.V(0).Infof("retry reading in %v", waitTime)
|
2020-10-09 15:01:47 +08:00
|
|
|
time.Sleep(waitTime)
|
|
|
|
} else {
|
|
|
|
break
|
|
|
|
}
|
2020-07-20 08:59:43 +08:00
|
|
|
}
|
|
|
|
|
2022-02-26 18:16:47 +08:00
|
|
|
return n, err
|
2021-03-23 13:12:57 +08:00
|
|
|
|
2020-07-20 08:59:43 +08:00
|
|
|
}
|
|
|
|
|
2021-08-14 02:00:11 +08:00
|
|
|
func retriedStreamFetchChunkData(writer io.Writer, urlStrings []string, cipherKey []byte, isGzipped bool, isFullChunk bool, offset int64, size int) (err error) {
|
|
|
|
|
|
|
|
var shouldRetry bool
|
2021-08-14 02:30:38 +08:00
|
|
|
var totalWritten int
|
2021-08-14 02:00:11 +08:00
|
|
|
|
|
|
|
for waitTime := time.Second; waitTime < util.RetryWaitTime; waitTime += waitTime / 2 {
|
|
|
|
for _, urlString := range urlStrings {
|
2022-09-15 01:29:55 +08:00
|
|
|
var localProcessed int
|
2022-09-19 07:49:48 +08:00
|
|
|
var writeErr error
|
2021-08-14 02:00:11 +08:00
|
|
|
shouldRetry, err = util.ReadUrlAsStream(urlString+"?readDeleted=true", cipherKey, isGzipped, isFullChunk, offset, size, func(data []byte) {
|
2022-09-15 01:29:55 +08:00
|
|
|
if totalWritten > localProcessed {
|
|
|
|
toBeSkipped := totalWritten - localProcessed
|
2021-08-14 02:30:38 +08:00
|
|
|
if len(data) <= toBeSkipped {
|
2022-09-15 01:29:55 +08:00
|
|
|
localProcessed += len(data)
|
2021-08-14 02:30:38 +08:00
|
|
|
return // skip if already processed
|
|
|
|
}
|
2021-08-14 02:31:43 +08:00
|
|
|
data = data[toBeSkipped:]
|
2022-09-15 01:29:55 +08:00
|
|
|
localProcessed += toBeSkipped
|
2021-08-14 02:30:38 +08:00
|
|
|
}
|
2022-09-19 07:49:48 +08:00
|
|
|
var writtenCount int
|
|
|
|
writtenCount, writeErr = writer.Write(data)
|
|
|
|
localProcessed += writtenCount
|
|
|
|
totalWritten += writtenCount
|
2021-08-14 02:00:11 +08:00
|
|
|
})
|
|
|
|
if !shouldRetry {
|
|
|
|
break
|
|
|
|
}
|
2022-09-19 07:49:48 +08:00
|
|
|
if writeErr != nil {
|
|
|
|
err = writeErr
|
|
|
|
break
|
|
|
|
}
|
2021-08-14 02:00:11 +08:00
|
|
|
if err != nil {
|
|
|
|
glog.V(0).Infof("read %s failed, err: %v", urlString, err)
|
|
|
|
} else {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
2021-08-14 02:13:30 +08:00
|
|
|
if err != nil && shouldRetry {
|
2021-08-14 02:00:11 +08:00
|
|
|
glog.V(0).Infof("retry reading in %v", waitTime)
|
|
|
|
time.Sleep(waitTime)
|
|
|
|
} else {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return err
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2020-07-20 18:34:06 +08:00
|
|
|
func MaybeManifestize(saveFunc SaveDataAsChunkFunctionType, inputChunks []*filer_pb.FileChunk) (chunks []*filer_pb.FileChunk, err error) {
|
|
|
|
return doMaybeManifestize(saveFunc, inputChunks, ManifestBatch, mergeIntoManifest)
|
2020-07-20 08:59:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func doMaybeManifestize(saveFunc SaveDataAsChunkFunctionType, inputChunks []*filer_pb.FileChunk, mergeFactor int, mergefn func(saveFunc SaveDataAsChunkFunctionType, dataChunks []*filer_pb.FileChunk) (manifestChunk *filer_pb.FileChunk, err error)) (chunks []*filer_pb.FileChunk, err error) {
|
|
|
|
|
|
|
|
var dataChunks []*filer_pb.FileChunk
|
|
|
|
for _, chunk := range inputChunks {
|
|
|
|
if !chunk.IsChunkManifest {
|
|
|
|
dataChunks = append(dataChunks, chunk)
|
|
|
|
} else {
|
|
|
|
chunks = append(chunks, chunk)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
remaining := len(dataChunks)
|
2020-07-20 18:34:06 +08:00
|
|
|
for i := 0; i+mergeFactor <= len(dataChunks); i += mergeFactor {
|
|
|
|
chunk, err := mergefn(saveFunc, dataChunks[i:i+mergeFactor])
|
2020-07-20 08:59:43 +08:00
|
|
|
if err != nil {
|
|
|
|
return dataChunks, err
|
|
|
|
}
|
|
|
|
chunks = append(chunks, chunk)
|
2020-07-20 18:34:06 +08:00
|
|
|
remaining -= mergeFactor
|
2020-07-20 08:59:43 +08:00
|
|
|
}
|
|
|
|
// remaining
|
|
|
|
for i := len(dataChunks) - remaining; i < len(dataChunks); i++ {
|
|
|
|
chunks = append(chunks, dataChunks[i])
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func mergeIntoManifest(saveFunc SaveDataAsChunkFunctionType, dataChunks []*filer_pb.FileChunk) (manifestChunk *filer_pb.FileChunk, err error) {
|
|
|
|
|
2020-07-21 13:01:39 +08:00
|
|
|
filer_pb.BeforeEntrySerialization(dataChunks)
|
|
|
|
|
2020-07-20 08:59:43 +08:00
|
|
|
// create and serialize the manifest
|
|
|
|
data, serErr := proto.Marshal(&filer_pb.FileChunkManifest{
|
|
|
|
Chunks: dataChunks,
|
|
|
|
})
|
|
|
|
if serErr != nil {
|
|
|
|
return nil, fmt.Errorf("serializing manifest: %v", serErr)
|
|
|
|
}
|
|
|
|
|
|
|
|
minOffset, maxOffset := int64(math.MaxInt64), int64(math.MinInt64)
|
2020-07-20 18:34:06 +08:00
|
|
|
for _, chunk := range dataChunks {
|
2020-07-20 08:59:43 +08:00
|
|
|
if minOffset > int64(chunk.Offset) {
|
|
|
|
minOffset = chunk.Offset
|
|
|
|
}
|
|
|
|
if maxOffset < int64(chunk.Size)+chunk.Offset {
|
|
|
|
maxOffset = int64(chunk.Size) + chunk.Offset
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-01-03 15:20:45 +08:00
|
|
|
manifestChunk, err = saveFunc(bytes.NewReader(data), "", 0, 0)
|
2020-07-20 08:59:43 +08:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
manifestChunk.IsChunkManifest = true
|
|
|
|
manifestChunk.Offset = minOffset
|
|
|
|
manifestChunk.Size = uint64(maxOffset - minOffset)
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-01-03 15:20:45 +08:00
|
|
|
// SaveDataAsChunkFunctionType is the callback used by MaybeManifestize and
// its helpers to store the bytes read from reader. It returns the chunk that
// describes the stored data, or an error.
//
// mergeIntoManifest calls it with an empty name and zero offset and tsNs.
// NOTE(review): the meaning of name, offset and tsNs is set by the
// implementations, which are not in this file; presumably a file name, the
// chunk's offset within the file, and a timestamp in nanoseconds. Confirm
// against the implementations.
type SaveDataAsChunkFunctionType func(reader io.Reader, name string, offset int64, tsNs int64) (chunk *filer_pb.FileChunk, err error)
|