seaweedfs/weed/filer/stream.go

336 lines
9.5 KiB
Go
Raw Normal View History

2020-09-01 15:21:19 +08:00
package filer
import (
"bytes"
2020-10-14 04:53:34 +08:00
"fmt"
"io"
"math"
2021-08-08 06:35:27 +08:00
"sort"
"strings"
2021-08-26 08:28:50 +08:00
"sync"
2021-05-24 15:14:50 +08:00
"time"
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
2021-05-24 15:14:50 +08:00
"github.com/chrislusf/seaweedfs/weed/stats"
"github.com/chrislusf/seaweedfs/weed/util"
"github.com/chrislusf/seaweedfs/weed/wdclient"
)
2021-08-16 10:46:45 +08:00
// HasData reports whether the entry carries any payload, either as inline
// Content bytes or as at least one chunk.
func HasData(entry *filer_pb.Entry) bool {
	return len(entry.Content) > 0 || len(entry.Chunks) > 0
}
// IsSameData reports whether two entries hold the same data.
//
// When either entry has inline Content, the two Content byte slices are
// compared directly; otherwise the comparison is delegated to isSameChunks.
func IsSameData(a, b *filer_pb.Entry) bool {
	hasInlineContent := len(a.Content) > 0 || len(b.Content) > 0
	if !hasInlineContent {
		return isSameChunks(a.Chunks, b.Chunks)
	}
	return bytes.Equal(a.Content, b.Content)
}
func isSameChunks(a, b []*filer_pb.FileChunk) bool {
if len(a) != len(b) {
return false
}
sort.Slice(a, func(i, j int) bool {
return strings.Compare(a[i].ETag, a[j].ETag) < 0
})
sort.Slice(b, func(i, j int) bool {
return strings.Compare(b[i].ETag, b[j].ETag) < 0
})
2021-08-16 10:46:45 +08:00
for i := 0; i < len(a); i++ {
if a[i].ETag != b[i].ETag {
2021-08-16 10:46:45 +08:00
return false
}
}
return true
}
// NewFileReader returns a reader over the entry's data: an in-memory reader
// when the entry has inline Content, otherwise a ChunkStreamReader over the
// entry's chunks.
func NewFileReader(filerClient filer_pb.FilerClient, entry *filer_pb.Entry) io.Reader {
	if len(entry.Content) == 0 {
		return NewChunkStreamReader(filerClient, entry.Chunks)
	}
	return bytes.NewReader(entry.Content)
}
func StreamContent(masterClient wdclient.HasLookupFileIdFunction, writer io.Writer, chunks []*filer_pb.FileChunk, offset int64, size int64) error {
2021-03-12 01:38:59 +08:00
glog.V(9).Infof("start to stream content for chunks: %+v\n", chunks)
chunkViews := ViewFromChunks(masterClient.GetLookupFileIdFunction(), chunks, offset, size)
fileId2Url := make(map[string][]string)
for _, chunkView := range chunkViews {
urlStrings, err := masterClient.GetLookupFileIdFunction()(chunkView.FileId)
if err != nil {
glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
return err
2021-03-12 02:34:36 +08:00
} else if len(urlStrings) == 0 {
glog.Errorf("operation LookupFileId %s failed, err: urls not found", chunkView.FileId)
return fmt.Errorf("operation LookupFileId %s failed, err: urls not found", chunkView.FileId)
}
fileId2Url[chunkView.FileId] = urlStrings
}
for _, chunkView := range chunkViews {
urlStrings := fileId2Url[chunkView.FileId]
2021-05-24 15:14:50 +08:00
start := time.Now()
err := retriedStreamFetchChunkData(writer, urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size))
2021-05-24 15:14:50 +08:00
stats.FilerRequestHistogram.WithLabelValues("chunkDownload").Observe(time.Since(start).Seconds())
2020-10-11 07:02:10 +08:00
if err != nil {
2021-05-24 15:14:50 +08:00
stats.FilerRequestCounter.WithLabelValues("chunkDownloadError").Inc()
2020-10-14 04:53:34 +08:00
return fmt.Errorf("read chunk: %v", err)
}
2021-05-24 15:14:50 +08:00
stats.FilerRequestCounter.WithLabelValues("chunkDownload").Inc()
}
return nil
}
2020-04-28 15:05:47 +08:00
// ---------------- ReadAllReader ----------------------------------
func ReadAll(masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk) ([]byte, error) {
buffer := bytes.Buffer{}
lookupFileIdFn := func(fileId string) (targetUrls []string, err error) {
2020-04-28 15:05:47 +08:00
return masterClient.LookupFileId(fileId)
}
chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, math.MaxInt64)
2020-04-28 15:05:47 +08:00
for _, chunkView := range chunkViews {
urlStrings, err := lookupFileIdFn(chunkView.FileId)
2020-04-28 15:05:47 +08:00
if err != nil {
glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
return nil, err
}
2020-10-09 14:19:42 +08:00
2021-03-23 13:13:19 +08:00
data, err := retriedFetchChunkData(urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size))
2020-10-09 14:19:42 +08:00
if err != nil {
return nil, err
2020-04-28 15:05:47 +08:00
}
2020-10-09 14:19:42 +08:00
buffer.Write(data)
2020-04-28 15:05:47 +08:00
}
return buffer.Bytes(), nil
}
// ---------------- ChunkStreamReader ----------------------------------
type ChunkStreamReader struct {
2021-09-01 17:45:42 +08:00
chunkViews []*ChunkView
totalSize int64
logicOffset int64
buffer []byte
bufferOffset int64
bufferLock sync.Mutex
chunk string
lookupFileId wdclient.LookupFileIdFunctionType
}
// Compile-time checks that *ChunkStreamReader implements io.ReadSeeker and
// io.ReaderAt.
var (
	_ io.ReadSeeker = (*ChunkStreamReader)(nil)
	_ io.ReaderAt   = (*ChunkStreamReader)(nil)
)
2021-08-08 06:35:27 +08:00
func doNewChunkStreamReader(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, math.MaxInt64)
2021-08-08 06:35:27 +08:00
sort.Slice(chunkViews, func(i, j int) bool {
return chunkViews[i].LogicOffset < chunkViews[j].LogicOffset
})
2021-08-08 05:46:23 +08:00
var totalSize int64
for _, chunk := range chunkViews {
totalSize += int64(chunk.Size)
}
return &ChunkStreamReader{
chunkViews: chunkViews,
lookupFileId: lookupFileIdFn,
2021-08-08 05:46:23 +08:00
totalSize: totalSize,
}
}
2021-08-08 06:35:27 +08:00
func NewChunkStreamReaderFromFiler(masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
2021-08-08 06:35:27 +08:00
lookupFileIdFn := func(fileId string) (targetUrl []string, err error) {
return masterClient.LookupFileId(fileId)
}
2021-08-08 06:35:27 +08:00
return doNewChunkStreamReader(lookupFileIdFn, chunks)
}
2021-08-08 06:35:27 +08:00
func NewChunkStreamReader(filerClient filer_pb.FilerClient, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
2021-08-08 05:46:23 +08:00
2021-08-08 06:35:27 +08:00
lookupFileIdFn := LookupFn(filerClient)
return doNewChunkStreamReader(lookupFileIdFn, chunks)
}
func (c *ChunkStreamReader) ReadAt(p []byte, off int64) (n int, err error) {
2021-08-26 08:28:50 +08:00
c.bufferLock.Lock()
defer c.bufferLock.Unlock()
if err = c.prepareBufferFor(off); err != nil {
return
}
c.logicOffset = off
2021-08-26 08:28:50 +08:00
return c.doRead(p)
}
func (c *ChunkStreamReader) Read(p []byte) (n int, err error) {
2021-08-26 08:28:50 +08:00
c.bufferLock.Lock()
defer c.bufferLock.Unlock()
return c.doRead(p)
}
func (c *ChunkStreamReader) doRead(p []byte) (n int, err error) {
// fmt.Printf("do read [%d,%d) at %s[%d,%d)\n", c.logicOffset, c.logicOffset+int64(len(p)), c.chunk, c.bufferOffset, c.bufferOffset+int64(len(c.buffer)))
2020-04-29 17:42:58 +08:00
for n < len(p) {
2021-08-26 08:28:50 +08:00
// println("read", c.logicOffset)
if err = c.prepareBufferFor(c.logicOffset); err != nil {
return
}
2021-08-26 08:28:50 +08:00
t := copy(p[n:], c.buffer[c.logicOffset-c.bufferOffset:])
2020-04-29 17:42:58 +08:00
n += t
2021-08-08 16:21:42 +08:00
c.logicOffset += int64(t)
}
return
}
func (c *ChunkStreamReader) isBufferEmpty() bool {
2021-09-01 17:45:42 +08:00
return len(c.buffer) <= int(c.logicOffset-c.bufferOffset)
}
func (c *ChunkStreamReader) Seek(offset int64, whence int) (int64, error) {
2021-08-26 08:28:50 +08:00
c.bufferLock.Lock()
defer c.bufferLock.Unlock()
var err error
switch whence {
case io.SeekStart:
case io.SeekCurrent:
2021-08-08 16:21:42 +08:00
offset += c.logicOffset
case io.SeekEnd:
2021-08-08 05:46:23 +08:00
offset = c.totalSize + offset
}
2021-08-08 05:46:23 +08:00
if offset > c.totalSize {
err = io.ErrUnexpectedEOF
2021-08-08 16:21:42 +08:00
} else {
c.logicOffset = offset
}
2021-08-08 16:21:42 +08:00
return offset, err
}
2021-08-26 08:28:50 +08:00
// insideChunk reports whether offset lies in the half-open range
// [chunk.LogicOffset, chunk.LogicOffset+chunk.Size).
func insideChunk(offset int64, chunk *ChunkView) bool {
	start := chunk.LogicOffset
	end := start + int64(chunk.Size)
	return offset >= start && offset < end
}
2021-08-08 16:21:42 +08:00
func (c *ChunkStreamReader) prepareBufferFor(offset int64) (err error) {
2021-08-08 06:35:27 +08:00
// stay in the same chunk
2021-08-26 08:28:50 +08:00
if c.bufferOffset <= offset && offset < c.bufferOffset+int64(len(c.buffer)) {
return nil
2021-08-08 06:35:27 +08:00
}
2021-08-26 08:28:50 +08:00
// fmt.Printf("fetch for offset %d\n", offset)
2021-08-08 06:35:27 +08:00
// need to seek to a different chunk
currentChunkIndex := sort.Search(len(c.chunkViews), func(i int) bool {
2021-08-16 14:07:58 +08:00
return offset < c.chunkViews[i].LogicOffset
2021-08-08 06:35:27 +08:00
})
if currentChunkIndex == len(c.chunkViews) {
2021-08-16 14:07:58 +08:00
// not found
2021-08-26 08:28:50 +08:00
if insideChunk(offset, c.chunkViews[0]) {
// fmt.Printf("select0 chunk %d %s\n", currentChunkIndex, c.chunkViews[currentChunkIndex].FileId)
2021-08-16 14:07:58 +08:00
currentChunkIndex = 0
2021-08-26 08:28:50 +08:00
} else if insideChunk(offset, c.chunkViews[len(c.chunkViews)-1]) {
currentChunkIndex = len(c.chunkViews) - 1
// fmt.Printf("select last chunk %d %s\n", currentChunkIndex, c.chunkViews[currentChunkIndex].FileId)
2021-08-16 14:07:58 +08:00
} else {
return io.EOF
}
} else if currentChunkIndex > 0 {
2021-08-26 08:28:50 +08:00
if insideChunk(offset, c.chunkViews[currentChunkIndex]) {
// good hit
2021-09-01 17:45:42 +08:00
} else if insideChunk(offset, c.chunkViews[currentChunkIndex-1]) {
2021-08-16 14:07:58 +08:00
currentChunkIndex -= 1
2021-08-26 08:28:50 +08:00
// fmt.Printf("select -1 chunk %d %s\n", currentChunkIndex, c.chunkViews[currentChunkIndex].FileId)
2021-08-16 14:07:58 +08:00
} else {
2021-08-26 08:28:50 +08:00
// glog.Fatalf("unexpected1 offset %d", offset)
2021-08-16 14:07:58 +08:00
return fmt.Errorf("unexpected1 offset %d", offset)
}
} else {
2021-08-26 08:28:50 +08:00
// glog.Fatalf("unexpected2 offset %d", offset)
2021-08-16 14:07:58 +08:00
return fmt.Errorf("unexpected2 offset %d", offset)
2021-08-08 06:35:27 +08:00
}
// positioning within the new chunk
chunk := c.chunkViews[currentChunkIndex]
2021-08-26 08:28:50 +08:00
if insideChunk(offset, chunk) {
2021-08-08 06:35:27 +08:00
if c.isBufferEmpty() || c.bufferOffset != chunk.LogicOffset {
2021-08-08 16:21:42 +08:00
if err = c.fetchChunkToBuffer(chunk); err != nil {
return
}
}
2021-08-26 08:28:50 +08:00
} else {
// glog.Fatalf("unexpected3 offset %d in %s [%d,%d)", offset, chunk.FileId, chunk.LogicOffset, chunk.LogicOffset+int64(chunk.Size))
return fmt.Errorf("unexpected3 offset %d in %s [%d,%d)", offset, chunk.FileId, chunk.LogicOffset, chunk.LogicOffset+int64(chunk.Size))
}
2021-08-08 16:21:42 +08:00
return
}
func (c *ChunkStreamReader) fetchChunkToBuffer(chunkView *ChunkView) error {
urlStrings, err := c.lookupFileId(chunkView.FileId)
if err != nil {
glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
return err
}
var buffer bytes.Buffer
2020-10-13 15:29:46 +08:00
var shouldRetry bool
for _, urlString := range urlStrings {
shouldRetry, err = util.ReadUrlAsStream(urlString, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) {
buffer.Write(data)
})
2020-10-13 15:29:46 +08:00
if !shouldRetry {
break
}
if err != nil {
glog.V(1).Infof("read %s failed, err: %v", chunkView.FileId, err)
buffer.Reset()
} else {
break
}
}
if err != nil {
return err
}
c.buffer = buffer.Bytes()
c.bufferOffset = chunkView.LogicOffset
2021-08-26 08:28:50 +08:00
c.chunk = chunkView.FileId
2021-08-26 08:28:50 +08:00
// glog.V(0).Infof("fetched %s [%d,%d)", chunkView.FileId, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size))
return nil
}
2020-04-28 15:05:47 +08:00
// Close currently does nothing.
//
// TODO try to release and reuse buffer
func (c *ChunkStreamReader) Close() {}
2020-03-23 15:01:34 +08:00
// VolumeId returns the part of fileId before its last comma.
//
// When fileId contains no comma, or its only comma is the first character,
// fileId is returned unchanged.
func VolumeId(fileId string) string {
	if lastCommaIndex := strings.LastIndex(fileId, ","); lastCommaIndex > 0 {
		return fileId[:lastCommaIndex]
	}
	return fileId
}