seaweedfs/weed/filer/stream.go

257 lines
6.6 KiB
Go
Raw Normal View History

2020-09-01 15:21:19 +08:00
package filer
import (
"bytes"
2020-10-14 04:53:34 +08:00
"fmt"
"golang.org/x/sync/errgroup"
"io"
"math"
"strings"
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
"github.com/chrislusf/seaweedfs/weed/util"
"github.com/chrislusf/seaweedfs/weed/wdclient"
)
func StreamContent(masterClient wdclient.HasLookupFileIdFunction, w io.Writer, chunks []*filer_pb.FileChunk, offset int64, size int64) error {
2021-03-12 01:38:59 +08:00
glog.V(9).Infof("start to stream content for chunks: %+v\n", chunks)
chunkViews := ViewFromChunks(masterClient.GetLookupFileIdFunction(), chunks, offset, size)
fileId2Url := make(map[string][]string)
for _, chunkView := range chunkViews {
urlStrings, err := masterClient.GetLookupFileIdFunction()(chunkView.FileId)
if err != nil {
glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
return err
2021-03-12 02:34:36 +08:00
} else if len(urlStrings) == 0 {
glog.Errorf("operation LookupFileId %s failed, err: urls not found", chunkView.FileId)
return fmt.Errorf("operation LookupFileId %s failed, err: urls not found", chunkView.FileId)
}
fileId2Url[chunkView.FileId] = urlStrings
}
for idx, chunkView := range chunkViews {
urlStrings := fileId2Url[chunkView.FileId]
2020-10-09 14:19:42 +08:00
// Pre-check all chunkViews urls
gErr := new(errgroup.Group)
if len(chunkViews) > 1 && idx == 0 {
CheckAllChunkViews(chunkViews[1:], &fileId2Url, gErr)
}
data, err := retriedFetchChunkData(
urlStrings,
chunkView.CipherKey,
chunkView.IsGzipped,
chunkView.IsFullChunk(),
false,
chunkView.Offset,
int(chunkView.Size),
)
2020-10-11 07:02:10 +08:00
if err != nil {
2020-10-14 04:53:34 +08:00
glog.Errorf("read chunk: %v", err)
return fmt.Errorf("read chunk: %v", err)
}
if err := gErr.Wait(); err != nil {
glog.Errorf("check all chunks: %v", err)
return fmt.Errorf("check all chunks: %v", err)
}
2020-10-14 04:53:34 +08:00
_, err = w.Write(data)
if err != nil {
glog.Errorf("write chunk: %v", err)
return fmt.Errorf("write chunk: %v", err)
}
}
return nil
}
// CheckAllChunkViews starts one goroutine on gErr per entry of chunkViews.
// Each goroutine calls retriedFetchChunkData for its chunk view, using the
// URLs stored for that view's file id in fileId2Url, and reports the
// resulting error to the group. The function returns without waiting; callers
// collect the outcome with gErr.Wait().
func CheckAllChunkViews(chunkViews []*ChunkView, fileId2Url *map[string][]string, gErr *errgroup.Group) {
	for _, chunkView := range chunkViews {
		// Give each goroutine its own copy: before Go 1.22 the range variable
		// is shared by all iterations, so the closures would otherwise race on
		// it and could all end up checking the last chunk view.
		chunkView := chunkView
		gErr.Go(func() error {
			_, err := retriedFetchChunkData(
				(*fileId2Url)[chunkView.FileId],
				chunkView.CipherKey,
				chunkView.IsGzipped,
				chunkView.IsFullChunk(),
				true,
				chunkView.Offset,
				int(chunkView.Size))
			return err
		})
	}
}
2020-04-28 15:05:47 +08:00
// ---------------- ReadAllReader ----------------------------------
func ReadAll(masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk) ([]byte, error) {
buffer := bytes.Buffer{}
lookupFileIdFn := func(fileId string) (targetUrls []string, err error) {
2020-04-28 15:05:47 +08:00
return masterClient.LookupFileId(fileId)
}
chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, math.MaxInt64)
2020-04-28 15:05:47 +08:00
for _, chunkView := range chunkViews {
urlStrings, err := lookupFileIdFn(chunkView.FileId)
2020-04-28 15:05:47 +08:00
if err != nil {
glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
return nil, err
}
2020-10-09 14:19:42 +08:00
data, err := retriedFetchChunkData(urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), false, chunkView.Offset, int(chunkView.Size))
2020-10-09 14:19:42 +08:00
if err != nil {
return nil, err
2020-04-28 15:05:47 +08:00
}
2020-10-09 14:19:42 +08:00
buffer.Write(data)
2020-04-28 15:05:47 +08:00
}
return buffer.Bytes(), nil
}
// ---------------- ChunkStreamReader ----------------------------------
// ChunkStreamReader reads the content described by a list of chunk views as
// one sequential, seekable stream. It keeps the data of a single fetched chunk
// in memory at a time.
type ChunkStreamReader struct {
	// chunkViews are the views the stream is made of, as produced by
	// ViewFromChunks in the constructors.
	chunkViews []*ChunkView
	// NOTE(review): logicOffset is not referenced by any code visible in this
	// file section — confirm whether it is still needed.
	logicOffset int64
	// buffer holds the data of the most recently fetched chunk view.
	buffer []byte
	// bufferOffset is the LogicOffset of the chunk view currently held in buffer.
	bufferOffset int64
	// bufferPos is the read position inside buffer.
	bufferPos int
	// chunkIndex is the index in chunkViews of the next view Read will fetch.
	chunkIndex int
	// lookupFileId resolves a file id to the URLs the chunk is fetched from.
	lookupFileId wdclient.LookupFileIdFunctionType
}
// Compile-time check that *ChunkStreamReader satisfies io.ReadSeeker.
var _ io.ReadSeeker = (*ChunkStreamReader)(nil)
// NewChunkStreamReaderFromFiler builds a ChunkStreamReader over chunks whose
// file ids are resolved through masterClient.LookupFileId. The reader covers
// the chunk views ViewFromChunks returns for offset 0 and size math.MaxInt64.
func NewChunkStreamReaderFromFiler(masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk) *ChunkStreamReader {

	lookup := func(fileId string) ([]string, error) {
		return masterClient.LookupFileId(fileId)
	}

	views := ViewFromChunks(lookup, chunks, 0, math.MaxInt64)

	reader := &ChunkStreamReader{lookupFileId: lookup}
	reader.chunkViews = views
	return reader
}
func NewChunkStreamReader(filerClient filer_pb.FilerClient, chunks []*filer_pb.FileChunk) *ChunkStreamReader {
lookupFileIdFn := LookupFn(filerClient)
chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, math.MaxInt64)
return &ChunkStreamReader{
2020-05-10 18:50:30 +08:00
chunkViews: chunkViews,
lookupFileId: lookupFileIdFn,
}
}
func (c *ChunkStreamReader) Read(p []byte) (n int, err error) {
2020-04-29 17:42:58 +08:00
for n < len(p) {
if c.isBufferEmpty() {
if c.chunkIndex >= len(c.chunkViews) {
return n, io.EOF
}
chunkView := c.chunkViews[c.chunkIndex]
c.fetchChunkToBuffer(chunkView)
c.chunkIndex++
}
2020-04-29 17:42:58 +08:00
t := copy(p[n:], c.buffer[c.bufferPos:])
c.bufferPos += t
n += t
}
return
}
// isBufferEmpty reports whether the read position has reached or passed the
// end of the buffered chunk data, i.e. nothing is left to copy from buffer.
func (c *ChunkStreamReader) isBufferEmpty() bool {
	return c.bufferPos >= len(c.buffer)
}
func (c *ChunkStreamReader) Seek(offset int64, whence int) (int64, error) {
2020-03-22 14:48:11 +08:00
var totalSize int64
for _, chunk := range c.chunkViews {
totalSize += int64(chunk.Size)
}
var err error
switch whence {
case io.SeekStart:
case io.SeekCurrent:
offset += c.bufferOffset + int64(c.bufferPos)
case io.SeekEnd:
2020-03-22 14:48:11 +08:00
offset = totalSize + offset
}
2020-03-22 14:48:11 +08:00
if offset > totalSize {
err = io.ErrUnexpectedEOF
}
for i, chunk := range c.chunkViews {
if chunk.LogicOffset <= offset && offset < chunk.LogicOffset+int64(chunk.Size) {
if c.isBufferEmpty() || c.bufferOffset != chunk.LogicOffset {
c.fetchChunkToBuffer(chunk)
c.chunkIndex = i + 1
break
}
}
}
c.bufferPos = int(offset - c.bufferOffset)
return offset, err
}
func (c *ChunkStreamReader) fetchChunkToBuffer(chunkView *ChunkView) error {
urlStrings, err := c.lookupFileId(chunkView.FileId)
if err != nil {
glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err)
return err
}
var buffer bytes.Buffer
2020-10-13 15:29:46 +08:00
var shouldRetry bool
for _, urlString := range urlStrings {
shouldRetry, err = util.FastReadUrlAsStream(urlString, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), false, chunkView.Offset, int(chunkView.Size), func(data []byte) {
buffer.Write(data)
})
2020-10-13 15:29:46 +08:00
if !shouldRetry {
break
}
if err != nil {
glog.V(1).Infof("read %s failed, err: %v", chunkView.FileId, err)
buffer.Reset()
} else {
break
}
}
if err != nil {
return err
}
c.buffer = buffer.Bytes()
c.bufferPos = 0
c.bufferOffset = chunkView.LogicOffset
// glog.V(0).Infof("read %s [%d,%d)", chunkView.FileId, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size))
return nil
}
2020-04-28 15:05:47 +08:00
// Close is currently a no-op: it releases nothing and returns no error value.
func (c *ChunkStreamReader) Close() {
	// TODO try to release and reuse buffer
}
2020-03-23 15:01:34 +08:00
// VolumeId returns the part of fileId that precedes its last comma. When
// fileId contains no comma, or its only comma is the very first character,
// fileId is returned unchanged.
func VolumeId(fileId string) string {
	commaAt := strings.LastIndexByte(fileId, ',')
	if commaAt <= 0 {
		return fileId
	}
	return fileId[:commaAt]
}