package filer
import (
	"context"
	"fmt"
	"io"
	"math/rand"
	"sync"

	"github.com/chrislusf/seaweedfs/weed/glog"
	"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
	"github.com/chrislusf/seaweedfs/weed/util"
	"github.com/chrislusf/seaweedfs/weed/util/chunk_cache"
	"github.com/chrislusf/seaweedfs/weed/wdclient"

	"github.com/golang/groupcache/singleflight"
)
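
// ChunkReadAt implements io.ReaderAt on top of a file's chunk views: it maps a
// logical file offset to the chunks that cover it, resolves chunk locations via
// lookupFileId, and keeps fetched chunk data in chunkCache and lastChunkData.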
type ChunkReadAt struct {
	masterClient *wdclient.MasterClient

	chunkViews   []*ChunkView
	lookupFileId wdclient.LookupFileIdFunctionType
	readerLock   sync.Mutex
	fileSize     int64

	fetchGroup      singleflight.Group
	chunkCache      chunk_cache.ChunkCache
	lastChunkFileId string
	lastChunkData   []byte
	readerPattern   *ReaderPattern
}
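
// compile-time assertions that ChunkReadAt satisfies the interfaces it is used as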
var _ = io.ReaderAt(&ChunkReadAt{})
var _ = io.Closer(&ChunkReadAt{})
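
// LookupFn returns a LookupFileIdFunctionType that resolves a file id to the
// volume server URLs able to serve it. Volume locations are cached per volume
// id, and the returned URLs are shuffled to spread read load across replicas.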
func LookupFn(filerClient filer_pb.FilerClient) wdclient.LookupFileIdFunctionType {

	vidCache := make(map[string]*filer_pb.Locations)
	var vidCacheLock sync.RWMutex
	return func(fileId string) (targetUrls []string, err error) {

		vid := VolumeId(fileId)
		vidCacheLock.RLock()
		locations, found := vidCache[vid]
		vidCacheLock.RUnlock()

		if !found {
			// cache miss: look up the volume locations via the filer, with retries
			util.Retry("lookup volume "+vid, func() error {
				err = filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
					resp, err := client.LookupVolume(context.Background(), &filer_pb.LookupVolumeRequest{
						VolumeIds: []string{vid},
					})
					if err != nil {
						return err
					}
					locations = resp.LocationsMap[vid]
					if locations == nil || len(locations.Locations) == 0 {
						glog.V(0).Infof("failed to locate %s", fileId)
						return fmt.Errorf("failed to locate %s", fileId)
					}
					vidCacheLock.Lock()
					vidCache[vid] = locations
					vidCacheLock.Unlock()
					return nil
				})
				return err
			})
		}

		if err != nil {
			return nil, err
		}

		for _, loc := range locations.Locations {
			volumeServerAddress := filerClient.AdjustedUrl(loc)
			targetUrl := fmt.Sprintf("http://%s/%s", volumeServerAddress, fileId)
			targetUrls = append(targetUrls, targetUrl)
		}

		// shuffle the urls so reads are spread across replicas
		for i := len(targetUrls) - 1; i > 0; i-- {
			j := rand.Intn(i + 1)
			targetUrls[i], targetUrls[j] = targetUrls[j], targetUrls[i]
		}

		return
	}
}
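
// NewChunkReaderAtFromClient builds a ChunkReadAt over the given chunk views.
//
// A typical call site looks roughly like the sketch below; filerClient,
// chunkViews, chunkCache, fileSize and buf are the caller's own values and are
// shown only for illustration:
//
//	readerAt := NewChunkReaderAtFromClient(LookupFn(filerClient), chunkViews, chunkCache, fileSize)
//	n, err := readerAt.ReadAt(buf, 0)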
func NewChunkReaderAtFromClient(lookupFn wdclient.LookupFileIdFunctionType, chunkViews []*ChunkView, chunkCache chunk_cache.ChunkCache, fileSize int64) *ChunkReadAt {

	return &ChunkReadAt{
		chunkViews:    chunkViews,
		lookupFileId:  lookupFn,
		chunkCache:    chunkCache,
		fileSize:      fileSize,
		readerPattern: NewReaderPattern(),
	}
}
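
// Close drops the reference to the most recently read chunk.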
func (c *ChunkReadAt) Close() error {
	c.lastChunkData = nil
	c.lastChunkFileId = ""
	return nil
}
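
// ReadAt implements io.ReaderAt. It records the access pattern and then serves
// the read under readerLock, so concurrent callers do not interleave fetches.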
func (c *ChunkReadAt) ReadAt(p []byte, offset int64) (n int, err error) {

	c.readerPattern.MonitorReadAt(offset, len(p))

	c.readerLock.Lock()
	defer c.readerLock.Unlock()

	// glog.V(4).Infof("ReadAt [%d,%d) of total file size %d bytes %d chunk views", offset, offset+int64(len(p)), c.fileSize, len(c.chunkViews))
	return c.doReadAt(p, offset)
}
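
// doReadAt copies the requested range from the covering chunk views into p,
// zero-filling gaps between chunks and the tail of the range up to the file size.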
func (c *ChunkReadAt) doReadAt(p []byte, offset int64) (n int, err error) {

	startOffset, remaining := offset, int64(len(p))
	var nextChunk *ChunkView
	for i, chunk := range c.chunkViews {
		if remaining <= 0 {
			break
		}
		if i+1 < len(c.chunkViews) {
			nextChunk = c.chunkViews[i+1]
		} else {
			nextChunk = nil
		}
		if startOffset < chunk.LogicOffset {
			// zero-fill the gap before this chunk
			gap := int(chunk.LogicOffset - startOffset)
			glog.V(4).Infof("zero [%d,%d)", startOffset, chunk.LogicOffset)
			n += int(min(int64(gap), remaining))
			startOffset, remaining = chunk.LogicOffset, remaining-int64(gap)
			if remaining <= 0 {
				break
			}
		}
		// fmt.Printf(">>> doReadAt [%d,%d), chunk[%d,%d)\n", offset, offset+int64(len(p)), chunk.LogicOffset, chunk.LogicOffset+int64(chunk.Size))
		chunkStart, chunkStop := max(chunk.LogicOffset, startOffset), min(chunk.LogicOffset+int64(chunk.Size), startOffset+remaining)
		if chunkStart >= chunkStop {
			continue
		}
		// glog.V(4).Infof("read [%d,%d), %d/%d chunk %s [%d,%d)", chunkStart, chunkStop, i, len(c.chunkViews), chunk.FileId, chunk.LogicOffset-chunk.Offset, chunk.LogicOffset-chunk.Offset+int64(chunk.Size))
		var buffer []byte
		bufferOffset := chunkStart - chunk.LogicOffset + chunk.Offset
		bufferLength := chunkStop - chunkStart
		buffer, err = c.readChunkSlice(chunk, nextChunk, uint64(bufferOffset), uint64(bufferLength))
		if err != nil {
			glog.Errorf("fetching chunk %+v: %v\n", chunk, err)
			return
		}

		copied := copy(p[startOffset-offset:chunkStop-chunkStart+startOffset-offset], buffer)
		n += copied
		startOffset, remaining = startOffset+int64(copied), remaining-int64(copied)
	}

	// glog.V(4).Infof("doReadAt [%d,%d), n:%v, err:%v", offset, offset+int64(len(p)), n, err)

	// zero-fill any remaining bytes that fall past the last chunk but within the file size
	if err == nil && remaining > 0 && c.fileSize > startOffset {
		delta := int(min(remaining, c.fileSize-startOffset))
		glog.V(4).Infof("zero2 [%d,%d) of file size %d bytes", startOffset, startOffset+int64(delta), c.fileSize)
		n += delta
	}

	if err == nil && offset+int64(len(p)) >= c.fileSize {
		err = io.EOF
	}
	// fmt.Printf("~~~ filled %d, err: %v\n\n", n, err)

	return
}
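
// readChunkSlice returns length bytes of chunkView starting at offset. The first
// chunk may be served from the chunk cache; random reads fetch only the requested
// range, while sequential reads fetch whole chunks and prefetch the next one.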
func (c *ChunkReadAt) readChunkSlice(chunkView *ChunkView, nextChunkViews *ChunkView, offset, length uint64) ([]byte, error) {

	var chunkSlice []byte
	if chunkView.LogicOffset == 0 {
		chunkSlice = c.chunkCache.GetChunkSlice(chunkView.FileId, offset, length)
	}
	if len(chunkSlice) > 0 {
		return chunkSlice, nil
	}
	if c.lookupFileId == nil {
		return nil, nil
	}
	if c.readerPattern.IsRandomMode() {
		// random access: fetch only the requested range instead of the whole chunk
		return c.doFetchRangeChunkData(chunkView, offset, length)
	}
	chunkData, err := c.readFromWholeChunkData(chunkView, nextChunkViews)
	if err != nil {
		return nil, err
	}
	wanted := min(int64(length), int64(len(chunkData))-int64(offset))
	return chunkData[offset : int64(offset)+wanted], nil
}
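
// readFromWholeChunkData returns the complete data of chunkView, reusing the most
// recently read chunk when the file id matches and prefetching the given next
// chunk views in the background.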
func (c *ChunkReadAt) readFromWholeChunkData(chunkView *ChunkView, nextChunkViews ...*ChunkView) (chunkData []byte, err error) {

	if c.lastChunkFileId == chunkView.FileId {
		return c.lastChunkData, nil
	}

	v, doErr := c.readOneWholeChunk(chunkView)

	if doErr != nil {
		return nil, doErr
	}

	chunkData = v.([]byte)

	c.lastChunkData = chunkData
	c.lastChunkFileId = chunkView.FileId

	// prefetch the following chunks in the background
	for _, nextChunkView := range nextChunkViews {
		if c.chunkCache != nil && nextChunkView != nil {
			go c.readOneWholeChunk(nextChunkView)
		}
	}

	return
}
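
// readOneWholeChunk fetches one whole chunk, deduplicating concurrent fetches of
// the same file id via singleflight and caching the file's first chunk.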
func (c *ChunkReadAt) readOneWholeChunk(chunkView *ChunkView) (interface{}, error) {

	var err error

	return c.fetchGroup.Do(chunkView.FileId, func() (interface{}, error) {

		glog.V(4).Infof("readFromWholeChunkData %s offset %d [%d,%d) size at least %d", chunkView.FileId, chunkView.Offset, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size), chunkView.ChunkSize)

		var data []byte
		if chunkView.LogicOffset == 0 {
			data = c.chunkCache.GetChunk(chunkView.FileId, chunkView.ChunkSize)
		}
		if data != nil {
			glog.V(4).Infof("cache hit %s [%d,%d)", chunkView.FileId, chunkView.LogicOffset-chunkView.Offset, chunkView.LogicOffset-chunkView.Offset+int64(len(data)))
		} else {
			var err error
			data, err = c.doFetchFullChunkData(chunkView)
			if err != nil {
				return data, err
			}
			if chunkView.LogicOffset == 0 {
				// only cache the first chunk
				c.chunkCache.SetChunk(chunkView.FileId, data)
			}
		}
		return data, err
	})
}
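
// doFetchFullChunkData downloads the entire chunk identified by chunkView,
// honoring its cipher key and gzip flags.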
func (c *ChunkReadAt) doFetchFullChunkData(chunkView *ChunkView) ([]byte, error) {

	glog.V(4).Infof("+ doFetchFullChunkData %s", chunkView.FileId)

	data, err := fetchChunk(c.lookupFileId, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped)

	glog.V(4).Infof("- doFetchFullChunkData %s", chunkView.FileId)

	return data, err
}
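
// doFetchRangeChunkData downloads only the requested byte range of the chunk;
// readChunkSlice uses it when the read pattern looks random.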
func (c *ChunkReadAt) doFetchRangeChunkData(chunkView *ChunkView, offset, length uint64) ([]byte, error) {

	glog.V(4).Infof("+ doFetchRangeChunkData %s", chunkView.FileId)

	data, err := fetchChunkRange(c.lookupFileId, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int64(offset), int(length))

	glog.V(4).Infof("- doFetchRangeChunkData %s", chunkView.FileId)

	return data, err
}