seaweedfs/weed/mount/weedfs_file_read.go

package mount

import (
	"bytes"
	"fmt"
	"github.com/seaweedfs/seaweedfs/weed/util"
	"io"

	"github.com/hanwen/go-fuse/v2/fuse"

	"github.com/seaweedfs/seaweedfs/weed/glog"
)

// Read serves a FUSE read request: it looks up the file handle identified by
// in.Fh and fills buff with data starting at in.Offset.
//
// Per the FUSE protocol, Read should send exactly the number of bytes
// requested except on EOF or error, otherwise the rest of the data will be
// substituted with zeroes. An exception to this is when the file has been
// opened in 'direct_io' mode, in which case the return value of the read
// system call will reflect the return value of this operation.
//
// It returns fuse.ENOENT when no handle is registered for in.Fh and fuse.EIO
// when readDataByFileHandle reports an error. On success the result wraps
// buff[:totalRead]. The cancel channel is not consulted by this method.
func (wfs *WFS) Read(cancel <-chan struct{}, in *fuse.ReadIn, buff []byte) (fuse.ReadResult, fuse.Status) {
	fh := wfs.GetHandle(FileHandleId(in.Fh))
	if fh == nil {
		return nil, fuse.ENOENT
	}

	// Take this handle's entry in the lock table with util.SharedLock and keep
	// it until the read (including the optional debug comparison) is finished.
	fhActiveLock := fh.wfs.fhLockTable.AcquireLock("Read", fh.fh, util.SharedLock)
	defer fh.wfs.fhLockTable.ReleaseLock(fh.fh, fhActiveLock)

	offset := int64(in.Offset)
	totalRead, err := readDataByFileHandle(buff, fh, offset)
	if err != nil {
		glog.Warningf("file handle read %s %d: %v", fh.FullPath(), totalRead, err)
		return nil, fuse.EIO
	}

	if IsDebugFileReadWrite {
		// Debug mode: compare what was just read against the same range of
		// fh.mirrorFile (presumably a local mirror of the written data —
		// confirm against the write path) and report any mismatch.
		// print(".")
		mirrorData := make([]byte, totalRead)
		// io.EOF only means the mirror file is shorter than the requested
		// range; the comparison below already reports that as a mismatch.
		if _, mirrorErr := fh.mirrorFile.ReadAt(mirrorData, offset); mirrorErr != nil && mirrorErr != io.EOF {
			glog.Warningf("mirror file read %s at %d: %v", fh.mirrorFile.Name(), offset, mirrorErr)
		}
		if !bytes.Equal(mirrorData, buff[:totalRead]) {
			// Read once more into a fresh buffer to tell a transient
			// discrepancy apart from a repeatable one.
			againBuff := make([]byte, len(buff))
			againRead, againErr := readDataByFileHandle(againBuff, fh, offset)
			if againErr != nil {
				glog.Warningf("file handle re-read %s %d: %v", fh.FullPath(), againRead, againErr)
			}
			againCorrect := bytes.Equal(mirrorData, againBuff[:againRead])
			againSame := bytes.Equal(buff[:totalRead], againBuff[:againRead])

			fmt.Printf("\ncompare %v [%d,%d) size:%d againSame:%v againCorrect:%v\n", fh.mirrorFile.Name(), offset, offset+totalRead, totalRead, againSame, againCorrect)
			//fmt.Printf("read mirror data: %v\n", mirrorData)
			//fmt.Printf("read actual data: %v\n", againBuff[:totalRead])
		}
	}

	return fuse.ReadResultData(buff[:totalRead]), fuse.OK
}

// readDataByFileHandle fills buff with the contents of fhIn starting at
// offset and returns the number of bytes available in buff.
//
// The range [offset, offset+len(buff)) is locked for reading for the whole
// call. Data is first read via readFromChunks; unless that fails with an
// error other than io.EOF, readFromDirtyPages is then applied to the same
// buffer (presumably overlaying not-yet-flushed writes newer than the
// returned timestamp — confirm against its implementation). io.EOF is not
// reported to the caller.
func readDataByFileHandle(buff []byte, fhIn *FileHandle, offset int64) (int64, error) {
	length := len(buff)
	fhIn.lockForRead(offset, length)
	defer fhIn.unlockForRead(offset, length)

	readCount, chunkTsNs, readErr := fhIn.readFromChunks(buff, offset)
	if readErr != nil && readErr != io.EOF {
		// A real read failure: skip the dirty pages and hand the error back.
		return readCount, readErr
	}

	// The dirty pages may extend past what the chunks provided; report
	// whichever of the two reaches further into buff.
	dirtyStop := fhIn.readFromDirtyPages(buff, offset, chunkTsNs)
	if fromDirty := dirtyStop - offset; fromDirty > readCount {
		readCount = fromDirty
	}
	return readCount, nil
}
add open release, refactor 2022-02-14 11:14:34 +08:00			`package mount`

			`import (`
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as semaphore.Weighted not found impactful optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to list.List refactor chunkViews to list.List add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to IntervalList[ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add insert 
interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation 2023-01-03 15:20:45 +08:00			`"bytes"`
			`"fmt"`
mount switch to ordered lock requests 2023-09-22 02:08:26 +08:00			`"github.com/seaweedfs/seaweedfs/weed/util"`
Implement copy_file_range syscall in FUSE (#3475) See the man page of copy_file_range: https://man7.org/linux/man-pages/man2/copy_file_range.2.html 2022-08-22 08:11:43 +08:00			`"io"`

add open release, refactor 2022-02-14 11:14:34 +08:00			`"github.com/hanwen/go-fuse/v2/fuse"`
Implement copy_file_range syscall in FUSE (#3475) See the man page of copy_file_range: https://man7.org/linux/man-pages/man2/copy_file_range.2.html 2022-08-22 08:11:43 +08:00
move to https://github.com/seaweedfs/seaweedfs 2022-07-29 15:17:28 +08:00			`"github.com/seaweedfs/seaweedfs/weed/glog"`
add open release, refactor 2022-02-14 11:14:34 +08:00			`)`

			`/**`
			`* Read data`
			`*`
			`* Read should send exactly the number of bytes requested except`
			`* on EOF or error, otherwise the rest of the data will be`
			`* substituted with zeroes. An exception to this is when the file`
			`* has been opened in 'direct_io' mode, in which case the return`
			`* value of the read system call will reflect the return value of`
			`* this operation.`
			`*`
			`* fi->fh will contain the value set by the open method, or will`
			`* be undefined if the open method didn't set any value.`
			`*`
			`* Valid replies:`
			`* fuse_reply_buf`
			`* fuse_reply_iov`
			`* fuse_reply_data`
			`* fuse_reply_err`
			`*`
			`* @param req request handle`
			`* @param ino the inode number`
			`* @param size number of bytes to read`
			`* @param off offset to read from`
			`* @param fi file information`
			`*/`
support read 2022-02-14 14:50:44 +08:00			`func (wfs WFS) Read(cancel <-chan struct{}, in fuse.ReadIn, buff []byte) (fuse.ReadResult, fuse.Status) {`
			`fh := wfs.GetHandle(FileHandleId(in.Fh))`
			`if fh == nil {`
			`return nil, fuse.ENOENT`
			`}`

mount switch to ordered lock requests 2023-09-22 02:08:26 +08:00			`fhActiveLock := fh.wfs.fhLockTable.AcquireLock("Read", fh.fh, util.SharedLock)`
			`defer fh.wfs.fhLockTable.ReleaseLock(fh.fh, fhActiveLock)`
Implement copy_file_range syscall in FUSE (#3475) See the man page of copy_file_range: https://man7.org/linux/man-pages/man2/copy_file_range.2.html 2022-08-22 08:11:43 +08:00
support read 2022-02-14 14:50:44 +08:00			`offset := int64(in.Offset)`
Reuse readDataByFileHandle in Read call (#3482) 2022-08-22 23:24:06 +08:00			`totalRead, err := readDataByFileHandle(buff, fh, offset)`
support read 2022-02-14 14:50:44 +08:00			`if err != nil {`
			`glog.Warningf("file handle read %s %d: %v", fh.FullPath(), totalRead, err)`
			`return nil, fuse.EIO`
			`}`

more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as semaphore.Weighted not found impactful optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to list.List refactor chunkViews to list.List add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to IntervalList[ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add insert 
interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation 2023-01-03 15:20:45 +08:00			`if IsDebugFileReadWrite {`
			`// print(".")`
			`mirrorData := make([]byte, totalRead)`
			`fh.mirrorFile.ReadAt(mirrorData, offset)`
			`if bytes.Compare(mirrorData, buff[:totalRead]) != 0 {`

			`againBuff := make([]byte, len(buff))`
Fix buff use (#4628) 2023-07-03 23:33:34 +08:00			`againRead, _ := readDataByFileHandle(againBuff, fh, offset)`
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as semaphore.Weighted not found impactful optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to list.List refactor chunkViews to list.List add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to IntervalList[ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add insert 
interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation 2023-01-03 15:20:45 +08:00			`againCorrect := bytes.Compare(mirrorData, againBuff[:againRead]) == 0`
			`againSame := bytes.Compare(buff[:totalRead], againBuff[:againRead]) == 0`

			`fmt.Printf("\ncompare %v [%d,%d) size:%d againSame:%v againCorrect:%v\n", fh.mirrorFile.Name(), offset, offset+totalRead, totalRead, againSame, againCorrect)`
			`//fmt.Printf("read mirrow data: %v\n", mirrorData)`
Fix buff use (#4628) 2023-07-03 23:33:34 +08:00			`//fmt.Printf("read actual data: %v\n", againBuff[:totalRead])`
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as semaphore.Weighted not found impactful optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to list.List refactor chunkViews to list.List add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to IntervalList[ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add insert 
interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation 2023-01-03 15:20:45 +08:00			`}`
			`}`

support read 2022-02-14 14:50:44 +08:00			`return fuse.ReadResultData(buff[:totalRead]), fuse.OK`
add open release, refactor 2022-02-14 11:14:34 +08:00			`}`
Reuse readDataByFileHandle in Read call (#3482) 2022-08-22 23:24:06 +08:00
			`func readDataByFileHandle(buff []byte, fhIn *FileHandle, offset int64) (int64, error) {`
			`// read data from source file`
			`size := len(buff)`
			`fhIn.lockForRead(offset, size)`
			`defer fhIn.unlockForRead(offset, size)`

more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as semaphore.Weighted not found impactful optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to list.List refactor chunkViews to list.List add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to IntervalList[ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add insert 
interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation 2023-01-03 15:20:45 +08:00			`n, tsNs, err := fhIn.readFromChunks(buff, offset)`
Reuse readDataByFileHandle in Read call (#3482) 2022-08-22 23:24:06 +08:00			`if err == nil \|\| err == io.EOF {`
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as semaphore.Weighted not found impactful optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to list.List refactor chunkViews to list.List add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to IntervalList[ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add insert 
interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation 2023-01-03 15:20:45 +08:00			`maxStop := fhIn.readFromDirtyPages(buff, offset, tsNs)`
Reuse readDataByFileHandle in Read call (#3482) 2022-08-22 23:24:06 +08:00			`n = max(maxStop-offset, n)`
			`}`
			`if err == io.EOF {`
			`err = nil`
			`}`
			`return n, err`
			`}`