seaweedfs/weed/mount/page_writer.go

package mount

import (
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/mount/page_writer"
)

// PageWriter is a page_writer.DirtyPages implementation that cuts incoming
// reads and writes on chunkSize boundaries and forwards every piece to
// randomWriter.
type PageWriter struct {
	// fh is the owning file handle; it is passed to the underlying dirty-page
	// store on construction and printed in the verbose log lines.
	fh            *FileHandle
	// collection is not assigned or read anywhere in this file.
	// NOTE(review): presumably the target collection for uploads; confirm.
	collection    string
	// replication is not assigned or read anywhere in this file.
	// NOTE(review): presumably the replication setting for uploads; confirm.
	replication   string
	// chunkSize is the width, in bytes, of one logical chunk; offsets are
	// divided by it to find chunk indexes.
	chunkSize     int64
	// writerPattern is created with NewWriterPattern(chunkSize) by
	// newPageWriter; it is not consulted by the methods in this file.
	writerPattern *WriterPattern

	// randomWriter is the dirty-page store every method delegates to.
	randomWriter page_writer.DirtyPages
}

// Compile-time check that *PageWriter satisfies page_writer.DirtyPages.
// A typed nil pointer is used so that no PageWriter value is allocated at
// package initialization just to perform the check.
var _ page_writer.DirtyPages = (*PageWriter)(nil)

// newPageWriter returns a PageWriter bound to fh that works in chunks of
// chunkSize bytes. The writer pattern comes from NewWriterPattern and the
// backing dirty-page store from newMemoryChunkPages.
func newPageWriter(fh *FileHandle, chunkSize int64) *PageWriter {
	return &PageWriter{
		fh:            fh,
		chunkSize:     chunkSize,
		writerPattern: NewWriterPattern(chunkSize),
		randomWriter:  newMemoryChunkPages(fh, chunkSize),
	}
}

// AddPage records data as written at offset. The payload is cut at chunkSize
// boundaries and each piece is handed to addToOneChunk together with the index
// of the chunk it falls in; isSequential and tsNs are passed through unchanged.
func (pw *PageWriter) AddPage(offset int64, data []byte, isSequential bool, tsNs int64) {

	glog.V(4).Infof("%v AddPage [%d, %d)", pw.fh.fh, offset, offset+int64(len(data)))

	for idx := offset / pw.chunkSize; len(data) > 0; idx++ {
		// Never write past the end of the current chunk in one piece.
		chunkEnd := (idx + 1) * pw.chunkSize
		n := int64(len(data))
		if room := chunkEnd - offset; room < n {
			n = room
		}

		pw.addToOneChunk(idx, offset, data[:n], isSequential, tsNs)

		offset, data = offset+n, data[n:]
	}
}

// addToOneChunk forwards one piece of a write, already confined to a single
// chunk by AddPage, to randomWriter.AddPage. chunkIndex is accepted but not
// used by the current implementation.
func (pw *PageWriter) addToOneChunk(chunkIndex, offset int64, data []byte, isSequential bool, tsNs int64) {
	pw.randomWriter.AddPage(offset, data, isSequential, tsNs)
}

// FlushData delegates to randomWriter.FlushData and returns its error
// unchanged.
func (pw *PageWriter) FlushData() error {
	return pw.randomWriter.FlushData()
}

// ReadDirtyDataAt asks randomWriter to fill data for the range
// [offset, offset+len(data)), one chunkSize-aligned piece at a time, and
// returns the largest stop offset reported for any piece.
//
// The running maximum is kept across pieces. Previously each piece overwrote
// the result, so a read spanning several chunks reported only what the last
// chunk returned and could lose a larger value from an earlier chunk.
//
// NOTE(review): assumes randomWriter.ReadDirtyDataAt returns the highest
// offset it filled (and a value <= 0 when it filled nothing); confirm against
// the DirtyPages implementation.
func (pw *PageWriter) ReadDirtyDataAt(data []byte, offset int64, tsNs int64) (maxStop int64) {
	glog.V(4).Infof("ReadDirtyDataAt %v [%d, %d)", pw.fh.fh, offset, offset+int64(len(data)))

	chunkIndex := offset / pw.chunkSize
	for i := chunkIndex; len(data) > 0; i++ {
		// Clamp the piece so it never crosses into the next chunk.
		readSize := min(int64(len(data)), (i+1)*pw.chunkSize-offset)

		stop := pw.randomWriter.ReadDirtyDataAt(data[:readSize], offset, tsNs)
		maxStop = max(maxStop, stop)

		offset += readSize
		data = data[readSize:]
	}

	return maxStop
}

// LockForRead forwards the [startOffset, stopOffset) range to
// randomWriter.LockForRead. Callers are expected to pair it with
// UnlockForRead.
func (pw *PageWriter) LockForRead(startOffset, stopOffset int64) {
	pw.randomWriter.LockForRead(startOffset, stopOffset)
}

// UnlockForRead forwards the [startOffset, stopOffset) range to
// randomWriter.UnlockForRead; it is the counterpart of LockForRead.
func (pw *PageWriter) UnlockForRead(startOffset, stopOffset int64) {
	pw.randomWriter.UnlockForRead(startOffset, stopOffset)
}

// Destroy delegates to randomWriter.Destroy.
func (pw *PageWriter) Destroy() {
	pw.randomWriter.Destroy()
}

// max returns the larger of x and y.
func max(x, y int64) int64 {
	if y >= x {
		return y
	}
	return x
}
// min returns the smaller of x and y.
func min(x, y int64) int64 {
	if y <= x {
		return y
	}
	return x
}
support read 2022-02-14 14:50:44 +08:00			`package mount`

			`import (`
move to https://github.com/seaweedfs/seaweedfs 2022-07-29 15:17:28 +08:00			`"github.com/seaweedfs/seaweedfs/weed/glog"`
			`"github.com/seaweedfs/seaweedfs/weed/mount/page_writer"`
support read 2022-02-14 14:50:44 +08:00			`)`

			`type PageWriter struct {`
write to disk during random writes, limiting total disk spaces used 2022-03-14 09:17:35 +08:00			`fh *FileHandle`
			`collection string`
			`replication string`
			`chunkSize int64`
			`writerPattern *WriterPattern`
support read 2022-02-14 14:50:44 +08:00
			`randomWriter page_writer.DirtyPages`
			`}`

			`var (`
			`_ = page_writer.DirtyPages(&PageWriter{})`
			`)`

			`func newPageWriter(fh *FileHandle, chunkSize int64) *PageWriter {`
			`pw := &PageWriter{`
write to disk during random writes, limiting total disk spaces used 2022-03-14 09:17:35 +08:00			`fh: fh,`
			`chunkSize: chunkSize,`
			`writerPattern: NewWriterPattern(chunkSize),`
			`randomWriter: newMemoryChunkPages(fh, chunkSize),`
support read 2022-02-14 14:50:44 +08:00			`}`
			`return pw`
			`}`

more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as semaphore.Weighted not found impactful optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to list.List refactor chunkViews to list.List add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to IntervalList[ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add insert 
interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation 2023-01-03 15:20:45 +08:00			`func (pw *PageWriter) AddPage(offset int64, data []byte, isSequential bool, tsNs int64) {`
support read 2022-02-14 14:50:44 +08:00
file set attribute 2022-02-14 17:36:10 +08:00			`glog.V(4).Infof("%v AddPage [%d, %d)", pw.fh.fh, offset, offset+int64(len(data)))`
support read 2022-02-14 14:50:44 +08:00
			`chunkIndex := offset / pw.chunkSize`
			`for i := chunkIndex; len(data) > 0; i++ {`
			`writeSize := min(int64(len(data)), (i+1)*pw.chunkSize-offset)`
more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as semaphore.Weighted not found impactful optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to list.List refactor chunkViews to list.List add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to IntervalList[ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add insert 
interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation 2023-01-03 15:20:45 +08:00			`pw.addToOneChunk(i, offset, data[:writeSize], isSequential, tsNs)`
support read 2022-02-14 14:50:44 +08:00			`offset += writeSize`
			`data = data[writeSize:]`
			`}`
			`}`

more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as semaphore.Weighted not found impactful optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to list.List refactor chunkViews to list.List add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to IntervalList[ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add insert 
interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation 2023-01-03 15:20:45 +08:00			`func (pw *PageWriter) addToOneChunk(chunkIndex, offset int64, data []byte, isSequential bool, tsNs int64) {`
			`pw.randomWriter.AddPage(offset, data, isSequential, tsNs)`
support read 2022-02-14 14:50:44 +08:00			`}`

			`func (pw *PageWriter) FlushData() error {`
			`return pw.randomWriter.FlushData()`
			`}`

more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as semaphore.Weighted not found impactful optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to list.List refactor chunkViews to list.List add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to IntervalList[ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add insert 
interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation 2023-01-03 15:20:45 +08:00			`func (pw *PageWriter) ReadDirtyDataAt(data []byte, offset int64, tsNs int64) (maxStop int64) {`
file set attribute 2022-02-14 17:36:10 +08:00			`glog.V(4).Infof("ReadDirtyDataAt %v [%d, %d)", pw.fh.fh, offset, offset+int64(len(data)))`
support read 2022-02-14 14:50:44 +08:00
			`chunkIndex := offset / pw.chunkSize`
			`for i := chunkIndex; len(data) > 0; i++ {`
			`readSize := min(int64(len(data)), (i+1)*pw.chunkSize-offset)`

more solid weed mount (#4089) * compare chunks by timestamp * fix slab clearing error * fix test compilation * move oldest chunk to sealed, instead of by fullness * lock on fh.entryViewCache * remove verbose logs * revert slat clearing * less logs * less logs * track write and read by timestamp * remove useless logic * add entry lock on file handle release * use mem chunk only, swap file chunk has problems * comment out code that maybe used later * add debug mode to compare data read and write * more efficient readResolvedChunks with linked list * small optimization * fix test compilation * minor fix on writer * add SeparateGarbageChunks * group chunks into sections * turn off debug mode * fix tests * fix tests * tmp enable swap file chunk * Revert "tmp enable swap file chunk" This reverts commit 985137ec472924e4815f258189f6ca9f2168a0a7. * simple refactoring * simple refactoring * do not re-use swap file chunk. Sealed chunks should not be re-used. * comment out debugging facilities * either mem chunk or swap file chunk is fine now * remove orderedMutex as semaphore.Weighted not found impactful optimize size calculation for changing large files * optimize performance to avoid going through the long list of chunks * still problems with swap file chunk * rename * tiny optimization * swap file chunk save only successfully read data * fix * enable both mem and swap file chunk * resolve chunks with range * rename * fix chunk interval list * also change file handle chunk group when adding chunks * pick in-active chunk with time-decayed counter * fix compilation * avoid nil with empty fh.entry * refactoring * rename * rename * refactor visible intervals to list.List refactor chunkViews to list.List add IntervalList for generic interval list * change visible interval to use IntervalList in generics * cahnge chunkViews to IntervalList[ChunkView] * use NewFileChunkSection to create * rename variables * refactor * fix renaming leftover * renaming * renaming * add insert 
interval * interval list adds lock * incrementally add chunks to readers Fixes: 1. set start and stop offset for the value object 2. clone the value object 3. use pointer instead of copy-by-value when passing to interval.Value 4. use insert interval since adding chunk could be out of order * fix tests compilation * fix tests compilation 2023-01-03 15:20:45 +08:00			`maxStop = pw.randomWriter.ReadDirtyDataAt(data[:readSize], offset, tsNs)`
support read 2022-02-14 14:50:44 +08:00
			`offset += readSize`
			`data = data[readSize:]`
			`}`

			`return`
			`}`

			`func (pw *PageWriter) LockForRead(startOffset, stopOffset int64) {`
			`pw.randomWriter.LockForRead(startOffset, stopOffset)`
			`}`

			`func (pw *PageWriter) UnlockForRead(startOffset, stopOffset int64) {`
			`pw.randomWriter.UnlockForRead(startOffset, stopOffset)`
			`}`

			`func (pw *PageWriter) Destroy() {`
			`pw.randomWriter.Destroy()`
			`}`

			`func max(x, y int64) int64 {`
			`if x > y {`
			`return x`
			`}`
			`return y`
			`}`
			`func min(x, y int64) int64 {`
			`if x < y {`
			`return x`
			`}`
			`return y`
			`}`