2014-05-20 10:24:35 +08:00
package storage
import (
2014-05-20 11:54:39 +08:00
"fmt"
2014-05-20 10:24:35 +08:00
"os"
2020-02-27 16:59:35 +08:00
"runtime"
2014-09-21 03:38:59 +08:00
"time"
2014-10-27 02:34:55 +08:00
2022-07-29 15:17:28 +08:00
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/stats"
"github.com/seaweedfs/seaweedfs/weed/storage/backend"
idx2 "github.com/seaweedfs/seaweedfs/weed/storage/idx"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
"github.com/seaweedfs/seaweedfs/weed/storage/needle_map"
"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
. "github.com/seaweedfs/seaweedfs/weed/storage/types"
"github.com/seaweedfs/seaweedfs/weed/util"
2014-05-20 10:24:35 +08:00
)
2021-10-24 16:55:34 +08:00
// ProgressFunc is a progress callback for index-based compaction. It is
// called with the actual .dat offset of the needle about to be copied;
// returning false stops the copy with an "interrupted" error.
type ProgressFunc func(processed int64) bool
2014-05-20 10:24:35 +08:00
func ( v * Volume ) garbageLevel ( ) float64 {
2018-06-25 16:20:15 +08:00
if v . ContentSize ( ) == 0 {
return 0
}
2019-12-25 06:55:50 +08:00
deletedSize := v . DeletedSize ( )
fileSize := v . ContentSize ( )
if v . DeletedCount ( ) > 0 && v . DeletedSize ( ) == 0 {
// this happens for .sdx converted back to normal .idx
// where deleted entry size is missing
datFileSize , _ , _ := v . FileStat ( )
deletedSize = datFileSize - fileSize - super_block . SuperBlockSize
fileSize = datFileSize
}
return float64 ( deletedSize ) / float64 ( fileSize )
2014-05-20 10:24:35 +08:00
}
2019-12-25 06:55:50 +08:00
// compact a volume based on deletions in .dat files
2019-09-04 00:00:59 +08:00
func ( v * Volume ) Compact ( preallocate int64 , compactionBytePerSecond int64 ) error {
2019-09-03 22:05:43 +08:00
2019-12-25 00:13:01 +08:00
if v . MemoryMapMaxSizeMb != 0 { //it makes no sense to compact in memory
2019-09-03 22:05:43 +08:00
return nil
}
2019-12-25 00:13:01 +08:00
glog . V ( 3 ) . Infof ( "Compacting volume %d ..." , v . Id )
//no need to lock for copy on write
//v.accessLock.Lock()
//defer v.accessLock.Unlock()
//glog.V(3).Infof("Got Compaction lock...")
2024-08-19 03:08:43 +08:00
if v . isCompacting || v . isCommitCompacting {
glog . V ( 0 ) . Infof ( "Volume %d is already compacting..." , v . Id )
return nil
}
2019-12-25 00:13:01 +08:00
v . isCompacting = true
defer func ( ) {
v . isCompacting = false
} ( )
v . lastCompactIndexOffset = v . IndexFileSize ( )
v . lastCompactRevision = v . SuperBlock . CompactionRevision
glog . V ( 3 ) . Infof ( "creating copies for volume %d ,last offset %d..." , v . Id , v . lastCompactIndexOffset )
2020-03-20 14:54:52 +08:00
if err := v . DataBackend . Sync ( ) ; err != nil {
2022-10-13 15:51:20 +08:00
glog . V ( 0 ) . Infof ( "compact failed to sync volume %d" , v . Id )
2020-03-20 14:54:52 +08:00
}
2020-03-21 14:38:46 +08:00
if err := v . nm . Sync ( ) ; err != nil {
2022-10-13 15:51:20 +08:00
glog . V ( 0 ) . Infof ( "compact failed to sync volume idx %d" , v . Id )
2020-03-21 14:38:46 +08:00
}
2020-11-27 19:17:10 +08:00
return v . copyDataAndGenerateIndexFile ( v . FileName ( ".cpd" ) , v . FileName ( ".cpx" ) , preallocate , compactionBytePerSecond )
2014-05-20 10:24:35 +08:00
}
2016-09-23 11:31:17 +08:00
2019-12-25 06:55:50 +08:00
// compact a volume based on deletions in .idx files
2021-10-24 16:55:34 +08:00
func ( v * Volume ) Compact2 ( preallocate int64 , compactionBytePerSecond int64 , progressFn ProgressFunc ) error {
2019-08-12 15:53:50 +08:00
2019-12-25 00:13:01 +08:00
if v . MemoryMapMaxSizeMb != 0 { //it makes no sense to compact in memory
2019-09-03 22:05:43 +08:00
return nil
}
2019-12-25 00:13:01 +08:00
glog . V ( 3 ) . Infof ( "Compact2 volume %d ..." , v . Id )
2024-08-19 03:08:43 +08:00
if v . isCompacting || v . isCommitCompacting {
glog . V ( 0 ) . Infof ( "Volume %d is already compacting2 ..." , v . Id )
return nil
}
2019-12-25 00:13:01 +08:00
v . isCompacting = true
defer func ( ) {
v . isCompacting = false
} ( )
2019-12-25 06:55:50 +08:00
v . lastCompactIndexOffset = v . IndexFileSize ( )
v . lastCompactRevision = v . SuperBlock . CompactionRevision
2019-12-25 00:13:01 +08:00
glog . V ( 3 ) . Infof ( "creating copies for volume %d ..." , v . Id )
2022-05-30 07:59:30 +08:00
if v . DataBackend == nil {
return fmt . Errorf ( "volume %d backend is empty remote:%v" , v . Id , v . HasRemoteFile ( ) )
}
2020-03-20 14:54:52 +08:00
if err := v . DataBackend . Sync ( ) ; err != nil {
2022-10-13 15:51:20 +08:00
glog . V ( 0 ) . Infof ( "compact2 failed to sync volume dat %d: %v" , v . Id , err )
2020-03-21 14:38:46 +08:00
}
if err := v . nm . Sync ( ) ; err != nil {
2022-10-13 15:51:20 +08:00
glog . V ( 0 ) . Infof ( "compact2 failed to sync volume idx %d: %v" , v . Id , err )
2020-03-20 14:54:52 +08:00
}
2022-08-24 14:53:35 +08:00
return v . copyDataBasedOnIndexFile (
2022-07-29 14:59:33 +08:00
v . FileName ( ".dat" ) , v . FileName ( ".idx" ) ,
v . FileName ( ".cpd" ) , v . FileName ( ".cpx" ) ,
v . SuperBlock ,
v . Version ( ) ,
preallocate ,
compactionBytePerSecond ,
progressFn ,
)
2016-09-23 11:31:17 +08:00
}
2019-03-26 00:16:12 +08:00
func ( v * Volume ) CommitCompact ( ) error {
2019-12-25 00:13:01 +08:00
if v . MemoryMapMaxSizeMb != 0 { //it makes no sense to compact in memory
return nil
}
glog . V ( 0 ) . Infof ( "Committing volume %d vacuuming..." , v . Id )
2019-09-03 22:05:43 +08:00
2024-08-19 03:08:43 +08:00
if v . isCommitCompacting {
glog . V ( 0 ) . Infof ( "Volume %d is already commit compacting ..." , v . Id )
return nil
}
2022-04-27 14:28:34 +08:00
v . isCommitCompacting = true
2019-12-25 00:13:01 +08:00
defer func ( ) {
2022-04-27 14:28:34 +08:00
v . isCommitCompacting = false
2019-12-25 00:13:01 +08:00
} ( )
2019-09-03 22:05:43 +08:00
2019-12-25 00:13:01 +08:00
v . dataFileAccessLock . Lock ( )
defer v . dataFileAccessLock . Unlock ( )
2019-09-03 22:05:43 +08:00
2019-12-25 00:13:01 +08:00
glog . V ( 3 ) . Infof ( "Got volume %d committing lock..." , v . Id )
2022-08-24 14:53:35 +08:00
if v . nm != nil {
v . nm . Close ( )
v . nm = nil
}
2020-02-27 16:07:25 +08:00
if v . DataBackend != nil {
if err := v . DataBackend . Close ( ) ; err != nil {
2022-10-13 15:51:20 +08:00
glog . V ( 0 ) . Infof ( "failed to close volume %d" , v . Id )
2020-02-27 16:07:25 +08:00
}
2019-12-25 00:13:01 +08:00
}
v . DataBackend = nil
2024-04-17 19:49:50 +08:00
stats . VolumeServerVolumeGauge . WithLabelValues ( v . Collection , "volume" ) . Dec ( )
2019-12-25 00:13:01 +08:00
var e error
2020-11-27 19:17:10 +08:00
if e = v . makeupDiff ( v . FileName ( ".cpd" ) , v . FileName ( ".cpx" ) , v . FileName ( ".dat" ) , v . FileName ( ".idx" ) ) ; e != nil {
2019-12-25 00:13:01 +08:00
glog . V ( 0 ) . Infof ( "makeupDiff in CommitCompact volume %d failed %v" , v . Id , e )
2020-11-27 19:17:10 +08:00
e = os . Remove ( v . FileName ( ".cpd" ) )
2019-12-25 00:13:01 +08:00
if e != nil {
return e
2016-09-29 13:57:23 +08:00
}
2020-11-27 19:17:10 +08:00
e = os . Remove ( v . FileName ( ".cpx" ) )
2019-12-25 00:13:01 +08:00
if e != nil {
return e
}
} else {
2020-02-27 16:59:35 +08:00
if runtime . GOOS == "windows" {
2020-11-27 19:17:10 +08:00
e = os . RemoveAll ( v . FileName ( ".dat" ) )
2020-03-11 04:33:26 +08:00
if e != nil {
return e
}
2020-11-27 19:17:10 +08:00
e = os . RemoveAll ( v . FileName ( ".idx" ) )
2020-03-11 04:33:26 +08:00
if e != nil {
return e
}
2020-02-27 16:59:35 +08:00
}
2016-09-29 13:57:23 +08:00
var e error
2020-11-27 19:17:10 +08:00
if e = os . Rename ( v . FileName ( ".cpd" ) , v . FileName ( ".dat" ) ) ; e != nil {
return fmt . Errorf ( "rename %s: %v" , v . FileName ( ".cpd" ) , e )
2016-09-29 13:57:23 +08:00
}
2020-11-27 19:17:10 +08:00
if e = os . Rename ( v . FileName ( ".cpx" ) , v . FileName ( ".idx" ) ) ; e != nil {
return fmt . Errorf ( "rename %s: %v" , v . FileName ( ".cpx" ) , e )
2019-12-25 00:13:01 +08:00
}
}
2016-09-29 13:57:23 +08:00
2019-12-25 00:13:01 +08:00
//glog.V(3).Infof("Pretending to be vacuuming...")
//time.Sleep(20 * time.Second)
2018-09-27 09:45:51 +08:00
2020-11-27 19:17:10 +08:00
os . RemoveAll ( v . FileName ( ".ldb" ) )
2018-09-27 09:45:51 +08:00
2019-12-25 00:13:01 +08:00
glog . V ( 3 ) . Infof ( "Loading volume %d commit file..." , v . Id )
if e = v . load ( true , false , v . needleMapKind , 0 ) ; e != nil {
return e
2014-05-20 10:24:35 +08:00
}
2022-09-16 17:43:17 +08:00
glog . V ( 3 ) . Infof ( "Finish committing volume %d" , v . Id )
2019-12-25 02:18:56 +08:00
return nil
2014-05-20 10:24:35 +08:00
}
2014-05-20 11:54:39 +08:00
2017-08-30 14:59:53 +08:00
func ( v * Volume ) cleanupCompact ( ) error {
2018-10-19 11:47:30 +08:00
glog . V ( 0 ) . Infof ( "Cleaning up volume %d vacuuming..." , v . Id )
2017-08-30 14:59:53 +08:00
2020-11-27 19:17:10 +08:00
e1 := os . Remove ( v . FileName ( ".cpd" ) )
e2 := os . Remove ( v . FileName ( ".cpx" ) )
2022-08-24 14:53:35 +08:00
e3 := os . RemoveAll ( v . FileName ( ".cpldb" ) )
2021-05-16 00:37:39 +08:00
if e1 != nil && ! os . IsNotExist ( e1 ) {
2017-08-30 14:59:53 +08:00
return e1
}
2021-05-16 00:37:39 +08:00
if e2 != nil && ! os . IsNotExist ( e2 ) {
2017-08-30 14:59:53 +08:00
return e2
}
2022-08-24 14:53:35 +08:00
if e3 != nil && ! os . IsNotExist ( e3 ) {
return e3
}
2017-08-30 14:59:53 +08:00
return nil
}
2019-11-29 10:33:18 +08:00
func fetchCompactRevisionFromDatFile ( datBackend backend . BackendStorageFile ) ( compactRevision uint16 , err error ) {
2019-12-24 04:48:20 +08:00
superBlock , err := super_block . ReadSuperBlock ( datBackend )
2016-10-07 16:22:24 +08:00
if err != nil {
return 0 , err
}
2019-04-20 03:29:49 +08:00
return superBlock . CompactionRevision , nil
2016-10-07 16:22:24 +08:00
}
2019-12-25 06:55:50 +08:00
// if old .dat and .idx files are updated, this func tries to apply the same changes to new files accordingly
2016-09-29 13:57:23 +08:00
func ( v * Volume ) makeupDiff ( newDatFileName , newIdxFileName , oldDatFileName , oldIdxFileName string ) ( err error ) {
var indexSize int64
oldIdxFile , err := os . Open ( oldIdxFileName )
2020-12-02 15:36:49 +08:00
if err != nil {
2020-12-02 15:37:49 +08:00
return fmt . Errorf ( "makeupDiff open %s failed: %v" , oldIdxFileName , err )
2020-12-02 15:36:49 +08:00
}
2016-09-29 13:57:23 +08:00
defer oldIdxFile . Close ( )
oldDatFile , err := os . Open ( oldDatFileName )
2020-12-02 15:36:49 +08:00
if err != nil {
2020-12-02 15:37:49 +08:00
return fmt . Errorf ( "makeupDiff open %s failed: %v" , oldDatFileName , err )
2020-12-02 15:36:49 +08:00
}
2019-10-30 13:37:36 +08:00
oldDatBackend := backend . NewDiskFile ( oldDatFile )
2019-10-29 15:35:16 +08:00
defer oldDatBackend . Close ( )
2016-09-29 13:57:23 +08:00
2019-12-25 06:55:50 +08:00
// skip if the old .idx file has not changed
2016-09-29 13:57:23 +08:00
if indexSize , err = verifyIndexFileIntegrity ( oldIdxFile ) ; err != nil {
return fmt . Errorf ( "verifyIndexFileIntegrity %s failed: %v" , oldIdxFileName , err )
}
2016-10-07 16:22:24 +08:00
if indexSize == 0 || uint64 ( indexSize ) <= v . lastCompactIndexOffset {
2022-10-12 12:13:25 +08:00
return nil
2016-09-29 13:57:23 +08:00
}
2019-12-25 06:55:50 +08:00
// fail if the old .dat file has changed to a new revision
2019-10-29 15:35:16 +08:00
oldDatCompactRevision , err := fetchCompactRevisionFromDatFile ( oldDatBackend )
2016-10-07 16:22:24 +08:00
if err != nil {
2018-06-24 09:24:59 +08:00
return f mt . Errorf ( "fetchCompactRevisionFromDatFile src %s failed: %v" , oldDatFile . Name ( ) , err )
2016-10-07 16:22:24 +08:00
}
if oldDatCompactRevision != v . lastCompactRevision {
return fmt . Errorf ( "current old dat file's compact revision %d is not the expected one %d" , oldDatCompactRevision , v . lastCompactRevision )
}
type keyField struct {
2018-07-08 17:28:04 +08:00
offset Offset
2020-08-19 08:04:28 +08:00
size Size
2016-10-07 16:22:24 +08:00
}
2018-07-08 17:28:04 +08:00
incrementedHasUpdatedIndexEntry := make ( map [ NeedleId ] keyField )
2016-10-07 16:22:24 +08:00
2019-04-19 15:39:34 +08:00
for idxOffset := indexSize - NeedleMapEntrySize ; uint64 ( idxOffset ) >= v . lastCompactIndexOffset ; idxOffset -= NeedleMapEntrySize {
2016-09-29 13:57:23 +08:00
var IdxEntry [ ] byte
2019-01-17 09:17:19 +08:00
if IdxEntry , err = readIndexEntryAtOffset ( oldIdxFile , idxOffset ) ; err != nil {
return fmt . Errorf ( "readIndexEntry %s at offset %d failed: %v" , oldIdxFileName , idxOffset , err )
2016-09-29 13:57:23 +08:00
}
2019-05-22 13:41:20 +08:00
key , offset , size := idx2 . IdxFileEntry ( IdxEntry )
2018-07-07 15:48:58 +08:00
glog . V ( 4 ) . Infof ( "key %d offset %d size %d" , key , offset , size )
2016-10-07 16:22:24 +08:00
if _ , found := incrementedHasUpdatedIndexEntry [ key ] ; ! found {
incrementedHasUpdatedIndexEntry [ key ] = keyField {
2016-09-29 13:57:23 +08:00
offset : offset ,
size : size ,
}
}
}
2018-06-24 09:24:59 +08:00
// no updates during commit step
if len ( incrementedHasUpdatedIndexEntry ) == 0 {
return nil
}
2016-09-29 13:57:23 +08:00
2018-06-24 09:24:59 +08:00
// deal with updates during commit step
var (
dst , idx * os . File
)
if dst , err = os . OpenFile ( newDatFileName , os . O_RDWR , 0644 ) ; err != nil {
return fmt . Errorf ( "open dat file %s failed: %v" , newDatFileName , err )
}
2019-10-30 13:37:36 +08:00
dstDatBackend := backend . NewDiskFile ( dst )
2019-10-29 15:35:16 +08:00
defer dstDatBackend . Close ( )
2016-09-29 13:57:23 +08:00
2018-06-24 09:24:59 +08:00
if idx , err = os . OpenFile ( newIdxFileName , os . O_RDWR , 0644 ) ; err != nil {
return fmt . Errorf ( "open idx file %s failed: %v" , newIdxFileName , err )
}
2022-08-24 14:53:35 +08:00
2022-10-13 15:51:20 +08:00
defer func ( ) {
idx . Sync ( )
idx . Close ( )
} ( )
2022-08-24 14:53:35 +08:00
stat , err := idx . Stat ( )
if err != nil {
return fmt . Errorf ( "stat file %s: %v" , idx . Name ( ) , err )
}
idxSize := stat . Size ( )
2018-06-24 09:24:59 +08:00
var newDatCompactRevision uint16
2019-10-29 15:35:16 +08:00
newDatCompactRevision , err = fetchCompactRevisionFromDatFile ( dstDatBackend )
2018-06-24 09:24:59 +08:00
if err != nil {
return fmt . Errorf ( "fetchCompactRevisionFromDatFile dst %s failed: %v" , dst . Name ( ) , err )
}
if oldDatCompactRevision + 1 != newDatCompactRevision {
return fmt . Errorf ( "oldDatFile %s 's compact revision is %d while newDatFile %s 's compact revision is %d" , oldDatFileName , oldDatCompactRevision , newDatFileName , newDatCompactRevision )
}
2016-10-07 16:22:24 +08:00
2019-01-16 17:48:59 +08:00
for key , increIdxEntry := range incrementedHasUpdatedIndexEntry {
2019-07-22 04:50:24 +08:00
idxEntryBytes := needle_map . ToBytes ( key , increIdxEntry . offset , increIdxEntry . size )
2016-09-29 13:57:23 +08:00
2018-06-24 09:24:59 +08:00
var offset int64
if offset , err = dst . Seek ( 0 , 2 ) ; err != nil {
glog . V ( 0 ) . Infof ( "failed to seek the end of file: %v" , err )
return
}
//ensure file writing starting from aligned positions
if offset % NeedlePaddingSize != 0 {
offset = offset + ( NeedlePaddingSize - offset % NeedlePaddingSize )
2019-07-22 04:50:42 +08:00
if offset , err = dst . Seek ( offset , 0 ) ; err != nil {
glog . V ( 0 ) . Infof ( "failed to align in datafile %s: %v" , dst . Name ( ) , err )
2016-09-29 13:57:23 +08:00
return
}
2018-06-24 09:24:59 +08:00
}
//updated needle
2020-08-19 09:01:37 +08:00
if ! increIdxEntry . offset . IsZero ( ) && increIdxEntry . size != 0 && increIdxEntry . size . IsValid ( ) {
2018-06-24 09:24:59 +08:00
//even the needle cache in memory is hit, the need_bytes is correct
2021-02-07 12:11:51 +08:00
glog . V ( 4 ) . Infof ( "file %d offset %d size %d" , key , increIdxEntry . offset . ToActualOffset ( ) , increIdxEntry . size )
2019-01-16 17:48:59 +08:00
var needleBytes [ ] byte
2021-02-07 12:11:51 +08:00
needleBytes , err = needle . ReadNeedleBlob ( oldDatBackend , increIdxEntry . offset . ToActualOffset ( ) , increIdxEntry . size , v . Version ( ) )
2018-06-24 09:24:59 +08:00
if err != nil {
2021-02-07 12:11:51 +08:00
return fmt . Errorf ( "ReadNeedleBlob %s key %d offset %d size %d failed: %v" , oldDatFile . Name ( ) , key , increIdxEntry . offset . ToActualOffset ( ) , increIdxEntry . size , err )
2016-10-07 16:22:24 +08:00
}
2021-03-05 18:29:38 +08:00
dstDatBackend . Write ( needleBytes )
2022-02-08 03:10:53 +08:00
if err := dstDatBackend . Sync ( ) ; err != nil {
return fmt . Errorf ( "cannot sync needle %s: %v" , dstDatBackend . File . Name ( ) , err )
}
2019-01-16 17:48:59 +08:00
util . Uint32toBytes ( idxEntryBytes [ 8 : 12 ] , uint32 ( offset / NeedlePaddingSize ) )
2018-06-24 09:24:59 +08:00
} else { //deleted needle
2022-07-25 11:46:41 +08:00
//fakeDelNeedle's default Data field is nil
2019-04-19 12:43:36 +08:00
fakeDelNeedle := new ( needle . Needle )
2018-06-24 09:24:59 +08:00
fakeDelNeedle . Id = key
fakeDelNeedle . Cookie = 0x12345678
2018-07-24 17:44:33 +08:00
fakeDelNeedle . AppendAtNs = uint64 ( time . Now ( ) . UnixNano ( ) )
2019-10-29 15:35:16 +08:00
_ , _ , _ , err = fakeDelNeedle . Append ( dstDatBackend , v . Version ( ) )
2018-06-24 09:24:59 +08:00
if err != nil {
return fmt . Errorf ( "append deleted %d failed: %v" , key , err )
2016-10-07 16:22:24 +08:00
}
2019-01-16 17:48:59 +08:00
util . Uint32toBytes ( idxEntryBytes [ 8 : 12 ] , uint32 ( 0 ) )
2018-06-24 09:24:59 +08:00
}
if _ , err := idx . Seek ( 0 , 2 ) ; err != nil {
return fmt . Errorf ( "cannot seek end of indexfile %s: %v" ,
newIdxFileName , err )
2016-09-29 13:57:23 +08:00
}
2019-01-16 17:48:59 +08:00
_ , err = idx . Write ( idxEntryBytes )
2022-02-08 03:10:53 +08:00
if err != nil {
return fmt . Errorf ( "cannot write indexfile %s: %v" , newIdxFileName , err )
}
2016-09-29 13:57:23 +08:00
}
2022-10-13 13:59:07 +08:00
return v . tmpNm . DoOffsetLoading ( v , idx , uint64 ( idxSize ) / NeedleMapEntrySize )
2016-09-27 13:26:39 +08:00
}
2019-01-16 17:48:59 +08:00
type VolumeFileScanner4Vacuum struct {
2019-05-07 04:56:08 +08:00
version needle . Version
v * Volume
2019-11-29 10:33:18 +08:00
dstBackend backend . BackendStorageFile
2019-12-25 02:18:56 +08:00
nm * needle_map . MemDb
2019-05-07 04:56:08 +08:00
newOffset int64
now uint64
writeThrottler * util . WriteThrottler
2019-01-16 17:48:59 +08:00
}
2019-12-24 04:48:20 +08:00
func ( scanner * VolumeFileScanner4Vacuum ) VisitSuperBlock ( superBlock super_block . SuperBlock ) error {
scanner . version = superBlock . Version
2019-04-20 03:29:49 +08:00
superBlock . CompactionRevision ++
2019-10-29 15:35:16 +08:00
_ , err := scanner . dstBackend . WriteAt ( superBlock . Bytes ( ) , 0 )
2019-01-16 17:48:59 +08:00
scanner . newOffset = int64 ( superBlock . BlockSize ( ) )
return err
}
// ReadNeedleBody always reports true: VisitNeedle appends the visited needle
// to the destination, so the scan is asked to read needle bodies.
func (scanner *VolumeFileScanner4Vacuum) ReadNeedleBody() bool {
	return true
}
2019-10-22 15:50:30 +08:00
func ( scanner * VolumeFileScanner4Vacuum ) VisitNeedle ( n * needle . Needle , offset int64 , needleHeader , needleBody [ ] byte ) error {
2019-01-16 17:48:59 +08:00
if n . HasTtl ( ) && scanner . now >= n . LastModified + uint64 ( scanner . v . Ttl . Minutes ( ) * 60 ) {
return nil
}
nv , ok := scanner . v . nm . Get ( n . Id )
glog . V ( 4 ) . Infoln ( "needle expected offset " , offset , "ok" , ok , "nv" , nv )
2021-02-07 12:11:51 +08:00
if ok && nv . Offset . ToActualOffset ( ) == offset && nv . Size > 0 && nv . Size . IsValid ( ) {
2019-12-25 02:18:56 +08:00
if err := scanner . nm . Set ( n . Id , ToOffset ( scanner . newOffset ) , n . Size ) ; err != nil {
2019-01-16 17:48:59 +08:00
return fmt . Errorf ( "cannot put needle: %s" , err )
}
2019-10-29 15:35:16 +08:00
if _ , _ , _ , err := n . Append ( scanner . dstBackend , scanner . v . Version ( ) ) ; err != nil {
2019-01-16 17:48:59 +08:00
return fmt . Errorf ( "cannot append needle: %s" , err )
}
2019-05-04 08:22:39 +08:00
delta := n . DiskSize ( scanner . version )
scanner . newOffset += delta
2019-05-07 04:56:08 +08:00
scanner . writeThrottler . MaybeSlowdown ( delta )
2019-01-16 17:48:59 +08:00
glog . V ( 4 ) . Infoln ( "saving key" , n . Id , "volume offset" , offset , "=>" , scanner . newOffset , "data_size" , n . Size )
}
return nil
}
2019-09-04 00:00:59 +08:00
func ( v * Volume ) copyDataAndGenerateIndexFile ( dstName , idxName string , preallocate int64 , compactionBytePerSecond int64 ) ( err error ) {
2022-10-13 15:51:20 +08:00
var dst backend . BackendStorageFile
2020-04-12 05:27:25 +08:00
if dst , err = backend . CreateVolumeFile ( dstName , preallocate , 0 ) ; err != nil {
2022-02-23 16:17:48 +08:00
return err
2014-05-20 11:54:39 +08:00
}
defer dst . Close ( )
2019-12-25 02:18:56 +08:00
nm := needle_map . NewMemDb ( )
2020-03-11 04:31:23 +08:00
defer nm . Close ( )
2014-05-20 11:54:39 +08:00
2019-01-16 17:48:59 +08:00
scanner := & VolumeFileScanner4Vacuum {
2019-05-07 04:56:08 +08:00
v : v ,
now : uint64 ( time . Now ( ) . Unix ( ) ) ,
2019-12-25 02:18:56 +08:00
nm : nm ,
2019-11-09 16:10:59 +08:00
dstBackend : dst ,
2019-05-07 04:56:08 +08:00
writeThrottler : util . NewWriteThrottler ( compactionBytePerSecond ) ,
2019-01-16 17:48:59 +08:00
}
err = ScanVolumeFile ( v . dir , v . Collection , v . Id , v . needleMapKind , scanner )
2019-12-25 02:18:56 +08:00
if err != nil {
2022-02-23 16:17:48 +08:00
return err
2019-12-25 02:18:56 +08:00
}
2022-02-24 07:34:25 +08:00
return nm . SaveToIdx ( idxName )
2014-05-20 11:54:39 +08:00
}
2016-09-23 11:31:17 +08:00
2022-08-24 14:53:35 +08:00
func ( v * Volume ) copyDataBasedOnIndexFile ( srcDatName , srcIdxName , dstDatName , datIdxName string , sb super_block . SuperBlock , version needle . Version , preallocate , compactionBytePerSecond int64 , progressFn ProgressFunc ) ( err error ) {
2016-09-23 11:31:17 +08:00
var (
2020-01-22 13:18:01 +08:00
srcDatBackend , dstDatBackend backend . BackendStorageFile
2020-01-30 01:09:55 +08:00
dataFile * os . File
2016-09-23 11:31:17 +08:00
)
2020-04-12 05:27:25 +08:00
if dstDatBackend , err = backend . CreateVolumeFile ( dstDatName , preallocate , 0 ) ; err != nil {
2022-02-23 16:21:25 +08:00
return err
2016-09-23 11:31:17 +08:00
}
2024-02-24 21:27:35 +08:00
defer func ( ) {
dstDatBackend . Sync ( )
dstDatBackend . Close ( )
} ( )
2016-09-23 11:31:17 +08:00
2020-01-09 01:45:03 +08:00
oldNm := needle_map . NewMemDb ( )
2020-03-11 04:31:23 +08:00
defer oldNm . Close ( )
2020-01-09 01:45:03 +08:00
newNm := needle_map . NewMemDb ( )
2020-03-11 04:31:23 +08:00
defer newNm . Close ( )
2020-01-22 13:18:01 +08:00
if err = oldNm . LoadFromIdx ( srcIdxName ) ; err != nil {
2022-02-23 16:21:25 +08:00
return err
2016-09-23 11:31:17 +08:00
}
2020-01-22 13:18:01 +08:00
if dataFile , err = os . Open ( srcDatName ) ; err != nil {
return err
}
srcDatBackend = backend . NewDiskFile ( dataFile )
2020-03-11 04:32:22 +08:00
defer srcDatBackend . Close ( )
2019-12-25 02:18:56 +08:00
2016-09-23 11:31:17 +08:00
now := uint64 ( time . Now ( ) . Unix ( ) )
2020-01-22 13:18:01 +08:00
sb . CompactionRevision ++
dstDatBackend . WriteAt ( sb . Bytes ( ) , 0 )
newOffset := int64 ( sb . BlockSize ( ) )
2016-09-23 11:31:17 +08:00
2020-03-12 01:32:17 +08:00
writeThrottler := util . NewWriteThrottler ( compactionBytePerSecond )
2022-02-24 07:34:25 +08:00
err = oldNm . AscendingVisit ( func ( value needle_map . NeedleValue ) error {
2020-01-09 01:45:03 +08:00
offset , size := value . Offset , value . Size
2016-09-23 11:31:17 +08:00
2020-08-19 08:35:19 +08:00
if offset . IsZero ( ) || size . IsDeleted ( ) {
2016-09-23 11:31:17 +08:00
return nil
}
2021-10-24 16:55:34 +08:00
if progressFn != nil {
if ! progressFn ( offset . ToActualOffset ( ) ) {
return fmt . Errorf ( "interrupted" )
}
}
2019-04-19 12:43:36 +08:00
n := new ( needle . Needle )
2022-04-18 21:40:58 +08:00
if err := n . ReadData ( srcDatBackend , offset . ToActualOffset ( ) , size , version ) ; err != nil {
2022-02-24 15:54:36 +08:00
return fmt . Errorf ( "cannot hydrate needle from file: %s" , err )
2019-03-19 20:34:43 +08:00
}
2016-09-23 11:31:17 +08:00
2020-01-22 13:18:01 +08:00
if n . HasTtl ( ) && now >= n . LastModified + uint64 ( sb . Ttl . Minutes ( ) * 60 ) {
2016-09-23 11:31:17 +08:00
return nil
}
2020-01-09 01:45:03 +08:00
if err = newNm . Set ( n . Id , ToOffset ( newOffset ) , n . Size ) ; err != nil {
return fmt . Errorf ( "cannot put needle: %s" , err )
2016-09-23 11:31:17 +08:00
}
2020-01-22 13:18:01 +08:00
if _ , _ , _ , err = n . Append ( dstDatBackend , sb . Version ) ; err != nil {
2020-01-09 01:45:03 +08:00
return fmt . Errorf ( "cannot append needle: %s" , err )
}
2020-03-12 01:32:17 +08:00
delta := n . DiskSize ( version )
newOffset += delta
writeThrottler . MaybeSlowdown ( delta )
2020-03-11 04:33:26 +08:00
glog . V ( 4 ) . Infoln ( "saving key" , n . Id , "volume offset" , offset , "=>" , newOffset , "data_size" , n . Size )
2020-01-09 01:45:03 +08:00
2016-09-23 11:31:17 +08:00
return nil
} )
2022-02-24 07:34:25 +08:00
if err != nil {
return err
}
2024-05-13 02:31:34 +08:00
if v . Ttl . String ( ) == "" {
dstDatSize , _ , err := dstDatBackend . GetStat ( )
if err != nil {
return err
}
if v . nm . ContentSize ( ) > v . nm . DeletedSize ( ) {
expectedContentSize := v . nm . ContentSize ( ) - v . nm . DeletedSize ( )
if expectedContentSize > uint64 ( dstDatSize ) {
return fmt . Errorf ( "volume %s unexpected new data size: %d does not match size of content minus deleted: %d" ,
v . Id . String ( ) , dstDatSize , expectedContentSize )
}
} else {
glog . Warningf ( "volume %s content size: %d less deleted size: %d, new size: %d" ,
v . Id . String ( ) , v . nm . ContentSize ( ) , v . nm . DeletedSize ( ) , dstDatSize )
2024-02-24 21:27:35 +08:00
}
}
2022-08-24 14:53:35 +08:00
err = newNm . SaveToIdx ( datIdxName )
if err != nil {
return err
}
indexFile , err := os . OpenFile ( datIdxName , os . O_RDWR | os . O_CREATE , 0644 )
if err != nil {
glog . Errorf ( "cannot open Volume Index %s: %v" , datIdxName , err )
return err
}
2022-10-13 15:51:20 +08:00
defer func ( ) {
indexFile . Sync ( )
indexFile . Close ( )
} ( )
2022-08-24 14:53:35 +08:00
if v . tmpNm != nil {
v . tmpNm . Close ( )
v . tmpNm = nil
}
if v . needleMapKind == NeedleMapInMemory {
2019-12-25 02:18:56 +08:00
2022-08-24 14:53:35 +08:00
nm := & NeedleMap {
m : needle_map . NewCompactMap ( ) ,
}
v . tmpNm = nm
//can be optimized, filling nm in oldNm.AscendingVisit
err = v . tmpNm . DoOffsetLoading ( nil , indexFile , 0 )
return err
} else {
dbFileName := v . FileName ( ".ldb" )
m := & LevelDbNeedleMap { dbFileName : dbFileName }
m . dbFileName = dbFileName
mm := & mapMetric { }
m . mapMetric = * mm
v . tmpNm = m
err = v . tmpNm . DoOffsetLoading ( v , indexFile , 0 )
if err != nil {
return err
}
}
return
2016-09-23 11:31:17 +08:00
}