fix: recreate index includes deleted files (#5579)

* fix: recreate index includes deleted files
https://github.com/seaweedfs/seaweedfs/issues/5508

* fix: counting the number of files

* fix: log
This commit is contained in:
Konstantin Lebedev 2024-05-12 23:31:34 +05:00 committed by GitHub
parent 731b3aadbe
commit d389c5b27e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 64 additions and 31 deletions

View File

@ -32,12 +32,15 @@ var cmdFix = &Command{
var (
fixVolumeCollection = cmdFix.Flag.String("collection", "", "an optional volume collection name, if specified only it will be processed")
fixVolumeId = cmdFix.Flag.Int64("volumeId", 0, "an optional volume id, if not 0 (default) only it will be processed")
fixIncludeDeleted = cmdFix.Flag.Bool("includeDeleted", true, "include deleted entries in the index file")
fixIgnoreError = cmdFix.Flag.Bool("ignoreError", false, "an optional, if true will be processed despite errors")
)
type VolumeFileScanner4Fix struct {
version needle.Version
nm *needle_map.MemDb
version needle.Version
nm *needle_map.MemDb
nmDeleted *needle_map.MemDb
includeDeleted bool
}
func (scanner *VolumeFileScanner4Fix) VisitSuperBlock(superBlock super_block.SuperBlock) error {
@ -50,13 +53,20 @@ func (scanner *VolumeFileScanner4Fix) ReadNeedleBody() bool {
}
func (scanner *VolumeFileScanner4Fix) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error {
glog.V(2).Infof("key %d offset %d size %d disk_size %d compressed %v", n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsCompressed())
glog.V(2).Infof("key %v offset %d size %d disk_size %d compressed %v", n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsCompressed())
if n.Size.IsValid() {
pe := scanner.nm.Set(n.Id, types.ToOffset(offset), n.Size)
glog.V(2).Infof("saved %d with error %v", n.Size, pe)
if pe := scanner.nm.Set(n.Id, types.ToOffset(offset), n.Size); pe != nil {
return fmt.Errorf("saved %d with error %v", n.Size, pe)
}
} else {
glog.V(2).Infof("skipping deleted file ...")
return scanner.nm.Delete(n.Id)
if scanner.includeDeleted {
if pe := scanner.nmDeleted.Set(n.Id, types.ToOffset(offset), types.TombstoneFileSize); pe != nil {
return fmt.Errorf("saved deleted %d with error %v", n.Size, pe)
}
} else {
glog.V(2).Infof("skipping deleted file ...")
return scanner.nm.Delete(n.Id)
}
}
return nil
}
@ -109,21 +119,45 @@ func runFix(cmd *Command, args []string) bool {
if *fixVolumeId != 0 && *fixVolumeId != volumeId {
continue
}
doFixOneVolume(basePath, baseFileName, collection, volumeId)
doFixOneVolume(basePath, baseFileName, collection, volumeId, *fixIncludeDeleted)
}
}
return true
}
func doFixOneVolume(basepath string, baseFileName string, collection string, volumeId int64) {
func SaveToIdx(scaner *VolumeFileScanner4Fix, idxName string) (ret error) {
idxFile, err := os.OpenFile(idxName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
if err != nil {
return
}
defer func() {
idxFile.Close()
}()
return scaner.nm.AscendingVisit(func(value needle_map.NeedleValue) error {
_, err := idxFile.Write(value.ToBytes())
if scaner.includeDeleted && err == nil {
if deleted, ok := scaner.nmDeleted.Get(value.Key); ok {
_, err = idxFile.Write(deleted.ToBytes())
}
}
return err
})
}
func doFixOneVolume(basepath string, baseFileName string, collection string, volumeId int64, fixIncludeDeleted bool) {
indexFileName := path.Join(basepath, baseFileName+".idx")
nm := needle_map.NewMemDb()
nmDeleted := needle_map.NewMemDb()
defer nm.Close()
defer nmDeleted.Close()
vid := needle.VolumeId(volumeId)
scanner := &VolumeFileScanner4Fix{
nm: nm,
nm: nm,
nmDeleted: nmDeleted,
includeDeleted: fixIncludeDeleted,
}
if err := storage.ScanVolumeFile(basepath, collection, vid, storage.NeedleMapInMemory, scanner); err != nil {
@ -135,12 +169,12 @@ func doFixOneVolume(basepath string, baseFileName string, collection string, vol
}
}
if err := nm.SaveToIdx(indexFileName); err != nil {
os.Remove(indexFileName)
if err := SaveToIdx(scanner, indexFileName); err != nil {
err := fmt.Errorf("save to .idx File: %v", err)
if *fixIgnoreError {
glog.Error(err)
} else {
os.Remove(indexFileName)
glog.Fatal(err)
}
}

View File

@ -36,8 +36,8 @@ func LoadCompactNeedleMap(file *os.File) (*NeedleMap, error) {
func doLoading(file *os.File, nm *NeedleMap) (*NeedleMap, error) {
e := idx.WalkIndexFile(file, 0, func(key NeedleId, offset Offset, size Size) error {
nm.MaybeSetMaxFileKey(key)
nm.FileCounter++
if !offset.IsZero() && size.IsValid() {
nm.FileCounter++
nm.FileByteCounter = nm.FileByteCounter + uint64(size)
oldOffset, oldSize := nm.m.Set(NeedleId(key), offset, size)
if !oldOffset.IsZero() && oldSize.IsValid() {
@ -51,7 +51,7 @@ func doLoading(file *os.File, nm *NeedleMap) (*NeedleMap, error) {
}
return nil
})
glog.V(1).Infof("max file key: %d for file: %s", nm.MaxFileKey(), file.Name())
glog.V(1).Infof("max file key: %v count: %d deleted: %d for file: %s", nm.MaxFileKey(), nm.FileCount(), nm.DeletedCount(), file.Name())
return nm, e
}

View File

@ -109,9 +109,6 @@ func verifyNeedleIntegrity(datFile backend.BackendStorageFile, v needle.Version,
return 0, fmt.Errorf("verifyNeedleIntegrity check %s entry offset %d size %d: %v", datFile.Name(), offset, size, err)
}
n.AppendAtNs = util.BytesToUint64(bytes)
if n.HasTtl() {
return n.AppendAtNs, nil
}
fileTailOffset := offset + needle.GetActualSize(size, v)
fileSize, _, err := datFile.GetStat()
if err != nil {
@ -130,7 +127,7 @@ func verifyNeedleIntegrity(datFile backend.BackendStorageFile, v needle.Version,
return n.AppendAtNs, fmt.Errorf("read data [%d,%d) : %v", offset, offset+int64(size), err)
}
if n.Id != key {
return n.AppendAtNs, fmt.Errorf("index key %#x does not match needle's Id %#x", key, n.Id)
return n.AppendAtNs, fmt.Errorf("index key %v does not match needle's Id %v", key, n.Id)
}
return n.AppendAtNs, err
}
@ -147,7 +144,7 @@ func verifyDeletedNeedleIntegrity(datFile backend.BackendStorageFile, v needle.V
return n.AppendAtNs, fmt.Errorf("read data [%d,%d) : %v", fileSize-size, size, err)
}
if n.Id != key {
return n.AppendAtNs, fmt.Errorf("index key %#x does not match needle's Id %#x", key, n.Id)
return n.AppendAtNs, fmt.Errorf("index key %v does not match needle's Id %v", key, n.Id)
}
return n.AppendAtNs, err
}

View File

@ -487,19 +487,21 @@ func (v *Volume) copyDataBasedOnIndexFile(srcDatName, srcIdxName, dstDatName, da
if err != nil {
return err
}
dstDatSize, _, err := dstDatBackend.GetStat()
if err != nil {
return err
}
if v.nm.ContentSize() > v.nm.DeletedSize() {
expectedContentSize := v.nm.ContentSize() - v.nm.DeletedSize()
if expectedContentSize > uint64(dstDatSize) {
return fmt.Errorf("volume %s unexpected new data size: %d does not match size of content minus deleted: %d",
v.Id.String(), dstDatSize, expectedContentSize)
if v.Ttl.String() == "" {
dstDatSize, _, err := dstDatBackend.GetStat()
if err != nil {
return err
}
if v.nm.ContentSize() > v.nm.DeletedSize() {
expectedContentSize := v.nm.ContentSize() - v.nm.DeletedSize()
if expectedContentSize > uint64(dstDatSize) {
return fmt.Errorf("volume %s unexpected new data size: %d does not match size of content minus deleted: %d",
v.Id.String(), dstDatSize, expectedContentSize)
}
} else {
glog.Warningf("volume %s content size: %d less deleted size: %d, new size: %d",
v.Id.String(), v.nm.ContentSize(), v.nm.DeletedSize(), dstDatSize)
}
} else {
glog.Warningf("volume %s content size: %d less deleted size: %d, new size: %d",
v.Id.String(), v.nm.ContentSize(), v.nm.DeletedSize(), dstDatSize)
}
err = newNm.SaveToIdx(datIdxName)
if err != nil {