2019-03-24 02:34:09 +08:00
package shell
import (
"context"
2020-09-21 00:27:34 +08:00
"flag"
2019-03-24 02:34:09 +08:00
"fmt"
2024-04-23 21:33:50 +08:00
"io"
"path/filepath"
"strconv"
"time"
2022-07-29 15:17:28 +08:00
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
2023-09-28 14:12:10 +08:00
"github.com/seaweedfs/seaweedfs/weed/storage/needle_map"
2022-07-29 15:17:28 +08:00
"github.com/seaweedfs/seaweedfs/weed/storage/types"
2022-04-18 10:35:43 +08:00
"golang.org/x/exp/slices"
2023-09-28 14:12:10 +08:00
"google.golang.org/grpc"
2019-12-24 04:48:20 +08:00
2022-07-29 15:17:28 +08:00
"github.com/seaweedfs/seaweedfs/weed/operation"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
2019-03-24 02:34:09 +08:00
)
func init ( ) {
2019-06-05 16:30:24 +08:00
Commands = append ( Commands , & commandVolumeFixReplication { } )
2019-03-24 02:34:09 +08:00
}
// commandVolumeFixReplication is the receiver type for the
// "volume.fix.replication" command methods.
type commandVolumeFixReplication struct {
	// collectionPattern points at the value of the -collectionPattern flag.
	// It is assigned by Do on every invocation and read by the delete and
	// fix helpers to filter volumes by collection name.
	collectionPattern *string
}
// Name returns the command name, "volume.fix.replication". Do also uses it
// as the name of its flag set.
func ( c * commandVolumeFixReplication ) Name ( ) string {
return "volume.fix.replication"
}
func ( c * commandVolumeFixReplication ) Help ( ) string {
2021-08-30 13:19:25 +08:00
return ` add or remove replicas to volumes that are missing replicas or over - replicated
2019-03-24 02:34:09 +08:00
2020-09-08 07:00:10 +08:00
This command finds all over - replicated volumes . If found , it will purge the oldest copies and stop .
This command also finds all under - replicated volumes , and finds volume servers with free slots .
2019-03-24 02:54:26 +08:00
If the free slots satisfy the replication requirement , the volume content is copied over and mounted .
2021-01-27 14:30:37 +08:00
volume . fix . replication - n # do not take action
volume . fix . replication # actually deleting or copying the volume files and mount the volume
volume . fix . replication - collectionPattern = important * # fix any collections with prefix "important"
2019-03-24 02:54:26 +08:00
2019-03-24 03:57:35 +08:00
Note :
2021-01-27 16:48:31 +08:00
* each time this will only add back one replica for each volume id that is under replicated .
If there are multiple replicas are missing , e . g . replica count is > 2 , you may need to run this multiple times .
2020-09-08 02:31:33 +08:00
* do not run this too quickly within seconds , since the new volume replica may take a few seconds
2019-03-24 03:57:35 +08:00
to register itself to the master .
2019-03-24 02:34:09 +08:00
`
}
2024-09-30 01:51:17 +08:00
func ( c * commandVolumeFixReplication ) HasTag ( tag CommandTag ) bool {
2024-09-30 02:32:18 +08:00
return false && tag == ResourceHeavy // resource intensive only when deleting and checking with replicas.
2024-09-29 11:22:57 +08:00
}
2019-06-05 16:30:24 +08:00
func ( c * commandVolumeFixReplication ) Do ( args [ ] string , commandEnv * CommandEnv , writer io . Writer ) ( err error ) {
2019-03-24 02:34:09 +08:00
2020-09-21 00:27:34 +08:00
volFixReplicationCommand := flag . NewFlagSet ( c . Name ( ) , flag . ContinueOnError )
2021-01-27 14:30:37 +08:00
c . collectionPattern = volFixReplicationCommand . String ( "collectionPattern" , "" , "match with wildcard characters '*' and '?'" )
2020-09-21 00:27:34 +08:00
skipChange := volFixReplicationCommand . Bool ( "n" , false , "skip the changes" )
2023-09-28 14:12:10 +08:00
doDelete := volFixReplicationCommand . Bool ( "doDelete" , true , "Also delete over-replicated volumes besides fixing under-replication" )
doCheck := volFixReplicationCommand . Bool ( "doCheck" , true , "Also check synchronization before deleting" )
2023-02-23 02:47:52 +08:00
retryCount := volFixReplicationCommand . Int ( "retry" , 5 , "how many times to retry" )
2021-10-01 03:17:54 +08:00
volumesPerStep := volFixReplicationCommand . Int ( "volumesPerStep" , 0 , "how many volumes to fix in one cycle" )
2021-09-30 23:24:24 +08:00
2020-09-21 00:27:34 +08:00
if err = volFixReplicationCommand . Parse ( args ) ; err != nil {
return nil
2019-03-24 02:34:09 +08:00
}
2021-12-11 05:24:38 +08:00
if err = commandEnv . confirmIsLocked ( args ) ; err != nil {
2021-09-14 13:13:34 +08:00
return
}
2020-09-21 00:27:34 +08:00
takeAction := ! * skipChange
2021-09-30 23:24:24 +08:00
underReplicatedVolumeIdsCount := 1
for underReplicatedVolumeIdsCount > 0 {
2021-10-01 21:51:22 +08:00
fixedVolumeReplicas := map [ string ] int { }
2021-09-30 23:24:24 +08:00
// collect topology information
2022-02-08 16:53:55 +08:00
topologyInfo , _ , err := collectTopologyInfo ( commandEnv , 15 * time . Second )
2021-09-30 23:24:24 +08:00
if err != nil {
return err
}
2019-03-24 02:34:09 +08:00
2021-09-30 23:24:24 +08:00
// find all volumes that needs replication
// collect all data nodes
volumeReplicas , allLocations := collectVolumeReplicaLocations ( topologyInfo )
2020-09-10 02:21:23 +08:00
2021-09-30 23:24:24 +08:00
if len ( allLocations ) == 0 {
return fmt . Errorf ( "no data nodes at all" )
2019-03-24 02:34:09 +08:00
}
2021-09-30 23:24:24 +08:00
// find all under replicated volumes
2021-12-06 08:56:25 +08:00
var underReplicatedVolumeIds , overReplicatedVolumeIds , misplacedVolumeIds [ ] uint32
2021-09-30 23:24:24 +08:00
for vid , replicas := range volumeReplicas {
replica := replicas [ 0 ]
replicaPlacement , _ := super_block . NewReplicaPlacementFromByte ( byte ( replica . info . ReplicaPlacement ) )
2023-09-26 15:20:48 +08:00
switch {
2024-09-26 23:34:13 +08:00
case replicaPlacement . GetCopyCount ( ) > len ( replicas ) || ! satisfyReplicaCurrentLocation ( replicaPlacement , replicas ) :
2021-09-30 23:24:24 +08:00
underReplicatedVolumeIds = append ( underReplicatedVolumeIds , vid )
2023-09-26 15:20:48 +08:00
case isMisplaced ( replicas , replicaPlacement ) :
misplacedVolumeIds = append ( misplacedVolumeIds , vid )
2023-10-02 23:29:09 +08:00
fmt . Fprintf ( writer , "volume %d replication %s is not well placed %s\n" , replica . info . Id , replicaPlacement , replica . location . dataNode . Id )
2023-09-26 15:20:48 +08:00
case replicaPlacement . GetCopyCount ( ) < len ( replicas ) :
2021-09-30 23:24:24 +08:00
overReplicatedVolumeIds = append ( overReplicatedVolumeIds , vid )
fmt . Fprintf ( writer , "volume %d replication %s, but over replicated %+d\n" , replica . info . Id , replicaPlacement , len ( replicas ) )
}
}
2020-09-08 07:00:10 +08:00
2022-08-23 05:12:23 +08:00
if ! commandEnv . isLocked ( ) {
return fmt . Errorf ( "lock is lost" )
}
2023-09-28 14:12:10 +08:00
if len ( overReplicatedVolumeIds ) > 0 && * doDelete {
if err := c . deleteOneVolume ( commandEnv , writer , takeAction , * doCheck , overReplicatedVolumeIds , volumeReplicas , allLocations , pickOneReplicaToDelete ) ; err != nil {
2021-12-06 08:56:25 +08:00
return err
}
}
2023-09-28 14:12:10 +08:00
if len ( misplacedVolumeIds ) > 0 && * doDelete {
if err := c . deleteOneVolume ( commandEnv , writer , takeAction , * doCheck , misplacedVolumeIds , volumeReplicas , allLocations , pickOneMisplacedVolume ) ; err != nil {
2021-09-30 23:24:24 +08:00
return err
}
}
2019-03-24 02:34:09 +08:00
2021-09-30 23:24:24 +08:00
underReplicatedVolumeIdsCount = len ( underReplicatedVolumeIds )
if underReplicatedVolumeIdsCount > 0 {
2024-11-20 00:31:33 +08:00
// find the most underpopulated data nodes
2021-10-02 03:10:11 +08:00
fixedVolumeReplicas , err = c . fixUnderReplicatedVolumes ( commandEnv , writer , takeAction , underReplicatedVolumeIds , volumeReplicas , allLocations , * retryCount , * volumesPerStep )
2021-10-01 21:51:22 +08:00
if err != nil {
2021-09-30 23:24:24 +08:00
return err
}
}
2020-09-08 02:31:33 +08:00
2021-09-30 23:24:24 +08:00
if * skipChange {
break
}
2021-10-01 21:51:22 +08:00
// check that the topology has been updated
if len ( fixedVolumeReplicas ) > 0 {
fixedVolumes := make ( [ ] string , 0 , len ( fixedVolumeReplicas ) )
for k , _ := range fixedVolumeReplicas {
fixedVolumes = append ( fixedVolumes , k )
}
2021-10-02 03:10:11 +08:00
volumeIdLocations , err := lookupVolumeIds ( commandEnv , fixedVolumes )
2021-10-01 21:51:22 +08:00
if err != nil {
return err
}
for _ , volumeIdLocation := range volumeIdLocations {
volumeId := volumeIdLocation . VolumeOrFileId
volumeIdLocationCount := len ( volumeIdLocation . Locations )
i := 0
for fixedVolumeReplicas [ volumeId ] >= volumeIdLocationCount {
fmt . Fprintf ( writer , "the number of locations for volume %s has not increased yet, let's wait\n" , volumeId )
time . Sleep ( time . Duration ( i + 1 ) * time . Second * 7 )
2021-10-02 03:10:11 +08:00
volumeLocIds , err := lookupVolumeIds ( commandEnv , [ ] string { volumeId } )
2021-10-01 21:51:22 +08:00
if err != nil {
return err
}
volumeIdLocationCount = len ( volumeLocIds [ 0 ] . Locations )
2022-06-03 23:45:29 +08:00
if * retryCount <= i {
2021-10-01 21:51:22 +08:00
return fmt . Errorf ( "replicas volume %s mismatch in topology" , volumeId )
}
i += 1
}
}
}
2021-09-30 23:24:24 +08:00
}
return nil
2020-09-08 02:31:33 +08:00
}
2021-02-22 16:28:42 +08:00
func collectVolumeReplicaLocations ( topologyInfo * master_pb . TopologyInfo ) ( map [ uint32 ] [ ] * VolumeReplica , [ ] location ) {
2020-09-11 15:29:25 +08:00
volumeReplicas := make ( map [ uint32 ] [ ] * VolumeReplica )
var allLocations [ ] location
2024-11-19 22:33:18 +08:00
eachDataNode ( topologyInfo , func ( dc DataCenterId , rack RackId , dn * master_pb . DataNodeInfo ) {
loc := newLocation ( string ( dc ) , string ( rack ) , dn )
2021-02-16 18:47:02 +08:00
for _ , diskInfo := range dn . DiskInfos {
for _ , v := range diskInfo . VolumeInfos {
volumeReplicas [ v . Id ] = append ( volumeReplicas [ v . Id ] , & VolumeReplica {
location : & loc ,
info : v ,
} )
}
2020-09-11 15:29:25 +08:00
}
allLocations = append ( allLocations , loc )
} )
return volumeReplicas , allLocations
}
2021-12-06 08:56:25 +08:00
// SelectOneVolumeFunc picks one replica out of replicas, given the volume's
// replica placement. deleteOneVolume treats the returned replica as the one
// to delete.
type SelectOneVolumeFunc func ( replicas [ ] * VolumeReplica , replicaPlacement * super_block . ReplicaPlacement ) * VolumeReplica
2023-09-28 14:12:10 +08:00
// checkOneVolume loads the index of replica a and of replica b into two
// in-memory needle maps and then runs doVolumeCheckDisk with a as source and
// b as target. Both in-memory maps are closed before returning.
//
// It returns a non-nil error if either index cannot be read or if
// doVolumeCheckDisk fails; each error names the data node id(s) and the
// volume id involved.
func checkOneVolume(a *VolumeReplica, b *VolumeReplica, writer io.Writer, grpcDialOption grpc.DialOption) (err error) {
	aDB, bDB := needle_map.NewMemDb(), needle_map.NewMemDb()
	defer func() {
		aDB.Close()
		bDB.Close()
	}()

	// read index db
	readIndexDbCutoffFrom := uint64(time.Now().UnixNano())
	if err = readIndexDatabase(aDB, a.info.Collection, a.info.Id, pb.NewServerAddressFromDataNode(a.location.dataNode), false, writer, grpcDialOption); err != nil {
		// report the node id only, not the whole data node message
		return fmt.Errorf("readIndexDatabase %s volume %d: %v", a.location.dataNode.Id, a.info.Id, err)
	}
	if err = readIndexDatabase(bDB, b.info.Collection, b.info.Id, pb.NewServerAddressFromDataNode(b.location.dataNode), false, writer, grpcDialOption); err != nil {
		return fmt.Errorf("readIndexDatabase %s volume %d: %v", b.location.dataNode.Id, b.info.Id, err)
	}
	if _, err = doVolumeCheckDisk(aDB, bDB, a, b, false, writer, true, false, float64(1), readIndexDbCutoffFrom, grpcDialOption); err != nil {
		return fmt.Errorf("doVolumeCheckDisk source:%s target:%s volume %d: %v", a.location.dataNode.Id, b.location.dataNode.Id, a.info.Id, err)
	}
	return nil
}
func ( c * commandVolumeFixReplication ) deleteOneVolume ( commandEnv * CommandEnv , writer io . Writer , takeAction bool , doCheck bool , overReplicatedVolumeIds [ ] uint32 , volumeReplicas map [ uint32 ] [ ] * VolumeReplica , allLocations [ ] location , selectOneVolumeFn SelectOneVolumeFunc ) error {
2020-09-08 07:00:10 +08:00
for _ , vid := range overReplicatedVolumeIds {
replicas := volumeReplicas [ vid ]
replicaPlacement , _ := super_block . NewReplicaPlacementFromByte ( byte ( replicas [ 0 ] . info . ReplicaPlacement ) )
2021-12-06 08:56:25 +08:00
replica := selectOneVolumeFn ( replicas , replicaPlacement )
2020-09-08 07:00:10 +08:00
2021-01-27 14:30:37 +08:00
// check collection name pattern
if * c . collectionPattern != "" {
matched , err := filepath . Match ( * c . collectionPattern , replica . info . Collection )
if err != nil {
return fmt . Errorf ( "match pattern %s with collection %s: %v" , * c . collectionPattern , replica . info . Collection , err )
}
if ! matched {
break
}
}
2022-05-16 16:23:23 +08:00
collectionIsMismatch := false
2022-05-16 16:07:05 +08:00
for _ , volumeReplica := range replicas {
if volumeReplica . info . Collection != replica . info . Collection {
fmt . Fprintf ( writer , "skip delete volume %d as collection %s is mismatch: %s\n" , replica . info . Id , replica . info . Collection , volumeReplica . info . Collection )
2022-05-16 16:23:23 +08:00
collectionIsMismatch = true
2022-05-16 16:07:05 +08:00
}
}
2022-05-16 16:23:23 +08:00
if collectionIsMismatch {
2022-05-16 16:16:27 +08:00
continue
}
2022-05-16 16:07:05 +08:00
2020-09-08 07:00:10 +08:00
fmt . Fprintf ( writer , "deleting volume %d from %s ...\n" , replica . info . Id , replica . location . dataNode . Id )
if ! takeAction {
break
}
2023-09-28 14:12:10 +08:00
if doCheck {
for _ , replicaB := range replicas {
if replicaB . location . dataNode == replica . location . dataNode {
continue
}
if err := checkOneVolume ( replica , replicaB , writer , commandEnv . option . GrpcDialOption ) ; err != nil {
return fmt . Errorf ( "sync volume %d on %s and %s: %v\n" , replica . info . Id , replica . location . dataNode . Id , replicaB . location . dataNode . Id , err )
}
}
}
2023-06-13 01:42:44 +08:00
if err := deleteVolume ( commandEnv . option . GrpcDialOption , needle . VolumeId ( replica . info . Id ) ,
pb . NewServerAddressFromDataNode ( replica . location . dataNode ) , false ) ; err != nil {
2020-09-08 07:00:10 +08:00
return fmt . Errorf ( "deleting volume %d from %s : %v" , replica . info . Id , replica . location . dataNode . Id , err )
}
}
return nil
}
2021-10-02 03:10:11 +08:00
func ( c * commandVolumeFixReplication ) fixUnderReplicatedVolumes ( commandEnv * CommandEnv , writer io . Writer , takeAction bool , underReplicatedVolumeIds [ ] uint32 , volumeReplicas map [ uint32 ] [ ] * VolumeReplica , allLocations [ ] location , retryCount int , volumesPerStep int ) ( fixedVolumes map [ string ] int , err error ) {
2021-10-01 21:51:22 +08:00
fixedVolumes = map [ string ] int { }
2021-10-01 03:17:54 +08:00
if len ( underReplicatedVolumeIds ) > volumesPerStep && volumesPerStep > 0 {
underReplicatedVolumeIds = underReplicatedVolumeIds [ 0 : volumesPerStep ]
2021-09-30 23:24:24 +08:00
}
2020-09-08 03:35:02 +08:00
for _ , vid := range underReplicatedVolumeIds {
2021-07-22 05:38:12 +08:00
for i := 0 ; i < retryCount + 1 ; i ++ {
2021-07-17 03:13:46 +08:00
if err = c . fixOneUnderReplicatedVolume ( commandEnv , writer , takeAction , volumeReplicas , vid , allLocations ) ; err == nil {
2021-10-01 21:51:22 +08:00
if takeAction {
fixedVolumes [ strconv . FormatUint ( uint64 ( vid ) , 10 ) ] = len ( volumeReplicas [ vid ] )
}
2021-08-04 00:32:55 +08:00
break
2023-08-09 11:35:21 +08:00
} else {
fmt . Fprintf ( writer , "fixing under replicated volume %d: %v\n" , vid , err )
2021-07-17 03:13:46 +08:00
}
2021-07-17 02:46:04 +08:00
}
}
2021-10-02 03:10:11 +08:00
return fixedVolumes , nil
2021-07-17 02:46:04 +08:00
}
2019-03-24 02:34:09 +08:00
2021-07-17 02:46:04 +08:00
func ( c * commandVolumeFixReplication ) fixOneUnderReplicatedVolume ( commandEnv * CommandEnv , writer io . Writer , takeAction bool , volumeReplicas map [ uint32 ] [ ] * VolumeReplica , vid uint32 , allLocations [ ] location ) error {
replicas := volumeReplicas [ vid ]
replica := pickOneReplicaToCopyFrom ( replicas )
replicaPlacement , _ := super_block . NewReplicaPlacementFromByte ( byte ( replica . info . ReplicaPlacement ) )
foundNewLocation := false
hasSkippedCollection := false
keepDataNodesSorted ( allLocations , types . ToDiskType ( replica . info . DiskType ) )
fn := capacityByFreeVolumeCount ( types . ToDiskType ( replica . info . DiskType ) )
for _ , dst := range allLocations {
// check whether data nodes satisfy the constraints
if fn ( dst . dataNode ) > 0 && satisfyReplicaPlacement ( replicaPlacement , replicas , dst ) {
// check collection name pattern
if * c . collectionPattern != "" {
matched , err := filepath . Match ( * c . collectionPattern , replica . info . Collection )
if err != nil {
return fmt . Errorf ( "match pattern %s with collection %s: %v" , * c . collectionPattern , replica . info . Collection , err )
}
if ! matched {
hasSkippedCollection = true
2019-03-24 02:34:09 +08:00
break
}
2021-07-17 02:46:04 +08:00
}
2019-03-24 02:34:09 +08:00
2021-07-17 02:46:04 +08:00
// ask the volume server to replicate the volume
foundNewLocation = true
fmt . Fprintf ( writer , "replicating volume %d %s from %s to dataNode %s ...\n" , replica . info . Id , replicaPlacement , replica . location . dataNode . Id , dst . dataNode . Id )
2019-03-24 02:34:09 +08:00
2021-07-17 02:46:04 +08:00
if ! takeAction {
2024-04-08 22:30:04 +08:00
// adjust volume count
2024-04-23 21:33:50 +08:00
addVolumeCount ( dst . dataNode . DiskInfos [ replica . info . DiskType ] , 1 )
2021-07-17 02:46:04 +08:00
break
}
2021-12-26 16:15:03 +08:00
err := operation . WithVolumeServerClient ( false , pb . NewServerAddressFromDataNode ( dst . dataNode ) , commandEnv . option . GrpcDialOption , func ( volumeServerClient volume_server_pb . VolumeServerClient ) error {
2021-10-24 17:52:56 +08:00
stream , replicateErr := volumeServerClient . VolumeCopy ( context . Background ( ) , & volume_server_pb . VolumeCopyRequest {
2021-07-17 02:46:04 +08:00
VolumeId : replica . info . Id ,
2021-09-13 13:47:52 +08:00
SourceDataNode : string ( pb . NewServerAddressFromDataNode ( replica . location . dataNode ) ) ,
2021-07-17 02:46:04 +08:00
} )
if replicateErr != nil {
return fmt . Errorf ( "copying from %s => %s : %v" , replica . location . dataNode . Id , dst . dataNode . Id , replicateErr )
2019-03-24 02:34:09 +08:00
}
2021-10-24 17:52:56 +08:00
for {
resp , recvErr := stream . Recv ( )
if recvErr != nil {
if recvErr == io . EOF {
break
} else {
return recvErr
}
}
if resp . ProcessedBytes > 0 {
fmt . Fprintf ( writer , "volume %d processed %d bytes\n" , replica . info . Id , resp . ProcessedBytes )
}
}
2021-07-17 02:46:04 +08:00
return nil
} )
2019-03-24 02:34:09 +08:00
2021-07-17 02:46:04 +08:00
if err != nil {
return err
2019-03-24 02:34:09 +08:00
}
2021-02-16 18:47:02 +08:00
2024-04-08 22:30:04 +08:00
// adjust volume count
2024-04-23 21:33:50 +08:00
addVolumeCount ( dst . dataNode . DiskInfos [ replica . info . DiskType ] , 1 )
2021-07-17 02:46:04 +08:00
break
2019-03-24 02:34:09 +08:00
}
2021-07-17 02:46:04 +08:00
}
2019-03-24 02:34:09 +08:00
2021-07-17 02:46:04 +08:00
if ! foundNewLocation && ! hasSkippedCollection {
fmt . Fprintf ( writer , "failed to place volume %d replica as %s, existing:%+v\n" , replica . info . Id , replicaPlacement , len ( replicas ) )
2019-03-24 02:34:09 +08:00
}
return nil
}
2024-04-23 21:33:50 +08:00
// addVolumeCount increases info.VolumeCount by count and decreases
// info.FreeVolumeCount by the same amount. A nil info is ignored.
func addVolumeCount(info *master_pb.DiskInfo, count int) {
	if info != nil {
		delta := int64(count)
		info.VolumeCount += delta
		info.FreeVolumeCount -= delta
	}
}
2021-02-22 17:30:07 +08:00
func keepDataNodesSorted ( dataNodes [ ] location , diskType types . DiskType ) {
fn := capacityByFreeVolumeCount ( diskType )
2023-09-26 00:35:16 +08:00
slices . SortFunc ( dataNodes , func ( a , b location ) int {
return int ( fn ( b . dataNode ) - fn ( a . dataNode ) )
2019-03-24 02:34:09 +08:00
} )
}
2024-09-26 23:34:13 +08:00
// satisfyReplicaCurrentLocation reports whether the existing replicas are
// spread widely enough for replicaPlacement: they must cover at least
// DiffDataCenterCount+1 data centers and DiffRackCount+1 racks, and, when
// SameRackCount is positive, at least one rack must hold SameRackCount+1 or
// more replicas.
func satisfyReplicaCurrentLocation(replicaPlacement *super_block.ReplicaPlacement, replicas []*VolumeReplica) bool {
	dcCounts, rackCounts, _ := countReplicas(replicas)

	enoughDataCenters := len(dcCounts) >= replicaPlacement.DiffDataCenterCount+1
	enoughRacks := len(rackCounts) >= replicaPlacement.DiffRackCount+1
	if !enoughDataCenters || !enoughRacks {
		return false
	}

	if replicaPlacement.SameRackCount <= 0 {
		return true
	}

	needed := replicaPlacement.SameRackCount + 1
	for _, onRack := range rackCounts {
		if onRack >= needed {
			return true
		}
	}
	return false
}
2020-04-02 17:16:16 +08:00
/ *
2022-09-15 14:06:44 +08:00
if on an existing data node {
return false
}
if different from existing dcs {
if lack on different dcs {
return true
} else {
return false
}
}
if not on primary dc {
return false
}
if different from existing racks {
if lack on different racks {
return true
} else {
return false
}
}
if not on primary rack {
return false
}
if lacks on same rack {
return true
} else {
return false
}
2020-04-02 17:16:16 +08:00
* /
2020-09-08 03:35:02 +08:00
func satisfyReplicaPlacement ( replicaPlacement * super_block . ReplicaPlacement , replicas [ ] * VolumeReplica , possibleLocation location ) bool {
2019-03-24 02:34:09 +08:00
2020-09-08 07:00:10 +08:00
existingDataCenters , _ , existingDataNodes := countReplicas ( replicas )
if _ , found := existingDataNodes [ possibleLocation . String ( ) ] ; found {
// avoid duplicated volume on the same data node
2020-04-02 17:16:16 +08:00
return false
}
primaryDataCenters , _ := findTopKeys ( existingDataCenters )
// ensure data center count is within limit
if _ , found := existingDataCenters [ possibleLocation . DataCenter ( ) ] ; ! found {
// different from existing dcs
if len ( existingDataCenters ) < replicaPlacement . DiffDataCenterCount + 1 {
// lack on different dcs
return true
} else {
// adding this would go over the different dcs limit
return false
}
}
// now this is same as one of the existing data center
if ! isAmong ( possibleLocation . DataCenter ( ) , primaryDataCenters ) {
// not on one of the primary dcs
return false
2019-03-24 02:34:09 +08:00
}
2020-04-02 17:16:16 +08:00
// now this is one of the primary dcs
2020-09-08 07:00:10 +08:00
primaryDcRacks := make ( map [ string ] int )
2020-09-08 03:35:02 +08:00
for _ , replica := range replicas {
if replica . location . DataCenter ( ) != possibleLocation . DataCenter ( ) {
2020-04-02 17:16:16 +08:00
continue
}
2020-09-08 07:00:10 +08:00
primaryDcRacks [ replica . location . Rack ( ) ] += 1
2020-04-02 17:16:16 +08:00
}
2020-09-08 07:00:10 +08:00
primaryRacks , _ := findTopKeys ( primaryDcRacks )
sameRackCount := primaryDcRacks [ possibleLocation . Rack ( ) ]
2020-04-02 17:16:16 +08:00
// ensure rack count is within limit
2020-09-08 07:00:10 +08:00
if _ , found := primaryDcRacks [ possibleLocation . Rack ( ) ] ; ! found {
2020-04-02 17:16:16 +08:00
// different from existing racks
2020-09-08 07:00:10 +08:00
if len ( primaryDcRacks ) < replicaPlacement . DiffRackCount + 1 {
2020-04-02 17:16:16 +08:00
// lack on different racks
return true
} else {
// adding this would go over the different racks limit
return false
}
}
// now this is same as one of the existing racks
if ! isAmong ( possibleLocation . Rack ( ) , primaryRacks ) {
// not on the primary rack
return false
2019-03-24 02:34:09 +08:00
}
2020-04-02 17:16:16 +08:00
// now this is on the primary rack
// different from existing data nodes
if sameRackCount < replicaPlacement . SameRackCount + 1 {
// lack on same rack
return true
} else {
// adding this would go over the same data node limit
return false
}
}
// findTopKeys returns the keys of m whose count equals the largest count in
// m, together with that count. The running maximum starts at 0, so keys with
// negative counts are never returned and an empty map yields (nil, 0). The
// order of the returned keys follows map iteration order and is therefore
// unspecified.
func findTopKeys(m map[string]int) (topKeys []string, max int) {
	for key, count := range m {
		switch {
		case count > max:
			// new maximum: restart the list with this key
			max = count
			topKeys = append(topKeys[:0], key)
		case count == max:
			topKeys = append(topKeys, key)
		}
	}
	return topKeys, max
}
// isAmong reports whether key is equal to one of the elements of keys.
func isAmong(key string, keys []string) bool {
	for i := 0; i < len(keys); i++ {
		if keys[i] == key {
			return true
		}
	}
	return false
}
2020-09-08 03:35:02 +08:00
// VolumeReplica is one copy of a volume: the volume information message
// reported for it, plus the location of the data node holding it.
type VolumeReplica struct {
// location is the data node (with its data center and rack) holding this copy
location * location
// info is the volume information message taken from that node's disk info
info * master_pb . VolumeInformationMessage
}
2019-03-24 02:34:09 +08:00
// location identifies a data node together with the data center and rack it
// was found under while walking the topology.
type location struct {
// dc is the data center name
dc string
// rack is the rack name; see Rack() for the data-center-qualified form
rack string
// dataNode is the data node info message for this node
dataNode * master_pb . DataNodeInfo
}
// newLocation builds a location value from a data center name, a rack name
// and a data node.
func newLocation(dc, rack string, dataNode *master_pb.DataNodeInfo) location {
	var loc location
	loc.dc, loc.rack, loc.dataNode = dc, rack, dataNode
	return loc
}
// String returns "<dc> <rack> <data node id>", which countReplicas uses as
// the per-data-node key.
func (l location) String() string {
	return l.dc + " " + l.rack + " " + l.dataNode.Id
}
// Rack returns the rack name qualified by its data center, "<dc> <rack>", so
// that racks with the same name in different data centers stay distinct.
func (l location) Rack() string {
	return l.dc + " " + l.rack
}
// DataCenter returns the data center name of this location.
func ( l location ) DataCenter ( ) string {
return l . dc
}
2020-09-08 07:00:10 +08:00
// pickOneReplicaToCopyFrom returns the replica with the largest
// ModifiedAtSecond; on a tie the earliest one in the slice wins. replicas
// must not be empty.
func pickOneReplicaToCopyFrom(replicas []*VolumeReplica) *VolumeReplica {
	newest := replicas[0]
	for i := 1; i < len(replicas); i++ {
		if candidate := replicas[i]; candidate.info.ModifiedAtSecond > newest.info.ModifiedAtSecond {
			newest = candidate
		}
	}
	return newest
}
// countReplicas tallies replicas three ways: per data center, per
// data-center-qualified rack, and per data node (keyed by location.String()).
// All three maps are non-nil even when replicas is empty.
func countReplicas(replicas []*VolumeReplica) (diffDc, diffRack, diffNode map[string]int) {
	diffDc, diffRack, diffNode = map[string]int{}, map[string]int{}, map[string]int{}
	for _, r := range replicas {
		loc := r.location
		diffDc[loc.DataCenter()]++
		diffRack[loc.Rack()]++
		diffNode[loc.String()]++
	}
	return diffDc, diffRack, diffNode
}
func pickOneReplicaToDelete ( replicas [ ] * VolumeReplica , replicaPlacement * super_block . ReplicaPlacement ) * VolumeReplica {
2023-09-26 00:35:16 +08:00
slices . SortFunc ( replicas , func ( a , b * VolumeReplica ) int {
2021-08-11 03:30:41 +08:00
if a . info . Size != b . info . Size {
2023-09-26 00:35:16 +08:00
return int ( a . info . Size - b . info . Size )
2020-09-08 07:00:10 +08:00
}
2020-11-11 04:26:05 +08:00
if a . info . ModifiedAtSecond != b . info . ModifiedAtSecond {
2023-09-26 00:35:16 +08:00
return int ( a . info . ModifiedAtSecond - b . info . ModifiedAtSecond )
2020-11-11 04:26:05 +08:00
}
2021-08-11 03:30:41 +08:00
if a . info . CompactRevision != b . info . CompactRevision {
2023-09-26 00:35:16 +08:00
return int ( a . info . CompactRevision - b . info . CompactRevision )
2020-11-11 04:26:05 +08:00
}
2023-09-26 00:35:16 +08:00
return 0
2020-11-11 04:26:05 +08:00
} )
return replicas [ 0 ]
2020-09-08 07:00:10 +08:00
}
2021-12-06 08:56:25 +08:00
// check and fix misplaced volumes
func isMisplaced ( replicas [ ] * VolumeReplica , replicaPlacement * super_block . ReplicaPlacement ) bool {
for i := 0 ; i < len ( replicas ) ; i ++ {
others := otherThan ( replicas , i )
2023-11-07 23:58:19 +08:00
if ! satisfyReplicaPlacement ( replicaPlacement , others , * replicas [ i ] . location ) {
return true
2021-12-06 08:56:25 +08:00
}
}
2023-11-07 23:58:19 +08:00
return false
2021-12-06 08:56:25 +08:00
}
// otherThan returns a new slice holding every element of replicas except the
// one at index, in the original order. The result is nil when nothing
// remains.
func otherThan(replicas []*VolumeReplica, index int) (others []*VolumeReplica) {
	for i, r := range replicas {
		if i == index {
			continue
		}
		others = append(others, r)
	}
	return others
}
// pickOneMisplacedVolume chooses which replica of a misplaced volume to
// delete. A replica is a candidate when the remaining replicas are no longer
// misplaced without it. pickOneReplicaToDelete then picks among the
// candidates, or among all replicas when there is no candidate.
func pickOneMisplacedVolume(replicas []*VolumeReplica, replicaPlacement *super_block.ReplicaPlacement) (toDelete *VolumeReplica) {
	var candidates []*VolumeReplica
	for i, r := range replicas {
		if isMisplaced(otherThan(replicas, i), replicaPlacement) {
			continue
		}
		candidates = append(candidates, r)
	}

	pool := replicas
	if len(candidates) > 0 {
		pool = candidates
	}
	return pickOneReplicaToDelete(pool, replicaPlacement)
}