2019-05-06 12:17:23 +08:00
package shell
import (
"context"
"flag"
"fmt"
"io"
"os"
"sort"
"time"
"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
"github.com/chrislusf/seaweedfs/weed/storage/needle"
)
func init ( ) {
2019-06-05 16:30:24 +08:00
Commands = append ( Commands , & commandVolumeBalance { } )
2019-05-06 12:17:23 +08:00
}
// commandVolumeBalance is the receiver type for the volume.balance command.
// It has no fields.
type commandVolumeBalance struct{}
// Name returns the command name, "volume.balance".
func (c *commandVolumeBalance) Name() string {
	const commandName = "volume.balance"
	return commandName
}
// Help returns the usage text for the volume.balance command: a one-line
// summary, the accepted flags, and a pseudo-code outline of the balancing
// algorithm.
//
// The text is kept in sync with the implementation in this file:
//   - the special -collection value is "ALL_COLLECTIONS" (not "ALL"), which is
//     the only spelling the command compares against;
//   - a volume is moved from the fullest server (B) to a less loaded server
//     (A), not the other way round;
//   - the ideal per-server count is rounded up.
func (c *commandVolumeBalance) Help() string {
	return `balance all volumes among volume servers

	volume.balance [-collection ALL_COLLECTIONS|EACH_COLLECTION|<collection_name>] [-force] [-dataCenter=<data_center_name>]

	Algorithm:

	For each type of volume server (different max volume count limit){
		for each collection {
			balanceWritableVolumes()
			balanceReadOnlyVolumes()
		}
	}

	func balanceWritableVolumes(){
		idealWritableVolumes = ceil(totalWritableVolumes / numVolumeServers)
		for hasMovedOneVolume {
			sort all volume servers ordered by the number of local writable volumes
			pick the volume server B with the highest number of writable volumes y
			for each volume server A with the number of writable volumes x + 1 <= idealWritableVolumes {
				if y > idealWritableVolumes and x + 1 <= idealWritableVolumes {
					if B has a writable volume id v that A does not have, and satisfy v replication requirements {
						move writable volume v from B to A
					}
				}
			}
		}
	}
	func balanceReadOnlyVolumes(){
		//similar to balanceWritableVolumes
	}

`
}
2019-06-05 16:30:24 +08:00
func ( c * commandVolumeBalance ) Do ( args [ ] string , commandEnv * CommandEnv , writer io . Writer ) ( err error ) {
2019-05-06 12:17:23 +08:00
2020-04-24 04:37:31 +08:00
if err = commandEnv . confirmIsLocked ( ) ; err != nil {
return
}
2019-05-06 12:17:23 +08:00
balanceCommand := flag . NewFlagSet ( c . Name ( ) , flag . ContinueOnError )
2019-06-03 17:26:31 +08:00
collection := balanceCommand . String ( "collection" , "EACH_COLLECTION" , "collection name, or use \"ALL_COLLECTIONS\" across collections, \"EACH_COLLECTION\" for each collection" )
2019-05-07 04:30:12 +08:00
dc := balanceCommand . String ( "dataCenter" , "" , "only apply the balancing for this dataCenter" )
2019-06-03 17:26:31 +08:00
applyBalancing := balanceCommand . Bool ( "force" , false , "apply the balancing plan." )
2019-05-06 12:17:23 +08:00
if err = balanceCommand . Parse ( args ) ; err != nil {
return nil
}
var resp * master_pb . VolumeListResponse
2020-02-26 13:50:12 +08:00
err = commandEnv . MasterClient . WithClient ( func ( client master_pb . SeaweedClient ) error {
resp , err = client . VolumeList ( context . Background ( ) , & master_pb . VolumeListRequest { } )
2019-05-06 12:17:23 +08:00
return err
} )
if err != nil {
return err
}
2019-05-07 04:30:12 +08:00
typeToNodes := collectVolumeServersByType ( resp . TopologyInfo , * dc )
2019-12-24 09:58:47 +08:00
for maxVolumeCount , volumeServers := range typeToNodes {
2019-05-06 12:17:23 +08:00
if len ( volumeServers ) < 2 {
2019-12-24 09:58:47 +08:00
fmt . Printf ( "only 1 node is configured max %d volumes, skipping balancing\n" , maxVolumeCount )
2019-05-06 12:17:23 +08:00
continue
}
2019-05-07 05:12:19 +08:00
if * collection == "EACH_COLLECTION" {
2019-05-31 00:27:23 +08:00
collections , err := ListCollectionNames ( commandEnv , true , false )
2019-05-06 13:28:14 +08:00
if err != nil {
return err
}
for _ , c := range collections {
if err = balanceVolumeServers ( commandEnv , volumeServers , resp . VolumeSizeLimitMb * 1024 * 1024 , c , * applyBalancing ) ; err != nil {
return err
}
}
2019-12-24 09:58:47 +08:00
} else if * collection == "ALL_COLLECTIONS" {
if err = balanceVolumeServers ( commandEnv , volumeServers , resp . VolumeSizeLimitMb * 1024 * 1024 , "ALL_COLLECTIONS" , * applyBalancing ) ; err != nil {
2019-05-06 13:28:14 +08:00
return err
}
} else {
if err = balanceVolumeServers ( commandEnv , volumeServers , resp . VolumeSizeLimitMb * 1024 * 1024 , * collection , * applyBalancing ) ; err != nil {
return err
}
2019-05-06 12:58:46 +08:00
}
2019-05-06 13:28:14 +08:00
2019-05-06 12:17:23 +08:00
}
return nil
}
2020-02-22 13:23:25 +08:00
func balanceVolumeServers ( commandEnv * CommandEnv , nodes [ ] * Node , volumeSizeLimit uint64 , collection string , applyBalancing bool ) error {
2019-05-06 12:17:23 +08:00
// balance writable volumes
for _ , n := range nodes {
2019-05-06 13:28:14 +08:00
n . selectVolumes ( func ( v * master_pb . VolumeInformationMessage ) bool {
2019-12-24 09:58:47 +08:00
if collection != "ALL_COLLECTIONS" {
2019-05-06 12:17:23 +08:00
if v . Collection != collection {
return false
}
}
return ! v . ReadOnly && v . Size < volumeSizeLimit
} )
}
2019-05-06 12:58:46 +08:00
if err := balanceSelectedVolume ( commandEnv , nodes , sortWritableVolumes , applyBalancing ) ; err != nil {
return err
}
2019-05-06 12:17:23 +08:00
// balance readable volumes
for _ , n := range nodes {
2019-05-06 13:28:14 +08:00
n . selectVolumes ( func ( v * master_pb . VolumeInformationMessage ) bool {
2019-12-24 09:58:47 +08:00
if collection != "ALL_COLLECTIONS" {
2019-05-06 12:17:23 +08:00
if v . Collection != collection {
return false
}
}
return v . ReadOnly || v . Size >= volumeSizeLimit
} )
}
2019-05-06 12:58:46 +08:00
if err := balanceSelectedVolume ( commandEnv , nodes , sortReadOnlyVolumes , applyBalancing ) ; err != nil {
return err
}
return nil
2019-05-06 12:17:23 +08:00
}
2020-02-22 13:23:25 +08:00
func collectVolumeServersByType ( t * master_pb . TopologyInfo , selectedDataCenter string ) ( typeToNodes map [ uint64 ] [ ] * Node ) {
typeToNodes = make ( map [ uint64 ] [ ] * Node )
2019-05-06 12:17:23 +08:00
for _ , dc := range t . DataCenterInfos {
2019-05-07 04:30:12 +08:00
if selectedDataCenter != "" && dc . Id != selectedDataCenter {
continue
}
2019-05-06 12:17:23 +08:00
for _ , r := range dc . RackInfos {
for _ , dn := range r . DataNodeInfos {
2020-02-22 13:23:25 +08:00
typeToNodes [ dn . MaxVolumeCount ] = append ( typeToNodes [ dn . MaxVolumeCount ] , & Node {
info : dn ,
dc : dc . Id ,
rack : r . Id ,
} )
2019-05-06 12:17:23 +08:00
}
}
}
return
}
type Node struct {
info * master_pb . DataNodeInfo
selectedVolumes map [ uint32 ] * master_pb . VolumeInformationMessage
2020-02-22 13:23:25 +08:00
dc string
rack string
2019-05-06 12:17:23 +08:00
}
// sortWritableVolumes sorts volumes in place by ascending Size.
func sortWritableVolumes(volumes []*master_pb.VolumeInformationMessage) {
	smallerFirst := func(a, b int) bool {
		return volumes[b].Size > volumes[a].Size
	}
	sort.Slice(volumes, smallerFirst)
}
// sortReadOnlyVolumes sorts volumes in place by ascending Id.
func sortReadOnlyVolumes(volumes []*master_pb.VolumeInformationMessage) {
	lowerIDFirst := func(a, b int) bool {
		return volumes[b].Id > volumes[a].Id
	}
	sort.Slice(volumes, lowerIDFirst)
}
2020-09-11 14:05:00 +08:00
func balanceSelectedVolume ( commandEnv * CommandEnv , nodes [ ] * Node , sortCandidatesFn func ( volumes [ ] * master_pb . VolumeInformationMessage ) , applyBalancing bool ) ( err error ) {
2019-05-06 12:17:23 +08:00
selectedVolumeCount := 0
for _ , dn := range nodes {
selectedVolumeCount += len ( dn . selectedVolumes )
}
2019-06-11 12:32:56 +08:00
idealSelectedVolumes := ceilDivide ( selectedVolumeCount , len ( nodes ) )
2019-05-06 12:17:23 +08:00
2020-09-11 14:05:00 +08:00
hasMoved := true
2019-05-06 12:17:23 +08:00
2020-09-11 14:05:00 +08:00
for hasMoved {
hasMoved = false
2019-05-06 12:17:23 +08:00
sort . Slice ( nodes , func ( i , j int ) bool {
2019-06-11 12:32:56 +08:00
// TODO sort by free volume slots???
2019-05-06 12:17:23 +08:00
return len ( nodes [ i ] . selectedVolumes ) < len ( nodes [ j ] . selectedVolumes )
} )
2020-09-11 14:05:00 +08:00
fullNode := nodes [ len ( nodes ) - 1 ]
var candidateVolumes [ ] * master_pb . VolumeInformationMessage
for _ , v := range fullNode . selectedVolumes {
candidateVolumes = append ( candidateVolumes , v )
}
sortCandidatesFn ( candidateVolumes )
for i := 0 ; i < len ( nodes ) - 1 ; i ++ {
emptyNode := nodes [ i ]
if ! ( len ( fullNode . selectedVolumes ) > idealSelectedVolumes && len ( emptyNode . selectedVolumes ) + 1 <= idealSelectedVolumes ) {
// no more volume servers with empty slots
break
2019-05-06 12:17:23 +08:00
}
2020-09-11 14:05:00 +08:00
hasMoved , err = attemptToMoveOneVolume ( commandEnv , fullNode , candidateVolumes , emptyNode , applyBalancing )
if err != nil {
return
}
if hasMoved {
// moved one volume
break
2019-05-06 12:17:23 +08:00
}
}
}
2019-05-06 12:58:46 +08:00
return nil
2019-05-06 12:17:23 +08:00
}
2020-09-11 14:05:00 +08:00
// attemptToMoveOneVolume walks candidateVolumes in order and moves the first
// eligible one from fullNode to emptyNode.
//
// A candidate is skipped when:
//   - its ReplicaPlacement is non-zero and the two nodes differ in both data
//     center and rack, or
//   - emptyNode already has a selected volume with the same id.
//
// On a successful moveVolume call the volume is removed from fullNode's
// selection, added to emptyNode's, and (true, nil) is returned. A moveVolume
// error stops the search and is returned with hasMoved false. If no candidate
// is eligible the result is (false, nil).
func attemptToMoveOneVolume(commandEnv *CommandEnv, fullNode *Node, candidateVolumes []*master_pb.VolumeInformationMessage, emptyNode *Node, applyBalancing bool) (hasMoved bool, err error) {
	for _, v := range candidateVolumes {
		// TODO this logic is too simple, but should work most of the time
		// Need a correct algorithm to handle all different cases
		if v.ReplicaPlacement > 0 && fullNode.dc != emptyNode.dc && fullNode.rack != emptyNode.rack {
			continue
		}

		if _, alreadyThere := emptyNode.selectedVolumes[v.Id]; alreadyThere {
			continue
		}

		if err = moveVolume(commandEnv, v, fullNode, emptyNode, applyBalancing); err != nil {
			return false, err
		}

		delete(fullNode.selectedVolumes, v.Id)
		emptyNode.selectedVolumes[v.Id] = v
		return true, nil
	}
	return false, nil
}
2019-06-05 16:30:24 +08:00
func moveVolume ( commandEnv * CommandEnv , v * master_pb . VolumeInformationMessage , fullNode * Node , emptyNode * Node , applyBalancing bool ) error {
2019-05-06 12:17:23 +08:00
collectionPrefix := v . Collection + "_"
if v . Collection == "" {
collectionPrefix = ""
}
fmt . Fprintf ( os . Stdout , "moving volume %s%d %s => %s\n" , collectionPrefix , v . Id , fullNode . info . Id , emptyNode . info . Id )
if applyBalancing {
2020-02-26 13:50:12 +08:00
return LiveMoveVolume ( commandEnv . option . GrpcDialOption , needle . VolumeId ( v . Id ) , fullNode . info . Id , emptyNode . info . Id , 5 * time . Second )
2019-05-06 12:17:23 +08:00
}
2019-05-06 12:58:46 +08:00
return nil
2019-05-06 12:17:23 +08:00
}
2019-05-06 13:28:14 +08:00
func ( node * Node ) selectVolumes ( fn func ( v * master_pb . VolumeInformationMessage ) bool ) {
2019-05-06 12:17:23 +08:00
node . selectedVolumes = make ( map [ uint32 ] * master_pb . VolumeInformationMessage )
for _ , v := range node . info . VolumeInfos {
if fn ( v ) {
node . selectedVolumes [ v . Id ] = v
}
}
}