[shell] volume.balance collect volume servers by dc rack node (#6191)

* chore: balance by rack

* fix: rm check lock

* fix: selected racks

* fix: selected nodes

* fix: containts

* fix: one collectVolumeServersByDcRackNode

* fix: revert lock and add lock

* fix: panic test

* revert noLock
This commit is contained in:
Konstantin Lebedev 2024-11-04 00:08:45 +05:00 committed by GitHub
parent 65fb8fad99
commit 5bddf0c085
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 18 additions and 8 deletions

View File

@ -1,3 +1,5 @@
.PHONY: test
BINARY = weed
SOURCE_DIR = .

View File

@ -6,6 +6,7 @@ import (
"fmt"
"io"
"os"
"strings"
"time"
"github.com/seaweedfs/seaweedfs/weed/pb"
@ -32,7 +33,7 @@ func (c *commandVolumeBalance) Name() string {
func (c *commandVolumeBalance) Help() string {
return `balance all volumes among volume servers
volume.balance [-collection ALL_COLLECTIONS|EACH_COLLECTION|<collection_name>] [-force] [-dataCenter=<data_center_name>]
volume.balance [-collection ALL_COLLECTIONS|EACH_COLLECTION|<collection_name>] [-force] [-dataCenter=<data_center_name>] [-racks=rack_name_one,rack_name_two] [-nodes=192.168.0.1:8080,192.168.0.2:8080]
Algorithm:
@ -73,6 +74,8 @@ func (c *commandVolumeBalance) Do(args []string, commandEnv *CommandEnv, writer
balanceCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
collection := balanceCommand.String("collection", "ALL_COLLECTIONS", "collection name, or use \"ALL_COLLECTIONS\" across collections, \"EACH_COLLECTION\" for each collection")
dc := balanceCommand.String("dataCenter", "", "only apply the balancing for this dataCenter")
racks := balanceCommand.String("racks", "", "only apply the balancing for this racks")
nodes := balanceCommand.String("nodes", "", "only apply the balancing for this nodes")
applyBalancing := balanceCommand.Bool("force", false, "apply the balancing plan.")
if err = balanceCommand.Parse(args); err != nil {
return nil
@ -84,12 +87,12 @@ func (c *commandVolumeBalance) Do(args []string, commandEnv *CommandEnv, writer
}
// collect topology information
topologyInfo, _, err := collectTopologyInfo(commandEnv, 15*time.Second)
topologyInfo, _, err := collectTopologyInfo(commandEnv, 5*time.Second)
if err != nil {
return err
}
volumeServers := collectVolumeServersByDc(topologyInfo, *dc)
volumeServers := collectVolumeServersByDcRackNode(topologyInfo, *dc, *racks, *nodes)
volumeReplicas, _ := collectVolumeReplicaLocations(topologyInfo)
diskTypes := collectVolumeDiskTypes(topologyInfo)
@ -142,13 +145,19 @@ func balanceVolumeServersByDiskType(commandEnv *CommandEnv, diskType types.DiskT
return nil
}
func collectVolumeServersByDc(t *master_pb.TopologyInfo, selectedDataCenter string) (nodes []*Node) {
func collectVolumeServersByDcRackNode(t *master_pb.TopologyInfo, selectedDataCenter string, selectedRacks string, selectedNodes string) (nodes []*Node) {
for _, dc := range t.DataCenterInfos {
if selectedDataCenter != "" && dc.Id != selectedDataCenter {
continue
}
for _, r := range dc.RackInfos {
if selectedRacks != "" && !strings.Contains(selectedRacks, r.Id) {
continue
}
for _, dn := range r.DataNodeInfos {
if selectedNodes != "" && !strings.Contains(selectedNodes, dn.Id) {
continue
}
nodes = append(nodes, &Node{
info: dn,
dc: dc.Id,
@ -325,7 +334,6 @@ func attemptToMoveOneVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]
}
func maybeMoveOneVolume(commandEnv *CommandEnv, volumeReplicas map[uint32][]*VolumeReplica, fullNode *Node, candidateVolume *master_pb.VolumeInformationMessage, emptyNode *Node, applyChange bool) (hasMoved bool, err error) {
if !commandEnv.isLocked() {
return false, fmt.Errorf("lock is lost")
}

View File

@ -251,7 +251,7 @@ func TestIsGoodMove(t *testing.T) {
func TestBalance(t *testing.T) {
topologyInfo := parseOutput(topoData)
volumeServers := collectVolumeServersByDc(topologyInfo, "")
volumeServers := collectVolumeServersByDcRackNode(topologyInfo, "", "", "")
volumeReplicas, _ := collectVolumeReplicaLocations(topologyInfo)
diskTypes := collectVolumeDiskTypes(topologyInfo)

View File

@ -110,7 +110,7 @@ func (c *commandVolumeServerEvacuate) volumeServerEvacuate(commandEnv *CommandEn
func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandEnv, volumeServer string, skipNonMoveable, applyChange bool, writer io.Writer) error {
// find this volume server
volumeServers := collectVolumeServersByDc(c.topologyInfo, "")
volumeServers := collectVolumeServersByDcRackNode(c.topologyInfo, "", "", "")
thisNodes, otherNodes := c.nodesOtherThan(volumeServers, volumeServer)
if len(thisNodes) == 0 {
return fmt.Errorf("%s is not found in this cluster", volumeServer)
@ -124,7 +124,7 @@ func (c *commandVolumeServerEvacuate) evacuateNormalVolumes(commandEnv *CommandE
fmt.Fprintf(writer, "update topologyInfo %v", err)
} else {
_, otherNodesNew := c.nodesOtherThan(
collectVolumeServersByDc(topologyInfo, ""), volumeServer)
collectVolumeServersByDcRackNode(topologyInfo, "", "", ""), volumeServer)
if len(otherNodesNew) > 0 {
otherNodes = otherNodesNew
c.topologyInfo = topologyInfo