2021-02-16 18:47:02 +08:00
|
|
|
package shell
|
|
|
|
|
|
|
|
import (
|
|
|
|
"flag"
|
|
|
|
"fmt"
|
2021-08-13 18:09:28 +08:00
|
|
|
"github.com/chrislusf/seaweedfs/weed/glog"
|
2021-09-13 13:47:52 +08:00
|
|
|
"github.com/chrislusf/seaweedfs/weed/pb"
|
2021-02-16 18:47:02 +08:00
|
|
|
"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
|
|
|
|
"github.com/chrislusf/seaweedfs/weed/storage/types"
|
2021-02-22 17:44:18 +08:00
|
|
|
"github.com/chrislusf/seaweedfs/weed/wdclient"
|
2021-02-16 18:47:02 +08:00
|
|
|
"io"
|
2021-07-28 04:53:01 +08:00
|
|
|
"path/filepath"
|
2021-08-10 17:50:28 +08:00
|
|
|
"sync"
|
2021-02-16 18:47:02 +08:00
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/chrislusf/seaweedfs/weed/storage/needle"
|
|
|
|
)
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
Commands = append(Commands, &commandVolumeTierMove{})
|
|
|
|
}
|
|
|
|
|
|
|
|
type commandVolumeTierMove struct {
|
2021-09-13 13:47:52 +08:00
|
|
|
activeServers map[pb.ServerAddress]struct{}
|
2021-08-10 17:50:28 +08:00
|
|
|
activeServersLock sync.Mutex
|
|
|
|
activeServersCond *sync.Cond
|
2021-02-16 18:47:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (c *commandVolumeTierMove) Name() string {
|
2021-02-19 19:39:19 +08:00
|
|
|
return "volume.tier.move"
|
2021-02-16 18:47:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (c *commandVolumeTierMove) Help() string {
|
2021-02-22 17:30:07 +08:00
|
|
|
return `change a volume from one disk type to another
|
2021-02-16 18:47:02 +08:00
|
|
|
|
2021-07-28 04:53:01 +08:00
|
|
|
volume.tier.move -fromDiskType=hdd -toDiskType=ssd [-collectionPattern=""] [-fullPercent=95] [-quietFor=1h]
|
2021-02-22 16:28:42 +08:00
|
|
|
|
|
|
|
Even if the volume is replicated, only one replica will be changed and the rest replicas will be dropped.
|
|
|
|
So "volume.fix.replication" and "volume.balance" should be followed.
|
2021-02-16 18:47:02 +08:00
|
|
|
|
|
|
|
`
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *commandVolumeTierMove) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
|
|
|
|
|
2021-09-13 13:47:52 +08:00
|
|
|
c.activeServers = make(map[pb.ServerAddress]struct{})
|
2021-08-10 17:50:28 +08:00
|
|
|
c.activeServersCond = sync.NewCond(new(sync.Mutex))
|
|
|
|
|
2021-02-16 18:47:02 +08:00
|
|
|
tierCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
|
2021-07-28 04:53:01 +08:00
|
|
|
collectionPattern := tierCommand.String("collectionPattern", "", "match with wildcard characters '*' and '?'")
|
2021-02-16 18:47:02 +08:00
|
|
|
fullPercentage := tierCommand.Float64("fullPercent", 95, "the volume reaches the percentage of max volume size")
|
|
|
|
quietPeriod := tierCommand.Duration("quietFor", 24*time.Hour, "select volumes without no writes for this period")
|
|
|
|
source := tierCommand.String("fromDiskType", "", "the source disk type")
|
|
|
|
target := tierCommand.String("toDiskType", "", "the target disk type")
|
2021-02-22 17:30:07 +08:00
|
|
|
applyChange := tierCommand.Bool("force", false, "actually apply the changes")
|
2021-02-16 18:47:02 +08:00
|
|
|
if err = tierCommand.Parse(args); err != nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-09-14 13:13:34 +08:00
|
|
|
if err = commandEnv.confirmIsLocked(); err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2021-02-22 16:28:42 +08:00
|
|
|
fromDiskType := types.ToDiskType(*source)
|
|
|
|
toDiskType := types.ToDiskType(*target)
|
2021-02-16 18:47:02 +08:00
|
|
|
|
2021-02-22 16:28:42 +08:00
|
|
|
if fromDiskType == toDiskType {
|
|
|
|
return fmt.Errorf("source tier %s is the same as target tier %s", fromDiskType, toDiskType)
|
|
|
|
}
|
2021-02-16 18:47:02 +08:00
|
|
|
|
2021-02-22 16:28:42 +08:00
|
|
|
// collect topology information
|
|
|
|
topologyInfo, volumeSizeLimitMb, err := collectTopologyInfo(commandEnv)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2021-02-16 18:47:02 +08:00
|
|
|
}
|
|
|
|
|
2021-02-22 17:30:07 +08:00
|
|
|
// collect all volumes that should change
|
2021-07-28 04:53:01 +08:00
|
|
|
volumeIds, err := collectVolumeIdsForTierChange(commandEnv, topologyInfo, volumeSizeLimitMb, fromDiskType, *collectionPattern, *fullPercentage, *quietPeriod)
|
2021-02-16 18:47:02 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
fmt.Printf("tier move volumes: %v\n", volumeIds)
|
|
|
|
|
2021-02-22 17:30:07 +08:00
|
|
|
_, allLocations := collectVolumeReplicaLocations(topologyInfo)
|
|
|
|
for _, vid := range volumeIds {
|
2021-08-10 17:50:28 +08:00
|
|
|
if err = c.doVolumeTierMove(commandEnv, writer, vid, toDiskType, allLocations, *applyChange); err != nil {
|
2021-02-22 17:30:07 +08:00
|
|
|
fmt.Printf("tier move volume %d: %v\n", vid, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-02-22 17:44:18 +08:00
|
|
|
func isOneOf(server string, locations []wdclient.Location) bool {
|
|
|
|
for _, loc := range locations {
|
|
|
|
if server == loc.Url {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2021-08-10 17:50:28 +08:00
|
|
|
func (c *commandVolumeTierMove) doVolumeTierMove(commandEnv *CommandEnv, writer io.Writer, vid needle.VolumeId, toDiskType types.DiskType, allLocations []location, applyChanges bool) (err error) {
|
2021-02-22 17:30:07 +08:00
|
|
|
// find volume location
|
|
|
|
locations, found := commandEnv.MasterClient.GetLocations(uint32(vid))
|
|
|
|
if !found {
|
|
|
|
return fmt.Errorf("volume %d not found", vid)
|
|
|
|
}
|
|
|
|
|
|
|
|
// find one server with the most empty volume slots with target disk type
|
|
|
|
hasFoundTarget := false
|
|
|
|
keepDataNodesSorted(allLocations, toDiskType)
|
|
|
|
fn := capacityByFreeVolumeCount(toDiskType)
|
2021-08-10 19:13:08 +08:00
|
|
|
wg := sync.WaitGroup{}
|
2021-02-22 17:30:07 +08:00
|
|
|
for _, dst := range allLocations {
|
2021-02-23 19:49:14 +08:00
|
|
|
if fn(dst.dataNode) > 0 && !hasFoundTarget {
|
2021-02-22 17:30:07 +08:00
|
|
|
// ask the volume server to replicate the volume
|
2021-02-22 17:44:18 +08:00
|
|
|
if isOneOf(dst.dataNode.Id, locations) {
|
|
|
|
continue
|
|
|
|
}
|
2021-09-13 13:47:52 +08:00
|
|
|
var sourceVolumeServer pb.ServerAddress
|
2021-02-22 17:30:07 +08:00
|
|
|
for _, loc := range locations {
|
|
|
|
if loc.Url != dst.dataNode.Id {
|
2021-09-13 13:47:52 +08:00
|
|
|
sourceVolumeServer = loc.ServerAddress()
|
2021-02-22 17:30:07 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if sourceVolumeServer == "" {
|
|
|
|
continue
|
|
|
|
}
|
2021-02-22 17:59:03 +08:00
|
|
|
fmt.Fprintf(writer, "moving volume %d from %s to %s with disk type %s ...\n", vid, sourceVolumeServer, dst.dataNode.Id, toDiskType.ReadableString())
|
2021-02-22 17:30:07 +08:00
|
|
|
hasFoundTarget = true
|
|
|
|
|
|
|
|
if !applyChanges {
|
2021-08-09 06:12:39 +08:00
|
|
|
// adjust volume count
|
|
|
|
dst.dataNode.DiskInfos[string(toDiskType)].VolumeCount++
|
2021-02-22 17:30:07 +08:00
|
|
|
break
|
|
|
|
}
|
|
|
|
|
2021-09-13 13:47:52 +08:00
|
|
|
destServerAddress := pb.NewServerAddressFromDataNode(dst.dataNode)
|
2021-08-10 18:08:29 +08:00
|
|
|
c.activeServersCond.L.Lock()
|
|
|
|
_, isSourceActive := c.activeServers[sourceVolumeServer]
|
2021-09-13 13:47:52 +08:00
|
|
|
_, isDestActive := c.activeServers[destServerAddress]
|
2021-08-10 18:08:29 +08:00
|
|
|
for isSourceActive || isDestActive {
|
|
|
|
c.activeServersCond.Wait()
|
|
|
|
_, isSourceActive = c.activeServers[sourceVolumeServer]
|
2021-09-13 13:47:52 +08:00
|
|
|
_, isDestActive = c.activeServers[destServerAddress]
|
2021-02-22 17:44:18 +08:00
|
|
|
}
|
2021-08-10 18:08:29 +08:00
|
|
|
c.activeServers[sourceVolumeServer] = struct{}{}
|
2021-09-13 13:47:52 +08:00
|
|
|
c.activeServers[destServerAddress] = struct{}{}
|
2021-08-10 18:08:29 +08:00
|
|
|
c.activeServersCond.L.Unlock()
|
|
|
|
|
2021-08-10 19:13:08 +08:00
|
|
|
wg.Add(1)
|
2021-08-10 18:08:29 +08:00
|
|
|
go func(dst location) {
|
|
|
|
if err := c.doMoveOneVolume(commandEnv, writer, vid, toDiskType, locations, sourceVolumeServer, dst); err != nil {
|
2021-08-10 18:25:18 +08:00
|
|
|
fmt.Fprintf(writer, "move volume %d %s => %s: %v\n", vid, sourceVolumeServer, dst.dataNode.Id, err)
|
2021-08-10 18:08:29 +08:00
|
|
|
}
|
|
|
|
delete(c.activeServers, sourceVolumeServer)
|
2021-09-13 13:47:52 +08:00
|
|
|
delete(c.activeServers, destServerAddress)
|
2021-08-10 18:08:29 +08:00
|
|
|
c.activeServersCond.Signal()
|
2021-08-10 19:13:08 +08:00
|
|
|
wg.Done()
|
2021-08-10 18:08:29 +08:00
|
|
|
}(dst)
|
|
|
|
|
2021-02-22 17:30:07 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-08-10 19:13:08 +08:00
|
|
|
wg.Wait()
|
|
|
|
|
2021-02-22 17:30:07 +08:00
|
|
|
if !hasFoundTarget {
|
2021-02-22 17:59:03 +08:00
|
|
|
fmt.Fprintf(writer, "can not find disk type %s for volume %d\n", toDiskType.ReadableString(), vid)
|
2021-02-22 17:30:07 +08:00
|
|
|
}
|
|
|
|
|
2021-02-16 18:47:02 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-09-13 13:47:52 +08:00
|
|
|
func (c *commandVolumeTierMove) doMoveOneVolume(commandEnv *CommandEnv, writer io.Writer, vid needle.VolumeId, toDiskType types.DiskType, locations []wdclient.Location, sourceVolumeServer pb.ServerAddress, dst location) (err error) {
|
2021-08-10 17:50:28 +08:00
|
|
|
|
|
|
|
// mark all replicas as read only
|
2021-08-13 18:09:28 +08:00
|
|
|
if err = markVolumeReplicasWritable(commandEnv.option.GrpcDialOption, vid, locations, false); err != nil {
|
2021-08-10 17:50:28 +08:00
|
|
|
return fmt.Errorf("mark volume %d as readonly on %s: %v", vid, locations[0].Url, err)
|
|
|
|
}
|
2021-09-13 13:47:52 +08:00
|
|
|
if err = LiveMoveVolume(commandEnv.option.GrpcDialOption, writer, vid, sourceVolumeServer, pb.NewServerAddressFromDataNode(dst.dataNode), 5*time.Second, toDiskType.ReadableString(), true); err != nil {
|
2021-08-13 18:09:28 +08:00
|
|
|
|
|
|
|
// mark all replicas as writable
|
|
|
|
if err = markVolumeReplicasWritable(commandEnv.option.GrpcDialOption, vid, locations, true); err != nil {
|
|
|
|
glog.Errorf("mark volume %d as writable on %s: %v", vid, locations[0].Url, err)
|
|
|
|
}
|
|
|
|
|
2021-08-10 17:50:28 +08:00
|
|
|
return fmt.Errorf("move volume %d %s => %s : %v", vid, locations[0].Url, dst.dataNode.Id, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// adjust volume count
|
|
|
|
dst.dataNode.DiskInfos[string(toDiskType)].VolumeCount++
|
|
|
|
|
|
|
|
// remove the remaining replicas
|
|
|
|
for _, loc := range locations {
|
2021-09-13 13:47:52 +08:00
|
|
|
if loc.Url != dst.dataNode.Id && loc.ServerAddress() != sourceVolumeServer {
|
|
|
|
if err = deleteVolume(commandEnv.option.GrpcDialOption, vid, loc.ServerAddress()); err != nil {
|
2021-08-10 17:50:28 +08:00
|
|
|
fmt.Fprintf(writer, "failed to delete volume %d on %s: %v\n", vid, loc.Url, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-07-28 04:53:01 +08:00
|
|
|
func collectVolumeIdsForTierChange(commandEnv *CommandEnv, topologyInfo *master_pb.TopologyInfo, volumeSizeLimitMb uint64, sourceTier types.DiskType, collectionPattern string, fullPercentage float64, quietPeriod time.Duration) (vids []needle.VolumeId, err error) {
|
2021-02-16 18:47:02 +08:00
|
|
|
|
|
|
|
quietSeconds := int64(quietPeriod / time.Second)
|
|
|
|
nowUnixSeconds := time.Now().Unix()
|
|
|
|
|
|
|
|
fmt.Printf("collect %s volumes quiet for: %d seconds\n", sourceTier, quietSeconds)
|
|
|
|
|
|
|
|
vidMap := make(map[uint32]bool)
|
2021-02-22 16:28:42 +08:00
|
|
|
eachDataNode(topologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
|
2021-02-16 18:47:02 +08:00
|
|
|
for _, diskInfo := range dn.DiskInfos {
|
|
|
|
for _, v := range diskInfo.VolumeInfos {
|
2021-07-28 04:53:01 +08:00
|
|
|
// check collection name pattern
|
|
|
|
if collectionPattern != "" {
|
|
|
|
matched, err := filepath.Match(collectionPattern, v.Collection)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if !matched {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if v.ModifiedAtSecond+quietSeconds < nowUnixSeconds && types.ToDiskType(v.DiskType) == sourceTier {
|
2021-02-22 16:28:42 +08:00
|
|
|
if float64(v.Size) > fullPercentage/100*float64(volumeSizeLimitMb)*1024*1024 {
|
2021-02-16 18:47:02 +08:00
|
|
|
vidMap[v.Id] = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
|
|
|
|
|
|
|
for vid := range vidMap {
|
|
|
|
vids = append(vids, needle.VolumeId(vid))
|
|
|
|
}
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|