[master] refactor func ShouldGrowVolumes (#5884)

Konstantin Lebedev 2024-09-04 20:16:44 +05:00 committed by GitHub
parent eb02946c97
commit 67a252ee8a
6 changed files with 121 additions and 62 deletions

View File

@@ -27,7 +27,7 @@ func (ms *MasterServer) DoAutomaticVolumeGrow(req *topology.VolumeGrowRequest) {
newVidLocations, err := ms.vg.AutomaticGrowByType(req.Option, ms.grpcDialOption, ms.Topo, req.Count)
glog.V(1).Infoln("finished automatic volume grow, cost ", time.Now().Sub(start))
if err != nil {
glog.Warningf("automatic volume grow %s: %+v", req.Option, err)
glog.V(1).Infof("automatic volume grow failed: %+v", err)
return
}
for _, newVidLocation := range newVidLocations {
@@ -38,19 +38,37 @@ func (ms *MasterServer) DoAutomaticVolumeGrow(req *topology.VolumeGrowRequest) {
func (ms *MasterServer) ProcessGrowRequest() {
go func() {
for {
time.Sleep(14*time.Minute + time.Duration(120*rand.Float32())*time.Second)
if !ms.Topo.IsLeader() {
continue
}
for _, vl := range ms.Topo.ListVolumeLayouts() {
if !vl.HasGrowRequest() && vl.ShouldGrowVolumes(&topology.VolumeGrowOption{}) {
dcs := ms.Topo.ListDataCenters()
for _, vlc := range ms.Topo.ListVolumeLayoutCollections() {
vl := vlc.VolumeLayout
if vl.HasGrowRequest() {
continue
}
if vl.ShouldGrowVolumes(vlc.Collection) {
vl.AddGrowRequest()
ms.volumeGrowthRequestChan <- &topology.VolumeGrowRequest{
Option: vl.ToGrowOption(),
Option: vlc.ToGrowOption(),
Count: vl.GetLastGrowCount(),
}
} else {
for _, dc := range dcs {
if vl.ShouldGrowVolumesByDataNode("DataCenter", dc) {
vl.AddGrowRequest()
volumeGrowOption := vlc.ToGrowOption()
volumeGrowOption.DataCenter = dc
ms.volumeGrowthRequestChan <- &topology.VolumeGrowRequest{
Option: volumeGrowOption,
Count: vl.GetLastGrowCount(),
Force: true,
}
}
}
}
}
time.Sleep(14*time.Minute + time.Duration(120*rand.Float32())*time.Second)
}
}()
go func() {
@@ -81,19 +99,20 @@ func (ms *MasterServer) ProcessGrowRequest() {
})
// not atomic but it's okay
if !found && vl.ShouldGrowVolumes(option) {
filter.Store(req, nil)
// we have lock called inside vg
go func(req *topology.VolumeGrowRequest, vl *topology.VolumeLayout) {
ms.DoAutomaticVolumeGrow(req)
vl.DoneGrowRequest()
filter.Delete(req)
}(req, vl)
} else {
if found || (!req.Force && !vl.ShouldGrowVolumes(req.Option.Collection)) {
glog.V(4).Infoln("discard volume grow request")
time.Sleep(time.Millisecond * 211)
vl.DoneGrowRequest()
continue
}
filter.Store(req, nil)
// we have lock called inside vg
go func(req *topology.VolumeGrowRequest, vl *topology.VolumeLayout) {
ms.DoAutomaticVolumeGrow(req)
vl.DoneGrowRequest()
filter.Delete(req)
}(req, vl)
}
}()
}
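
Taken together, the refactored leader loop makes its grow decision in two stages: grow a layout cluster-wide when ShouldGrowVolumes reports writable <= crowded, otherwise probe each data center with ShouldGrowVolumesByDataNode and enqueue a forced request only where every writable volume in that data center is already crowded. A minimal sketch of that decision flow, not part of the commit, using simplified stand-in types instead of the real topology structures:

// Sketch (not part of the commit): the grow decision flow after this
// refactor, with simplified stand-in types instead of the real topology
// structures.
package sketch

type GrowRequest struct {
	Collection string
	DataCenter string // empty means grow across the whole cluster
	Count      uint32
	Force      bool
}

type layoutView struct {
	collection        string
	writable, crowded int             // cluster-wide counts
	allCrowdedInDC    map[string]bool // per-data-center probe results
	lastGrowCount     uint32
}

// planGrowRequests mirrors the leader loop: grow the whole layout when
// writable <= crowded, otherwise force a grow only in data centers whose
// writable volumes are all crowded.
func planGrowRequests(vl layoutView, dataCenters []string) (reqs []GrowRequest) {
	if vl.writable <= vl.crowded { // ShouldGrowVolumes
		return append(reqs, GrowRequest{Collection: vl.collection, Count: vl.lastGrowCount})
	}
	for _, dc := range dataCenters {
		if vl.allCrowdedInDC[dc] { // ShouldGrowVolumesByDataNode("DataCenter", dc)
			reqs = append(reqs, GrowRequest{
				Collection: vl.collection,
				DataCenter: dc,
				Count:      vl.lastGrowCount,
				Force:      true,
			})
		}
	}
	return reqs
}

On the consuming side, Force lets such a per-data-center request bypass the collection-wide ShouldGrowVolumes re-check that would otherwise discard it.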

View File

@@ -70,13 +70,21 @@ var (
Help: "replica placement mismatch",
}, []string{"collection", "id"})
MasterVolumeLayout = prometheus.NewGaugeVec(
MasterVolumeLayoutWritable = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: Namespace,
Subsystem: "master",
Name: "volume_layout_total",
Help: "Number of volumes in volume layouts",
}, []string{"collection", "dataCenter", "type"})
Name: "volume_layout_writable",
Help: "Number of writable volumes in volume layouts",
}, []string{"collection", "disk", "rp", "ttl"})
MasterVolumeLayoutCrowded = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: Namespace,
Subsystem: "master",
Name: "volume_layout_crowded",
Help: "Number of crowded volumes in volume layouts",
}, []string{"collection", "disk", "rp", "ttl"})
MasterPickForWriteErrorCounter = prometheus.NewCounter(
prometheus.CounterOpts{
@@ -281,7 +289,8 @@ func init() {
Gather.MustRegister(MasterReceivedHeartbeatCounter)
Gather.MustRegister(MasterLeaderChangeCounter)
Gather.MustRegister(MasterReplicaPlacementMismatch)
Gather.MustRegister(MasterVolumeLayout)
Gather.MustRegister(MasterVolumeLayoutWritable)
Gather.MustRegister(MasterVolumeLayoutCrowded)
Gather.MustRegister(FilerRequestCounter)
Gather.MustRegister(FilerHandlerCounter)
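
Splitting the old volume_layout_total gauge into volume_layout_writable and volume_layout_crowded also makes the two counts independently readable in dashboards and tests. A small sketch, not part of the commit, assuming client_golang's testutil helper; the label values ("pictures", "hdd", "010", "") are illustrative:

// Sketch (not part of the commit): reading back the per-layout gauges that
// ShouldGrowVolumes now updates; label values are illustrative.
package sketch

import (
	"github.com/prometheus/client_golang/prometheus/testutil"

	"github.com/seaweedfs/seaweedfs/weed/stats"
)

func readLayoutGauges() (writable, crowded float64) {
	writable = testutil.ToFloat64(
		stats.MasterVolumeLayoutWritable.WithLabelValues("pictures", "hdd", "010", ""))
	crowded = testutil.ToFloat64(
		stats.MasterVolumeLayoutCrowded.WithLabelValues("pictures", "hdd", "010", ""))
	return writable, crowded
}

Whenever writable <= crowded for a layout, the leader would schedule a grow for it.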

View File

@@ -329,7 +329,7 @@ func (t *Topology) UnRegisterVolumeLayout(v storage.VolumeInfo, dn *DataNode) {
}
func (t *Topology) DataCenterExists(dcName string) bool {
return dcName == "" || t.GetOrCreateDataCenter(dcName) != nil
return dcName == "" || t.GetDataCenter(dcName) != nil
}
func (t *Topology) GetDataCenter(dcName string) (dc *DataCenter) {
@@ -358,6 +358,15 @@ func (t *Topology) GetOrCreateDataCenter(dcName string) *DataCenter {
return dc
}
func (t *Topology) ListDataCenters() (dcs []string) {
t.RLock()
defer t.RUnlock()
for _, c := range t.children {
dcs = append(dcs, string(c.(*DataCenter).Id()))
}
return dcs
}
func (t *Topology) SyncDataNodeRegistration(volumes []*master_pb.VolumeInformationMessage, dn *DataNode) (newVolumes, deletedVolumes []storage.VolumeInfo) {
// convert into in memory struct storage.VolumeInfo
var volumeInfos []storage.VolumeInfo
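
The DataCenterExists change is a side-effect fix: a check that calls GetOrCreateDataCenter can never report "missing" and also registers the unknown name in the topology as a by-product, while GetDataCenter only looks it up. A hypothetical illustration, not part of the commit, using a simplified map-based stand-in rather than the real Topology type:

// Sketch (not part of the commit): why an existence check should use a
// lookup rather than the create-on-miss variant.
package sketch

type topo struct {
	dataCenters map[string]struct{}
}

// lookup only: no side effects
func (t *topo) getDataCenter(name string) bool {
	_, ok := t.dataCenters[name]
	return ok
}

// create on miss: mutates the topology as a by-product and always succeeds
func (t *topo) getOrCreateDataCenter(name string) bool {
	if _, ok := t.dataCenters[name]; !ok {
		t.dataCenters[name] = struct{}{}
	}
	return true
}

// dataCenterExists mirrors the fixed check: empty means "no constraint",
// otherwise the data center must already be known.
func (t *topo) dataCenterExists(name string) bool {
	return name == "" || t.getDataCenter(name)
}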

View File

@@ -13,6 +13,11 @@ type TopologyInfo struct {
Layouts []VolumeLayoutInfo `json:"Layouts"`
}
type VolumeLayoutCollection struct {
Collection string
VolumeLayout *VolumeLayout
}
func (t *Topology) ToInfo() (info TopologyInfo) {
info.Max = t.diskUsages.GetMaxVolumeCount()
info.Free = t.diskUsages.FreeSpace()
@@ -42,10 +47,12 @@ func (t *Topology) ToInfo() (info TopologyInfo) {
return
}
func (t *Topology) ListVolumeLayouts() (volumeLayouts []*VolumeLayout) {
func (t *Topology) ListVolumeLayoutCollections() (volumeLayouts []*VolumeLayoutCollection) {
for _, col := range t.collectionMap.Items() {
for _, volumeLayout := range col.(*Collection).storageType2VolumeLayout.Items() {
volumeLayouts = append(volumeLayouts, volumeLayout.(*VolumeLayout))
volumeLayouts = append(volumeLayouts,
&VolumeLayoutCollection{col.(*Collection).Name, volumeLayout.(*VolumeLayout)},
)
}
}
return volumeLayouts

View File

@@ -28,6 +28,7 @@ This package is created to resolve these replica placement issues:
type VolumeGrowRequest struct {
Option *VolumeGrowOption
Count uint32
Force bool
}
type volumeGrowthStrategy struct {

View File

@@ -3,7 +3,7 @@ package topology
import (
"fmt"
"github.com/seaweedfs/seaweedfs/weed/stats"
"math/rand/v2"
"math/rand"
"sync"
"sync/atomic"
"time"
@@ -218,17 +218,21 @@ func (vl *VolumeLayout) EnsureCorrectWritables(v *storage.VolumeInfo) {
}
func (vl *VolumeLayout) ensureCorrectWritables(vid needle.VolumeId) {
if vl.enoughCopies(vid) && vl.isAllWritable(vid) {
if !vl.oversizedVolumes.IsTrue(vid) {
vl.setVolumeWritable(vid)
}
isEnoughCopies := vl.enoughCopies(vid)
isAllWritable := vl.isAllWritable(vid)
isOversizedVolume := vl.oversizedVolumes.IsTrue(vid)
if isEnoughCopies && isAllWritable && !isOversizedVolume {
vl.setVolumeWritable(vid)
} else {
if !vl.enoughCopies(vid) {
if !isEnoughCopies {
glog.V(0).Infof("volume %d does not have enough copies", vid)
}
if !vl.isAllWritable(vid) {
if !isAllWritable {
glog.V(0).Infof("volume %d are not all writable", vid)
}
if isOversizedVolume {
glog.V(1).Infof("volume %d are oversized", vid)
}
glog.V(0).Infof("volume %d remove from writable", vid)
vl.removeFromWritable(vid)
}
@@ -254,6 +258,10 @@ func (vl *VolumeLayout) isOversized(v *storage.VolumeInfo) bool {
return uint64(v.Size) >= vl.volumeSizeLimit
}
func (vl *VolumeLayout) isCrowdedVolume(v *storage.VolumeInfo) bool {
return float64(v.Size) > float64(vl.volumeSizeLimit)*VolumeGrowStrategy.Threshold
}
func (vl *VolumeLayout) isWritable(v *storage.VolumeInfo) bool {
return !vl.isOversized(v) &&
v.Version == needle.CurrentVersion &&
@@ -296,7 +304,7 @@ func (vl *VolumeLayout) PickForWrite(count uint64, option *VolumeGrowOption) (vi
return 0, 0, nil, true, fmt.Errorf("%s in volume layout", noWritableVolumes)
}
if option.DataCenter == "" && option.Rack == "" && option.DataNode == "" {
vid := vl.writables[rand.IntN(lenWriters)]
vid := vl.writables[rand.Intn(lenWriters)]
locationList = vl.vid2location[vid]
if locationList == nil || len(locationList.list) == 0 {
return 0, 0, nil, false, fmt.Errorf("Strangely vid %s is on no machine!", vid.String())
@@ -351,40 +359,45 @@ func (vl *VolumeLayout) GetLastGrowCount() uint32 {
return vl.lastGrowCount.Load()
}
func (vl *VolumeLayout) ShouldGrowVolumes(option *VolumeGrowOption) bool {
total, active, crowded := vl.GetActiveVolumeCount(option)
stats.MasterVolumeLayout.WithLabelValues(option.Collection, option.DataCenter, "total").Set(float64(total))
stats.MasterVolumeLayout.WithLabelValues(option.Collection, option.DataCenter, "active").Set(float64(active))
stats.MasterVolumeLayout.WithLabelValues(option.Collection, option.DataCenter, "crowded").Set(float64(crowded))
//glog.V(0).Infof("active volume: %d, high usage volume: %d\n", active, high)
return active <= crowded
func (vl *VolumeLayout) ShouldGrowVolumes(collection string) bool {
writable, crowded := vl.GetWritableVolumeCount()
stats.MasterVolumeLayoutWritable.WithLabelValues(collection, vl.diskType.String(), vl.rp.String(), vl.ttl.String()).Set(float64(writable))
stats.MasterVolumeLayoutCrowded.WithLabelValues(collection, vl.diskType.String(), vl.rp.String(), vl.ttl.String()).Set(float64(crowded))
return writable <= crowded
}
func (vl *VolumeLayout) GetActiveVolumeCount(option *VolumeGrowOption) (total, active, crowded int) {
func (vl *VolumeLayout) ShouldGrowVolumesByDataNode(nodeType string, dataNode string) bool {
vl.accessLock.RLock()
defer vl.accessLock.RUnlock()
if option.DataCenter == "" {
return len(vl.writables), len(vl.writables), len(vl.crowded)
}
total = len(vl.writables)
for _, v := range vl.writables {
writables := make([]needle.VolumeId, len(vl.writables))
copy(writables, vl.writables)
vl.accessLock.RUnlock()
dataNodeId := NodeId(dataNode)
for _, v := range writables {
for _, dn := range vl.vid2location[v].list {
if dn.GetDataCenter().Id() == NodeId(option.DataCenter) {
if option.Rack != "" && dn.GetRack().Id() != NodeId(option.Rack) {
continue
}
if option.DataNode != "" && dn.Id() != NodeId(option.DataNode) {
continue
}
active++
info, _ := dn.GetVolumesById(v)
if float64(info.Size) > float64(vl.volumeSizeLimit)*VolumeGrowStrategy.Threshold {
crowded++
dataNodeFound := false
switch nodeType {
case "DataCenter":
dataNodeFound = dn.GetDataCenter().Id() == dataNodeId
case "Rack":
dataNodeFound = dn.GetRack().Id() == dataNodeId
case "DataNode":
dataNodeFound = dn.Id() == dataNodeId
}
if dataNodeFound {
if info, err := dn.GetVolumesById(v); err == nil && !vl.isCrowdedVolume(&info) {
return false
}
}
}
}
return
return true
}
func (vl *VolumeLayout) GetWritableVolumeCount() (active, crowded int) {
vl.accessLock.RLock()
defer vl.accessLock.RUnlock()
return len(vl.writables), len(vl.crowded)
}
func (vl *VolumeLayout) removeFromWritable(vid needle.VolumeId) bool {
@@ -531,12 +544,13 @@ func (vl *VolumeLayout) ToInfo() (info VolumeLayoutInfo) {
return
}
func (vl *VolumeLayout) ToGrowOption() (option *VolumeGrowOption) {
option = &VolumeGrowOption{}
option.ReplicaPlacement = vl.rp
option.Ttl = vl.ttl
option.DiskType = vl.diskType
return
func (vlc *VolumeLayoutCollection) ToGrowOption() (option *VolumeGrowOption) {
return &VolumeGrowOption{
Collection: vlc.Collection,
ReplicaPlacement: vlc.VolumeLayout.rp,
Ttl: vlc.VolumeLayout.ttl,
DiskType: vlc.VolumeLayout.diskType,
}
}
func (vl *VolumeLayout) Stats() *VolumeLayoutStats {
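
The per-node check inverts the old counting logic: instead of tallying active and crowded volumes for a grow option, ShouldGrowVolumesByDataNode answers "should we grow here" directly, and a single writable, not-yet-crowded volume on the node is enough to say no. A simplified model, not part of the commit, with stand-in types; the 0.9 threshold is illustrative, the real code uses VolumeGrowStrategy.Threshold:

// Sketch (not part of the commit): a simplified model of the new per-node
// grow check; types are stand-ins and the 0.9 threshold is illustrative
// (the real code uses VolumeGrowStrategy.Threshold).
package sketch

type volume struct {
	size uint64
	node string // data center, rack, or data node id, depending on nodeType
}

func isCrowded(size, sizeLimit uint64, threshold float64) bool {
	return float64(size) > float64(sizeLimit)*threshold
}

// shouldGrowOnNode mirrors ShouldGrowVolumesByDataNode: finding one writable,
// not-yet-crowded volume on the node is enough to answer "no grow needed";
// only when every writable volume there is crowded does it return true.
func shouldGrowOnNode(writables []volume, node string, sizeLimit uint64) bool {
	for _, v := range writables {
		if v.node == node && !isCrowded(v.size, sizeLimit, 0.9) {
			return false
		}
	}
	return true
}

For example, with a 30 GB volume size limit and a 0.9 threshold, a 28 GB volume already counts as crowded, while a single 20 GB writable volume on the node keeps it from growing.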