2012-08-24 11:56:09 +08:00
|
|
|
package topology
|
|
|
|
|
2012-08-24 13:56:14 +08:00
|
|
|
import (
|
2014-03-03 14:16:54 +08:00
|
|
|
"errors"
|
2023-03-09 15:58:03 +08:00
|
|
|
"math/rand"
|
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
"sync/atomic"
|
|
|
|
|
2022-07-29 15:17:28 +08:00
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/stats"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/types"
|
2012-08-24 13:56:14 +08:00
|
|
|
)
|
2012-08-24 11:56:09 +08:00
|
|
|
|
2012-08-28 16:04:39 +08:00
|
|
|
// NodeId is the string identifier of a node; it is what Node.Id returns and
// the key type of NodeImpl's children map.
type NodeId string
|
2012-09-03 05:33:48 +08:00
|
|
|
type Node interface {
|
|
|
|
Id() NodeId
|
|
|
|
String() string
|
2020-12-18 05:25:05 +08:00
|
|
|
AvailableSpaceFor(option *VolumeGrowOption) int64
|
|
|
|
ReserveOneVolume(r int64, option *VolumeGrowOption) (*DataNode, error)
|
2021-02-16 18:47:02 +08:00
|
|
|
UpAdjustDiskUsageDelta(deltaDiskUsages *DiskUsages)
|
2019-04-19 12:43:36 +08:00
|
|
|
UpAdjustMaxVolumeId(vid needle.VolumeId)
|
2021-02-16 18:47:02 +08:00
|
|
|
GetDiskUsages() *DiskUsages
|
2012-10-10 11:53:31 +08:00
|
|
|
|
2019-04-19 12:43:36 +08:00
|
|
|
GetMaxVolumeId() needle.VolumeId
|
2012-09-19 16:45:30 +08:00
|
|
|
SetParent(Node)
|
2012-09-03 05:33:48 +08:00
|
|
|
LinkChildNode(node Node)
|
|
|
|
UnlinkChildNode(nodeId NodeId)
|
2021-05-06 18:46:14 +08:00
|
|
|
CollectDeadNodeAndFullVolumes(freshThreshHold int64, volumeSizeLimit uint64, growThreshold float64)
|
2012-09-03 16:50:04 +08:00
|
|
|
|
2012-09-09 07:25:44 +08:00
|
|
|
IsDataNode() bool
|
2013-06-20 09:10:38 +08:00
|
|
|
IsRack() bool
|
|
|
|
IsDataCenter() bool
|
2016-05-20 14:57:31 +08:00
|
|
|
Children() []Node
|
2012-09-10 15:18:07 +08:00
|
|
|
Parent() Node
|
2012-10-10 11:53:31 +08:00
|
|
|
|
|
|
|
GetValue() interface{} //get reference to the topology,dc,rack,datanode
|
2012-08-24 11:56:09 +08:00
|
|
|
}
|
2012-09-03 05:33:48 +08:00
|
|
|
type NodeImpl struct {
|
2021-02-16 18:47:02 +08:00
|
|
|
diskUsages *DiskUsages
|
|
|
|
id NodeId
|
|
|
|
parent Node
|
2021-02-18 12:57:08 +08:00
|
|
|
sync.RWMutex // lock children
|
2021-02-16 18:47:02 +08:00
|
|
|
children map[NodeId]Node
|
|
|
|
maxVolumeId needle.VolumeId
|
2012-08-29 15:58:03 +08:00
|
|
|
|
2012-09-03 05:33:48 +08:00
|
|
|
//for rack, data center, topology
|
|
|
|
nodeType string
|
2012-10-10 11:53:31 +08:00
|
|
|
value interface{}
|
2012-09-01 17:20:59 +08:00
|
|
|
}
|
2012-08-31 16:35:11 +08:00
|
|
|
|
2021-02-16 18:47:02 +08:00
|
|
|
func (n *NodeImpl) GetDiskUsages() *DiskUsages {
|
|
|
|
return n.diskUsages
|
|
|
|
}
|
|
|
|
|
2014-03-03 14:16:54 +08:00
|
|
|
// the first node must satisfy filterFirstNodeFn(), the rest nodes must have one free slot
|
2020-12-18 05:25:05 +08:00
|
|
|
func (n *NodeImpl) PickNodesByWeight(numberOfNodes int, option *VolumeGrowOption, filterFirstNodeFn func(dn Node) error) (firstNode Node, restNodes []Node, err error) {
|
2020-03-05 13:41:52 +08:00
|
|
|
var totalWeights int64
|
2015-03-10 15:20:31 +08:00
|
|
|
var errs []string
|
2016-05-20 14:57:31 +08:00
|
|
|
n.RLock()
|
2020-03-05 13:41:52 +08:00
|
|
|
candidates := make([]Node, 0, len(n.children))
|
|
|
|
candidatesWeights := make([]int64, 0, len(n.children))
|
|
|
|
//pick nodes which has enough free volumes as candidates, and use free volumes number as node weight.
|
2014-03-03 14:16:54 +08:00
|
|
|
for _, node := range n.children {
|
2020-12-18 05:25:05 +08:00
|
|
|
if node.AvailableSpaceFor(option) <= 0 {
|
2020-03-05 13:41:52 +08:00
|
|
|
continue
|
2014-03-03 14:16:54 +08:00
|
|
|
}
|
2020-12-18 05:25:05 +08:00
|
|
|
totalWeights += node.AvailableSpaceFor(option)
|
2020-03-05 13:41:52 +08:00
|
|
|
candidates = append(candidates, node)
|
2020-12-18 05:25:05 +08:00
|
|
|
candidatesWeights = append(candidatesWeights, node.AvailableSpaceFor(option))
|
2014-03-03 14:16:54 +08:00
|
|
|
}
|
2016-05-20 14:57:31 +08:00
|
|
|
n.RUnlock()
|
2020-03-05 13:41:52 +08:00
|
|
|
if len(candidates) < numberOfNodes {
|
2020-03-23 09:32:49 +08:00
|
|
|
glog.V(0).Infoln(n.Id(), "failed to pick", numberOfNodes, "from ", len(candidates), "node candidates")
|
2023-03-09 15:58:03 +08:00
|
|
|
return nil, nil, errors.New("Not enough data nodes found!")
|
2014-03-03 14:16:54 +08:00
|
|
|
}
|
|
|
|
|
2020-03-05 13:41:52 +08:00
|
|
|
//pick nodes randomly by weights, the node picked earlier has higher final weights
|
|
|
|
sortedCandidates := make([]Node, 0, len(candidates))
|
2020-03-07 22:12:57 +08:00
|
|
|
for i := 0; i < len(candidates); i++ {
|
2020-03-05 13:41:52 +08:00
|
|
|
weightsInterval := rand.Int63n(totalWeights)
|
|
|
|
lastWeights := int64(0)
|
|
|
|
for k, weights := range candidatesWeights {
|
2020-03-07 22:12:57 +08:00
|
|
|
if (weightsInterval >= lastWeights) && (weightsInterval < lastWeights+weights) {
|
2020-03-05 13:41:52 +08:00
|
|
|
sortedCandidates = append(sortedCandidates, candidates[k])
|
|
|
|
candidatesWeights[k] = 0
|
|
|
|
totalWeights -= weights
|
|
|
|
break
|
|
|
|
}
|
|
|
|
lastWeights += weights
|
2014-03-03 14:16:54 +08:00
|
|
|
}
|
|
|
|
}
|
2020-03-05 13:41:52 +08:00
|
|
|
|
|
|
|
restNodes = make([]Node, 0, numberOfNodes-1)
|
|
|
|
ret := false
|
|
|
|
n.RLock()
|
|
|
|
for k, node := range sortedCandidates {
|
|
|
|
if err := filterFirstNodeFn(node); err == nil {
|
|
|
|
firstNode = node
|
|
|
|
if k >= numberOfNodes-1 {
|
|
|
|
restNodes = sortedCandidates[:numberOfNodes-1]
|
|
|
|
} else {
|
|
|
|
restNodes = append(restNodes, sortedCandidates[:k]...)
|
|
|
|
restNodes = append(restNodes, sortedCandidates[k+1:numberOfNodes]...)
|
2014-03-03 14:16:54 +08:00
|
|
|
}
|
2020-03-05 13:41:52 +08:00
|
|
|
ret = true
|
|
|
|
break
|
2014-03-03 14:16:54 +08:00
|
|
|
} else {
|
2020-03-05 13:41:52 +08:00
|
|
|
errs = append(errs, string(node.Id())+":"+err.Error())
|
2014-03-03 14:16:54 +08:00
|
|
|
}
|
|
|
|
}
|
2020-03-05 13:41:52 +08:00
|
|
|
n.RUnlock()
|
2014-03-03 14:16:54 +08:00
|
|
|
if !ret {
|
2020-03-05 13:41:52 +08:00
|
|
|
return nil, nil, errors.New("No matching data node found! \n" + strings.Join(errs, "\n"))
|
2014-03-03 14:16:54 +08:00
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2012-09-09 07:25:44 +08:00
|
|
|
func (n *NodeImpl) IsDataNode() bool {
|
|
|
|
return n.nodeType == "DataNode"
|
2012-09-03 05:33:48 +08:00
|
|
|
}
|
|
|
|
func (n *NodeImpl) IsRack() bool {
|
|
|
|
return n.nodeType == "Rack"
|
|
|
|
}
|
|
|
|
func (n *NodeImpl) IsDataCenter() bool {
|
|
|
|
return n.nodeType == "DataCenter"
|
|
|
|
}
|
|
|
|
func (n *NodeImpl) String() string {
|
|
|
|
if n.parent != nil {
|
|
|
|
return n.parent.String() + ":" + string(n.id)
|
|
|
|
}
|
|
|
|
return string(n.id)
|
|
|
|
}
|
|
|
|
func (n *NodeImpl) Id() NodeId {
|
|
|
|
return n.id
|
|
|
|
}
|
2021-02-16 18:47:02 +08:00
|
|
|
func (n *NodeImpl) getOrCreateDisk(diskType types.DiskType) *DiskUsageCounts {
|
|
|
|
return n.diskUsages.getOrCreateDisk(diskType)
|
2020-12-14 11:44:57 +08:00
|
|
|
}
|
2021-02-16 18:47:02 +08:00
|
|
|
func (n *NodeImpl) AvailableSpaceFor(option *VolumeGrowOption) int64 {
|
|
|
|
t := n.getOrCreateDisk(option.DiskType)
|
2022-09-08 00:50:17 +08:00
|
|
|
freeVolumeSlotCount := atomic.LoadInt64(&t.maxVolumeCount) + atomic.LoadInt64(&t.remoteVolumeCount) - atomic.LoadInt64(&t.volumeCount)
|
|
|
|
ecShardCount := atomic.LoadInt64(&t.ecShardCount)
|
|
|
|
if ecShardCount > 0 {
|
|
|
|
freeVolumeSlotCount = freeVolumeSlotCount - ecShardCount/erasure_coding.DataShardsCount - 1
|
2019-06-05 14:41:56 +08:00
|
|
|
}
|
2019-12-04 13:36:42 +08:00
|
|
|
return freeVolumeSlotCount
|
2012-09-03 05:33:48 +08:00
|
|
|
}
|
2012-09-19 16:45:30 +08:00
|
|
|
func (n *NodeImpl) SetParent(node Node) {
|
2012-09-03 05:33:48 +08:00
|
|
|
n.parent = node
|
|
|
|
}
|
2016-05-20 14:57:31 +08:00
|
|
|
func (n *NodeImpl) Children() (ret []Node) {
|
|
|
|
n.RLock()
|
|
|
|
defer n.RUnlock()
|
|
|
|
for _, c := range n.children {
|
|
|
|
ret = append(ret, c)
|
|
|
|
}
|
|
|
|
return ret
|
2012-09-03 16:50:04 +08:00
|
|
|
}
|
|
|
|
func (n *NodeImpl) Parent() Node {
|
2012-09-10 15:18:07 +08:00
|
|
|
return n.parent
|
2012-09-03 16:50:04 +08:00
|
|
|
}
|
2012-10-10 11:53:31 +08:00
|
|
|
func (n *NodeImpl) GetValue() interface{} {
|
|
|
|
return n.value
|
2012-09-19 16:45:30 +08:00
|
|
|
}
|
2020-12-18 05:25:05 +08:00
|
|
|
func (n *NodeImpl) ReserveOneVolume(r int64, option *VolumeGrowOption) (assignedNode *DataNode, err error) {
|
2016-05-20 14:57:31 +08:00
|
|
|
n.RLock()
|
|
|
|
defer n.RUnlock()
|
2012-08-29 15:58:03 +08:00
|
|
|
for _, node := range n.children {
|
2020-12-18 05:25:05 +08:00
|
|
|
freeSpace := node.AvailableSpaceFor(option)
|
2013-06-20 09:10:38 +08:00
|
|
|
// fmt.Println("r =", r, ", node =", node, ", freeSpace =", freeSpace)
|
2012-09-03 05:33:48 +08:00
|
|
|
if freeSpace <= 0 {
|
|
|
|
continue
|
|
|
|
}
|
2012-09-01 17:20:59 +08:00
|
|
|
if r >= freeSpace {
|
2012-08-29 15:58:03 +08:00
|
|
|
r -= freeSpace
|
|
|
|
} else {
|
2020-12-18 05:25:05 +08:00
|
|
|
if node.IsDataNode() && node.AvailableSpaceFor(option) > 0 {
|
2013-06-20 09:10:38 +08:00
|
|
|
// fmt.Println("vid =", vid, " assigned to node =", node, ", freeSpace =", node.FreeSpace())
|
2014-03-03 14:16:54 +08:00
|
|
|
return node.(*DataNode), nil
|
2012-09-03 16:50:04 +08:00
|
|
|
}
|
2020-12-18 05:25:05 +08:00
|
|
|
assignedNode, err = node.ReserveOneVolume(r, option)
|
2018-01-17 19:53:41 +08:00
|
|
|
if err == nil {
|
2014-03-03 14:16:54 +08:00
|
|
|
return
|
2012-09-03 05:33:48 +08:00
|
|
|
}
|
2012-08-29 15:58:03 +08:00
|
|
|
}
|
|
|
|
}
|
2017-08-11 01:26:19 +08:00
|
|
|
return nil, errors.New("No free volume slot found!")
|
2012-08-29 15:58:03 +08:00
|
|
|
}
|
|
|
|
|
2021-02-16 18:47:02 +08:00
|
|
|
func (n *NodeImpl) UpAdjustDiskUsageDelta(deltaDiskUsages *DiskUsages) { //can be negative
|
|
|
|
for diskType, diskUsage := range deltaDiskUsages.usages {
|
|
|
|
existingDisk := n.getOrCreateDisk(diskType)
|
|
|
|
existingDisk.addDiskUsageCounts(diskUsage)
|
2020-03-23 09:32:49 +08:00
|
|
|
}
|
2012-08-29 15:58:03 +08:00
|
|
|
if n.parent != nil {
|
2021-02-16 18:47:02 +08:00
|
|
|
n.parent.UpAdjustDiskUsageDelta(deltaDiskUsages)
|
2012-08-29 15:58:03 +08:00
|
|
|
}
|
2012-08-28 04:52:02 +08:00
|
|
|
}
|
2019-04-19 12:43:36 +08:00
|
|
|
func (n *NodeImpl) UpAdjustMaxVolumeId(vid needle.VolumeId) { //can be negative
|
2012-09-03 05:33:48 +08:00
|
|
|
if n.maxVolumeId < vid {
|
|
|
|
n.maxVolumeId = vid
|
|
|
|
if n.parent != nil {
|
|
|
|
n.parent.UpAdjustMaxVolumeId(vid)
|
|
|
|
}
|
|
|
|
}
|
2012-09-01 17:20:59 +08:00
|
|
|
}
|
2019-04-19 12:43:36 +08:00
|
|
|
func (n *NodeImpl) GetMaxVolumeId() needle.VolumeId {
|
2012-08-31 16:35:11 +08:00
|
|
|
return n.maxVolumeId
|
2012-08-29 16:37:40 +08:00
|
|
|
}
|
|
|
|
|
2012-09-03 05:33:48 +08:00
|
|
|
func (n *NodeImpl) LinkChildNode(node Node) {
|
2016-05-20 14:57:31 +08:00
|
|
|
n.Lock()
|
|
|
|
defer n.Unlock()
|
2021-02-17 02:48:16 +08:00
|
|
|
n.doLinkChildNode(node)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *NodeImpl) doLinkChildNode(node Node) {
|
2012-09-03 05:33:48 +08:00
|
|
|
if n.children[node.Id()] == nil {
|
|
|
|
n.children[node.Id()] = node
|
2021-02-16 18:47:02 +08:00
|
|
|
n.UpAdjustDiskUsageDelta(node.GetDiskUsages())
|
2012-09-17 08:31:15 +08:00
|
|
|
n.UpAdjustMaxVolumeId(node.GetMaxVolumeId())
|
2012-09-19 16:45:30 +08:00
|
|
|
node.SetParent(n)
|
2013-08-09 14:57:22 +08:00
|
|
|
glog.V(0).Infoln(n, "adds child", node.Id())
|
2012-08-29 15:58:03 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-09-03 05:33:48 +08:00
|
|
|
func (n *NodeImpl) UnlinkChildNode(nodeId NodeId) {
|
2016-05-20 14:57:31 +08:00
|
|
|
n.Lock()
|
|
|
|
defer n.Unlock()
|
2012-09-01 17:20:59 +08:00
|
|
|
node := n.children[nodeId]
|
|
|
|
if node != nil {
|
2016-04-11 18:17:22 +08:00
|
|
|
node.SetParent(nil)
|
2012-09-03 05:33:48 +08:00
|
|
|
delete(n.children, node.Id())
|
2021-02-16 18:47:02 +08:00
|
|
|
n.UpAdjustDiskUsageDelta(node.GetDiskUsages().negative())
|
2017-01-10 17:30:00 +08:00
|
|
|
glog.V(0).Infoln(n, "removes", node.Id())
|
2012-08-29 15:58:03 +08:00
|
|
|
}
|
2012-08-28 04:52:02 +08:00
|
|
|
}
|
2012-09-10 15:18:07 +08:00
|
|
|
|
2021-05-06 18:46:14 +08:00
|
|
|
func (n *NodeImpl) CollectDeadNodeAndFullVolumes(freshThreshHold int64, volumeSizeLimit uint64, growThreshold float64) {
|
2012-09-10 15:18:07 +08:00
|
|
|
if n.IsRack() {
|
|
|
|
for _, c := range n.Children() {
|
|
|
|
dn := c.(*DataNode) //can not cast n to DataNode
|
2023-05-22 13:18:50 +08:00
|
|
|
dn.RLock()
|
2016-05-20 14:32:56 +08:00
|
|
|
for _, v := range dn.GetVolumes() {
|
2021-05-06 18:46:14 +08:00
|
|
|
if v.Size >= volumeSizeLimit {
|
2012-12-04 13:27:57 +08:00
|
|
|
//fmt.Println("volume",v.Id,"size",v.Size,">",volumeSizeLimit)
|
2021-05-12 01:05:31 +08:00
|
|
|
n.GetTopology().chanFullVolumes <- v
|
2021-07-01 16:21:14 +08:00
|
|
|
} else if float64(v.Size) > float64(volumeSizeLimit)*growThreshold {
|
2021-05-12 01:05:31 +08:00
|
|
|
n.GetTopology().chanCrowdedVolumes <- v
|
2012-09-10 15:18:07 +08:00
|
|
|
}
|
2022-06-10 18:30:40 +08:00
|
|
|
copyCount := v.ReplicaPlacement.GetCopyCount()
|
|
|
|
if copyCount > 1 {
|
|
|
|
if copyCount > len(n.GetTopology().Lookup(v.Collection, v.Id)) {
|
|
|
|
stats.MasterReplicaPlacementMismatch.WithLabelValues(v.Collection, v.Id.String()).Set(1)
|
|
|
|
} else {
|
|
|
|
stats.MasterReplicaPlacementMismatch.WithLabelValues(v.Collection, v.Id.String()).Set(0)
|
|
|
|
}
|
|
|
|
}
|
2012-09-10 15:18:07 +08:00
|
|
|
}
|
2023-05-22 13:18:50 +08:00
|
|
|
dn.RUnlock()
|
2012-09-10 15:18:07 +08:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for _, c := range n.Children() {
|
2021-05-06 18:46:14 +08:00
|
|
|
c.CollectDeadNodeAndFullVolumes(freshThreshHold, volumeSizeLimit, growThreshold)
|
2012-09-10 15:18:07 +08:00
|
|
|
}
|
|
|
|
}
|
2012-09-19 05:05:12 +08:00
|
|
|
}
|
2012-09-10 15:18:07 +08:00
|
|
|
|
2012-10-10 11:53:31 +08:00
|
|
|
func (n *NodeImpl) GetTopology() *Topology {
|
|
|
|
var p Node
|
|
|
|
p = n
|
|
|
|
for p.Parent() != nil {
|
|
|
|
p = p.Parent()
|
|
|
|
}
|
|
|
|
return p.GetValue().(*Topology)
|
2012-09-10 15:18:07 +08:00
|
|
|
}
|