mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2024-11-27 12:49:41 +08:00
fix deadlock hang when broadcast to clients (#6184)
fix deadlock when broadcast to clients when master thransfer leader, the old master will disconnect with all filers and volumeServers, if the cluster is a big , the broadcast messages may be more big than the max of the channel len 100, then if the KeepConnect was not listen on the channel in disconnect, it will deadlock. and the whole cluster will not serve!
This commit is contained in:
parent
0f2c3648dc
commit
a7973ed7d1
@ -4,12 +4,13 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/google/uuid"
|
||||
"github.com/seaweedfs/seaweedfs/weed/cluster"
|
||||
"net"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/seaweedfs/seaweedfs/weed/cluster"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/weed/pb"
|
||||
"github.com/seaweedfs/seaweedfs/weed/stats"
|
||||
"github.com/seaweedfs/seaweedfs/weed/storage/backend"
|
||||
@ -89,7 +90,7 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ
|
||||
glog.V(0).Infof("unregister disconnected volume server %s:%d", dn.Ip, dn.Port)
|
||||
ms.UnRegisterUuids(dn.Ip, dn.Port)
|
||||
|
||||
if len(message.DeletedVids) > 0 || len(message.DeletedEcVids) > 0 {
|
||||
if ms.Topo.IsLeader() && (len(message.DeletedVids) > 0 || len(message.DeletedEcVids) > 0) {
|
||||
ms.broadcastToClients(&master_pb.KeepConnectedResponse{VolumeLocation: message})
|
||||
}
|
||||
}
|
||||
@ -338,8 +339,14 @@ func (ms *MasterServer) KeepConnected(stream master_pb.Seaweed_KeepConnectedServ
|
||||
|
||||
func (ms *MasterServer) broadcastToClients(message *master_pb.KeepConnectedResponse) {
|
||||
ms.clientChansLock.RLock()
|
||||
for _, ch := range ms.clientChans {
|
||||
ch <- message
|
||||
for client, ch := range ms.clientChans {
|
||||
select {
|
||||
case ch <- message:
|
||||
glog.V(4).Infof("send message to %s", client)
|
||||
default:
|
||||
stats.MasterBroadcastToFullErrorCounter.Inc()
|
||||
glog.Errorf("broadcastToClients %s message full", client)
|
||||
}
|
||||
}
|
||||
ms.clientChansLock.RUnlock()
|
||||
}
|
||||
|
@ -94,6 +94,14 @@ var (
|
||||
Help: "Counter of master pick for write error",
|
||||
})
|
||||
|
||||
MasterBroadcastToFullErrorCounter = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: Namespace,
|
||||
Subsystem: "master",
|
||||
Name: "broadcast_to_full",
|
||||
Help: "Counter of master broadcast send to full message channel err",
|
||||
})
|
||||
|
||||
MasterLeaderChangeCounter = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: Namespace,
|
||||
@ -314,6 +322,7 @@ func init() {
|
||||
Gather.MustRegister(MasterReplicaPlacementMismatch)
|
||||
Gather.MustRegister(MasterVolumeLayoutWritable)
|
||||
Gather.MustRegister(MasterVolumeLayoutCrowded)
|
||||
Gather.MustRegister(MasterBroadcastToFullErrorCounter)
|
||||
|
||||
Gather.MustRegister(FilerRequestCounter)
|
||||
Gather.MustRegister(FilerHandlerCounter)
|
||||
|
Loading…
Reference in New Issue
Block a user