From 91e4eca1e98cb5195346d90a2cc1fb9c92557213 Mon Sep 17 00:00:00 2001 From: James Hartig Date: Mon, 21 Sep 2020 22:41:38 -0400 Subject: [PATCH 1/2] Fix deadlock with KeepConnected and SendHeartbeat There's the potential where we're writing to a clientConn and it goes away and we're stuck keeping a read lock on clientChansLock. This causes KeepConnected to not be able to remove the client since it requires a write lock on clientChansLock. This ends up backing up SendHeartbeat because it can't get a read lock. --- weed/server/master_grpc_server.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/weed/server/master_grpc_server.go b/weed/server/master_grpc_server.go index f3a2ee013..692909a29 100644 --- a/weed/server/master_grpc_server.go +++ b/weed/server/master_grpc_server.go @@ -187,7 +187,8 @@ func (ms *MasterServer) KeepConnected(stream master_pb.Seaweed_KeepConnectedServ peerAddress := findClientAddress(stream.Context(), req.GrpcPort) - stopChan := make(chan bool) + // buffer by 1 so we don't end up getting stuck writing to stopChan forever + stopChan := make(chan bool, 1) clientName, messageChan := ms.addClient(req.Name, peerAddress) @@ -247,7 +248,12 @@ func (ms *MasterServer) addClient(clientType string, clientAddress string) (clie clientName = clientType + "@" + clientAddress glog.V(0).Infof("+ client %v", clientName) - messageChan = make(chan *master_pb.VolumeLocation) + // we buffer this because otherwise we end up in a potential deadlock where + // the KeepConnected loop is no longer listening on this channel but we're + // trying to send to it in SendHeartbeat and so we can't lock the + // clientChansLock to remove the channel and we're stuck writing to it + // 100 is probably overkill + messageChan = make(chan *master_pb.VolumeLocation, 100) ms.clientChansLock.Lock() ms.clientChans[clientName] = messageChan From 658fc2e5b619c453ddecc805bc363a25f75ff22d Mon Sep 17 00:00:00 2001 From: James Hartig Date: Mon, 21 Sep 2020 22:43:10 -0400 Subject: [PATCH 2/2] Allow option to enable volume pprof on server --- weed/command/server.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weed/command/server.go b/weed/command/server.go index 6f40263bb..aee62290a 100644 --- a/weed/command/server.go +++ b/weed/command/server.go @@ -99,7 +99,7 @@ func init() { serverOptions.v.fileSizeLimitMB = cmdServer.Flag.Int("volume.fileSizeLimitMB", 1024, "limit file size to avoid out of memory") serverOptions.v.publicUrl = cmdServer.Flag.String("volume.publicUrl", "", "publicly accessible address") serverOptions.v.preStopSeconds = cmdServer.Flag.Int("volume.preStopSeconds", 10, "number of seconds between stop send heartbeats and stop volume server") - serverOptions.v.pprof = &False + serverOptions.v.pprof = cmdServer.Flag.Bool("volume.pprof", false, "enable pprof http handlers. precludes --memprofile and --cpuprofile") s3Options.port = cmdServer.Flag.Int("s3.port", 8333, "s3 server http listen port") s3Options.domainName = cmdServer.Flag.String("s3.domainName", "", "suffix of the host name, {bucket}.{domainName}")