From 91e4eca1e98cb5195346d90a2cc1fb9c92557213 Mon Sep 17 00:00:00 2001 From: James Hartig Date: Mon, 21 Sep 2020 22:41:38 -0400 Subject: [PATCH] Fix deadlock with KeepConnected and SendHeartbeat There's the potential for a deadlock: while we're writing to a clientConn, it can go away, leaving us stuck holding a read lock on clientChansLock. This prevents KeepConnected from removing the client, since that requires a write lock on clientChansLock. This ends up backing up SendHeartbeat because it can't get a read lock while a writer is waiting. --- weed/server/master_grpc_server.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/weed/server/master_grpc_server.go b/weed/server/master_grpc_server.go index f3a2ee013..692909a29 100644 --- a/weed/server/master_grpc_server.go +++ b/weed/server/master_grpc_server.go @@ -187,7 +187,8 @@ func (ms *MasterServer) KeepConnected(stream master_pb.Seaweed_KeepConnectedServ peerAddress := findClientAddress(stream.Context(), req.GrpcPort) - stopChan := make(chan bool) + // buffer by 1 so we don't end up getting stuck writing to stopChan forever + stopChan := make(chan bool, 1) clientName, messageChan := ms.addClient(req.Name, peerAddress) @@ -247,7 +248,12 @@ func (ms *MasterServer) addClient(clientType string, clientAddress string) (clie clientName = clientType + "@" + clientAddress glog.V(0).Infof("+ client %v", clientName) - messageChan = make(chan *master_pb.VolumeLocation) + // we buffer this because otherwise we end up in a potential deadlock where + // the KeepConnected loop is no longer listening on this channel but we're + // trying to send to it in SendHeartbeat and so we can't lock the + // clientChansLock to remove the channel and we're stuck writing to it + // 100 is probably overkill + messageChan = make(chan *master_pb.VolumeLocation, 100) ms.clientChansLock.Lock() ms.clientChans[clientName] = messageChan