use safe onPeerUpdateDoneCns

This commit is contained in:
Konstantin Lebedev 2022-07-21 15:51:14 +05:00
parent 7875470e74
commit 93ca87b7cb

View file

@ -11,7 +11,6 @@ import (
"regexp" "regexp"
"strings" "strings"
"sync" "sync"
"sync/atomic"
"time" "time"
"github.com/chrislusf/seaweedfs/weed/cluster" "github.com/chrislusf/seaweedfs/weed/cluster"
@ -33,9 +32,10 @@ import (
) )
const ( const (
SequencerType = "master.sequencer.type" SequencerType = "master.sequencer.type"
SequencerSnowflakeId = "master.sequencer.sequencer_snowflake_id" SequencerSnowflakeId = "master.sequencer.sequencer_snowflake_id"
RaftServerRemovalTime = 72 * time.Minute RaftServerRemovalTime = 72 * time.Minute
ResetRaftServerRemovalTimeMsg = "ResetRaftServerRemovalTime"
) )
type MasterOption struct { type MasterOption struct {
@ -66,8 +66,8 @@ type MasterServer struct {
boundedLeaderChan chan int boundedLeaderChan chan int
onPeerUpdateDoneCn chan string onPeerUpdateDoneCns map[string]*chan string
onPeerUpdateGoroutineCount int32 onPeerUpdateLock sync.RWMutex
// notifying clients // notifying clients
clientChansLock sync.RWMutex clientChansLock sync.RWMutex
@ -119,9 +119,9 @@ func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.Se
Cluster: cluster.NewCluster(), Cluster: cluster.NewCluster(),
} }
ms.boundedLeaderChan = make(chan int, 16) ms.boundedLeaderChan = make(chan int, 16)
ms.onPeerUpdateDoneCn = make(chan string)
ms.MasterClient.OnPeerUpdate = ms.OnPeerUpdate ms.MasterClient.OnPeerUpdate = ms.OnPeerUpdate
ms.onPeerUpdateDoneCns = make(map[string]*chan string)
seq := ms.createSequencer(option) seq := ms.createSequencer(option)
if nil == seq { if nil == seq {
@ -367,16 +367,31 @@ func (ms *MasterServer) OnPeerUpdate(update *master_pb.ClusterNodeUpdate, startF
hashicorpRaft.ServerAddress(peerAddress.ToGrpcAddress()), 0, 0) hashicorpRaft.ServerAddress(peerAddress.ToGrpcAddress()), 0, 0)
} }
} }
if atomic.LoadInt32(&ms.onPeerUpdateGoroutineCount) > 0 { ms.onPeerUpdateLock.RLock()
ms.onPeerUpdateDoneCn <- peerName if len(ms.onPeerUpdateDoneCns) > 0 {
for _, onPeerUpdateDoneCn := range ms.onPeerUpdateDoneCns {
*onPeerUpdateDoneCn <- peerName
}
} }
ms.onPeerUpdateLock.RUnlock()
} else if isLeader { } else if isLeader {
if onPeerUpdateDoneCnPrev, ok := ms.onPeerUpdateDoneCns[peerName]; ok {
*onPeerUpdateDoneCnPrev <- ResetRaftServerRemovalTimeMsg
return
}
onPeerUpdateDoneCn := make(chan string)
ms.onPeerUpdateLock.Lock()
ms.onPeerUpdateDoneCns[peerName] = &onPeerUpdateDoneCn
ms.onPeerUpdateLock.Unlock()
go func(peerName string) { go func(peerName string) {
raftServerRemovalTimeAfter := time.After(RaftServerRemovalTime) raftServerRemovalTimeAfter := time.After(RaftServerRemovalTime)
raftServerPingTicker := time.NewTicker(5 * time.Minute) raftServerPingTicker := time.NewTicker(5 * time.Minute)
atomic.AddInt32(&ms.onPeerUpdateGoroutineCount, 1)
defer func() { defer func() {
atomic.AddInt32(&ms.onPeerUpdateGoroutineCount, -1) ms.onPeerUpdateLock.Lock()
delete(ms.onPeerUpdateDoneCns, peerName)
ms.onPeerUpdateLock.Unlock()
close(onPeerUpdateDoneCn)
}() }()
for { for {
select { select {
@ -408,11 +423,16 @@ func (ms *MasterServer) OnPeerUpdate(update *master_pb.ClusterNodeUpdate, startF
} }
glog.V(0).Infof("old raft server %s removed", peerName) glog.V(0).Infof("old raft server %s removed", peerName)
return return
case peerDone := <-ms.onPeerUpdateDoneCn: case peerDone := <-onPeerUpdateDoneCn:
if peerName == peerDone { if peerName == peerDone {
glog.V(0).Infof("raft server %s remove canceled on onPeerUpdate", peerName) glog.V(0).Infof("raft server %s remove canceled on onPeerUpdate", peerName)
return return
} }
if peerDone == ResetRaftServerRemovalTimeMsg {
raftServerRemovalTimeAfter = time.After(RaftServerRemovalTime)
glog.V(0).Infof("rest wait %v for raft server %s activity, otherwise delete",
RaftServerRemovalTime, peerName)
}
} }
} }
}(peerName) }(peerName)