From b9933d558973c41ff05f50a509533d1045bae44f Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev <9497591+kmlebedev@users.noreply.github.com> Date: Thu, 6 Oct 2022 21:30:30 +0500 Subject: [PATCH] master server graceful stop (#3797) --- .../compose/local-hashicorp-raft-compose.yml | 6 +-- weed/command/master.go | 8 ++++ weed/server/master_server.go | 37 +++++++++++++------ 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/docker/compose/local-hashicorp-raft-compose.yml b/docker/compose/local-hashicorp-raft-compose.yml index 14b5eb57a..0931b4bc3 100644 --- a/docker/compose/local-hashicorp-raft-compose.yml +++ b/docker/compose/local-hashicorp-raft-compose.yml @@ -6,7 +6,7 @@ services: ports: - 9333:9333 - 19333:19333 - command: "-v=4 master -volumeSizeLimitMB 100 -raftHashicorp -ip=master0 -port=9333 -peers=master1:9334,master2:9335 -mdir=/data" + command: "-v=4 master -volumeSizeLimitMB 100 -raftHashicorp -electionTimeout 1s -ip=master0 -port=9333 -peers=master1:9334,master2:9335 -mdir=/data" volumes: - ./master/0:/data environment: @@ -18,7 +18,7 @@ services: ports: - 9334:9334 - 19334:19334 - command: "-v=4 master -volumeSizeLimitMB 100 -raftHashicorp -ip=master1 -port=9334 -peers=master0:9333,master2:9335 -mdir=/data" + command: "-v=4 master -volumeSizeLimitMB 100 -raftHashicorp -electionTimeout 1s -ip=master1 -port=9334 -peers=master0:9333,master2:9335 -mdir=/data" volumes: - ./master/1:/data environment: @@ -30,7 +30,7 @@ services: ports: - 9335:9335 - 19335:19335 - command: "-v=4 master -volumeSizeLimitMB 100 -raftHashicorp -ip=master2 -port=9335 -peers=master0:9333,master1:9334 -mdir=/data" + command: "-v=4 master -volumeSizeLimitMB 100 -raftHashicorp -electionTimeout 1s -ip=master2 -port=9335 -peers=master0:9333,master1:9334 -mdir=/data" volumes: - ./master/2:/data environment: diff --git a/weed/command/master.go b/weed/command/master.go index f20b2dc62..a74389b1f 100644 --- a/weed/command/master.go +++ b/weed/command/master.go @@ -2,6 +2,7 @@ package command import ( "fmt" + hashicorpRaft "github.com/hashicorp/raft" "net/http" "os" "path" @@ -253,6 +254,13 @@ func startMaster(masterOption MasterOptions, masterWhiteList []string) { go httpS.Serve(masterListener) } + grace.OnInterrupt(ms.Shutdown) + grace.OnInterrupt(grpcS.GracefulStop) + grace.OnReload(func() { + if ms.Topo.HashicorpRaft != nil && ms.Topo.HashicorpRaft.State() == hashicorpRaft.Leader { + ms.Topo.HashicorpRaft.LeadershipTransfer() + } + }) select {} } diff --git a/weed/server/master_server.go b/weed/server/master_server.go index 758f212ad..cd20f78fc 100644 --- a/weed/server/master_server.go +++ b/weed/server/master_server.go @@ -174,39 +174,45 @@ func (ms *MasterServer) SetRaftServer(raftServer *RaftServer) { glog.V(0).Infof("leader change event: %+v => %+v", e.PrevValue(), e.Value()) stats.MasterLeaderChangeCounter.WithLabelValues(fmt.Sprintf("%+v", e.Value())).Inc() if ms.Topo.RaftServer.Leader() != "" { - glog.V(0).Infoln("[", ms.Topo.RaftServer.Name(), "]", ms.Topo.RaftServer.Leader(), "becomes leader.") + glog.V(0).Infof("[%s] %s becomes leader.", ms.Topo.RaftServer.Name(), ms.Topo.RaftServer.Leader()) } }) - raftServerName = ms.Topo.RaftServer.Name() + raftServerName = fmt.Sprintf("[%s]", ms.Topo.RaftServer.Name()) } else if raftServer.RaftHashicorp != nil { ms.Topo.HashicorpRaft = raftServer.RaftHashicorp leaderCh := raftServer.RaftHashicorp.LeaderCh() - prevLeader := ms.Topo.HashicorpRaft.Leader() + prevLeader, _ := ms.Topo.HashicorpRaft.LeaderWithID() + raftServerName = ms.Topo.HashicorpRaft.String() go func() { for { select { case isLeader := <-leaderCh: - leader := ms.Topo.HashicorpRaft.Leader() + ms.Topo.RaftServerAccessLock.RLock() + leader, _ := ms.Topo.HashicorpRaft.LeaderWithID() + ms.Topo.RaftServerAccessLock.RUnlock() glog.V(0).Infof("is leader %+v change event: %+v => %+v", isLeader, prevLeader, leader) stats.MasterLeaderChangeCounter.WithLabelValues(fmt.Sprintf("%+v", leader)).Inc() prevLeader = leader } } }() - raftServerName = ms.Topo.HashicorpRaft.String() } ms.Topo.RaftServerAccessLock.Unlock() if ms.Topo.IsLeader() { - glog.V(0).Infoln("[", raftServerName, "]", "I am the leader!") + glog.V(0).Infof("%s I am the leader!", raftServerName) } else { + var raftServerLeader string ms.Topo.RaftServerAccessLock.RLock() - if ms.Topo.RaftServer != nil && ms.Topo.RaftServer.Leader() != "" { - glog.V(0).Infoln("[", ms.Topo.RaftServer.Name(), "]", ms.Topo.RaftServer.Leader(), "is the leader.") - } else if ms.Topo.HashicorpRaft != nil && ms.Topo.HashicorpRaft.Leader() != "" { - glog.V(0).Infoln("[", ms.Topo.HashicorpRaft.String(), "]", ms.Topo.HashicorpRaft.Leader(), "is the leader.") + if ms.Topo.RaftServer != nil { + raftServerLeader = ms.Topo.RaftServer.Leader() + } else if ms.Topo.HashicorpRaft != nil { + raftServerName = ms.Topo.HashicorpRaft.String() + raftServerLeaderAddr, _ := ms.Topo.HashicorpRaft.LeaderWithID() + raftServerLeader = string(raftServerLeaderAddr) } ms.Topo.RaftServerAccessLock.RUnlock() + glog.V(0).Infof("%s %s - is the leader.", raftServerName, raftServerLeader) } } @@ -388,8 +394,17 @@ func (ms *MasterServer) OnPeerUpdate(update *master_pb.ClusterNodeUpdate, startF } else { glog.V(0).Infof("master %s successfully responded to ping", peerName) } - return nil }) } } + +func (ms *MasterServer) Shutdown() { + if ms.Topo == nil || ms.Topo.HashicorpRaft == nil { + return + } + if ms.Topo.HashicorpRaft.State() == hashicorpRaft.Leader { + ms.Topo.HashicorpRaft.LeadershipTransfer() + } + ms.Topo.HashicorpRaft.Shutdown() +}