Add raft server access mutex to avoid races (#3503)

This commit is contained in:
Patrick Schmidt 2022-08-24 18:49:05 +02:00 committed by GitHub
parent f7e4359b59
commit 7b424a54dc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 72 additions and 25 deletions

View File

@ -206,11 +206,13 @@ func startMaster(masterOption MasterOptions, masterWhiteList []string) {
if !*m.raftHashicorp { if !*m.raftHashicorp {
go func() { go func() {
time.Sleep(timeSleep) time.Sleep(timeSleep)
if ms.Topo.RaftServer.Leader() == "" && ms.Topo.RaftServer.IsLogEmpty() && isTheFirstOne(myMasterAddress, peers) {
if ms.MasterClient.FindLeaderFromOtherPeers(myMasterAddress) == "" { ms.Topo.RaftServerAccessLock.RLock()
isEmptyMaster := ms.Topo.RaftServer.Leader() == "" && ms.Topo.RaftServer.IsLogEmpty()
if isEmptyMaster && isTheFirstOne(myMasterAddress, peers) && ms.MasterClient.FindLeaderFromOtherPeers(myMasterAddress) == "" {
raftServer.DoJoinCommand() raftServer.DoJoinCommand()
} }
} ms.Topo.RaftServerAccessLock.RUnlock()
}() }()
} }

View File

@ -3,7 +3,9 @@ package weed_server
import ( import (
"context" "context"
"fmt" "fmt"
"github.com/hashicorp/raft" "github.com/hashicorp/raft"
"github.com/seaweedfs/seaweedfs/weed/cluster" "github.com/seaweedfs/seaweedfs/weed/cluster"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb" "github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
) )
@ -11,11 +13,14 @@ import (
func (ms *MasterServer) RaftListClusterServers(ctx context.Context, req *master_pb.RaftListClusterServersRequest) (*master_pb.RaftListClusterServersResponse, error) { func (ms *MasterServer) RaftListClusterServers(ctx context.Context, req *master_pb.RaftListClusterServersRequest) (*master_pb.RaftListClusterServersResponse, error) {
resp := &master_pb.RaftListClusterServersResponse{} resp := &master_pb.RaftListClusterServersResponse{}
ms.Topo.RaftServerAccessLock.RLock()
if ms.Topo.HashicorpRaft == nil { if ms.Topo.HashicorpRaft == nil {
ms.Topo.RaftServerAccessLock.RUnlock()
return resp, nil return resp, nil
} }
servers := ms.Topo.HashicorpRaft.GetConfiguration().Configuration().Servers servers := ms.Topo.HashicorpRaft.GetConfiguration().Configuration().Servers
ms.Topo.RaftServerAccessLock.RUnlock()
for _, server := range servers { for _, server := range servers {
resp.ClusterServers = append(resp.ClusterServers, &master_pb.RaftListClusterServersResponse_ClusterServers{ resp.ClusterServers = append(resp.ClusterServers, &master_pb.RaftListClusterServersResponse_ClusterServers{
@ -30,6 +35,9 @@ func (ms *MasterServer) RaftListClusterServers(ctx context.Context, req *master_
func (ms *MasterServer) RaftAddServer(ctx context.Context, req *master_pb.RaftAddServerRequest) (*master_pb.RaftAddServerResponse, error) { func (ms *MasterServer) RaftAddServer(ctx context.Context, req *master_pb.RaftAddServerRequest) (*master_pb.RaftAddServerResponse, error) {
resp := &master_pb.RaftAddServerResponse{} resp := &master_pb.RaftAddServerResponse{}
ms.Topo.RaftServerAccessLock.RLock()
defer ms.Topo.RaftServerAccessLock.RUnlock()
if ms.Topo.HashicorpRaft == nil { if ms.Topo.HashicorpRaft == nil {
return resp, nil return resp, nil
} }
@ -54,6 +62,9 @@ func (ms *MasterServer) RaftAddServer(ctx context.Context, req *master_pb.RaftAd
func (ms *MasterServer) RaftRemoveServer(ctx context.Context, req *master_pb.RaftRemoveServerRequest) (*master_pb.RaftRemoveServerResponse, error) { func (ms *MasterServer) RaftRemoveServer(ctx context.Context, req *master_pb.RaftRemoveServerRequest) (*master_pb.RaftRemoveServerResponse, error) {
resp := &master_pb.RaftRemoveServerResponse{} resp := &master_pb.RaftRemoveServerResponse{}
ms.Topo.RaftServerAccessLock.RLock()
defer ms.Topo.RaftServerAccessLock.RUnlock()
if ms.Topo.HashicorpRaft == nil { if ms.Topo.HashicorpRaft == nil {
return resp, nil return resp, nil
} }

View File

@ -166,6 +166,8 @@ func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.Se
func (ms *MasterServer) SetRaftServer(raftServer *RaftServer) { func (ms *MasterServer) SetRaftServer(raftServer *RaftServer) {
var raftServerName string var raftServerName string
ms.Topo.RaftServerAccessLock.Lock()
if raftServer.raftServer != nil { if raftServer.raftServer != nil {
ms.Topo.RaftServer = raftServer.raftServer ms.Topo.RaftServer = raftServer.raftServer
ms.Topo.RaftServer.AddEventListener(raft.LeaderChangeEventType, func(e raft.Event) { ms.Topo.RaftServer.AddEventListener(raft.LeaderChangeEventType, func(e raft.Event) {
@ -193,14 +195,18 @@ func (ms *MasterServer) SetRaftServer(raftServer *RaftServer) {
}() }()
raftServerName = ms.Topo.HashicorpRaft.String() raftServerName = ms.Topo.HashicorpRaft.String()
} }
ms.Topo.RaftServerAccessLock.Unlock()
if ms.Topo.IsLeader() { if ms.Topo.IsLeader() {
glog.V(0).Infoln("[", raftServerName, "]", "I am the leader!") glog.V(0).Infoln("[", raftServerName, "]", "I am the leader!")
} else { } else {
ms.Topo.RaftServerAccessLock.RLock()
if ms.Topo.RaftServer != nil && ms.Topo.RaftServer.Leader() != "" { if ms.Topo.RaftServer != nil && ms.Topo.RaftServer.Leader() != "" {
glog.V(0).Infoln("[", ms.Topo.RaftServer.Name(), "]", ms.Topo.RaftServer.Leader(), "is the leader.") glog.V(0).Infoln("[", ms.Topo.RaftServer.Name(), "]", ms.Topo.RaftServer.Leader(), "is the leader.")
} else if ms.Topo.HashicorpRaft != nil && ms.Topo.HashicorpRaft.Leader() != "" { } else if ms.Topo.HashicorpRaft != nil && ms.Topo.HashicorpRaft.Leader() != "" {
glog.V(0).Infoln("[", ms.Topo.HashicorpRaft.String(), "]", ms.Topo.HashicorpRaft.Leader(), "is the leader.") glog.V(0).Infoln("[", ms.Topo.HashicorpRaft.String(), "]", ms.Topo.HashicorpRaft.Leader(), "is the leader.")
} }
ms.Topo.RaftServerAccessLock.RUnlock()
} }
} }
@ -210,16 +216,15 @@ func (ms *MasterServer) proxyToLeader(f http.HandlerFunc) http.HandlerFunc {
f(w, r) f(w, r)
return return
} }
var raftServerLeader string
if ms.Topo.RaftServer != nil && ms.Topo.RaftServer.Leader() != "" { // get the current raft leader
raftServerLeader = ms.Topo.RaftServer.Leader() leaderAddr, _ := ms.Topo.MaybeLeader()
} else if ms.Topo.HashicorpRaft != nil && ms.Topo.HashicorpRaft.Leader() != "" { raftServerLeader := string(leaderAddr)
raftServerLeader = string(ms.Topo.HashicorpRaft.Leader())
}
if raftServerLeader == "" { if raftServerLeader == "" {
f(w, r) f(w, r)
return return
} }
ms.boundedLeaderChan <- 1 ms.boundedLeaderChan <- 1
defer func() { <-ms.boundedLeaderChan }() defer func() { <-ms.boundedLeaderChan }()
targetUrl, err := url.Parse("http://" + raftServerLeader) targetUrl, err := url.Parse("http://" + raftServerLeader)
@ -228,6 +233,8 @@ func (ms *MasterServer) proxyToLeader(f http.HandlerFunc) http.HandlerFunc {
fmt.Errorf("Leader URL http://%s Parse Error: %v", raftServerLeader, err)) fmt.Errorf("Leader URL http://%s Parse Error: %v", raftServerLeader, err))
return return
} }
// proxy to leader
glog.V(4).Infoln("proxying to leader", raftServerLeader) glog.V(4).Infoln("proxying to leader", raftServerLeader)
proxy := httputil.NewSingleHostReverseProxy(targetUrl) proxy := httputil.NewSingleHostReverseProxy(targetUrl)
director := proxy.Director director := proxy.Director
@ -336,6 +343,9 @@ func (ms *MasterServer) createSequencer(option *MasterOption) sequence.Sequencer
} }
func (ms *MasterServer) OnPeerUpdate(update *master_pb.ClusterNodeUpdate, startFrom time.Time) { func (ms *MasterServer) OnPeerUpdate(update *master_pb.ClusterNodeUpdate, startFrom time.Time) {
ms.Topo.RaftServerAccessLock.RLock()
defer ms.Topo.RaftServerAccessLock.RUnlock()
if update.NodeType != cluster.MasterType || ms.Topo.HashicorpRaft == nil { if update.NodeType != cluster.MasterType || ms.Topo.HashicorpRaft == nil {
return return
} }

View File

@ -16,6 +16,10 @@ func (ms *MasterServer) uiStatusHandler(w http.ResponseWriter, r *http.Request)
infos := make(map[string]interface{}) infos := make(map[string]interface{})
infos["Up Time"] = time.Now().Sub(startTime).String() infos["Up Time"] = time.Now().Sub(startTime).String()
infos["Max Volume Id"] = ms.Topo.GetMaxVolumeId() infos["Max Volume Id"] = ms.Topo.GetMaxVolumeId()
ms.Topo.RaftServerAccessLock.RLock()
defer ms.Topo.RaftServerAccessLock.RUnlock()
if ms.Topo.RaftServer != nil { if ms.Topo.RaftServer != nil {
args := struct { args := struct {
Version string Version string

View File

@ -44,6 +44,7 @@ type Topology struct {
Configuration *Configuration Configuration *Configuration
RaftServer raft.Server RaftServer raft.Server
RaftServerAccessLock sync.RWMutex
HashicorpRaft *hashicorpRaft.Raft HashicorpRaft *hashicorpRaft.Raft
UuidAccessLock sync.RWMutex UuidAccessLock sync.RWMutex
UuidMap map[string][]string UuidMap map[string][]string
@ -73,6 +74,9 @@ func NewTopology(id string, seq sequence.Sequencer, volumeSizeLimit uint64, puls
} }
func (t *Topology) IsLeader() bool { func (t *Topology) IsLeader() bool {
t.RaftServerAccessLock.RLock()
defer t.RaftServerAccessLock.RUnlock()
if t.RaftServer != nil { if t.RaftServer != nil {
if t.RaftServer.State() == raft.Leader { if t.RaftServer.State() == raft.Leader {
return true return true
@ -90,23 +94,35 @@ func (t *Topology) IsLeader() bool {
return false return false
} }
func (t *Topology) Leader() (pb.ServerAddress, error) { func (t *Topology) Leader() (l pb.ServerAddress, err error) {
var l pb.ServerAddress
for count := 0; count < 3; count++ { for count := 0; count < 3; count++ {
l, err = t.MaybeLeader()
if err != nil {
return
}
if l != "" {
break
}
time.Sleep(time.Duration(5+count) * time.Second)
}
return
}
func (t *Topology) MaybeLeader() (l pb.ServerAddress, err error) {
t.RaftServerAccessLock.RLock()
defer t.RaftServerAccessLock.RUnlock()
if t.RaftServer != nil { if t.RaftServer != nil {
l = pb.ServerAddress(t.RaftServer.Leader()) l = pb.ServerAddress(t.RaftServer.Leader())
} else if t.HashicorpRaft != nil { } else if t.HashicorpRaft != nil {
l = pb.ServerAddress(t.HashicorpRaft.Leader()) l = pb.ServerAddress(t.HashicorpRaft.Leader())
} else { } else {
return "", errors.New("Raft Server not ready yet!") err = errors.New("Raft Server not ready yet!")
} }
if l != "" {
break return
} else {
time.Sleep(time.Duration(5+count) * time.Second)
}
}
return l, nil
} }
func (t *Topology) Lookup(collection string, vid needle.VolumeId) (dataNodes []*DataNode) { func (t *Topology) Lookup(collection string, vid needle.VolumeId) (dataNodes []*DataNode) {
@ -136,6 +152,10 @@ func (t *Topology) Lookup(collection string, vid needle.VolumeId) (dataNodes []*
func (t *Topology) NextVolumeId() (needle.VolumeId, error) { func (t *Topology) NextVolumeId() (needle.VolumeId, error) {
vid := t.GetMaxVolumeId() vid := t.GetMaxVolumeId()
next := vid.Next() next := vid.Next()
t.RaftServerAccessLock.RLock()
defer t.RaftServerAccessLock.RUnlock()
if t.RaftServer != nil { if t.RaftServer != nil {
if _, err := t.RaftServer.Do(NewMaxVolumeIdCommand(next)); err != nil { if _, err := t.RaftServer.Do(NewMaxVolumeIdCommand(next)); err != nil {
return 0, err return 0, err