Merge pull request #2613 from kmlebedev/masterPromMetrics

Add Prometheus metrics for the master server and master client
This commit is contained in:
Chris Lu 2022-01-25 08:13:14 -08:00 committed by GitHub
commit 9596fce562
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 65 additions and 3 deletions

View File

@ -3,6 +3,7 @@ package weed_server
import ( import (
"context" "context"
"github.com/chrislusf/seaweedfs/weed/pb" "github.com/chrislusf/seaweedfs/weed/pb"
"github.com/chrislusf/seaweedfs/weed/stats"
"github.com/chrislusf/seaweedfs/weed/storage/backend" "github.com/chrislusf/seaweedfs/weed/storage/backend"
"github.com/chrislusf/seaweedfs/weed/util" "github.com/chrislusf/seaweedfs/weed/util"
"net" "net"
@ -57,6 +58,7 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ
} else { } else {
glog.Warningf("SendHeartbeat.Recv: %v", err) glog.Warningf("SendHeartbeat.Recv: %v", err)
} }
stats.MasterReceivedHeartbeatCounter.WithLabelValues("error").Inc()
return err return err
} }
@ -74,12 +76,15 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ
glog.Warningf("SendHeartbeat.Send volume size to %s:%d %v", dn.Ip, dn.Port, err) glog.Warningf("SendHeartbeat.Send volume size to %s:%d %v", dn.Ip, dn.Port, err)
return err return err
} }
stats.MasterReceivedHeartbeatCounter.WithLabelValues("dataNode").Inc()
dn.Counter++ dn.Counter++
} }
dn.AdjustMaxVolumeCounts(heartbeat.MaxVolumeCounts) dn.AdjustMaxVolumeCounts(heartbeat.MaxVolumeCounts)
glog.V(4).Infof("master received heartbeat %s", heartbeat.String()) glog.V(4).Infof("master received heartbeat %s", heartbeat.String())
stats.MasterReceivedHeartbeatCounter.WithLabelValues("total").Inc()
var dataCenter string var dataCenter string
if dc := dn.GetDataCenter(); dc != nil { if dc := dn.GetDataCenter(); dc != nil {
dataCenter = string(dc.Id()) dataCenter = string(dc.Id())
@ -89,6 +94,12 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ
PublicUrl: dn.PublicUrl, PublicUrl: dn.PublicUrl,
DataCenter: dataCenter, DataCenter: dataCenter,
} }
// Count heartbeats that carry incremental volume changes. These are events
// received by the master, so they are recorded on
// MasterReceivedHeartbeatCounter like the other heartbeat types in this
// function, rather than on the filer's request counter.
if len(heartbeat.NewVolumes) > 0 {
	stats.MasterReceivedHeartbeatCounter.WithLabelValues("newVolumes").Inc()
}
if len(heartbeat.DeletedVolumes) > 0 {
	stats.MasterReceivedHeartbeatCounter.WithLabelValues("deletedVolumes").Inc()
}
if len(heartbeat.NewVolumes) > 0 || len(heartbeat.DeletedVolumes) > 0 { if len(heartbeat.NewVolumes) > 0 || len(heartbeat.DeletedVolumes) > 0 {
// process delta volume ids if exists for fast volume id updates // process delta volume ids if exists for fast volume id updates
for _, volInfo := range heartbeat.NewVolumes { for _, volInfo := range heartbeat.NewVolumes {
@ -103,6 +114,7 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ
if len(heartbeat.Volumes) > 0 || heartbeat.HasNoVolumes { if len(heartbeat.Volumes) > 0 || heartbeat.HasNoVolumes {
// process heartbeat.Volumes // process heartbeat.Volumes
stats.MasterReceivedHeartbeatCounter.WithLabelValues("Volumes").Inc()
newVolumes, deletedVolumes := ms.Topo.SyncDataNodeRegistration(heartbeat.Volumes, dn) newVolumes, deletedVolumes := ms.Topo.SyncDataNodeRegistration(heartbeat.Volumes, dn)
for _, v := range newVolumes { for _, v := range newVolumes {
@ -116,7 +128,7 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ
} }
if len(heartbeat.NewEcShards) > 0 || len(heartbeat.DeletedEcShards) > 0 { if len(heartbeat.NewEcShards) > 0 || len(heartbeat.DeletedEcShards) > 0 {
stats.MasterReceivedHeartbeatCounter.WithLabelValues("newEcShards").Inc()
// update master internal volume layouts // update master internal volume layouts
ms.Topo.IncrementalSyncDataNodeEcShards(heartbeat.NewEcShards, heartbeat.DeletedEcShards, dn) ms.Topo.IncrementalSyncDataNodeEcShards(heartbeat.NewEcShards, heartbeat.DeletedEcShards, dn)
@ -133,7 +145,8 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ
} }
if len(heartbeat.EcShards) > 0 || heartbeat.HasNoEcShards { if len(heartbeat.EcShards) > 0 || heartbeat.HasNoEcShards {
glog.V(1).Infof("master received ec shards from %s: %+v", dn.Url(), heartbeat.EcShards) stats.MasterReceivedHeartbeatCounter.WithLabelValues("ecShards").Inc()
glog.V(4).Infof("master received ec shards from %s: %+v", dn.Url(), heartbeat.EcShards)
newShards, deletedShards := ms.Topo.SyncDataNodeEcShards(heartbeat.EcShards, dn) newShards, deletedShards := ms.Topo.SyncDataNodeEcShards(heartbeat.EcShards, dn)
// broadcast the ec vid changes to master clients // broadcast the ec vid changes to master clients
@ -224,7 +237,10 @@ func (ms *MasterServer) KeepConnected(stream master_pb.Seaweed_KeepConnectedServ
} }
case <-ticker.C: case <-ticker.C:
if !ms.Topo.IsLeader() { if !ms.Topo.IsLeader() {
stats.MasterRaftIsleader.Set(0)
return ms.informNewLeader(stream) return ms.informNewLeader(stream)
} else {
stats.MasterRaftIsleader.Set(1)
} }
case <-stopChan: case <-stopChan:
return nil return nil

View File

@ -2,6 +2,7 @@ package weed_server
import ( import (
"fmt" "fmt"
"github.com/chrislusf/seaweedfs/weed/stats"
"net/http" "net/http"
"net/http/httputil" "net/http/httputil"
"net/url" "net/url"
@ -162,6 +163,7 @@ func (ms *MasterServer) SetRaftServer(raftServer *RaftServer) {
ms.Topo.RaftServer = raftServer.raftServer ms.Topo.RaftServer = raftServer.raftServer
ms.Topo.RaftServer.AddEventListener(raft.LeaderChangeEventType, func(e raft.Event) { ms.Topo.RaftServer.AddEventListener(raft.LeaderChangeEventType, func(e raft.Event) {
glog.V(0).Infof("leader change event: %+v => %+v", e.PrevValue(), e.Value()) glog.V(0).Infof("leader change event: %+v => %+v", e.PrevValue(), e.Value())
stats.MasterLeaderChangeCounter.WithLabelValues(fmt.Sprintf("%+v", e.Value())).Inc()
if ms.Topo.RaftServer.Leader() != "" { if ms.Topo.RaftServer.Leader() != "" {
glog.V(0).Infoln("[", ms.Topo.RaftServer.Name(), "]", ms.Topo.RaftServer.Leader(), "becomes leader.") glog.V(0).Infoln("[", ms.Topo.RaftServer.Name(), "]", ms.Topo.RaftServer.Leader(), "becomes leader.")
} }

View File

@ -20,6 +20,38 @@ import (
var ( var (
Gather = prometheus.NewRegistry() Gather = prometheus.NewRegistry()
// MasterClientConnectCounter counts master client connection events,
// partitioned by the "type" label. The master client increments it in
// tryConnectToMaster with values such as "total", "failed", "failedToSend",
// "failedToReceive", "redirectedToleader" and "onPeerUpdate".
MasterClientConnectCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "SeaweedFS",
Subsystem: "wdclient",
Name: "connect_updates",
Help: "Counter of master client leader updates.",
}, []string{"type"})
// MasterRaftIsleader is a gauge that the master's KeepConnected loop sets
// on each ticker tick: 1 when the topology reports this master as leader,
// 0 otherwise (right before the client is told about the new leader).
MasterRaftIsleader = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "SeaweedFS",
Subsystem: "master",
Name: "is_leader",
Help: "is leader",
})
// MasterReceivedHeartbeatCounter counts heartbeat events handled by the
// master's SendHeartbeat stream, partitioned by the "type" label (for
// example "total", "error", "dataNode", "Volumes", "newEcShards",
// "ecShards").
MasterReceivedHeartbeatCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "SeaweedFS",
Subsystem: "master",
Name: "received_heartbeats",
Help: "Counter of master received heartbeat.",
}, []string{"type"})
// MasterLeaderChangeCounter counts raft leader change events. The "type"
// label carries the event value formatted with %+v — presumably the new
// leader's identifier; confirm against the raft event payload.
MasterLeaderChangeCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "SeaweedFS",
Subsystem: "master",
Name: "leader_changes",
Help: "Counter of master leader changes.",
}, []string{"type"})
FilerRequestCounter = prometheus.NewCounterVec( FilerRequestCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{
Namespace: "SeaweedFS", Namespace: "SeaweedFS",
@ -129,6 +161,11 @@ var (
) )
func init() { func init() {
Gather.MustRegister(MasterClientConnectCounter)
Gather.MustRegister(MasterRaftIsleader)
Gather.MustRegister(MasterReceivedHeartbeatCounter)
Gather.MustRegister(MasterLeaderChangeCounter)
Gather.MustRegister(FilerRequestCounter) Gather.MustRegister(FilerRequestCounter)
Gather.MustRegister(FilerRequestHistogram) Gather.MustRegister(FilerRequestHistogram)
Gather.MustRegister(FilerStoreCounter) Gather.MustRegister(FilerStoreCounter)

View File

@ -2,6 +2,7 @@ package wdclient
import ( import (
"context" "context"
"github.com/chrislusf/seaweedfs/weed/stats"
"math/rand" "math/rand"
"time" "time"
@ -96,14 +97,15 @@ func (mc *MasterClient) tryAllMasters() {
func (mc *MasterClient) tryConnectToMaster(master pb.ServerAddress) (nextHintedLeader pb.ServerAddress) { func (mc *MasterClient) tryConnectToMaster(master pb.ServerAddress) (nextHintedLeader pb.ServerAddress) {
glog.V(1).Infof("%s masterClient Connecting to master %v", mc.clientType, master) glog.V(1).Infof("%s masterClient Connecting to master %v", mc.clientType, master)
stats.MasterClientConnectCounter.WithLabelValues("total").Inc()
gprcErr := pb.WithMasterClient(true, master, mc.grpcDialOption, func(client master_pb.SeaweedClient) error { gprcErr := pb.WithMasterClient(true, master, mc.grpcDialOption, func(client master_pb.SeaweedClient) error {
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
defer cancel() defer cancel()
stream, err := client.KeepConnected(ctx) stream, err := client.KeepConnected(ctx)
if err != nil { if err != nil {
glog.V(1).Infof("%s masterClient failed to keep connected to %s: %v", mc.clientType, master, err) glog.V(1).Infof("%s masterClient failed to keep connected to %s: %v", mc.clientType, master, err)
stats.MasterClientConnectCounter.WithLabelValues("failedToKeepConnected").Inc()
return err return err
} }
@ -113,6 +115,7 @@ func (mc *MasterClient) tryConnectToMaster(master pb.ServerAddress) (nextHintedL
Version: util.Version(), Version: util.Version(),
}); err != nil { }); err != nil {
glog.V(0).Infof("%s masterClient failed to send to %s: %v", mc.clientType, master, err) glog.V(0).Infof("%s masterClient failed to send to %s: %v", mc.clientType, master, err)
stats.MasterClientConnectCounter.WithLabelValues("failedToSend").Inc()
return err return err
} }
@ -123,6 +126,7 @@ func (mc *MasterClient) tryConnectToMaster(master pb.ServerAddress) (nextHintedL
resp, err := stream.Recv() resp, err := stream.Recv()
if err != nil { if err != nil {
glog.V(0).Infof("%s masterClient failed to receive from %s: %v", mc.clientType, master, err) glog.V(0).Infof("%s masterClient failed to receive from %s: %v", mc.clientType, master, err)
stats.MasterClientConnectCounter.WithLabelValues("failedToReceive").Inc()
return err return err
} }
@ -131,6 +135,7 @@ func (mc *MasterClient) tryConnectToMaster(master pb.ServerAddress) (nextHintedL
if resp.VolumeLocation.Leader != "" { if resp.VolumeLocation.Leader != "" {
glog.V(0).Infof("redirected to leader %v", resp.VolumeLocation.Leader) glog.V(0).Infof("redirected to leader %v", resp.VolumeLocation.Leader)
nextHintedLeader = pb.ServerAddress(resp.VolumeLocation.Leader) nextHintedLeader = pb.ServerAddress(resp.VolumeLocation.Leader)
stats.MasterClientConnectCounter.WithLabelValues("redirectedToleader").Inc()
return nil return nil
} }
@ -159,6 +164,7 @@ func (mc *MasterClient) tryConnectToMaster(master pb.ServerAddress) (nextHintedL
} else { } else {
glog.V(0).Infof("- %s %s leader:%v\n", update.NodeType, update.Address, update.IsLeader) glog.V(0).Infof("- %s %s leader:%v\n", update.NodeType, update.Address, update.IsLeader)
} }
stats.MasterClientConnectCounter.WithLabelValues("onPeerUpdate").Inc()
mc.OnPeerUpdate(update) mc.OnPeerUpdate(update)
} }
} }
@ -167,6 +173,7 @@ func (mc *MasterClient) tryConnectToMaster(master pb.ServerAddress) (nextHintedL
}) })
if gprcErr != nil { if gprcErr != nil {
stats.MasterClientConnectCounter.WithLabelValues("failed").Inc()
glog.V(1).Infof("%s masterClient failed to connect with master %v: %v", mc.clientType, master, gprcErr) glog.V(1).Infof("%s masterClient failed to connect with master %v: %v", mc.clientType, master, gprcErr)
} }
return return