2019-06-16 03:21:44 +08:00
|
|
|
package stats
|
2019-06-13 17:01:51 +08:00
|
|
|
|
2019-06-15 04:06:01 +08:00
|
|
|
import (
|
2019-06-16 12:46:55 +08:00
|
|
|
"fmt"
|
2020-09-24 20:45:39 +08:00
|
|
|
"log"
|
2021-09-08 07:43:54 +08:00
|
|
|
"net"
|
2020-09-24 20:45:39 +08:00
|
|
|
"net/http"
|
2019-06-16 12:46:55 +08:00
|
|
|
"os"
|
2021-09-08 07:43:54 +08:00
|
|
|
"strconv"
|
2020-05-15 12:08:34 +08:00
|
|
|
"strings"
|
2019-06-15 04:06:01 +08:00
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
2021-06-22 15:54:13 +08:00
|
|
|
"github.com/prometheus/client_golang/prometheus/collectors"
|
2020-09-24 20:45:39 +08:00
|
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
2019-06-15 04:06:01 +08:00
|
|
|
"github.com/prometheus/client_golang/prometheus/push"
|
2022-07-29 15:17:28 +08:00
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
2019-06-15 04:06:01 +08:00
|
|
|
)
|
2019-06-13 17:01:51 +08:00
|
|
|
|
2022-07-27 19:31:49 +08:00
|
|
|
// Namespace is the Prometheus namespace used by every metric declared in
// this package.
const Namespace = "SeaweedFS"

// Read-only volume types. These are the values passed as the "type" label of
// VolumeServerReadOnlyVolumeGauge when a collection's series are deleted.
const (
	IsReadOnly       = "IsReadOnly"
	NoWriteOrDelete  = "noWriteOrDelete"
	NoWriteCanDelete = "noWriteCanDelete"
	IsDiskSpaceLow   = "isDiskSpaceLow"
)

// readOnlyVolumeTypes enumerates all read-only volume types so callers can
// iterate over them (see DeleteCollectionMetrics).
var readOnlyVolumeTypes = [...]string{
	IsReadOnly,
	NoWriteOrDelete,
	NoWriteCanDelete,
	IsDiskSpaceLow,
}
|
|
|
|
|
2019-06-13 17:01:51 +08:00
|
|
|
var (
|
2020-10-01 03:59:39 +08:00
|
|
|
Gather = prometheus.NewRegistry()
|
2019-06-14 15:54:56 +08:00
|
|
|
|
2022-01-24 22:09:43 +08:00
|
|
|
MasterClientConnectCounter = prometheus.NewCounterVec(
|
|
|
|
prometheus.CounterOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2022-01-24 22:09:43 +08:00
|
|
|
Subsystem: "wdclient",
|
2022-01-25 17:42:47 +08:00
|
|
|
Name: "connect_updates",
|
2022-01-24 22:09:43 +08:00
|
|
|
Help: "Counter of master client leader updates.",
|
|
|
|
}, []string{"type"})
|
|
|
|
|
2022-01-24 23:13:07 +08:00
|
|
|
MasterRaftIsleader = prometheus.NewGauge(
|
|
|
|
prometheus.GaugeOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2022-01-24 23:13:07 +08:00
|
|
|
Subsystem: "master",
|
2022-01-25 17:42:47 +08:00
|
|
|
Name: "is_leader",
|
2022-01-24 23:13:07 +08:00
|
|
|
Help: "is leader",
|
|
|
|
})
|
|
|
|
|
2022-10-08 04:26:29 +08:00
|
|
|
MasterAdminLock = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Namespace: Namespace,
|
|
|
|
Subsystem: "master",
|
|
|
|
Name: "admin_lock",
|
|
|
|
Help: "admin lock",
|
|
|
|
}, []string{"client"})
|
|
|
|
|
2022-01-24 22:09:43 +08:00
|
|
|
MasterReceivedHeartbeatCounter = prometheus.NewCounterVec(
|
|
|
|
prometheus.CounterOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2022-01-24 22:09:43 +08:00
|
|
|
Subsystem: "master",
|
2022-01-25 17:42:47 +08:00
|
|
|
Name: "received_heartbeats",
|
2022-01-24 22:09:43 +08:00
|
|
|
Help: "Counter of master received heartbeat.",
|
|
|
|
}, []string{"type"})
|
|
|
|
|
2022-06-10 18:30:40 +08:00
|
|
|
MasterReplicaPlacementMismatch = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2022-06-10 18:30:40 +08:00
|
|
|
Subsystem: "master",
|
|
|
|
Name: "replica_placement_mismatch",
|
|
|
|
Help: "replica placement mismatch",
|
|
|
|
}, []string{"collection", "id"})
|
|
|
|
|
2022-01-24 22:09:43 +08:00
|
|
|
MasterLeaderChangeCounter = prometheus.NewCounterVec(
|
|
|
|
prometheus.CounterOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2022-01-24 22:09:43 +08:00
|
|
|
Subsystem: "master",
|
2022-01-25 17:42:47 +08:00
|
|
|
Name: "leader_changes",
|
2022-01-24 22:09:43 +08:00
|
|
|
Help: "Counter of master leader changes.",
|
|
|
|
}, []string{"type"})
|
|
|
|
|
2019-06-16 03:21:44 +08:00
|
|
|
FilerRequestCounter = prometheus.NewCounterVec(
|
2019-06-13 17:01:51 +08:00
|
|
|
prometheus.CounterOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2019-06-13 17:01:51 +08:00
|
|
|
Subsystem: "filer",
|
|
|
|
Name: "request_total",
|
|
|
|
Help: "Counter of filer requests.",
|
|
|
|
}, []string{"type"})
|
|
|
|
|
2019-06-16 03:21:44 +08:00
|
|
|
FilerRequestHistogram = prometheus.NewHistogramVec(
|
2019-06-13 17:01:51 +08:00
|
|
|
prometheus.HistogramOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2019-06-13 17:01:51 +08:00
|
|
|
Subsystem: "filer",
|
|
|
|
Name: "request_seconds",
|
|
|
|
Help: "Bucketed histogram of filer request processing time.",
|
2019-06-14 15:54:56 +08:00
|
|
|
Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
|
|
|
|
}, []string{"type"})
|
|
|
|
|
2022-06-15 11:33:18 +08:00
|
|
|
FilerServerLastSendTsOfSubscribeGauge = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2022-06-15 11:33:18 +08:00
|
|
|
Subsystem: "filer",
|
|
|
|
Name: "last_send_timestamp_of_subscribe",
|
|
|
|
Help: "The last send timestamp of the filer subscription.",
|
|
|
|
}, []string{"sourceFiler", "clientName", "path"})
|
|
|
|
|
2019-06-23 03:23:25 +08:00
|
|
|
FilerStoreCounter = prometheus.NewCounterVec(
|
|
|
|
prometheus.CounterOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2019-06-23 03:23:25 +08:00
|
|
|
Subsystem: "filerStore",
|
|
|
|
Name: "request_total",
|
|
|
|
Help: "Counter of filer store requests.",
|
|
|
|
}, []string{"store", "type"})
|
|
|
|
|
|
|
|
FilerStoreHistogram = prometheus.NewHistogramVec(
|
|
|
|
prometheus.HistogramOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2019-06-23 03:23:25 +08:00
|
|
|
Subsystem: "filerStore",
|
|
|
|
Name: "request_seconds",
|
|
|
|
Help: "Bucketed histogram of filer store request processing time.",
|
|
|
|
Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
|
|
|
|
}, []string{"store", "type"})
|
|
|
|
|
2022-06-15 11:33:18 +08:00
|
|
|
FilerSyncOffsetGauge = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2022-06-15 11:33:18 +08:00
|
|
|
Subsystem: "filerSync",
|
|
|
|
Name: "sync_offset",
|
|
|
|
Help: "The offset of the filer synchronization service.",
|
|
|
|
}, []string{"sourceFiler", "targetFiler", "clientName", "path"})
|
|
|
|
|
2019-06-16 03:21:44 +08:00
|
|
|
VolumeServerRequestCounter = prometheus.NewCounterVec(
|
2019-06-14 15:54:56 +08:00
|
|
|
prometheus.CounterOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2019-06-14 15:54:56 +08:00
|
|
|
Subsystem: "volumeServer",
|
|
|
|
Name: "request_total",
|
2019-06-24 15:26:03 +08:00
|
|
|
Help: "Counter of volume server requests.",
|
2019-06-14 15:54:56 +08:00
|
|
|
}, []string{"type"})
|
|
|
|
|
2022-10-13 15:51:20 +08:00
|
|
|
VolumeServerVacuumingCompactCounter = prometheus.NewCounterVec(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Namespace: Namespace,
|
|
|
|
Subsystem: "volumeServer",
|
|
|
|
Name: "vacuuming_compact_count",
|
|
|
|
Help: "Counter of volume vacuuming Compact counter",
|
|
|
|
}, []string{"success"})
|
|
|
|
|
|
|
|
VolumeServerVacuumingCommitCounter = prometheus.NewCounterVec(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Namespace: Namespace,
|
|
|
|
Subsystem: "volumeServer",
|
|
|
|
Name: "vacuuming_commit_count",
|
|
|
|
Help: "Counter of volume vacuuming commit counter",
|
|
|
|
}, []string{"success"})
|
|
|
|
|
|
|
|
VolumeServerVacuumingHistogram = prometheus.NewHistogramVec(
|
|
|
|
prometheus.HistogramOpts{
|
|
|
|
Namespace: Namespace,
|
|
|
|
Subsystem: "volumeServer",
|
|
|
|
Name: "vacuuming_seconds",
|
|
|
|
Help: "Bucketed histogram of volume server vacuuming processing time.",
|
|
|
|
Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
|
|
|
|
}, []string{"type"})
|
|
|
|
|
2019-06-16 03:21:44 +08:00
|
|
|
VolumeServerRequestHistogram = prometheus.NewHistogramVec(
|
2019-06-14 15:54:56 +08:00
|
|
|
prometheus.HistogramOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2019-06-14 15:54:56 +08:00
|
|
|
Subsystem: "volumeServer",
|
|
|
|
Name: "request_seconds",
|
2019-06-24 15:26:03 +08:00
|
|
|
Help: "Bucketed histogram of volume server request processing time.",
|
2019-06-14 15:54:56 +08:00
|
|
|
Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
|
2019-06-13 17:01:51 +08:00
|
|
|
}, []string{"type"})
|
2019-06-15 04:06:01 +08:00
|
|
|
|
2019-06-18 12:02:50 +08:00
|
|
|
VolumeServerVolumeCounter = prometheus.NewGaugeVec(
|
2019-06-15 04:06:01 +08:00
|
|
|
prometheus.GaugeOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2019-06-15 04:06:01 +08:00
|
|
|
Subsystem: "volumeServer",
|
|
|
|
Name: "volumes",
|
2019-06-18 12:02:50 +08:00
|
|
|
Help: "Number of volumes or shards.",
|
|
|
|
}, []string{"collection", "type"})
|
2019-06-16 17:24:15 +08:00
|
|
|
|
2020-10-15 18:32:02 +08:00
|
|
|
VolumeServerReadOnlyVolumeGauge = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2020-10-15 18:32:02 +08:00
|
|
|
Subsystem: "volumeServer",
|
|
|
|
Name: "read_only_volumes",
|
|
|
|
Help: "Number of read only volumes.",
|
|
|
|
}, []string{"collection", "type"})
|
|
|
|
|
2019-06-18 12:02:50 +08:00
|
|
|
VolumeServerMaxVolumeCounter = prometheus.NewGauge(
|
2019-06-16 17:24:15 +08:00
|
|
|
prometheus.GaugeOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2019-06-16 17:24:15 +08:00
|
|
|
Subsystem: "volumeServer",
|
2019-06-21 00:56:49 +08:00
|
|
|
Name: "max_volumes",
|
2019-06-18 12:02:50 +08:00
|
|
|
Help: "Maximum number of volumes.",
|
2019-06-16 17:24:15 +08:00
|
|
|
})
|
2019-06-17 12:56:41 +08:00
|
|
|
|
|
|
|
VolumeServerDiskSizeGauge = prometheus.NewGaugeVec(
|
2019-06-16 17:44:20 +08:00
|
|
|
prometheus.GaugeOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2019-06-16 17:44:20 +08:00
|
|
|
Subsystem: "volumeServer",
|
2019-06-17 12:56:41 +08:00
|
|
|
Name: "total_disk_size",
|
2019-06-16 17:44:20 +08:00
|
|
|
Help: "Actual disk size used by volumes.",
|
2019-06-17 12:56:41 +08:00
|
|
|
}, []string{"collection", "type"})
|
2020-09-18 15:09:04 +08:00
|
|
|
|
2020-10-23 00:13:47 +08:00
|
|
|
VolumeServerResourceGauge = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2020-10-23 00:13:47 +08:00
|
|
|
Subsystem: "volumeServer",
|
|
|
|
Name: "resource",
|
|
|
|
Help: "Resource usage",
|
|
|
|
}, []string{"name", "type"})
|
|
|
|
|
2020-09-18 15:09:04 +08:00
|
|
|
S3RequestCounter = prometheus.NewCounterVec(
|
|
|
|
prometheus.CounterOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2020-09-18 15:09:04 +08:00
|
|
|
Subsystem: "s3",
|
|
|
|
Name: "request_total",
|
|
|
|
Help: "Counter of s3 requests.",
|
2022-06-16 13:21:25 +08:00
|
|
|
}, []string{"type", "code", "bucket"})
|
|
|
|
|
2020-09-18 15:09:04 +08:00
|
|
|
S3RequestHistogram = prometheus.NewHistogramVec(
|
|
|
|
prometheus.HistogramOpts{
|
2022-10-07 19:20:34 +08:00
|
|
|
Namespace: Namespace,
|
2020-09-18 15:09:04 +08:00
|
|
|
Subsystem: "s3",
|
|
|
|
Name: "request_seconds",
|
|
|
|
Help: "Bucketed histogram of s3 request processing time.",
|
|
|
|
Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
|
2022-06-16 13:21:25 +08:00
|
|
|
}, []string{"type", "bucket"})
|
2019-06-13 17:01:51 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
func init() {
|
2022-01-24 23:13:07 +08:00
|
|
|
Gather.MustRegister(MasterClientConnectCounter)
|
|
|
|
Gather.MustRegister(MasterRaftIsleader)
|
2022-10-08 04:26:29 +08:00
|
|
|
Gather.MustRegister(MasterAdminLock)
|
2022-01-24 23:13:07 +08:00
|
|
|
Gather.MustRegister(MasterReceivedHeartbeatCounter)
|
|
|
|
Gather.MustRegister(MasterLeaderChangeCounter)
|
2022-06-10 18:30:40 +08:00
|
|
|
Gather.MustRegister(MasterReplicaPlacementMismatch)
|
2022-01-24 23:13:07 +08:00
|
|
|
|
2020-09-25 01:21:23 +08:00
|
|
|
Gather.MustRegister(FilerRequestCounter)
|
|
|
|
Gather.MustRegister(FilerRequestHistogram)
|
|
|
|
Gather.MustRegister(FilerStoreCounter)
|
|
|
|
Gather.MustRegister(FilerStoreHistogram)
|
2022-06-15 11:33:18 +08:00
|
|
|
Gather.MustRegister(FilerSyncOffsetGauge)
|
|
|
|
Gather.MustRegister(FilerServerLastSendTsOfSubscribeGauge)
|
2021-06-22 15:54:13 +08:00
|
|
|
Gather.MustRegister(collectors.NewGoCollector())
|
|
|
|
Gather.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
|
2019-06-14 15:54:56 +08:00
|
|
|
|
2020-09-25 01:21:23 +08:00
|
|
|
Gather.MustRegister(VolumeServerRequestCounter)
|
|
|
|
Gather.MustRegister(VolumeServerRequestHistogram)
|
2022-10-13 15:51:20 +08:00
|
|
|
Gather.MustRegister(VolumeServerVacuumingCompactCounter)
|
|
|
|
Gather.MustRegister(VolumeServerVacuumingCommitCounter)
|
|
|
|
Gather.MustRegister(VolumeServerVacuumingHistogram)
|
2020-09-25 01:21:23 +08:00
|
|
|
Gather.MustRegister(VolumeServerVolumeCounter)
|
|
|
|
Gather.MustRegister(VolumeServerMaxVolumeCounter)
|
2020-10-15 18:32:02 +08:00
|
|
|
Gather.MustRegister(VolumeServerReadOnlyVolumeGauge)
|
2020-09-25 01:21:23 +08:00
|
|
|
Gather.MustRegister(VolumeServerDiskSizeGauge)
|
2020-10-23 00:13:47 +08:00
|
|
|
Gather.MustRegister(VolumeServerResourceGauge)
|
2019-06-15 04:06:01 +08:00
|
|
|
|
2020-09-25 01:21:23 +08:00
|
|
|
Gather.MustRegister(S3RequestCounter)
|
|
|
|
Gather.MustRegister(S3RequestHistogram)
|
2019-06-15 04:06:01 +08:00
|
|
|
}
|
|
|
|
|
2020-09-25 01:21:23 +08:00
|
|
|
func LoopPushingMetric(name, instance, addr string, intervalSeconds int) {
|
2020-09-16 16:39:30 +08:00
|
|
|
if addr == "" || intervalSeconds == 0 {
|
2019-06-24 06:29:49 +08:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2020-09-21 06:38:59 +08:00
|
|
|
glog.V(0).Infof("%s server sends metrics to %s every %d seconds", name, addr, intervalSeconds)
|
|
|
|
|
2020-09-25 01:21:23 +08:00
|
|
|
pusher := push.New(addr, name).Gatherer(Gather).Grouping("instance", instance)
|
2019-06-14 15:54:56 +08:00
|
|
|
|
2019-06-15 04:06:01 +08:00
|
|
|
for {
|
2020-09-16 16:39:30 +08:00
|
|
|
err := pusher.Push()
|
|
|
|
if err != nil && !strings.HasPrefix(err.Error(), "unexpected status code 200") {
|
|
|
|
glog.V(0).Infof("could not push metrics to prometheus push gateway %s: %v", addr, err)
|
2019-06-18 05:51:47 +08:00
|
|
|
}
|
|
|
|
if intervalSeconds <= 0 {
|
|
|
|
intervalSeconds = 15
|
2019-06-15 04:06:01 +08:00
|
|
|
}
|
|
|
|
time.Sleep(time.Duration(intervalSeconds) * time.Second)
|
|
|
|
}
|
2019-06-13 17:01:51 +08:00
|
|
|
}
|
2019-06-16 12:46:55 +08:00
|
|
|
|
2022-11-25 02:22:59 +08:00
|
|
|
func StartMetricsServer(ip string, port int) {
|
2020-09-24 20:45:39 +08:00
|
|
|
if port == 0 {
|
|
|
|
return
|
|
|
|
}
|
2020-09-25 01:21:23 +08:00
|
|
|
http.Handle("/metrics", promhttp.HandlerFor(Gather, promhttp.HandlerOpts{}))
|
2022-11-25 02:22:59 +08:00
|
|
|
log.Fatal(http.ListenAndServe(fmt.Sprintf("%s:%d", ip, port), nil))
|
2020-09-24 20:45:39 +08:00
|
|
|
}
|
|
|
|
|
2020-03-02 14:13:47 +08:00
|
|
|
// SourceName returns "hostname:port" for the local machine, using the host
// name reported by the operating system. If the host name cannot be
// determined it returns "unknown" (without the port).
func SourceName(port uint32) string {
	hostname, err := os.Hostname()
	if err != nil {
		return "unknown"
	}
	// Format the port as unsigned: converting a uint32 to int first would
	// wrap to a negative number on platforms where int is 32 bits wide.
	return net.JoinHostPort(hostname, strconv.FormatUint(uint64(port), 10))
}
|
2022-07-27 19:31:49 +08:00
|
|
|
|
2022-07-27 19:48:56 +08:00
|
|
|
// todo - can be changed to DeletePartialMatch when https://github.com/prometheus/client_golang/pull/1013 gets released
|
2022-07-27 19:31:49 +08:00
|
|
|
func DeleteCollectionMetrics(collection string) {
|
|
|
|
VolumeServerDiskSizeGauge.DeleteLabelValues(collection, "normal")
|
|
|
|
for _, volume_type := range readOnlyVolumeTypes {
|
|
|
|
VolumeServerReadOnlyVolumeGauge.DeleteLabelValues(collection, volume_type)
|
|
|
|
}
|
|
|
|
VolumeServerVolumeCounter.DeleteLabelValues(collection, "volume")
|
|
|
|
}
|