在使用 Prometheus 的时候,如果我们需要为自己定制一些业务监控的指标,那么很可能你需要自己编写 Exporter,例如我经常使用 Go,这里就记录一下 Go 编写 Exporter 的两种方式,分别是:
- 自己的代码定时刷新值
- 在 Prometheus 抓取的时候实时获取值
这两种方式本质上没有差别,但是在采集时有一些差别:
- 方式一可以以几乎忽略的延迟返回监控数据
- 方式二可能会因为一些难以获取的值而超时或者很久才会返回值
所以当你编写 exporter 的时候需要根据自己的业务场景具体地选择适合你的方式。
方式一:定时采集
这种方式只需要直接将 metric 导入到 prometheus 的框架中即可:
var (
addr = flag.String("listen-address", ":8080", "The address to listen on for HTTP requests.")
normDomain = flag.Float64("normal.domain", 0.0002, "The domain for the normal distribution.")
normMean = flag.Float64("normal.mean", 0.00001, "The mean for the normal distribution.")
oscillationPeriod = flag.Duration("oscillation-period", 10*time.Minute, "The duration of the rate oscillation period.")
)
func main() {
flag.Parse()
var rpcDurations = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Name: "rpc_durations_seconds",
Help: "RPC latency distributions.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
},
[]string{"service"},
)
prometheus.MustRegister(rpcDurations)
start := time.Now()
oscillationFactor := func() float64 {
return 2 + math.Sin(math.Sin(2*math.Pi*float64(time.Since(start))/float64(*oscillationPeriod)))
}
go func() {
for {
v := (rand.NormFloat64() * *normDomain) + *normMean
rpcDurations.WithLabelValues("normal").Observe(v)
time.Sleep(time.Duration(75*oscillationFactor()) * time.Millisecond)
}
}()
http.Handle("/metrics", promhttp.Handler())
log.Fatal(http.ListenAndServe(*addr, nil))
}
简化用法
updated at:2023-06-17
上面这种用法比较啰嗦,所以 SDK 提供了一种更加简单地使用方式:
[root@liqiang.io]# cat main.go
package main
import (
"math/rand"
"net/http"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
var histogram = promauto.NewHistogram(prometheus.HistogramOpts{
Name: "random_numbers",
Help: "A histogram of normally distributed random numbers.",
Buckets: prometheus.LinearBuckets(-3, .1, 61),
}, []string{"model"})
func Random() {
for {
histogram.Observe(rand.NormFloat64())
}
}
func main() {
go Random()
http.Handle("/metrics", promhttp.Handler())
http.ListenAndServe(":1971", nil)
}
方式二:按需采集
按需采集的关键是实现 Collector
接口,然后再注册对象。
import (
"github.com/prometheus/client_golang/prometheus"
)
func init() {
prometheus.MustRegister(cpuTemp)
prometheus.MustRegister(hdFailures)
}
type ClusterManager struct {
Zone string
OOMCountDesc *prometheus.Desc
RAMUsageDesc *prometheus.Desc
}
// Describe simply sends the two Descs in the struct to the channel.
func (c *ClusterManager) Describe(ch chan<- *prometheus.Desc) {
ch <- c.OOMCountDesc
ch <- c.RAMUsageDesc
}
func (c *ClusterManager) Collect(ch chan<- prometheus.Metric) {
oomCountByHost, ramUsageByHost := c.ReallyExpensiveAssessmentOfTheSystemState()
for host, oomCount := range oomCountByHost {
ch <- prometheus.MustNewConstMetric(
c.OOMCountDesc,
prometheus.CounterValue,
float64(oomCount),
host,
)
}
for host, ramUsage := range ramUsageByHost {
ch <- prometheus.MustNewConstMetric(
c.RAMUsageDesc,
prometheus.GaugeValue,
ramUsage,
host,
)
}
}
func NewClusterManager(zone string) *ClusterManager {
return &ClusterManager{
Zone: zone,
OOMCountDesc: prometheus.NewDesc(
"clustermanager_oom_crashes_total",
"Number of OOM crashes.",
[]string{"host"},
prometheus.Labels{"zone": zone},
),
RAMUsageDesc: prometheus.NewDesc(
"clustermanager_ram_usage_bytes",
"RAM usage as reported to the cluster manager.",
[]string{"host"},
prometheus.Labels{"zone": zone},
),
}
}
func main() {
workerDB := NewClusterManager("db")
workerCA := NewClusterManager("ca")
// Since we are dealing with custom Collector implementations, it might
// be a good idea to try it out with a pedantic registry.
reg := prometheus.NewPedanticRegistry()
reg.MustRegister(workerDB)
reg.MustRegister(workerCA)
gatherers := prometheus.Gatherers{
reg,
}
h := promhttp.HandlerFor(gatherers,
promhttp.HandlerOpts{
ErrorLog: log.NewErrorLogger(),
ErrorHandling: promhttp.ContinueOnError,
})
http.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
h.ServeHTTP(w, r)
})
log.Infoln("Start server at :8080")
if err := http.ListenAndServe(":8080", nil); err != nil {
log.Errorf("Error occur when start server %v", err)
os.Exit(1)
}
}