在使用 Prometheus 的时候,如果我们需要为自己定制一些业务监控的指标,那么很可能需要自己编写 Exporter。我经常使用 Go,所以这里记录一下用 Go 编写 Exporter 的两种方式,分别是:定时采集和按需采集。

这两种方式本质上没有差别,但是在采集时有一些差别:定时采集由程序自己在后台持续更新指标的值,Prometheus 抓取时只是读取当前已有的值;按需采集则是在每次抓取 /metrics 时才调用 Collect 现场计算指标。

所以当你编写 exporter 的时候需要根据自己的业务场景具体地选择适合你的方式。

方式一:定时采集

这种方式只需要直接将 metric 导入到 prometheus 的框架中即可:

  // Command-line flags that configure the demo exporter.
  var (
      // addr is the address the HTTP server listens on.
      addr = flag.String(
          "listen-address",
          ":8080",
          "The address to listen on for HTTP requests.",
      )

      // normDomain scales the normally distributed samples.
      normDomain = flag.Float64(
          "normal.domain",
          0.0002,
          "The domain for the normal distribution.",
      )

      // normMean shifts the normally distributed samples.
      normMean = flag.Float64(
          "normal.mean",
          0.00001,
          "The mean for the normal distribution.",
      )

      // oscillationPeriod is the length of one full cycle of the sampling-rate
      // oscillation.
      oscillationPeriod = flag.Duration(
          "oscillation-period",
          10*time.Minute,
          "The duration of the rate oscillation period.",
      )
  )
  7. func main() {
  8. flag.Parse()
  9. var rpcDurations = prometheus.NewSummaryVec(
  10. prometheus.SummaryOpts{
  11. Name: "rpc_durations_seconds",
  12. Help: "RPC latency distributions.",
  13. Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
  14. },
  15. []string{"service"},
  16. )
  17. prometheus.MustRegister(rpcDurations)
  18. start := time.Now()
  19. oscillationFactor := func() float64 {
  20. return 2 + math.Sin(math.Sin(2*math.Pi*float64(time.Since(start))/float64(*oscillationPeriod)))
  21. }
  22. go func() {
  23. for {
  24. v := (rand.NormFloat64() * *normDomain) + *normMean
  25. rpcDurations.WithLabelValues("normal").Observe(v)
  26. time.Sleep(time.Duration(75*oscillationFactor()) * time.Millisecond)
  27. }
  28. }()
  29. http.Handle("/metrics", promhttp.Handler())
  30. log.Fatal(http.ListenAndServe(*addr, nil))
  31. }

简化用法

updated at:2023-06-17

上面这种用法比较啰嗦,所以 SDK 提供了一种更加简单的使用方式:

  1. [root@liqiang.io]# cat main.go
  2. package main
  3. import (
  4. "math/rand"
  5. "net/http"
  6. "github.com/prometheus/client_golang/prometheus"
  7. "github.com/prometheus/client_golang/prometheus/promauto"
  8. "github.com/prometheus/client_golang/prometheus/promhttp"
  9. )
  10. var histogram = promauto.NewHistogram(prometheus.HistogramOpts{
  11. Name: "random_numbers",
  12. Help: "A histogram of normally distributed random numbers.",
  13. Buckets: prometheus.LinearBuckets(-3, .1, 61),
  14. }, []string{"model"})
  15. func Random() {
  16. for {
  17. histogram.Observe(rand.NormFloat64())
  18. }
  19. }
  20. func main() {
  21. go Random()
  22. http.Handle("/metrics", promhttp.Handler())
  23. http.ListenAndServe(":1971", nil)
  24. }

方式二:按需采集

按需采集的关键是实现 Collector 接口,然后再注册对象。

  1. import (
  2. "github.com/prometheus/client_golang/prometheus"
  3. )
  // No init-time registration is needed: main registers the ClusterManager
  // collectors explicitly on a dedicated registry. The earlier init function
  // registered cpuTemp and hdFailures, which are not declared anywhere in this
  // example and therefore prevented it from compiling.
  8. type ClusterManager struct {
  9. Zone string
  10. OOMCountDesc *prometheus.Desc
  11. RAMUsageDesc *prometheus.Desc
  12. }
  13. // Describe simply sends the two Descs in the struct to the channel.
  14. func (c *ClusterManager) Describe(ch chan<- *prometheus.Desc) {
  15. ch <- c.OOMCountDesc
  16. ch <- c.RAMUsageDesc
  17. }
  18. func (c *ClusterManager) Collect(ch chan<- prometheus.Metric) {
  19. oomCountByHost, ramUsageByHost := c.ReallyExpensiveAssessmentOfTheSystemState()
  20. for host, oomCount := range oomCountByHost {
  21. ch <- prometheus.MustNewConstMetric(
  22. c.OOMCountDesc,
  23. prometheus.CounterValue,
  24. float64(oomCount),
  25. host,
  26. )
  27. }
  28. for host, ramUsage := range ramUsageByHost {
  29. ch <- prometheus.MustNewConstMetric(
  30. c.RAMUsageDesc,
  31. prometheus.GaugeValue,
  32. ramUsage,
  33. host,
  34. )
  35. }
  36. }
  37. func NewClusterManager(zone string) *ClusterManager {
  38. return &ClusterManager{
  39. Zone: zone,
  40. OOMCountDesc: prometheus.NewDesc(
  41. "clustermanager_oom_crashes_total",
  42. "Number of OOM crashes.",
  43. []string{"host"},
  44. prometheus.Labels{"zone": zone},
  45. ),
  46. RAMUsageDesc: prometheus.NewDesc(
  47. "clustermanager_ram_usage_bytes",
  48. "RAM usage as reported to the cluster manager.",
  49. []string{"host"},
  50. prometheus.Labels{"zone": zone},
  51. ),
  52. }
  53. }
  54. func main() {
  55. workerDB := NewClusterManager("db")
  56. workerCA := NewClusterManager("ca")
  57. // Since we are dealing with custom Collector implementations, it might
  58. // be a good idea to try it out with a pedantic registry.
  59. reg := prometheus.NewPedanticRegistry()
  60. reg.MustRegister(workerDB)
  61. reg.MustRegister(workerCA)
  62. gatherers := prometheus.Gatherers{
  63. reg,
  64. }
  65. h := promhttp.HandlerFor(gatherers,
  66. promhttp.HandlerOpts{
  67. ErrorLog: log.NewErrorLogger(),
  68. ErrorHandling: promhttp.ContinueOnError,
  69. })
  70. http.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
  71. h.ServeHTTP(w, r)
  72. })
  73. log.Infoln("Start server at :8080")
  74. if err := http.ListenAndServe(":8080", nil); err != nil {
  75. log.Errorf("Error occur when start server %v", err)
  76. os.Exit(1)
  77. }
  78. }

Ref