在使用 Prometheus 的时候,如果我们需要为自己定制一些业务监控的指标,那么很可能你需要自己编写 Exporter,例如我经常使用 Go,这里就记录一下 Go 编写 Exporter 的两种方式,分别是:

这两种方式本质上没有差别,但是在采集时有一些差别:

所以当你编写 exporter 的时候需要根据自己的业务场景具体地选择适合你的方式。

方式一:定时采集

这种方式只需要直接将 metric 导入到 prometheus 的框架中即可:


var (
    addr              = flag.String("listen-address", ":8080", "The address to listen on for HTTP requests.")
    normDomain        = flag.Float64("normal.domain", 0.0002, "The domain for the normal distribution.")
    normMean          = flag.Float64("normal.mean", 0.00001, "The mean for the normal distribution.")
    oscillationPeriod = flag.Duration("oscillation-period", 10*time.Minute, "The duration of the rate oscillation period.")
)

func main() {
    flag.Parse()

    var rpcDurations = prometheus.NewSummaryVec(
        prometheus.SummaryOpts{
            Name:       "rpc_durations_seconds",
            Help:       "RPC latency distributions.",
            Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
        },
        []string{"service"},
    )
    prometheus.MustRegister(rpcDurations)

    start := time.Now()

    oscillationFactor := func() float64 {
        return 2 + math.Sin(math.Sin(2*math.Pi*float64(time.Since(start))/float64(*oscillationPeriod)))
    }
    go func() {
        for {
            v := (rand.NormFloat64() * *normDomain) + *normMean
            rpcDurations.WithLabelValues("normal").Observe(v)
            time.Sleep(time.Duration(75*oscillationFactor()) * time.Millisecond)
        }
    }()

    http.Handle("/metrics", promhttp.Handler())
    log.Fatal(http.ListenAndServe(*addr, nil))
}

方式二:按需采集

按需采集的关键是实现 Collector 接口,然后再注册对象。

import (
    "github.com/prometheus/client_golang/prometheus"
)

func init() {
    prometheus.MustRegister(cpuTemp)
    prometheus.MustRegister(hdFailures)
}

type ClusterManager struct {
    Zone         string
    OOMCountDesc *prometheus.Desc
    RAMUsageDesc *prometheus.Desc
}

// Describe simply sends the two Descs in the struct to the channel.
func (c *ClusterManager) Describe(ch chan<- *prometheus.Desc) {
    ch <- c.OOMCountDesc
    ch <- c.RAMUsageDesc
}

func (c *ClusterManager) Collect(ch chan<- prometheus.Metric) {
    oomCountByHost, ramUsageByHost := c.ReallyExpensiveAssessmentOfTheSystemState()
    for host, oomCount := range oomCountByHost {
        ch <- prometheus.MustNewConstMetric(
            c.OOMCountDesc,
            prometheus.CounterValue,
            float64(oomCount),
            host,
        )
    }
    for host, ramUsage := range ramUsageByHost {
        ch <- prometheus.MustNewConstMetric(
            c.RAMUsageDesc,
            prometheus.GaugeValue,
            ramUsage,
            host,
        )
    }
}

func NewClusterManager(zone string) *ClusterManager {
    return &ClusterManager{
        Zone: zone,
        OOMCountDesc: prometheus.NewDesc(
            "clustermanager_oom_crashes_total",
            "Number of OOM crashes.",
            []string{"host"},
            prometheus.Labels{"zone": zone},
        ),
        RAMUsageDesc: prometheus.NewDesc(
            "clustermanager_ram_usage_bytes",
            "RAM usage as reported to the cluster manager.",
            []string{"host"},
            prometheus.Labels{"zone": zone},
        ),
    }
}

func main() {
    workerDB := NewClusterManager("db")
    workerCA := NewClusterManager("ca")

    // Since we are dealing with custom Collector implementations, it might
    // be a good idea to try it out with a pedantic registry.
    reg := prometheus.NewPedanticRegistry()
    reg.MustRegister(workerDB)
    reg.MustRegister(workerCA)
    gatherers := prometheus.Gatherers{
        reg,
    }

    h := promhttp.HandlerFor(gatherers,
        promhttp.HandlerOpts{
            ErrorLog:      log.NewErrorLogger(),
            ErrorHandling: promhttp.ContinueOnError,
        })
    http.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
        h.ServeHTTP(w, r)
    })
    log.Infoln("Start server at :8080")
    if err := http.ListenAndServe(":8080", nil); err != nil {
        log.Errorf("Error occur when start server %v", err)
        os.Exit(1)
    }
}

Ref