123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127 |
- package metrics
- import (
- "context"
- "fmt"
- "net"
- "net/http"
- _ "net/http/pprof"
- "runtime"
- "sync"
- "time"
- "github.com/prometheus/client_golang/prometheus"
- "github.com/prometheus/client_golang/prometheus/promhttp"
- "github.com/rs/zerolog"
- "golang.org/x/net/trace"
- )
- const (
- startupTime = time.Millisecond * 500
- defaultShutdownTimeout = time.Second * 15
- )
- type Config struct {
- ReadyServer *ReadyServer
- QuickTunnelHostname string
- Orchestrator orchestrator
- ShutdownTimeout time.Duration
- }
- type orchestrator interface {
- GetVersionedConfigJSON() ([]byte, error)
- }
- func newMetricsHandler(
- config Config,
- log *zerolog.Logger,
- ) *http.ServeMux {
- router := http.NewServeMux()
- router.Handle("/debug/", http.DefaultServeMux)
- router.Handle("/metrics", promhttp.Handler())
- router.HandleFunc("/healthcheck", func(w http.ResponseWriter, r *http.Request) {
- _, _ = fmt.Fprintf(w, "OK\n")
- })
- if config.ReadyServer != nil {
- router.Handle("/ready", config.ReadyServer)
- }
- router.HandleFunc("/quicktunnel", func(w http.ResponseWriter, r *http.Request) {
- _, _ = fmt.Fprintf(w, `{"hostname":"%s"}`, config.QuickTunnelHostname)
- })
- if config.Orchestrator != nil {
- router.HandleFunc("/config", func(w http.ResponseWriter, r *http.Request) {
- json, err := config.Orchestrator.GetVersionedConfigJSON()
- if err != nil {
- w.WriteHeader(500)
- _, _ = fmt.Fprintf(w, "ERR: %v", err)
- log.Err(err).Msg("Failed to serve config")
- return
- }
- _, _ = w.Write(json)
- })
- }
- return router
- }
- func ServeMetrics(
- l net.Listener,
- ctx context.Context,
- config Config,
- log *zerolog.Logger,
- ) (err error) {
- var wg sync.WaitGroup
- // Metrics port is privileged, so no need for further access control
- trace.AuthRequest = func(*http.Request) (bool, bool) { return true, true }
- // TODO: parameterize ReadTimeout and WriteTimeout. The maximum time we can
- // profile CPU usage depends on WriteTimeout
- h := newMetricsHandler(config, log)
- server := &http.Server{
- ReadTimeout: 10 * time.Second,
- WriteTimeout: 10 * time.Second,
- Handler: h,
- }
- wg.Add(1)
- go func() {
- defer wg.Done()
- err = server.Serve(l)
- }()
- log.Info().Msgf("Starting metrics server on %s", fmt.Sprintf("%v/metrics", l.Addr()))
- // server.Serve will hang if server.Shutdown is called before the server is
- // fully started up. So add artificial delay.
- time.Sleep(startupTime)
- <-ctx.Done()
- shutdownTimeout := config.ShutdownTimeout
- if shutdownTimeout == 0 {
- shutdownTimeout = defaultShutdownTimeout
- }
- ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
- _ = server.Shutdown(ctx)
- cancel()
- wg.Wait()
- if err == http.ErrServerClosed {
- log.Info().Msg("Metrics server stopped")
- return nil
- }
- log.Err(err).Msg("Metrics server failed")
- return err
- }
- func RegisterBuildInfo(buildType, buildTime, version string) {
- buildInfo := prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- // Don't namespace build_info, since we want it to be consistent across all Cloudflare services
- Name: "build_info",
- Help: "Build and version information",
- },
- []string{"goversion", "type", "revision", "version"},
- )
- prometheus.MustRegister(buildInfo)
- buildInfo.WithLabelValues(runtime.Version(), buildType, buildTime, version).Set(1)
- }
|