metrics.go 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. package metrics
  2. import (
  3. "context"
  4. "fmt"
  5. "net"
  6. "net/http"
  7. _ "net/http/pprof"
  8. "runtime"
  9. "sync"
  10. "time"
  11. "github.com/prometheus/client_golang/prometheus"
  12. "github.com/prometheus/client_golang/prometheus/promhttp"
  13. "github.com/rs/zerolog"
  14. "golang.org/x/net/trace"
  15. )
  16. const (
  17. startupTime = time.Millisecond * 500
  18. defaultShutdownTimeout = time.Second * 15
  19. )
  20. type Config struct {
  21. ReadyServer *ReadyServer
  22. QuickTunnelHostname string
  23. Orchestrator orchestrator
  24. ShutdownTimeout time.Duration
  25. }
  26. type orchestrator interface {
  27. GetVersionedConfigJSON() ([]byte, error)
  28. }
  29. func newMetricsHandler(
  30. config Config,
  31. log *zerolog.Logger,
  32. ) *http.ServeMux {
  33. router := http.NewServeMux()
  34. router.Handle("/debug/", http.DefaultServeMux)
  35. router.Handle("/metrics", promhttp.Handler())
  36. router.HandleFunc("/healthcheck", func(w http.ResponseWriter, r *http.Request) {
  37. _, _ = fmt.Fprintf(w, "OK\n")
  38. })
  39. if config.ReadyServer != nil {
  40. router.Handle("/ready", config.ReadyServer)
  41. }
  42. router.HandleFunc("/quicktunnel", func(w http.ResponseWriter, r *http.Request) {
  43. _, _ = fmt.Fprintf(w, `{"hostname":"%s"}`, config.QuickTunnelHostname)
  44. })
  45. if config.Orchestrator != nil {
  46. router.HandleFunc("/config", func(w http.ResponseWriter, r *http.Request) {
  47. json, err := config.Orchestrator.GetVersionedConfigJSON()
  48. if err != nil {
  49. w.WriteHeader(500)
  50. _, _ = fmt.Fprintf(w, "ERR: %v", err)
  51. log.Err(err).Msg("Failed to serve config")
  52. return
  53. }
  54. _, _ = w.Write(json)
  55. })
  56. }
  57. return router
  58. }
  59. func ServeMetrics(
  60. l net.Listener,
  61. ctx context.Context,
  62. config Config,
  63. log *zerolog.Logger,
  64. ) (err error) {
  65. var wg sync.WaitGroup
  66. // Metrics port is privileged, so no need for further access control
  67. trace.AuthRequest = func(*http.Request) (bool, bool) { return true, true }
  68. // TODO: parameterize ReadTimeout and WriteTimeout. The maximum time we can
  69. // profile CPU usage depends on WriteTimeout
  70. h := newMetricsHandler(config, log)
  71. server := &http.Server{
  72. ReadTimeout: 10 * time.Second,
  73. WriteTimeout: 10 * time.Second,
  74. Handler: h,
  75. }
  76. wg.Add(1)
  77. go func() {
  78. defer wg.Done()
  79. err = server.Serve(l)
  80. }()
  81. log.Info().Msgf("Starting metrics server on %s", fmt.Sprintf("%v/metrics", l.Addr()))
  82. // server.Serve will hang if server.Shutdown is called before the server is
  83. // fully started up. So add artificial delay.
  84. time.Sleep(startupTime)
  85. <-ctx.Done()
  86. shutdownTimeout := config.ShutdownTimeout
  87. if shutdownTimeout == 0 {
  88. shutdownTimeout = defaultShutdownTimeout
  89. }
  90. ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
  91. _ = server.Shutdown(ctx)
  92. cancel()
  93. wg.Wait()
  94. if err == http.ErrServerClosed {
  95. log.Info().Msg("Metrics server stopped")
  96. return nil
  97. }
  98. log.Err(err).Msg("Metrics server failed")
  99. return err
  100. }
  101. func RegisterBuildInfo(buildType, buildTime, version string) {
  102. buildInfo := prometheus.NewGaugeVec(
  103. prometheus.GaugeOpts{
  104. // Don't namespace build_info, since we want it to be consistent across all Cloudflare services
  105. Name: "build_info",
  106. Help: "Build and version information",
  107. },
  108. []string{"goversion", "type", "revision", "version"},
  109. )
  110. prometheus.MustRegister(buildInfo)
  111. buildInfo.WithLabelValues(runtime.Version(), buildType, buildTime, version).Set(1)
  112. }