metrics.go 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. package metrics
import (
	"context"
	"errors"
	"fmt"
	"net"
	"net/http"
	_ "net/http/pprof"
	"runtime"
	"sync"
	"time"

	"github.com/gorilla/mux"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
	"github.com/rs/zerolog"
	"golang.org/x/net/trace"
)
  17. const (
  18. shutdownTimeout = time.Second * 15
  19. startupTime = time.Millisecond * 500
  20. )
  21. func newMetricsHandler(readyServer *ReadyServer) *mux.Router {
  22. router := mux.NewRouter()
  23. router.PathPrefix("/debug/").Handler(http.DefaultServeMux)
  24. router.Handle("/metrics", promhttp.Handler())
  25. router.HandleFunc("/healthcheck", func(w http.ResponseWriter, r *http.Request) {
  26. _, _ = fmt.Fprintf(w, "OK\n")
  27. })
  28. if readyServer != nil {
  29. router.Handle("/ready", readyServer)
  30. }
  31. return router
  32. }
  33. func ServeMetrics(
  34. l net.Listener,
  35. shutdownC <-chan struct{},
  36. readyServer *ReadyServer,
  37. log *zerolog.Logger,
  38. ) (err error) {
  39. var wg sync.WaitGroup
  40. // Metrics port is privileged, so no need for further access control
  41. trace.AuthRequest = func(*http.Request) (bool, bool) { return true, true }
  42. // TODO: parameterize ReadTimeout and WriteTimeout. The maximum time we can
  43. // profile CPU usage depends on WriteTimeout
  44. h := newMetricsHandler(readyServer)
  45. server := &http.Server{
  46. ReadTimeout: 10 * time.Second,
  47. WriteTimeout: 10 * time.Second,
  48. Handler: h,
  49. }
  50. wg.Add(1)
  51. go func() {
  52. defer wg.Done()
  53. err = server.Serve(l)
  54. }()
  55. log.Info().Msgf("Starting metrics server on %s", fmt.Sprintf("%v/metrics", l.Addr()))
  56. // server.Serve will hang if server.Shutdown is called before the server is
  57. // fully started up. So add artificial delay.
  58. time.Sleep(startupTime)
  59. <-shutdownC
  60. ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
  61. _ = server.Shutdown(ctx)
  62. cancel()
  63. wg.Wait()
  64. if err == http.ErrServerClosed {
  65. log.Info().Msg("Metrics server stopped")
  66. return nil
  67. }
  68. log.Err(err).Msg("Metrics server failed")
  69. return err
  70. }
  71. func RegisterBuildInfo(buildTime string, version string) {
  72. buildInfo := prometheus.NewGaugeVec(
  73. prometheus.GaugeOpts{
  74. // Don't namespace build_info, since we want it to be consistent across all Cloudflare services
  75. Name: "build_info",
  76. Help: "Build and version information",
  77. },
  78. []string{"goversion", "revision", "version"},
  79. )
  80. prometheus.MustRegister(buildInfo)
  81. buildInfo.WithLabelValues(runtime.Version(), buildTime, version).Set(1)
  82. }