123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199 |
- package metrics
- import (
- "context"
- "fmt"
- "net"
- "net/http"
- _ "net/http/pprof"
- "runtime"
- "sync"
- "time"
- "github.com/facebookgo/grace/gracenet"
- "github.com/prometheus/client_golang/prometheus"
- "github.com/prometheus/client_golang/prometheus/promhttp"
- "github.com/rs/zerolog"
- "golang.org/x/net/trace"
- "github.com/cloudflare/cloudflared/diagnostic"
- )
// Timing defaults used by ServeMetrics.
const (
	// startupTime is how long ServeMetrics sleeps after launching the server
	// goroutine before it starts waiting for the context to be cancelled.
	startupTime = 500 * time.Millisecond
	// defaultShutdownTimeout bounds the server shutdown when
	// Config.ShutdownTimeout is left at zero.
	defaultShutdownTimeout = 15 * time.Second
)

// Runtime identifies the runtime flavour the binary was built for. It is a
// variable (not a constant) so that it can be set at compile time, which in
// turn changes the default local address the metrics server binds to.
var Runtime = "host"
// GetMetricsDefaultAddress returns the default bind address of the metrics
// server for the given runtime type. The port is always 0.
//
// For the "virtual" runtime the server binds to 0.0.0.0: when the diagnostic
// command is issued the server may be running inside a virtual environment,
// and it would not be reachable otherwise. Every other runtime type binds to
// localhost.
func GetMetricsDefaultAddress(runtimeType string) string {
	if runtimeType == "virtual" {
		return "0.0.0.0:0"
	}
	return "localhost:0"
}
// GetMetricsKnownAddresses returns the addresses the metrics server tries to
// bind to at startup, giving a semi-deterministic way to know where the
// server is listening.
//
// The result depends only on runtimeType: "virtual" yields addresses on
// 0.0.0.0, any other value yields addresses on localhost. The package-level
// Runtime variable is not consulted; callers that want the compiled-in
// behaviour pass Runtime explicitly.
//
// The ports (20241-20245) were picked in 2024 because they did not collide
// with any known/registered port according to
// https://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers.
func GetMetricsKnownAddresses(runtimeType string) [5]string {
	switch runtimeType {
	case "virtual":
		return [5]string{"0.0.0.0:20241", "0.0.0.0:20242", "0.0.0.0:20243", "0.0.0.0:20244", "0.0.0.0:20245"}
	default:
		return [5]string{"localhost:20241", "localhost:20242", "localhost:20243", "localhost:20244", "localhost:20245"}
	}
}
- type Config struct {
- ReadyServer *ReadyServer
- DiagnosticHandler *diagnostic.Handler
- QuickTunnelHostname string
- Orchestrator orchestrator
- ShutdownTimeout time.Duration
- }
// orchestrator is the slice of the orchestrator API the metrics server relies
// on: a source for the versioned configuration served at /config.
type orchestrator interface {
	// GetVersionedConfigJSON returns the versioned configuration as JSON
	// bytes, or an error if it cannot be produced.
	GetVersionedConfigJSON() ([]byte, error)
}
- func newMetricsHandler(
- config Config,
- log *zerolog.Logger,
- ) *http.ServeMux {
- router := http.NewServeMux()
- router.Handle("/debug/", http.DefaultServeMux)
- router.Handle("/metrics", promhttp.Handler())
- router.HandleFunc("/healthcheck", func(w http.ResponseWriter, r *http.Request) {
- _, _ = fmt.Fprintf(w, "OK\n")
- })
- if config.ReadyServer != nil {
- router.Handle("/ready", config.ReadyServer)
- }
- router.HandleFunc("/quicktunnel", func(w http.ResponseWriter, r *http.Request) {
- _, _ = fmt.Fprintf(w, `{"hostname":"%s"}`, config.QuickTunnelHostname)
- })
- if config.Orchestrator != nil {
- router.HandleFunc("/config", func(w http.ResponseWriter, r *http.Request) {
- json, err := config.Orchestrator.GetVersionedConfigJSON()
- if err != nil {
- w.WriteHeader(500)
- _, _ = fmt.Fprintf(w, "ERR: %v", err)
- log.Err(err).Msg("Failed to serve config")
- return
- }
- _, _ = w.Write(json)
- })
- }
- router.HandleFunc("/diag/configuration", config.DiagnosticHandler.ConfigurationHandler)
- router.HandleFunc("/diag/tunnel", config.DiagnosticHandler.TunnelStateHandler)
- router.HandleFunc("/diag/system", config.DiagnosticHandler.SystemHandler)
- return router
- }
- // CreateMetricsListener will create a new [net.Listener] by using an
- // known set of ports when the default address is passed with the fallback
- // of choosing a random port when none is available.
- //
- // In case the provided address is not the default one then it will be used
- // as is.
- func CreateMetricsListener(listeners *gracenet.Net, laddr string) (net.Listener, error) {
- if laddr == GetMetricsDefaultAddress(Runtime) {
- // On the presence of the default address select
- // a port from the known set of addresses iteratively.
- addresses := GetMetricsKnownAddresses(Runtime)
- for _, address := range addresses {
- listener, err := listeners.Listen("tcp", address)
- if err == nil {
- return listener, nil
- }
- }
- // When no port is available then bind to a random one
- listener, err := listeners.Listen("tcp", laddr)
- if err != nil {
- return nil, fmt.Errorf("failed to listen to default metrics address: %w", err)
- }
- return listener, nil
- }
- // Explicitly got a local address then bind to it
- listener, err := listeners.Listen("tcp", laddr)
- if err != nil {
- return nil, fmt.Errorf("failed to bind to address (%s): %w", laddr, err)
- }
- return listener, nil
- }
- func ServeMetrics(
- l net.Listener,
- ctx context.Context,
- config Config,
- log *zerolog.Logger,
- ) (err error) {
- var wg sync.WaitGroup
- // Metrics port is privileged, so no need for further access control
- trace.AuthRequest = func(*http.Request) (bool, bool) { return true, true }
- // TODO: parameterize ReadTimeout and WriteTimeout. The maximum time we can
- // profile CPU usage depends on WriteTimeout
- h := newMetricsHandler(config, log)
- server := &http.Server{
- ReadTimeout: 10 * time.Second,
- WriteTimeout: 10 * time.Second,
- Handler: h,
- }
- wg.Add(1)
- go func() {
- defer wg.Done()
- err = server.Serve(l)
- }()
- log.Info().Msgf("Starting metrics server on %s", fmt.Sprintf("%v/metrics", l.Addr()))
- // server.Serve will hang if server.Shutdown is called before the server is
- // fully started up. So add artificial delay.
- time.Sleep(startupTime)
- <-ctx.Done()
- shutdownTimeout := config.ShutdownTimeout
- if shutdownTimeout == 0 {
- shutdownTimeout = defaultShutdownTimeout
- }
- ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
- _ = server.Shutdown(ctx)
- cancel()
- wg.Wait()
- if err == http.ErrServerClosed {
- log.Info().Msg("Metrics server stopped")
- return nil
- }
- log.Err(err).Msg("Metrics server failed")
- return err
- }
- func RegisterBuildInfo(buildType, buildTime, version string) {
- buildInfo := prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- // Don't namespace build_info, since we want it to be consistent across all Cloudflare services
- Name: "build_info",
- Help: "Build and version information",
- },
- []string{"goversion", "type", "revision", "version"},
- )
- prometheus.MustRegister(buildInfo)
- buildInfo.WithLabelValues(runtime.Version(), buildType, buildTime, version).Set(1)
- }
|