metrics.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. package connection
  2. import (
  3. "sync"
  4. "time"
  5. "github.com/cloudflare/cloudflared/h2mux"
  6. "github.com/prometheus/client_golang/prometheus"
  7. )
  // Exported name components used when building Prometheus metric names.
  const (
  	// MetricsNamespace is the Prometheus namespace given to the metrics
  	// defined below.
  	MetricsNamespace = "cloudflared"
  	// TunnelSubsystem is the Prometheus subsystem for tunnel-level metrics.
  	TunnelSubsystem = "tunnel"
  )

  // muxerSubsystem is the Prometheus subsystem for muxer metrics.
  const muxerSubsystem = "muxer"
  13. type muxerMetrics struct {
  14. rtt *prometheus.GaugeVec
  15. rttMin *prometheus.GaugeVec
  16. rttMax *prometheus.GaugeVec
  17. receiveWindowAve *prometheus.GaugeVec
  18. sendWindowAve *prometheus.GaugeVec
  19. receiveWindowMin *prometheus.GaugeVec
  20. receiveWindowMax *prometheus.GaugeVec
  21. sendWindowMin *prometheus.GaugeVec
  22. sendWindowMax *prometheus.GaugeVec
  23. inBoundRateCurr *prometheus.GaugeVec
  24. inBoundRateMin *prometheus.GaugeVec
  25. inBoundRateMax *prometheus.GaugeVec
  26. outBoundRateCurr *prometheus.GaugeVec
  27. outBoundRateMin *prometheus.GaugeVec
  28. outBoundRateMax *prometheus.GaugeVec
  29. compBytesBefore *prometheus.GaugeVec
  30. compBytesAfter *prometheus.GaugeVec
  31. compRateAve *prometheus.GaugeVec
  32. }
  33. type tunnelMetrics struct {
  34. timerRetries prometheus.Gauge
  35. serverLocations *prometheus.GaugeVec
  36. // locationLock is a mutex for oldServerLocations
  37. locationLock sync.Mutex
  38. // oldServerLocations stores the last server the tunnel was connected to
  39. oldServerLocations map[string]string
  40. regSuccess *prometheus.CounterVec
  41. regFail *prometheus.CounterVec
  42. rpcFail *prometheus.CounterVec
  43. muxerMetrics *muxerMetrics
  44. tunnelsHA tunnelsForHA
  45. userHostnamesCounts *prometheus.CounterVec
  46. }
  47. func newMuxerMetrics() *muxerMetrics {
  48. rtt := prometheus.NewGaugeVec(
  49. prometheus.GaugeOpts{
  50. Namespace: MetricsNamespace,
  51. Subsystem: muxerSubsystem,
  52. Name: "rtt",
  53. Help: "Round-trip time in millisecond",
  54. },
  55. []string{"connection_id"},
  56. )
  57. prometheus.MustRegister(rtt)
  58. rttMin := prometheus.NewGaugeVec(
  59. prometheus.GaugeOpts{
  60. Namespace: MetricsNamespace,
  61. Subsystem: muxerSubsystem,
  62. Name: "rtt_min",
  63. Help: "Shortest round-trip time in millisecond",
  64. },
  65. []string{"connection_id"},
  66. )
  67. prometheus.MustRegister(rttMin)
  68. rttMax := prometheus.NewGaugeVec(
  69. prometheus.GaugeOpts{
  70. Namespace: MetricsNamespace,
  71. Subsystem: muxerSubsystem,
  72. Name: "rtt_max",
  73. Help: "Longest round-trip time in millisecond",
  74. },
  75. []string{"connection_id"},
  76. )
  77. prometheus.MustRegister(rttMax)
  78. receiveWindowAve := prometheus.NewGaugeVec(
  79. prometheus.GaugeOpts{
  80. Namespace: MetricsNamespace,
  81. Subsystem: muxerSubsystem,
  82. Name: "receive_window_ave",
  83. Help: "Average receive window size in bytes",
  84. },
  85. []string{"connection_id"},
  86. )
  87. prometheus.MustRegister(receiveWindowAve)
  88. sendWindowAve := prometheus.NewGaugeVec(
  89. prometheus.GaugeOpts{
  90. Namespace: MetricsNamespace,
  91. Subsystem: muxerSubsystem,
  92. Name: "send_window_ave",
  93. Help: "Average send window size in bytes",
  94. },
  95. []string{"connection_id"},
  96. )
  97. prometheus.MustRegister(sendWindowAve)
  98. receiveWindowMin := prometheus.NewGaugeVec(
  99. prometheus.GaugeOpts{
  100. Namespace: MetricsNamespace,
  101. Subsystem: muxerSubsystem,
  102. Name: "receive_window_min",
  103. Help: "Smallest receive window size in bytes",
  104. },
  105. []string{"connection_id"},
  106. )
  107. prometheus.MustRegister(receiveWindowMin)
  108. receiveWindowMax := prometheus.NewGaugeVec(
  109. prometheus.GaugeOpts{
  110. Namespace: MetricsNamespace,
  111. Subsystem: muxerSubsystem,
  112. Name: "receive_window_max",
  113. Help: "Largest receive window size in bytes",
  114. },
  115. []string{"connection_id"},
  116. )
  117. prometheus.MustRegister(receiveWindowMax)
  118. sendWindowMin := prometheus.NewGaugeVec(
  119. prometheus.GaugeOpts{
  120. Namespace: MetricsNamespace,
  121. Subsystem: muxerSubsystem,
  122. Name: "send_window_min",
  123. Help: "Smallest send window size in bytes",
  124. },
  125. []string{"connection_id"},
  126. )
  127. prometheus.MustRegister(sendWindowMin)
  128. sendWindowMax := prometheus.NewGaugeVec(
  129. prometheus.GaugeOpts{
  130. Namespace: MetricsNamespace,
  131. Subsystem: muxerSubsystem,
  132. Name: "send_window_max",
  133. Help: "Largest send window size in bytes",
  134. },
  135. []string{"connection_id"},
  136. )
  137. prometheus.MustRegister(sendWindowMax)
  138. inBoundRateCurr := prometheus.NewGaugeVec(
  139. prometheus.GaugeOpts{
  140. Namespace: MetricsNamespace,
  141. Subsystem: muxerSubsystem,
  142. Name: "inbound_bytes_per_sec_curr",
  143. Help: "Current inbounding bytes per second, 0 if there is no incoming connection",
  144. },
  145. []string{"connection_id"},
  146. )
  147. prometheus.MustRegister(inBoundRateCurr)
  148. inBoundRateMin := prometheus.NewGaugeVec(
  149. prometheus.GaugeOpts{
  150. Namespace: MetricsNamespace,
  151. Subsystem: muxerSubsystem,
  152. Name: "inbound_bytes_per_sec_min",
  153. Help: "Minimum non-zero inbounding bytes per second",
  154. },
  155. []string{"connection_id"},
  156. )
  157. prometheus.MustRegister(inBoundRateMin)
  158. inBoundRateMax := prometheus.NewGaugeVec(
  159. prometheus.GaugeOpts{
  160. Namespace: MetricsNamespace,
  161. Subsystem: muxerSubsystem,
  162. Name: "inbound_bytes_per_sec_max",
  163. Help: "Maximum inbounding bytes per second",
  164. },
  165. []string{"connection_id"},
  166. )
  167. prometheus.MustRegister(inBoundRateMax)
  168. outBoundRateCurr := prometheus.NewGaugeVec(
  169. prometheus.GaugeOpts{
  170. Namespace: MetricsNamespace,
  171. Subsystem: muxerSubsystem,
  172. Name: "outbound_bytes_per_sec_curr",
  173. Help: "Current outbounding bytes per second, 0 if there is no outgoing traffic",
  174. },
  175. []string{"connection_id"},
  176. )
  177. prometheus.MustRegister(outBoundRateCurr)
  178. outBoundRateMin := prometheus.NewGaugeVec(
  179. prometheus.GaugeOpts{
  180. Namespace: MetricsNamespace,
  181. Subsystem: muxerSubsystem,
  182. Name: "outbound_bytes_per_sec_min",
  183. Help: "Minimum non-zero outbounding bytes per second",
  184. },
  185. []string{"connection_id"},
  186. )
  187. prometheus.MustRegister(outBoundRateMin)
  188. outBoundRateMax := prometheus.NewGaugeVec(
  189. prometheus.GaugeOpts{
  190. Namespace: MetricsNamespace,
  191. Subsystem: muxerSubsystem,
  192. Name: "outbound_bytes_per_sec_max",
  193. Help: "Maximum outbounding bytes per second",
  194. },
  195. []string{"connection_id"},
  196. )
  197. prometheus.MustRegister(outBoundRateMax)
  198. compBytesBefore := prometheus.NewGaugeVec(
  199. prometheus.GaugeOpts{
  200. Namespace: MetricsNamespace,
  201. Subsystem: muxerSubsystem,
  202. Name: "comp_bytes_before",
  203. Help: "Bytes sent via cross-stream compression, pre compression",
  204. },
  205. []string{"connection_id"},
  206. )
  207. prometheus.MustRegister(compBytesBefore)
  208. compBytesAfter := prometheus.NewGaugeVec(
  209. prometheus.GaugeOpts{
  210. Namespace: MetricsNamespace,
  211. Subsystem: muxerSubsystem,
  212. Name: "comp_bytes_after",
  213. Help: "Bytes sent via cross-stream compression, post compression",
  214. },
  215. []string{"connection_id"},
  216. )
  217. prometheus.MustRegister(compBytesAfter)
  218. compRateAve := prometheus.NewGaugeVec(
  219. prometheus.GaugeOpts{
  220. Namespace: MetricsNamespace,
  221. Subsystem: muxerSubsystem,
  222. Name: "comp_rate_ave",
  223. Help: "Average outbound cross-stream compression ratio",
  224. },
  225. []string{"connection_id"},
  226. )
  227. prometheus.MustRegister(compRateAve)
  228. return &muxerMetrics{
  229. rtt: rtt,
  230. rttMin: rttMin,
  231. rttMax: rttMax,
  232. receiveWindowAve: receiveWindowAve,
  233. sendWindowAve: sendWindowAve,
  234. receiveWindowMin: receiveWindowMin,
  235. receiveWindowMax: receiveWindowMax,
  236. sendWindowMin: sendWindowMin,
  237. sendWindowMax: sendWindowMax,
  238. inBoundRateCurr: inBoundRateCurr,
  239. inBoundRateMin: inBoundRateMin,
  240. inBoundRateMax: inBoundRateMax,
  241. outBoundRateCurr: outBoundRateCurr,
  242. outBoundRateMin: outBoundRateMin,
  243. outBoundRateMax: outBoundRateMax,
  244. compBytesBefore: compBytesBefore,
  245. compBytesAfter: compBytesAfter,
  246. compRateAve: compRateAve,
  247. }
  248. }
  249. func (m *muxerMetrics) update(connectionID string, metrics *h2mux.MuxerMetrics) {
  250. m.rtt.WithLabelValues(connectionID).Set(convertRTTMilliSec(metrics.RTT))
  251. m.rttMin.WithLabelValues(connectionID).Set(convertRTTMilliSec(metrics.RTTMin))
  252. m.rttMax.WithLabelValues(connectionID).Set(convertRTTMilliSec(metrics.RTTMax))
  253. m.receiveWindowAve.WithLabelValues(connectionID).Set(metrics.ReceiveWindowAve)
  254. m.sendWindowAve.WithLabelValues(connectionID).Set(metrics.SendWindowAve)
  255. m.receiveWindowMin.WithLabelValues(connectionID).Set(float64(metrics.ReceiveWindowMin))
  256. m.receiveWindowMax.WithLabelValues(connectionID).Set(float64(metrics.ReceiveWindowMax))
  257. m.sendWindowMin.WithLabelValues(connectionID).Set(float64(metrics.SendWindowMin))
  258. m.sendWindowMax.WithLabelValues(connectionID).Set(float64(metrics.SendWindowMax))
  259. m.inBoundRateCurr.WithLabelValues(connectionID).Set(float64(metrics.InBoundRateCurr))
  260. m.inBoundRateMin.WithLabelValues(connectionID).Set(float64(metrics.InBoundRateMin))
  261. m.inBoundRateMax.WithLabelValues(connectionID).Set(float64(metrics.InBoundRateMax))
  262. m.outBoundRateCurr.WithLabelValues(connectionID).Set(float64(metrics.OutBoundRateCurr))
  263. m.outBoundRateMin.WithLabelValues(connectionID).Set(float64(metrics.OutBoundRateMin))
  264. m.outBoundRateMax.WithLabelValues(connectionID).Set(float64(metrics.OutBoundRateMax))
  265. m.compBytesBefore.WithLabelValues(connectionID).Set(float64(metrics.CompBytesBefore.Value()))
  266. m.compBytesAfter.WithLabelValues(connectionID).Set(float64(metrics.CompBytesAfter.Value()))
  267. m.compRateAve.WithLabelValues(connectionID).Set(float64(metrics.CompRateAve()))
  268. }
  // convertRTTMilliSec returns t expressed in milliseconds as a float64,
  // keeping the sub-millisecond fraction (1500µs yields 1.5).
  func convertRTTMilliSec(t time.Duration) float64 {
  	// Convert to float64 before dividing: t / time.Millisecond is an integer
  	// division and would drop everything below one millisecond.
  	return float64(t) / float64(time.Millisecond)
  }
  272. // Metrics that can be collected without asking the edge
  273. func initTunnelMetrics() *tunnelMetrics {
  274. maxConcurrentRequestsPerTunnel := prometheus.NewGaugeVec(
  275. prometheus.GaugeOpts{
  276. Namespace: MetricsNamespace,
  277. Subsystem: TunnelSubsystem,
  278. Name: "max_concurrent_requests_per_tunnel",
  279. Help: "Largest number of concurrent requests proxied through each tunnel so far",
  280. },
  281. []string{"connection_id"},
  282. )
  283. prometheus.MustRegister(maxConcurrentRequestsPerTunnel)
  284. timerRetries := prometheus.NewGauge(
  285. prometheus.GaugeOpts{
  286. Namespace: MetricsNamespace,
  287. Subsystem: TunnelSubsystem,
  288. Name: "timer_retries",
  289. Help: "Unacknowledged heart beats count",
  290. })
  291. prometheus.MustRegister(timerRetries)
  292. serverLocations := prometheus.NewGaugeVec(
  293. prometheus.GaugeOpts{
  294. Namespace: MetricsNamespace,
  295. Subsystem: TunnelSubsystem,
  296. Name: "server_locations",
  297. Help: "Where each tunnel is connected to. 1 means current location, 0 means previous locations.",
  298. },
  299. []string{"connection_id", "location"},
  300. )
  301. prometheus.MustRegister(serverLocations)
  302. rpcFail := prometheus.NewCounterVec(
  303. prometheus.CounterOpts{
  304. Namespace: MetricsNamespace,
  305. Subsystem: TunnelSubsystem,
  306. Name: "tunnel_rpc_fail",
  307. Help: "Count of RPC connection errors by type",
  308. },
  309. []string{"error", "rpcName"},
  310. )
  311. prometheus.MustRegister(rpcFail)
  312. registerFail := prometheus.NewCounterVec(
  313. prometheus.CounterOpts{
  314. Namespace: MetricsNamespace,
  315. Subsystem: TunnelSubsystem,
  316. Name: "tunnel_register_fail",
  317. Help: "Count of tunnel registration errors by type",
  318. },
  319. []string{"error", "rpcName"},
  320. )
  321. prometheus.MustRegister(registerFail)
  322. userHostnamesCounts := prometheus.NewCounterVec(
  323. prometheus.CounterOpts{
  324. Namespace: MetricsNamespace,
  325. Subsystem: TunnelSubsystem,
  326. Name: "user_hostnames_counts",
  327. Help: "Which user hostnames cloudflared is serving",
  328. },
  329. []string{"userHostname"},
  330. )
  331. prometheus.MustRegister(userHostnamesCounts)
  332. registerSuccess := prometheus.NewCounterVec(
  333. prometheus.CounterOpts{
  334. Namespace: MetricsNamespace,
  335. Subsystem: TunnelSubsystem,
  336. Name: "tunnel_register_success",
  337. Help: "Count of successful tunnel registrations",
  338. },
  339. []string{"rpcName"},
  340. )
  341. prometheus.MustRegister(registerSuccess)
  342. return &tunnelMetrics{
  343. timerRetries: timerRetries,
  344. serverLocations: serverLocations,
  345. oldServerLocations: make(map[string]string),
  346. muxerMetrics: newMuxerMetrics(),
  347. tunnelsHA: newTunnelsForHA(),
  348. regSuccess: registerSuccess,
  349. regFail: registerFail,
  350. rpcFail: rpcFail,
  351. userHostnamesCounts: userHostnamesCounts,
  352. }
  353. }
  354. func (t *tunnelMetrics) updateMuxerMetrics(connectionID string, metrics *h2mux.MuxerMetrics) {
  355. t.muxerMetrics.update(connectionID, metrics)
  356. }
  357. func (t *tunnelMetrics) registerServerLocation(connectionID, loc string) {
  358. t.locationLock.Lock()
  359. defer t.locationLock.Unlock()
  360. if oldLoc, ok := t.oldServerLocations[connectionID]; ok && oldLoc == loc {
  361. return
  362. } else if ok {
  363. t.serverLocations.WithLabelValues(connectionID, oldLoc).Dec()
  364. }
  365. t.serverLocations.WithLabelValues(connectionID, loc).Inc()
  366. t.oldServerLocations[connectionID] = loc
  367. }
  // tunnelMetricsInternal holds the package-level tunnelMetrics instance.
  // newTunnelMetrics uses the embedded sync.Once to run initTunnelMetrics a
  // single time and stores the result in metrics.
  var tunnelMetricsInternal struct {
  	sync.Once
  	// metrics is assigned inside the Once by newTunnelMetrics.
  	metrics *tunnelMetrics
  }
  372. func newTunnelMetrics() *tunnelMetrics {
  373. tunnelMetricsInternal.Do(func() {
  374. tunnelMetricsInternal.metrics = initTunnelMetrics()
  375. })
  376. return tunnelMetricsInternal.metrics
  377. }