1 /*
2 * libwebsockets - small server side websockets and web server implementation
3 *
4 * Copyright (C) 2010 - 2021 Andy Green <andy@warmcat.com>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 *
24 * Public apis related to metric collection and reporting
25 */
26
27/* lws_metrics public part */
28
/* unsigned 64-bit scalar used for metric sums and individual measurements */
typedef uint64_t u_mt_t;
30
/*
 * Metric behaviour flags.  Each one is a distinct single bit so they can be
 * ORed together; all of them fit in the low 7 bits.
 */
enum {
	/** track outliers and report them internally */
	LWSMTFL_REPORT_OUTLIERS			= 0x01,

	/** report events as they happen */
	LWSMTFL_REPORT_OOB			= 0x02,

	/** explicitly externally report no activity at periodic cb, by
	 * default no events in the period is just not reported */
	LWSMTFL_REPORT_INACTIVITY_AT_PERIODIC	= 0x04,

	/** average/min/max is meaningful, else only sum is meaningful */
	LWSMTFL_REPORT_MEAN			= 0x08,

	/** no-go pieces invalid */
	LWSMTFL_REPORT_ONLY_GO			= 0x10,

	/** aggregate compares to wallclock us for duty cycle */
	LWSMTFL_REPORT_DUTY_WALLCLOCK_US	= 0x20,

	/** our type is histogram (otherwise, sum / mean aggregation) */
	LWSMTFL_REPORT_HIST			= 0x40,
};
48
49/*
50 * lws_metrics_tag allows your object to accumulate OpenMetrics-style
51 * descriptive tags before accounting for it with a metrics object at the end.
52 *
 * Tags should represent low entropy information that is likely to repeat
 * identically: for example an http method name, but not a latency in us, which
 * is unlikely to be seen with the same value twice.
56 *
57 * Tags are just a list of name=value pairs, used for qualifying the final
58 * metrics entry with decorations in additional dimensions. For example,
59 * rather than keep individual metrics on methods, scheme, mountpoint, result
60 * code, you can keep metrics on http transactions only, and qualify the
61 * transaction metrics entries with tags that can be queried on the metrics
62 * backend to get the finer-grained information.
63 *
64 * http_srv{code="404",mount="/",method="GET",scheme="http"} 3
65 *
 * For OpenMetrics the tags are converted to a { list } and appended to the base
 * metrics name before use with actual metrics objects; the same set of tags
 * on different transactions resolves to the same qualification string.
69 */
70
71typedef struct lws_metrics_tag {
72 lws_dll2_t list;
73
74 const char *name; /* tag, intended to be in .rodata, not copied */
75 /* overallocated value */
76} lws_metrics_tag_t;
77
78LWS_EXTERN LWS_VISIBLE int
79lws_metrics_tag_add(lws_dll2_owner_t *owner, const char *name, const char *val);
80
/*
 * wsi-specific version of tag add, that also appends the tag value to the
 * lifecycle tag used for logging the wsi identity.
 *
 * Without LWS_WITH_SYS_METRICS it is an empty macro, so calls compile away
 * to nothing (and so cannot be used as an expression in such builds).
 */
#if !defined(LWS_WITH_SYS_METRICS)
#define lws_metrics_tag_wsi_add(_a, _b, _c)
#else
LWS_EXTERN LWS_VISIBLE int
lws_metrics_tag_wsi_add(struct lws *wsi, const char *name, const char *val);
#endif
91
/*
 * ss-specific version of tag add, that also appends the tag value to the
 * lifecycle tag used for logging the ss identity.
 *
 * Only exists when LWS_WITH_SECURE_STREAMS is enabled: as a real function if
 * LWS_WITH_SYS_METRICS is enabled too, otherwise as an empty macro.  Without
 * LWS_WITH_SECURE_STREAMS nothing is declared at all.
 */
#if defined(LWS_WITH_SECURE_STREAMS) && defined(LWS_WITH_SYS_METRICS)
LWS_EXTERN LWS_VISIBLE int
lws_metrics_tag_ss_add(struct lws_ss_handle *ss, const char *name, const char *val);
#elif defined(LWS_WITH_SECURE_STREAMS)
#define lws_metrics_tag_ss_add(_a, _b, _c)
#endif
104
105LWS_EXTERN LWS_VISIBLE void
106lws_metrics_tags_destroy(lws_dll2_owner_t *owner);
107
108LWS_EXTERN LWS_VISIBLE size_t
109lws_metrics_tags_serialize(lws_dll2_owner_t *owner, char *buf, size_t len);
110
111LWS_EXTERN LWS_VISIBLE const char *
112lws_metrics_tag_get(lws_dll2_owner_t *owner, const char *name);
113
114/* histogram bucket */
115
/*
 * One named histogram bucket, singly-linked to the next bucket.  The bucket's
 * name is not a member: name + NUL is overallocated after the struct.
 */
typedef struct lws_metric_bucket {
	struct lws_metric_bucket	*next;	/* next bucket in the list */
	uint64_t			count;	/* count for this bucket */

	/* name + NUL is overallocated */
} lws_metric_bucket_t;
122
/*
 * Accessors for the data overallocated straight after a bucket struct: the
 * first byte there is the name length, and the name string starts at the byte
 * after that.  _b is a pointer to the bucket.
 */
#define lws_metric_bucket_name_len(_b) \
		(*(uint8_t *)((_b) + 1))
#define lws_metric_bucket_name(_b) \
		((const char *)((_b) + 1) + 1)
126
127/*
 * These represent persistent local event measurements.  They may aggregate
 * a large number of events in between external dumps of summaries of the
 * period covered, in two different ways:
131 *
132 * 1) aggregation by sum or mean, to absorb multiple scalar readings
133 *
134 * - go / no-go ratio counting
135 * - mean averaging for, eg, latencies
136 * - min / max for averaged values
137 * - period the stats covers
138 *
139 * 2) aggregation by histogram, to absorb a range of outcomes that may occur
140 * multiple times
141 *
142 * - add named buckets to histogram
143 * - bucket has a 64-bit count
144 * - bumping a bucket just increments the count if already exists, else adds
145 * a new one with count set to 1
146 *
147 * The same type with a union covers both cases.
148 *
149 * The lws_system ops api that hooks lws_metrics up to a metrics backend is
150 * given a pointer to these according to the related policy, eg, hourly, or
151 * every event passed straight through.
152 */
153
154typedef struct lws_metric_pub {
155 const char *name;
156 /**< eg, "n.cn.dns", "vh.myendpoint" */
157 void *backend_opaque;
158 /**< ignored by lws, backend handler completely owns it */
159
160 lws_usec_t us_first;
161 /**< us time metric started collecting, reset to us_dumped at dump */
162 lws_usec_t us_last;
163 /**< 0, or us time last event, reset to 0 at last dump */
164 lws_usec_t us_dumped;
165 /**< 0 if never, else us time of last dump to external api */
166
167 /* scope of data in .u is "since last dump" --> */
168
169 union {
170 /* aggregation, by sum or mean */
171
172 struct {
173 u_mt_t sum[2];
174 /**< go, no-go summed for mean or plan sum */
175 u_mt_t min;
176 /**< smallest individual measurement */
177 u_mt_t max;
178 /**< largest individual measurement */
179
180 uint32_t count[2];
181 /**< go, no-go count of measurements in sum */
182 } agg;
183
184 /* histogram with dynamic named buckets */
185
186 struct {
187 lws_metric_bucket_t *head;
188 /**< first bucket in our bucket list */
189
190 uint64_t total_count;
191 /**< total count in all of our buckets */
192 uint32_t list_size;
193 /**< number of buckets in our bucket list */
194 } hist;
195 } u;
196
197 uint8_t flags;
198
199} lws_metric_pub_t;
200
201LWS_EXTERN LWS_VISIBLE void
202lws_metrics_hist_bump_priv_tagged(lws_metric_pub_t *mt, lws_dll2_owner_t *tow,
203 lws_dll2_owner_t *tow2);
204
205
206/*
207 * Calipers are a helper struct for implementing "hanging latency" detection,
208 * where setting the start time and finding the end time may happen in more than
209 * one place.
210 *
211 * There are convenience wrappers to eliminate caliper definitions and code
212 * cleanly if WITH_SYS_METRICS is disabled for the build.
213 */
214
215struct lws_metric;
216
217typedef struct lws_metric_caliper {
218 struct lws_dll2_owner mtags_owner; /**< collect tags here during
219 * caliper lifetime */
220 struct lws_metric *mt; /**< NULL == inactive */
221 lws_usec_t us_start;
222} lws_metric_caliper_t;
223
/*
 * Caliper and histogram convenience wrappers.
 *
 * With LWS_WITH_SYS_METRICS these expand to the real operations; without it
 * every wrapper expands to nothing, so callers need no preprocessor
 * conditionals of their own.
 *
 *  - _compose(_name):       define a caliper member / variable (the macro
 *                           supplies the terminating ';' itself)
 *  - _bind(_name, _mt):     attach metric _mt and take the start time; logs
 *                           and asserts if the caliper is already bound
 *  - _declare(_name, _mt):  define a caliper already bound to _mt
 *  - _report(_name, _gn):   if started, report elapsed us as a go / no-go
 *                           event, then reset the caliper
 *  - _report_hist(_name, pwsi): if bound, bump the histogram using the
 *                           caliper's tags plus, when pwsi is non-NULL, the
 *                           tags of pwsi->cal_conn; then reset the caliper
 *  - _cancel / _done(_name): reset the caliper and destroy its tags
 *
 * _name is used as an lvalue (eg, wsi->cal_conn) and is deliberately not
 * parenthesized, since _compose and _declare use it as a declarator.  Other
 * arguments are parenthesized where they sit inside a larger expression, and
 * every statement-like wrapper is a single braced block so it stays one
 * statement for the caller.
 */
#if defined(LWS_WITH_SYS_METRICS)
#define lws_metrics_caliper_compose(_name) \
		lws_metric_caliper_t _name;
#define lws_metrics_caliper_bind(_name, _mt) \
	{ if (_name.mt) { \
		lwsl_err("caliper: overwrite %s\n", \
				lws_metrics_priv_to_pub(_name.mt)->name); \
		assert(0); } \
	  _name.mt = (_mt); _name.us_start = lws_now_usecs(); }
#define lws_metrics_caliper_declare(_name, _mt) \
	lws_metric_caliper_t _name = { .mt = (_mt), .us_start = lws_now_usecs() }
#define lws_metrics_caliper_report(_name, _go_nogo) \
	{ if (_name.us_start) { lws_metric_event(_name.mt, _go_nogo, \
					   (u_mt_t)(lws_now_usecs() - \
							   _name.us_start)); \
	  } lws_metrics_caliper_done(_name); }
#define lws_metrics_caliper_report_hist(_name, pwsi) \
	{ if (_name.mt) { \
		lws_metrics_hist_bump_priv_tagged( \
				lws_metrics_priv_to_pub(_name.mt), \
				&_name.mtags_owner, \
				(pwsi) ? &((pwsi)->cal_conn.mtags_owner) : NULL); \
		lws_metrics_caliper_done(_name); } }

#define lws_metrics_caliper_cancel(_name) { lws_metrics_caliper_done(_name); }
#define lws_metrics_hist_bump(_mt, _name) \
		lws_metrics_hist_bump_(_mt, _name)
#define lws_metrics_hist_bump_priv(_mt, _name) \
		lws_metrics_hist_bump_(lws_metrics_priv_to_pub(_mt), _name)
#define lws_metrics_caliper_done(_name) { \
		_name.us_start = 0; _name.mt = NULL; \
		lws_metrics_tags_destroy(&_name.mtags_owner); }
#else
#define lws_metrics_caliper_compose(_name)
#define lws_metrics_caliper_bind(_name, _mt)
#define lws_metrics_caliper_declare(_name, _mp)
#define lws_metrics_caliper_report(_name, _go_nogo)
#define lws_metrics_caliper_report_hist(_name, pwsiconn)
#define lws_metrics_caliper_cancel(_name)
#define lws_metrics_hist_bump(_mt, _name)
#define lws_metrics_hist_bump_priv(_mt, _name)
#define lws_metrics_caliper_done(_name)
#endif
265
266/**
267 * lws_metrics_format() - helper to format a metrics object for logging
268 *
269 * \param pub: public part of metrics object
270 * \param buf: output buffer to place string in
271 * \param len: available length of \p buf
272 *
273 * Helper for describing the state of a metrics object as a human-readable
274 * string, accounting for how its flags indicate what it contains. This is not
275 * how you would report metrics, but during development it can be useful to
276 * log them inbetween possibily long report intervals.
277 *
278 * It uses the metric's flags to adapt the format shown appropriately, eg,
279 * as a histogram if LWSMTFL_REPORT_HIST etc
280 */
281LWS_EXTERN LWS_VISIBLE int
282lws_metrics_format(lws_metric_pub_t *pub, lws_metric_bucket_t **sub,
283 char *buf, size_t len);
284
285/**
286 * lws_metrics_hist_bump() - add or increment histogram bucket
287 *
288 * \param pub: public part of metrics object
289 * \param name: bucket name to increment
290 *
291 * Either increment the count of an existing bucket of the right name in the
292 * metrics object, or add a new bucket of the given name and set its count to 1.
293 *
294 * The metrics object must have been created with flag LWSMTFL_REPORT_HIST
295 *
296 * Normally, you will actually use the preprocessor wrapper
297 * lws_metrics_hist_bump() defined above, since this automatically takes care of
298 * removing itself from the build if WITH_SYS_METRICS is not defined, without
299 * needing any preprocessor conditionals.
300 */
301LWS_EXTERN LWS_VISIBLE int
302lws_metrics_hist_bump_(lws_metric_pub_t *pub, const char *name);
303
304LWS_VISIBLE LWS_EXTERN int
305lws_metrics_foreach(struct lws_context *ctx, void *user,
306 int (*cb)(lws_metric_pub_t *pub, void *user));
307
308LWS_VISIBLE LWS_EXTERN int
309lws_metrics_hist_bump_describe_wsi(struct lws *wsi, lws_metric_pub_t *pub,
310 const char *name);
311
/* classification of what a measurement relates to; LMT_COUNT is the total */
enum {
	/* related to successful events */
	LMT_NORMAL	= 0,
	/* related to successful events outside of bounds */
	LMT_OUTLIER	= 1,

	/* related to failed events */
	LMT_FAIL	= 2,

	/* number of classifications above */
	LMT_COUNT	= 3,
};
320
/* why a metric report is being made */
typedef enum lws_metric_rpt {
	/* we are reporting on a schedule */
	LMR_PERIODIC	= 0,
	/* we are reporting the last outlier */
	LMR_OUTLIER	= 1,
} lws_metric_rpt_kind_t;
325
/*
 * go / no-go result selectors.
 * NOTE(review): presumably these index the two-element go / no-go arrays
 * (agg.sum[], agg.count[]) -- confirm.
 */
#define METRES_GO	0
#define METRES_NOGO	1
328
329
330