/*
 * libwebsockets - small server side websockets and web server implementation
 *
 * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * LWS_TOKENIZE_F_ option flags: independent single-bit values that may be
 * OR-ed together and stored in the flags member of struct lws_tokenize.
 */

/* Do not treat - as a terminal character, so "my-token" is one token */
#define LWS_TOKENIZE_F_MINUS_NONTERM	(1 << 0)
/* Separately report aggregate colon-delimited tokens */
#define LWS_TOKENIZE_F_AGG_COLON	(1 << 1)
/* Enforce sequencing for a simple token , token , token ... list */
#define LWS_TOKENIZE_F_COMMA_SEP_LIST	(1 << 2)
/* Allow more characters in the tokens and fewer delimiters... default is
 * only alphanumeric + underscore in tokens */
#define LWS_TOKENIZE_F_RFC7230_DELIMS	(1 << 3)
/* Do not treat . as a terminal character, so "warmcat.com" is one token */
#define LWS_TOKENIZE_F_DOT_NONTERM	(1 << 4)
/* If something starts looking like a float, like 1.2, force to be string token.
 * This lets you receive dotted-quads like 192.168.0.1 as string tokens, and
 * avoids illegal float format detection like 1.myserver.com */
#define LWS_TOKENIZE_F_NO_FLOATS	(1 << 5)
/* Instead of LWS_TOKZE_INTEGER, report integers as any other string token */
#define LWS_TOKENIZE_F_NO_INTEGERS	(1 << 6)
/* # makes the rest of the line a comment */
#define LWS_TOKENIZE_F_HASH_COMMENT	(1 << 7)
/* Do not treat / as a terminal character, so "multipart/related" is one token */
#define LWS_TOKENIZE_F_SLASH_NONTERM	(1 << 8)
/* Do not treat * as a terminal character, so "myfile*" is one token */
#define LWS_TOKENIZE_F_ASTERISK_NONTERM	(1 << 9)
/* Do not treat = as a terminal character, so "x=y" is one token */
#define LWS_TOKENIZE_F_EQUALS_NONTERM	(1 << 10)

/*
 * Result codes returned by lws_tokenize(): errors are negative, end of
 * content is 0, and every successfully identified element is 1 or greater.
 */
typedef enum {

	LWS_TOKZE_ERRS			=  5, /* the number of errors defined */

	LWS_TOKZE_ERR_BROKEN_UTF8	= -5, /* malformed or partial utf8 */
	LWS_TOKZE_ERR_UNTERM_STRING	= -4, /* ended while we were in "" */
	LWS_TOKZE_ERR_MALFORMED_FLOAT	= -3, /* like 0..1 or 0.1.1 */
	LWS_TOKZE_ERR_NUM_ON_LHS	= -2, /* like 123= or 0.1= */
	LWS_TOKZE_ERR_COMMA_LIST	= -1, /* like ",tok", or, "tok,," */

	LWS_TOKZE_ENDED = 0,		/* no more content */

	/* Note: results have ordinal 1+, EOT is 0 and errors are < 0 */

	LWS_TOKZE_DELIMITER,		/* a delimiter appeared */
	LWS_TOKZE_TOKEN,		/* a token appeared */
	LWS_TOKZE_INTEGER,		/* an integer appeared */
	LWS_TOKZE_FLOAT,		/* a float appeared */
	LWS_TOKZE_TOKEN_NAME_EQUALS,	/* token [whitespace] = */
	LWS_TOKZE_TOKEN_NAME_COLON,	/* token [whitespace] : (only with
					   LWS_TOKENIZE_F_AGG_COLON flag) */
	LWS_TOKZE_QUOTED_STRING,	/* "*", where * may have any char */

} lws_tokenize_elem;

/*
 * helper enums to allow caller to enforce legal delimiter sequencing, eg
 * disallow "token,,token", "token,", and ",token"
 *
 * NOTE(review): the per-state descriptions below are read from the enumerator
 * names and the sequencing rules above - confirm against the implementation.
 */

enum lws_tokenize_delimiter_tracking {
	LWSTZ_DT_NEED_FIRST_CONTENT,	/* nothing seen yet, content expected */
	LWSTZ_DT_NEED_DELIM,		/* content seen, delimiter expected */
	LWSTZ_DT_NEED_NEXT_CONTENT,	/* delimiter seen, content expected */
};

/*
 * Tokenizer state: describes the string being tokenized, the options in
 * force, and the most recently identified token or delimiter.
 */
typedef struct lws_tokenize {
	const char *start; /**< set to the start of the string to tokenize */
	const char *token; /**< the start of an identified token or delimiter */
	size_t len;	/**< set to the length of the string to tokenize */
	size_t token_len; /**< the length of the identified token or delimiter */

	uint16_t flags;	/**< optional LWS_TOKENIZE_F_ flags, or 0 */
	uint8_t delim;	/**< delimiter sequencing state; lws_tokenize_init()
			 * sets it to LWSTZ_DT_NEED_FIRST_CONTENT */

	int8_t e; /**< convenient for storing lws_tokenize return */
} lws_tokenize_t;

99/**
100 * lws_tokenize() - breaks down a string into tokens and delimiters in-place
101 *
102 * \param ts: the lws_tokenize struct to init
103 * \param start: the string to tokenize
104 * \param flags: LWS_TOKENIZE_F_ option flags
105 *
106 * This initializes the tokenize struct to point to the given string, and
107 * sets the length to 2GiB - 1 (so there must be a terminating NUL)... you can
108 * override this requirement by setting ts.len yourself before using it.
109 *
110 * .delim is also initialized to LWSTZ_DT_NEED_FIRST_CONTENT.
111 */
112
113LWS_VISIBLE LWS_EXTERN void
114lws_tokenize_init(struct lws_tokenize *ts, const char *start, int flags);
115
116/**
117 * lws_tokenize() - breaks down a string into tokens and delimiters in-place
118 *
119 * \param ts: the lws_tokenize struct with information and state on what to do
120 *
121 * The \p ts struct should have its start, len and flags members initialized to
122 * reflect the string to be tokenized and any options.
123 *
124 * Then `lws_tokenize()` may be called repeatedly on the struct, returning one
125 * of `lws_tokenize_elem` each time, and with the struct's `token` and
126 * `token_len` members set to describe the content of the delimiter or token
127 * payload each time.
128 *
129 * There are no allocations during the process.
130 *
131 * returns lws_tokenize_elem that was identified (LWS_TOKZE_ENDED means reached
132 * the end of the string).
133 */
134
135LWS_VISIBLE LWS_EXTERN lws_tokenize_elem
136lws_tokenize(struct lws_tokenize *ts);
137
138/**
139 * lws_tokenize_cstr() - copy token string to NUL-terminated buffer
140 *
141 * \param ts: pointer to lws_tokenize struct to operate on
142 * \param str: destination buffer
143 * \pparam max: bytes in destination buffer
144 *
145 * returns 0 if OK or nonzero if the string + NUL won't fit.
146 */
147
148LWS_VISIBLE LWS_EXTERN int
149lws_tokenize_cstr(struct lws_tokenize *ts, char *str, size_t max);
150
151
/*
 * lws_strexp: flexible string expansion helper api
 *
 * This stateful helper can handle multiple separate input chunks and multiple
 * output buffer loads with arbitrary boundaries between literals and expanded
 * symbols.  This allows it to handle fragmented input as well as arbitrarily
 * long symbol expansions that are bigger than the output buffer itself.
 *
 * A user callback is used to convert symbol names to the symbol value.
 *
 * A single byte buffer for input and another for output can process any
 * length substitution then.  The state object is around 64 bytes on a 64-bit
 * system and it only uses 8 bytes stack.
 */


/*
 * User callback that expands the symbol \p name.
 *
 * priv:    the user pointer that was given to lws_strexp_init()
 * name:    the symbol name to convert to its value
 * out:     the output buffer, NULL when only the length is wanted (the
 *          callback must then avoid producing output)
 * pos:     NOTE(review): presumably the current write position in out, to be
 *          advanced by the callback - confirm against the implementation
 * olen:    NOTE(review): presumably the output buffer length - confirm
 * exp_ofs: offset into the expansion to continue from when the callback is
 *          called again after it returned LSTRX_FILLED_OUT
 *
 * Returns one of the LSTRX_ codes; LSTRX_FILLED_OUT means the expansion itself
 * filled the output buffer and the callback will be called again to continue.
 */
typedef int (*lws_strexp_expand_cb)(void *priv, const char *name, char *out,
				    size_t *pos, size_t olen, size_t *exp_ofs);

/* State object for one string expansion, set up by lws_strexp_init() */
typedef struct lws_strexp {
	char			name[32]; /* NOTE(review): presumably the
					   * symbol name being collected */
	lws_strexp_expand_cb	cb;	/* callback to expand named objects */
	void			*priv;	/* user pointer passed to cb */
	char			*out;	/* output buffer, or NULL to only
					 * compute the length */
	size_t			olen;	/* length of the output buffer */
	size_t			pos;	/* NOTE(review): presumably the next
					 * write position in out */

	size_t			exp_ofs; /* NOTE(review): presumably the resume
					  * offset handed to cb - confirm */

	uint8_t			name_pos; /* NOTE(review): presumably the fill
					   * level of name[] - confirm */
	char			state;	/* NOTE(review): presumably the parser
					 * state - confirm */
} lws_strexp_t;

/*
 * lws_strexp result codes: fatal errors are negative.
 */
enum {
	LSTRX_DONE,			/* it completed OK */
	LSTRX_FILLED_OUT,		/* out buf filled and needs resetting */
	LSTRX_FATAL_NAME_TOO_LONG = -1,	/* fatal */
	LSTRX_FATAL_NAME_UNKNOWN  = -2,	/* NOTE(review): presumably also fatal,
					 * per the FATAL prefix - confirm */
};


193/**
194 * lws_strexp_init() - initialize an lws_strexp_t for use
195 *
196 * \p exp: the exp object to init
197 * \p priv: the user's object pointer to pass to callback
198 * \p cb: the callback to expand named objects
199 * \p out: the start of the output buffer, or NULL just to get the length
200 * \p olen: the length of the output buffer in bytes
201 *
202 * Prepares an lws_strexp_t for use and sets the initial output buffer
203 *
204 * If \p out is NULL, substitution proceeds normally, but no output is produced,
205 * only the length is returned. olen should be set to the largest feasible
206 * overall length. To use this mode, the substitution callback must also check
207 * for NULL \p out and avoid producing the output.
208 */
209LWS_VISIBLE LWS_EXTERN void
210lws_strexp_init(lws_strexp_t *exp, void *priv, lws_strexp_expand_cb cb,
211 char *out, size_t olen);
212
213/**
214 * lws_strexp_reset_out() - reset the output buffer on an existing strexp
215 *
216 * \p exp: the exp object to init
217 * \p out: the start of the output buffer, or NULL to just get length
218 * \p olen: the length of the output buffer in bytes
219 *
220 * Provides a new output buffer for lws_strexp_expand() to continue to write
221 * into. It can be the same as the old one if it has been copied out or used.
222 * The position of the next write will be reset to the start of the given buf.
223 *
224 * If \p out is NULL, substitution proceeds normally, but no output is produced,
225 * only the length is returned. \p olen should be set to the largest feasible
226 * overall length. To use this mode, the substitution callback must also check
227 * for NULL \p out and avoid producing the output.
228 */
229LWS_VISIBLE LWS_EXTERN void
230lws_strexp_reset_out(lws_strexp_t *exp, char *out, size_t olen);
231
232/**
233 * lws_strexp_expand() - copy / expand a string into the output buffer
234 *
235 * \p exp: the exp object for the copy / expansion
236 * \p in: the start of the next input data
237 * \p len: the length of the input data
238 * \p pused_in: pointer to write the amount of input used
239 * \p pused_out: pointer to write the amount of output used
240 *
241 * Copies in to the output buffer set in exp, expanding any ${name} tokens using
242 * the callback. \p *pused_in is set to the number of input chars used and
243 * \p *pused_out the number of output characters used
244 *
245 * May return LSTRX_FILLED_OUT early with *pused < len if the output buffer is
246 * filled. Handle the output buffer and reset it with lws_strexp_reset_out()
247 * before calling again with adjusted in / len to continue.
248 *
249 * In the case of large expansions, the expansion itself may fill the output
250 * buffer, in which case the expansion callback returns the LSTRX_FILLED_OUT
251 * and will be called again to continue with its *exp_ofs parameter set
252 * appropriately.
253 */
254LWS_VISIBLE LWS_EXTERN int
255lws_strexp_expand(lws_strexp_t *exp, const char *in, size_t len,
256 size_t *pused_in, size_t *pused_out);
257
258/**
259 * lws_strcmp_wildcard() - strcmp but the first arg can have wildcards
260 *
261 * \p wildcard: a string that may contain zero to three *, and may lack a NUL
262 * \p wlen: length of the wildcard string
263 * \p check: string to test to see if it matches wildcard
264 * \p clen: length of check string
265 *
266 * Like strcmp, but supports patterns like "a*", "a*b", "a*b*" etc
267 * where a and b are arbitrary substrings. Both the wc and check strings need
268 * not be NUL terminated, but are specified by lengths.
269 */
270LWS_VISIBLE LWS_EXTERN int
271lws_strcmp_wildcard(const char *wildcard, size_t wlen, const char *check,
272 size_t clen);
273