/*
 * libwebsockets - small server side websockets and web server implementation
 *
 * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * Option flags for the tokenizer.  Each flag occupies its own bit so they can
 * be OR-ed together in the flags member of struct lws_tokenize, or 0 can be
 * used for the default behaviour.
 */

/* Do not treat - as a terminal character, so "my-token" is one token */
#define LWS_TOKENIZE_F_MINUS_NONTERM	(1 << 0)
/* Separately report aggregate colon-delimited tokens */
#define LWS_TOKENIZE_F_AGG_COLON	(1 << 1)
/* Enforce sequencing for a simple token , token , token ... list */
#define LWS_TOKENIZE_F_COMMA_SEP_LIST	(1 << 2)
/* Allow more characters in the tokens and less delimiters... default is
 * only alphanumeric + underscore in tokens */
#define LWS_TOKENIZE_F_RFC7230_DELIMS	(1 << 3)
/* Do not treat . as a terminal character, so "warmcat.com" is one token */
#define LWS_TOKENIZE_F_DOT_NONTERM	(1 << 4)
/* If something starts looking like a float, like 1.2, force to be string token.
 * This lets you receive dotted-quads like 192.168.0.1 as string tokens, and
 * avoids illegal float format detection like 1.myserver.com */
#define LWS_TOKENIZE_F_NO_FLOATS	(1 << 5)
/* Instead of LWS_TOKZE_INTEGER, report integers as any other string token */
#define LWS_TOKENIZE_F_NO_INTEGERS	(1 << 6)
/* # makes the rest of the line a comment */
#define LWS_TOKENIZE_F_HASH_COMMENT	(1 << 7)
/* Do not treat / as a terminal character, so "multipart/related" is one token */
#define LWS_TOKENIZE_F_SLASH_NONTERM	(1 << 8)
/* Do not treat * as a terminal character, so "myfile*" is one token */
#define LWS_TOKENIZE_F_ASTERISK_NONTERM	(1 << 9)
/* Do not treat = as a terminal character, so "x=y" is one token */
#define LWS_TOKENIZE_F_EQUALS_NONTERM	(1 << 10)
/* Do not treat : as a terminal character, so ::1 is one token */
#define LWS_TOKENIZE_F_COLON_NONTERM	(1 << 11)

/* We're just tokenizing a chunk, don't treat running out of input as final */
#define LWS_TOKENIZE_F_EXPECT_MORE	(1 << 12)

/*
 * Element kinds reported by the tokenizer.
 *
 * Errors are negative, LWS_TOKZE_ENDED (no more content) is 0, and every
 * successfully identified element has an ordinal of 1 or more.
 */
typedef enum {

	/*
	 * A count, not a result code: it equals the number of negative error
	 * values below.  Its value (7) coincides with LWS_TOKZE_QUOTED_STRING,
	 * which C permits for enumerators.
	 */
	LWS_TOKZE_ERRS			=  7, /* the number of errors defined */

	LWS_TOKZE_TOO_LONG		= -7, /* token too long */
	LWS_TOKZE_WANT_READ		= -6, /* need more input */
	LWS_TOKZE_ERR_BROKEN_UTF8	= -5, /* malformed or partial utf8 */
	LWS_TOKZE_ERR_UNTERM_STRING	= -4, /* ended while we were in "" */
	LWS_TOKZE_ERR_MALFORMED_FLOAT	= -3, /* like 0..1 or 0.1.1 */
	LWS_TOKZE_ERR_NUM_ON_LHS	= -2, /* like 123= or 0.1= */
	LWS_TOKZE_ERR_COMMA_LIST	= -1, /* like ",tok", or, "tok,," */

	LWS_TOKZE_ENDED			=  0, /* no more content */

	/* Note: results have ordinal 1+, EOT is 0 and errors are < 0 */

	LWS_TOKZE_DELIMITER,		/* a delimiter appeared */
	LWS_TOKZE_TOKEN,		/* a token appeared */
	LWS_TOKZE_INTEGER,		/* an integer appeared */
	LWS_TOKZE_FLOAT,		/* a float appeared */
	LWS_TOKZE_TOKEN_NAME_EQUALS,	/* token [whitespace] = */
	LWS_TOKZE_TOKEN_NAME_COLON,	/* token [whitespace] : (only with
					   LWS_TOKENIZE_F_AGG_COLON flag) */
	LWS_TOKZE_QUOTED_STRING,	/* "*", where * may have any char */

} lws_tokenize_elem;

/*
 * Helper enum to allow the caller to enforce legal delimiter sequencing, eg,
 * disallow "token,,token", "token,", and ",token".
 *
 * The three states name what is expected next: the first piece of content,
 * a delimiter, or the content that must follow a delimiter.
 */

enum lws_tokenize_delimiter_tracking {
	LWSTZ_DT_NEED_FIRST_CONTENT,
	LWSTZ_DT_NEED_DELIM,
	LWSTZ_DT_NEED_NEXT_CONTENT,
};

/*
 * Values held in the state member of struct lws_tokenize.
 * NOTE(review): presumably the tokenizer's internal parsing phase; the
 * transitions are not visible from this header.
 */
typedef enum {
	LWS_TOKZS_LEADING_WHITESPACE,
	LWS_TOKZS_QUOTED_STRING,
	LWS_TOKZS_TOKEN,
	LWS_TOKZS_TOKEN_POST_TERMINAL
} lws_tokenize_state;

| 101 | typedef struct lws_tokenize { |
| 102 | char collect[256]; /* token length limit */ |
| 103 | const char *start; /**< set to the start of the string to tokenize */ |
| 104 | const char *token; /**< the start of an identified token or delimiter */ |
| 105 | size_t len; /**< set to the length of the string to tokenize */ |
| 106 | size_t token_len; /**< the length of the identied token or delimiter */ |
| 107 | |
| 108 | lws_tokenize_state state; |
| 109 | |
| 110 | int line; |
| 111 | int effline; |
| 112 | |
| 113 | uint16_t flags; /**< optional LWS_TOKENIZE_F_ flags, or 0 */ |
| 114 | uint8_t delim; |
| 115 | |
| 116 | int8_t e; /**< convenient for storing lws_tokenize return */ |
| 117 | uint8_t reset_token:1; |
| 118 | uint8_t crlf:1; |
| 119 | uint8_t dry:1; |
| 120 | } lws_tokenize_t; |
| 121 | |
| 122 | /** |
| 123 | * lws_tokenize() - breaks down a string into tokens and delimiters in-place |
| 124 | * |
| 125 | * \param ts: the lws_tokenize struct to init |
| 126 | * \param start: the string to tokenize |
| 127 | * \param flags: LWS_TOKENIZE_F_ option flags |
| 128 | * |
| 129 | * This initializes the tokenize struct to point to the given string, and |
| 130 | * sets the length to 2GiB - 1 (so there must be a terminating NUL)... you can |
| 131 | * override this requirement by setting ts.len yourself before using it. |
| 132 | * |
| 133 | * .delim is also initialized to LWSTZ_DT_NEED_FIRST_CONTENT. |
| 134 | */ |
| 135 | |
| 136 | LWS_VISIBLE LWS_EXTERN void |
| 137 | lws_tokenize_init(struct lws_tokenize *ts, const char *start, int flags); |
| 138 | |
| 139 | /** |
| 140 | * lws_tokenize() - breaks down a string into tokens and delimiters in-place |
| 141 | * |
| 142 | * \param ts: the lws_tokenize struct with information and state on what to do |
| 143 | * |
| 144 | * The \p ts struct should have its start, len and flags members initialized to |
| 145 | * reflect the string to be tokenized and any options. |
| 146 | * |
| 147 | * Then `lws_tokenize()` may be called repeatedly on the struct, returning one |
| 148 | * of `lws_tokenize_elem` each time, and with the struct's `token` and |
| 149 | * `token_len` members set to describe the content of the delimiter or token |
| 150 | * payload each time. |
| 151 | * |
| 152 | * There are no allocations during the process. |
| 153 | * |
| 154 | * returns lws_tokenize_elem that was identified (LWS_TOKZE_ENDED means reached |
| 155 | * the end of the string). |
| 156 | */ |
| 157 | |
| 158 | LWS_VISIBLE LWS_EXTERN lws_tokenize_elem |
| 159 | lws_tokenize(struct lws_tokenize *ts); |
| 160 | |
| 161 | /** |
| 162 | * lws_tokenize_cstr() - copy token string to NUL-terminated buffer |
| 163 | * |
| 164 | * \param ts: pointer to lws_tokenize struct to operate on |
| 165 | * \param str: destination buffer |
| 166 | * \pparam max: bytes in destination buffer |
| 167 | * |
| 168 | * returns 0 if OK or nonzero if the string + NUL won't fit. |
| 169 | */ |
| 170 | |
| 171 | LWS_VISIBLE LWS_EXTERN int |
| 172 | lws_tokenize_cstr(struct lws_tokenize *ts, char *str, size_t max); |
| 173 | |
| 174 | |
/*
 * lws_strexp: flexible string expansion helper api
 *
 * This stateful helper can handle multiple separate input chunks and multiple
 * output buffer loads with arbitrary boundaries between literals and expanded
 * symbols.  This allows it to handle fragmented input as well as arbitrarily
 * long symbol expansions that are bigger than the output buffer itself.
 *
 * A user callback is used to convert symbol names to the symbol value.
 *
 * A single byte buffer for input and another for output can therefore process
 * a substitution of any length.  The state object is around 88 bytes on a
 * typical 64-bit system (a 32-byte name buffer, six pointer-sized members and
 * two single-byte members, plus padding) and it only uses 8 bytes stack.
 */


/*
 * User callback that converts a symbol name into its value.
 *
 * priv is the user pointer that was handed to lws_strexp_init().  When the
 * expansion is larger than the output buffer, the callback returns
 * LSTRX_FILLED_OUT and is called again to continue, with *exp_ofs set
 * appropriately.  In length-only mode (NULL out) the callback must check for
 * NULL out and avoid producing output.
 *
 * NOTE(review): out / pos / olen presumably mirror the output buffer, write
 * position and buffer length kept in lws_strexp_t; confirm against the
 * implementation.
 */
typedef int (*lws_strexp_expand_cb)(void *priv, const char *name, char *out,
				    size_t *pos, size_t olen, size_t *exp_ofs);

| 194 | typedef struct lws_strexp { |
| 195 | char name[32]; |
| 196 | lws_strexp_expand_cb cb; |
| 197 | void *priv; |
| 198 | char *out; |
| 199 | size_t olen; |
| 200 | size_t pos; |
| 201 | |
| 202 | size_t exp_ofs; |
| 203 | |
| 204 | uint8_t name_pos; |
| 205 | char state; |
| 206 | } lws_strexp_t; |
| 207 | |
/* Result codes for lws_strexp: non-negative means progress, negative is fatal */
enum {
	LSTRX_DONE,				/* it completed OK */
	LSTRX_FILLED_OUT,			/* out buf filled and needs resetting */
	LSTRX_FATAL_NAME_TOO_LONG	= -1,	/* fatal */
	LSTRX_FATAL_NAME_UNKNOWN	= -2,	/* fatal */
};


| 216 | /** |
| 217 | * lws_strexp_init() - initialize an lws_strexp_t for use |
| 218 | * |
| 219 | * \p exp: the exp object to init |
| 220 | * \p priv: the user's object pointer to pass to callback |
| 221 | * \p cb: the callback to expand named objects |
| 222 | * \p out: the start of the output buffer, or NULL just to get the length |
| 223 | * \p olen: the length of the output buffer in bytes |
| 224 | * |
| 225 | * Prepares an lws_strexp_t for use and sets the initial output buffer |
| 226 | * |
| 227 | * If \p out is NULL, substitution proceeds normally, but no output is produced, |
| 228 | * only the length is returned. olen should be set to the largest feasible |
| 229 | * overall length. To use this mode, the substitution callback must also check |
| 230 | * for NULL \p out and avoid producing the output. |
| 231 | */ |
| 232 | LWS_VISIBLE LWS_EXTERN void |
| 233 | lws_strexp_init(lws_strexp_t *exp, void *priv, lws_strexp_expand_cb cb, |
| 234 | char *out, size_t olen); |
| 235 | |
| 236 | /** |
| 237 | * lws_strexp_reset_out() - reset the output buffer on an existing strexp |
| 238 | * |
| 239 | * \p exp: the exp object to init |
| 240 | * \p out: the start of the output buffer, or NULL to just get length |
| 241 | * \p olen: the length of the output buffer in bytes |
| 242 | * |
| 243 | * Provides a new output buffer for lws_strexp_expand() to continue to write |
| 244 | * into. It can be the same as the old one if it has been copied out or used. |
| 245 | * The position of the next write will be reset to the start of the given buf. |
| 246 | * |
| 247 | * If \p out is NULL, substitution proceeds normally, but no output is produced, |
| 248 | * only the length is returned. \p olen should be set to the largest feasible |
| 249 | * overall length. To use this mode, the substitution callback must also check |
| 250 | * for NULL \p out and avoid producing the output. |
| 251 | */ |
| 252 | LWS_VISIBLE LWS_EXTERN void |
| 253 | lws_strexp_reset_out(lws_strexp_t *exp, char *out, size_t olen); |
| 254 | |
| 255 | /** |
| 256 | * lws_strexp_expand() - copy / expand a string into the output buffer |
| 257 | * |
| 258 | * \p exp: the exp object for the copy / expansion |
| 259 | * \p in: the start of the next input data |
| 260 | * \p len: the length of the input data |
| 261 | * \p pused_in: pointer to write the amount of input used |
| 262 | * \p pused_out: pointer to write the amount of output used |
| 263 | * |
| 264 | * Copies in to the output buffer set in exp, expanding any ${name} tokens using |
| 265 | * the callback. \p *pused_in is set to the number of input chars used and |
| 266 | * \p *pused_out the number of output characters used |
| 267 | * |
| 268 | * May return LSTRX_FILLED_OUT early with *pused < len if the output buffer is |
| 269 | * filled. Handle the output buffer and reset it with lws_strexp_reset_out() |
| 270 | * before calling again with adjusted in / len to continue. |
| 271 | * |
| 272 | * In the case of large expansions, the expansion itself may fill the output |
| 273 | * buffer, in which case the expansion callback returns the LSTRX_FILLED_OUT |
| 274 | * and will be called again to continue with its *exp_ofs parameter set |
| 275 | * appropriately. |
| 276 | */ |
| 277 | LWS_VISIBLE LWS_EXTERN int |
| 278 | lws_strexp_expand(lws_strexp_t *exp, const char *in, size_t len, |
| 279 | size_t *pused_in, size_t *pused_out); |
| 280 | |
| 281 | /** |
| 282 | * lws_strcmp_wildcard() - strcmp but the first arg can have wildcards |
| 283 | * |
| 284 | * \p wildcard: a string that may contain zero to three *, and may lack a NUL |
| 285 | * \p wlen: length of the wildcard string |
| 286 | * \p check: string to test to see if it matches wildcard |
| 287 | * \p clen: length of check string |
| 288 | * |
| 289 | * Like strcmp, but supports patterns like "a*", "a*b", "a*b*" etc |
| 290 | * where a and b are arbitrary substrings. Both the wc and check strings need |
| 291 | * not be NUL terminated, but are specified by lengths. |
| 292 | */ |
| 293 | LWS_VISIBLE LWS_EXTERN int |
| 294 | lws_strcmp_wildcard(const char *wildcard, size_t wlen, const char *check, |
| 295 | size_t clen); |
| 296 | |