1/* (c) Magnus Auvinen. See licence.txt in the root of the distribution for more information. */
2/* If you are missing that file, acquire a complete release at teeworlds.com. */
3
4#ifndef BASE_STR_H
5#define BASE_STR_H
6
7#include <cinttypes>
8#include <cstdarg>
9#include <cstddef>
10#include <cstdint>
11#include <cstring>
12
13/**
14 * String related functions.
15 *
16 * @defgroup Strings Strings
17 */
18
19#ifdef __MINGW32__
20#undef PRId64
21#undef PRIu64
22#undef PRIX64
23#define PRId64 "I64d"
24#define PRIu64 "I64u"
25#define PRIX64 "I64X"
26#define PRIzu "Iu"
27#else
28#define PRIzu "zu"
29#endif
30
31/**
32 * Copies a string to another.
33 *
34 * @ingroup Strings
35 *
36 * @param dst Pointer to a buffer that shall receive the string.
37 * @param src String to be copied.
38 * @param dst_size Size of the buffer dst.
39 *
40 * @return Length of written string, even if it has been truncated
41 *
42 * @remark The strings are treated as null-terminated strings.
43 * @remark Guarantees that dst string will contain null-termination.
44 */
45int str_copy(char *dst, const char *src, int dst_size);
46
/**
 * Array overload of str_copy: copies a string into a fixed-size array of chars.
 *
 * @ingroup Strings
 *
 * @tparam N Number of chars in the destination array, deduced from `dst`.
 *
 * @param dst Array that shall receive the string.
 * @param src String to be copied.
 *
 * @remark Forwards to the pointer-based str_copy, passing `N` as the buffer size.
 * @remark The length returned by the pointer-based str_copy is discarded.
 * @remark The strings are treated as null-terminated strings.
 * @remark Guarantees that dst string will contain null-termination.
 */
template<int N>
void str_copy(char (&dst)[N], const char *src)
{
	// The array size is known at compile time, so the caller never has to spell it out.
	str_copy(&dst[0], src, N);
}
63
64/**
65 * Appends a string to another.
66 *
67 * @ingroup Strings
68 *
69 * @param dst Pointer to a buffer that contains a string.
70 * @param src String to append.
71 * @param dst_size Size of the buffer of the dst string.
72 *
73 * @remark The strings are treated as null-terminated strings.
74 * @remark Guarantees that dst string will contain null-termination.
75 */
76void str_append(char *dst, const char *src, int dst_size);
77
/**
 * Array overload of str_append: appends a string to the string held in a
 * fixed-size array of chars.
 *
 * @ingroup Strings
 *
 * @tparam N Number of chars in the destination array, deduced from `dst`.
 *
 * @param dst Array that contains a string and shall receive the appended string.
 * @param src String to append.
 *
 * @remark Forwards to the pointer-based str_append, passing `N` as the buffer size.
 * @remark The strings are treated as null-terminated strings.
 * @remark Guarantees that dst string will contain null-termination.
 */
template<int N>
void str_append(char (&dst)[N], const char *src)
{
	// The array size is known at compile time, so the caller never has to spell it out.
	str_append(&dst[0], src, N);
}
94
95/**
96 * Truncates a string to a given length.
97 *
98 * @ingroup Strings
99 *
100 * @param dst Pointer to a buffer that shall receive the string.
101 * @param dst_size Size of the buffer dst.
102 * @param src String to be truncated.
103 * @param truncation_len Maximum length of the returned string (not
104 * counting the null-termination).
105 *
106 * @remark The strings are treated as null-terminated strings.
107 * @remark Guarantees that dst string will contain null-termination.
108 */
109void str_truncate(char *dst, int dst_size, const char *src, int truncation_len);
110
111/**
112 * Returns the length of a null-terminated string.
113 *
114 * @ingroup Strings
115 *
116 * @param str Pointer to the string.
117 *
118 * @return Length of string in bytes excluding the null-termination.
119 */
120int str_length(const char *str);
121
122/**
123 * Performs printf formatting into a buffer.
124 *
125 * @ingroup Strings
126 *
127 * @param buffer Pointer to the buffer to receive the formatted string.
128 * @param buffer_size Size of the buffer.
129 * @param format printf formatting string.
130 * @param args The variable argument list.
131 *
132 * @return Length of written string, even if it has been truncated.
133 *
134 * @remark See the C manual for syntax for the printf formatting string.
135 * @remark The strings are treated as null-terminated strings.
136 * @remark Guarantees that buffer string will contain null-termination.
137 */
138[[gnu::format(printf, 3, 0)]] int str_format_v(char *buffer, int buffer_size, const char *format, va_list args);
139
140/**
141 * Performs printf formatting into a buffer.
142 *
143 * @ingroup Strings
144 *
145 * @param buffer Pointer to the buffer to receive the formatted string.
146 * @param buffer_size Size of the buffer.
147 * @param format printf formatting string.
148 * @param ... Parameters for the formatting.
149 *
150 * @return Length of written string, even if it has been truncated.
151 *
152 * @remark See the C manual for syntax for the printf formatting string.
153 * @remark The strings are treated as null-terminated strings.
154 * @remark Guarantees that buffer string will contain null-termination.
155 */
156[[gnu::format(printf, 3, 4)]] int str_format(char *buffer, int buffer_size, const char *format, ...);
157
158#if !defined(CONF_DEBUG)
/**
 * Formats a single integer into a buffer.
 *
 * Called by the `int` specialization of str_format_opt as a replacement for
 * `str_format(buffer, buffer_size, "%d", value)`.
 *
 * @ingroup Strings
 *
 * @param buffer Pointer to the buffer to receive the formatted string.
 * @param buffer_size Size of the buffer.
 * @param value Integer to format.
 *
 * @return NOTE(review): presumably the length of the written string, matching
 * str_format, since the result is returned in its place — confirm against the
 * implementation.
 */
int str_format_int(char *buffer, size_t buffer_size, int value);
160
/**
 * Generic front end for str_format used when `CONF_DEBUG` is not defined.
 *
 * The `str_format` macro defined at the end of this conditional section
 * redirects calls here. This primary template only rejects calls without
 * format arguments and otherwise forwards everything to the real str_format.
 *
 * @ingroup Strings
 *
 * @param buffer Pointer to the buffer to receive the formatted string.
 * @param buffer_size Size of the buffer.
 * @param format printf formatting string.
 * @param args Parameters for the formatting.
 *
 * @return The value returned by str_format.
 */
template<typename... Args>
int str_format_opt(char *buffer, int buffer_size, const char *format, Args... args)
{
	// A format call without arguments is just a copy; reject it at compile time.
	static_assert(sizeof...(Args) > 0, "Use str_copy instead of str_format without format arguments");
	const int length = str_format(buffer, buffer_size, format, args...);
	return length;
}
167
168template<>
169inline int str_format_opt(char *buffer, int buffer_size, const char *format, int val) // NOLINT(readability-inconsistent-declaration-parameter-name)
170{
171 if(strcmp(format, "%d") == 0)
172 {
173 return str_format_int(buffer, buffer_size, val);
174 }
175 else
176 {
177 return str_format(buffer, buffer_size, format, val);
178 }
179}
180
181#define str_format str_format_opt
182#endif
183
/**
 * Converts a character to uppercase.
 *
 * @ingroup Strings
 *
 * @param c Character to convert.
 *
 * @return The converted character.
 *
 * @remark NOTE(review): semantics inferred from the name; presumably only
 * ASCII `a`-`z` are affected — confirm against the implementation.
 */
char str_uppercase(char c);

/**
 * Checks whether a character is a digit.
 *
 * @ingroup Strings
 *
 * @param c Character to check.
 *
 * @return Whether the character is a digit.
 *
 * @remark NOTE(review): semantics inferred from the name; presumably only
 * ASCII `0`-`9` count as digits — confirm against the implementation.
 */
bool str_isnum(char c);

/**
 * Checks whether a string consists only of digits.
 *
 * @ingroup Strings
 *
 * @param str String to check.
 *
 * @return NOTE(review): presumably non-zero if every character is a decimal
 * digit and `0` otherwise; the result for an empty string is not visible from
 * this header — confirm against the implementation.
 */
int str_isallnum(const char *str);

/**
 * Checks whether a string consists only of hexadecimal digits.
 *
 * @ingroup Strings
 *
 * @param str String to check.
 *
 * @return NOTE(review): presumably non-zero if every character is a
 * hexadecimal digit and `0` otherwise; accepted letter case is not visible
 * from this header — confirm against the implementation.
 */
int str_isallnum_hex(const char *str);
191
192/**
193 * Determines whether a character is whitespace.
194 *
195 * @ingroup Strings
196 *
197 * @param c the character to check.
198 *
199 * @return `1` if the character is whitespace, `0` otherwise.
200 *
201 * @remark The following characters are considered whitespace: ` `, `\n`, `\r`, `\t`.
202 */
203int str_isspace(char c);
204
205/**
206 * Trims specific number of words at the start of a string.
207 *
208 * @ingroup Strings
209 *
210 * @param str String to trim the words from.
211 * @param words Count of words to trim.
212 *
213 * @return Trimmed string
214 *
215 * @remark The strings are treated as null-terminated strings.
216 * @remark Leading whitespace is always trimmed.
217 */
218const char *str_trim_words(const char *str, int words);
219
220/**
221 * Check whether string has ASCII control characters.
222 *
223 * @ingroup Strings
224 *
225 * @param str String to check.
226 *
227 * @return Whether the string has ASCII control characters.
228 *
229 * @remark The strings are treated as null-terminated strings.
230 */
231bool str_has_cc(const char *str);
232
233/**
234 * Replaces all characters below 32 with whitespace.
235 *
236 * @ingroup Strings
237 *
238 * @param str String to sanitize.
239 *
240 * @remark The strings are treated as null-terminated strings.
241 */
242void str_sanitize_cc(char *str);
243
244/**
245 * Replaces all characters below 32 with whitespace with
 * exception to `\t`, `\n` and `\r`.
247 *
248 * @ingroup Strings
249 *
250 * @param str String to sanitize.
251 *
252 * @remark The strings are treated as null-terminated strings.
253 */
254void str_sanitize(char *str);
255
/**
 * Replaces all invalid filename characters with whitespace.
 *
 * @ingroup Strings
 *
 * @param str String to sanitize.
 *
 * @remark The strings are treated as null-terminated strings.
 */
void str_sanitize_filename(char *str);
263
/**
 * Checks if a string is a valid filename on all supported platforms.
 *
 * @ingroup Strings
 *
 * @param str Filename to check.
 *
 * @return `true` if the string is a valid filename, `false` otherwise.
 *
 * @remark The strings are treated as null-terminated strings.
 */
bool str_valid_filename(const char *str);
274
275/**
276 * Compares two strings case insensitive, digit chars will be compared as numbers.
277 *
278 * @ingroup Strings
279 *
280 * @param a String to compare.
281 * @param b String to compare.
282 *
283 * @return `< 0` - String a is less than string b
284 * @return `0` - String a is equal to string b
285 * @return `> 0` - String a is greater than string b
286 *
287 * @remark The strings are treated as null-terminated strings.
288 */
289int str_comp_filenames(const char *a, const char *b);
290
291/**
292 * Removes leading and trailing spaces and limits the use of multiple spaces.
293 *
294 * @ingroup Strings
295 *
296 * @param str String to clean up.
297 *
298 * @remark The strings are treated as null-terminated strings.
299 */
300void str_clean_whitespaces(char *str);
301
302/**
303 * Skips leading non-whitespace characters.
304 *
305 * @ingroup Strings
306 *
307 * @param str Pointer to the string.
308 *
309 * @return Pointer to the first whitespace character found
310 * within the string.
311 *
312 * @remark The strings are treated as null-terminated strings.
313 * @remark Whitespace is defined according to str_isspace.
314 */
315char *str_skip_to_whitespace(char *str);
316
317/**
318 * @ingroup Strings
319 *
320 * @see str_skip_to_whitespace
321 */
322const char *str_skip_to_whitespace_const(const char *str);
323
324/**
325 * Skips leading whitespace characters.
326 *
327 * @ingroup Strings
328 *
329 * @param str Pointer to the string.
330 *
331 * @return Pointer to the first non-whitespace character found
332 * within the string.
333 *
334 * @remark The strings are treated as null-terminated strings.
335 * @remark Whitespace is defined according to str_isspace.
336 */
337char *str_skip_whitespaces(char *str);
338
339/**
340 * @ingroup Strings
341 *
342 * @see str_skip_whitespaces
343 */
344const char *str_skip_whitespaces_const(const char *str);
345
346/**
 * Compares two strings case insensitively.
348 *
349 * @ingroup Strings
350 *
351 * @param a String to compare.
352 * @param b String to compare.
353 *
354 * @return `< 0` if string a is less than string b.
355 * @return `0` if string a is equal to string b.
356 * @return `> 0` if string a is greater than string b.
357 *
358 * @remark Only guaranteed to work with a-z/A-Z.
359 * @remark The strings are treated as null-terminated strings.
360 */
361int str_comp_nocase(const char *a, const char *b);
362
363/**
364 * Compares up to `num` characters of two strings case insensitively.
365 *
366 * @ingroup Strings
367 *
368 * @param a String to compare.
369 * @param b String to compare.
370 * @param num Maximum characters to compare.
371 *
372 * @return `< 0` if string a is less than string b.
373 * @return `0` if string a is equal to string b.
374 * @return `> 0` if string a is greater than string b.
375 *
376 * @remark Only guaranteed to work with a-z/A-Z.
377 * @remark Use `str_utf8_comp_nocase_num` for unicode support.
378 * @remark The strings are treated as null-terminated strings.
379 */
380int str_comp_nocase_num(const char *a, const char *b, int num);
381
382/**
383 * Compares two strings case sensitive.
384 *
385 * @ingroup Strings
386 *
387 * @param a String to compare.
388 * @param b String to compare.
389 *
390 * @return `< 0` if string a is less than string b.
391 * @return `0` if string a is equal to string b.
392 * @return `> 0` if string a is greater than string b.
393 *
394 * @remark The strings are treated as null-terminated strings.
395 */
396int str_comp(const char *a, const char *b);
397
398/**
399 * Compares up to `num` characters of two strings case sensitive.
400 *
401 * @ingroup Strings
402 *
403 * @param a String to compare.
404 * @param b String to compare.
405 * @param num Maximum characters to compare.
406 *
407 * @return `< 0` if string a is less than string b.
408 * @return `0` if string a is equal to string b.
409 * @return `> 0` if string a is greater than string b.
410 *
411 * @remark The strings are treated as null-terminated strings.
412 */
413int str_comp_num(const char *a, const char *b, int num);
414
415/**
416 * Checks case insensitive whether the string begins with a certain prefix.
417 *
418 * @ingroup Strings
419 *
420 * @param str String to check.
421 * @param prefix Prefix to look for.
422 *
423 * @return A pointer to the string `str` after the string prefix, or `nullptr` if
424 * the string prefix isn't a prefix of the string `str`.
425 *
426 * @remark The strings are treated as null-terminated strings.
427 */
428const char *str_startswith_nocase(const char *str, const char *prefix);
429
430/**
431 * Checks case sensitive whether the string begins with a certain prefix.
432 *
433 * @ingroup Strings
434 *
435 * @param str String to check.
436 * @param prefix Prefix to look for.
437 *
438 * @return A pointer to the string `str` after the string prefix, or `nullptr` if
439 * the string prefix isn't a prefix of the string `str`.
440 *
441 * @remark The strings are treated as null-terminated strings.
442 */
443const char *str_startswith(const char *str, const char *prefix);
444
445/**
446 * Checks case insensitive whether the string ends with a certain suffix.
447 *
448 * @ingroup Strings
449 *
450 * @param str String to check.
 * @param suffix Suffix to look for.
452 *
453 * @return A pointer to the beginning of the suffix in the string `str`.
454 * @return `nullptr` if the string suffix isn't a suffix of the string `str`.
455 *
456 * @remark The strings are treated as null-terminated strings.
457 */
458const char *str_endswith_nocase(const char *str, const char *suffix);
459
/**
 * Checks case sensitive whether the string ends with a certain suffix.
 *
 * @ingroup Strings
 *
 * @param str String to check.
 * @param suffix Suffix to look for.
 *
 * @return A pointer to the beginning of the suffix in the string `str`.
 * @return `nullptr` if the string suffix isn't a suffix of the string `str`.
 *
 * @remark The strings are treated as null-terminated strings.
 */
const char *str_endswith(const char *str, const char *suffix);
472
473/**
474 * Finds a string inside another string case insensitively.
475 *
476 * @ingroup Strings
477 *
478 * @param haystack String to search in.
479 * @param needle String to search for.
480 *
481 * @return A pointer into `haystack` where the needle was found.
482 * @return Returns `nullptr` if `needle` could not be found.
483 *
484 * @remark Only guaranteed to work with a-z/A-Z.
485 * @remark Use str_utf8_find_nocase for unicode support.
486 * @remark The strings are treated as null-terminated strings.
487 */
488const char *str_find_nocase(const char *haystack, const char *needle);
489
490/**
491 * Finds a string inside another string case sensitive.
492 *
493 * @ingroup Strings
494 *
495 * @param haystack String to search in.
496 * @param needle String to search for.
497 *
498 * @return A pointer into `haystack` where the needle was found.
499 * @return Returns `nullptr` if `needle` could not be found.
500 *
501 * @remark The strings are treated as null-terminated strings.
502 */
503const char *str_find(const char *haystack, const char *needle);
504
505/**
506 * Writes the next token after str into buf, returns the rest of the string.
507 *
508 * @ingroup Strings
509 *
510 * @param str Pointer to string.
511 * @param delim Delimiter for tokenization.
512 * @param buffer Buffer to store token in.
513 * @param buffer_size Size of the buffer.
514 *
515 * @return Pointer to rest of the string.
516 *
517 * @remark The token is always null-terminated.
518 */
519const char *str_next_token(const char *str, const char *delim, char *buffer, int buffer_size);
520
/**
 * Checks if needle is in list delimited by delim.
 *
 * @ingroup Strings
 *
 * @param list List.
 * @param delim List delimiter.
 * @param needle Item that is being looked for.
 *
 * @return `1` - Item is in list.
 * @return `0` - Item isn't in list.
 *
 * @remark The strings are treated as null-terminated strings.
 */
int str_in_list(const char *list, const char *delim, const char *needle);
534
/**
 * Finds the delimiters that surround a given offset in a string.
 *
 * @ingroup Strings
 *
 * @param haystack String to search in.
 * @param delim String to search for.
 * @param offset Number of characters into `haystack`.
 * @param start Will be set to the first delimiter on the left side of the offset (or `haystack` start).
 * @param end Will be set to the first delimiter on the right side of the offset (or `haystack` end).
 *
 * @return `true` if both delimiters were found.
 * @return `false` if a delimiter is missing (it uses `haystack` start and end as fallback).
 *
 * @remark The strings are treated as null-terminated strings.
 */
bool str_delimiters_around_offset(const char *haystack, const char *delim, int offset, int *start, int *end);
550
551/**
552 * Finds the last occurrence of a character
553 *
554 * @ingroup Strings
555 *
556 * @param haystack String to search in.
557 * @param needle Character to search for.
 *
559 * @return A pointer into haystack where the needle was found.
560 * @return Returns `nullptr` if needle could not be found.
561 *
562 * @remark The strings are treated as null-terminated strings.
563 * @remark The zero-terminator character can also be found with this function.
564 */
565const char *str_rchr(const char *haystack, char needle);
566
567/**
568 * Counts the number of occurrences of a character in a string.
569 *
570 * @ingroup Strings
571 *
572 * @param haystack String to count in.
573 * @param needle Character to count.
 *
575 * @return The number of characters in the haystack string matching
576 * the needle character.
577 *
578 * @remark The strings are treated as null-terminated strings.
579 * @remark The number of zero-terminator characters cannot be counted.
580 */
581int str_countchr(const char *haystack, char needle);
582
583/**
584 * Takes a datablock and generates a hex string of it, with spaces between bytes.
585 *
586 * @ingroup Strings
587 *
588 * @param dst Buffer to fill with hex data.
589 * @param dst_size Size of the buffer (at least 3 * data_size + 1 to contain all data).
590 * @param data Data to turn into hex.
591 * @param data_size Size of the data.
592 *
593 * @remark The destination buffer will be null-terminated.
594 */
595void str_hex(char *dst, int dst_size, const void *data, int data_size);
596
597/**
598 * Takes a datablock and generates a hex string of it, in the C style array format,
599 * i.e. with bytes formatted in 0x00-0xFF notation and commas with spaces between the bytes.
600 * The output can be split over multiple lines by specifying the maximum number of bytes
601 * that should be printed per line.
602 *
603 * @ingroup Strings
604 *
605 * @param dst Buffer to fill with hex data.
606 * @param dst_size Size of the buffer (at least `6 * data_size + 1` to contain all data).
607 * @param data Data to turn into hex.
608 * @param data_size Size of the data.
609 * @param bytes_per_line After this many printed bytes a newline will be printed.
610 *
611 * @remark The destination buffer will be null-terminated.
612 */
613void str_hex_cstyle(char *dst, int dst_size, const void *data, int data_size, int bytes_per_line = 12);
614
615/**
616 * Takes a hex string *without spaces between bytes* and returns a byte array.
617 *
618 * @ingroup Strings
619 *
620 * @param dst Buffer for the byte array.
621 * @param dst_size size of the buffer.
622 * @param src String to decode.
623 *
624 * @return `2` if string doesn't exactly fit the buffer.
625 * @return `1` if invalid character in string.
626 * @return `0` if success.
627 *
628 * @remark The contents of the buffer is only valid on success.
629 */
630int str_hex_decode(void *dst, int dst_size, const char *src);
631
632/**
633 * Takes a datablock and generates the base64 encoding of it.
634 *
635 * @ingroup Strings
636 *
637 * @param dst Buffer to fill with base64 data.
638 * @param dst_size Size of the buffer.
639 * @param data Data to turn into base64.
640 * @param data_size Size of the data.
641 *
642 * @remark The destination buffer will be null-terminated
643 */
644void str_base64(char *dst, int dst_size, const void *data, int data_size);
645
646/**
647 * Takes a base64 string without any whitespace and correct
648 * padding and returns a byte array.
649 *
650 * @ingroup Strings
651 *
652 * @param dst Buffer for the byte array.
653 * @param dst_size Size of the buffer.
654 * @param data String to decode.
655 *
656 * @return `< 0` - Error.
 * @return `>= 0` - Success, length of the resulting byte buffer.
658 *
659 * @remark The contents of the buffer is only valid on success.
660 */
661int str_base64_decode(void *dst, int dst_size, const char *data);
662
/**
 * Escapes \ and " characters in a string.
 *
 * @ingroup Strings
 *
 * @param dst Destination array pointer, gets increased, will point to the terminating null.
 * @param src Source array.
 * @param end End of destination array.
 */
void str_escape(char **dst, const char *src, const char *end);
671
/*
 * String to number conversions and hashing.
 *
 * NOTE(review): none of the functions in this section are documented in this
 * header and their implementation is not visible here. The comments below
 * describe what the signatures suggest; behavior on invalid input, overflow
 * and trailing characters must be confirmed against the implementation.
 */

// Presumably parses `str` as a base 10 integer and returns the value — confirm.
int str_toint(const char *str);
// Presumably parses `str` as an integer into `*out` and returns whether parsing
// succeeded — confirm, including whether `*out` is left untouched on failure.
bool str_toint(const char *str, int *out);
// Presumably parses `str` as an integer in the given `base` — confirm.
int str_toint_base(const char *str, int base);
// Presumably parses `str` as an unsigned long in the given `base` — confirm.
unsigned long str_toulong_base(const char *str, int base);
// Presumably parses `str` as a 64-bit integer in the given `base` — confirm.
// `base` defaults to 10.
int64_t str_toint64_base(const char *str, int base = 10);
// Presumably parses `str` as a floating point number and returns the value — confirm.
float str_tofloat(const char *str);
// Presumably parses `str` as a floating point number into `*out` and returns
// whether parsing succeeded — confirm.
bool str_tofloat(const char *str, float *out);

// Presumably computes a fast, non-cryptographic hash of `str` — confirm the
// algorithm before relying on specific hash values.
unsigned str_quickhash(const char *str);
681
682/**
683 * Encode a UTF-8 character.
684 *
685 * @ingroup Strings
686 *
687 * @param ptr Pointer to a buffer that should receive the data. Should be able to hold at least 4 bytes.
688 * @param chr Unicode codepoint to encode.
689 *
690 * @return Number of bytes put into the buffer.
691 *
692 * @remark Does not do null-termination of the string.
693 */
694int str_utf8_encode(char *ptr, int chr);
695
696/**
697 * Decodes a UTF-8 codepoint.
698 *
699 * @ingroup Strings
700 *
701 * @param ptr Pointer to a UTF-8 string. This pointer will be moved forward.
702 *
703 * @return The Unicode codepoint. `-1` for invalid input and 0 for end of string.
704 *
705 * @remark This function will also move the pointer forward.
706 * @remark You may call this function again after an error occurred.
707 * @remark The strings are treated as null-terminated.
708 */
709int str_utf8_decode(const char **ptr);
710
711/**
712 * Truncates a UTF-8 encoded string to a given length.
713 *
714 * @ingroup Strings
715 *
716 * @param dst Pointer to a buffer that shall receive the string.
717 * @param dst_size Size of the buffer dst.
718 * @param src String to be truncated.
719 * @param truncation_len Maximum codepoints in the returned string.
720 *
721 * @remark The strings are treated as utf8-encoded null-terminated strings.
722 * @remark Guarantees that dst string will contain null-termination.
723 */
724void str_utf8_truncate(char *dst, int dst_size, const char *src, int truncation_len);
725
726/**
727 * Fixes truncation of a Unicode character at the end of a UTF-8 string.
728 *
729 * @ingroup Strings
730 *
731 * @param str UTF-8 string.
732 *
733 * @return The new string length.
734 *
735 * @remark The strings are treated as null-terminated.
736 */
737int str_utf8_fix_truncation(char *str);
738
739/**
740 * Removes trailing characters that render as spaces by modifying the string in-place.
741 *
742 * @ingroup Strings
743 *
744 * @param param Input string.
745 *
746 * @remark The string is modified in-place.
747 * @remark The strings are treated as null-terminated.
748 */
749void str_utf8_trim_right(char *param);
750
751/**
752 * Converts the given UTF-8 string to lowercase (locale insensitive).
753 *
754 * @ingroup Strings
755 *
756 * @param input String to convert to lowercase.
757 * @param output Buffer that will receive the lowercase string.
758 * @param size Size of the output buffer.
759 *
760 * @remark The strings are treated as zero-terminated strings.
761 * @remark This function does not work in-place as converting a UTF-8 string to
762 * lowercase may increase its length.
763 */
764void str_utf8_tolower(const char *input, char *output, size_t size);
765
766/**
767 * Checks whether the given Unicode codepoint renders as space.
768 *
769 * @ingroup Strings
770 *
771 * @param code Unicode codepoint to check.
772 *
773 * @return Whether the codepoint is a space.
774 */
775int str_utf8_isspace(int code);
776
777/**
778 * Checks whether a given byte is the start of a UTF-8 character.
779 *
780 * @ingroup Strings
781 *
782 * @param c Byte to check.
783 *
784 * @return Whether the char starts a UTF-8 character.
785 */
786int str_utf8_isstart(char c);
787
788/**
 * Moves a cursor backwards in an UTF-8 string.
790 *
791 * @ingroup Strings
792 *
793 * @param str UTF-8 string.
794 * @param cursor Position in the string.
795 *
796 * @return New cursor position.
797 *
 * @remark Won't move the cursor less than 0.
799 * @remark The strings are treated as null-terminated.
800 */
801int str_utf8_rewind(const char *str, int cursor);
802
803/**
804 * Finds a UTF-8 string inside another UTF-8 string case insensitively.
805 *
806 * @ingroup Strings
807 *
808 * @param haystack String to search in.
809 * @param needle String to search for.
810 * @param end A pointer that will be set to a pointer into haystack directly behind the
 * last character where the needle was found. Will be set to `nullptr` if needle
812 * could not be found. Optional parameter.
813 *
814 * @return A pointer into haystack where the needle was found.
815 * @return Returns `nullptr` if needle could not be found.
816 *
817 * @remark The strings are treated as null-terminated strings.
818 */
819const char *str_utf8_find_nocase(const char *haystack, const char *needle, const char **end = nullptr);
820
821/**
822 * Compares two UTF-8 strings case insensitively.
823 *
824 * @ingroup Strings
825 *
826 * @param a String to compare.
827 * @param b String to compare.
828 *
829 * @return `< 0` if string a is less than string b.
830 * @return `0` if string a is equal to string b.
831 * @return `> 0` if string a is greater than string b.
832 */
833int str_utf8_comp_nocase(const char *a, const char *b);
834
835/**
836 * Compares up to `num` bytes of two UTF-8 strings case insensitively.
837 *
838 * @ingroup Strings
839 *
840 * @param a String to compare.
841 * @param b String to compare.
842 * @param num Maximum bytes to compare.
843 *
844 * @return `< 0` if string a is less than string b.
845 * @return `0` if string a is equal to string b.
846 * @return `> 0` if string a is greater than string b.
847 */
848int str_utf8_comp_nocase_num(const char *a, const char *b, int num);
849
850/**
851 * Skips leading characters that render as spaces.
852 *
853 * @ingroup Strings
854 *
855 * @param str Input string.
856 *
857 * @return Pointer to the first non-whitespace character found within the string.
858 * @remark The strings are treated as null-terminated strings.
859 */
860const char *str_utf8_skip_whitespaces(const char *str);
861
862/**
863 * Moves a cursor forwards in an UTF-8 string.
864 *
865 * @ingroup Strings
866 *
867 * @param str UTF-8 string.
868 * @param cursor Position in the string.
869 *
870 * @return New cursor position.
871 *
872 * @remark Won't move the cursor beyond the null-termination marker.
873 * @remark The strings are treated as null-terminated.
874 */
875int str_utf8_forward(const char *str, int cursor);
876
877/**
878 * Checks if a strings contains just valid UTF-8 characters.
879 *
880 * @ingroup Strings
881 *
882 * @param str Pointer to a possible UTF-8 string.
883 *
884 * @return `0` if invalid characters were found, `1` if only valid characters were found.
885 *
886 * @remark The string is treated as null-terminated UTF-8 string.
887 */
888int str_utf8_check(const char *str);
889
890/**
891 * Copies a number of UTF-8 characters from one string to another.
892 *
893 * @ingroup Strings
894 *
895 * @param dst Pointer to a buffer that shall receive the string.
896 * @param src String to be copied.
897 * @param dst_size Size of the buffer dst.
898 * @param num Maximum number of UTF-8 characters to be copied.
899 *
900 * @remark The strings are treated as null-terminated strings.
901 * @remark Guarantees that dst string will contain null-termination.
902 */
903void str_utf8_copy_num(char *dst, const char *src, int dst_size, int num);
904
905/**
906 * Determines the byte size and UTF-8 character count of a UTF-8 string.
907 *
908 * @ingroup Strings
909 *
910 * @param str Pointer to the string.
911 * @param max_size Maximum number of bytes to count.
912 * @param max_count Maximum number of UTF-8 characters to count.
 * @param size Pointer to store size (number of non-zero bytes) of the string.
914 * @param count Pointer to store count of UTF-8 characters of the string.
915 *
916 * @remark The string is treated as null-terminated UTF-8 string.
917 * @remark It's the user's responsibility to make sure the bounds are aligned.
918 */
919void str_utf8_stats(const char *str, size_t max_size, size_t max_count, size_t *size, size_t *count);
920
921/**
922 * Converts a byte offset of a UTF-8 string to the UTF-8 character offset.
923 *
924 * @ingroup Strings
925 *
926 * @param str Pointer to the string.
927 * @param byte_offset Offset in bytes.
928 *
929 * @return Offset in UTF-8 characters. Clamped to the maximum length of the string in UTF-8 characters.
930 *
931 * @remark The string is treated as a null-terminated UTF-8 string.
932 * @remark It's the user's responsibility to make sure the bounds are aligned.
933 */
934size_t str_utf8_offset_bytes_to_chars(const char *str, size_t byte_offset);
935
936/**
937 * Converts a UTF-8 character offset of a UTF-8 string to the byte offset.
938 *
939 * @ingroup Strings
940 *
941 * @param str Pointer to the string.
942 * @param char_offset Offset in UTF-8 characters.
943 *
944 * @return Offset in bytes. Clamped to the maximum length of the string in bytes.
945 *
946 * @remark The string is treated as a null-terminated UTF-8 string.
947 * @remark It's the user's responsibility to make sure the bounds are aligned.
948 */
949size_t str_utf8_offset_chars_to_bytes(const char *str, size_t char_offset);
950
/**
 * Computes the edit distance between two strings.
 *
 * @ingroup Strings
 *
 * @param a First string for the edit distance.
 * @param b Second string for the edit distance.
 *
 * @return The edit distance between the both strings.
 *
 * @remark The strings are treated as null-terminated strings.
 */
int str_utf8_dist(const char *a, const char *b);
962
963/**
964 * Computes the edit distance between two strings, allows buffers
965 * to be passed in.
966 *
967 * @ingroup Strings
968 *
969 * @param a First string for the edit distance.
970 * @param b Second string for the edit distance.
971 * @param buf Buffer for the function.
972 * @param buf_len Length of the buffer, must be at least as long as
973 * twice the length of both strings combined plus two.
974 *
975 * @return The edit distance between the both strings.
976 *
977 * @remark The strings are treated as null-terminated strings.
978 */
979int str_utf8_dist_buffer(const char *a, const char *b, int *buf, int buf_len);
980
981/**
982 * Computes the edit distance between two strings, allows buffers
983 * to be passed in.
984 *
985 * @ingroup Strings
986 *
987 * @param a First string for the edit distance.
988 * @param a_len Length of the first string.
989 * @param b Second string for the edit distance.
990 * @param b_len Length of the second string.
991 * @param buf Buffer for the function.
992 * @param buf_len Length of the buffer, must be at least as long as
993 * the length of both strings combined plus two.
994 *
995 * @return The edit distance between the both strings.
996 *
997 * @remark The strings are treated as null-terminated strings.
998 */
999int str_utf32_dist_buffer(const int *a, int a_len, const int *b, int b_len, int *buf, int buf_len);
1000
/**
 * Converts a UTF-8 string to its skeleton.
 *
 * @ingroup Strings
 *
 * @param str String to convert.
 * @param buf Buffer that receives the result.
 * @param buf_len Length of the buffer.
 *
 * @return NOTE(review): presumably the number of entries written to `buf` —
 * confirm against the implementation.
 *
 * @remark NOTE(review): "skeleton" presumably refers to the confusable
 * skeleton also used by str_utf8_comp_confusable, stored as one codepoint per
 * `int` — confirm against the implementation.
 */
int str_utf8_to_skeleton(const char *str, int *buf, int buf_len);
1002
1003/**
1004 * Checks if two strings only differ by confusable characters.
1005 *
1006 * @ingroup Strings
1007 *
1008 * @param str1 String to compare.
1009 * @param str2 String to compare.
1010 *
1011 * @return `0` if the strings are confusables.
1012 */
1013int str_utf8_comp_confusable(const char *str1, const char *str2);
1014
1015/**
1016 * Converts the given Unicode codepoint to lowercase (locale insensitive).
1017 *
1018 * @ingroup Strings
1019 *
1020 * @param code Unicode codepoint to convert.
1021 *
1022 * @return Lowercase codepoint, or the original codepoint if there is no lowercase version.
1023 */
1024int str_utf8_tolower_codepoint(int code);
1025
1026#endif
1027