str.cpp source code [DDNet/base/str.cpp]

/* (c) Magnus Auvinen. See licence.txt in the root of the distribution for more information. */
/* If you are missing that file, acquire a complete release at teeworlds.com. */
3
4	#include "str.h"
5
6	#include "dbg.h"
7	#include "detect.h"
8	#include "math.h"
9	#include "mem.h"
10
11	#include <cctype>
12	#include <charconv> // std::to_chars
13	#include <cstdarg>
14	#include <cstdio>
15	#include <cstdlib>
16	#include <cstring>
17
18	int str_copy(char dst, const* char src, int* dst_size)
19	{
20	dst[`0`] = `'\0'`;
21	strncat(dest: dst, src: src, n: dst_size - `1`);
22	return str_utf8_fix_truncation(str: dst);
23	}
24
25	void str_append(char dst, const* char src, int* dst_size)
26	{
27	int s = str_length(str: dst);
28	int i = `0`;
29	while(s < dst_size)
30	{
31	dst[s] = src[i];
32	if(!src[i]) / check for null termination /
33	break;
34	s++;
35	i++;
36	}
37
38	dst[dst_size - `1`] = `0`; / assure null termination /
39	str_utf8_fix_truncation(str: dst);
40	}
41
42	void str_truncate(char dst, int* dst_size, const char src, int* truncation_len)
43	{
44	int size = dst_size;
45	if(truncation_len < size)
46	{
47	size = truncation_len + `1`;
48	}
49	str_copy(dst, src, dst_size: size);
50	}
51
/* Returns the result of strlen() on `str`, converted to int. */
int str_length(const char *str)
{
	return (int)strlen(str);
}
56
57	int str_format_v(char buffer, int* buffer_size, const char *format, va_list args)
58	{
59	#if defined(CONF_FAMILY_WINDOWS)
60	_vsprintf_p(buffer, buffer_size, format, args);
61	buffer[buffer_size - `1`] = `0`; / assure null termination /
62	#else
63	vsnprintf(s: buffer, maxlen: buffer_size, format: format, arg: args);
64	/ null termination is assured by definition of vsnprintf /
65	#endif
66	return str_utf8_fix_truncation(str: buffer);
67	}
68
#if !defined(CONF_DEBUG)
/*
 * Writes the decimal representation of `value` into `buffer` using
 * std::to_chars, reserving the last byte of `buffer_size` for the null
 * terminator. Returns the number of characters between `buffer` and the
 * end pointer reported by std::to_chars.
 */
int str_format_int(char *buffer, size_t buffer_size, int value)
{
	buffer[0] = '\0'; // Fix false positive clang-analyzer-core.UndefinedBinaryOperatorResult when using result
	auto result = std::to_chars(buffer, buffer + buffer_size - 1, value);
	result.ptr[0] = '\0';
	return result.ptr - buffer;
}
#endif
78
79	#undef str_format
80	int str_format(char buffer, int* buffer_size, const char *format, ...)
81	{
82	va_list args;
83	va_start(args, format);
84	int length = str_format_v(buffer, buffer_size, format, args);
85	va_end(args);
86	return length;
87	}
88	#if !defined(CONF_DEBUG)
89	#define str_format str_format_opt
90	#endif
91
/* Maps 'a'..'z' to 'A'..'Z'; every other character is returned unchanged. */
char str_uppercase(char c)
{
	if(c >= 'a' && c <= 'z')
		return 'A' + (c - 'a');
	return c;
}
98
/* Returns true if `c` is one of the characters '0'..'9'. */
bool str_isnum(char c)
{
	return c >= '0' && c <= '9';
}
103
104	int str_isallnum(const char *str)
105	{
106	while(*str)
107	{
108	if(!str_isnum(c: *str))
109	return `0`;
110	str++;
111	}
112	return `1`;
113	}
114
115	int str_isallnum_hex(const char *str)
116	{
117	while(*str)
118	{
119	if(!str_isnum(c: str) && !(str >= `'a'` && str <= `'f'`) && !(str >= `'A'` && *str <= `'F'`))
120	return `0`;
121	str++;
122	}
123	return `1`;
124	}
125
/* Returns non-zero if `c` is a space, newline, carriage return or tab. */
int str_isspace(char c)
{
	return c == ' ' || c == '\n' || c == '\r' || c == '\t';
}
130
131	const char str_trim_words(const* char str, int* words)
132	{
133	while(str && str_isspace(c: str))
134	str++;
135	while(words && *str)
136	{
137	if(str_isspace(c: str) && !str_isspace(c: (str + `1`)))
138	words--;
139	str++;
140	}
141	return str;
142	}
143
/* Returns true if `str` contains any byte with a value below 32. */
bool str_has_cc(const char *str)
{
	// compare as unsigned so bytes >= 0x80 are not treated as negative
	const unsigned char *s = (const unsigned char *)str;
	while(*s)
	{
		if(*s < 32)
		{
			return true;
		}
		s++;
	}
	return false;
}
157
/* makes sure that the string only contains the characters between 32 and 255 */
/* every byte below 32 is replaced in place with a space */
void str_sanitize_cc(char *str_in)
{
	unsigned char *str = (unsigned char *)str_in;
	while(*str)
	{
		if(*str < 32)
			*str = ' ';
		str++;
	}
}
169
/* makes sure that the string only contains the characters between 32 and 255 + \r\n\t */
/* every other byte below 32 is replaced in place with a space */
void str_sanitize(char *str_in)
{
	unsigned char *str = (unsigned char *)str_in;
	while(*str)
	{
		if(*str < 32 && !(*str == '\r') && !(*str == '\n') && !(*str == '\t'))
			*str = ' ';
		str++;
	}
}
181
/*
 * Replaces, in place, every byte of `str_in` that is <= 0x1F, equal to 0x7F,
 * or one of \ / | : * ? < > " with a space.
 */
void str_sanitize_filename(char *str_in)
{
	unsigned char *str = (unsigned char *)str_in;
	while(*str)
	{
		if(*str <= 0x1F || *str == 0x7F || *str == '\\' || *str == '/' || *str == '|' || *str == ':' ||
			*str == '*' || *str == '?' || *str == '<' || *str == '>' || *str == '"')
		{
			*str = ' ';
		}
		str++;
	}
}
195
196	bool str_valid_filename(const char *str)
197	{
198	// References:
199	// - https://en.wikipedia.org/w/index.php?title=Filename&oldid=1281340521#Comparison_of_filename_limitations
200	// - https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file (last update 2024-08-28)
201	if(str[`0`] == `'\0'`)
202	{
203	return false; // empty name not allowed
204	}
205
206	bool prev_space = false;
207	bool prev_period = false;
208	bool first_space_checked = false;
209	const char *iterator = str;
210	while(*iterator)
211	{
212	const int code = str_utf8_decode(ptr: &iterator);
213	if(code <= `0x1F` \|\| code == `0x7F` \|\| code == `'\\'` \|\| code == `'/'` \|\| code == `'\|'` \|\| code == `':'` \|\|
214	code == `'*'` \|\| code == `'?'` \|\| code == `'<'` \|\| code == `'>'` \|\| code == `'"'`)
215	{
216	return false; // disallowed characters, mostly for Windows
217	}
218	else if(str_utf8_isspace(code) && code != `' '`)
219	{
220	return false; // we only allow regular space characters
221	}
222	if(code == `' '`)
223	{
224	if(!first_space_checked)
225	{
226	return false; // leading spaces not allowed
227	}
228	if(prev_space)
229	{
230	return false; // multiple consecutive spaces not allowed
231	}
232	prev_space = true;
233	prev_period = false;
234	}
235	else
236	{
237	prev_space = false;
238	prev_period = code == `'.'`;
239	first_space_checked = true;
240	}
241	}
242	if(prev_space \|\| prev_period)
243	{
244	return false; // trailing spaces and periods not allowed
245	}
246
247	static constexpr const char *RESERVED_NAMES[] = {
248	"CON", "PRN", "AUX", "NUL",
249	"COM0", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9", "COM¹", "COM²", "COM³",
250	"LPT0", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9", "LPT¹", "LPT²", "LPT³"};
251	for(const char *reserved_name : RESERVED_NAMES)
252	{
253	const char *prefix = str_startswith_nocase(str, prefix: reserved_name);
254	if(prefix != nullptr && (prefix[`0`] == `'\0'` \|\| prefix[`0`] == `'.'`))
255	{
256	return false; // reserved name not allowed when it makes up the entire filename or when followed by period
257	}
258	}
259
260	return true;
261	}
262
263	int str_comp_filenames(const char a, const* char *b)
264	{
265	int result;
266
267	for(; a && b; ++a, ++b)
268	{
269	if(str_isnum(c: a) && str_isnum(c: b))
270	{
271	result = `0`;
272	do
273	{
274	if(!result)
275	result = a - b;
276	++a;
277	++b;
278	} while(str_isnum(c: a) && str_isnum(c: b));
279
280	if(str_isnum(c: *a))
281	return `1`;
282	else if(str_isnum(c: *b))
283	return -`1`;
284	else if(result \|\| a == `'\0'` \|\| b == `'\0'`)
285	return result;
286	}
287
288	result = tolower(c: a) - tolower(c: b);
289	if(result)
290	return result;
291	}
292	return a - b;
293	}
294
/*
 * Rewrites `str` in place: leading and trailing spaces are removed and
 * every inner run of spaces is collapsed into a single space. Only the
 * character ' ' is treated as whitespace here.
 */
void str_clean_whitespaces(char *str)
{
	char *read = str;
	char *write = str;

	/* skip initial whitespace */
	while(*read == ' ')
		read++;

	/* end of read string is detected in the loop */
	while(true)
	{
		/* skip whitespace */
		int found_whitespace = 0;
		for(; *read == ' '; read++)
			found_whitespace = 1;
		/* if not at the end of the string, put a found whitespace here */
		if(*read)
		{
			if(found_whitespace)
				*write++ = ' ';
			*write++ = *read++;
		}
		else
		{
			*write = 0;
			break;
		}
	}
}
325
326	char str_skip_to_whitespace(char* *str)
327	{
328	while(str && !str_isspace(c: str))
329	str++;
330	return str;
331	}
332
333	const char str_skip_to_whitespace_const(const* char *str)
334	{
335	while(str && !str_isspace(c: str))
336	str++;
337	return str;
338	}
339
340	char str_skip_whitespaces(char* *str)
341	{
342	while(str && str_isspace(c: str))
343	str++;
344	return str;
345	}
346
347	const char str_skip_whitespaces_const(const* char *str)
348	{
349	while(str && str_isspace(c: str))
350	str++;
351	return str;
352	}
353
/* case */
/*
 * Case-insensitive comparison of `a` and `b`, delegating to _stricmp on
 * Windows builds and to strcasecmp elsewhere. Returns their result.
 */
int str_comp_nocase(const char *a, const char *b)
{
#if defined(CONF_FAMILY_WINDOWS)
	return _stricmp(a, b);
#else
	return strcasecmp(a, b);
#endif
}
363
/*
 * Case-insensitive comparison of at most `num` characters of `a` and `b`,
 * delegating to _strnicmp on Windows builds and to strncasecmp elsewhere.
 */
int str_comp_nocase_num(const char *a, const char *b, int num)
{
#if defined(CONF_FAMILY_WINDOWS)
	return _strnicmp(a, b, num);
#else
	return strncasecmp(a, b, num);
#endif
}
372
/* Case-sensitive comparison of `a` and `b`; returns the result of strcmp(). */
int str_comp(const char *a, const char *b)
{
	return strcmp(a, b);
}
377
/* Compares at most `num` characters of `a` and `b`; returns the result of strncmp(). */
int str_comp_num(const char *a, const char *b, int num)
{
	return strncmp(a, b, num);
}
382
383	const char str_startswith_nocase(const* char str, const* char *prefix)
384	{
385	int prefixl = str_length(str: prefix);
386	if(str_comp_nocase_num(a: str, b: prefix, num: prefixl) == `0`)
387	{
388	return str + prefixl;
389	}
390	else
391	{
392	return nullptr;
393	}
394	}
395
396	const char str_startswith(const* char str, const* char *prefix)
397	{
398	int prefixl = str_length(str: prefix);
399	if(str_comp_num(a: str, b: prefix, num: prefixl) == `0`)
400	{
401	return str + prefixl;
402	}
403	else
404	{
405	return nullptr;
406	}
407	}
408
409	const char str_endswith_nocase(const* char str, const* char *suffix)
410	{
411	int strl = str_length(str);
412	int suffixl = str_length(str: suffix);
413	const char *strsuffix;
414	if(strl < suffixl)
415	{
416	return nullptr;
417	}
418	strsuffix = str + strl - suffixl;
419	if(str_comp_nocase(a: strsuffix, b: suffix) == `0`)
420	{
421	return strsuffix;
422	}
423	else
424	{
425	return nullptr;
426	}
427	}
428
429	const char str_endswith(const* char str, const* char *suffix)
430	{
431	int strl = str_length(str);
432	int suffixl = str_length(str: suffix);
433	const char *strsuffix;
434	if(strl < suffixl)
435	{
436	return nullptr;
437	}
438	strsuffix = str + strl - suffixl;
439	if(str_comp(a: strsuffix, b: suffix) == `0`)
440	{
441	return strsuffix;
442	}
443	else
444	{
445	return nullptr;
446	}
447	}
448
/*
 * Searches `haystack` for the first occurrence of `needle`, comparing
 * bytes through tolower(). Returns a pointer to the match inside
 * `haystack`, or nullptr if there is none. An empty `haystack` always
 * yields nullptr because the search loop is never entered.
 */
const char *str_find_nocase(const char *haystack, const char *needle)
{
	while(*haystack) /* native implementation */
	{
		const char *a = haystack;
		const char *b = needle;
		while(*a && *b && tolower((unsigned char)*a) == tolower((unsigned char)*b))
		{
			a++;
			b++;
		}
		if(!(*b)) // the whole needle was consumed, so it matched here
			return haystack;
		haystack++;
	}

	return nullptr;
}
467
/*
 * Searches `haystack` for the first case-sensitive occurrence of `needle`.
 * Returns a pointer to the match inside `haystack`, or nullptr if there is
 * none. An empty `haystack` always yields nullptr because the search loop
 * is never entered.
 */
const char *str_find(const char *haystack, const char *needle)
{
	while(*haystack) /* native implementation */
	{
		const char *a = haystack;
		const char *b = needle;
		while(*a && *b && *a == *b)
		{
			a++;
			b++;
		}
		if(!(*b)) // the whole needle was consumed, so it matched here
			return haystack;
		haystack++;
	}

	return nullptr;
}
486
/*
 * Locates the next token in `str`, where tokens are separated by any of
 * the characters in `delim`. A single leading delimiter is skipped
 * entirely; when more than one delimiter leads, only the first is skipped,
 * so the following token is reported as empty. Returns a pointer to the
 * token start and stores its length in `*length`, or returns nullptr
 * (leaving `*length` untouched) when the end of the string is reached.
 */
static const char *str_token_get(const char *str, const char *delim, int *length)
{
	size_t len = strspn(str, delim);
	if(len > 1)
		str++;
	else
		str += len;
	if(!*str)
		return nullptr;

	*length = strcspn(str, delim);
	return str;
}
500
501	const char str_next_token(const* char str, const* char delim, char* buffer, int* buffer_size)
502	{
503	int len = `0`;
504	const char *tok = str_token_get(str, delim, length: &len);
505	if(len < `0` \|\| tok == nullptr)
506	{
507	buffer[`0`] = `'\0'`;
508	return nullptr;
509	}
510
511	len = buffer_size > len ? len : buffer_size - `1`;
512	mem_copy(dest: buffer, source: tok, size: len);
513	buffer[len] = `'\0'`;
514
515	return tok + len;
516	}
517
518	int str_in_list(const char list, const* char delim, const* char *needle)
519	{
520	const char *tok = list;
521	int len = `0`, notfound = `1`, needlelen = str_length(str: needle);
522
523	while(notfound && (tok = str_token_get(str: tok, delim, length: &len)))
524	{
525	notfound = needlelen != len \|\| str_comp_num(a: tok, b: needle, num: len);
526	tok = tok + len;
527	}
528
529	return !notfound;
530	}
531
532	bool str_delimiters_around_offset(const char haystack, const* char delim, int* offset, int start, int* *end)
533	{
534	bool found = true;
535	const char *search = haystack;
536	const int delim_len = str_length(str: delim);
537	*start = `0`;
538	while(str_find(haystack: search, needle: delim))
539	{
540	const char *test = str_find(haystack: search, needle: delim) + delim_len;
541	int distance = test - haystack;
542	if(distance > offset)
543	break;
544
545	*start = distance;
546	search = test;
547	}
548	if(search == haystack)
549	found = false;
550
551	if(str_find(haystack: search, needle: delim))
552	{
553	*end = str_find(haystack: search, needle: delim) - haystack;
554	}
555	else
556	{
557	*end = str_length(str: haystack);
558	found = false;
559	}
560
561	return found;
562	}
563
/* Returns the result of strrchr(): the last occurrence of `needle` in `haystack`, or nullptr. */
const char *str_rchr(const char *haystack, char needle)
{
	return strrchr(haystack, needle);
}
568
/* Counts how many characters of `haystack` are equal to `needle`. */
int str_countchr(const char *haystack, char needle)
{
	int count = 0;
	while(*haystack)
	{
		if(*haystack == needle)
			count++;
		haystack++;
	}
	return count;
}
580
/*
 * Writes the bytes of `data` into `dst` as upper-case two-digit hex values,
 * each followed by a space (e.g. "01 AB "). Conversion stops when all
 * `data_size` bytes are written or when fewer than three characters plus
 * the terminator remain in `dst_size`. The output is null-terminated.
 */
void str_hex(char *dst, int dst_size, const void *data, int data_size)
{
	static const char hex[] = "0123456789ABCDEF";
	int data_index;
	int dst_index;
	for(data_index = 0, dst_index = 0; data_index < data_size && dst_index < dst_size - 3; data_index++)
	{
		dst[data_index * 3] = hex[((const unsigned char *)data)[data_index] >> 4];
		dst[data_index * 3 + 1] = hex[((const unsigned char *)data)[data_index] & 0xf];
		dst[data_index * 3 + 2] = ' ';
		dst_index += 3;
	}
	dst[dst_index] = '\0';
}
595
/*
 * Writes the bytes of `data` into `dst` as C-style hex literals separated
 * by ", " (e.g. "0x01, 0xAB"), emitting a newline instead of the space
 * after every `bytes_per_line` values. Each value takes six characters;
 * conversion stops when `dst_size` cannot hold another one. The separator
 * after the final value is removed.
 */
void str_hex_cstyle(char *dst, int dst_size, const void *data, int data_size, int bytes_per_line)
{
	static const char hex[] = "0123456789ABCDEF";
	int data_index;
	int dst_index;
	int remaining_bytes_per_line = bytes_per_line;
	for(data_index = 0, dst_index = 0; data_index < data_size && dst_index < dst_size - 6; data_index++)
	{
		--remaining_bytes_per_line;
		dst[data_index * 6] = '0';
		dst[data_index * 6 + 1] = 'x';
		dst[data_index * 6 + 2] = hex[((const unsigned char *)data)[data_index] >> 4];
		dst[data_index * 6 + 3] = hex[((const unsigned char *)data)[data_index] & 0xf];
		dst[data_index * 6 + 4] = ',';
		if(remaining_bytes_per_line == 0)
		{
			dst[data_index * 6 + 5] = '\n';
			remaining_bytes_per_line = bytes_per_line;
		}
		else
		{
			dst[data_index * 6 + 5] = ' ';
		}
		dst_index += 6;
	}
	dst[dst_index] = '\0';
	// Remove trailing comma and space/newline
	if(dst_index >= 1)
		dst[dst_index - 1] = '\0';
	if(dst_index >= 2)
		dst[dst_index - 2] = '\0';
}
628
/* Returns the value (0-15) of the hex digit `x`, accepting both cases, or -1 if `x` is not a hex digit. */
static int hexval(char x)
{
	switch(x)
	{
	case '0': return 0;
	case '1': return 1;
	case '2': return 2;
	case '3': return 3;
	case '4': return 4;
	case '5': return 5;
	case '6': return 6;
	case '7': return 7;
	case '8': return 8;
	case '9': return 9;
	case 'a':
	case 'A': return 10;
	case 'b':
	case 'B': return 11;
	case 'c':
	case 'C': return 12;
	case 'd':
	case 'D': return 13;
	case 'e':
	case 'E': return 14;
	case 'f':
	case 'F': return 15;
	default: return -1;
	}
}
658
659	static int byteval(const char hex, unsigned* char *dst)
660	{
661	int v1 = hexval(x: hex[`0`]);
662	int v2 = hexval(x: hex[`1`]);
663
664	if(v1 < `0` \|\| v2 < `0`)
665	return `1`;
666
667	dst = v1 `16` + v2;
668	return `0`;
669	}
670
671	int str_hex_decode(void dst, int* dst_size, const char *src)
672	{
673	unsigned char cdst = (unsigned* char *)dst;
674	int slen = str_length(str: src);
675	int len = slen / `2`;
676	int i;
677	if(slen != dst_size * `2`)
678	return `2`;
679
680	for(i = `0`; i < len && dst_size; i++, dst_size--)
681	{
682	if(byteval(hex: src + i * `2`, dst: cdst++))
683	return `1`;
684	}
685	return `0`;
686	}
687
/*
 * Encodes `data_size` bytes from `data_raw` as base64 (alphabet A-Z, a-z,
 * 0-9, '+', '/', padded with '=' to a multiple of four characters) into
 * `dst`. At most `dst_size - 1` characters are written; the output is
 * always null-terminated and is cut off silently if `dst` is too small.
 */
void str_base64(char *dst, int dst_size, const void *data_raw, int data_size)
{
	static const char DIGITS[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

	const unsigned char *data = (const unsigned char *)data_raw;
	unsigned value = 0; // bit accumulator
	int num_bits = 0; // number of not yet emitted bits in `value`
	int i = 0; // read position in `data`
	int o = 0; // write position in `dst`

	dst_size -= 1; // reserve space for the terminator
	dst[dst_size] = 0;
	while(true)
	{
		if(num_bits < 6 && i < data_size)
		{
			value = (value << 8) | data[i];
			num_bits += 8;
			i += 1;
		}
		if(o == dst_size)
		{
			return;
		}
		if(num_bits > 0)
		{
			unsigned padded;
			if(num_bits >= 6)
			{
				padded = (value >> (num_bits - 6)) & 0x3f;
			}
			else
			{
				// final partial group: pad the low bits with zeros
				padded = (value << (6 - num_bits)) & 0x3f;
			}
			dst[o] = DIGITS[padded];
			num_bits -= 6;
			o += 1;
		}
		else if(o % 4 != 0)
		{
			dst[o] = '=';
			o += 1;
		}
		else
		{
			dst[o] = 0;
			return;
		}
	}
}
739
/*
 * Returns the 6-bit value of the base64 digit `digit`
 * ('A'-'Z' = 0-25, 'a'-'z' = 26-51, '0'-'9' = 52-61, '+' = 62, '/' = 63),
 * or -1 if the character is not a base64 digit.
 */
static int base64_digit_value(char digit)
{
	if('A' <= digit && digit <= 'Z')
	{
		return digit - 'A';
	}
	else if('a' <= digit && digit <= 'z')
	{
		return digit - 'a' + 26;
	}
	else if('0' <= digit && digit <= '9')
	{
		return digit - '0' + 52;
	}
	else if(digit == '+')
	{
		return 62;
	}
	else if(digit == '/')
	{
		return 63;
	}
	return -1;
}
764
765	int str_base64_decode(void dst_raw, int* dst_size, const char *data)
766	{
767	unsigned char dst = (unsigned* char *)dst_raw;
768	int data_len = str_length(str: data);
769
770	int i;
771	int o = `0`;
772
773	if(data_len % `4` != `0`)
774	{
775	return -`3`;
776	}
777	if(data_len / `4` * `3` > dst_size)
778	{
779	// Output buffer too small.
780	return -`2`;
781	}
782	for(i = `0`; i < data_len; i += `4`)
783	{
784	int num_output_bytes = `3`;
785	char copy[`4`];
786	int d[`4`];
787	int value;
788	int b;
789	mem_copy(dest: copy, source: data + i, size: sizeof(copy));
790	if(i == data_len - `4`)
791	{
792	if(copy[`3`] == `'='`)
793	{
794	copy[`3`] = `'A'`;
795	num_output_bytes = `2`;
796	if(copy[`2`] == `'='`)
797	{
798	copy[`2`] = `'A'`;
799	num_output_bytes = `1`;
800	}
801	}
802	}
803	d[`0`] = base64_digit_value(digit: copy[`0`]);
804	d[`1`] = base64_digit_value(digit: copy[`1`]);
805	d[`2`] = base64_digit_value(digit: copy[`2`]);
806	d[`3`] = base64_digit_value(digit: copy[`3`]);
807	if(d[`0`] == -`1` \|\| d[`1`] == -`1` \|\| d[`2`] == -`1` \|\| d[`3`] == -`1`)
808	{
809	// Invalid digit.
810	return -`1`;
811	}
812	value = (d[`0`] << `18`) \| (d[`1`] << `12`) \| (d[`2`] << `6`) \| d[`3`];
813	for(b = `0`; b < `3`; b++)
814	{
815	unsigned char byte_value = (value >> (`16` - `8` * b)) & `0xff`;
816	if(b < num_output_bytes)
817	{
818	dst[o] = byte_value;
819	o += `1`;
820	}
821	else
822	{
823	if(byte_value != `0`)
824	{
825	// Padding not zeroed.
826	return -`2`;
827	}
828	}
829	}
830	}
831	return o;
832	}
833
/*
 * Copies `src` to the output cursor `*dst`, inserting a backslash before
 * every '"' and '\\'. Writing stops before `end` so that the terminator
 * always fits; an escaped pair is only written if both characters fit.
 * On return `*dst` points at the written null terminator.
 */
void str_escape(char **dst, const char *src, const char *end)
{
	while(*src && *dst + 1 < end)
	{
		if(*src == '"' || *src == '\\') // escape \ and "
		{
			if(*dst + 2 < end)
				*(*dst)++ = '\\';
			else
				break;
		}
		*(*dst)++ = *src++;
	}
	**dst = 0;
}
849
850	int str_toint(const char *str)
851	{
852	return str_toint_base(str, base: `10`);
853	}
854
/*
 * Parses `str` as a base-10 integer with strtol(). Returns false if any
 * character is left unparsed; otherwise stores the value in `*out` (when
 * `out` is not nullptr) and returns true.
 */
bool str_toint(const char *str, int *out)
{
	// returns true if conversion was successful
	char *end;
	int value = strtol(str, &end, 10);
	if(*end != '\0')
		return false;
	if(out != nullptr)
		*out = value;
	return true;
}
866
/* Parses `str` with strtol() in the given `base` and returns the result converted to int. */
int str_toint_base(const char *str, int base)
{
	return strtol(str, nullptr, base);
}
871
/* Parses `str` with strtoul() in the given `base` and returns the result. */
unsigned long str_toulong_base(const char *str, int base)
{
	return strtoul(str, nullptr, base);
}
876
/* Parses `str` with strtoll() in the given `base` and returns the result. */
int64_t str_toint64_base(const char *str, int base)
{
	return strtoll(str, nullptr, base);
}
881
/* Parses `str` with strtod() and returns the result converted to float. */
float str_tofloat(const char *str)
{
	return strtod(str, nullptr);
}
886
/*
 * Parses `str` with strtod(). Returns false if any character is left
 * unparsed; otherwise stores the value in `*out` (when `out` is not
 * nullptr) and returns true.
 */
bool str_tofloat(const char *str, float *out)
{
	// returns true if conversion was successful
	char *end;
	float value = strtod(str, &end);
	if(*end != '\0')
		return false;
	if(out != nullptr)
		*out = value;
	return true;
}
898
/*
 * Computes a hash of `str`: starting from 5381, each character updates the
 * hash as `hash * 33 + c`. The empty string hashes to 5381.
 */
unsigned str_quickhash(const char *str)
{
	unsigned hash = 5381;
	for(; *str; str++)
		hash = ((hash << 5) + hash) + (*str); /* hash * 33 + c */
	return hash;
}
906
/*
 * Encodes the code point `chr` as UTF-8 into `ptr` (no terminator is
 * written) and returns the number of bytes produced: 1 up to 0x7F, 2 up to
 * 0x7FF, 3 up to 0xFFFF, 4 up to 0x10FFFF. Returns 0 without writing for
 * values above 0x10FFFF.
 */
int str_utf8_encode(char *ptr, int chr)
{
	/* encode */
	if(chr <= 0x7F)
	{
		ptr[0] = (char)chr;
		return 1;
	}
	else if(chr <= 0x7FF)
	{
		ptr[0] = 0xC0 | ((chr >> 6) & 0x1F);
		ptr[1] = 0x80 | (chr & 0x3F);
		return 2;
	}
	else if(chr <= 0xFFFF)
	{
		ptr[0] = 0xE0 | ((chr >> 12) & 0x0F);
		ptr[1] = 0x80 | ((chr >> 6) & 0x3F);
		ptr[2] = 0x80 | (chr & 0x3F);
		return 3;
	}
	else if(chr <= 0x10FFFF)
	{
		ptr[0] = 0xF0 | ((chr >> 18) & 0x07);
		ptr[1] = 0x80 | ((chr >> 12) & 0x3F);
		ptr[2] = 0x80 | ((chr >> 6) & 0x3F);
		ptr[3] = 0x80 | (chr & 0x3F);
		return 4;
	}

	return 0;
}
939
/* Returns the byte at `**ptr` and advances `*ptr` by one. */
static unsigned char str_byte_next(const char **ptr)
{
	unsigned char byte_value = **ptr;
	(*ptr)++;
	return byte_value;
}
946
/* Moves `*ptr` back by one byte. */
static void str_byte_rewind(const char **ptr)
{
	(*ptr)--;
}
951
952	int str_utf8_decode(const char **ptr)
953	{
954	// As per https://encoding.spec.whatwg.org/#utf-8-decoder.
955	unsigned char utf8_lower_boundary = `0x80`;
956	unsigned char utf8_upper_boundary = `0xBF`;
957	int utf8_code_point = `0`;
958	int utf8_bytes_seen = `0`;
959	int utf8_bytes_needed = `0`;
960	while(true)
961	{
962	unsigned char byte_value = str_byte_next(ptr);
963	if(utf8_bytes_needed == `0`)
964	{
965	if(byte_value <= `0x7F`)
966	{
967	return byte_value;
968	}
969	else if(`0xC2` <= byte_value && byte_value <= `0xDF`)
970	{
971	utf8_bytes_needed = `1`;
972	utf8_code_point = byte_value - `0xC0`;
973	}
974	else if(`0xE0` <= byte_value && byte_value <= `0xEF`)
975	{
976	if(byte_value == `0xE0`)
977	utf8_lower_boundary = `0xA0`;
978	if(byte_value == `0xED`)
979	utf8_upper_boundary = `0x9F`;
980	utf8_bytes_needed = `2`;
981	utf8_code_point = byte_value - `0xE0`;
982	}
983	else if(`0xF0` <= byte_value && byte_value <= `0xF4`)
984	{
985	if(byte_value == `0xF0`)
986	utf8_lower_boundary = `0x90`;
987	if(byte_value == `0xF4`)
988	utf8_upper_boundary = `0x8F`;
989	utf8_bytes_needed = `3`;
990	utf8_code_point = byte_value - `0xF0`;
991	}
992	else
993	{
994	return -`1`; // Error.
995	}
996	utf8_code_point = utf8_code_point << (`6` * utf8_bytes_needed);
997	continue;
998	}
999	if(!(utf8_lower_boundary <= byte_value && byte_value <= utf8_upper_boundary))
1000	{
1001	// Resetting variables not necessary, will be done when
1002	// the function is called again.
1003	str_byte_rewind(ptr);
1004	return -`1`;
1005	}
1006	utf8_lower_boundary = `0x80`;
1007	utf8_upper_boundary = `0xBF`;
1008	utf8_bytes_seen += `1`;
1009	utf8_code_point = utf8_code_point + ((byte_value - `0x80`) << (`6` * (utf8_bytes_needed - utf8_bytes_seen)));
1010	if(utf8_bytes_seen != utf8_bytes_needed)
1011	{
1012	continue;
1013	}
1014	// Resetting variables not necessary, see above.
1015	return utf8_code_point;
1016	}
1017	}
1018
1019	void str_utf8_truncate(char dst, int* dst_size, const char src, int* truncation_len)
1020	{
1021	int size = -`1`;
1022	const char *cursor = src;
1023	int pos = `0`;
1024	while(pos <= truncation_len && cursor - src < dst_size && size != cursor - src)
1025	{
1026	size = cursor - src;
1027	if(str_utf8_decode(ptr: &cursor) == `0`)
1028	{
1029	break;
1030	}
1031	pos++;
1032	}
1033	str_copy(dst, src, dst_size: size + `1`);
1034	}
1035
1036	int str_utf8_fix_truncation(char *str)
1037	{
1038	int len = str_length(str);
1039	if(len > `0`)
1040	{
1041	int last_char_index = str_utf8_rewind(str, cursor: len);
1042	const char *last_char = str + last_char_index;
1043	// Fix truncated UTF-8.
1044	if(str_utf8_decode(ptr: &last_char) == -`1`)
1045	{
1046	str[last_char_index] = `0`;
1047	return last_char_index;
1048	}
1049	}
1050	return len;
1051	}
1052
1053	void str_utf8_trim_right(char *param)
1054	{
1055	const char *str = param;
1056	char end = nullptr*;
1057	while(*str)
1058	{
1059	char str_old = (char* *)str;
1060	int code = str_utf8_decode(ptr: &str);
1061
1062	// check if unicode is not empty
1063	if(!str_utf8_isspace(code))
1064	{
1065	end = nullptr;
1066	}
1067	else if(!end)
1068	{
1069	end = str_old;
1070	}
1071	}
1072	if(end)
1073	{
1074	*end = `0`;
1075	}
1076	}
1077
1078	void str_utf8_tolower(const char input, char* *output, size_t size)
1079	{
1080	size_t out_pos = `0`;
1081	while(*input)
1082	{
1083	const int code = str_utf8_tolower_codepoint(code: str_utf8_decode(ptr: &input));
1084	char encoded_code[`4`];
1085	const int code_size = str_utf8_encode(ptr: encoded_code, chr: code);
1086	if(out_pos + code_size + `1` > size) // +1 for null termination
1087	{
1088	break;
1089	}
1090	mem_copy(dest: &output[out_pos], source: encoded_code, size: code_size);
1091	out_pos += code_size;
1092	}
1093	output[out_pos] = `'\0'`;
1094	}
1095
/*
 * Returns non-zero if `code` is treated as whitespace: every value up to
 * and including 0x0020 (which also covers negative values), plus the
 * individual code points and ranges listed below.
 */
int str_utf8_isspace(int code)
{
	return code <= 0x0020 || code == 0x0085 || code == 0x00A0 || code == 0x034F ||
	       code == 0x115F || code == 0x1160 || code == 0x1680 || code == 0x180E ||
	       (code >= 0x2000 && code <= 0x200F) || (code >= 0x2028 && code <= 0x202F) ||
	       (code >= 0x205F && code <= 0x2064) || (code >= 0x206A && code <= 0x206F) ||
	       code == 0x2800 || code == 0x3000 || code == 0x3164 ||
	       (code >= 0xFE00 && code <= 0xFE0F) || code == 0xFEFF || code == 0xFFA0 ||
	       (code >= 0xFFF9 && code <= 0xFFFC);
}
1106
/* Returns 0 if `c` has the bit pattern 10xxxxxx (a UTF-8 continuation byte), 1 otherwise. */
int str_utf8_isstart(char c)
{
	if((c & 0xC0) == 0x80) /* 10xxxxxx */
		return 0;
	return 1;
}
1113
1114	int str_utf8_rewind(const char str, int* cursor)
1115	{
1116	while(cursor)
1117	{
1118	cursor--;
1119	if(str_utf8_isstart(c: *(str + cursor)))
1120	break;
1121	}
1122	return cursor;
1123	}
1124
1125	const char str_utf8_find_nocase(const* char haystack, const* char needle, const* char **end)
1126	{
1127	while(haystack) /* native implementation /
1128	{
1129	const char *a = haystack;
1130	const char *b = needle;
1131	const char *a_next = a;
1132	const char *b_next = b;
1133	while(a && b && str_utf8_tolower_codepoint(code: str_utf8_decode(ptr: &a_next)) == str_utf8_tolower_codepoint(code: str_utf8_decode(ptr: &b_next)))
1134	{
1135	a = a_next;
1136	b = b_next;
1137	}
1138	if(!(*b))
1139	{
1140	if(end != nullptr)
1141	*end = a_next;
1142	return haystack;
1143	}
1144	str_utf8_decode(ptr: &haystack);
1145	}
1146
1147	if(end != nullptr)
1148	end = nullptr*;
1149	return nullptr;
1150	}
1151
1152	int str_utf8_comp_nocase(const char a, const* char *b)
1153	{
1154	int code_a;
1155	int code_b;
1156
1157	while(a && b)
1158	{
1159	code_a = str_utf8_tolower_codepoint(code: str_utf8_decode(ptr: &a));
1160	code_b = str_utf8_tolower_codepoint(code: str_utf8_decode(ptr: &b));
1161
1162	if(code_a != code_b)
1163	return code_a - code_b;
1164	}
1165	return (unsigned char)a - (unsigned* char)*b;
1166	}
1167
1168	int str_utf8_comp_nocase_num(const char a, const* char b, int* num)
1169	{
1170	int code_a;
1171	int code_b;
1172	const char *old_a = a;
1173
1174	if(num <= `0`)
1175	return `0`;
1176
1177	while(a && b)
1178	{
1179	code_a = str_utf8_tolower_codepoint(code: str_utf8_decode(ptr: &a));
1180	code_b = str_utf8_tolower_codepoint(code: str_utf8_decode(ptr: &b));
1181
1182	if(code_a != code_b)
1183	return code_a - code_b;
1184
1185	if(a - old_a >= num)
1186	return `0`;
1187	}
1188
1189	return (unsigned char)a - (unsigned* char)*b;
1190	}
1191
1192	const char str_utf8_skip_whitespaces(const* char *str)
1193	{
1194	const char *str_old;
1195	int code;
1196
1197	while(*str)
1198	{
1199	str_old = str;
1200	code = str_utf8_decode(ptr: &str);
1201
1202	// check if unicode is not empty
1203	if(!str_utf8_isspace(code))
1204	{
1205	return str_old;
1206	}
1207	}
1208
1209	return str;
1210	}
1211
1212	int str_utf8_forward(const char str, int* cursor)
1213	{
1214	const char *ptr = str + cursor;
1215	if(str_utf8_decode(ptr: &ptr) == `0`)
1216	{
1217	return cursor;
1218	}
1219	return ptr - str;
1220	}
1221
1222	int str_utf8_check(const char *str)
1223	{
1224	int codepoint;
1225	while((codepoint = str_utf8_decode(ptr: &str)))
1226	{
1227	if(codepoint == -`1`)
1228	{
1229	return `0`;
1230	}
1231	}
1232	return `1`;
1233	}
1234
1235	void str_utf8_copy_num(char dst, const* char src, int* dst_size, int num)
1236	{
1237	int new_cursor;
1238	int cursor = `0`;
1239
1240	while(src[cursor] && num > `0`)
1241	{
1242	new_cursor = str_utf8_forward(str: src, cursor);
1243	if(new_cursor >= dst_size) // reserve 1 byte for the null termination
1244	break;
1245	else
1246	cursor = new_cursor;
1247	--num;
1248	}
1249
1250	str_copy(dst, src, dst_size: cursor < dst_size ? cursor + `1` : dst_size);
1251	}
1252
1253	void str_utf8_stats(const char str, size_t max_size, size_t max_count, size_t size, size_t *count)
1254	{
1255	const char *cursor = str;
1256	*size = `0`;
1257	*count = `0`;
1258	while(size < max_size && count < max_count)
1259	{
1260	if(str_utf8_decode(ptr: &cursor) == `0`)
1261	{
1262	break;
1263	}
1264	if((size_t)(cursor - str) >= max_size)
1265	{
1266	break;
1267	}
1268	*size = cursor - str;
1269	++(*count);
1270	}
1271	}
1272
1273	size_t str_utf8_offset_bytes_to_chars(const char *str, size_t byte_offset)
1274	{
1275	size_t char_offset = `0`;
1276	size_t current_offset = `0`;
1277	while(current_offset < byte_offset)
1278	{
1279	const size_t prev_byte_offset = current_offset;
1280	current_offset = str_utf8_forward(str, cursor: current_offset);
1281	if(current_offset == prev_byte_offset)
1282	break;
1283	char_offset++;
1284	}
1285	return char_offset;
1286	}
1287
1288	size_t str_utf8_offset_chars_to_bytes(const char *str, size_t char_offset)
1289	{
1290	size_t byte_offset = `0`;
1291	for(size_t i = `0`; i < char_offset; i++)
1292	{
1293	const size_t prev_byte_offset = byte_offset;
1294	byte_offset = str_utf8_forward(str, cursor: byte_offset);
1295	if(byte_offset == prev_byte_offset)
1296	break;
1297	}
1298	return byte_offset;
1299	}
1300
1301	int str_utf8_dist(const char a, const* char *b)
1302	{
1303	int buf_len = `2` * (str_length(str: a) + `1` + str_length(str: b) + `1`);
1304	int buf = (int* )calloc(nmemb: buf_len, size: sizeof(buf));
1305	int result = str_utf8_dist_buffer(a, b, buf, buf_len);
1306	free(ptr: buf);
1307	return result;
1308	}
1309
1310	static int str_to_utf32_unchecked(const char str, int* **out)
1311	{
1312	int out_len = `0`;
1313	while((**out = str_utf8_decode(ptr: &str)))
1314	{
1315	(*out)++;
1316	out_len++;
1317	}
1318	return out_len;
1319	}
1320
1321	int str_utf8_dist_buffer(const char a_utf8, const* char b_utf8, int* buf, int* buf_len)
1322	{
1323	int a_utf8_len = str_length(str: a_utf8);
1324	int b_utf8_len = str_length(str: b_utf8);
1325	int a, b; // UTF-32
1326	int a_len, b_len; // UTF-32 length
1327	dbg_assert(buf_len >= `2` * (a_utf8_len + `1` + b_utf8_len + `1`), "buffer too small");
1328	if(a_utf8_len > b_utf8_len)
1329	{
1330	const char *tmp2 = a_utf8;
1331	a_utf8 = b_utf8;
1332	b_utf8 = tmp2;
1333	}
1334	a = buf;
1335	a_len = str_to_utf32_unchecked(str: a_utf8, out: &buf);
1336	b = buf;
1337	b_len = str_to_utf32_unchecked(str: b_utf8, out: &buf);
1338	return str_utf32_dist_buffer(a, a_len, b, b_len, buf, buf_len: buf_len - b_len - a_len);
1339	}
1340
1341	int str_utf32_dist_buffer(const int a, int* a_len, const int b, int* b_len, int buf, int* buf_len)
1342	{
1343	int i, j;
1344	dbg_assert(buf_len >= (a_len + `1`) + (b_len + `1`), "buffer too small");
1345	if(a_len > b_len)
1346	{
1347	int tmp1 = a_len;
1348	const int *tmp2 = a;
1349
1350	a_len = b_len;
1351	a = b;
1352
1353	b_len = tmp1;
1354	b = tmp2;
1355	}
1356	#define B(i, j) buf[((j) & 1) * (a_len + 1) + (i)]
1357	for(i = `0`; i <= a_len; i++)
1358	{
1359	B(i, `0`) = i;
1360	}
1361	for(j = `1`; j <= b_len; j++)
1362	{
1363	B(`0`, j) = j;
1364	for(i = `1`; i <= a_len; i++)
1365	{
1366	int subst = (a[i - `1`] != b[j - `1`]);
1367	B(i, j) = minimum(
1368	B(i - `1`, j) + `1`,
1369	B(i, j - `1`) + `1`,
1370	B(i - `1`, j - `1`) + subst);
1371	}
1372	}
1373	return B(a_len, b_len);
1374	#undef B
1375	}
1376

Browse the source code of DDNet/base/str.cpp