1#include "confusables.h"
2
3#include <base/system.h>
4
5#include <cstddef>
6
7static int str_utf8_skeleton(int ch, const int **skeleton, int *skeleton_len)
8{
9 int i;
10 for(i = 0; i < NUM_DECOMPS; i++)
11 {
12 if(ch == decomp_chars[i])
13 {
14 int offset = decomp_slices[i].offset;
15 int length = decomp_lengths[decomp_slices[i].length];
16
17 *skeleton = &decomp_data[offset];
18 *skeleton_len = length;
19 return 1;
20 }
21 else if(ch < decomp_chars[i])
22 {
23 break;
24 }
25 }
26 *skeleton = NULL;
27 *skeleton_len = 1;
28 return 0;
29}
30
// Cursor used to walk a UTF-8 string one skeleton code point at a time.
struct SKELETON
{
	// Next pending code point of the current replacement sequence, or NULL
	// when the current input character has no replacement.
	const int *skeleton;
	// Number of code points still to be handed out for the current input
	// character; 0 means the next input character has to be decoded.
	int skeleton_len;
	// Position in the input string that has not been decoded yet.
	const char *str;
};
37
38static void str_utf8_skeleton_begin(struct SKELETON *skel, const char *str)
39{
40 skel->skeleton = NULL;
41 skel->skeleton_len = 0;
42 skel->str = str;
43}
44
45static int str_utf8_skeleton_next(struct SKELETON *skel)
46{
47 int ch = 0;
48 while(skel->skeleton_len == 0)
49 {
50 ch = str_utf8_decode(ptr: &skel->str);
51 if(ch == 0)
52 {
53 return 0;
54 }
55 str_utf8_skeleton(ch, skeleton: &skel->skeleton, skeleton_len: &skel->skeleton_len);
56 }
57 skel->skeleton_len--;
58 if(skel->skeleton != NULL)
59 {
60 ch = *skel->skeleton;
61 skel->skeleton++;
62 }
63 return ch;
64}
65
66int str_utf8_to_skeleton(const char *str, int *buf, int buf_len)
67{
68 int i;
69 struct SKELETON skel;
70 str_utf8_skeleton_begin(skel: &skel, str);
71 for(i = 0; i < buf_len; i++)
72 {
73 int ch = str_utf8_skeleton_next(skel: &skel);
74 if(ch == 0)
75 {
76 break;
77 }
78 buf[i] = ch;
79 }
80 return i;
81}
82
83int str_utf8_comp_confusable(const char *str1, const char *str2)
84{
85 struct SKELETON skel1;
86 struct SKELETON skel2;
87
88 str_utf8_skeleton_begin(skel: &skel1, str: str1);
89 str_utf8_skeleton_begin(skel: &skel2, str: str2);
90
91 while(true)
92 {
93 int ch1 = str_utf8_skeleton_next(skel: &skel1);
94 int ch2 = str_utf8_skeleton_next(skel: &skel2);
95
96 if(ch1 == 0 || ch2 == 0)
97 return ch1 != ch2;
98
99 if(ch1 != ch2)
100 return 1;
101 }
102}
103
104#define CONFUSABLES_DATA
105#include "confusables_data.h"
106#undef CONFUSABLES_DATA
107