1 | #include "confusables.h" |
2 | |
3 | #include <base/system.h> |
4 | |
5 | #include <cstddef> |
6 | |
7 | static int str_utf8_skeleton(int ch, const int **skeleton, int *skeleton_len) |
8 | { |
9 | int i; |
10 | for(i = 0; i < NUM_DECOMPS; i++) |
11 | { |
12 | if(ch == decomp_chars[i]) |
13 | { |
14 | int offset = decomp_slices[i].offset; |
15 | int length = decomp_lengths[decomp_slices[i].length]; |
16 | |
17 | *skeleton = &decomp_data[offset]; |
18 | *skeleton_len = length; |
19 | return 1; |
20 | } |
21 | else if(ch < decomp_chars[i]) |
22 | { |
23 | break; |
24 | } |
25 | } |
26 | *skeleton = NULL; |
27 | *skeleton_len = 1; |
28 | return 0; |
29 | } |
30 | |
/*
 * Iteration state for walking the skeleton of a UTF-8 string one code
 * point at a time.
 */
struct SKELETON
{
	// Next code point of the replacement sequence currently being emitted,
	// or NULL when there is no replacement sequence.
	const int *skeleton;
	// Number of code points still to be emitted before more input is decoded.
	int skeleton_len;
	// Current read position in the input string.
	const char *str;
};
37 | |
38 | static void str_utf8_skeleton_begin(struct SKELETON *skel, const char *str) |
39 | { |
40 | skel->skeleton = NULL; |
41 | skel->skeleton_len = 0; |
42 | skel->str = str; |
43 | } |
44 | |
45 | static int str_utf8_skeleton_next(struct SKELETON *skel) |
46 | { |
47 | int ch = 0; |
48 | while(skel->skeleton_len == 0) |
49 | { |
50 | ch = str_utf8_decode(ptr: &skel->str); |
51 | if(ch == 0) |
52 | { |
53 | return 0; |
54 | } |
55 | str_utf8_skeleton(ch, skeleton: &skel->skeleton, skeleton_len: &skel->skeleton_len); |
56 | } |
57 | skel->skeleton_len--; |
58 | if(skel->skeleton != NULL) |
59 | { |
60 | ch = *skel->skeleton; |
61 | skel->skeleton++; |
62 | } |
63 | return ch; |
64 | } |
65 | |
66 | int str_utf8_to_skeleton(const char *str, int *buf, int buf_len) |
67 | { |
68 | int i; |
69 | struct SKELETON skel; |
70 | str_utf8_skeleton_begin(skel: &skel, str); |
71 | for(i = 0; i < buf_len; i++) |
72 | { |
73 | int ch = str_utf8_skeleton_next(skel: &skel); |
74 | if(ch == 0) |
75 | { |
76 | break; |
77 | } |
78 | buf[i] = ch; |
79 | } |
80 | return i; |
81 | } |
82 | |
83 | int str_utf8_comp_confusable(const char *str1, const char *str2) |
84 | { |
85 | struct SKELETON skel1; |
86 | struct SKELETON skel2; |
87 | |
88 | str_utf8_skeleton_begin(skel: &skel1, str: str1); |
89 | str_utf8_skeleton_begin(skel: &skel2, str: str2); |
90 | |
91 | while(true) |
92 | { |
93 | int ch1 = str_utf8_skeleton_next(skel: &skel1); |
94 | int ch2 = str_utf8_skeleton_next(skel: &skel2); |
95 | |
96 | if(ch1 == 0 || ch2 == 0) |
97 | return ch1 != ch2; |
98 | |
99 | if(ch1 != ch2) |
100 | return 1; |
101 | } |
102 | } |
103 | |
104 | #define CONFUSABLES_DATA |
105 | #include "confusables_data.h" |
106 | #undef CONFUSABLES_DATA |
107 | |