| 1 | #include "confusables.h" |
| 2 | |
| 3 | #include <base/system.h> |
| 4 | |
| 5 | #include <cstddef> |
| 6 | |
| 7 | static int str_utf8_skeleton(int ch, const int **skeleton, int *skeleton_len) |
| 8 | { |
| 9 | int i; |
| 10 | for(i = 0; i < NUM_DECOMPS; i++) |
| 11 | { |
| 12 | if(ch == decomp_chars[i]) |
| 13 | { |
| 14 | int offset = decomp_slices[i].offset; |
| 15 | int length = decomp_lengths[decomp_slices[i].length]; |
| 16 | |
| 17 | *skeleton = &decomp_data[offset]; |
| 18 | *skeleton_len = length; |
| 19 | return 1; |
| 20 | } |
| 21 | else if(ch < decomp_chars[i]) |
| 22 | { |
| 23 | break; |
| 24 | } |
| 25 | } |
| 26 | *skeleton = nullptr; |
| 27 | *skeleton_len = 1; |
| 28 | return 0; |
| 29 | } |
| 30 | |
/**
 * Iteration state for walking the skeleton of a UTF-8 string one code
 * point at a time.
 */
struct SKELETON
{
	// Next pending replacement code point, or nullptr when the current
	// character has no decomposition entry.
	const int *skeleton;
	// Number of code points still to be emitted for the current character.
	int skeleton_len;
	// Read cursor into the input string.
	const char *str;
};
| 37 | |
| 38 | static void str_utf8_skeleton_begin(struct SKELETON *skel, const char *str) |
| 39 | { |
| 40 | skel->skeleton = nullptr; |
| 41 | skel->skeleton_len = 0; |
| 42 | skel->str = str; |
| 43 | } |
| 44 | |
| 45 | static int str_utf8_skeleton_next(struct SKELETON *skel) |
| 46 | { |
| 47 | int ch = 0; |
| 48 | while(skel->skeleton_len == 0) |
| 49 | { |
| 50 | ch = str_utf8_decode(ptr: &skel->str); |
| 51 | if(ch == 0) |
| 52 | { |
| 53 | return 0; |
| 54 | } |
| 55 | str_utf8_skeleton(ch, skeleton: &skel->skeleton, skeleton_len: &skel->skeleton_len); |
| 56 | } |
| 57 | skel->skeleton_len--; |
| 58 | if(skel->skeleton != nullptr) |
| 59 | { |
| 60 | ch = *skel->skeleton; |
| 61 | skel->skeleton++; |
| 62 | } |
| 63 | return ch; |
| 64 | } |
| 65 | |
| 66 | int str_utf8_to_skeleton(const char *str, int *buf, int buf_len) |
| 67 | { |
| 68 | int i; |
| 69 | struct SKELETON skel; |
| 70 | str_utf8_skeleton_begin(skel: &skel, str); |
| 71 | for(i = 0; i < buf_len; i++) |
| 72 | { |
| 73 | int ch = str_utf8_skeleton_next(skel: &skel); |
| 74 | if(ch == 0) |
| 75 | { |
| 76 | break; |
| 77 | } |
| 78 | buf[i] = ch; |
| 79 | } |
| 80 | return i; |
| 81 | } |
| 82 | |
| 83 | int str_utf8_comp_confusable(const char *str1, const char *str2) |
| 84 | { |
| 85 | struct SKELETON skel1; |
| 86 | struct SKELETON skel2; |
| 87 | |
| 88 | str_utf8_skeleton_begin(skel: &skel1, str: str1); |
| 89 | str_utf8_skeleton_begin(skel: &skel2, str: str2); |
| 90 | |
| 91 | while(true) |
| 92 | { |
| 93 | int ch1 = str_utf8_skeleton_next(skel: &skel1); |
| 94 | int ch2 = str_utf8_skeleton_next(skel: &skel2); |
| 95 | |
| 96 | if(ch1 == 0 || ch2 == 0) |
| 97 | return ch1 != ch2; |
| 98 | |
| 99 | if(ch1 != ch2) |
| 100 | return 1; |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | #define CONFUSABLES_DATA |
| 105 | #include "confusables_data.h" |
| 106 | #undef CONFUSABLES_DATA |
| 107 | |