1#ifndef NU_UTF8_INTERNAL_H
2#define NU_UTF8_INTERNAL_H
3
4#include <sys/types.h>
5
/* Classify a UTF-8 lead byte.
 *
 * Returns the total length in bytes (1..4) of the sequence introduced
 * by c, or 0 when c cannot start a supported sequence (a continuation
 * byte 10xxxxxx, or a lead byte of the form 11111xxx). */
static inline
unsigned utf8_char_length(const char c) {
	/* work on the unsigned value: plain char may be signed */
	const unsigned char lead = (unsigned char)c;

	if (lead < 0x80) {
		return 1; /* 0xxxxxxx */
	}
	if (lead < 0xC0) {
		return 0; /* 10xxxxxx: continuation byte, not a lead */
	}
	if (lead < 0xE0) {
		return 2; /* 110xxxxx */
	}
	if (lead < 0xF0) {
		return 3; /* 1110xxxx */
	}
	if (lead < 0xF8) {
		return 4; /* 11110xxx */
	}

	return 0; /* 11111xxx: undefined */
}
17
/* Decode the two-byte UTF-8 sequence at p into *codepoint.
 *
 * Reads p[0] and p[1]; the prefix bits are masked off without being
 * checked, so the caller is expected to have validated the sequence. */
static inline
void utf8_2b(const char *p, uint32_t *codepoint) {
	const unsigned char *bytes = (const unsigned char *)p;

	/* 110aaaaa 10bbbbbb -> 00000aaa aabbbbbb */
	const uint32_t high = (uint32_t)(bytes[0] & 0x1F); /* 5 payload bits */
	const uint32_t low = (uint32_t)(bytes[1] & 0x3F);  /* 6 payload bits */

	*codepoint = (high << 6) | low;
}
32
/* Decode the three-byte UTF-8 sequence at p into *codepoint.
 *
 * Reads p[0]..p[2]; the prefix bits are masked off without being
 * checked, so the caller is expected to have validated the sequence. */
static inline
void utf8_3b(const char *p, uint32_t *codepoint) {
	const unsigned char *bytes = (const unsigned char *)p;

	/* 1110aaaa 10bbbbbb 10cccccc -> aaaabbbb bbcccccc */
	const uint32_t top = (uint32_t)(bytes[0] & 0x0F);    /* 4 payload bits */
	const uint32_t middle = (uint32_t)(bytes[1] & 0x3F); /* 6 payload bits */
	const uint32_t bottom = (uint32_t)(bytes[2] & 0x3F); /* 6 payload bits */

	*codepoint = (top << 12) | (middle << 6) | bottom;
}
49
/* Decode the four-byte UTF-8 sequence at p into *codepoint.
 *
 * Reads p[0]..p[3]; the prefix bits are masked off without being
 * checked, so the caller is expected to have validated the sequence. */
static inline
void utf8_4b(const char *p, uint32_t *codepoint) {
	const unsigned char *bytes = (const unsigned char *)p;
	uint32_t value;

	/* 11110aaa 10bbbbbb 10cccccc 10dddddd
	 *   -> 000aaabb bbbbcccc ccdddddd */
	value = (uint32_t)(bytes[0] & 0x07);                /* 3 payload bits */
	value = (value << 6) | (uint32_t)(bytes[1] & 0x3F); /* + 6 bits */
	value = (value << 6) | (uint32_t)(bytes[2] & 0x3F); /* + 6 bits */
	value = (value << 6) | (uint32_t)(bytes[3] & 0x3F); /* + 6 bits */

	*codepoint = value;
}
70
/* Number of bytes (1..4) used to encode codepoint in UTF-8.
 *
 * Every value of 0x10000 and above is reported as 4, the de facto
 * maximum sequence length; no upper bound is checked. */
static inline
unsigned utf8_codepoint_length(uint32_t codepoint) {
	unsigned length;

	if (codepoint >= 0x10000) {
		length = 4;
	} else if (codepoint >= 0x0800) {
		length = 3;
	} else if (codepoint >= 0x80) {
		length = 2;
	} else {
		length = 1;
	}

	return length;
}
79
/* Encode codepoint as a two-byte UTF-8 sequence written to p[0], p[1].
 *
 * No range check is performed; the caller selects this encoder for
 * code points that need two bytes. */
static inline
void b2_utf8(uint32_t codepoint, char *p) {
	unsigned char *out = (unsigned char *)p;

	/* 00000aaa aabbbbbb -> 110aaaaa 10bbbbbb
	 *
	 * The lead byte keeps only the low 8 bits of the result, which is
	 * what storing into an unsigned char does anyway. */
	const uint32_t upper = codepoint >> 6;   /* bits 6 and above */
	const uint32_t lower = codepoint & 0x3F; /* bits 0..5 */

	out[0] = (unsigned char)(0xC0 | upper);
	out[1] = (unsigned char)(0x80 | lower);
}
95
/* Encode codepoint as a three-byte UTF-8 sequence written to p[0]..p[2].
 *
 * Only bits 0..15 of codepoint are used; no range check is performed. */
static inline
void b3_utf8(uint32_t codepoint, char *p) {
	unsigned char *out = (unsigned char *)p;

	/* aaaabbbb bbcccccc -> 1110aaaa 10bbbbbb 10cccccc */
	const uint32_t top = (codepoint >> 12) & 0x0F;   /* bits 12..15 */
	const uint32_t middle = (codepoint >> 6) & 0x3F; /* bits 6..11 */
	const uint32_t bottom = codepoint & 0x3F;        /* bits 0..5 */

	out[0] = (unsigned char)(0xE0 | top);
	out[1] = (unsigned char)(0x80 | middle);
	out[2] = (unsigned char)(0x80 | bottom);
}
114
/* Encode codepoint as a four-byte UTF-8 sequence written to p[0]..p[3].
 *
 * Only bits 0..20 of codepoint are used; no range check is performed.
 * p must have room for four bytes. */
static inline
void b4_utf8(uint32_t codepoint, char *p) {
	unsigned char *up = (unsigned char *)(p);

	/* UNICODE (21 bits): 000aaabb bbbbcccc ccdddddd
	 *
	 * bits 18..20 (aaa)    -> 11110aaa   1st UTF-8 octet
	 * bits 12..17 (bbbbbb) -> 10bbbbbb   2nd UTF-8 octet
	 * bits  6..11 (cccccc) -> 10cccccc   3rd UTF-8 octet
	 * bits  0..5  (dddddd) -> 10dddddd   4th UTF-8 octet
	 *
	 * Each continuation octet carries exactly six payload bits. Bit 12
	 * belongs to the 2nd octet: letting it reach the 3rd octet would set
	 * bit 6 there and produce an invalid continuation byte (11xxxxxx). */
	*(up) = (unsigned char)(0xF0 | ((codepoint >> 18) & 0x07));
	*(up + 1) = (unsigned char)(0x80 | ((codepoint >> 12) & 0x3F));
	*(up + 2) = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3F));
	*(up + 3) = (unsigned char)(0x80 | (codepoint & 0x3F));
}
136
/* Structural check of the UTF-8 sequence starting at p.
 *
 * p       - start of the sequence
 * max_len - number of bytes readable at p
 *
 * Returns the length of the sequence (1..4) when the lead byte is
 * 0xxxxxxx, 110xxxxx, 1110xxxx or 11110xxx, the whole sequence fits in
 * max_len bytes and every following byte has the form 10xxxxxx.
 * Returns 0 otherwise.
 *
 * Only the byte prefixes are inspected; the decoded value itself is
 * not examined. */
static inline
int utf8_validread_basic(const char *p, size_t max_len) {
	const unsigned char *up = (const unsigned char *)(p);
	unsigned len;

	/* nothing is readable: do not dereference p at all */
	if (max_len == 0) {
		return 0;
	}

	len = utf8_char_length(*p);

	/* Sequences longer than 4 bytes are not supported:
	 * utf8_char_length() reports their lead bytes as 0 and they are
	 * rejected by the switch below.
	 * TODO: longer UTF-8 sequences support */

	/* the whole sequence must lie within the readable bytes */
	if (max_len < len) {
		return 0;
	}

	switch (len) {
	case 1:
		return 1; /* one byte codepoint */
	case 2:
		return ((*(up + 1) & 0xC0) == 0x80 ? 2 : 0);
	case 3:
		return ((*(up + 1) & 0xC0) == 0x80
			&& (*(up + 2) & 0xC0) == 0x80 ? 3 : 0);
	case 4:
		return ((*(up + 1) & 0xC0) == 0x80
			&& (*(up + 2) & 0xC0) == 0x80
			&& (*(up + 3) & 0xC0) == 0x80 ? 4 : 0);
	default:
		break; /* len == 0: byte cannot start a sequence */
	}

	return 0;
}
167
168#endif /* NU_UTF8_INTERNAL_H */
169

/* Source: qtlocation/src/3rdparty/mapbox-gl-native/vendor/nunicode/include/libnu/utf8_internal.h */