1 | /* Test c8rtomb. |
2 | Copyright (C) 2022-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <errno.h> |
20 | #include <limits.h> |
21 | #include <locale.h> |
22 | #include <stdio.h> |
23 | #include <stdlib.h> |
24 | #include <string.h> |
25 | #include <uchar.h> |
26 | #include <wchar.h> |
27 | #include <support/check.h> |
28 | #include <support/support.h> |
29 | |
30 | static int |
31 | test_truncated_code_unit_sequence (void) |
32 | { |
33 | /* Missing trailing code unit for a two code byte unit sequence. */ |
34 | { |
35 | const char8_t *u8s = (const char8_t*) u8"\xC2" ; |
36 | char buf[MB_LEN_MAX] = { 0 }; |
37 | mbstate_t s = { 0 }; |
38 | |
39 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
40 | errno = 0; |
41 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); |
42 | TEST_COMPARE (errno, EILSEQ); |
43 | } |
44 | |
45 | /* Missing first trailing code unit for a three byte code unit sequence. */ |
46 | { |
47 | const char8_t *u8s = (const char8_t*) u8"\xE0" ; |
48 | char buf[MB_LEN_MAX] = { 0 }; |
49 | mbstate_t s = { 0 }; |
50 | |
51 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
52 | errno = 0; |
53 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); |
54 | TEST_COMPARE (errno, EILSEQ); |
55 | } |
56 | |
57 | /* Missing second trailing code unit for a three byte code unit sequence. */ |
58 | { |
59 | const char8_t *u8s = (const char8_t*) u8"\xE0\xA0" ; |
60 | char buf[MB_LEN_MAX] = { 0 }; |
61 | mbstate_t s = { 0 }; |
62 | |
63 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
64 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); |
65 | errno = 0; |
66 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) -1); |
67 | TEST_COMPARE (errno, EILSEQ); |
68 | } |
69 | |
70 | /* Missing first trailing code unit for a four byte code unit sequence. */ |
71 | { |
72 | const char8_t *u8s = (const char8_t*) u8"\xF0" ; |
73 | char buf[MB_LEN_MAX] = { 0 }; |
74 | mbstate_t s = { 0 }; |
75 | |
76 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
77 | errno = 0; |
78 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); |
79 | TEST_COMPARE (errno, EILSEQ); |
80 | } |
81 | |
82 | /* Missing second trailing code unit for a four byte code unit sequence. */ |
83 | { |
84 | const char8_t *u8s = (const char8_t*) u8"\xF0\x90" ; |
85 | char buf[MB_LEN_MAX] = { 0 }; |
86 | mbstate_t s = { 0 }; |
87 | |
88 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
89 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); |
90 | errno = 0; |
91 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) -1); |
92 | TEST_COMPARE (errno, EILSEQ); |
93 | } |
94 | |
95 | /* Missing third trailing code unit for a four byte code unit sequence. */ |
96 | { |
97 | const char8_t *u8s = (const char8_t*) u8"\xF0\x90\x80" ; |
98 | char buf[MB_LEN_MAX] = { 0 }; |
99 | mbstate_t s = { 0 }; |
100 | |
101 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
102 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); |
103 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 0); |
104 | errno = 0; |
105 | TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) -1); |
106 | TEST_COMPARE (errno, EILSEQ); |
107 | } |
108 | |
109 | return 0; |
110 | } |
111 | |
112 | static int |
113 | test_invalid_trailing_code_unit_sequence (void) |
114 | { |
115 | /* Invalid trailing code unit for a two code byte unit sequence. */ |
116 | { |
117 | const char8_t *u8s = (const char8_t*) u8"\xC2\xC0" ; |
118 | char buf[MB_LEN_MAX] = { 0 }; |
119 | mbstate_t s = { 0 }; |
120 | |
121 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
122 | errno = 0; |
123 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); |
124 | TEST_COMPARE (errno, EILSEQ); |
125 | } |
126 | |
127 | /* Invalid first trailing code unit for a three byte code unit sequence. */ |
128 | { |
129 | const char8_t *u8s = (const char8_t*) u8"\xE0\xC0" ; |
130 | char buf[MB_LEN_MAX] = { 0 }; |
131 | mbstate_t s = { 0 }; |
132 | |
133 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
134 | errno = 0; |
135 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); |
136 | TEST_COMPARE (errno, EILSEQ); |
137 | } |
138 | |
139 | /* Invalid second trailing code unit for a three byte code unit sequence. */ |
140 | { |
141 | const char8_t *u8s = (const char8_t*) u8"\xE0\xA0\xC0" ; |
142 | char buf[MB_LEN_MAX] = { 0 }; |
143 | mbstate_t s = { 0 }; |
144 | |
145 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
146 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); |
147 | errno = 0; |
148 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) -1); |
149 | TEST_COMPARE (errno, EILSEQ); |
150 | } |
151 | |
152 | /* Invalid first trailing code unit for a four byte code unit sequence. */ |
153 | { |
154 | const char8_t *u8s = (const char8_t*) u8"\xF0\xC0" ; |
155 | char buf[MB_LEN_MAX] = { 0 }; |
156 | mbstate_t s = { 0 }; |
157 | |
158 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
159 | errno = 0; |
160 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); |
161 | TEST_COMPARE (errno, EILSEQ); |
162 | } |
163 | |
164 | /* Invalid second trailing code unit for a four byte code unit sequence. */ |
165 | { |
166 | const char8_t *u8s = (const char8_t*) u8"\xF0\x90\xC0" ; |
167 | char buf[MB_LEN_MAX] = { 0 }; |
168 | mbstate_t s = { 0 }; |
169 | |
170 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
171 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); |
172 | errno = 0; |
173 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) -1); |
174 | TEST_COMPARE (errno, EILSEQ); |
175 | } |
176 | |
177 | /* Invalid third trailing code unit for a four byte code unit sequence. */ |
178 | { |
179 | const char8_t *u8s = (const char8_t*) u8"\xF0\x90\x80\xC0" ; |
180 | char buf[MB_LEN_MAX] = { 0 }; |
181 | mbstate_t s = { 0 }; |
182 | |
183 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
184 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); |
185 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 0); |
186 | errno = 0; |
187 | TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) -1); |
188 | TEST_COMPARE (errno, EILSEQ); |
189 | } |
190 | |
191 | return 0; |
192 | } |
193 | |
194 | static int |
195 | test_lone_trailing_code_units (void) |
196 | { |
197 | /* Lone trailing code unit. */ |
198 | const char8_t *u8s = (const char8_t*) u8"\x80" ; |
199 | char buf[MB_LEN_MAX] = { 0 }; |
200 | mbstate_t s = { 0 }; |
201 | |
202 | errno = 0; |
203 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) -1); |
204 | TEST_COMPARE (errno, EILSEQ); |
205 | |
206 | return 0; |
207 | } |
208 | |
209 | static int |
210 | test_overlong_encoding (void) |
211 | { |
212 | /* Two byte overlong encoding. */ |
213 | { |
214 | const char8_t *u8s = (const char8_t*) u8"\xC0\x80" ; |
215 | char buf[MB_LEN_MAX] = { 0 }; |
216 | mbstate_t s = { 0 }; |
217 | |
218 | errno = 0; |
219 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) -1); |
220 | TEST_COMPARE (errno, EILSEQ); |
221 | } |
222 | |
223 | /* Two byte overlong encoding. */ |
224 | { |
225 | const char8_t *u8s = (const char8_t*) u8"\xC1\x80" ; |
226 | char buf[MB_LEN_MAX] = { 0 }; |
227 | mbstate_t s = { 0 }; |
228 | |
229 | errno = 0; |
230 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) -1); |
231 | TEST_COMPARE (errno, EILSEQ); |
232 | } |
233 | |
234 | /* Three byte overlong encoding. */ |
235 | { |
236 | const char8_t *u8s = (const char8_t*) u8"\xE0\x9F\xBF" ; |
237 | char buf[MB_LEN_MAX] = { 0 }; |
238 | mbstate_t s = { 0 }; |
239 | |
240 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
241 | errno = 0; |
242 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); |
243 | TEST_COMPARE (errno, EILSEQ); |
244 | } |
245 | |
246 | /* Four byte overlong encoding. */ |
247 | { |
248 | const char8_t *u8s = (const char8_t*) u8"\xF0\x8F\xBF\xBF" ; |
249 | char buf[MB_LEN_MAX] = { 0 }; |
250 | mbstate_t s = { 0 }; |
251 | |
252 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
253 | errno = 0; |
254 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); |
255 | TEST_COMPARE (errno, EILSEQ); |
256 | } |
257 | |
258 | return 0; |
259 | } |
260 | |
261 | static int |
262 | test_surrogate_range (void) |
263 | { |
264 | /* Would encode U+D800. */ |
265 | { |
266 | const char8_t *u8s = (const char8_t*) u8"\xED\xA0\x80" ; |
267 | char buf[MB_LEN_MAX] = { 0 }; |
268 | mbstate_t s = { 0 }; |
269 | |
270 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
271 | errno = 0; |
272 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); |
273 | TEST_COMPARE (errno, EILSEQ); |
274 | } |
275 | |
276 | /* Would encode U+DFFF. */ |
277 | { |
278 | const char8_t *u8s = (const char8_t*) u8"\xED\xBF\xBF" ; |
279 | char buf[MB_LEN_MAX] = { 0 }; |
280 | mbstate_t s = { 0 }; |
281 | |
282 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
283 | errno = 0; |
284 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); |
285 | TEST_COMPARE (errno, EILSEQ); |
286 | } |
287 | |
288 | return 0; |
289 | } |
290 | |
291 | static int |
292 | test_out_of_range_encoding (void) |
293 | { |
294 | /* Would encode U+00110000. */ |
295 | { |
296 | const char8_t *u8s = (const char8_t*) u8"\xF4\x90\x80\x80" ; |
297 | char buf[MB_LEN_MAX] = { 0 }; |
298 | mbstate_t s = { 0 }; |
299 | |
300 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
301 | errno = 0; |
302 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); |
303 | TEST_COMPARE (errno, EILSEQ); |
304 | } |
305 | |
306 | /* Would encode U+00140000. */ |
307 | { |
308 | const char8_t *u8s = (const char8_t*) u8"\xF5\x90\x80\x80" ; |
309 | char buf[MB_LEN_MAX] = { 0 }; |
310 | mbstate_t s = { 0 }; |
311 | |
312 | errno = 0; |
313 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) -1); |
314 | TEST_COMPARE (errno, EILSEQ); |
315 | } |
316 | |
317 | return 0; |
318 | } |
319 | |
/* Check the behavior of c8rtomb when the output buffer argument is NULL.
   ISO C specifies that such a call behaves as if a null code unit were
   converted into an internal buffer, so the code unit actually passed does
   not take part in the conversion.  Always returns 0; failures are
   recorded through TEST_COMPARE and TEST_VERIFY.  */
static int
test_null_output_buffer (void)
{
  /* Null buffer with an initial state: the call is expected to return 1
     and to leave the state initial.  */
  {
    mbstate_t s = { 0 };

    TEST_COMPARE (c8rtomb (NULL, u8"X"[0], &s), (size_t) 1);
    TEST_VERIFY (mbsinit (&s));
  }

  /* Null buffer while a code unit sequence is still incomplete.  The
     pending lead code unit cannot be completed by this call, so the test
     expects EILSEQ rather than a return to the initial state.  */
  {
    mbstate_t s = { 0 };
    char buf[MB_LEN_MAX] = { 0 };

    /* Start a two byte sequence so that the state is no longer initial.  */
    TEST_COMPARE (c8rtomb (buf, u8"\xC2"[0], &s), (size_t) 0);
    errno = 0;
    TEST_COMPARE (c8rtomb (NULL, u8"\x80"[0], &s), (size_t) -1);
    TEST_COMPARE (errno, EILSEQ);
  }

  return 0;
}
348 | |
349 | static int |
350 | test_utf8 (void) |
351 | { |
352 | xsetlocale (LC_ALL, locale: "de_DE.UTF-8" ); |
353 | |
354 | /* Null character. */ |
355 | { |
356 | /* U+0000 => 0x00 */ |
357 | const char8_t *u8s = (const char8_t*) u8"\x00" ; |
358 | char buf[MB_LEN_MAX] = { 0 }; |
359 | mbstate_t s = { 0 }; |
360 | |
361 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 1); |
362 | TEST_COMPARE (buf[0], (char) 0x00); |
363 | TEST_VERIFY (mbsinit (&s)); |
364 | } |
365 | |
366 | /* First non-null character in the code point range that maps to a single |
367 | code unit. */ |
368 | { |
369 | /* U+0001 => 0x01 */ |
370 | const char8_t *u8s = (const char8_t*) u8"\x01" ; |
371 | char buf[MB_LEN_MAX] = { 0 }; |
372 | mbstate_t s = { 0 }; |
373 | |
374 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 1); |
375 | TEST_COMPARE (buf[0], (char) 0x01); |
376 | TEST_VERIFY (mbsinit (&s)); |
377 | } |
378 | |
379 | /* Last character in the code point range that maps to a single code unit. */ |
380 | { |
381 | /* U+007F => 0x7F */ |
382 | const char8_t *u8s = (const char8_t*) u8"\x7F" ; |
383 | char buf[MB_LEN_MAX] = { 0 }; |
384 | mbstate_t s = { 0 }; |
385 | |
386 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 1); |
387 | TEST_COMPARE (buf[0], (char) 0x7F); |
388 | TEST_VERIFY (mbsinit (&s)); |
389 | } |
390 | |
391 | /* First character in the code point range that maps to two code units. */ |
392 | { |
393 | /* U+0080 => 0xC2 0x80 */ |
394 | const char8_t *u8s = (const char8_t*) u8"\xC2\x80" ; |
395 | char buf[MB_LEN_MAX] = { 0 }; |
396 | mbstate_t s = { 0 }; |
397 | |
398 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
399 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 2); |
400 | TEST_COMPARE (buf[0], (char) 0xC2); |
401 | TEST_COMPARE (buf[1], (char) 0x80); |
402 | TEST_VERIFY (mbsinit (&s)); |
403 | } |
404 | |
405 | /* Last character in the code point range that maps to two code units. */ |
406 | { |
407 | /* U+07FF => 0xDF 0xBF */ |
408 | const char8_t *u8s = (const char8_t*) u8"\u07FF" ; |
409 | char buf[MB_LEN_MAX] = { 0 }; |
410 | mbstate_t s = { 0 }; |
411 | |
412 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
413 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 2); |
414 | TEST_COMPARE (buf[0], (char) 0xDF); |
415 | TEST_COMPARE (buf[1], (char) 0xBF); |
416 | TEST_VERIFY (mbsinit (&s)); |
417 | } |
418 | |
419 | /* First character in the code point range that maps to three code units. */ |
420 | { |
421 | /* U+0800 => 0xE0 0xA0 0x80 */ |
422 | const char8_t *u8s = (const char8_t*) u8"\u0800" ; |
423 | char buf[MB_LEN_MAX] = { 0 }; |
424 | mbstate_t s = { 0 }; |
425 | |
426 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
427 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); |
428 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3); |
429 | TEST_COMPARE (buf[0], (char) 0xE0); |
430 | TEST_COMPARE (buf[1], (char) 0xA0); |
431 | TEST_COMPARE (buf[2], (char) 0x80); |
432 | TEST_VERIFY (mbsinit (&s)); |
433 | } |
434 | |
435 | /* Last character in the code point range that maps to three code units |
436 | before the surrogate code point range. */ |
437 | { |
438 | /* U+D7FF => 0xED 0x9F 0xBF */ |
439 | const char8_t *u8s = (const char8_t*) u8"\uD7FF" ; |
440 | char buf[MB_LEN_MAX] = { 0 }; |
441 | mbstate_t s = { 0 }; |
442 | |
443 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
444 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); |
445 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3); |
446 | TEST_COMPARE (buf[0], (char) 0xED); |
447 | TEST_COMPARE (buf[1], (char) 0x9F); |
448 | TEST_COMPARE (buf[2], (char) 0xBF); |
449 | TEST_VERIFY (mbsinit (&s)); |
450 | } |
451 | |
452 | /* First character in the code point range that maps to three code units |
453 | after the surrogate code point range. */ |
454 | { |
455 | /* U+E000 => 0xEE 0x80 0x80 */ |
456 | const char8_t *u8s = (const char8_t*) u8"\uE000" ; |
457 | char buf[MB_LEN_MAX] = { 0 }; |
458 | mbstate_t s = { 0 }; |
459 | |
460 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
461 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); |
462 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3); |
463 | TEST_COMPARE (buf[0], (char) 0xEE); |
464 | TEST_COMPARE (buf[1], (char) 0x80); |
465 | TEST_COMPARE (buf[2], (char) 0x80); |
466 | TEST_VERIFY (mbsinit (&s)); |
467 | } |
468 | |
469 | /* Not a BOM. */ |
470 | { |
471 | /* U+FEFF => 0xEF 0xBB 0xBF */ |
472 | const char8_t *u8s = (const char8_t*) u8"\uFEFF" ; |
473 | char buf[MB_LEN_MAX] = { 0 }; |
474 | mbstate_t s = { 0 }; |
475 | |
476 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
477 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); |
478 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3); |
479 | TEST_COMPARE (buf[0], (char) 0xEF); |
480 | TEST_COMPARE (buf[1], (char) 0xBB); |
481 | TEST_COMPARE (buf[2], (char) 0xBF); |
482 | TEST_VERIFY (mbsinit (&s)); |
483 | } |
484 | |
485 | /* Replacement character. */ |
486 | { |
487 | /* U+FFFD => 0xEF 0xBF 0xBD */ |
488 | const char8_t *u8s = (const char8_t*) u8"\uFFFD" ; |
489 | char buf[MB_LEN_MAX] = { 0 }; |
490 | mbstate_t s = { 0 }; |
491 | |
492 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
493 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); |
494 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3); |
495 | TEST_COMPARE (buf[0], (char) 0xEF); |
496 | TEST_COMPARE (buf[1], (char) 0xBF); |
497 | TEST_COMPARE (buf[2], (char) 0xBD); |
498 | TEST_VERIFY (mbsinit (&s)); |
499 | } |
500 | |
501 | /* Last character in the code point range that maps to three code units. */ |
502 | { |
503 | /* U+FFFF => 0xEF 0xBF 0xBF */ |
504 | const char8_t *u8s = (const char8_t*) u8"\uFFFF" ; |
505 | char buf[MB_LEN_MAX] = { 0 }; |
506 | mbstate_t s = { 0 }; |
507 | |
508 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
509 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); |
510 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3); |
511 | TEST_COMPARE (buf[0], (char) 0xEF); |
512 | TEST_COMPARE (buf[1], (char) 0xBF); |
513 | TEST_COMPARE (buf[2], (char) 0xBF); |
514 | TEST_VERIFY (mbsinit (&s)); |
515 | } |
516 | |
517 | /* First character in the code point range that maps to four code units. */ |
518 | { |
519 | /* U+10000 => 0xF0 0x90 0x80 0x80 */ |
520 | const char8_t *u8s = (const char8_t*) u8"\U00010000" ; |
521 | char buf[MB_LEN_MAX] = { 0 }; |
522 | mbstate_t s = { 0 }; |
523 | |
524 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
525 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); |
526 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 0); |
527 | TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) 4); |
528 | TEST_COMPARE (buf[0], (char) 0xF0); |
529 | TEST_COMPARE (buf[1], (char) 0x90); |
530 | TEST_COMPARE (buf[2], (char) 0x80); |
531 | TEST_COMPARE (buf[3], (char) 0x80); |
532 | TEST_VERIFY (mbsinit (&s)); |
533 | } |
534 | |
535 | /* Last character in the code point range that maps to four code units. */ |
536 | { |
537 | /* U+10FFFF => 0xF4 0x8F 0xBF 0xBF */ |
538 | const char8_t *u8s = (const char8_t*) u8"\U0010FFFF" ; |
539 | char buf[MB_LEN_MAX] = { 0 }; |
540 | mbstate_t s = { 0 }; |
541 | |
542 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
543 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); |
544 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 0); |
545 | TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) 4); |
546 | TEST_COMPARE (buf[0], (char) 0xF4); |
547 | TEST_COMPARE (buf[1], (char) 0x8F); |
548 | TEST_COMPARE (buf[2], (char) 0xBF); |
549 | TEST_COMPARE (buf[3], (char) 0xBF); |
550 | TEST_VERIFY (mbsinit (&s)); |
551 | } |
552 | |
553 | return 0; |
554 | } |
555 | |
556 | static int |
557 | test_big5_hkscs (void) |
558 | { |
559 | xsetlocale (LC_ALL, locale: "zh_HK.BIG5-HKSCS" ); |
560 | |
561 | /* A pair of two byte UTF-8 code unit sequences that map a Unicode code |
562 | point and combining character to a single double byte character. */ |
563 | { |
564 | /* U+00CA U+0304 => 0x88 0x62 */ |
565 | const char8_t *u8s = (const char8_t*) u8"\u00CA\u0304" ; |
566 | char buf[MB_LEN_MAX] = { 0 }; |
567 | mbstate_t s = { 0 }; |
568 | |
569 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
570 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); |
571 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 0); |
572 | TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) 2); |
573 | TEST_COMPARE (buf[0], (char) 0x88); |
574 | TEST_COMPARE (buf[1], (char) 0x62); |
575 | TEST_VERIFY (mbsinit (&s)); |
576 | } |
577 | |
578 | /* Another pair of two byte UTF-8 code unit sequences that map a Unicode code |
579 | point and combining character to a single double byte character. */ |
580 | { |
581 | /* U+00EA U+030C => 0x88 0xA5 */ |
582 | const char8_t *u8s = (const char8_t*) u8"\u00EA\u030C" ; |
583 | char buf[MB_LEN_MAX] = { 0 }; |
584 | mbstate_t s = { 0 }; |
585 | |
586 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); |
587 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); |
588 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 0); |
589 | TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) 2); |
590 | TEST_COMPARE (buf[0], (char) 0x88); |
591 | TEST_COMPARE (buf[1], (char) 0xA5); |
592 | TEST_VERIFY (mbsinit (&s)); |
593 | } |
594 | |
595 | return 0; |
596 | } |
597 | |
598 | static int |
599 | do_test (void) |
600 | { |
601 | test_truncated_code_unit_sequence (); |
602 | test_invalid_trailing_code_unit_sequence (); |
603 | test_lone_trailing_code_units (); |
604 | test_overlong_encoding (); |
605 | test_surrogate_range (); |
606 | test_out_of_range_encoding (); |
607 | test_null_output_buffer (); |
608 | test_utf8 (); |
609 | test_big5_hkscs (); |
610 | return 0; |
611 | } |
612 | |
613 | #include <support/test-driver.c> |
614 | |