1 | /* GLIB - Library of useful routines for C programming |
2 | * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald |
3 | * |
4 | * This library is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU Lesser General Public |
6 | * License as published by the Free Software Foundation; either |
7 | * version 2.1 of the License, or (at your option) any later version. |
8 | * |
9 | * This library is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | * Lesser General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU Lesser General Public |
15 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
16 | */ |
17 | |
18 | /* |
19 | * Modified by the GLib Team and others 1997-2000. See the AUTHORS |
20 | * file for a list of people on the GLib Team. See the ChangeLog |
21 | * files for a list of changes. These files are distributed with |
22 | * GLib at ftp://ftp.gtk.org/pub/gtk/. |
23 | */ |
24 | |
25 | #undef G_DISABLE_ASSERT |
26 | #undef G_LOG_DOMAIN |
27 | |
28 | #include <locale.h> |
29 | #include <string.h> |
30 | |
31 | #include <glib.h> |
32 | |
33 | /* Bug 311337 */ |
34 | static void |
35 | test_iconv_state (void) |
36 | { |
37 | const gchar *in = "\xf4\xe5\xf8\xe5\xed" ; |
38 | const gchar *expected = "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d" ; |
39 | gchar *out; |
40 | gsize bytes_read = 0; |
41 | gsize bytes_written = 0; |
42 | GError *error = NULL; |
43 | |
44 | out = g_convert (str: in, len: -1, to_codeset: "UTF-8" , from_codeset: "CP1255" , |
45 | bytes_read: &bytes_read, bytes_written: &bytes_written, error: &error); |
46 | |
47 | if (error && error->code == G_CONVERT_ERROR_NO_CONVERSION) |
48 | return; /* silently skip if CP1255 is not supported, see bug 467707 */ |
49 | |
50 | g_assert_no_error (error); |
51 | g_assert_cmpint (bytes_read, ==, 5); |
52 | g_assert_cmpint (bytes_written, ==, 10); |
53 | g_assert_cmpstr (out, ==, expected); |
54 | g_free (mem: out); |
55 | } |
56 | |
57 | /* Some tests involving "vulgar fraction one half" (U+00BD). This is |
58 | * represented in UTF-8 as \xC2\xBD, in ISO-8859-1 as \xBD, and is not |
59 | * represented in ISO-8859-15. */ |
60 | static void |
61 | test_one_half (void) |
62 | { |
63 | const gchar *in_utf8 = "\xc2\xbd" ; |
64 | gchar *out; |
65 | gsize bytes_read = 0; |
66 | gsize bytes_written = 0; |
67 | GError *error = NULL; |
68 | |
69 | out = g_convert (str: in_utf8, len: -1, |
70 | to_codeset: "ISO-8859-1" , from_codeset: "UTF-8" , |
71 | bytes_read: &bytes_read, bytes_written: &bytes_written, |
72 | error: &error); |
73 | |
74 | g_assert_no_error (error); |
75 | g_assert_cmpint (bytes_read, ==, 2); |
76 | g_assert_cmpint (bytes_written, ==, 1); |
77 | g_assert_cmpstr (out, ==, "\xbd" ); |
78 | g_free (mem: out); |
79 | |
80 | out = g_convert (str: in_utf8, len: -1, |
81 | to_codeset: "ISO-8859-15" , from_codeset: "UTF-8" , |
82 | bytes_read: &bytes_read, bytes_written: &bytes_written, |
83 | error: &error); |
84 | |
85 | g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE); |
86 | g_assert_cmpint (bytes_read, ==, 0); |
87 | g_assert_cmpint (bytes_written, ==, 0); |
88 | g_assert_cmpstr (out, ==, NULL); |
89 | g_clear_error (err: &error); |
90 | g_free (mem: out); |
91 | |
92 | out = g_convert_with_fallback (str: in_utf8, len: -1, |
93 | to_codeset: "ISO8859-15" , from_codeset: "UTF-8" , |
94 | fallback: "a" , |
95 | bytes_read: &bytes_read, bytes_written: &bytes_written, |
96 | error: &error); |
97 | |
98 | g_assert_no_error (error); |
99 | g_assert_cmpint (bytes_read, ==, 2); |
100 | g_assert_cmpint (bytes_written, ==, 1); |
101 | g_assert_cmpstr (out, ==, "a" ); |
102 | g_free (mem: out); |
103 | } |
104 | |
105 | static void |
106 | test_byte_order (void) |
107 | { |
108 | gchar in_be[4] = { 0xfe, 0xff, 0x03, 0x93}; /* capital gamma */ |
109 | gchar in_le[4] = { 0xff, 0xfe, 0x93, 0x03}; |
110 | const gchar *expected = "\xce\x93" ; |
111 | gchar *out; |
112 | gsize bytes_read = 0; |
113 | gsize bytes_written = 0; |
114 | GError *error = NULL; |
115 | |
116 | out = g_convert (str: in_be, len: sizeof (in_be), |
117 | to_codeset: "UTF-8" , from_codeset: "UTF-16" , |
118 | bytes_read: &bytes_read, bytes_written: &bytes_written, |
119 | error: &error); |
120 | |
121 | g_assert_no_error (error); |
122 | g_assert_cmpint (bytes_read, ==, 4); |
123 | g_assert_cmpint (bytes_written, ==, 2); |
124 | g_assert_cmpstr (out, ==, expected); |
125 | g_free (mem: out); |
126 | |
127 | out = g_convert (str: in_le, len: sizeof (in_le), |
128 | to_codeset: "UTF-8" , from_codeset: "UTF-16" , |
129 | bytes_read: &bytes_read, bytes_written: &bytes_written, |
130 | error: &error); |
131 | |
132 | g_assert_no_error (error); |
133 | g_assert_cmpint (bytes_read, ==, 4); |
134 | g_assert_cmpint (bytes_written, ==, 2); |
135 | g_assert_cmpstr (out, ==, expected); |
136 | g_free (mem: out); |
137 | } |
138 | |
139 | static void |
140 | check_utf8_to_ucs4 (const char *utf8, |
141 | gsize utf8_len, |
142 | const gunichar *ucs4, |
143 | glong ucs4_len, |
144 | glong error_pos) |
145 | { |
146 | gunichar *result, *result2, *result3; |
147 | glong items_read, items_read2; |
148 | glong items_written, items_written2; |
149 | GError *error, *error2, *error3; |
150 | gint i; |
151 | |
152 | if (!error_pos) |
153 | { |
154 | /* check the fast conversion */ |
155 | result = g_utf8_to_ucs4_fast (str: utf8, len: utf8_len, items_written: &items_written); |
156 | |
157 | g_assert_cmpint (items_written, ==, ucs4_len); |
158 | g_assert (result); |
159 | for (i = 0; i <= items_written; i++) |
160 | g_assert (result[i] == ucs4[i]); |
161 | |
162 | g_free (mem: result); |
163 | } |
164 | |
165 | error = NULL; |
166 | result = g_utf8_to_ucs4 (str: utf8, len: utf8_len, items_read: &items_read, items_written: &items_written, error: &error); |
167 | |
168 | if (utf8_len == strlen (s: utf8)) |
169 | { |
170 | /* check that len == -1 yields identical results */ |
171 | error2 = NULL; |
172 | result2 = g_utf8_to_ucs4 (str: utf8, len: -1, items_read: &items_read2, items_written: &items_written2, error: &error2); |
173 | g_assert (error || items_read2 == items_read); |
174 | g_assert (error || items_written2 == items_written); |
175 | g_assert_cmpint (!!result, ==, !!result2); |
176 | g_assert_cmpint (!!error, ==, !!error2); |
177 | if (result) |
178 | for (i = 0; i <= items_written; i++) |
179 | g_assert (result[i] == result2[i]); |
180 | |
181 | g_free (mem: result2); |
182 | if (error2) |
183 | g_error_free (error: error2); |
184 | } |
185 | |
186 | error3 = NULL; |
187 | result3 = g_utf8_to_ucs4 (str: utf8, len: utf8_len, NULL, NULL, error: &error3); |
188 | |
189 | if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT) |
190 | { |
191 | g_assert_no_error (error); |
192 | g_assert_cmpint (items_read, ==, error_pos); |
193 | g_assert_cmpint (items_written, ==, ucs4_len); |
194 | g_assert (result); |
195 | for (i = 0; i <= items_written; i++) |
196 | g_assert (result[i] == ucs4[i]); |
197 | g_error_free (error: error3); |
198 | } |
199 | else if (error_pos) |
200 | { |
201 | g_assert (error != NULL); |
202 | g_assert (result == NULL); |
203 | g_assert_cmpint (items_read, ==, error_pos); |
204 | g_error_free (error); |
205 | |
206 | g_assert (error3 != NULL); |
207 | g_assert (result3 == NULL); |
208 | g_error_free (error: error3); |
209 | } |
210 | else |
211 | { |
212 | g_assert_no_error (error); |
213 | g_assert_cmpint (items_read, ==, utf8_len); |
214 | g_assert_cmpint (items_written, ==, ucs4_len); |
215 | g_assert (result); |
216 | for (i = 0; i <= items_written; i++) |
217 | g_assert (result[i] == ucs4[i]); |
218 | |
219 | g_assert_no_error (error3); |
220 | g_assert (result3); |
221 | for (i = 0; i <= ucs4_len; i++) |
222 | g_assert (result3[i] == ucs4[i]); |
223 | } |
224 | |
225 | g_free (mem: result); |
226 | g_free (mem: result3); |
227 | } |
228 | |
229 | static void |
230 | check_ucs4_to_utf8 (const gunichar *ucs4, |
231 | glong ucs4_len, |
232 | const char *utf8, |
233 | glong utf8_len, |
234 | glong error_pos) |
235 | { |
236 | gchar *result, *result2, *result3; |
237 | glong items_read, items_read2; |
238 | glong items_written, items_written2; |
239 | GError *error, *error2, *error3; |
240 | |
241 | error = NULL; |
242 | result = g_ucs4_to_utf8 (str: ucs4, len: ucs4_len, items_read: &items_read, items_written: &items_written, error: &error); |
243 | |
244 | if (ucs4[ucs4_len] == 0) |
245 | { |
246 | /* check that len == -1 yields identical results */ |
247 | error2 = NULL; |
248 | result2 = g_ucs4_to_utf8 (str: ucs4, len: -1, items_read: &items_read2, items_written: &items_written2, error: &error2); |
249 | |
250 | g_assert (error || items_read2 == items_read); |
251 | g_assert (error || items_written2 == items_written); |
252 | g_assert_cmpint (!!result, ==, !!result2); |
253 | g_assert_cmpint (!!error, ==, !!error2); |
254 | if (result) |
255 | g_assert_cmpstr (result, ==, result2); |
256 | |
257 | g_free (mem: result2); |
258 | if (error2) |
259 | g_error_free (error: error2); |
260 | } |
261 | |
262 | error3 = NULL; |
263 | result3 = g_ucs4_to_utf8 (str: ucs4, len: ucs4_len, NULL, NULL, error: &error3); |
264 | |
265 | if (error_pos) |
266 | { |
267 | g_assert (error != NULL); |
268 | g_assert (result == NULL); |
269 | g_assert_cmpint (items_read, ==, error_pos); |
270 | g_error_free (error); |
271 | |
272 | g_assert (error3 != NULL); |
273 | g_assert (result3 == NULL); |
274 | g_error_free (error: error3); |
275 | } |
276 | else |
277 | { |
278 | g_assert_no_error (error); |
279 | g_assert_cmpint (items_read, ==, ucs4_len); |
280 | g_assert_cmpint (items_written, ==, utf8_len); |
281 | g_assert (result); |
282 | g_assert_cmpstr (result, ==, utf8); |
283 | |
284 | g_assert_no_error (error3); |
285 | g_assert (result3); |
286 | g_assert_cmpstr (result3, ==, utf8); |
287 | } |
288 | |
289 | g_free (mem: result); |
290 | g_free (mem: result3); |
291 | } |
292 | |
293 | static void |
294 | check_utf8_to_utf16 (const char *utf8, |
295 | gsize utf8_len, |
296 | const gunichar2 *utf16, |
297 | glong utf16_len, |
298 | glong error_pos) |
299 | { |
300 | gunichar2 *result, *result2, *result3; |
301 | glong items_read, items_read2; |
302 | glong items_written, items_written2; |
303 | GError *error, *error2, *error3; |
304 | gint i; |
305 | |
306 | error = NULL; |
307 | result = g_utf8_to_utf16 (str: utf8, len: utf8_len, items_read: &items_read, items_written: &items_written, error: &error); |
308 | |
309 | if (utf8_len == strlen (s: utf8)) |
310 | { |
311 | /* check that len == -1 yields identical results */ |
312 | error2 = NULL; |
313 | result2 = g_utf8_to_utf16 (str: utf8, len: -1, items_read: &items_read2, items_written: &items_written2, error: &error2); |
314 | g_assert (error || items_read2 == items_read); |
315 | g_assert (error || items_written2 == items_written); |
316 | g_assert_cmpint (!!result, ==, !!result2); |
317 | g_assert_cmpint (!!error, ==, !!error2); |
318 | if (result) |
319 | for (i = 0; i <= items_written; i++) |
320 | g_assert (result[i] == result2[i]); |
321 | |
322 | g_free (mem: result2); |
323 | if (error2) |
324 | g_error_free (error: error2); |
325 | } |
326 | |
327 | error3 = NULL; |
328 | result3 = g_utf8_to_utf16 (str: utf8, len: utf8_len, NULL, NULL, error: &error3); |
329 | |
330 | if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT) |
331 | { |
332 | g_assert_no_error (error); |
333 | g_assert_cmpint (items_read, ==, error_pos); |
334 | g_assert_cmpint (items_written, ==, utf16_len); |
335 | g_assert (result); |
336 | for (i = 0; i <= items_written; i++) |
337 | g_assert (result[i] == utf16[i]); |
338 | g_error_free (error: error3); |
339 | } |
340 | else if (error_pos) |
341 | { |
342 | g_assert (error != NULL); |
343 | g_assert (result == NULL); |
344 | g_assert_cmpint (items_read, ==, error_pos); |
345 | g_error_free (error); |
346 | |
347 | g_assert (error3 != NULL); |
348 | g_assert (result3 == NULL); |
349 | g_error_free (error: error3); |
350 | } |
351 | else |
352 | { |
353 | g_assert_no_error (error); |
354 | g_assert_cmpint (items_read, ==, utf8_len); |
355 | g_assert_cmpint (items_written, ==, utf16_len); |
356 | g_assert (result); |
357 | for (i = 0; i <= items_written; i++) |
358 | g_assert (result[i] == utf16[i]); |
359 | |
360 | g_assert_no_error (error3); |
361 | g_assert (result3); |
362 | for (i = 0; i <= utf16_len; i++) |
363 | g_assert (result3[i] == utf16[i]); |
364 | } |
365 | |
366 | g_free (mem: result); |
367 | g_free (mem: result3); |
368 | } |
369 | |
370 | static void |
371 | check_utf16_to_utf8 (const gunichar2 *utf16, |
372 | glong utf16_len, |
373 | const char *utf8, |
374 | glong utf8_len, |
375 | glong error_pos) |
376 | { |
377 | gchar *result, *result2, *result3; |
378 | glong items_read, items_read2; |
379 | glong items_written, items_written2; |
380 | GError *error, *error2, *error3; |
381 | |
382 | error = NULL; |
383 | result = g_utf16_to_utf8 (str: utf16, len: utf16_len, items_read: &items_read, items_written: &items_written, error: &error); |
384 | if (utf16[utf16_len] == 0) |
385 | { |
386 | /* check that len == -1 yields identical results */ |
387 | error2 = NULL; |
388 | result2 = g_utf16_to_utf8 (str: utf16, len: -1, items_read: &items_read2, items_written: &items_written2, error: &error2); |
389 | |
390 | g_assert (error || items_read2 == items_read); |
391 | g_assert (error || items_written2 == items_written); |
392 | g_assert_cmpint (!!result, ==, !!result2); |
393 | g_assert_cmpint (!!error, ==, !!error2); |
394 | if (result) |
395 | g_assert_cmpstr (result, ==, result2); |
396 | |
397 | g_free (mem: result2); |
398 | if (error2) |
399 | g_error_free (error: error2); |
400 | } |
401 | |
402 | error3 = NULL; |
403 | result3 = g_utf16_to_utf8 (str: utf16, len: utf16_len, NULL, NULL, error: &error3); |
404 | |
405 | if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT) |
406 | { |
407 | g_assert_no_error (error); |
408 | g_assert_cmpint (items_read, ==, error_pos); |
409 | g_assert_cmpint (items_read + 1, ==, utf16_len); |
410 | g_assert_cmpint (items_written, ==, utf8_len); |
411 | g_assert (result); |
412 | g_assert_cmpstr (result, ==, utf8); |
413 | g_error_free (error: error3); |
414 | } |
415 | else if (error_pos) |
416 | { |
417 | g_assert (error != NULL); |
418 | g_assert (result == NULL); |
419 | g_assert_cmpint (items_read, ==, error_pos); |
420 | g_error_free (error); |
421 | |
422 | g_assert (error3 != NULL); |
423 | g_assert (result3 == NULL); |
424 | g_error_free (error: error3); |
425 | } |
426 | else |
427 | { |
428 | g_assert_no_error (error); |
429 | g_assert_cmpint (items_read, ==, utf16_len); |
430 | g_assert_cmpint (items_written, ==, utf8_len); |
431 | g_assert (result); |
432 | g_assert_cmpstr (result, ==, utf8); |
433 | |
434 | g_assert_no_error (error3); |
435 | g_assert (result3); |
436 | g_assert_cmpstr (result3, ==, utf8); |
437 | } |
438 | |
439 | g_free (mem: result); |
440 | g_free (mem: result3); |
441 | } |
442 | |
443 | static void |
444 | check_ucs4_to_utf16 (const gunichar *ucs4, |
445 | glong ucs4_len, |
446 | const gunichar2 *utf16, |
447 | glong utf16_len, |
448 | glong error_pos) |
449 | { |
450 | gunichar2 *result, *result2, *result3; |
451 | glong items_read, items_read2; |
452 | glong items_written, items_written2; |
453 | GError *error, *error2, *error3; |
454 | gint i; |
455 | |
456 | error = NULL; |
457 | result = g_ucs4_to_utf16 (str: ucs4, len: ucs4_len, items_read: &items_read, items_written: &items_written, error: &error); |
458 | |
459 | if (ucs4[ucs4_len] == 0) |
460 | { |
461 | /* check that len == -1 yields identical results */ |
462 | error2 = NULL; |
463 | result2 = g_ucs4_to_utf16 (str: ucs4, len: -1, items_read: &items_read2, items_written: &items_written2, error: &error2); |
464 | |
465 | g_assert (error || items_read2 == items_read); |
466 | g_assert (error || items_written2 == items_written); |
467 | g_assert_cmpint (!!result, ==, !!result2); |
468 | g_assert_cmpint (!!error, ==, !!error2); |
469 | if (result) |
470 | for (i = 0; i <= utf16_len; i++) |
471 | g_assert (result[i] == result2[i]); |
472 | |
473 | g_free (mem: result2); |
474 | if (error2) |
475 | g_error_free (error: error2); |
476 | } |
477 | |
478 | error3 = NULL; |
479 | result3 = g_ucs4_to_utf16 (str: ucs4, len: -1, NULL, NULL, error: &error3); |
480 | |
481 | if (error_pos) |
482 | { |
483 | g_assert (error != NULL); |
484 | g_assert (result == NULL); |
485 | g_assert_cmpint (items_read, ==, error_pos); |
486 | g_error_free (error); |
487 | |
488 | g_assert (error3 != NULL); |
489 | g_assert (result3 == NULL); |
490 | g_error_free (error: error3); |
491 | } |
492 | else |
493 | { |
494 | g_assert_no_error (error); |
495 | g_assert_cmpint (items_read, ==, ucs4_len); |
496 | g_assert_cmpint (items_written, ==, utf16_len); |
497 | g_assert (result); |
498 | for (i = 0; i <= utf16_len; i++) |
499 | g_assert (result[i] == utf16[i]); |
500 | |
501 | g_assert_no_error (error3); |
502 | g_assert (result3); |
503 | for (i = 0; i <= utf16_len; i++) |
504 | g_assert (result3[i] == utf16[i]); |
505 | } |
506 | |
507 | g_free (mem: result); |
508 | g_free (mem: result3); |
509 | } |
510 | |
511 | static void |
512 | check_utf16_to_ucs4 (const gunichar2 *utf16, |
513 | glong utf16_len, |
514 | const gunichar *ucs4, |
515 | glong ucs4_len, |
516 | glong error_pos) |
517 | { |
518 | gunichar *result, *result2, *result3; |
519 | glong items_read, items_read2; |
520 | glong items_written, items_written2; |
521 | GError *error, *error2, *error3; |
522 | gint i; |
523 | |
524 | error = NULL; |
525 | result = g_utf16_to_ucs4 (str: utf16, len: utf16_len, items_read: &items_read, items_written: &items_written, error: &error); |
526 | if (utf16[utf16_len] == 0) |
527 | { |
528 | /* check that len == -1 yields identical results */ |
529 | error2 = NULL; |
530 | result2 = g_utf16_to_ucs4 (str: utf16, len: -1, items_read: &items_read2, items_written: &items_written2, error: &error2); |
531 | g_assert (error || items_read2 == items_read); |
532 | g_assert (error || items_written2 == items_written); |
533 | g_assert_cmpint (!!result, ==, !!result2); |
534 | g_assert_cmpint (!!error, ==, !!error2); |
535 | if (result) |
536 | for (i = 0; i <= items_written; i++) |
537 | g_assert (result[i] == result2[i]); |
538 | |
539 | g_free (mem: result2); |
540 | if (error2) |
541 | g_error_free (error: error2); |
542 | } |
543 | |
544 | error3 = NULL; |
545 | result3 = g_utf16_to_ucs4 (str: utf16, len: utf16_len, NULL, NULL, error: &error3); |
546 | |
547 | if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT) |
548 | { |
549 | g_assert_no_error (error); |
550 | g_assert_cmpint (items_read, ==, error_pos); |
551 | g_assert_cmpint (items_read + 1, ==, utf16_len); |
552 | g_assert_cmpint (items_written, ==, ucs4_len); |
553 | g_assert (result); |
554 | for (i = 0; i <= items_written; i++) |
555 | g_assert (result[i] == ucs4[i]); |
556 | g_error_free (error: error3); |
557 | } |
558 | else if (error_pos) |
559 | { |
560 | g_assert (error != NULL); |
561 | g_assert (result == NULL); |
562 | g_assert_cmpint (items_read, ==, error_pos); |
563 | g_error_free (error); |
564 | |
565 | g_assert (error3 != NULL); |
566 | g_assert (result3 == NULL); |
567 | g_error_free (error: error3); |
568 | } |
569 | else |
570 | { |
571 | g_assert_no_error (error); |
572 | g_assert_cmpint (items_read, ==, utf16_len); |
573 | g_assert_cmpint (items_written, ==, ucs4_len); |
574 | g_assert (result); |
575 | for (i = 0; i <= ucs4_len; i++) |
576 | g_assert (result[i] == ucs4[i]); |
577 | |
578 | g_assert_no_error (error3); |
579 | g_assert (result3); |
580 | for (i = 0; i <= ucs4_len; i++) |
581 | g_assert (result3[i] == ucs4[i]); |
582 | } |
583 | |
584 | g_free (mem: result); |
585 | g_free (mem: result3); |
586 | } |
587 | |
588 | static void |
589 | test_unicode_conversions (void) |
590 | { |
591 | const char *utf8; |
592 | gunichar ucs4[100]; |
593 | gunichar2 utf16[100]; |
594 | |
595 | utf8 = "abc" ; |
596 | ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0; |
597 | utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0; |
598 | |
599 | check_utf8_to_ucs4 (utf8, utf8_len: 3, ucs4, ucs4_len: 3, error_pos: 0); |
600 | check_ucs4_to_utf8 (ucs4, ucs4_len: 3, utf8, utf8_len: 3, error_pos: 0); |
601 | check_utf8_to_utf16 (utf8, utf8_len: 3, utf16, utf16_len: 3, error_pos: 0); |
602 | check_utf16_to_utf8 (utf16, utf16_len: 3, utf8, utf8_len: 3, error_pos: 0); |
603 | check_ucs4_to_utf16 (ucs4, ucs4_len: 3, utf16, utf16_len: 3, error_pos: 0); |
604 | check_utf16_to_ucs4 (utf16, utf16_len: 3, ucs4, ucs4_len: 3, error_pos: 0); |
605 | |
606 | utf8 = "\316\261\316\262\316\263" ; |
607 | ucs4[0] = 0x03b1; ucs4[1] = 0x03b2; ucs4[2] = 0x03b3; ucs4[3] = 0; |
608 | utf16[0] = 0x03b1; utf16[1] = 0x03b2; utf16[2] = 0x03b3; utf16[3] = 0; |
609 | |
610 | check_utf8_to_ucs4 (utf8, utf8_len: 6, ucs4, ucs4_len: 3, error_pos: 0); |
611 | check_ucs4_to_utf8 (ucs4, ucs4_len: 3, utf8, utf8_len: 6, error_pos: 0); |
612 | check_utf8_to_utf16 (utf8, utf8_len: 6, utf16, utf16_len: 3, error_pos: 0); |
613 | check_utf16_to_utf8 (utf16, utf16_len: 3, utf8, utf8_len: 6, error_pos: 0); |
614 | check_ucs4_to_utf16 (ucs4, ucs4_len: 3, utf16, utf16_len: 3, error_pos: 0); |
615 | check_utf16_to_ucs4 (utf16, utf16_len: 3, ucs4, ucs4_len: 3, error_pos: 0); |
616 | |
617 | /* partial utf8 character */ |
618 | utf8 = "abc\316" ; |
619 | ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0; |
620 | utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0; |
621 | |
622 | check_utf8_to_ucs4 (utf8, utf8_len: 4, ucs4, ucs4_len: 3, error_pos: 3); |
623 | check_utf8_to_utf16 (utf8, utf8_len: 4, utf16, utf16_len: 3, error_pos: 3); |
624 | |
625 | /* invalid utf8 */ |
626 | utf8 = "abc\316\316" ; |
627 | ucs4[0] = 0; |
628 | utf16[0] = 0; |
629 | |
630 | check_utf8_to_ucs4 (utf8, utf8_len: 5, ucs4, ucs4_len: 0, error_pos: 3); |
631 | check_utf8_to_utf16 (utf8, utf8_len: 5, utf16, utf16_len: 0, error_pos: 3); |
632 | |
633 | /* partial utf16 character */ |
634 | utf8 = "ab" ; |
635 | ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0; |
636 | utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xd801; utf16[3] = 0; |
637 | |
638 | check_utf16_to_utf8 (utf16, utf16_len: 3, utf8, utf8_len: 2, error_pos: 2); |
639 | check_utf16_to_ucs4 (utf16, utf16_len: 3, ucs4, ucs4_len: 2, error_pos: 2); |
640 | |
641 | /* invalid utf16 */ |
642 | utf8 = NULL; |
643 | ucs4[0] = 0; |
644 | utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xdc01; utf16[3] = 0; |
645 | |
646 | check_utf16_to_utf8 (utf16, utf16_len: 3, utf8, utf8_len: 0, error_pos: 2); |
647 | check_utf16_to_ucs4 (utf16, utf16_len: 3, ucs4, ucs4_len: 0, error_pos: 2); |
648 | |
649 | /* invalid ucs4 */ |
650 | utf8 = NULL; |
651 | ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x80000000; ucs4[3] = 0; |
652 | utf16[0] = 0; |
653 | |
654 | check_ucs4_to_utf8 (ucs4, ucs4_len: 3, utf8, utf8_len: 0, error_pos: 2); |
655 | check_ucs4_to_utf16 (ucs4, ucs4_len: 3, utf16, utf16_len: 0, error_pos: 2); |
656 | } |
657 | |
658 | static void |
659 | test_filename_utf8 (void) |
660 | { |
661 | const gchar *filename = "/my/path/to/foo" ; |
662 | gchar *utf8; |
663 | gchar *back; |
664 | GError *error; |
665 | |
666 | error = NULL; |
667 | utf8 = g_filename_to_utf8 (opsysstring: filename, len: -1, NULL, NULL, error: &error); |
668 | g_assert_no_error (error); |
669 | back = g_filename_from_utf8 (utf8string: utf8, len: -1, NULL, NULL, error: &error); |
670 | g_assert_no_error (error); |
671 | g_assert_cmpstr (back, ==, filename); |
672 | |
673 | g_free (mem: utf8); |
674 | g_free (mem: back); |
675 | } |
676 | |
677 | static void |
678 | test_filename_display (void) |
679 | { |
680 | const gchar *filename = "/my/path/to/foo" ; |
681 | char *display; |
682 | |
683 | display = g_filename_display_basename (filename); |
684 | g_assert_cmpstr (display, ==, "foo" ); |
685 | |
686 | g_free (mem: display); |
687 | } |
688 | |
689 | /* g_convert() should accept and produce text buffers with embedded |
690 | * nul bytes/characters. |
691 | */ |
692 | static void |
693 | test_convert_embedded_nul (void) |
694 | { |
695 | gchar *res; |
696 | gsize bytes_read, bytes_written; |
697 | GError *error = NULL; |
698 | |
699 | res = g_convert (str: "ab\0\xf6" , len: 4, to_codeset: "UTF-8" , from_codeset: "ISO-8859-1" , |
700 | bytes_read: &bytes_read, bytes_written: &bytes_written, error: &error); |
701 | g_assert_no_error (error); |
702 | g_assert_cmpuint (bytes_read, ==, 4); |
703 | g_assert_cmpmem (res, bytes_written, "ab\0\xc3\xb6" , 5); |
704 | g_free (mem: res); |
705 | } |
706 | |
707 | static void |
708 | test_locale_to_utf8_embedded_nul (void) |
709 | { |
710 | g_test_trap_subprocess (test_path: "/conversion/locale-to-utf8/embedded-nul/subprocess/utf8" , usec_timeout: 0, test_flags: 0); |
711 | g_test_trap_assert_passed (); |
712 | g_test_trap_subprocess (test_path: "/conversion/locale-to-utf8/embedded-nul/subprocess/iconv" , usec_timeout: 0, test_flags: 0); |
713 | g_test_trap_assert_passed (); |
714 | } |
715 | |
716 | /* Test that embedded nul characters in UTF-8 input to g_locale_to_utf8() |
717 | * result in an error. |
718 | */ |
719 | static void |
720 | test_locale_to_utf8_embedded_nul_utf8 (void) |
721 | { |
722 | gchar *res; |
723 | gsize bytes_read; |
724 | GError *error = NULL; |
725 | |
726 | setlocale (LC_ALL, locale: "" ); |
727 | g_setenv (variable: "CHARSET" , value: "UTF-8" , TRUE); |
728 | g_assert_true (g_get_charset (NULL)); |
729 | |
730 | res = g_locale_to_utf8 (opsysstring: "ab\0c" , len: 4, bytes_read: &bytes_read, NULL, error: &error); |
731 | |
732 | g_assert_null (res); |
733 | g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE); |
734 | g_assert_cmpuint (bytes_read, ==, 2); |
735 | g_error_free (error); |
736 | } |
737 | |
738 | /* Test that embedded nul characters in output of g_locale_to_utf8(), |
739 | * when converted from non-UTF8 input, result in an error. |
740 | */ |
741 | static void |
742 | test_locale_to_utf8_embedded_nul_iconv (void) |
743 | { |
744 | gchar *res; |
745 | GError *error = NULL; |
746 | |
747 | setlocale (LC_ALL, locale: "C" ); |
748 | g_setenv (variable: "CHARSET" , value: "US-ASCII" , TRUE); |
749 | g_assert_false (g_get_charset (NULL)); |
750 | |
751 | res = g_locale_to_utf8 (opsysstring: "ab\0c" , len: 4, NULL, NULL, error: &error); |
752 | |
753 | g_assert_null (res); |
754 | g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL); |
755 | g_error_free (error); |
756 | } |
757 | |
758 | static void |
759 | test_locale_from_utf8_embedded_nul (void) |
760 | { |
761 | g_test_trap_subprocess (test_path: "/conversion/locale-from-utf8/embedded-nul/subprocess/utf8" , usec_timeout: 0, test_flags: 0); |
762 | g_test_trap_assert_passed (); |
763 | g_test_trap_subprocess (test_path: "/conversion/locale-from-utf8/embedded-nul/subprocess/iconv" , usec_timeout: 0, test_flags: 0); |
764 | g_test_trap_assert_passed (); |
765 | } |
766 | |
767 | /* Test that embedded nul characters in input to g_locale_from_utf8(), |
768 | * when converting (copying) to UTF-8 output, result in an error. |
769 | */ |
770 | static void |
771 | test_locale_from_utf8_embedded_nul_utf8 (void) |
772 | { |
773 | gchar *res; |
774 | gsize bytes_read; |
775 | GError *error = NULL; |
776 | |
777 | setlocale (LC_ALL, locale: "" ); |
778 | g_setenv (variable: "CHARSET" , value: "UTF-8" , TRUE); |
779 | g_assert_true (g_get_charset (NULL)); |
780 | |
781 | res = g_locale_from_utf8 (utf8string: "ab\0c" , len: 4, bytes_read: &bytes_read, NULL, error: &error); |
782 | |
783 | g_assert_null (res); |
784 | g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE); |
785 | g_assert_cmpuint (bytes_read, ==, 2); |
786 | g_error_free (error); |
787 | } |
788 | |
789 | /* Test that embedded nul characters in input to g_locale_from_utf8(), |
790 | * when converting to non-UTF-8 output, result in an error. |
791 | */ |
792 | static void |
793 | test_locale_from_utf8_embedded_nul_iconv (void) |
794 | { |
795 | gchar *res; |
796 | gsize bytes_read; |
797 | GError *error = NULL; |
798 | |
799 | setlocale (LC_ALL, locale: "C" ); |
800 | g_setenv (variable: "CHARSET" , value: "US-ASCII" , TRUE); |
801 | g_assert_false (g_get_charset (NULL)); |
802 | |
803 | res = g_locale_from_utf8 (utf8string: "ab\0c" , len: 4, bytes_read: &bytes_read, NULL, error: &error); |
804 | |
805 | g_assert_null (res); |
806 | g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE); |
807 | g_assert_cmpuint (bytes_read, ==, 2); |
808 | g_error_free (error); |
809 | } |
810 | |
811 | static void |
812 | test_filename_to_utf8_embedded_nul (void) |
813 | { |
814 | g_test_trap_subprocess (test_path: "/conversion/filename-to-utf8/embedded-nul/subprocess/utf8" , usec_timeout: 0, test_flags: 0); |
815 | g_test_trap_assert_passed (); |
816 | g_test_trap_subprocess (test_path: "/conversion/filename-to-utf8/embedded-nul/subprocess/iconv" , usec_timeout: 0, test_flags: 0); |
817 | g_test_trap_assert_passed (); |
818 | } |
819 | |
820 | /* Test that embedded nul characters in UTF-8 input to g_filename_to_utf8() |
821 | * result in an error. |
822 | */ |
823 | static void |
824 | test_filename_to_utf8_embedded_nul_utf8 (void) |
825 | { |
826 | gchar *res; |
827 | gsize bytes_read; |
828 | GError *error = NULL; |
829 | |
830 | #ifndef G_OS_WIN32 |
831 | /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */ |
832 | g_setenv (variable: "G_FILENAME_ENCODING" , value: "UTF-8" , TRUE); |
833 | g_assert_true (g_get_filename_charsets (NULL)); |
834 | #endif |
835 | |
836 | res = g_filename_to_utf8 (opsysstring: "ab\0c" , len: 4, bytes_read: &bytes_read, NULL, error: &error); |
837 | |
838 | g_assert_null (res); |
839 | g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE); |
840 | g_assert_cmpuint (bytes_read, ==, 2); |
841 | g_error_free (error); |
842 | } |
843 | |
844 | /* Test that embedded nul characters in non-UTF-8 input of g_filename_to_utf8() |
845 | * result in an error. |
846 | */ |
847 | static void |
848 | test_filename_to_utf8_embedded_nul_iconv (void) |
849 | { |
850 | gchar *res; |
851 | gsize bytes_read; |
852 | GError *error = NULL; |
853 | |
854 | #ifndef G_OS_WIN32 |
855 | /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */ |
856 | g_setenv (variable: "G_FILENAME_ENCODING" , value: "US-ASCII" , TRUE); |
857 | g_assert_false (g_get_filename_charsets (NULL)); |
858 | #endif |
859 | |
860 | res = g_filename_to_utf8 (opsysstring: "ab\0c" , len: 4, bytes_read: &bytes_read, NULL, error: &error); |
861 | |
862 | g_assert_null (res); |
863 | g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE); |
864 | g_assert_cmpuint (bytes_read, ==, 2); |
865 | g_error_free (error); |
866 | } |
867 | |
868 | static void |
869 | test_filename_from_utf8_embedded_nul (void) |
870 | { |
871 | g_test_trap_subprocess (test_path: "/conversion/filename-from-utf8/embedded-nul/subprocess/utf8" , usec_timeout: 0, test_flags: 0); |
872 | g_test_trap_assert_passed (); |
873 | g_test_trap_subprocess (test_path: "/conversion/filename-from-utf8/embedded-nul/subprocess/iconv" , usec_timeout: 0, test_flags: 0); |
874 | g_test_trap_assert_passed (); |
875 | } |
876 | |
877 | /* Test that embedded nul characters in input to g_filename_from_utf8(), |
878 | * when converting (copying) to UTF-8 output, result in an error. |
879 | */ |
880 | static void |
881 | test_filename_from_utf8_embedded_nul_utf8 (void) |
882 | { |
883 | gchar *res; |
884 | gsize bytes_read; |
885 | GError *error = NULL; |
886 | |
887 | #ifndef G_OS_WIN32 |
888 | /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */ |
889 | g_setenv (variable: "G_FILENAME_ENCODING" , value: "UTF-8" , TRUE); |
890 | g_assert_true (g_get_filename_charsets (NULL)); |
891 | #endif |
892 | |
893 | res = g_filename_from_utf8 (utf8string: "ab\0c" , len: 4, bytes_read: &bytes_read, NULL, error: &error); |
894 | |
895 | g_assert_null (res); |
896 | g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE); |
897 | g_assert_cmpuint (bytes_read, ==, 2); |
898 | g_error_free (error); |
899 | } |
900 | |
901 | /* Test that embedded nul characters in input to g_filename_from_utf8(), |
902 | * when converting to non-UTF-8 output, result in an error. |
903 | */ |
904 | static void |
905 | test_filename_from_utf8_embedded_nul_iconv (void) |
906 | { |
907 | gchar *res; |
908 | gsize bytes_read; |
909 | GError *error = NULL; |
910 | |
911 | #ifndef G_OS_WIN32 |
912 | /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */ |
913 | g_setenv (variable: "G_FILENAME_ENCODING" , value: "US-ASCII" , TRUE); |
914 | g_assert_false (g_get_filename_charsets (NULL)); |
915 | #endif |
916 | |
917 | res = g_filename_from_utf8 (utf8string: "ab\0c" , len: 4, bytes_read: &bytes_read, NULL, error: &error); |
918 | |
919 | g_assert_null (res); |
920 | g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE); |
921 | g_assert_cmpuint (bytes_read, ==, 2); |
922 | g_error_free (error); |
923 | } |
924 | |
925 | static void |
926 | test_no_conv (void) |
927 | { |
928 | const gchar *in = "" ; |
929 | gchar *out G_GNUC_UNUSED; |
930 | gsize bytes_read = 0; |
931 | gsize bytes_written = 0; |
932 | GError *error = NULL; |
933 | |
934 | out = g_convert (str: in, len: -1, to_codeset: "XXX" , from_codeset: "UVZ" , |
935 | bytes_read: &bytes_read, bytes_written: &bytes_written, error: &error); |
936 | |
937 | /* error code is unreliable, since we mishandle errno there */ |
938 | g_assert (error && error->domain == G_CONVERT_ERROR); |
939 | g_error_free (error); |
940 | } |
941 | |
942 | int |
943 | main (int argc, char *argv[]) |
944 | { |
945 | g_test_init (argc: &argc, argv: &argv, NULL); |
946 | |
947 | g_test_add_func (testpath: "/conversion/no-conv" , test_func: test_no_conv); |
948 | g_test_add_func (testpath: "/conversion/iconv-state" , test_func: test_iconv_state); |
949 | g_test_add_func (testpath: "/conversion/illegal-sequence" , test_func: test_one_half); |
950 | g_test_add_func (testpath: "/conversion/byte-order" , test_func: test_byte_order); |
951 | g_test_add_func (testpath: "/conversion/unicode" , test_func: test_unicode_conversions); |
952 | g_test_add_func (testpath: "/conversion/filename-utf8" , test_func: test_filename_utf8); |
953 | g_test_add_func (testpath: "/conversion/filename-display" , test_func: test_filename_display); |
954 | g_test_add_func (testpath: "/conversion/convert-embedded-nul" , test_func: test_convert_embedded_nul); |
955 | g_test_add_func (testpath: "/conversion/locale-to-utf8/embedded-nul" , test_func: test_locale_to_utf8_embedded_nul); |
956 | g_test_add_func (testpath: "/conversion/locale-to-utf8/embedded-nul/subprocess/utf8" , test_func: test_locale_to_utf8_embedded_nul_utf8); |
957 | g_test_add_func (testpath: "/conversion/locale-to-utf8/embedded-nul/subprocess/iconv" , test_func: test_locale_to_utf8_embedded_nul_iconv); |
958 | g_test_add_func (testpath: "/conversion/locale-from-utf8/embedded-nul" , test_func: test_locale_from_utf8_embedded_nul); |
959 | g_test_add_func (testpath: "/conversion/locale-from-utf8/embedded-nul/subprocess/utf8" , test_func: test_locale_from_utf8_embedded_nul_utf8); |
960 | g_test_add_func (testpath: "/conversion/locale-from-utf8/embedded-nul/subprocess/iconv" , test_func: test_locale_from_utf8_embedded_nul_iconv); |
961 | g_test_add_func (testpath: "/conversion/filename-to-utf8/embedded-nul" , test_func: test_filename_to_utf8_embedded_nul); |
962 | g_test_add_func (testpath: "/conversion/filename-to-utf8/embedded-nul/subprocess/utf8" , test_func: test_filename_to_utf8_embedded_nul_utf8); |
963 | g_test_add_func (testpath: "/conversion/filename-to-utf8/embedded-nul/subprocess/iconv" , test_func: test_filename_to_utf8_embedded_nul_iconv); |
964 | g_test_add_func (testpath: "/conversion/filename-from-utf8/embedded-nul" , test_func: test_filename_from_utf8_embedded_nul); |
965 | g_test_add_func (testpath: "/conversion/filename-from-utf8/embedded-nul/subprocess/utf8" , test_func: test_filename_from_utf8_embedded_nul_utf8); |
966 | g_test_add_func (testpath: "/conversion/filename-from-utf8/embedded-nul/subprocess/iconv" , test_func: test_filename_from_utf8_embedded_nul_iconv); |
967 | |
968 | return g_test_run (); |
969 | } |
970 | |