1/* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18/*
19 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
20 * file for a list of people on the GLib Team. See the ChangeLog
21 * files for a list of changes. These files are distributed with
22 * GLib at ftp://ftp.gtk.org/pub/gtk/.
23 */
24
25#undef G_DISABLE_ASSERT
26#undef G_LOG_DOMAIN
27
28#include <locale.h>
29#include <string.h>
30
31#include <glib.h>
32
33/* Bug 311337 */
34static void
35test_iconv_state (void)
36{
37 const gchar *in = "\xf4\xe5\xf8\xe5\xed";
38 const gchar *expected = "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d";
39 gchar *out;
40 gsize bytes_read = 0;
41 gsize bytes_written = 0;
42 GError *error = NULL;
43
44 out = g_convert (str: in, len: -1, to_codeset: "UTF-8", from_codeset: "CP1255",
45 bytes_read: &bytes_read, bytes_written: &bytes_written, error: &error);
46
47 if (error && error->code == G_CONVERT_ERROR_NO_CONVERSION)
48 return; /* silently skip if CP1255 is not supported, see bug 467707 */
49
50 g_assert_no_error (error);
51 g_assert_cmpint (bytes_read, ==, 5);
52 g_assert_cmpint (bytes_written, ==, 10);
53 g_assert_cmpstr (out, ==, expected);
54 g_free (mem: out);
55}
56
57/* Some tests involving "vulgar fraction one half" (U+00BD). This is
58 * represented in UTF-8 as \xC2\xBD, in ISO-8859-1 as \xBD, and is not
59 * represented in ISO-8859-15. */
60static void
61test_one_half (void)
62{
63 const gchar *in_utf8 = "\xc2\xbd";
64 gchar *out;
65 gsize bytes_read = 0;
66 gsize bytes_written = 0;
67 GError *error = NULL;
68
69 out = g_convert (str: in_utf8, len: -1,
70 to_codeset: "ISO-8859-1", from_codeset: "UTF-8",
71 bytes_read: &bytes_read, bytes_written: &bytes_written,
72 error: &error);
73
74 g_assert_no_error (error);
75 g_assert_cmpint (bytes_read, ==, 2);
76 g_assert_cmpint (bytes_written, ==, 1);
77 g_assert_cmpstr (out, ==, "\xbd");
78 g_free (mem: out);
79
80 out = g_convert (str: in_utf8, len: -1,
81 to_codeset: "ISO-8859-15", from_codeset: "UTF-8",
82 bytes_read: &bytes_read, bytes_written: &bytes_written,
83 error: &error);
84
85 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
86 g_assert_cmpint (bytes_read, ==, 0);
87 g_assert_cmpint (bytes_written, ==, 0);
88 g_assert_cmpstr (out, ==, NULL);
89 g_clear_error (err: &error);
90 g_free (mem: out);
91
92 out = g_convert_with_fallback (str: in_utf8, len: -1,
93 to_codeset: "ISO8859-15", from_codeset: "UTF-8",
94 fallback: "a",
95 bytes_read: &bytes_read, bytes_written: &bytes_written,
96 error: &error);
97
98 g_assert_no_error (error);
99 g_assert_cmpint (bytes_read, ==, 2);
100 g_assert_cmpint (bytes_written, ==, 1);
101 g_assert_cmpstr (out, ==, "a");
102 g_free (mem: out);
103}
104
105static void
106test_byte_order (void)
107{
108 gchar in_be[4] = { 0xfe, 0xff, 0x03, 0x93}; /* capital gamma */
109 gchar in_le[4] = { 0xff, 0xfe, 0x93, 0x03};
110 const gchar *expected = "\xce\x93";
111 gchar *out;
112 gsize bytes_read = 0;
113 gsize bytes_written = 0;
114 GError *error = NULL;
115
116 out = g_convert (str: in_be, len: sizeof (in_be),
117 to_codeset: "UTF-8", from_codeset: "UTF-16",
118 bytes_read: &bytes_read, bytes_written: &bytes_written,
119 error: &error);
120
121 g_assert_no_error (error);
122 g_assert_cmpint (bytes_read, ==, 4);
123 g_assert_cmpint (bytes_written, ==, 2);
124 g_assert_cmpstr (out, ==, expected);
125 g_free (mem: out);
126
127 out = g_convert (str: in_le, len: sizeof (in_le),
128 to_codeset: "UTF-8", from_codeset: "UTF-16",
129 bytes_read: &bytes_read, bytes_written: &bytes_written,
130 error: &error);
131
132 g_assert_no_error (error);
133 g_assert_cmpint (bytes_read, ==, 4);
134 g_assert_cmpint (bytes_written, ==, 2);
135 g_assert_cmpstr (out, ==, expected);
136 g_free (mem: out);
137}
138
139static void
140check_utf8_to_ucs4 (const char *utf8,
141 gsize utf8_len,
142 const gunichar *ucs4,
143 glong ucs4_len,
144 glong error_pos)
145{
146 gunichar *result, *result2, *result3;
147 glong items_read, items_read2;
148 glong items_written, items_written2;
149 GError *error, *error2, *error3;
150 gint i;
151
152 if (!error_pos)
153 {
154 /* check the fast conversion */
155 result = g_utf8_to_ucs4_fast (str: utf8, len: utf8_len, items_written: &items_written);
156
157 g_assert_cmpint (items_written, ==, ucs4_len);
158 g_assert (result);
159 for (i = 0; i <= items_written; i++)
160 g_assert (result[i] == ucs4[i]);
161
162 g_free (mem: result);
163 }
164
165 error = NULL;
166 result = g_utf8_to_ucs4 (str: utf8, len: utf8_len, items_read: &items_read, items_written: &items_written, error: &error);
167
168 if (utf8_len == strlen (s: utf8))
169 {
170 /* check that len == -1 yields identical results */
171 error2 = NULL;
172 result2 = g_utf8_to_ucs4 (str: utf8, len: -1, items_read: &items_read2, items_written: &items_written2, error: &error2);
173 g_assert (error || items_read2 == items_read);
174 g_assert (error || items_written2 == items_written);
175 g_assert_cmpint (!!result, ==, !!result2);
176 g_assert_cmpint (!!error, ==, !!error2);
177 if (result)
178 for (i = 0; i <= items_written; i++)
179 g_assert (result[i] == result2[i]);
180
181 g_free (mem: result2);
182 if (error2)
183 g_error_free (error: error2);
184 }
185
186 error3 = NULL;
187 result3 = g_utf8_to_ucs4 (str: utf8, len: utf8_len, NULL, NULL, error: &error3);
188
189 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
190 {
191 g_assert_no_error (error);
192 g_assert_cmpint (items_read, ==, error_pos);
193 g_assert_cmpint (items_written, ==, ucs4_len);
194 g_assert (result);
195 for (i = 0; i <= items_written; i++)
196 g_assert (result[i] == ucs4[i]);
197 g_error_free (error: error3);
198 }
199 else if (error_pos)
200 {
201 g_assert (error != NULL);
202 g_assert (result == NULL);
203 g_assert_cmpint (items_read, ==, error_pos);
204 g_error_free (error);
205
206 g_assert (error3 != NULL);
207 g_assert (result3 == NULL);
208 g_error_free (error: error3);
209 }
210 else
211 {
212 g_assert_no_error (error);
213 g_assert_cmpint (items_read, ==, utf8_len);
214 g_assert_cmpint (items_written, ==, ucs4_len);
215 g_assert (result);
216 for (i = 0; i <= items_written; i++)
217 g_assert (result[i] == ucs4[i]);
218
219 g_assert_no_error (error3);
220 g_assert (result3);
221 for (i = 0; i <= ucs4_len; i++)
222 g_assert (result3[i] == ucs4[i]);
223 }
224
225 g_free (mem: result);
226 g_free (mem: result3);
227}
228
229static void
230check_ucs4_to_utf8 (const gunichar *ucs4,
231 glong ucs4_len,
232 const char *utf8,
233 glong utf8_len,
234 glong error_pos)
235{
236 gchar *result, *result2, *result3;
237 glong items_read, items_read2;
238 glong items_written, items_written2;
239 GError *error, *error2, *error3;
240
241 error = NULL;
242 result = g_ucs4_to_utf8 (str: ucs4, len: ucs4_len, items_read: &items_read, items_written: &items_written, error: &error);
243
244 if (ucs4[ucs4_len] == 0)
245 {
246 /* check that len == -1 yields identical results */
247 error2 = NULL;
248 result2 = g_ucs4_to_utf8 (str: ucs4, len: -1, items_read: &items_read2, items_written: &items_written2, error: &error2);
249
250 g_assert (error || items_read2 == items_read);
251 g_assert (error || items_written2 == items_written);
252 g_assert_cmpint (!!result, ==, !!result2);
253 g_assert_cmpint (!!error, ==, !!error2);
254 if (result)
255 g_assert_cmpstr (result, ==, result2);
256
257 g_free (mem: result2);
258 if (error2)
259 g_error_free (error: error2);
260 }
261
262 error3 = NULL;
263 result3 = g_ucs4_to_utf8 (str: ucs4, len: ucs4_len, NULL, NULL, error: &error3);
264
265 if (error_pos)
266 {
267 g_assert (error != NULL);
268 g_assert (result == NULL);
269 g_assert_cmpint (items_read, ==, error_pos);
270 g_error_free (error);
271
272 g_assert (error3 != NULL);
273 g_assert (result3 == NULL);
274 g_error_free (error: error3);
275 }
276 else
277 {
278 g_assert_no_error (error);
279 g_assert_cmpint (items_read, ==, ucs4_len);
280 g_assert_cmpint (items_written, ==, utf8_len);
281 g_assert (result);
282 g_assert_cmpstr (result, ==, utf8);
283
284 g_assert_no_error (error3);
285 g_assert (result3);
286 g_assert_cmpstr (result3, ==, utf8);
287 }
288
289 g_free (mem: result);
290 g_free (mem: result3);
291}
292
293static void
294check_utf8_to_utf16 (const char *utf8,
295 gsize utf8_len,
296 const gunichar2 *utf16,
297 glong utf16_len,
298 glong error_pos)
299{
300 gunichar2 *result, *result2, *result3;
301 glong items_read, items_read2;
302 glong items_written, items_written2;
303 GError *error, *error2, *error3;
304 gint i;
305
306 error = NULL;
307 result = g_utf8_to_utf16 (str: utf8, len: utf8_len, items_read: &items_read, items_written: &items_written, error: &error);
308
309 if (utf8_len == strlen (s: utf8))
310 {
311 /* check that len == -1 yields identical results */
312 error2 = NULL;
313 result2 = g_utf8_to_utf16 (str: utf8, len: -1, items_read: &items_read2, items_written: &items_written2, error: &error2);
314 g_assert (error || items_read2 == items_read);
315 g_assert (error || items_written2 == items_written);
316 g_assert_cmpint (!!result, ==, !!result2);
317 g_assert_cmpint (!!error, ==, !!error2);
318 if (result)
319 for (i = 0; i <= items_written; i++)
320 g_assert (result[i] == result2[i]);
321
322 g_free (mem: result2);
323 if (error2)
324 g_error_free (error: error2);
325 }
326
327 error3 = NULL;
328 result3 = g_utf8_to_utf16 (str: utf8, len: utf8_len, NULL, NULL, error: &error3);
329
330 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
331 {
332 g_assert_no_error (error);
333 g_assert_cmpint (items_read, ==, error_pos);
334 g_assert_cmpint (items_written, ==, utf16_len);
335 g_assert (result);
336 for (i = 0; i <= items_written; i++)
337 g_assert (result[i] == utf16[i]);
338 g_error_free (error: error3);
339 }
340 else if (error_pos)
341 {
342 g_assert (error != NULL);
343 g_assert (result == NULL);
344 g_assert_cmpint (items_read, ==, error_pos);
345 g_error_free (error);
346
347 g_assert (error3 != NULL);
348 g_assert (result3 == NULL);
349 g_error_free (error: error3);
350 }
351 else
352 {
353 g_assert_no_error (error);
354 g_assert_cmpint (items_read, ==, utf8_len);
355 g_assert_cmpint (items_written, ==, utf16_len);
356 g_assert (result);
357 for (i = 0; i <= items_written; i++)
358 g_assert (result[i] == utf16[i]);
359
360 g_assert_no_error (error3);
361 g_assert (result3);
362 for (i = 0; i <= utf16_len; i++)
363 g_assert (result3[i] == utf16[i]);
364 }
365
366 g_free (mem: result);
367 g_free (mem: result3);
368}
369
370static void
371check_utf16_to_utf8 (const gunichar2 *utf16,
372 glong utf16_len,
373 const char *utf8,
374 glong utf8_len,
375 glong error_pos)
376{
377 gchar *result, *result2, *result3;
378 glong items_read, items_read2;
379 glong items_written, items_written2;
380 GError *error, *error2, *error3;
381
382 error = NULL;
383 result = g_utf16_to_utf8 (str: utf16, len: utf16_len, items_read: &items_read, items_written: &items_written, error: &error);
384 if (utf16[utf16_len] == 0)
385 {
386 /* check that len == -1 yields identical results */
387 error2 = NULL;
388 result2 = g_utf16_to_utf8 (str: utf16, len: -1, items_read: &items_read2, items_written: &items_written2, error: &error2);
389
390 g_assert (error || items_read2 == items_read);
391 g_assert (error || items_written2 == items_written);
392 g_assert_cmpint (!!result, ==, !!result2);
393 g_assert_cmpint (!!error, ==, !!error2);
394 if (result)
395 g_assert_cmpstr (result, ==, result2);
396
397 g_free (mem: result2);
398 if (error2)
399 g_error_free (error: error2);
400 }
401
402 error3 = NULL;
403 result3 = g_utf16_to_utf8 (str: utf16, len: utf16_len, NULL, NULL, error: &error3);
404
405 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
406 {
407 g_assert_no_error (error);
408 g_assert_cmpint (items_read, ==, error_pos);
409 g_assert_cmpint (items_read + 1, ==, utf16_len);
410 g_assert_cmpint (items_written, ==, utf8_len);
411 g_assert (result);
412 g_assert_cmpstr (result, ==, utf8);
413 g_error_free (error: error3);
414 }
415 else if (error_pos)
416 {
417 g_assert (error != NULL);
418 g_assert (result == NULL);
419 g_assert_cmpint (items_read, ==, error_pos);
420 g_error_free (error);
421
422 g_assert (error3 != NULL);
423 g_assert (result3 == NULL);
424 g_error_free (error: error3);
425 }
426 else
427 {
428 g_assert_no_error (error);
429 g_assert_cmpint (items_read, ==, utf16_len);
430 g_assert_cmpint (items_written, ==, utf8_len);
431 g_assert (result);
432 g_assert_cmpstr (result, ==, utf8);
433
434 g_assert_no_error (error3);
435 g_assert (result3);
436 g_assert_cmpstr (result3, ==, utf8);
437 }
438
439 g_free (mem: result);
440 g_free (mem: result3);
441}
442
443static void
444check_ucs4_to_utf16 (const gunichar *ucs4,
445 glong ucs4_len,
446 const gunichar2 *utf16,
447 glong utf16_len,
448 glong error_pos)
449{
450 gunichar2 *result, *result2, *result3;
451 glong items_read, items_read2;
452 glong items_written, items_written2;
453 GError *error, *error2, *error3;
454 gint i;
455
456 error = NULL;
457 result = g_ucs4_to_utf16 (str: ucs4, len: ucs4_len, items_read: &items_read, items_written: &items_written, error: &error);
458
459 if (ucs4[ucs4_len] == 0)
460 {
461 /* check that len == -1 yields identical results */
462 error2 = NULL;
463 result2 = g_ucs4_to_utf16 (str: ucs4, len: -1, items_read: &items_read2, items_written: &items_written2, error: &error2);
464
465 g_assert (error || items_read2 == items_read);
466 g_assert (error || items_written2 == items_written);
467 g_assert_cmpint (!!result, ==, !!result2);
468 g_assert_cmpint (!!error, ==, !!error2);
469 if (result)
470 for (i = 0; i <= utf16_len; i++)
471 g_assert (result[i] == result2[i]);
472
473 g_free (mem: result2);
474 if (error2)
475 g_error_free (error: error2);
476 }
477
478 error3 = NULL;
479 result3 = g_ucs4_to_utf16 (str: ucs4, len: -1, NULL, NULL, error: &error3);
480
481 if (error_pos)
482 {
483 g_assert (error != NULL);
484 g_assert (result == NULL);
485 g_assert_cmpint (items_read, ==, error_pos);
486 g_error_free (error);
487
488 g_assert (error3 != NULL);
489 g_assert (result3 == NULL);
490 g_error_free (error: error3);
491 }
492 else
493 {
494 g_assert_no_error (error);
495 g_assert_cmpint (items_read, ==, ucs4_len);
496 g_assert_cmpint (items_written, ==, utf16_len);
497 g_assert (result);
498 for (i = 0; i <= utf16_len; i++)
499 g_assert (result[i] == utf16[i]);
500
501 g_assert_no_error (error3);
502 g_assert (result3);
503 for (i = 0; i <= utf16_len; i++)
504 g_assert (result3[i] == utf16[i]);
505 }
506
507 g_free (mem: result);
508 g_free (mem: result3);
509}
510
511static void
512check_utf16_to_ucs4 (const gunichar2 *utf16,
513 glong utf16_len,
514 const gunichar *ucs4,
515 glong ucs4_len,
516 glong error_pos)
517{
518 gunichar *result, *result2, *result3;
519 glong items_read, items_read2;
520 glong items_written, items_written2;
521 GError *error, *error2, *error3;
522 gint i;
523
524 error = NULL;
525 result = g_utf16_to_ucs4 (str: utf16, len: utf16_len, items_read: &items_read, items_written: &items_written, error: &error);
526 if (utf16[utf16_len] == 0)
527 {
528 /* check that len == -1 yields identical results */
529 error2 = NULL;
530 result2 = g_utf16_to_ucs4 (str: utf16, len: -1, items_read: &items_read2, items_written: &items_written2, error: &error2);
531 g_assert (error || items_read2 == items_read);
532 g_assert (error || items_written2 == items_written);
533 g_assert_cmpint (!!result, ==, !!result2);
534 g_assert_cmpint (!!error, ==, !!error2);
535 if (result)
536 for (i = 0; i <= items_written; i++)
537 g_assert (result[i] == result2[i]);
538
539 g_free (mem: result2);
540 if (error2)
541 g_error_free (error: error2);
542 }
543
544 error3 = NULL;
545 result3 = g_utf16_to_ucs4 (str: utf16, len: utf16_len, NULL, NULL, error: &error3);
546
547 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
548 {
549 g_assert_no_error (error);
550 g_assert_cmpint (items_read, ==, error_pos);
551 g_assert_cmpint (items_read + 1, ==, utf16_len);
552 g_assert_cmpint (items_written, ==, ucs4_len);
553 g_assert (result);
554 for (i = 0; i <= items_written; i++)
555 g_assert (result[i] == ucs4[i]);
556 g_error_free (error: error3);
557 }
558 else if (error_pos)
559 {
560 g_assert (error != NULL);
561 g_assert (result == NULL);
562 g_assert_cmpint (items_read, ==, error_pos);
563 g_error_free (error);
564
565 g_assert (error3 != NULL);
566 g_assert (result3 == NULL);
567 g_error_free (error: error3);
568 }
569 else
570 {
571 g_assert_no_error (error);
572 g_assert_cmpint (items_read, ==, utf16_len);
573 g_assert_cmpint (items_written, ==, ucs4_len);
574 g_assert (result);
575 for (i = 0; i <= ucs4_len; i++)
576 g_assert (result[i] == ucs4[i]);
577
578 g_assert_no_error (error3);
579 g_assert (result3);
580 for (i = 0; i <= ucs4_len; i++)
581 g_assert (result3[i] == ucs4[i]);
582 }
583
584 g_free (mem: result);
585 g_free (mem: result3);
586}
587
588static void
589test_unicode_conversions (void)
590{
591 const char *utf8;
592 gunichar ucs4[100];
593 gunichar2 utf16[100];
594
595 utf8 = "abc";
596 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
597 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
598
599 check_utf8_to_ucs4 (utf8, utf8_len: 3, ucs4, ucs4_len: 3, error_pos: 0);
600 check_ucs4_to_utf8 (ucs4, ucs4_len: 3, utf8, utf8_len: 3, error_pos: 0);
601 check_utf8_to_utf16 (utf8, utf8_len: 3, utf16, utf16_len: 3, error_pos: 0);
602 check_utf16_to_utf8 (utf16, utf16_len: 3, utf8, utf8_len: 3, error_pos: 0);
603 check_ucs4_to_utf16 (ucs4, ucs4_len: 3, utf16, utf16_len: 3, error_pos: 0);
604 check_utf16_to_ucs4 (utf16, utf16_len: 3, ucs4, ucs4_len: 3, error_pos: 0);
605
606 utf8 = "\316\261\316\262\316\263";
607 ucs4[0] = 0x03b1; ucs4[1] = 0x03b2; ucs4[2] = 0x03b3; ucs4[3] = 0;
608 utf16[0] = 0x03b1; utf16[1] = 0x03b2; utf16[2] = 0x03b3; utf16[3] = 0;
609
610 check_utf8_to_ucs4 (utf8, utf8_len: 6, ucs4, ucs4_len: 3, error_pos: 0);
611 check_ucs4_to_utf8 (ucs4, ucs4_len: 3, utf8, utf8_len: 6, error_pos: 0);
612 check_utf8_to_utf16 (utf8, utf8_len: 6, utf16, utf16_len: 3, error_pos: 0);
613 check_utf16_to_utf8 (utf16, utf16_len: 3, utf8, utf8_len: 6, error_pos: 0);
614 check_ucs4_to_utf16 (ucs4, ucs4_len: 3, utf16, utf16_len: 3, error_pos: 0);
615 check_utf16_to_ucs4 (utf16, utf16_len: 3, ucs4, ucs4_len: 3, error_pos: 0);
616
617 /* partial utf8 character */
618 utf8 = "abc\316";
619 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
620 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
621
622 check_utf8_to_ucs4 (utf8, utf8_len: 4, ucs4, ucs4_len: 3, error_pos: 3);
623 check_utf8_to_utf16 (utf8, utf8_len: 4, utf16, utf16_len: 3, error_pos: 3);
624
625 /* invalid utf8 */
626 utf8 = "abc\316\316";
627 ucs4[0] = 0;
628 utf16[0] = 0;
629
630 check_utf8_to_ucs4 (utf8, utf8_len: 5, ucs4, ucs4_len: 0, error_pos: 3);
631 check_utf8_to_utf16 (utf8, utf8_len: 5, utf16, utf16_len: 0, error_pos: 3);
632
633 /* partial utf16 character */
634 utf8 = "ab";
635 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0;
636 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xd801; utf16[3] = 0;
637
638 check_utf16_to_utf8 (utf16, utf16_len: 3, utf8, utf8_len: 2, error_pos: 2);
639 check_utf16_to_ucs4 (utf16, utf16_len: 3, ucs4, ucs4_len: 2, error_pos: 2);
640
641 /* invalid utf16 */
642 utf8 = NULL;
643 ucs4[0] = 0;
644 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xdc01; utf16[3] = 0;
645
646 check_utf16_to_utf8 (utf16, utf16_len: 3, utf8, utf8_len: 0, error_pos: 2);
647 check_utf16_to_ucs4 (utf16, utf16_len: 3, ucs4, ucs4_len: 0, error_pos: 2);
648
649 /* invalid ucs4 */
650 utf8 = NULL;
651 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x80000000; ucs4[3] = 0;
652 utf16[0] = 0;
653
654 check_ucs4_to_utf8 (ucs4, ucs4_len: 3, utf8, utf8_len: 0, error_pos: 2);
655 check_ucs4_to_utf16 (ucs4, ucs4_len: 3, utf16, utf16_len: 0, error_pos: 2);
656}
657
658static void
659test_filename_utf8 (void)
660{
661 const gchar *filename = "/my/path/to/foo";
662 gchar *utf8;
663 gchar *back;
664 GError *error;
665
666 error = NULL;
667 utf8 = g_filename_to_utf8 (opsysstring: filename, len: -1, NULL, NULL, error: &error);
668 g_assert_no_error (error);
669 back = g_filename_from_utf8 (utf8string: utf8, len: -1, NULL, NULL, error: &error);
670 g_assert_no_error (error);
671 g_assert_cmpstr (back, ==, filename);
672
673 g_free (mem: utf8);
674 g_free (mem: back);
675}
676
677static void
678test_filename_display (void)
679{
680 const gchar *filename = "/my/path/to/foo";
681 char *display;
682
683 display = g_filename_display_basename (filename);
684 g_assert_cmpstr (display, ==, "foo");
685
686 g_free (mem: display);
687}
688
689/* g_convert() should accept and produce text buffers with embedded
690 * nul bytes/characters.
691 */
692static void
693test_convert_embedded_nul (void)
694{
695 gchar *res;
696 gsize bytes_read, bytes_written;
697 GError *error = NULL;
698
699 res = g_convert (str: "ab\0\xf6", len: 4, to_codeset: "UTF-8", from_codeset: "ISO-8859-1",
700 bytes_read: &bytes_read, bytes_written: &bytes_written, error: &error);
701 g_assert_no_error (error);
702 g_assert_cmpuint (bytes_read, ==, 4);
703 g_assert_cmpmem (res, bytes_written, "ab\0\xc3\xb6", 5);
704 g_free (mem: res);
705}
706
707static void
708test_locale_to_utf8_embedded_nul (void)
709{
710 g_test_trap_subprocess (test_path: "/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", usec_timeout: 0, test_flags: 0);
711 g_test_trap_assert_passed ();
712 g_test_trap_subprocess (test_path: "/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", usec_timeout: 0, test_flags: 0);
713 g_test_trap_assert_passed ();
714}
715
716/* Test that embedded nul characters in UTF-8 input to g_locale_to_utf8()
717 * result in an error.
718 */
719static void
720test_locale_to_utf8_embedded_nul_utf8 (void)
721{
722 gchar *res;
723 gsize bytes_read;
724 GError *error = NULL;
725
726 setlocale (LC_ALL, locale: "");
727 g_setenv (variable: "CHARSET", value: "UTF-8", TRUE);
728 g_assert_true (g_get_charset (NULL));
729
730 res = g_locale_to_utf8 (opsysstring: "ab\0c", len: 4, bytes_read: &bytes_read, NULL, error: &error);
731
732 g_assert_null (res);
733 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
734 g_assert_cmpuint (bytes_read, ==, 2);
735 g_error_free (error);
736}
737
738/* Test that embedded nul characters in output of g_locale_to_utf8(),
739 * when converted from non-UTF8 input, result in an error.
740 */
741static void
742test_locale_to_utf8_embedded_nul_iconv (void)
743{
744 gchar *res;
745 GError *error = NULL;
746
747 setlocale (LC_ALL, locale: "C");
748 g_setenv (variable: "CHARSET", value: "US-ASCII", TRUE);
749 g_assert_false (g_get_charset (NULL));
750
751 res = g_locale_to_utf8 (opsysstring: "ab\0c", len: 4, NULL, NULL, error: &error);
752
753 g_assert_null (res);
754 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL);
755 g_error_free (error);
756}
757
758static void
759test_locale_from_utf8_embedded_nul (void)
760{
761 g_test_trap_subprocess (test_path: "/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", usec_timeout: 0, test_flags: 0);
762 g_test_trap_assert_passed ();
763 g_test_trap_subprocess (test_path: "/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", usec_timeout: 0, test_flags: 0);
764 g_test_trap_assert_passed ();
765}
766
767/* Test that embedded nul characters in input to g_locale_from_utf8(),
768 * when converting (copying) to UTF-8 output, result in an error.
769 */
770static void
771test_locale_from_utf8_embedded_nul_utf8 (void)
772{
773 gchar *res;
774 gsize bytes_read;
775 GError *error = NULL;
776
777 setlocale (LC_ALL, locale: "");
778 g_setenv (variable: "CHARSET", value: "UTF-8", TRUE);
779 g_assert_true (g_get_charset (NULL));
780
781 res = g_locale_from_utf8 (utf8string: "ab\0c", len: 4, bytes_read: &bytes_read, NULL, error: &error);
782
783 g_assert_null (res);
784 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
785 g_assert_cmpuint (bytes_read, ==, 2);
786 g_error_free (error);
787}
788
789/* Test that embedded nul characters in input to g_locale_from_utf8(),
790 * when converting to non-UTF-8 output, result in an error.
791 */
792static void
793test_locale_from_utf8_embedded_nul_iconv (void)
794{
795 gchar *res;
796 gsize bytes_read;
797 GError *error = NULL;
798
799 setlocale (LC_ALL, locale: "C");
800 g_setenv (variable: "CHARSET", value: "US-ASCII", TRUE);
801 g_assert_false (g_get_charset (NULL));
802
803 res = g_locale_from_utf8 (utf8string: "ab\0c", len: 4, bytes_read: &bytes_read, NULL, error: &error);
804
805 g_assert_null (res);
806 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
807 g_assert_cmpuint (bytes_read, ==, 2);
808 g_error_free (error);
809}
810
811static void
812test_filename_to_utf8_embedded_nul (void)
813{
814 g_test_trap_subprocess (test_path: "/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", usec_timeout: 0, test_flags: 0);
815 g_test_trap_assert_passed ();
816 g_test_trap_subprocess (test_path: "/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", usec_timeout: 0, test_flags: 0);
817 g_test_trap_assert_passed ();
818}
819
820/* Test that embedded nul characters in UTF-8 input to g_filename_to_utf8()
821 * result in an error.
822 */
823static void
824test_filename_to_utf8_embedded_nul_utf8 (void)
825{
826 gchar *res;
827 gsize bytes_read;
828 GError *error = NULL;
829
830#ifndef G_OS_WIN32
831 /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
832 g_setenv (variable: "G_FILENAME_ENCODING", value: "UTF-8", TRUE);
833 g_assert_true (g_get_filename_charsets (NULL));
834#endif
835
836 res = g_filename_to_utf8 (opsysstring: "ab\0c", len: 4, bytes_read: &bytes_read, NULL, error: &error);
837
838 g_assert_null (res);
839 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
840 g_assert_cmpuint (bytes_read, ==, 2);
841 g_error_free (error);
842}
843
844/* Test that embedded nul characters in non-UTF-8 input of g_filename_to_utf8()
845 * result in an error.
846 */
847static void
848test_filename_to_utf8_embedded_nul_iconv (void)
849{
850 gchar *res;
851 gsize bytes_read;
852 GError *error = NULL;
853
854#ifndef G_OS_WIN32
855 /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
856 g_setenv (variable: "G_FILENAME_ENCODING", value: "US-ASCII", TRUE);
857 g_assert_false (g_get_filename_charsets (NULL));
858#endif
859
860 res = g_filename_to_utf8 (opsysstring: "ab\0c", len: 4, bytes_read: &bytes_read, NULL, error: &error);
861
862 g_assert_null (res);
863 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
864 g_assert_cmpuint (bytes_read, ==, 2);
865 g_error_free (error);
866}
867
868static void
869test_filename_from_utf8_embedded_nul (void)
870{
871 g_test_trap_subprocess (test_path: "/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", usec_timeout: 0, test_flags: 0);
872 g_test_trap_assert_passed ();
873 g_test_trap_subprocess (test_path: "/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", usec_timeout: 0, test_flags: 0);
874 g_test_trap_assert_passed ();
875}
876
877/* Test that embedded nul characters in input to g_filename_from_utf8(),
878 * when converting (copying) to UTF-8 output, result in an error.
879 */
880static void
881test_filename_from_utf8_embedded_nul_utf8 (void)
882{
883 gchar *res;
884 gsize bytes_read;
885 GError *error = NULL;
886
887#ifndef G_OS_WIN32
888 /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
889 g_setenv (variable: "G_FILENAME_ENCODING", value: "UTF-8", TRUE);
890 g_assert_true (g_get_filename_charsets (NULL));
891#endif
892
893 res = g_filename_from_utf8 (utf8string: "ab\0c", len: 4, bytes_read: &bytes_read, NULL, error: &error);
894
895 g_assert_null (res);
896 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
897 g_assert_cmpuint (bytes_read, ==, 2);
898 g_error_free (error);
899}
900
901/* Test that embedded nul characters in input to g_filename_from_utf8(),
902 * when converting to non-UTF-8 output, result in an error.
903 */
904static void
905test_filename_from_utf8_embedded_nul_iconv (void)
906{
907 gchar *res;
908 gsize bytes_read;
909 GError *error = NULL;
910
911#ifndef G_OS_WIN32
912 /* G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() */
913 g_setenv (variable: "G_FILENAME_ENCODING", value: "US-ASCII", TRUE);
914 g_assert_false (g_get_filename_charsets (NULL));
915#endif
916
917 res = g_filename_from_utf8 (utf8string: "ab\0c", len: 4, bytes_read: &bytes_read, NULL, error: &error);
918
919 g_assert_null (res);
920 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
921 g_assert_cmpuint (bytes_read, ==, 2);
922 g_error_free (error);
923}
924
925static void
926test_no_conv (void)
927{
928 const gchar *in = "";
929 gchar *out G_GNUC_UNUSED;
930 gsize bytes_read = 0;
931 gsize bytes_written = 0;
932 GError *error = NULL;
933
934 out = g_convert (str: in, len: -1, to_codeset: "XXX", from_codeset: "UVZ",
935 bytes_read: &bytes_read, bytes_written: &bytes_written, error: &error);
936
937 /* error code is unreliable, since we mishandle errno there */
938 g_assert (error && error->domain == G_CONVERT_ERROR);
939 g_error_free (error);
940}
941
942int
943main (int argc, char *argv[])
944{
945 g_test_init (argc: &argc, argv: &argv, NULL);
946
947 g_test_add_func (testpath: "/conversion/no-conv", test_func: test_no_conv);
948 g_test_add_func (testpath: "/conversion/iconv-state", test_func: test_iconv_state);
949 g_test_add_func (testpath: "/conversion/illegal-sequence", test_func: test_one_half);
950 g_test_add_func (testpath: "/conversion/byte-order", test_func: test_byte_order);
951 g_test_add_func (testpath: "/conversion/unicode", test_func: test_unicode_conversions);
952 g_test_add_func (testpath: "/conversion/filename-utf8", test_func: test_filename_utf8);
953 g_test_add_func (testpath: "/conversion/filename-display", test_func: test_filename_display);
954 g_test_add_func (testpath: "/conversion/convert-embedded-nul", test_func: test_convert_embedded_nul);
955 g_test_add_func (testpath: "/conversion/locale-to-utf8/embedded-nul", test_func: test_locale_to_utf8_embedded_nul);
956 g_test_add_func (testpath: "/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", test_func: test_locale_to_utf8_embedded_nul_utf8);
957 g_test_add_func (testpath: "/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", test_func: test_locale_to_utf8_embedded_nul_iconv);
958 g_test_add_func (testpath: "/conversion/locale-from-utf8/embedded-nul", test_func: test_locale_from_utf8_embedded_nul);
959 g_test_add_func (testpath: "/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", test_func: test_locale_from_utf8_embedded_nul_utf8);
960 g_test_add_func (testpath: "/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", test_func: test_locale_from_utf8_embedded_nul_iconv);
961 g_test_add_func (testpath: "/conversion/filename-to-utf8/embedded-nul", test_func: test_filename_to_utf8_embedded_nul);
962 g_test_add_func (testpath: "/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", test_func: test_filename_to_utf8_embedded_nul_utf8);
963 g_test_add_func (testpath: "/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", test_func: test_filename_to_utf8_embedded_nul_iconv);
964 g_test_add_func (testpath: "/conversion/filename-from-utf8/embedded-nul", test_func: test_filename_from_utf8_embedded_nul);
965 g_test_add_func (testpath: "/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", test_func: test_filename_from_utf8_embedded_nul_utf8);
966 g_test_add_func (testpath: "/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", test_func: test_filename_from_utf8_embedded_nul_iconv);
967
968 return g_test_run ();
969}
970

/* Source: gtk/subprojects/glib/glib/tests/convert.c */