/* convert.c - gtk/subprojects/glib/glib/tests/convert.c */

/* GLIB - Library of useful routines for C programming
 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
17
/*
 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
 * file for a list of people on the GLib Team. See the ChangeLog
 * files for a list of changes. These files are distributed with
 * GLib at ftp://ftp.gtk.org/pub/gtk/.
 */
24
25	#undef G_DISABLE_ASSERT
26	#undef G_LOG_DOMAIN
27
28	#include <locale.h>
29	#include <string.h>
30
31	#include <glib.h>
32
33	/ Bug 311337 /
34	static void
35	test_iconv_state (void)
36	{
37	const gchar *in = "\xf4\xe5\xf8\xe5\xed";
38	const gchar *expected = "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d";
39	gchar *out;
40	gsize bytes_read = `0`;
41	gsize bytes_written = `0`;
42	GError *error = NULL;
43
44	out = g_convert (str: in, len: -`1`, to_codeset: "UTF-8", from_codeset: "CP1255",
45	bytes_read: &bytes_read, bytes_written: &bytes_written, error: &error);
46
47	if (error && error->code == G_CONVERT_ERROR_NO_CONVERSION)
48	return; / silently skip if CP1255 is not supported, see bug 467707 /
49
50	g_assert_no_error (error);
51	g_assert_cmpint (bytes_read, ==, `5`);
52	g_assert_cmpint (bytes_written, ==, `10`);
53	g_assert_cmpstr (out, ==, expected);
54	g_free (mem: out);
55	}
56
57	/ Some tests involving "vulgar fraction one half" (U+00BD). This is*
58	* represented in UTF-8 as \xC2\xBD, in ISO-8859-1 as \xBD, and is not
59	* represented in ISO-8859-15. */
60	static void
61	test_one_half (void)
62	{
63	const gchar *in_utf8 = "\xc2\xbd";
64	gchar *out;
65	gsize bytes_read = `0`;
66	gsize bytes_written = `0`;
67	GError *error = NULL;
68
69	out = g_convert (str: in_utf8, len: -`1`,
70	to_codeset: "ISO-8859-1", from_codeset: "UTF-8",
71	bytes_read: &bytes_read, bytes_written: &bytes_written,
72	error: &error);
73
74	g_assert_no_error (error);
75	g_assert_cmpint (bytes_read, ==, `2`);
76	g_assert_cmpint (bytes_written, ==, `1`);
77	g_assert_cmpstr (out, ==, "\xbd");
78	g_free (mem: out);
79
80	out = g_convert (str: in_utf8, len: -`1`,
81	to_codeset: "ISO-8859-15", from_codeset: "UTF-8",
82	bytes_read: &bytes_read, bytes_written: &bytes_written,
83	error: &error);
84
85	g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
86	g_assert_cmpint (bytes_read, ==, `0`);
87	g_assert_cmpint (bytes_written, ==, `0`);
88	g_assert_cmpstr (out, ==, NULL);
89	g_clear_error (err: &error);
90	g_free (mem: out);
91
92	out = g_convert_with_fallback (str: in_utf8, len: -`1`,
93	to_codeset: "ISO8859-15", from_codeset: "UTF-8",
94	fallback: "a",
95	bytes_read: &bytes_read, bytes_written: &bytes_written,
96	error: &error);
97
98	g_assert_no_error (error);
99	g_assert_cmpint (bytes_read, ==, `2`);
100	g_assert_cmpint (bytes_written, ==, `1`);
101	g_assert_cmpstr (out, ==, "a");
102	g_free (mem: out);
103	}
104
105	static void
106	test_byte_order (void)
107	{
108	gchar in_be[`4`] = { `0xfe`, `0xff`, `0x03`, `0x93`}; / capital gamma /
109	gchar in_le[`4`] = { `0xff`, `0xfe`, `0x93`, `0x03`};
110	const gchar *expected = "\xce\x93";
111	gchar *out;
112	gsize bytes_read = `0`;
113	gsize bytes_written = `0`;
114	GError *error = NULL;
115
116	out = g_convert (str: in_be, len: sizeof (in_be),
117	to_codeset: "UTF-8", from_codeset: "UTF-16",
118	bytes_read: &bytes_read, bytes_written: &bytes_written,
119	error: &error);
120
121	g_assert_no_error (error);
122	g_assert_cmpint (bytes_read, ==, `4`);
123	g_assert_cmpint (bytes_written, ==, `2`);
124	g_assert_cmpstr (out, ==, expected);
125	g_free (mem: out);
126
127	out = g_convert (str: in_le, len: sizeof (in_le),
128	to_codeset: "UTF-8", from_codeset: "UTF-16",
129	bytes_read: &bytes_read, bytes_written: &bytes_written,
130	error: &error);
131
132	g_assert_no_error (error);
133	g_assert_cmpint (bytes_read, ==, `4`);
134	g_assert_cmpint (bytes_written, ==, `2`);
135	g_assert_cmpstr (out, ==, expected);
136	g_free (mem: out);
137	}
138
139	static void
140	check_utf8_to_ucs4 (const char *utf8,
141	gsize utf8_len,
142	const gunichar *ucs4,
143	glong ucs4_len,
144	glong error_pos)
145	{
146	gunichar result, result2, *result3;
147	glong items_read, items_read2;
148	glong items_written, items_written2;
149	GError error, error2, *error3;
150	gint i;
151
152	if (!error_pos)
153	{
154	/ check the fast conversion /
155	result = g_utf8_to_ucs4_fast (str: utf8, len: utf8_len, items_written: &items_written);
156
157	g_assert_cmpint (items_written, ==, ucs4_len);
158	g_assert (result);
159	for (i = `0`; i <= items_written; i++)
160	g_assert (result[i] == ucs4[i]);
161
162	g_free (mem: result);
163	}
164
165	error = NULL;
166	result = g_utf8_to_ucs4 (str: utf8, len: utf8_len, items_read: &items_read, items_written: &items_written, error: &error);
167
168	if (utf8_len == strlen (s: utf8))
169	{
170	/ check that len == -1 yields identical results /
171	error2 = NULL;
172	result2 = g_utf8_to_ucs4 (str: utf8, len: -`1`, items_read: &items_read2, items_written: &items_written2, error: &error2);
173	g_assert (error \|\| items_read2 == items_read);
174	g_assert (error \|\| items_written2 == items_written);
175	g_assert_cmpint (!!result, ==, !!result2);
176	g_assert_cmpint (!!error, ==, !!error2);
177	if (result)
178	for (i = `0`; i <= items_written; i++)
179	g_assert (result[i] == result2[i]);
180
181	g_free (mem: result2);
182	if (error2)
183	g_error_free (error: error2);
184	}
185
186	error3 = NULL;
187	result3 = g_utf8_to_ucs4 (str: utf8, len: utf8_len, NULL, NULL, error: &error3);
188
189	if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
190	{
191	g_assert_no_error (error);
192	g_assert_cmpint (items_read, ==, error_pos);
193	g_assert_cmpint (items_written, ==, ucs4_len);
194	g_assert (result);
195	for (i = `0`; i <= items_written; i++)
196	g_assert (result[i] == ucs4[i]);
197	g_error_free (error: error3);
198	}
199	else if (error_pos)
200	{
201	g_assert (error != NULL);
202	g_assert (result == NULL);
203	g_assert_cmpint (items_read, ==, error_pos);
204	g_error_free (error);
205
206	g_assert (error3 != NULL);
207	g_assert (result3 == NULL);
208	g_error_free (error: error3);
209	}
210	else
211	{
212	g_assert_no_error (error);
213	g_assert_cmpint (items_read, ==, utf8_len);
214	g_assert_cmpint (items_written, ==, ucs4_len);
215	g_assert (result);
216	for (i = `0`; i <= items_written; i++)
217	g_assert (result[i] == ucs4[i]);
218
219	g_assert_no_error (error3);
220	g_assert (result3);
221	for (i = `0`; i <= ucs4_len; i++)
222	g_assert (result3[i] == ucs4[i]);
223	}
224
225	g_free (mem: result);
226	g_free (mem: result3);
227	}
228
229	static void
230	check_ucs4_to_utf8 (const gunichar *ucs4,
231	glong ucs4_len,
232	const char *utf8,
233	glong utf8_len,
234	glong error_pos)
235	{
236	gchar result, result2, *result3;
237	glong items_read, items_read2;
238	glong items_written, items_written2;
239	GError error, error2, *error3;
240
241	error = NULL;
242	result = g_ucs4_to_utf8 (str: ucs4, len: ucs4_len, items_read: &items_read, items_written: &items_written, error: &error);
243
244	if (ucs4[ucs4_len] == `0`)
245	{
246	/ check that len == -1 yields identical results /
247	error2 = NULL;
248	result2 = g_ucs4_to_utf8 (str: ucs4, len: -`1`, items_read: &items_read2, items_written: &items_written2, error: &error2);
249
250	g_assert (error \|\| items_read2 == items_read);
251	g_assert (error \|\| items_written2 == items_written);
252	g_assert_cmpint (!!result, ==, !!result2);
253	g_assert_cmpint (!!error, ==, !!error2);
254	if (result)
255	g_assert_cmpstr (result, ==, result2);
256
257	g_free (mem: result2);
258	if (error2)
259	g_error_free (error: error2);
260	}
261
262	error3 = NULL;
263	result3 = g_ucs4_to_utf8 (str: ucs4, len: ucs4_len, NULL, NULL, error: &error3);
264
265	if (error_pos)
266	{
267	g_assert (error != NULL);
268	g_assert (result == NULL);
269	g_assert_cmpint (items_read, ==, error_pos);
270	g_error_free (error);
271
272	g_assert (error3 != NULL);
273	g_assert (result3 == NULL);
274	g_error_free (error: error3);
275	}
276	else
277	{
278	g_assert_no_error (error);
279	g_assert_cmpint (items_read, ==, ucs4_len);
280	g_assert_cmpint (items_written, ==, utf8_len);
281	g_assert (result);
282	g_assert_cmpstr (result, ==, utf8);
283
284	g_assert_no_error (error3);
285	g_assert (result3);
286	g_assert_cmpstr (result3, ==, utf8);
287	}
288
289	g_free (mem: result);
290	g_free (mem: result3);
291	}
292
293	static void
294	check_utf8_to_utf16 (const char *utf8,
295	gsize utf8_len,
296	const gunichar2 *utf16,
297	glong utf16_len,
298	glong error_pos)
299	{
300	gunichar2 result, result2, *result3;
301	glong items_read, items_read2;
302	glong items_written, items_written2;
303	GError error, error2, *error3;
304	gint i;
305
306	error = NULL;
307	result = g_utf8_to_utf16 (str: utf8, len: utf8_len, items_read: &items_read, items_written: &items_written, error: &error);
308
309	if (utf8_len == strlen (s: utf8))
310	{
311	/ check that len == -1 yields identical results /
312	error2 = NULL;
313	result2 = g_utf8_to_utf16 (str: utf8, len: -`1`, items_read: &items_read2, items_written: &items_written2, error: &error2);
314	g_assert (error \|\| items_read2 == items_read);
315	g_assert (error \|\| items_written2 == items_written);
316	g_assert_cmpint (!!result, ==, !!result2);
317	g_assert_cmpint (!!error, ==, !!error2);
318	if (result)
319	for (i = `0`; i <= items_written; i++)
320	g_assert (result[i] == result2[i]);
321
322	g_free (mem: result2);
323	if (error2)
324	g_error_free (error: error2);
325	}
326
327	error3 = NULL;
328	result3 = g_utf8_to_utf16 (str: utf8, len: utf8_len, NULL, NULL, error: &error3);
329
330	if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
331	{
332	g_assert_no_error (error);
333	g_assert_cmpint (items_read, ==, error_pos);
334	g_assert_cmpint (items_written, ==, utf16_len);
335	g_assert (result);
336	for (i = `0`; i <= items_written; i++)
337	g_assert (result[i] == utf16[i]);
338	g_error_free (error: error3);
339	}
340	else if (error_pos)
341	{
342	g_assert (error != NULL);
343	g_assert (result == NULL);
344	g_assert_cmpint (items_read, ==, error_pos);
345	g_error_free (error);
346
347	g_assert (error3 != NULL);
348	g_assert (result3 == NULL);
349	g_error_free (error: error3);
350	}
351	else
352	{
353	g_assert_no_error (error);
354	g_assert_cmpint (items_read, ==, utf8_len);
355	g_assert_cmpint (items_written, ==, utf16_len);
356	g_assert (result);
357	for (i = `0`; i <= items_written; i++)
358	g_assert (result[i] == utf16[i]);
359
360	g_assert_no_error (error3);
361	g_assert (result3);
362	for (i = `0`; i <= utf16_len; i++)
363	g_assert (result3[i] == utf16[i]);
364	}
365
366	g_free (mem: result);
367	g_free (mem: result3);
368	}
369
370	static void
371	check_utf16_to_utf8 (const gunichar2 *utf16,
372	glong utf16_len,
373	const char *utf8,
374	glong utf8_len,
375	glong error_pos)
376	{
377	gchar result, result2, *result3;
378	glong items_read, items_read2;
379	glong items_written, items_written2;
380	GError error, error2, *error3;
381
382	error = NULL;
383	result = g_utf16_to_utf8 (str: utf16, len: utf16_len, items_read: &items_read, items_written: &items_written, error: &error);
384	if (utf16[utf16_len] == `0`)
385	{
386	/ check that len == -1 yields identical results /
387	error2 = NULL;
388	result2 = g_utf16_to_utf8 (str: utf16, len: -`1`, items_read: &items_read2, items_written: &items_written2, error: &error2);
389
390	g_assert (error \|\| items_read2 == items_read);
391	g_assert (error \|\| items_written2 == items_written);
392	g_assert_cmpint (!!result, ==, !!result2);
393	g_assert_cmpint (!!error, ==, !!error2);
394	if (result)
395	g_assert_cmpstr (result, ==, result2);
396
397	g_free (mem: result2);
398	if (error2)
399	g_error_free (error: error2);
400	}
401
402	error3 = NULL;
403	result3 = g_utf16_to_utf8 (str: utf16, len: utf16_len, NULL, NULL, error: &error3);
404
405	if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
406	{
407	g_assert_no_error (error);
408	g_assert_cmpint (items_read, ==, error_pos);
409	g_assert_cmpint (items_read + `1`, ==, utf16_len);
410	g_assert_cmpint (items_written, ==, utf8_len);
411	g_assert (result);
412	g_assert_cmpstr (result, ==, utf8);
413	g_error_free (error: error3);
414	}
415	else if (error_pos)
416	{
417	g_assert (error != NULL);
418	g_assert (result == NULL);
419	g_assert_cmpint (items_read, ==, error_pos);
420	g_error_free (error);
421
422	g_assert (error3 != NULL);
423	g_assert (result3 == NULL);
424	g_error_free (error: error3);
425	}
426	else
427	{
428	g_assert_no_error (error);
429	g_assert_cmpint (items_read, ==, utf16_len);
430	g_assert_cmpint (items_written, ==, utf8_len);
431	g_assert (result);
432	g_assert_cmpstr (result, ==, utf8);
433
434	g_assert_no_error (error3);
435	g_assert (result3);
436	g_assert_cmpstr (result3, ==, utf8);
437	}
438
439	g_free (mem: result);
440	g_free (mem: result3);
441	}
442
443	static void
444	check_ucs4_to_utf16 (const gunichar *ucs4,
445	glong ucs4_len,
446	const gunichar2 *utf16,
447	glong utf16_len,
448	glong error_pos)
449	{
450	gunichar2 result, result2, *result3;
451	glong items_read, items_read2;
452	glong items_written, items_written2;
453	GError error, error2, *error3;
454	gint i;
455
456	error = NULL;
457	result = g_ucs4_to_utf16 (str: ucs4, len: ucs4_len, items_read: &items_read, items_written: &items_written, error: &error);
458
459	if (ucs4[ucs4_len] == `0`)
460	{
461	/ check that len == -1 yields identical results /
462	error2 = NULL;
463	result2 = g_ucs4_to_utf16 (str: ucs4, len: -`1`, items_read: &items_read2, items_written: &items_written2, error: &error2);
464
465	g_assert (error \|\| items_read2 == items_read);
466	g_assert (error \|\| items_written2 == items_written);
467	g_assert_cmpint (!!result, ==, !!result2);
468	g_assert_cmpint (!!error, ==, !!error2);
469	if (result)
470	for (i = `0`; i <= utf16_len; i++)
471	g_assert (result[i] == result2[i]);
472
473	g_free (mem: result2);
474	if (error2)
475	g_error_free (error: error2);
476	}
477
478	error3 = NULL;
479	result3 = g_ucs4_to_utf16 (str: ucs4, len: -`1`, NULL, NULL, error: &error3);
480
481	if (error_pos)
482	{
483	g_assert (error != NULL);
484	g_assert (result == NULL);
485	g_assert_cmpint (items_read, ==, error_pos);
486	g_error_free (error);
487
488	g_assert (error3 != NULL);
489	g_assert (result3 == NULL);
490	g_error_free (error: error3);
491	}
492	else
493	{
494	g_assert_no_error (error);
495	g_assert_cmpint (items_read, ==, ucs4_len);
496	g_assert_cmpint (items_written, ==, utf16_len);
497	g_assert (result);
498	for (i = `0`; i <= utf16_len; i++)
499	g_assert (result[i] == utf16[i]);
500
501	g_assert_no_error (error3);
502	g_assert (result3);
503	for (i = `0`; i <= utf16_len; i++)
504	g_assert (result3[i] == utf16[i]);
505	}
506
507	g_free (mem: result);
508	g_free (mem: result3);
509	}
510
511	static void
512	check_utf16_to_ucs4 (const gunichar2 *utf16,
513	glong utf16_len,
514	const gunichar *ucs4,
515	glong ucs4_len,
516	glong error_pos)
517	{
518	gunichar result, result2, *result3;
519	glong items_read, items_read2;
520	glong items_written, items_written2;
521	GError error, error2, *error3;
522	gint i;
523
524	error = NULL;
525	result = g_utf16_to_ucs4 (str: utf16, len: utf16_len, items_read: &items_read, items_written: &items_written, error: &error);
526	if (utf16[utf16_len] == `0`)
527	{
528	/ check that len == -1 yields identical results /
529	error2 = NULL;
530	result2 = g_utf16_to_ucs4 (str: utf16, len: -`1`, items_read: &items_read2, items_written: &items_written2, error: &error2);
531	g_assert (error \|\| items_read2 == items_read);
532	g_assert (error \|\| items_written2 == items_written);
533	g_assert_cmpint (!!result, ==, !!result2);
534	g_assert_cmpint (!!error, ==, !!error2);
535	if (result)
536	for (i = `0`; i <= items_written; i++)
537	g_assert (result[i] == result2[i]);
538
539	g_free (mem: result2);
540	if (error2)
541	g_error_free (error: error2);
542	}
543
544	error3 = NULL;
545	result3 = g_utf16_to_ucs4 (str: utf16, len: utf16_len, NULL, NULL, error: &error3);
546
547	if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
548	{
549	g_assert_no_error (error);
550	g_assert_cmpint (items_read, ==, error_pos);
551	g_assert_cmpint (items_read + `1`, ==, utf16_len);
552	g_assert_cmpint (items_written, ==, ucs4_len);
553	g_assert (result);
554	for (i = `0`; i <= items_written; i++)
555	g_assert (result[i] == ucs4[i]);
556	g_error_free (error: error3);
557	}
558	else if (error_pos)
559	{
560	g_assert (error != NULL);
561	g_assert (result == NULL);
562	g_assert_cmpint (items_read, ==, error_pos);
563	g_error_free (error);
564
565	g_assert (error3 != NULL);
566	g_assert (result3 == NULL);
567	g_error_free (error: error3);
568	}
569	else
570	{
571	g_assert_no_error (error);
572	g_assert_cmpint (items_read, ==, utf16_len);
573	g_assert_cmpint (items_written, ==, ucs4_len);
574	g_assert (result);
575	for (i = `0`; i <= ucs4_len; i++)
576	g_assert (result[i] == ucs4[i]);
577
578	g_assert_no_error (error3);
579	g_assert (result3);
580	for (i = `0`; i <= ucs4_len; i++)
581	g_assert (result3[i] == ucs4[i]);
582	}
583
584	g_free (mem: result);
585	g_free (mem: result3);
586	}
587
588	static void
589	test_unicode_conversions (void)
590	{
591	const char *utf8;
592	gunichar ucs4[`100`];
593	gunichar2 utf16[`100`];
594
595	utf8 = "abc";
596	ucs4[`0`] = `0x61`; ucs4[`1`] = `0x62`; ucs4[`2`] = `0x63`; ucs4[`3`] = `0`;
597	utf16[`0`] = `0x61`; utf16[`1`] = `0x62`; utf16[`2`] = `0x63`; utf16[`3`] = `0`;
598
599	check_utf8_to_ucs4 (utf8, utf8_len: `3`, ucs4, ucs4_len: `3`, error_pos: `0`);
600	check_ucs4_to_utf8 (ucs4, ucs4_len: `3`, utf8, utf8_len: `3`, error_pos: `0`);
601	check_utf8_to_utf16 (utf8, utf8_len: `3`, utf16, utf16_len: `3`, error_pos: `0`);
602	check_utf16_to_utf8 (utf16, utf16_len: `3`, utf8, utf8_len: `3`, error_pos: `0`);
603	check_ucs4_to_utf16 (ucs4, ucs4_len: `3`, utf16, utf16_len: `3`, error_pos: `0`);
604	check_utf16_to_ucs4 (utf16, utf16_len: `3`, ucs4, ucs4_len: `3`, error_pos: `0`);
605
606	utf8 = "\316\261\316\262\316\263";
607	ucs4[`0`] = `0x03b1`; ucs4[`1`] = `0x03b2`; ucs4[`2`] = `0x03b3`; ucs4[`3`] = `0`;
608	utf16[`0`] = `0x03b1`; utf16[`1`] = `0x03b2`; utf16[`2`] = `0x03b3`; utf16[`3`] = `0`;
609
610	check_utf8_to_ucs4 (utf8, utf8_len: `6`, ucs4, ucs4_len: `3`, error_pos: `0`);
611	check_ucs4_to_utf8 (ucs4, ucs4_len: `3`, utf8, utf8_len: `6`, error_pos: `0`);
612	check_utf8_to_utf16 (utf8, utf8_len: `6`, utf16, utf16_len: `3`, error_pos: `0`);
613	check_utf16_to_utf8 (utf16, utf16_len: `3`, utf8, utf8_len: `6`, error_pos: `0`);
614	check_ucs4_to_utf16 (ucs4, ucs4_len: `3`, utf16, utf16_len: `3`, error_pos: `0`);
615	check_utf16_to_ucs4 (utf16, utf16_len: `3`, ucs4, ucs4_len: `3`, error_pos: `0`);
616
617	/ partial utf8 character /
618	utf8 = "abc\316";
619	ucs4[`0`] = `0x61`; ucs4[`1`] = `0x62`; ucs4[`2`] = `0x63`; ucs4[`3`] = `0`;
620	utf16[`0`] = `0x61`; utf16[`1`] = `0x62`; utf16[`2`] = `0x63`; utf16[`3`] = `0`;
621
622	check_utf8_to_ucs4 (utf8, utf8_len: `4`, ucs4, ucs4_len: `3`, error_pos: `3`);
623	check_utf8_to_utf16 (utf8, utf8_len: `4`, utf16, utf16_len: `3`, error_pos: `3`);
624
625	/ invalid utf8 /
626	utf8 = "abc\316\316";
627	ucs4[`0`] = `0`;
628	utf16[`0`] = `0`;
629
630	check_utf8_to_ucs4 (utf8, utf8_len: `5`, ucs4, ucs4_len: `0`, error_pos: `3`);
631	check_utf8_to_utf16 (utf8, utf8_len: `5`, utf16, utf16_len: `0`, error_pos: `3`);
632
633	/ partial utf16 character /
634	utf8 = "ab";
635	ucs4[`0`] = `0x61`; ucs4[`1`] = `0x62`; ucs4[`2`] = `0`;
636	utf16[`0`] = `0x61`; utf16[`1`] = `0x62`; utf16[`2`] = `0xd801`; utf16[`3`] = `0`;
637
638	check_utf16_to_utf8 (utf16, utf16_len: `3`, utf8, utf8_len: `2`, error_pos: `2`);
639	check_utf16_to_ucs4 (utf16, utf16_len: `3`, ucs4, ucs4_len: `2`, error_pos: `2`);
640
641	/ invalid utf16 /
642	utf8 = NULL;
643	ucs4[`0`] = `0`;
644	utf16[`0`] = `0x61`; utf16[`1`] = `0x62`; utf16[`2`] = `0xdc01`; utf16[`3`] = `0`;
645
646	check_utf16_to_utf8 (utf16, utf16_len: `3`, utf8, utf8_len: `0`, error_pos: `2`);
647	check_utf16_to_ucs4 (utf16, utf16_len: `3`, ucs4, ucs4_len: `0`, error_pos: `2`);
648
649	/ invalid ucs4 /
650	utf8 = NULL;
651	ucs4[`0`] = `0x61`; ucs4[`1`] = `0x62`; ucs4[`2`] = `0x80000000`; ucs4[`3`] = `0`;
652	utf16[`0`] = `0`;
653
654	check_ucs4_to_utf8 (ucs4, ucs4_len: `3`, utf8, utf8_len: `0`, error_pos: `2`);
655	check_ucs4_to_utf16 (ucs4, ucs4_len: `3`, utf16, utf16_len: `0`, error_pos: `2`);
656	}
657
658	static void
659	test_filename_utf8 (void)
660	{
661	const gchar *filename = "/my/path/to/foo";
662	gchar *utf8;
663	gchar *back;
664	GError *error;
665
666	error = NULL;
667	utf8 = g_filename_to_utf8 (opsysstring: filename, len: -`1`, NULL, NULL, error: &error);
668	g_assert_no_error (error);
669	back = g_filename_from_utf8 (utf8string: utf8, len: -`1`, NULL, NULL, error: &error);
670	g_assert_no_error (error);
671	g_assert_cmpstr (back, ==, filename);
672
673	g_free (mem: utf8);
674	g_free (mem: back);
675	}
676
677	static void
678	test_filename_display (void)
679	{
680	const gchar *filename = "/my/path/to/foo";
681	char *display;
682
683	display = g_filename_display_basename (filename);
684	g_assert_cmpstr (display, ==, "foo");
685
686	g_free (mem: display);
687	}
688
689	/ g_convert() should accept and produce text buffers with embedded*
690	* nul bytes/characters.
691	*/
692	static void
693	test_convert_embedded_nul (void)
694	{
695	gchar *res;
696	gsize bytes_read, bytes_written;
697	GError *error = NULL;
698
699	res = g_convert (str: "ab\0\xf6", len: `4`, to_codeset: "UTF-8", from_codeset: "ISO-8859-1",
700	bytes_read: &bytes_read, bytes_written: &bytes_written, error: &error);
701	g_assert_no_error (error);
702	g_assert_cmpuint (bytes_read, ==, `4`);
703	g_assert_cmpmem (res, bytes_written, "ab\0\xc3\xb6", `5`);
704	g_free (mem: res);
705	}
706
/* Run the two g_locale_to_utf8() embedded-nul variants as subprocess tests
 * and require that each of them passes. Both variants modify the locale
 * and the environment.
 */
static void
test_locale_to_utf8_embedded_nul (void)
{
  g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", 0, 0);
  g_test_trap_assert_passed ();
  g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", 0, 0);
  g_test_trap_assert_passed ();
}
715
716	/ Test that embedded nul characters in UTF-8 input to g_locale_to_utf8()*
717	* result in an error.
718	*/
719	static void
720	test_locale_to_utf8_embedded_nul_utf8 (void)
721	{
722	gchar *res;
723	gsize bytes_read;
724	GError *error = NULL;
725
726	setlocale (LC_ALL, locale: "");
727	g_setenv (variable: "CHARSET", value: "UTF-8", TRUE);
728	g_assert_true (g_get_charset (NULL));
729
730	res = g_locale_to_utf8 (opsysstring: "ab\0c", len: `4`, bytes_read: &bytes_read, NULL, error: &error);
731
732	g_assert_null (res);
733	g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
734	g_assert_cmpuint (bytes_read, ==, `2`);
735	g_error_free (error);
736	}
737
738	/ Test that embedded nul characters in output of g_locale_to_utf8(),*
739	* when converted from non-UTF8 input, result in an error.
740	*/
741	static void
742	test_locale_to_utf8_embedded_nul_iconv (void)
743	{
744	gchar *res;
745	GError *error = NULL;
746
747	setlocale (LC_ALL, locale: "C");
748	g_setenv (variable: "CHARSET", value: "US-ASCII", TRUE);
749	g_assert_false (g_get_charset (NULL));
750
751	res = g_locale_to_utf8 (opsysstring: "ab\0c", len: `4`, NULL, NULL, error: &error);
752
753	g_assert_null (res);
754	g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL);
755	g_error_free (error);
756	}
757
/* Run the two g_locale_from_utf8() embedded-nul variants as subprocess
 * tests and require that each of them passes. Both variants modify the
 * locale and the environment.
 */
static void
test_locale_from_utf8_embedded_nul (void)
{
  g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", 0, 0);
  g_test_trap_assert_passed ();
  g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", 0, 0);
  g_test_trap_assert_passed ();
}
766
767	/ Test that embedded nul characters in input to g_locale_from_utf8(),*
768	* when converting (copying) to UTF-8 output, result in an error.
769	*/
770	static void
771	test_locale_from_utf8_embedded_nul_utf8 (void)
772	{
773	gchar *res;
774	gsize bytes_read;
775	GError *error = NULL;
776
777	setlocale (LC_ALL, locale: "");
778	g_setenv (variable: "CHARSET", value: "UTF-8", TRUE);
779	g_assert_true (g_get_charset (NULL));
780
781	res = g_locale_from_utf8 (utf8string: "ab\0c", len: `4`, bytes_read: &bytes_read, NULL, error: &error);
782
783	g_assert_null (res);
784	g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
785	g_assert_cmpuint (bytes_read, ==, `2`);
786	g_error_free (error);
787	}
788
789	/ Test that embedded nul characters in input to g_locale_from_utf8(),*
790	* when converting to non-UTF-8 output, result in an error.
791	*/
792	static void
793	test_locale_from_utf8_embedded_nul_iconv (void)
794	{
795	gchar *res;
796	gsize bytes_read;
797	GError *error = NULL;
798
799	setlocale (LC_ALL, locale: "C");
800	g_setenv (variable: "CHARSET", value: "US-ASCII", TRUE);
801	g_assert_false (g_get_charset (NULL));
802
803	res = g_locale_from_utf8 (utf8string: "ab\0c", len: `4`, bytes_read: &bytes_read, NULL, error: &error);
804
805	g_assert_null (res);
806	g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
807	g_assert_cmpuint (bytes_read, ==, `2`);
808	g_error_free (error);
809	}
810
/* Run the two g_filename_to_utf8() embedded-nul variants as subprocess
 * tests and require that each of them passes. Both variants modify the
 * environment.
 */
static void
test_filename_to_utf8_embedded_nul (void)
{
  g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", 0, 0);
  g_test_trap_assert_passed ();
  g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", 0, 0);
  g_test_trap_assert_passed ();
}
819
820	/ Test that embedded nul characters in UTF-8 input to g_filename_to_utf8()*
821	* result in an error.
822	*/
823	static void
824	test_filename_to_utf8_embedded_nul_utf8 (void)
825	{
826	gchar *res;
827	gsize bytes_read;
828	GError *error = NULL;
829
830	#ifndef G_OS_WIN32
831	/ G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() /
832	g_setenv (variable: "G_FILENAME_ENCODING", value: "UTF-8", TRUE);
833	g_assert_true (g_get_filename_charsets (NULL));
834	#endif
835
836	res = g_filename_to_utf8 (opsysstring: "ab\0c", len: `4`, bytes_read: &bytes_read, NULL, error: &error);
837
838	g_assert_null (res);
839	g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
840	g_assert_cmpuint (bytes_read, ==, `2`);
841	g_error_free (error);
842	}
843
844	/ Test that embedded nul characters in non-UTF-8 input of g_filename_to_utf8()*
845	* result in an error.
846	*/
847	static void
848	test_filename_to_utf8_embedded_nul_iconv (void)
849	{
850	gchar *res;
851	gsize bytes_read;
852	GError *error = NULL;
853
854	#ifndef G_OS_WIN32
855	/ G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() /
856	g_setenv (variable: "G_FILENAME_ENCODING", value: "US-ASCII", TRUE);
857	g_assert_false (g_get_filename_charsets (NULL));
858	#endif
859
860	res = g_filename_to_utf8 (opsysstring: "ab\0c", len: `4`, bytes_read: &bytes_read, NULL, error: &error);
861
862	g_assert_null (res);
863	g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
864	g_assert_cmpuint (bytes_read, ==, `2`);
865	g_error_free (error);
866	}
867
/* Run the two g_filename_from_utf8() embedded-nul variants as subprocess
 * tests and require that each of them passes. Both variants modify the
 * environment.
 */
static void
test_filename_from_utf8_embedded_nul (void)
{
  g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", 0, 0);
  g_test_trap_assert_passed ();
  g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", 0, 0);
  g_test_trap_assert_passed ();
}
876
877	/ Test that embedded nul characters in input to g_filename_from_utf8(),*
878	* when converting (copying) to UTF-8 output, result in an error.
879	*/
880	static void
881	test_filename_from_utf8_embedded_nul_utf8 (void)
882	{
883	gchar *res;
884	gsize bytes_read;
885	GError *error = NULL;
886
887	#ifndef G_OS_WIN32
888	/ G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() /
889	g_setenv (variable: "G_FILENAME_ENCODING", value: "UTF-8", TRUE);
890	g_assert_true (g_get_filename_charsets (NULL));
891	#endif
892
893	res = g_filename_from_utf8 (utf8string: "ab\0c", len: `4`, bytes_read: &bytes_read, NULL, error: &error);
894
895	g_assert_null (res);
896	g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
897	g_assert_cmpuint (bytes_read, ==, `2`);
898	g_error_free (error);
899	}
900
901	/ Test that embedded nul characters in input to g_filename_from_utf8(),*
902	* when converting to non-UTF-8 output, result in an error.
903	*/
904	static void
905	test_filename_from_utf8_embedded_nul_iconv (void)
906	{
907	gchar *res;
908	gsize bytes_read;
909	GError *error = NULL;
910
911	#ifndef G_OS_WIN32
912	/ G_FILENAME_ENCODING has no effect on Windows for g_get_filename_charsets() /
913	g_setenv (variable: "G_FILENAME_ENCODING", value: "US-ASCII", TRUE);
914	g_assert_false (g_get_filename_charsets (NULL));
915	#endif
916
917	res = g_filename_from_utf8 (utf8string: "ab\0c", len: `4`, bytes_read: &bytes_read, NULL, error: &error);
918
919	g_assert_null (res);
920	g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
921	g_assert_cmpuint (bytes_read, ==, `2`);
922	g_error_free (error);
923	}
924
925	static void
926	test_no_conv (void)
927	{
928	const gchar *in = "";
929	gchar *out G_GNUC_UNUSED;
930	gsize bytes_read = `0`;
931	gsize bytes_written = `0`;
932	GError *error = NULL;
933
934	out = g_convert (str: in, len: -`1`, to_codeset: "XXX", from_codeset: "UVZ",
935	bytes_read: &bytes_read, bytes_written: &bytes_written, error: &error);
936
937	/ error code is unreliable, since we mishandle errno there /
938	g_assert (error && error->domain == G_CONVERT_ERROR);
939	g_error_free (error);
940	}
941
942	int
943	main (int argc, char *argv[])
944	{
945	g_test_init (argc: &argc, argv: &argv, NULL);
946
947	g_test_add_func (testpath: "/conversion/no-conv", test_func: test_no_conv);
948	g_test_add_func (testpath: "/conversion/iconv-state", test_func: test_iconv_state);
949	g_test_add_func (testpath: "/conversion/illegal-sequence", test_func: test_one_half);
950	g_test_add_func (testpath: "/conversion/byte-order", test_func: test_byte_order);
951	g_test_add_func (testpath: "/conversion/unicode", test_func: test_unicode_conversions);
952	g_test_add_func (testpath: "/conversion/filename-utf8", test_func: test_filename_utf8);
953	g_test_add_func (testpath: "/conversion/filename-display", test_func: test_filename_display);
954	g_test_add_func (testpath: "/conversion/convert-embedded-nul", test_func: test_convert_embedded_nul);
955	g_test_add_func (testpath: "/conversion/locale-to-utf8/embedded-nul", test_func: test_locale_to_utf8_embedded_nul);
956	g_test_add_func (testpath: "/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", test_func: test_locale_to_utf8_embedded_nul_utf8);
957	g_test_add_func (testpath: "/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", test_func: test_locale_to_utf8_embedded_nul_iconv);
958	g_test_add_func (testpath: "/conversion/locale-from-utf8/embedded-nul", test_func: test_locale_from_utf8_embedded_nul);
959	g_test_add_func (testpath: "/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", test_func: test_locale_from_utf8_embedded_nul_utf8);
960	g_test_add_func (testpath: "/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", test_func: test_locale_from_utf8_embedded_nul_iconv);
961	g_test_add_func (testpath: "/conversion/filename-to-utf8/embedded-nul", test_func: test_filename_to_utf8_embedded_nul);
962	g_test_add_func (testpath: "/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", test_func: test_filename_to_utf8_embedded_nul_utf8);
963	g_test_add_func (testpath: "/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", test_func: test_filename_to_utf8_embedded_nul_iconv);
964	g_test_add_func (testpath: "/conversion/filename-from-utf8/embedded-nul", test_func: test_filename_from_utf8_embedded_nul);
965	g_test_add_func (testpath: "/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", test_func: test_filename_from_utf8_embedded_nul_utf8);
966	g_test_add_func (testpath: "/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", test_func: test_filename_from_utf8_embedded_nul_iconv);
967
968	return g_test_run ();
969	}
970

/* End of gtk/subprojects/glib/glib/tests/convert.c */