1/* Unit tests for utilities
2 * Copyright (C) 2010 Red Hat, Inc.
3 * Copyright (C) 2011 Google, Inc.
4 *
5 * This work is provided "as is"; redistribution and modification
6 * in whole or in part, in any medium, physical or electronic is
7 * permitted without restriction.
8 *
9 * This work is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 * In no event shall the authors or contributors be liable for any
14 * direct, indirect, incidental, special, exemplary, or consequential
15 * damages (including, but not limited to, procurement of substitute
16 * goods or services; loss of use, data, or profits; or business
17 * interruption) however caused and on any theory of liability, whether
18 * in contract, strict liability, or tort (including negligence or
19 * otherwise) arising in any way out of the use of this software, even
20 * if advised of the possibility of such damage.
21 *
22 * Author: Matthias Clasen, Behdad Esfahbod
23 */
24
25/* We are testing some deprecated APIs here */
26#ifndef GLIB_DISABLE_DEPRECATION_WARNINGS
27#define GLIB_DISABLE_DEPRECATION_WARNINGS
28#endif
29
30#include <locale.h>
31
32#include "glib.h"
33
34#include "glib/gunidecomp.h"
35
/* Test that g_unichar_validate() accepts valid Unicode code points
 * (including a noncharacter) and rejects surrogates and values beyond
 * the end of the Unicode code space. */
static void
test_unichar_validate (void)
{
  g_assert_true (g_unichar_validate ('j'));
  g_assert_true (g_unichar_validate (8356));    /* U+20A4 */
  g_assert_true (g_unichar_validate (0xFDD1));  /* noncharacter, still accepted */
  g_assert_true (g_unichar_validate (917760));  /* U+E0100 */

  /* Surrogate code points are not valid Unicode characters */
  g_assert_false (g_unichar_validate (0xD800));
  g_assert_false (g_unichar_validate (0xDFFF));

  /* First value past the last Unicode code point (U+10FFFF) */
  g_assert_false (g_unichar_validate (0x110000));
}
48
49/* Test that g_unichar_type() returns the correct value for various
50 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
51static void
52test_unichar_character_type (void)
53{
54 guint i;
55 struct {
56 GUnicodeType type;
57 gunichar c;
58 } examples[] = {
59 { G_UNICODE_CONTROL, 0x000D },
60 { G_UNICODE_FORMAT, 0x200E },
61 /* G_UNICODE_UNASSIGNED */
62 { G_UNICODE_PRIVATE_USE, 0xE000 },
63 { G_UNICODE_SURROGATE, 0xD800 },
64 { G_UNICODE_LOWERCASE_LETTER, 0x0061 },
65 { G_UNICODE_MODIFIER_LETTER, 0x02B0 },
66 { G_UNICODE_OTHER_LETTER, 0x3400 },
67 { G_UNICODE_TITLECASE_LETTER, 0x01C5 },
68 { G_UNICODE_UPPERCASE_LETTER, 0xFF21 },
69 { G_UNICODE_SPACING_MARK, 0x0903 },
70 { G_UNICODE_ENCLOSING_MARK, 0x20DD },
71 { G_UNICODE_NON_SPACING_MARK, 0xA806 },
72 { G_UNICODE_DECIMAL_NUMBER, 0xFF10 },
73 { G_UNICODE_LETTER_NUMBER, 0x16EE },
74 { G_UNICODE_OTHER_NUMBER, 0x17F0 },
75 { G_UNICODE_CONNECT_PUNCTUATION, 0x005F },
76 { G_UNICODE_DASH_PUNCTUATION, 0x058A },
77 { G_UNICODE_CLOSE_PUNCTUATION, 0x0F3B },
78 { G_UNICODE_FINAL_PUNCTUATION, 0x2019 },
79 { G_UNICODE_INITIAL_PUNCTUATION, 0x2018 },
80 { G_UNICODE_OTHER_PUNCTUATION, 0x2016 },
81 { G_UNICODE_OPEN_PUNCTUATION, 0x0F3A },
82 { G_UNICODE_CURRENCY_SYMBOL, 0x20A0 },
83 { G_UNICODE_MODIFIER_SYMBOL, 0x309B },
84 { G_UNICODE_MATH_SYMBOL, 0xFB29 },
85 { G_UNICODE_OTHER_SYMBOL, 0x00A6 },
86 { G_UNICODE_LINE_SEPARATOR, 0x2028 },
87 { G_UNICODE_PARAGRAPH_SEPARATOR, 0x2029 },
88 { G_UNICODE_SPACE_SEPARATOR, 0x202F },
89 };
90
91 for (i = 0; i < G_N_ELEMENTS (examples); i++)
92 {
93 g_assert_cmpint (g_unichar_type (examples[i].c), ==, examples[i].type);
94 }
95
96 /*** Testing TYPE() border cases ***/
97 g_assert_cmpint (g_unichar_type (0x3FF5), ==, 0x07);
98 /* U+FFEFF Plane 15 Private Use */
99 g_assert_cmpint (g_unichar_type (0xFFEFF), ==, 0x03);
100 /* U+E0001 Language Tag */
101 g_assert_cmpint (g_unichar_type (0xE0001), ==, 0x01);
102 g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR), ==, 0x02);
103 g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR + 1), ==, 0x02);
104 g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR_PART1), ==, 0x02);
105 g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR_PART1 + 1), ==, 0x02);
106}
107
108/* Test that g_unichar_break_type() returns the correct value for various
109 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
110static void
111test_unichar_break_type (void)
112{
113 guint i;
114 struct {
115 GUnicodeBreakType type;
116 gunichar c;
117 } examples[] = {
118 { G_UNICODE_BREAK_MANDATORY, 0x2028 },
119 { G_UNICODE_BREAK_CARRIAGE_RETURN, 0x000D },
120 { G_UNICODE_BREAK_LINE_FEED, 0x000A },
121 { G_UNICODE_BREAK_COMBINING_MARK, 0x0300 },
122 { G_UNICODE_BREAK_SURROGATE, 0xD800 },
123 { G_UNICODE_BREAK_ZERO_WIDTH_SPACE, 0x200B },
124 { G_UNICODE_BREAK_INSEPARABLE, 0x2024 },
125 { G_UNICODE_BREAK_NON_BREAKING_GLUE, 0x00A0 },
126 { G_UNICODE_BREAK_CONTINGENT, 0xFFFC },
127 { G_UNICODE_BREAK_SPACE, 0x0020 },
128 { G_UNICODE_BREAK_AFTER, 0x05BE },
129 { G_UNICODE_BREAK_BEFORE, 0x02C8 },
130 { G_UNICODE_BREAK_BEFORE_AND_AFTER, 0x2014 },
131 { G_UNICODE_BREAK_HYPHEN, 0x002D },
132 { G_UNICODE_BREAK_NON_STARTER, 0x17D6 },
133 { G_UNICODE_BREAK_OPEN_PUNCTUATION, 0x0028 },
134 { G_UNICODE_BREAK_CLOSE_PARANTHESIS, 0x0029 },
135 { G_UNICODE_BREAK_CLOSE_PUNCTUATION, 0x007D },
136 { G_UNICODE_BREAK_QUOTATION, 0x0022 },
137 { G_UNICODE_BREAK_EXCLAMATION, 0x0021 },
138 { G_UNICODE_BREAK_IDEOGRAPHIC, 0x2E80 },
139 { G_UNICODE_BREAK_NUMERIC, 0x0030 },
140 { G_UNICODE_BREAK_INFIX_SEPARATOR, 0x002C },
141 { G_UNICODE_BREAK_SYMBOL, 0x002F },
142 { G_UNICODE_BREAK_ALPHABETIC, 0x0023 },
143 { G_UNICODE_BREAK_PREFIX, 0x0024 },
144 { G_UNICODE_BREAK_POSTFIX, 0x0025 },
145 { G_UNICODE_BREAK_COMPLEX_CONTEXT, 0x0E01 },
146 { G_UNICODE_BREAK_AMBIGUOUS, 0x00F7 },
147 { G_UNICODE_BREAK_UNKNOWN, 0xE000 },
148 { G_UNICODE_BREAK_NEXT_LINE, 0x0085 },
149 { G_UNICODE_BREAK_WORD_JOINER, 0x2060 },
150 { G_UNICODE_BREAK_HANGUL_L_JAMO, 0x1100 },
151 { G_UNICODE_BREAK_HANGUL_V_JAMO, 0x1160 },
152 { G_UNICODE_BREAK_HANGUL_T_JAMO, 0x11A8 },
153 { G_UNICODE_BREAK_HANGUL_LV_SYLLABLE, 0xAC00 },
154 { G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE, 0xAC01 },
155 { G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER, 0x3041 },
156 { G_UNICODE_BREAK_HEBREW_LETTER, 0x05D0 },
157 { G_UNICODE_BREAK_REGIONAL_INDICATOR, 0x1F1F6 },
158 { G_UNICODE_BREAK_EMOJI_BASE, 0x1F466 },
159 { G_UNICODE_BREAK_EMOJI_MODIFIER, 0x1F3FB },
160 { G_UNICODE_BREAK_ZERO_WIDTH_JOINER, 0x200D },
161 };
162
163 for (i = 0; i < G_N_ELEMENTS (examples); i++)
164 {
165 g_assert_cmpint (g_unichar_break_type (examples[i].c), ==, examples[i].type);
166 }
167}
168
169/* Test that g_unichar_get_script() returns the correct value for various
170 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
171static void
172test_unichar_script (void)
173{
174 guint i;
175 struct {
176 GUnicodeScript script;
177 gunichar c;
178 } examples[] = {
179 { G_UNICODE_SCRIPT_COMMON, 0x002A },
180 { G_UNICODE_SCRIPT_INHERITED, 0x1CED },
181 { G_UNICODE_SCRIPT_INHERITED, 0x0670 },
182 { G_UNICODE_SCRIPT_ARABIC, 0x060D },
183 { G_UNICODE_SCRIPT_ARMENIAN, 0x0559 },
184 { G_UNICODE_SCRIPT_BENGALI, 0x09CD },
185 { G_UNICODE_SCRIPT_BOPOMOFO, 0x31B6 },
186 { G_UNICODE_SCRIPT_CHEROKEE, 0x13A2 },
187 { G_UNICODE_SCRIPT_COPTIC, 0x2CFD },
188 { G_UNICODE_SCRIPT_CYRILLIC, 0x0482 },
189 { G_UNICODE_SCRIPT_DESERET, 0x10401 },
190 { G_UNICODE_SCRIPT_DEVANAGARI, 0x094D },
191 { G_UNICODE_SCRIPT_ETHIOPIC, 0x1258 },
192 { G_UNICODE_SCRIPT_GEORGIAN, 0x10FC },
193 { G_UNICODE_SCRIPT_GOTHIC, 0x10341 },
194 { G_UNICODE_SCRIPT_GREEK, 0x0375 },
195 { G_UNICODE_SCRIPT_GUJARATI, 0x0A83 },
196 { G_UNICODE_SCRIPT_GURMUKHI, 0x0A3C },
197 { G_UNICODE_SCRIPT_HAN, 0x3005 },
198 { G_UNICODE_SCRIPT_HANGUL, 0x1100 },
199 { G_UNICODE_SCRIPT_HEBREW, 0x05BF },
200 { G_UNICODE_SCRIPT_HIRAGANA, 0x309F },
201 { G_UNICODE_SCRIPT_KANNADA, 0x0CBC },
202 { G_UNICODE_SCRIPT_KATAKANA, 0x30FF },
203 { G_UNICODE_SCRIPT_KHMER, 0x17DD },
204 { G_UNICODE_SCRIPT_LAO, 0x0EDD },
205 { G_UNICODE_SCRIPT_LATIN, 0x0061 },
206 { G_UNICODE_SCRIPT_MALAYALAM, 0x0D3D },
207 { G_UNICODE_SCRIPT_MONGOLIAN, 0x1843 },
208 { G_UNICODE_SCRIPT_MYANMAR, 0x1031 },
209 { G_UNICODE_SCRIPT_OGHAM, 0x169C },
210 { G_UNICODE_SCRIPT_OLD_ITALIC, 0x10322 },
211 { G_UNICODE_SCRIPT_ORIYA, 0x0B3C },
212 { G_UNICODE_SCRIPT_RUNIC, 0x16EF },
213 { G_UNICODE_SCRIPT_SINHALA, 0x0DBD },
214 { G_UNICODE_SCRIPT_SYRIAC, 0x0711 },
215 { G_UNICODE_SCRIPT_TAMIL, 0x0B82 },
216 { G_UNICODE_SCRIPT_TELUGU, 0x0C03 },
217 { G_UNICODE_SCRIPT_THAANA, 0x07B1 },
218 { G_UNICODE_SCRIPT_THAI, 0x0E31 },
219 { G_UNICODE_SCRIPT_TIBETAN, 0x0FD4 },
220 { G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, 0x1400 },
221 { G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, 0x1401 },
222 { G_UNICODE_SCRIPT_YI, 0xA015 },
223 { G_UNICODE_SCRIPT_TAGALOG, 0x1700 },
224 { G_UNICODE_SCRIPT_HANUNOO, 0x1720 },
225 { G_UNICODE_SCRIPT_BUHID, 0x1740 },
226 { G_UNICODE_SCRIPT_TAGBANWA, 0x1760 },
227 { G_UNICODE_SCRIPT_BRAILLE, 0x2800 },
228 { G_UNICODE_SCRIPT_CYPRIOT, 0x10808 },
229 { G_UNICODE_SCRIPT_LIMBU, 0x1932 },
230 { G_UNICODE_SCRIPT_OSMANYA, 0x10480 },
231 { G_UNICODE_SCRIPT_SHAVIAN, 0x10450 },
232 { G_UNICODE_SCRIPT_LINEAR_B, 0x10000 },
233 { G_UNICODE_SCRIPT_TAI_LE, 0x1950 },
234 { G_UNICODE_SCRIPT_UGARITIC, 0x1039F },
235 { G_UNICODE_SCRIPT_NEW_TAI_LUE, 0x1980 },
236 { G_UNICODE_SCRIPT_BUGINESE, 0x1A1F },
237 { G_UNICODE_SCRIPT_GLAGOLITIC, 0x2C00 },
238 { G_UNICODE_SCRIPT_TIFINAGH, 0x2D6F },
239 { G_UNICODE_SCRIPT_SYLOTI_NAGRI, 0xA800 },
240 { G_UNICODE_SCRIPT_OLD_PERSIAN, 0x103D0 },
241 { G_UNICODE_SCRIPT_KHAROSHTHI, 0x10A3F },
242 { G_UNICODE_SCRIPT_UNKNOWN, 0x1111111 },
243 { G_UNICODE_SCRIPT_BALINESE, 0x1B04 },
244 { G_UNICODE_SCRIPT_CUNEIFORM, 0x12000 },
245 { G_UNICODE_SCRIPT_PHOENICIAN, 0x10900 },
246 { G_UNICODE_SCRIPT_PHAGS_PA, 0xA840 },
247 { G_UNICODE_SCRIPT_NKO, 0x07C0 },
248 { G_UNICODE_SCRIPT_KAYAH_LI, 0xA900 },
249 { G_UNICODE_SCRIPT_LEPCHA, 0x1C00 },
250 { G_UNICODE_SCRIPT_REJANG, 0xA930 },
251 { G_UNICODE_SCRIPT_SUNDANESE, 0x1B80 },
252 { G_UNICODE_SCRIPT_SAURASHTRA, 0xA880 },
253 { G_UNICODE_SCRIPT_CHAM, 0xAA00 },
254 { G_UNICODE_SCRIPT_OL_CHIKI, 0x1C50 },
255 { G_UNICODE_SCRIPT_VAI, 0xA500 },
256 { G_UNICODE_SCRIPT_CARIAN, 0x102A0 },
257 { G_UNICODE_SCRIPT_LYCIAN, 0x10280 },
258 { G_UNICODE_SCRIPT_LYDIAN, 0x1093F },
259 { G_UNICODE_SCRIPT_AVESTAN, 0x10B00 },
260 { G_UNICODE_SCRIPT_BAMUM, 0xA6A0 },
261 { G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS, 0x13000 },
262 { G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC, 0x10840 },
263 { G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI, 0x10B60 },
264 { G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN, 0x10B40 },
265 { G_UNICODE_SCRIPT_JAVANESE, 0xA980 },
266 { G_UNICODE_SCRIPT_KAITHI, 0x11082 },
267 { G_UNICODE_SCRIPT_LISU, 0xA4D0 },
268 { G_UNICODE_SCRIPT_MEETEI_MAYEK, 0xABE5 },
269 { G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN, 0x10A60 },
270 { G_UNICODE_SCRIPT_OLD_TURKIC, 0x10C00 },
271 { G_UNICODE_SCRIPT_SAMARITAN, 0x0800 },
272 { G_UNICODE_SCRIPT_TAI_THAM, 0x1A20 },
273 { G_UNICODE_SCRIPT_TAI_VIET, 0xAA80 },
274 { G_UNICODE_SCRIPT_BATAK, 0x1BC0 },
275 { G_UNICODE_SCRIPT_BRAHMI, 0x11000 },
276 { G_UNICODE_SCRIPT_MANDAIC, 0x0840 },
277 { G_UNICODE_SCRIPT_CHAKMA, 0x11100 },
278 { G_UNICODE_SCRIPT_MEROITIC_CURSIVE, 0x109A0 },
279 { G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS, 0x10980 },
280 { G_UNICODE_SCRIPT_MIAO, 0x16F00 },
281 { G_UNICODE_SCRIPT_SHARADA, 0x11180 },
282 { G_UNICODE_SCRIPT_SORA_SOMPENG, 0x110D0 },
283 { G_UNICODE_SCRIPT_TAKRI, 0x11680 },
284 { G_UNICODE_SCRIPT_BASSA_VAH, 0x16AD0 },
285 { G_UNICODE_SCRIPT_CAUCASIAN_ALBANIAN, 0x10530 },
286 { G_UNICODE_SCRIPT_DUPLOYAN, 0x1BC00 },
287 { G_UNICODE_SCRIPT_ELBASAN, 0x10500 },
288 { G_UNICODE_SCRIPT_GRANTHA, 0x11301 },
289 { G_UNICODE_SCRIPT_KHOJKI, 0x11200 },
290 { G_UNICODE_SCRIPT_KHUDAWADI, 0x112B0 },
291 { G_UNICODE_SCRIPT_LINEAR_A, 0x10600 },
292 { G_UNICODE_SCRIPT_MAHAJANI, 0x11150 },
293 { G_UNICODE_SCRIPT_MANICHAEAN, 0x10AC0 },
294 { G_UNICODE_SCRIPT_MENDE_KIKAKUI, 0x1E800 },
295 { G_UNICODE_SCRIPT_MODI, 0x11600 },
296 { G_UNICODE_SCRIPT_MRO, 0x16A40 },
297 { G_UNICODE_SCRIPT_NABATAEAN, 0x10880 },
298 { G_UNICODE_SCRIPT_OLD_NORTH_ARABIAN, 0x10A80 },
299 { G_UNICODE_SCRIPT_OLD_PERMIC, 0x10350 },
300 { G_UNICODE_SCRIPT_PAHAWH_HMONG, 0x16B00 },
301 { G_UNICODE_SCRIPT_PALMYRENE, 0x10860 },
302 { G_UNICODE_SCRIPT_PAU_CIN_HAU, 0x11AC0 },
303 { G_UNICODE_SCRIPT_PSALTER_PAHLAVI, 0x10B80 },
304 { G_UNICODE_SCRIPT_SIDDHAM, 0x11580 },
305 { G_UNICODE_SCRIPT_TIRHUTA, 0x11480 },
306 { G_UNICODE_SCRIPT_WARANG_CITI, 0x118A0 },
307 { G_UNICODE_SCRIPT_CHEROKEE, 0x0AB71 },
308 { G_UNICODE_SCRIPT_HATRAN, 0x108E0 },
309 { G_UNICODE_SCRIPT_OLD_HUNGARIAN, 0x10C80 },
310 { G_UNICODE_SCRIPT_MULTANI, 0x11280 },
311 { G_UNICODE_SCRIPT_AHOM, 0x11700 },
312 { G_UNICODE_SCRIPT_CUNEIFORM, 0x12480 },
313 { G_UNICODE_SCRIPT_ANATOLIAN_HIEROGLYPHS, 0x14400 },
314 { G_UNICODE_SCRIPT_SIGNWRITING, 0x1D800 },
315 { G_UNICODE_SCRIPT_ADLAM, 0x1E900 },
316 { G_UNICODE_SCRIPT_BHAIKSUKI, 0x11C00 },
317 { G_UNICODE_SCRIPT_MARCHEN, 0x11C70 },
318 { G_UNICODE_SCRIPT_NEWA, 0x11400 },
319 { G_UNICODE_SCRIPT_OSAGE, 0x104B0 },
320 { G_UNICODE_SCRIPT_TANGUT, 0x16FE0 },
321 { G_UNICODE_SCRIPT_MASARAM_GONDI, 0x11D00 },
322 { G_UNICODE_SCRIPT_NUSHU, 0x1B170 },
323 { G_UNICODE_SCRIPT_SOYOMBO, 0x11A50 },
324 { G_UNICODE_SCRIPT_ZANABAZAR_SQUARE, 0x11A00 },
325 { G_UNICODE_SCRIPT_DOGRA, 0x11800 },
326 { G_UNICODE_SCRIPT_GUNJALA_GONDI, 0x11D60 },
327 { G_UNICODE_SCRIPT_HANIFI_ROHINGYA, 0x10D00 },
328 { G_UNICODE_SCRIPT_MAKASAR, 0x11EE0 },
329 { G_UNICODE_SCRIPT_MEDEFAIDRIN, 0x16E40 },
330 { G_UNICODE_SCRIPT_OLD_SOGDIAN, 0x10F00 },
331 { G_UNICODE_SCRIPT_SOGDIAN, 0x10F30 },
332 { G_UNICODE_SCRIPT_ELYMAIC, 0x10FE0 },
333 { G_UNICODE_SCRIPT_NANDINAGARI, 0x119A0 },
334 { G_UNICODE_SCRIPT_NYIAKENG_PUACHUE_HMONG, 0x1E100 },
335 { G_UNICODE_SCRIPT_WANCHO, 0x1E2C0 },
336 { G_UNICODE_SCRIPT_CHORASMIAN, 0x10FB0 },
337 { G_UNICODE_SCRIPT_DIVES_AKURU, 0x11900 },
338 { G_UNICODE_SCRIPT_KHITAN_SMALL_SCRIPT, 0x18B00 },
339 { G_UNICODE_SCRIPT_YEZIDI, 0x10E80 },
340 };
341 for (i = 0; i < G_N_ELEMENTS (examples); i++)
342 g_assert_cmpint (g_unichar_get_script (examples[i].c), ==, examples[i].script);
343}
344
345/* Test that g_unichar_combining_class() returns the correct value for
346 * various ASCII and Unicode alphabetic, numeric, and other, codepoints. */
347static void
348test_combining_class (void)
349{
350 guint i;
351 struct {
352 gint class;
353 gunichar c;
354 } examples[] = {
355 { 0, 0x0020 },
356 { 1, 0x0334 },
357 { 7, 0x093C },
358 { 8, 0x3099 },
359 { 9, 0x094D },
360 { 10, 0x05B0 },
361 { 11, 0x05B1 },
362 { 12, 0x05B2 },
363 { 13, 0x05B3 },
364 { 14, 0x05B4 },
365 { 15, 0x05B5 },
366 { 16, 0x05B6 },
367 { 17, 0x05B7 },
368 { 18, 0x05B8 },
369 { 19, 0x05B9 },
370 { 20, 0x05BB },
371 { 21, 0x05BC },
372 { 22, 0x05BD },
373 { 23, 0x05BF },
374 { 24, 0x05C1 },
375 { 25, 0x05C2 },
376 { 26, 0xFB1E },
377 { 27, 0x064B },
378 { 28, 0x064C },
379 { 29, 0x064D },
380 /* ... */
381 { 228, 0x05AE },
382 { 230, 0x0300 },
383 { 232, 0x302C },
384 { 233, 0x0362 },
385 { 234, 0x0360 },
386 { 234, 0x1DCD },
387 { 240, 0x0345 }
388 };
389 for (i = 0; i < G_N_ELEMENTS (examples); i++)
390 {
391 g_assert_cmpint (g_unichar_combining_class (examples[i].c), ==, examples[i].class);
392 }
393}
394
395/* Test that g_unichar_get_mirror() returns the correct value for various
396 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
397static void
398test_mirror (void)
399{
400 gunichar mirror;
401
402 g_assert_true (g_unichar_get_mirror_char ('(', &mirror));
403 g_assert_cmpint (mirror, ==, ')');
404 g_assert_true (g_unichar_get_mirror_char (')', &mirror));
405 g_assert_cmpint (mirror, ==, '(');
406 g_assert_true (g_unichar_get_mirror_char ('{', &mirror));
407 g_assert_cmpint (mirror, ==, '}');
408 g_assert_true (g_unichar_get_mirror_char ('}', &mirror));
409 g_assert_cmpint (mirror, ==, '{');
410 g_assert_true (g_unichar_get_mirror_char (0x208D, &mirror));
411 g_assert_cmpint (mirror, ==, 0x208E);
412 g_assert_true (g_unichar_get_mirror_char (0x208E, &mirror));
413 g_assert_cmpint (mirror, ==, 0x208D);
414 g_assert_false (g_unichar_get_mirror_char ('a', &mirror));
415}
416
417/* Test that g_utf8_strup() returns the correct value for various
418 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
419static void
420test_strup (void)
421{
422 char *str_up = NULL;
423 const char *str = "AaZz09x;\x03\x45"
424 "\xEF\xBD\x81" /* Unichar 'A' (U+FF21) */
425 "\xEF\xBC\xA1"; /* Unichar 'a' (U+FF41) */
426
427 /* Testing degenerated cases */
428 if (g_test_undefined ())
429 {
430 g_test_expect_message (G_LOG_DOMAIN, log_level: G_LOG_LEVEL_CRITICAL,
431 pattern: "*assertion*!= NULL*");
432 str_up = g_utf8_strup (NULL, len: 0);
433 g_test_assert_expected_messages ();
434 }
435
436 str_up = g_utf8_strup (str, len: strlen (s: str));
437 /* Tricky, comparing two unicode strings with an ASCII function */
438 g_assert_cmpstr (str_up, ==, "AAZZ09X;\003E\357\274\241\357\274\241");
439 g_free (mem: str_up);
440}
441
442/* Test that g_utf8_strdown() returns the correct value for various
443 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
444static void
445test_strdown (void)
446{
447 char *str_down = NULL;
448 const char *str = "AaZz09x;\x03\x07"
449 "\xEF\xBD\x81" /* Unichar 'A' (U+FF21) */
450 "\xEF\xBC\xA1"; /* Unichar 'a' (U+FF41) */
451
452 /* Testing degenerated cases */
453 if (g_test_undefined ())
454 {
455 g_test_expect_message (G_LOG_DOMAIN, log_level: G_LOG_LEVEL_CRITICAL,
456 pattern: "*assertion*!= NULL*");
457 str_down = g_utf8_strdown (NULL, len: 0);
458 g_test_assert_expected_messages ();
459 }
460
461 str_down = g_utf8_strdown (str, len: strlen (s: str));
462 /* Tricky, comparing two unicode strings with an ASCII function */
463 g_assert_cmpstr (str_down, ==, "aazz09x;\003\007\357\275\201\357\275\201");
464 g_free (mem: str_down);
465}
466
467/* Test that g_utf8_strup() and g_utf8_strdown() return the correct
468 * value for Turkish 'i' with and without dot above. */
469static void
470test_turkish_strupdown (void)
471{
472 char *str_up = NULL;
473 char *str_down = NULL;
474 const char *str = "iII"
475 "\xcc\x87" /* COMBINING DOT ABOVE (U+307) */
476 "\xc4\xb1" /* LATIN SMALL LETTER DOTLESS I (U+131) */
477 "\xc4\xb0"; /* LATIN CAPITAL LETTER I WITH DOT ABOVE (U+130) */
478
479 char *oldlocale = g_strdup (str: setlocale (LC_ALL, locale: "tr_TR"));
480
481 if (oldlocale == NULL)
482 {
483 g_test_skip (msg: "locale tr_TR not available");
484 return;
485 }
486
487 str_up = g_utf8_strup (str, len: strlen (s: str));
488 str_down = g_utf8_strdown (str, len: strlen (s: str));
489 /* i => LATIN CAPITAL LETTER I WITH DOT ABOVE,
490 * I => I,
491 * I + COMBINING DOT ABOVE => I + COMBINING DOT ABOVE,
492 * LATIN SMALL LETTER DOTLESS I => I,
493 * LATIN CAPITAL LETTER I WITH DOT ABOVE => LATIN CAPITAL LETTER I WITH DOT ABOVE */
494 g_assert_cmpstr (str_up, ==, "\xc4\xb0II\xcc\x87I\xc4\xb0");
495 /* i => i,
496 * I => LATIN SMALL LETTER DOTLESS I,
497 * I + COMBINING DOT ABOVE => i,
498 * LATIN SMALL LETTER DOTLESS I => LATIN SMALL LETTER DOTLESS I,
499 * LATIN CAPITAL LETTER I WITH DOT ABOVE => i */
500 g_assert_cmpstr (str_down, ==, "i\xc4\xb1i\xc4\xb1i");
501 g_free (mem: str_up);
502 g_free (mem: str_down);
503
504 setlocale (LC_ALL, locale: oldlocale);
505 g_free (mem: oldlocale);
506}
507
508/* Test that g_utf8_casefold() returns the correct value for various
509 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
510static void
511test_casefold (void)
512{
513 char *str_casefold = NULL;
514 const char *str = "AaZz09x;"
515 "\xEF\xBD\x81" /* Unichar 'A' (U+FF21) */
516 "\xEF\xBC\xA1"; /* Unichar 'a' (U+FF41) */
517
518 /* Testing degenerated cases */
519 if (g_test_undefined ())
520 {
521 g_test_expect_message (G_LOG_DOMAIN, log_level: G_LOG_LEVEL_CRITICAL,
522 pattern: "*assertion*!= NULL*");
523 str_casefold = g_utf8_casefold (NULL, len: 0);
524 g_test_assert_expected_messages ();
525 }
526
527 str_casefold = g_utf8_casefold (str, len: strlen (s: str));
528 /* Tricky, comparing two unicode strings with an ASCII function */
529 g_assert_cmpstr (str_casefold, ==, "aazz09x;\357\275\201\357\275\201");
530 g_free (mem: str_casefold);
531}
532
533/* Test that g_unichar_ismark() returns the correct value for various
534 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
535static void
536test_mark (void)
537{
538 g_assert_true (g_unichar_ismark (0x0903));
539 g_assert_true (g_unichar_ismark (0x20DD));
540 g_assert_true (g_unichar_ismark (0xA806));
541 g_assert_false (g_unichar_ismark ('a'));
542
543 /*** Testing TYPE() border cases ***/
544 g_assert_false (g_unichar_ismark (0x3FF5));
545 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
546 g_assert_false (g_unichar_ismark (0xFFEFF));
547 /* U+E0001 Language Tag */
548 g_assert_false (g_unichar_ismark (0xE0001));
549 g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR));
550 g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR + 1));
551 g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR_PART1));
552 g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR_PART1 + 1));
553}
554
555/* Test that g_unichar_isspace() returns the correct value for various
556 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
557static void
558test_space (void)
559{
560 g_assert_false (g_unichar_isspace ('a'));
561 g_assert_true (g_unichar_isspace (' '));
562 g_assert_true (g_unichar_isspace ('\t'));
563 g_assert_true (g_unichar_isspace ('\n'));
564 g_assert_true (g_unichar_isspace ('\r'));
565 g_assert_true (g_unichar_isspace ('\f'));
566 g_assert_false (g_unichar_isspace (0xff41)); /* Unicode fullwidth 'a' */
567 g_assert_true (g_unichar_isspace (0x202F)); /* Unicode space separator */
568 g_assert_true (g_unichar_isspace (0x2028)); /* Unicode line separator */
569 g_assert_true (g_unichar_isspace (0x2029)); /* Unicode paragraph separator */
570
571 /*** Testing TYPE() border cases ***/
572 g_assert_false (g_unichar_isspace (0x3FF5));
573 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
574 g_assert_false (g_unichar_isspace (0xFFEFF));
575 /* U+E0001 Language Tag */
576 g_assert_false (g_unichar_isspace (0xE0001));
577 g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR));
578 g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR + 1));
579 g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR_PART1));
580 g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR_PART1 + 1));
581}
582
583/* Test that g_unichar_isalnum() returns the correct value for various
584 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
585static void
586test_alnum (void)
587{
588 g_assert_false (g_unichar_isalnum (' '));
589 g_assert_true (g_unichar_isalnum ('a'));
590 g_assert_true (g_unichar_isalnum ('z'));
591 g_assert_true (g_unichar_isalnum ('0'));
592 g_assert_true (g_unichar_isalnum ('9'));
593 g_assert_true (g_unichar_isalnum ('A'));
594 g_assert_true (g_unichar_isalnum ('Z'));
595 g_assert_false (g_unichar_isalnum ('-'));
596 g_assert_false (g_unichar_isalnum ('*'));
597 g_assert_true (g_unichar_isalnum (0xFF21)); /* Unichar fullwidth 'A' */
598 g_assert_true (g_unichar_isalnum (0xFF3A)); /* Unichar fullwidth 'Z' */
599 g_assert_true (g_unichar_isalnum (0xFF41)); /* Unichar fullwidth 'a' */
600 g_assert_true (g_unichar_isalnum (0xFF5A)); /* Unichar fullwidth 'z' */
601 g_assert_true (g_unichar_isalnum (0xFF10)); /* Unichar fullwidth '0' */
602 g_assert_true (g_unichar_isalnum (0xFF19)); /* Unichar fullwidth '9' */
603 g_assert_false (g_unichar_isalnum (0xFF0A)); /* Unichar fullwidth '*' */
604
605 /*** Testing TYPE() border cases ***/
606 g_assert_true (g_unichar_isalnum (0x3FF5));
607 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
608 g_assert_false (g_unichar_isalnum (0xFFEFF));
609 /* U+E0001 Language Tag */
610 g_assert_false (g_unichar_isalnum (0xE0001));
611 g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR));
612 g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR + 1));
613 g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR_PART1));
614 g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR_PART1 + 1));
615}
616
617/* Test that g_unichar_isalpha() returns the correct value for various
618 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
619static void
620test_alpha (void)
621{
622 g_assert_false (g_unichar_isalpha (' '));
623 g_assert_true (g_unichar_isalpha ('a'));
624 g_assert_true (g_unichar_isalpha ('z'));
625 g_assert_false (g_unichar_isalpha ('0'));
626 g_assert_false (g_unichar_isalpha ('9'));
627 g_assert_true (g_unichar_isalpha ('A'));
628 g_assert_true (g_unichar_isalpha ('Z'));
629 g_assert_false (g_unichar_isalpha ('-'));
630 g_assert_false (g_unichar_isalpha ('*'));
631 g_assert_true (g_unichar_isalpha (0xFF21)); /* Unichar fullwidth 'A' */
632 g_assert_true (g_unichar_isalpha (0xFF3A)); /* Unichar fullwidth 'Z' */
633 g_assert_true (g_unichar_isalpha (0xFF41)); /* Unichar fullwidth 'a' */
634 g_assert_true (g_unichar_isalpha (0xFF5A)); /* Unichar fullwidth 'z' */
635 g_assert_false (g_unichar_isalpha (0xFF10)); /* Unichar fullwidth '0' */
636 g_assert_false (g_unichar_isalpha (0xFF19)); /* Unichar fullwidth '9' */
637 g_assert_false (g_unichar_isalpha (0xFF0A)); /* Unichar fullwidth '*' */
638
639 /*** Testing TYPE() border cases ***/
640 g_assert_true (g_unichar_isalpha (0x3FF5));
641 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
642 g_assert_false (g_unichar_isalpha (0xFFEFF));
643 /* U+E0001 Language Tag */
644 g_assert_false (g_unichar_isalpha (0xE0001));
645 g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR));
646 g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR + 1));
647 g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR_PART1));
648 g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR_PART1 + 1));
649}
650
651/* Test that g_unichar_isdigit() returns the correct value for various
652 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
653static void
654test_digit (void)
655{
656 g_assert_false (g_unichar_isdigit (' '));
657 g_assert_false (g_unichar_isdigit ('a'));
658 g_assert_true (g_unichar_isdigit ('0'));
659 g_assert_true (g_unichar_isdigit ('9'));
660 g_assert_false (g_unichar_isdigit ('A'));
661 g_assert_false (g_unichar_isdigit ('-'));
662 g_assert_false (g_unichar_isdigit ('*'));
663 g_assert_false (g_unichar_isdigit (0xFF21)); /* Unichar fullwidth 'A' */
664 g_assert_false (g_unichar_isdigit (0xFF3A)); /* Unichar fullwidth 'Z' */
665 g_assert_false (g_unichar_isdigit (0xFF41)); /* Unichar fullwidth 'a' */
666 g_assert_false (g_unichar_isdigit (0xFF5A)); /* Unichar fullwidth 'z' */
667 g_assert_true (g_unichar_isdigit (0xFF10)); /* Unichar fullwidth '0' */
668 g_assert_true (g_unichar_isdigit (0xFF19)); /* Unichar fullwidth '9' */
669 g_assert_false (g_unichar_isdigit (0xFF0A)); /* Unichar fullwidth '*' */
670
671 /*** Testing TYPE() border cases ***/
672 g_assert_false (g_unichar_isdigit (0x3FF5));
673 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
674 g_assert_false (g_unichar_isdigit (0xFFEFF));
675 /* U+E0001 Language Tag */
676 g_assert_false (g_unichar_isdigit (0xE0001));
677 g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR));
678 g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR + 1));
679 g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR_PART1));
680 g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR_PART1 + 1));
681}
682
683/* Test that g_unichar_digit_value() returns the correct value for various
684 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
685static void
686test_digit_value (void)
687{
688 g_assert_cmpint (g_unichar_digit_value (' '), ==, -1);
689 g_assert_cmpint (g_unichar_digit_value ('a'), ==, -1);
690 g_assert_cmpint (g_unichar_digit_value ('0'), ==, 0);
691 g_assert_cmpint (g_unichar_digit_value ('9'), ==, 9);
692 g_assert_cmpint (g_unichar_digit_value ('A'), ==, -1);
693 g_assert_cmpint (g_unichar_digit_value ('-'), ==, -1);
694 g_assert_cmpint (g_unichar_digit_value (0xFF21), ==, -1); /* Unichar 'A' */
695 g_assert_cmpint (g_unichar_digit_value (0xFF3A), ==, -1); /* Unichar 'Z' */
696 g_assert_cmpint (g_unichar_digit_value (0xFF41), ==, -1); /* Unichar 'a' */
697 g_assert_cmpint (g_unichar_digit_value (0xFF5A), ==, -1); /* Unichar 'z' */
698 g_assert_cmpint (g_unichar_digit_value (0xFF10), ==, 0); /* Unichar '0' */
699 g_assert_cmpint (g_unichar_digit_value (0xFF19), ==, 9); /* Unichar '9' */
700 g_assert_cmpint (g_unichar_digit_value (0xFF0A), ==, -1); /* Unichar '*' */
701
702 /*** Testing TYPE() border cases ***/
703 g_assert_cmpint (g_unichar_digit_value (0x3FF5), ==, -1);
704 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
705 g_assert_cmpint (g_unichar_digit_value (0xFFEFF), ==, -1);
706 /* U+E0001 Language Tag */
707 g_assert_cmpint (g_unichar_digit_value (0xE0001), ==, -1);
708 g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR), ==, -1);
709 g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR + 1), ==, -1);
710 g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR_PART1), ==, -1);
711 g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR_PART1 + 1), ==, -1);
712}
713
714/* Test that g_unichar_isxdigit() returns the correct value for various
715 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
716static void
717test_xdigit (void)
718{
719 g_assert_false (g_unichar_isxdigit (' '));
720 g_assert_true (g_unichar_isxdigit ('a'));
721 g_assert_true (g_unichar_isxdigit ('f'));
722 g_assert_false (g_unichar_isxdigit ('g'));
723 g_assert_false (g_unichar_isxdigit ('z'));
724 g_assert_true (g_unichar_isxdigit ('0'));
725 g_assert_true (g_unichar_isxdigit ('9'));
726 g_assert_true (g_unichar_isxdigit ('A'));
727 g_assert_true (g_unichar_isxdigit ('F'));
728 g_assert_false (g_unichar_isxdigit ('G'));
729 g_assert_false (g_unichar_isxdigit ('Z'));
730 g_assert_false (g_unichar_isxdigit ('-'));
731 g_assert_false (g_unichar_isxdigit ('*'));
732 g_assert_true (g_unichar_isxdigit (0xFF21)); /* Unichar fullwidth 'A' */
733 g_assert_true (g_unichar_isxdigit (0xFF26)); /* Unichar fullwidth 'F' */
734 g_assert_false (g_unichar_isxdigit (0xFF27)); /* Unichar fullwidth 'G' */
735 g_assert_false (g_unichar_isxdigit (0xFF3A)); /* Unichar fullwidth 'Z' */
736 g_assert_true (g_unichar_isxdigit (0xFF41)); /* Unichar fullwidth 'a' */
737 g_assert_true (g_unichar_isxdigit (0xFF46)); /* Unichar fullwidth 'f' */
738 g_assert_false (g_unichar_isxdigit (0xFF47)); /* Unichar fullwidth 'g' */
739 g_assert_false (g_unichar_isxdigit (0xFF5A)); /* Unichar fullwidth 'z' */
740 g_assert_true (g_unichar_isxdigit (0xFF10)); /* Unichar fullwidth '0' */
741 g_assert_true (g_unichar_isxdigit (0xFF19)); /* Unichar fullwidth '9' */
742 g_assert_false (g_unichar_isxdigit (0xFF0A)); /* Unichar fullwidth '*' */
743
744 /*** Testing TYPE() border cases ***/
745 g_assert_false (g_unichar_isxdigit (0x3FF5));
746 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
747 g_assert_false (g_unichar_isxdigit (0xFFEFF));
748 /* U+E0001 Language Tag */
749 g_assert_false (g_unichar_isxdigit (0xE0001));
750 g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR));
751 g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR + 1));
752 g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR_PART1));
753 g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR_PART1 + 1));
754}
755
756/* Test that g_unichar_xdigit_value() returns the correct value for various
757 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
758static void
759test_xdigit_value (void)
760{
761 g_assert_cmpint (g_unichar_xdigit_value (' '), ==, -1);
762 g_assert_cmpint (g_unichar_xdigit_value ('a'), ==, 10);
763 g_assert_cmpint (g_unichar_xdigit_value ('f'), ==, 15);
764 g_assert_cmpint (g_unichar_xdigit_value ('g'), ==, -1);
765 g_assert_cmpint (g_unichar_xdigit_value ('0'), ==, 0);
766 g_assert_cmpint (g_unichar_xdigit_value ('9'), ==, 9);
767 g_assert_cmpint (g_unichar_xdigit_value ('A'), ==, 10);
768 g_assert_cmpint (g_unichar_xdigit_value ('F'), ==, 15);
769 g_assert_cmpint (g_unichar_xdigit_value ('G'), ==, -1);
770 g_assert_cmpint (g_unichar_xdigit_value ('-'), ==, -1);
771 g_assert_cmpint (g_unichar_xdigit_value (0xFF21), ==, 10); /* Unichar 'A' */
772 g_assert_cmpint (g_unichar_xdigit_value (0xFF26), ==, 15); /* Unichar 'F' */
773 g_assert_cmpint (g_unichar_xdigit_value (0xFF27), ==, -1); /* Unichar 'G' */
774 g_assert_cmpint (g_unichar_xdigit_value (0xFF3A), ==, -1); /* Unichar 'Z' */
775 g_assert_cmpint (g_unichar_xdigit_value (0xFF41), ==, 10); /* Unichar 'a' */
776 g_assert_cmpint (g_unichar_xdigit_value (0xFF46), ==, 15); /* Unichar 'f' */
777 g_assert_cmpint (g_unichar_xdigit_value (0xFF47), ==, -1); /* Unichar 'g' */
778 g_assert_cmpint (g_unichar_xdigit_value (0xFF5A), ==, -1); /* Unichar 'z' */
779 g_assert_cmpint (g_unichar_xdigit_value (0xFF10), ==, 0); /* Unichar '0' */
780 g_assert_cmpint (g_unichar_xdigit_value (0xFF19), ==, 9); /* Unichar '9' */
781 g_assert_cmpint (g_unichar_xdigit_value (0xFF0A), ==, -1); /* Unichar '*' */
782
783 /*** Testing TYPE() border cases ***/
784 g_assert_cmpint (g_unichar_xdigit_value (0x3FF5), ==, -1);
785 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
786 g_assert_cmpint (g_unichar_xdigit_value (0xFFEFF), ==, -1);
787 /* U+E0001 Language Tag */
788 g_assert_cmpint (g_unichar_xdigit_value (0xE0001), ==, -1);
789 g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR), ==, -1);
790 g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR + 1), ==, -1);
791 g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR_PART1), ==, -1);
792 g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR_PART1 + 1), ==, -1);
793}
794
795/* Test that g_unichar_ispunct() returns the correct value for various
796 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
797static void
798test_punctuation (void)
799{
800 g_assert_false (g_unichar_ispunct (' '));
801 g_assert_false (g_unichar_ispunct ('a'));
802 g_assert_true (g_unichar_ispunct ('.'));
803 g_assert_true (g_unichar_ispunct (','));
804 g_assert_true (g_unichar_ispunct (';'));
805 g_assert_true (g_unichar_ispunct (':'));
806 g_assert_true (g_unichar_ispunct ('-'));
807
808 g_assert_false (g_unichar_ispunct (0xFF21)); /* Unichar fullwidth 'A' */
809 g_assert_true (g_unichar_ispunct (0x005F)); /* Unichar fullwidth '.' */
810 g_assert_true (g_unichar_ispunct (0x058A)); /* Unichar fullwidth '-' */
811
812 /*** Testing TYPE() border cases ***/
813 g_assert_false (g_unichar_ispunct (0x3FF5));
814 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
815 g_assert_false (g_unichar_ispunct (0xFFEFF));
816 /* U+E0001 Language Tag */
817 g_assert_false (g_unichar_ispunct (0xE0001));
818 g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR));
819 g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR + 1));
820 g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR_PART1));
821 g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR_PART1 + 1));
822}
823
824/* Test that g_unichar_iscntrl() returns the correct value for various
825 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
826static void
827test_cntrl (void)
828{
829 g_assert_true (g_unichar_iscntrl (0x08));
830 g_assert_false (g_unichar_iscntrl ('a'));
831 g_assert_true (g_unichar_iscntrl (0x007F)); /* Unichar fullwidth <del> */
832 g_assert_true (g_unichar_iscntrl (0x009F)); /* Unichar fullwidth control */
833
834 /*** Testing TYPE() border cases ***/
835 g_assert_false (g_unichar_iscntrl (0x3FF5));
836 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
837 g_assert_false (g_unichar_iscntrl (0xFFEFF));
838 /* U+E0001 Language Tag */
839 g_assert_false (g_unichar_iscntrl (0xE0001));
840 g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR));
841 g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR + 1));
842 g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR_PART1));
843 g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR_PART1 + 1));
844}
845
846/* Test that g_unichar_isgraph() returns the correct value for various
847 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
848static void
849test_graph (void)
850{
851 g_assert_false (g_unichar_isgraph (0x08));
852 g_assert_false (g_unichar_isgraph (' '));
853 g_assert_true (g_unichar_isgraph ('a'));
854 g_assert_true (g_unichar_isgraph ('0'));
855 g_assert_true (g_unichar_isgraph ('9'));
856 g_assert_true (g_unichar_isgraph ('A'));
857 g_assert_true (g_unichar_isgraph ('-'));
858 g_assert_true (g_unichar_isgraph ('*'));
859 g_assert_true (g_unichar_isgraph (0xFF21)); /* Unichar fullwidth 'A' */
860 g_assert_true (g_unichar_isgraph (0xFF3A)); /* Unichar fullwidth 'Z' */
861 g_assert_true (g_unichar_isgraph (0xFF41)); /* Unichar fullwidth 'a' */
862 g_assert_true (g_unichar_isgraph (0xFF5A)); /* Unichar fullwidth 'z' */
863 g_assert_true (g_unichar_isgraph (0xFF10)); /* Unichar fullwidth '0' */
864 g_assert_true (g_unichar_isgraph (0xFF19)); /* Unichar fullwidth '9' */
865 g_assert_true (g_unichar_isgraph (0xFF0A)); /* Unichar fullwidth '*' */
866 g_assert_false (g_unichar_isgraph (0x007F)); /* Unichar fullwidth <del> */
867 g_assert_false (g_unichar_isgraph (0x009F)); /* Unichar fullwidth control */
868
869 /*** Testing TYPE() border cases ***/
870 g_assert_true (g_unichar_isgraph (0x3FF5));
871 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
872 g_assert_true (g_unichar_isgraph (0xFFEFF));
873 /* U+E0001 Language Tag */
874 g_assert_false (g_unichar_isgraph (0xE0001));
875 g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR));
876 g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR + 1));
877 g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR_PART1));
878 g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR_PART1 + 1));
879}
880
881/* Test that g_unichar_iszerowidth() returns the correct value for various
882 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
883static void
884test_zerowidth (void)
885{
886 g_assert_false (g_unichar_iszerowidth (0x00AD));
887 g_assert_false (g_unichar_iszerowidth (0x115F));
888 g_assert_true (g_unichar_iszerowidth (0x1160));
889 g_assert_true (g_unichar_iszerowidth (0x11AA));
890 g_assert_true (g_unichar_iszerowidth (0x11FF));
891 g_assert_false (g_unichar_iszerowidth (0x1200));
892 g_assert_false (g_unichar_iszerowidth (0x200A));
893 g_assert_true (g_unichar_iszerowidth (0x200B));
894 g_assert_true (g_unichar_iszerowidth (0x200C));
895 g_assert_true (g_unichar_iszerowidth (0x591));
896
897 /*** Testing TYPE() border cases ***/
898 g_assert_false (g_unichar_iszerowidth (0x3FF5));
899 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
900 g_assert_false (g_unichar_iszerowidth (0xFFEFF));
901 /* U+E0001 Language Tag */
902 g_assert_true (g_unichar_iszerowidth (0xE0001));
903 g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR));
904 g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR + 1));
905 g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR_PART1));
906 g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR_PART1 + 1));
907}
908
909/* Test that g_unichar_istitle() returns the correct value for various
910 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
911static void
912test_title (void)
913{
914 g_assert_true (g_unichar_istitle (0x01c5));
915 g_assert_true (g_unichar_istitle (0x1f88));
916 g_assert_true (g_unichar_istitle (0x1fcc));
917 g_assert_false (g_unichar_istitle ('a'));
918 g_assert_false (g_unichar_istitle ('A'));
919 g_assert_false (g_unichar_istitle (';'));
920
921 /*** Testing TYPE() border cases ***/
922 g_assert_false (g_unichar_istitle (0x3FF5));
923 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
924 g_assert_false (g_unichar_istitle (0xFFEFF));
925 /* U+E0001 Language Tag */
926 g_assert_false (g_unichar_istitle (0xE0001));
927 g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR));
928 g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR + 1));
929 g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR_PART1));
930 g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR_PART1 + 1));
931
932 g_assert_cmphex (g_unichar_totitle (0x0000), ==, 0x0000);
933 g_assert_cmphex (g_unichar_totitle (0x01c6), ==, 0x01c5);
934 g_assert_cmphex (g_unichar_totitle (0x01c4), ==, 0x01c5);
935 g_assert_cmphex (g_unichar_totitle (0x01c5), ==, 0x01c5);
936 g_assert_cmphex (g_unichar_totitle (0x1f80), ==, 0x1f88);
937 g_assert_cmphex (g_unichar_totitle (0x1f88), ==, 0x1f88);
938 g_assert_cmphex (g_unichar_totitle ('a'), ==, 'A');
939 g_assert_cmphex (g_unichar_totitle ('A'), ==, 'A');
940
941 /*** Testing TYPE() border cases ***/
942 g_assert_cmphex (g_unichar_totitle (0x3FF5), ==, 0x3FF5);
943 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
944 g_assert_cmphex (g_unichar_totitle (0xFFEFF), ==, 0xFFEFF);
945 g_assert_cmphex (g_unichar_totitle (0xDFFFF), ==, 0xDFFFF);
946 /* U+E0001 Language Tag */
947 g_assert_cmphex (g_unichar_totitle (0xE0001), ==, 0xE0001);
948 g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR), ==,
949 G_UNICODE_LAST_CHAR);
950 g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR + 1), ==,
951 (G_UNICODE_LAST_CHAR + 1));
952 g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR_PART1), ==,
953 (G_UNICODE_LAST_CHAR_PART1));
954 g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR_PART1 + 1), ==,
955 (G_UNICODE_LAST_CHAR_PART1 + 1));
956}
957
958/* Test that g_unichar_isupper() returns the correct value for various
959 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
960static void
961test_upper (void)
962{
963 g_assert_false (g_unichar_isupper (' '));
964 g_assert_false (g_unichar_isupper ('0'));
965 g_assert_false (g_unichar_isupper ('a'));
966 g_assert_true (g_unichar_isupper ('A'));
967 g_assert_false (g_unichar_isupper (0xff41)); /* Unicode fullwidth 'a' */
968 g_assert_true (g_unichar_isupper (0xff21)); /* Unicode fullwidth 'A' */
969
970 /*** Testing TYPE() border cases ***/
971 g_assert_false (g_unichar_isupper (0x3FF5));
972 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
973 g_assert_false (g_unichar_isupper (0xFFEFF));
974 /* U+E0001 Language Tag */
975 g_assert_false (g_unichar_isupper (0xE0001));
976 g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR));
977 g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR + 1));
978 g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR_PART1));
979 g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR_PART1 + 1));
980}
981
982/* Test that g_unichar_islower() returns the correct value for various
983 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
984static void
985test_lower (void)
986{
987 g_assert_false (g_unichar_islower (' '));
988 g_assert_false (g_unichar_islower ('0'));
989 g_assert_true (g_unichar_islower ('a'));
990 g_assert_false (g_unichar_islower ('A'));
991 g_assert_true (g_unichar_islower (0xff41)); /* Unicode fullwidth 'a' */
992 g_assert_false (g_unichar_islower (0xff21)); /* Unicode fullwidth 'A' */
993
994 /*** Testing TYPE() border cases ***/
995 g_assert_false (g_unichar_islower (0x3FF5));
996 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
997 g_assert_false (g_unichar_islower (0xFFEFF));
998 /* U+E0001 Language Tag */
999 g_assert_false (g_unichar_islower (0xE0001));
1000 g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR));
1001 g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR + 1));
1002 g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR_PART1));
1003 g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR_PART1 + 1));
1004}
1005
1006/* Test that g_unichar_isprint() returns the correct value for various
1007 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
1008static void
1009test_print (void)
1010{
1011 g_assert_true (g_unichar_isprint (' '));
1012 g_assert_true (g_unichar_isprint ('0'));
1013 g_assert_true (g_unichar_isprint ('a'));
1014 g_assert_true (g_unichar_isprint ('A'));
1015 g_assert_true (g_unichar_isprint (0xff41)); /* Unicode fullwidth 'a' */
1016 g_assert_true (g_unichar_isprint (0xff21)); /* Unicode fullwidth 'A' */
1017
1018 /*** Testing TYPE() border cases ***/
1019 g_assert_true (g_unichar_isprint (0x3FF5));
1020 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
1021 g_assert_true (g_unichar_isprint (0xFFEFF));
1022 /* U+E0001 Language Tag */
1023 g_assert_false (g_unichar_isprint (0xE0001));
1024 g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR));
1025 g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR + 1));
1026 g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR_PART1));
1027 g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR_PART1 + 1));
1028}
1029
1030/* Test that g_unichar_toupper() and g_unichar_tolower() return the
1031 * correct values for various ASCII and Unicode alphabetic, numeric,
1032 * and other, codepoints. */
1033static void
1034test_cases (void)
1035{
1036 g_assert_cmphex (g_unichar_toupper (0x0), ==, 0x0);
1037 g_assert_cmphex (g_unichar_tolower (0x0), ==, 0x0);
1038 g_assert_cmphex (g_unichar_toupper ('a'), ==, 'A');
1039 g_assert_cmphex (g_unichar_toupper ('A'), ==, 'A');
1040 /* Unicode fullwidth 'a' == 'A' */
1041 g_assert_cmphex (g_unichar_toupper (0xff41), ==, 0xff21);
1042 /* Unicode fullwidth 'A' == 'A' */
1043 g_assert_cmphex (g_unichar_toupper (0xff21), ==, 0xff21);
1044 g_assert_cmphex (g_unichar_toupper (0x01C5), ==, 0x01C4);
1045 g_assert_cmphex (g_unichar_toupper (0x01C6), ==, 0x01C4);
1046 g_assert_cmphex (g_unichar_tolower ('A'), ==, 'a');
1047 g_assert_cmphex (g_unichar_tolower ('a'), ==, 'a');
1048 /* Unicode fullwidth 'A' == 'a' */
1049 g_assert_cmphex (g_unichar_tolower (0xff21), ==, 0xff41);
1050 /* Unicode fullwidth 'a' == 'a' */
1051 g_assert_cmphex (g_unichar_tolower (0xff41), ==, 0xff41);
1052 g_assert_cmphex (g_unichar_tolower (0x01C4), ==, 0x01C6);
1053 g_assert_cmphex (g_unichar_tolower (0x01C5), ==, 0x01C6);
1054 g_assert_cmphex (g_unichar_tolower (0x1F8A), ==, 0x1F82);
1055 g_assert_cmphex (g_unichar_totitle (0x1F8A), ==, 0x1F8A);
1056 g_assert_cmphex (g_unichar_toupper (0x1F8A), ==, 0x1F8A);
1057 g_assert_cmphex (g_unichar_tolower (0x1FB2), ==, 0x1FB2);
1058 g_assert_cmphex (g_unichar_toupper (0x1FB2), ==, 0x1FB2);
1059
1060 /* U+130 is a special case, it's a 'I' with a dot on top */
1061 g_assert_cmphex (g_unichar_tolower (0x130), ==, 0x69);
1062
1063 /* Testing ATTTABLE() border cases */
1064 g_assert_cmphex (g_unichar_toupper (0x1D6FE), ==, 0x1D6FE);
1065
1066 /*** Testing TYPE() border cases ***/
1067 g_assert_cmphex (g_unichar_toupper (0x3FF5), ==, 0x3FF5);
1068 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
1069 g_assert_cmphex (g_unichar_toupper (0xFFEFF), ==, 0xFFEFF);
1070 g_assert_cmphex (g_unichar_toupper (0xDFFFF), ==, 0xDFFFF);
1071 /* U+E0001 Language Tag */
1072 g_assert_cmphex (g_unichar_toupper (0xE0001), ==, 0xE0001);
1073 g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR), ==,
1074 G_UNICODE_LAST_CHAR);
1075 g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR + 1), ==,
1076 (G_UNICODE_LAST_CHAR + 1));
1077 g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR_PART1), ==,
1078 (G_UNICODE_LAST_CHAR_PART1));
1079 g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR_PART1 + 1), ==,
1080 (G_UNICODE_LAST_CHAR_PART1 + 1));
1081
1082 /* Testing ATTTABLE() border cases */
1083 g_assert_cmphex (g_unichar_tolower (0x1D6FA), ==, 0x1D6FA);
1084
1085 /*** Testing TYPE() border cases ***/
1086 g_assert_cmphex (g_unichar_tolower (0x3FF5), ==, 0x3FF5);
1087 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
1088 g_assert_cmphex (g_unichar_tolower (0xFFEFF), ==, 0xFFEFF);
1089 g_assert_cmphex (g_unichar_tolower (0xDFFFF), ==, 0xDFFFF);
1090 /* U+E0001 Language Tag */
1091 g_assert_cmphex (g_unichar_tolower (0xE0001), ==, 0xE0001);
1092 g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR), ==,
1093 G_UNICODE_LAST_CHAR);
1094 g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR + 1), ==,
1095 (G_UNICODE_LAST_CHAR + 1));
1096 g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR_PART1), ==,
1097 G_UNICODE_LAST_CHAR_PART1);
1098 g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR_PART1 + 1), ==,
1099 (G_UNICODE_LAST_CHAR_PART1 + 1));
1100}
1101
1102/* Test that g_unichar_isdefined() returns the correct value for various
1103 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
1104static void
1105test_defined (void)
1106{
1107 g_assert_true (g_unichar_isdefined (0x0903));
1108 g_assert_true (g_unichar_isdefined (0x20DD));
1109 g_assert_true (g_unichar_isdefined (0x20BA));
1110 g_assert_true (g_unichar_isdefined (0xA806));
1111 g_assert_true (g_unichar_isdefined ('a'));
1112 g_assert_false (g_unichar_isdefined (0x10C49));
1113 g_assert_false (g_unichar_isdefined (0x169D));
1114
1115 /*** Testing TYPE() border cases ***/
1116 g_assert_true (g_unichar_isdefined (0x3FF5));
1117 /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */
1118 g_assert_true (g_unichar_isdefined (0xFFEFF));
1119 g_assert_false (g_unichar_isdefined (0xDFFFF));
1120 /* U+E0001 Language Tag */
1121 g_assert_true (g_unichar_isdefined (0xE0001));
1122 g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR));
1123 g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR + 1));
1124 g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR_PART1));
1125 g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR_PART1 + 1));
1126}
1127
1128/* Test that g_unichar_iswide() returns the correct value for various
1129 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
1130static void
1131test_wide (void)
1132{
1133 guint i;
1134 struct {
1135 gunichar c;
1136 enum {
1137 NOT_WIDE,
1138 WIDE_CJK,
1139 WIDE
1140 } wide;
1141 } examples[] = {
1142 /* Neutral */
1143 { 0x0000, NOT_WIDE },
1144 { 0x0483, NOT_WIDE },
1145 { 0x0641, NOT_WIDE },
1146 { 0xFFFC, NOT_WIDE },
1147 { 0x10000, NOT_WIDE },
1148 { 0xE0001, NOT_WIDE },
1149 { 0x2FFFE, NOT_WIDE },
1150 { 0x3FFFE, NOT_WIDE },
1151
1152 /* Narrow */
1153 { 0x0020, NOT_WIDE },
1154 { 0x0041, NOT_WIDE },
1155 { 0x27E6, NOT_WIDE },
1156
1157 /* Halfwidth */
1158 { 0x20A9, NOT_WIDE },
1159 { 0xFF61, NOT_WIDE },
1160 { 0xFF69, NOT_WIDE },
1161 { 0xFFEE, NOT_WIDE },
1162
1163 /* Ambiguous */
1164 { 0x00A1, WIDE_CJK },
1165 { 0x00BE, WIDE_CJK },
1166 { 0x02DD, WIDE_CJK },
1167 { 0x2020, WIDE_CJK },
1168 { 0xFFFD, WIDE_CJK },
1169 { 0x00A1, WIDE_CJK },
1170 { 0x1F100, WIDE_CJK },
1171 { 0xE0100, WIDE_CJK },
1172 { 0x100000, WIDE_CJK },
1173 { 0x10FFFD, WIDE_CJK },
1174
1175 /* Fullwidth */
1176 { 0x3000, WIDE },
1177 { 0xFF60, WIDE },
1178
1179 /* Wide */
1180 { 0x2329, WIDE },
1181 { 0x3001, WIDE },
1182 { 0xFE69, WIDE },
1183 { 0x30000, WIDE },
1184 { 0x3FFFD, WIDE },
1185
1186 /* Default Wide blocks */
1187 { 0x4DBF, WIDE },
1188 { 0x9FFF, WIDE },
1189 { 0xFAFF, WIDE },
1190 { 0x2A6DF, WIDE },
1191 { 0x2B73F, WIDE },
1192 { 0x2B81F, WIDE },
1193 { 0x2FA1F, WIDE },
1194
1195 /* Uniode-5.2 character additions */
1196 /* Wide */
1197 { 0x115F, WIDE },
1198
1199 /* Uniode-6.0 character additions */
1200 /* Wide */
1201 { 0x2B740, WIDE },
1202 { 0x1B000, WIDE },
1203
1204 { 0x111111, NOT_WIDE }
1205 };
1206
1207 for (i = 0; i < G_N_ELEMENTS (examples); i++)
1208 {
1209 g_assert_cmpint (g_unichar_iswide (examples[i].c), ==,
1210 (examples[i].wide == WIDE));
1211 g_assert_cmpint (g_unichar_iswide_cjk (examples[i].c), ==,
1212 (examples[i].wide != NOT_WIDE));
1213 }
1214};
1215
1216/* Test that g_unichar_compose() returns the correct value for various
1217 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
1218static void
1219test_compose (void)
1220{
1221 gunichar ch;
1222
1223 /* Not composable */
1224 g_assert_false (g_unichar_compose (0x0041, 0x0042, &ch) && ch == 0);
1225 g_assert_false (g_unichar_compose (0x0041, 0, &ch) && ch == 0);
1226 g_assert_false (g_unichar_compose (0x0066, 0x0069, &ch) && ch == 0);
1227
1228 /* Tricky non-composable */
1229 g_assert_false (g_unichar_compose (0x0308, 0x0301, &ch) && ch == 0); /* !0x0344 */
1230 g_assert_false (g_unichar_compose (0x0F71, 0x0F72, &ch) && ch == 0); /* !0x0F73 */
1231
1232 /* Singletons should not compose */
1233 g_assert_false (g_unichar_compose (0x212B, 0, &ch) && ch == 0);
1234 g_assert_false (g_unichar_compose (0x00C5, 0, &ch) && ch == 0);
1235 g_assert_false (g_unichar_compose (0x2126, 0, &ch) && ch == 0);
1236 g_assert_false (g_unichar_compose (0x03A9, 0, &ch) && ch == 0);
1237
1238 /* Pairs */
1239 g_assert_true (g_unichar_compose (0x0041, 0x030A, &ch) && ch == 0x00C5);
1240 g_assert_true (g_unichar_compose (0x006F, 0x0302, &ch) && ch == 0x00F4);
1241 g_assert_true (g_unichar_compose (0x1E63, 0x0307, &ch) && ch == 0x1E69);
1242 g_assert_true (g_unichar_compose (0x0073, 0x0323, &ch) && ch == 0x1E63);
1243 g_assert_true (g_unichar_compose (0x0064, 0x0307, &ch) && ch == 0x1E0B);
1244 g_assert_true (g_unichar_compose (0x0064, 0x0323, &ch) && ch == 0x1E0D);
1245
1246 /* Hangul */
1247 g_assert_true (g_unichar_compose (0xD4CC, 0x11B6, &ch) && ch == 0xD4DB);
1248 g_assert_true (g_unichar_compose (0x1111, 0x1171, &ch) && ch == 0xD4CC);
1249 g_assert_true (g_unichar_compose (0xCE20, 0x11B8, &ch) && ch == 0xCE31);
1250 g_assert_true (g_unichar_compose (0x110E, 0x1173, &ch) && ch == 0xCE20);
1251}
1252
1253/* Test that g_unichar_decompose() returns the correct value for various
1254 * ASCII and Unicode alphabetic, numeric, and other, codepoints. */
1255static void
1256test_decompose (void)
1257{
1258 gunichar a, b;
1259
1260 /* Not decomposable */
1261 g_assert_false (g_unichar_decompose (0x0041, &a, &b) && a == 0x0041 && b == 0);
1262 g_assert_false (g_unichar_decompose (0xFB01, &a, &b) && a == 0xFB01 && b == 0);
1263
1264 /* Singletons */
1265 g_assert_true (g_unichar_decompose (0x212B, &a, &b) && a == 0x00C5 && b == 0);
1266 g_assert_true (g_unichar_decompose (0x2126, &a, &b) && a == 0x03A9 && b == 0);
1267
1268 /* Tricky pairs */
1269 g_assert_true (g_unichar_decompose (0x0344, &a, &b) && a == 0x0308 && b == 0x0301);
1270 g_assert_true (g_unichar_decompose (0x0F73, &a, &b) && a == 0x0F71 && b == 0x0F72);
1271
1272 /* Pairs */
1273 g_assert_true (g_unichar_decompose (0x00C5, &a, &b) && a == 0x0041 && b == 0x030A);
1274 g_assert_true (g_unichar_decompose (0x00F4, &a, &b) && a == 0x006F && b == 0x0302);
1275 g_assert_true (g_unichar_decompose (0x1E69, &a, &b) && a == 0x1E63 && b == 0x0307);
1276 g_assert_true (g_unichar_decompose (0x1E63, &a, &b) && a == 0x0073 && b == 0x0323);
1277 g_assert_true (g_unichar_decompose (0x1E0B, &a, &b) && a == 0x0064 && b == 0x0307);
1278 g_assert_true (g_unichar_decompose (0x1E0D, &a, &b) && a == 0x0064 && b == 0x0323);
1279
1280 /* Hangul */
1281 g_assert_true (g_unichar_decompose (0xD4DB, &a, &b) && a == 0xD4CC && b == 0x11B6);
1282 g_assert_true (g_unichar_decompose (0xD4CC, &a, &b) && a == 0x1111 && b == 0x1171);
1283 g_assert_true (g_unichar_decompose (0xCE31, &a, &b) && a == 0xCE20 && b == 0x11B8);
1284 g_assert_true (g_unichar_decompose (0xCE20, &a, &b) && a == 0x110E && b == 0x1173);
1285}
1286
1287/* Test that g_unichar_fully_decompose() returns the correct value for
1288 * various ASCII and Unicode alphabetic, numeric, and other, codepoints. */
1289static void
1290test_fully_decompose_canonical (void)
1291{
1292 gunichar decomp[5];
1293 gsize len;
1294
1295#define TEST_DECOMP(ch, expected_len, a, b, c, d) \
1296 len = g_unichar_fully_decompose (ch, FALSE, decomp, G_N_ELEMENTS (decomp)); \
1297 g_assert_cmpint (expected_len, ==, len); \
1298 if (expected_len >= 1) g_assert_cmphex (decomp[0], ==, a); \
1299 if (expected_len >= 2) g_assert_cmphex (decomp[1], ==, b); \
1300 if (expected_len >= 3) g_assert_cmphex (decomp[2], ==, c); \
1301 if (expected_len >= 4) g_assert_cmphex (decomp[3], ==, d); \
1302
1303#define TEST0(ch) TEST_DECOMP (ch, 1, ch, 0, 0, 0)
1304#define TEST1(ch, a) TEST_DECOMP (ch, 1, a, 0, 0, 0)
1305#define TEST2(ch, a, b) TEST_DECOMP (ch, 2, a, b, 0, 0)
1306#define TEST3(ch, a, b, c) TEST_DECOMP (ch, 3, a, b, c, 0)
1307#define TEST4(ch, a, b, c, d) TEST_DECOMP (ch, 4, a, b, c, d)
1308
1309 /* Not decomposable */
1310 TEST0 (0x0041);
1311 TEST0 (0xFB01);
1312
1313 /* Singletons */
1314 TEST2 (0x212B, 0x0041, 0x030A);
1315 TEST1 (0x2126, 0x03A9);
1316
1317 /* Tricky pairs */
1318 TEST2 (0x0344, 0x0308, 0x0301);
1319 TEST2 (0x0F73, 0x0F71, 0x0F72);
1320
1321 /* General */
1322 TEST2 (0x00C5, 0x0041, 0x030A);
1323 TEST2 (0x00F4, 0x006F, 0x0302);
1324 TEST3 (0x1E69, 0x0073, 0x0323, 0x0307);
1325 TEST2 (0x1E63, 0x0073, 0x0323);
1326 TEST2 (0x1E0B, 0x0064, 0x0307);
1327 TEST2 (0x1E0D, 0x0064, 0x0323);
1328
1329 /* Hangul */
1330 TEST3 (0xD4DB, 0x1111, 0x1171, 0x11B6);
1331 TEST2 (0xD4CC, 0x1111, 0x1171);
1332 TEST3 (0xCE31, 0x110E, 0x1173, 0x11B8);
1333 TEST2 (0xCE20, 0x110E, 0x1173);
1334
1335#undef TEST_DECOMP
1336}
1337
1338/* Test that g_unicode_canonical_decomposition() returns the correct
1339 * value for various ASCII and Unicode alphabetic, numeric, and other,
1340 * codepoints. */
1341static void
1342test_canonical_decomposition (void)
1343{
1344 gunichar *decomp;
1345 gsize len;
1346
1347#define TEST_DECOMP(ch, expected_len, a, b, c, d) \
1348 decomp = g_unicode_canonical_decomposition (ch, &len); \
1349 g_assert_cmpint (expected_len, ==, len); \
1350 if (expected_len >= 1) g_assert_cmphex (decomp[0], ==, a); \
1351 if (expected_len >= 2) g_assert_cmphex (decomp[1], ==, b); \
1352 if (expected_len >= 3) g_assert_cmphex (decomp[2], ==, c); \
1353 if (expected_len >= 4) g_assert_cmphex (decomp[3], ==, d); \
1354 g_free (decomp);
1355
1356#define TEST0(ch) TEST_DECOMP (ch, 1, ch, 0, 0, 0)
1357#define TEST1(ch, a) TEST_DECOMP (ch, 1, a, 0, 0, 0)
1358#define TEST2(ch, a, b) TEST_DECOMP (ch, 2, a, b, 0, 0)
1359#define TEST3(ch, a, b, c) TEST_DECOMP (ch, 3, a, b, c, 0)
1360#define TEST4(ch, a, b, c, d) TEST_DECOMP (ch, 4, a, b, c, d)
1361
1362 /* Not decomposable */
1363 TEST0 (0x0041);
1364 TEST0 (0xFB01);
1365
1366 /* Singletons */
1367 TEST2 (0x212B, 0x0041, 0x030A);
1368 TEST1 (0x2126, 0x03A9);
1369
1370 /* Tricky pairs */
1371 TEST2 (0x0344, 0x0308, 0x0301);
1372 TEST2 (0x0F73, 0x0F71, 0x0F72);
1373
1374 /* General */
1375 TEST2 (0x00C5, 0x0041, 0x030A);
1376 TEST2 (0x00F4, 0x006F, 0x0302);
1377 TEST3 (0x1E69, 0x0073, 0x0323, 0x0307);
1378 TEST2 (0x1E63, 0x0073, 0x0323);
1379 TEST2 (0x1E0B, 0x0064, 0x0307);
1380 TEST2 (0x1E0D, 0x0064, 0x0323);
1381
1382 /* Hangul */
1383 TEST3 (0xD4DB, 0x1111, 0x1171, 0x11B6);
1384 TEST2 (0xD4CC, 0x1111, 0x1171);
1385 TEST3 (0xCE31, 0x110E, 0x1173, 0x11B8);
1386 TEST2 (0xCE20, 0x110E, 0x1173);
1387
1388#undef TEST_DECOMP
1389}
1390
1391/* Test that g_unichar_decompose() whenever encouttering a char ch
1392 * decomposes into a and b, b itself won't decompose any further. */
1393static void
1394test_decompose_tail (void)
1395{
1396 gunichar ch, a, b, c, d;
1397
1398 /* Test that whenever a char ch decomposes into a and b, b itself
1399 * won't decompose any further. */
1400
1401 for (ch = 0; ch < 0x110000; ch++)
1402 if (g_unichar_decompose (ch, a: &a, b: &b))
1403 g_assert_false (g_unichar_decompose (b, &c, &d));
1404 else
1405 {
1406 g_assert_cmpuint (a, ==, ch);
1407 g_assert_cmpuint (b, ==, 0);
1408 }
1409}
1410
1411/* Test that all canonical decompositions of g_unichar_fully_decompose()
1412 * are at most 4 in length, and compatibility decompositions are
1413 * at most 18 in length. */
1414static void
1415test_fully_decompose_len (void)
1416{
1417 gunichar ch;
1418
1419 /* Test that all canonical decompositions are at most 4 in length,
1420 * and compatibility decompositions are at most 18 in length.
1421 */
1422
1423 for (ch = 0; ch < 0x110000; ch++) {
1424 g_assert_cmpint (g_unichar_fully_decompose (ch, FALSE, NULL, 0), <=, 4);
1425 g_assert_cmpint (g_unichar_fully_decompose (ch, TRUE, NULL, 0), <=, 18);
1426 }
1427}
1428
/* Test the ISO 15924 four-letter codes associated with each
 * GUnicodeScript value. */
1431static void
1432test_iso15924 (void)
1433{
1434 const struct {
1435 GUnicodeScript script;
1436 char four_letter_code[5];
1437 } data[] = {
1438 { G_UNICODE_SCRIPT_COMMON, "Zyyy" },
1439 { G_UNICODE_SCRIPT_INHERITED, "Zinh" },
1440 { G_UNICODE_SCRIPT_ARABIC, "Arab" },
1441 { G_UNICODE_SCRIPT_ARMENIAN, "Armn" },
1442 { G_UNICODE_SCRIPT_BENGALI, "Beng" },
1443 { G_UNICODE_SCRIPT_BOPOMOFO, "Bopo" },
1444 { G_UNICODE_SCRIPT_CHEROKEE, "Cher" },
1445 { G_UNICODE_SCRIPT_COPTIC, "Copt" },
1446 { G_UNICODE_SCRIPT_CYRILLIC, "Cyrl" },
1447 { G_UNICODE_SCRIPT_DESERET, "Dsrt" },
1448 { G_UNICODE_SCRIPT_DEVANAGARI, "Deva" },
1449 { G_UNICODE_SCRIPT_ETHIOPIC, "Ethi" },
1450 { G_UNICODE_SCRIPT_GEORGIAN, "Geor" },
1451 { G_UNICODE_SCRIPT_GOTHIC, "Goth" },
1452 { G_UNICODE_SCRIPT_GREEK, "Grek" },
1453 { G_UNICODE_SCRIPT_GUJARATI, "Gujr" },
1454 { G_UNICODE_SCRIPT_GURMUKHI, "Guru" },
1455 { G_UNICODE_SCRIPT_HAN, "Hani" },
1456 { G_UNICODE_SCRIPT_HANGUL, "Hang" },
1457 { G_UNICODE_SCRIPT_HEBREW, "Hebr" },
1458 { G_UNICODE_SCRIPT_HIRAGANA, "Hira" },
1459 { G_UNICODE_SCRIPT_KANNADA, "Knda" },
1460 { G_UNICODE_SCRIPT_KATAKANA, "Kana" },
1461 { G_UNICODE_SCRIPT_KHMER, "Khmr" },
1462 { G_UNICODE_SCRIPT_LAO, "Laoo" },
1463 { G_UNICODE_SCRIPT_LATIN, "Latn" },
1464 { G_UNICODE_SCRIPT_MALAYALAM, "Mlym" },
1465 { G_UNICODE_SCRIPT_MONGOLIAN, "Mong" },
1466 { G_UNICODE_SCRIPT_MYANMAR, "Mymr" },
1467 { G_UNICODE_SCRIPT_OGHAM, "Ogam" },
1468 { G_UNICODE_SCRIPT_OLD_ITALIC, "Ital" },
1469 { G_UNICODE_SCRIPT_ORIYA, "Orya" },
1470 { G_UNICODE_SCRIPT_RUNIC, "Runr" },
1471 { G_UNICODE_SCRIPT_SINHALA, "Sinh" },
1472 { G_UNICODE_SCRIPT_SYRIAC, "Syrc" },
1473 { G_UNICODE_SCRIPT_TAMIL, "Taml" },
1474 { G_UNICODE_SCRIPT_TELUGU, "Telu" },
1475 { G_UNICODE_SCRIPT_THAANA, "Thaa" },
1476 { G_UNICODE_SCRIPT_THAI, "Thai" },
1477 { G_UNICODE_SCRIPT_TIBETAN, "Tibt" },
1478 { G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, "Cans" },
1479 { G_UNICODE_SCRIPT_YI, "Yiii" },
1480 { G_UNICODE_SCRIPT_TAGALOG, "Tglg" },
1481 { G_UNICODE_SCRIPT_HANUNOO, "Hano" },
1482 { G_UNICODE_SCRIPT_BUHID, "Buhd" },
1483 { G_UNICODE_SCRIPT_TAGBANWA, "Tagb" },
1484
1485 /* Unicode-4.0 additions */
1486 { G_UNICODE_SCRIPT_BRAILLE, "Brai" },
1487 { G_UNICODE_SCRIPT_CYPRIOT, "Cprt" },
1488 { G_UNICODE_SCRIPT_LIMBU, "Limb" },
1489 { G_UNICODE_SCRIPT_OSMANYA, "Osma" },
1490 { G_UNICODE_SCRIPT_SHAVIAN, "Shaw" },
1491 { G_UNICODE_SCRIPT_LINEAR_B, "Linb" },
1492 { G_UNICODE_SCRIPT_TAI_LE, "Tale" },
1493 { G_UNICODE_SCRIPT_UGARITIC, "Ugar" },
1494
1495 /* Unicode-4.1 additions */
1496 { G_UNICODE_SCRIPT_NEW_TAI_LUE, "Talu" },
1497 { G_UNICODE_SCRIPT_BUGINESE, "Bugi" },
1498 { G_UNICODE_SCRIPT_GLAGOLITIC, "Glag" },
1499 { G_UNICODE_SCRIPT_TIFINAGH, "Tfng" },
1500 { G_UNICODE_SCRIPT_SYLOTI_NAGRI, "Sylo" },
1501 { G_UNICODE_SCRIPT_OLD_PERSIAN, "Xpeo" },
1502 { G_UNICODE_SCRIPT_KHAROSHTHI, "Khar" },
1503
1504 /* Unicode-5.0 additions */
1505 { G_UNICODE_SCRIPT_UNKNOWN, "Zzzz" },
1506 { G_UNICODE_SCRIPT_BALINESE, "Bali" },
1507 { G_UNICODE_SCRIPT_CUNEIFORM, "Xsux" },
1508 { G_UNICODE_SCRIPT_PHOENICIAN, "Phnx" },
1509 { G_UNICODE_SCRIPT_PHAGS_PA, "Phag" },
1510 { G_UNICODE_SCRIPT_NKO, "Nkoo" },
1511
1512 /* Unicode-5.1 additions */
1513 { G_UNICODE_SCRIPT_KAYAH_LI, "Kali" },
1514 { G_UNICODE_SCRIPT_LEPCHA, "Lepc" },
1515 { G_UNICODE_SCRIPT_REJANG, "Rjng" },
1516 { G_UNICODE_SCRIPT_SUNDANESE, "Sund" },
1517 { G_UNICODE_SCRIPT_SAURASHTRA, "Saur" },
1518 { G_UNICODE_SCRIPT_CHAM, "Cham" },
1519 { G_UNICODE_SCRIPT_OL_CHIKI, "Olck" },
1520 { G_UNICODE_SCRIPT_VAI, "Vaii" },
1521 { G_UNICODE_SCRIPT_CARIAN, "Cari" },
1522 { G_UNICODE_SCRIPT_LYCIAN, "Lyci" },
1523 { G_UNICODE_SCRIPT_LYDIAN, "Lydi" },
1524
1525 /* Unicode-5.2 additions */
1526 { G_UNICODE_SCRIPT_AVESTAN, "Avst" },
1527 { G_UNICODE_SCRIPT_BAMUM, "Bamu" },
1528 { G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS, "Egyp" },
1529 { G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC, "Armi" },
1530 { G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI, "Phli" },
1531 { G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN, "Prti" },
1532 { G_UNICODE_SCRIPT_JAVANESE, "Java" },
1533 { G_UNICODE_SCRIPT_KAITHI, "Kthi" },
1534 { G_UNICODE_SCRIPT_LISU, "Lisu" },
1535 { G_UNICODE_SCRIPT_MEETEI_MAYEK, "Mtei" },
1536 { G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN, "Sarb" },
1537 { G_UNICODE_SCRIPT_OLD_TURKIC, "Orkh" },
1538 { G_UNICODE_SCRIPT_SAMARITAN, "Samr" },
1539 { G_UNICODE_SCRIPT_TAI_THAM, "Lana" },
1540 { G_UNICODE_SCRIPT_TAI_VIET, "Tavt" },
1541
1542 /* Unicode-6.0 additions */
1543 { G_UNICODE_SCRIPT_BATAK, "Batk" },
1544 { G_UNICODE_SCRIPT_BRAHMI, "Brah" },
1545 { G_UNICODE_SCRIPT_MANDAIC, "Mand" },
1546
1547 /* Unicode-6.1 additions */
1548 { G_UNICODE_SCRIPT_CHAKMA, "Cakm" },
1549 { G_UNICODE_SCRIPT_MEROITIC_CURSIVE, "Merc" },
1550 { G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS, "Mero" },
1551 { G_UNICODE_SCRIPT_MIAO, "Plrd" },
1552 { G_UNICODE_SCRIPT_SHARADA, "Shrd" },
1553 { G_UNICODE_SCRIPT_SORA_SOMPENG, "Sora" },
1554 { G_UNICODE_SCRIPT_TAKRI, "Takr" },
1555
1556 /* Unicode 7.0 additions */
1557 { G_UNICODE_SCRIPT_BASSA_VAH, "Bass" },
1558 { G_UNICODE_SCRIPT_CAUCASIAN_ALBANIAN, "Aghb" },
1559 { G_UNICODE_SCRIPT_DUPLOYAN, "Dupl" },
1560 { G_UNICODE_SCRIPT_ELBASAN, "Elba" },
1561 { G_UNICODE_SCRIPT_GRANTHA, "Gran" },
1562 { G_UNICODE_SCRIPT_KHOJKI, "Khoj" },
1563 { G_UNICODE_SCRIPT_KHUDAWADI, "Sind" },
1564 { G_UNICODE_SCRIPT_LINEAR_A, "Lina" },
1565 { G_UNICODE_SCRIPT_MAHAJANI, "Mahj" },
1566 { G_UNICODE_SCRIPT_MANICHAEAN, "Mani" },
1567 { G_UNICODE_SCRIPT_MENDE_KIKAKUI, "Mend" },
1568 { G_UNICODE_SCRIPT_MODI, "Modi" },
1569 { G_UNICODE_SCRIPT_MRO, "Mroo" },
1570 { G_UNICODE_SCRIPT_NABATAEAN, "Nbat" },
1571 { G_UNICODE_SCRIPT_OLD_NORTH_ARABIAN, "Narb" },
1572 { G_UNICODE_SCRIPT_OLD_PERMIC, "Perm" },
1573 { G_UNICODE_SCRIPT_PAHAWH_HMONG, "Hmng" },
1574 { G_UNICODE_SCRIPT_PALMYRENE, "Palm" },
1575 { G_UNICODE_SCRIPT_PAU_CIN_HAU, "Pauc" },
1576 { G_UNICODE_SCRIPT_PSALTER_PAHLAVI, "Phlp" },
1577 { G_UNICODE_SCRIPT_SIDDHAM, "Sidd" },
1578 { G_UNICODE_SCRIPT_TIRHUTA, "Tirh" },
1579 { G_UNICODE_SCRIPT_WARANG_CITI, "Wara" },
1580
1581 /* Unicode 8.0 additions */
1582 { G_UNICODE_SCRIPT_AHOM, "Ahom" },
1583 { G_UNICODE_SCRIPT_ANATOLIAN_HIEROGLYPHS, "Hluw" },
1584 { G_UNICODE_SCRIPT_HATRAN, "Hatr" },
1585 { G_UNICODE_SCRIPT_MULTANI, "Mult" },
1586 { G_UNICODE_SCRIPT_OLD_HUNGARIAN, "Hung" },
1587 { G_UNICODE_SCRIPT_SIGNWRITING, "Sgnw" },
1588
1589 /* Unicode 9.0 additions */
1590 { G_UNICODE_SCRIPT_ADLAM, "Adlm" },
1591 { G_UNICODE_SCRIPT_BHAIKSUKI, "Bhks" },
1592 { G_UNICODE_SCRIPT_MARCHEN, "Marc" },
1593 { G_UNICODE_SCRIPT_NEWA, "Newa" },
1594 { G_UNICODE_SCRIPT_OSAGE, "Osge" },
1595 { G_UNICODE_SCRIPT_TANGUT, "Tang" },
1596
1597 /* Unicode 10.0 additions */
1598 { G_UNICODE_SCRIPT_MASARAM_GONDI, "Gonm" },
1599 { G_UNICODE_SCRIPT_NUSHU, "Nshu" },
1600 { G_UNICODE_SCRIPT_SOYOMBO, "Soyo" },
1601 { G_UNICODE_SCRIPT_ZANABAZAR_SQUARE, "Zanb" },
1602
1603 /* Unicode 11.0 additions */
1604 { G_UNICODE_SCRIPT_DOGRA, "Dogr" },
1605 { G_UNICODE_SCRIPT_GUNJALA_GONDI, "Gong" },
1606 { G_UNICODE_SCRIPT_HANIFI_ROHINGYA, "Rohg" },
1607 { G_UNICODE_SCRIPT_MAKASAR, "Maka" },
1608 { G_UNICODE_SCRIPT_MEDEFAIDRIN, "Medf" },
1609 { G_UNICODE_SCRIPT_OLD_SOGDIAN, "Sogo" },
1610 { G_UNICODE_SCRIPT_SOGDIAN, "Sogd" },
1611
1612 /* Unicode 12.0 additions */
1613 { G_UNICODE_SCRIPT_ELYMAIC, "Elym" },
1614 { G_UNICODE_SCRIPT_NANDINAGARI, "Nand" },
1615 { G_UNICODE_SCRIPT_NYIAKENG_PUACHUE_HMONG, "Hmnp" },
1616 { G_UNICODE_SCRIPT_WANCHO, "Wcho" },
1617
1618 /* Unicode 13.0 additions */
1619 { G_UNICODE_SCRIPT_CHORASMIAN, "Chrs" },
1620 { G_UNICODE_SCRIPT_DIVES_AKURU, "Diak" },
1621 { G_UNICODE_SCRIPT_KHITAN_SMALL_SCRIPT, "Kits" },
1622 { G_UNICODE_SCRIPT_YEZIDI, "Yezi" },
1623 };
1624 guint i;
1625
1626 g_assert_cmphex (0, ==,
1627 g_unicode_script_to_iso15924 (G_UNICODE_SCRIPT_INVALID_CODE));
1628 g_assert_cmphex (0x5A7A7A7A, ==, g_unicode_script_to_iso15924 (1000));
1629 g_assert_cmphex (0x41726162, ==,
1630 g_unicode_script_to_iso15924 (G_UNICODE_SCRIPT_ARABIC));
1631
1632 g_assert_cmphex (G_UNICODE_SCRIPT_INVALID_CODE, ==,
1633 g_unicode_script_from_iso15924 (0));
1634 g_assert_cmphex (G_UNICODE_SCRIPT_UNKNOWN, ==,
1635 g_unicode_script_from_iso15924 (0x12345678));
1636
1637#define PACK(a,b,c,d) \
1638 ((guint32)((((guint8)(a))<<24)|(((guint8)(b))<<16)|(((guint8)(c))<<8)|((guint8)(d))))
1639
1640 for (i = 0; i < G_N_ELEMENTS (data); i++)
1641 {
1642 guint32 code = PACK (data[i].four_letter_code[0],
1643 data[i].four_letter_code[1],
1644 data[i].four_letter_code[2],
1645 data[i].four_letter_code[3]);
1646
1647 g_assert_cmphex (g_unicode_script_to_iso15924 (data[i].script), ==, code);
1648 g_assert_cmpint (g_unicode_script_from_iso15924 (code), ==, data[i].script);
1649 }
1650
1651#undef PACK
1652}
1653
1654int
1655main (int argc,
1656 char *argv[])
1657{
1658 g_test_init (argc: &argc, argv: &argv, NULL);
1659
1660 g_test_add_func (testpath: "/unicode/alnum", test_func: test_alnum);
1661 g_test_add_func (testpath: "/unicode/alpha", test_func: test_alpha);
1662 g_test_add_func (testpath: "/unicode/break-type", test_func: test_unichar_break_type);
1663 g_test_add_func (testpath: "/unicode/canonical-decomposition", test_func: test_canonical_decomposition);
1664 g_test_add_func (testpath: "/unicode/casefold", test_func: test_casefold);
1665 g_test_add_func (testpath: "/unicode/cases", test_func: test_cases);
1666 g_test_add_func (testpath: "/unicode/character-type", test_func: test_unichar_character_type);
1667 g_test_add_func (testpath: "/unicode/cntrl", test_func: test_cntrl);
1668 g_test_add_func (testpath: "/unicode/combining-class", test_func: test_combining_class);
1669 g_test_add_func (testpath: "/unicode/compose", test_func: test_compose);
1670 g_test_add_func (testpath: "/unicode/decompose", test_func: test_decompose);
1671 g_test_add_func (testpath: "/unicode/decompose-tail", test_func: test_decompose_tail);
1672 g_test_add_func (testpath: "/unicode/defined", test_func: test_defined);
1673 g_test_add_func (testpath: "/unicode/digit", test_func: test_digit);
1674 g_test_add_func (testpath: "/unicode/digit-value", test_func: test_digit_value);
1675 g_test_add_func (testpath: "/unicode/fully-decompose-canonical", test_func: test_fully_decompose_canonical);
1676 g_test_add_func (testpath: "/unicode/fully-decompose-len", test_func: test_fully_decompose_len);
1677 g_test_add_func (testpath: "/unicode/graph", test_func: test_graph);
1678 g_test_add_func (testpath: "/unicode/iso15924", test_func: test_iso15924);
1679 g_test_add_func (testpath: "/unicode/lower", test_func: test_lower);
1680 g_test_add_func (testpath: "/unicode/mark", test_func: test_mark);
1681 g_test_add_func (testpath: "/unicode/mirror", test_func: test_mirror);
1682 g_test_add_func (testpath: "/unicode/print", test_func: test_print);
1683 g_test_add_func (testpath: "/unicode/punctuation", test_func: test_punctuation);
1684 g_test_add_func (testpath: "/unicode/script", test_func: test_unichar_script);
1685 g_test_add_func (testpath: "/unicode/space", test_func: test_space);
1686 g_test_add_func (testpath: "/unicode/strdown", test_func: test_strdown);
1687 g_test_add_func (testpath: "/unicode/strup", test_func: test_strup);
1688 g_test_add_func (testpath: "/unicode/turkish-strupdown", test_func: test_turkish_strupdown);
1689 g_test_add_func (testpath: "/unicode/title", test_func: test_title);
1690 g_test_add_func (testpath: "/unicode/upper", test_func: test_upper);
1691 g_test_add_func (testpath: "/unicode/validate", test_func: test_unichar_validate);
1692 g_test_add_func (testpath: "/unicode/wide", test_func: test_wide);
1693 g_test_add_func (testpath: "/unicode/xdigit", test_func: test_xdigit);
1694 g_test_add_func (testpath: "/unicode/xdigit-value", test_func: test_xdigit_value);
1695 g_test_add_func (testpath: "/unicode/zero-width", test_func: test_zerowidth);
1696
1697 return g_test_run();
1698}
1699

/* Source code of gtk/subprojects/glib/glib/tests/unicode.c */