1 | /* Unit tests for utilities |
2 | * Copyright (C) 2010 Red Hat, Inc. |
3 | * Copyright (C) 2011 Google, Inc. |
4 | * |
5 | * This work is provided "as is"; redistribution and modification |
6 | * in whole or in part, in any medium, physical or electronic is |
7 | * permitted without restriction. |
8 | * |
9 | * This work is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
12 | * |
13 | * In no event shall the authors or contributors be liable for any |
14 | * direct, indirect, incidental, special, exemplary, or consequential |
15 | * damages (including, but not limited to, procurement of substitute |
16 | * goods or services; loss of use, data, or profits; or business |
17 | * interruption) however caused and on any theory of liability, whether |
18 | * in contract, strict liability, or tort (including negligence or |
19 | * otherwise) arising in any way out of the use of this software, even |
20 | * if advised of the possibility of such damage. |
21 | * |
22 | * Author: Matthias Clasen, Behdad Esfahbod |
23 | */ |
24 | |
25 | /* We are testing some deprecated APIs here */ |
26 | #ifndef GLIB_DISABLE_DEPRECATION_WARNINGS |
27 | #define GLIB_DISABLE_DEPRECATION_WARNINGS |
28 | #endif |
29 | |
30 | #include <locale.h> |
31 | |
32 | #include "glib.h" |
33 | |
34 | #include "glib/gunidecomp.h" |
35 | |
/* Test that g_unichar_validate() accepts code points inside the Unicode
 * code space (including a noncharacter such as U+FDD1) and rejects
 * surrogate code points and values past U+10FFFF. */
static void
test_unichar_validate (void)
{
  g_assert_true (g_unichar_validate ('j'));
  g_assert_true (g_unichar_validate (8356));    /* U+20A4 */
  g_assert_true (g_unichar_validate (0xFDD1));  /* noncharacter, still valid */
  g_assert_true (g_unichar_validate (917760));  /* U+E0100 */

  /* Both ends of the surrogate range must be rejected. */
  g_assert_false (g_unichar_validate (0xD800));
  g_assert_false (g_unichar_validate (0xDFFF));

  /* First value beyond the last Unicode code point (U+10FFFF). */
  g_assert_false (g_unichar_validate (0x110000));
}
48 | |
49 | /* Test that g_unichar_type() returns the correct value for various |
50 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
51 | static void |
52 | test_unichar_character_type (void) |
53 | { |
54 | guint i; |
55 | struct { |
56 | GUnicodeType type; |
57 | gunichar c; |
58 | } examples[] = { |
59 | { G_UNICODE_CONTROL, 0x000D }, |
60 | { G_UNICODE_FORMAT, 0x200E }, |
61 | /* G_UNICODE_UNASSIGNED */ |
62 | { G_UNICODE_PRIVATE_USE, 0xE000 }, |
63 | { G_UNICODE_SURROGATE, 0xD800 }, |
64 | { G_UNICODE_LOWERCASE_LETTER, 0x0061 }, |
65 | { G_UNICODE_MODIFIER_LETTER, 0x02B0 }, |
66 | { G_UNICODE_OTHER_LETTER, 0x3400 }, |
67 | { G_UNICODE_TITLECASE_LETTER, 0x01C5 }, |
68 | { G_UNICODE_UPPERCASE_LETTER, 0xFF21 }, |
69 | { G_UNICODE_SPACING_MARK, 0x0903 }, |
70 | { G_UNICODE_ENCLOSING_MARK, 0x20DD }, |
71 | { G_UNICODE_NON_SPACING_MARK, 0xA806 }, |
72 | { G_UNICODE_DECIMAL_NUMBER, 0xFF10 }, |
73 | { G_UNICODE_LETTER_NUMBER, 0x16EE }, |
74 | { G_UNICODE_OTHER_NUMBER, 0x17F0 }, |
75 | { G_UNICODE_CONNECT_PUNCTUATION, 0x005F }, |
76 | { G_UNICODE_DASH_PUNCTUATION, 0x058A }, |
77 | { G_UNICODE_CLOSE_PUNCTUATION, 0x0F3B }, |
78 | { G_UNICODE_FINAL_PUNCTUATION, 0x2019 }, |
79 | { G_UNICODE_INITIAL_PUNCTUATION, 0x2018 }, |
80 | { G_UNICODE_OTHER_PUNCTUATION, 0x2016 }, |
81 | { G_UNICODE_OPEN_PUNCTUATION, 0x0F3A }, |
82 | { G_UNICODE_CURRENCY_SYMBOL, 0x20A0 }, |
83 | { G_UNICODE_MODIFIER_SYMBOL, 0x309B }, |
84 | { G_UNICODE_MATH_SYMBOL, 0xFB29 }, |
85 | { G_UNICODE_OTHER_SYMBOL, 0x00A6 }, |
86 | { G_UNICODE_LINE_SEPARATOR, 0x2028 }, |
87 | { G_UNICODE_PARAGRAPH_SEPARATOR, 0x2029 }, |
88 | { G_UNICODE_SPACE_SEPARATOR, 0x202F }, |
89 | }; |
90 | |
91 | for (i = 0; i < G_N_ELEMENTS (examples); i++) |
92 | { |
93 | g_assert_cmpint (g_unichar_type (examples[i].c), ==, examples[i].type); |
94 | } |
95 | |
96 | /*** Testing TYPE() border cases ***/ |
97 | g_assert_cmpint (g_unichar_type (0x3FF5), ==, 0x07); |
98 | /* U+FFEFF Plane 15 Private Use */ |
99 | g_assert_cmpint (g_unichar_type (0xFFEFF), ==, 0x03); |
100 | /* U+E0001 Language Tag */ |
101 | g_assert_cmpint (g_unichar_type (0xE0001), ==, 0x01); |
102 | g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR), ==, 0x02); |
103 | g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR + 1), ==, 0x02); |
104 | g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR_PART1), ==, 0x02); |
105 | g_assert_cmpint (g_unichar_type (G_UNICODE_LAST_CHAR_PART1 + 1), ==, 0x02); |
106 | } |
107 | |
108 | /* Test that g_unichar_break_type() returns the correct value for various |
109 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
110 | static void |
111 | test_unichar_break_type (void) |
112 | { |
113 | guint i; |
114 | struct { |
115 | GUnicodeBreakType type; |
116 | gunichar c; |
117 | } examples[] = { |
118 | { G_UNICODE_BREAK_MANDATORY, 0x2028 }, |
119 | { G_UNICODE_BREAK_CARRIAGE_RETURN, 0x000D }, |
120 | { G_UNICODE_BREAK_LINE_FEED, 0x000A }, |
121 | { G_UNICODE_BREAK_COMBINING_MARK, 0x0300 }, |
122 | { G_UNICODE_BREAK_SURROGATE, 0xD800 }, |
123 | { G_UNICODE_BREAK_ZERO_WIDTH_SPACE, 0x200B }, |
124 | { G_UNICODE_BREAK_INSEPARABLE, 0x2024 }, |
125 | { G_UNICODE_BREAK_NON_BREAKING_GLUE, 0x00A0 }, |
126 | { G_UNICODE_BREAK_CONTINGENT, 0xFFFC }, |
127 | { G_UNICODE_BREAK_SPACE, 0x0020 }, |
128 | { G_UNICODE_BREAK_AFTER, 0x05BE }, |
129 | { G_UNICODE_BREAK_BEFORE, 0x02C8 }, |
130 | { G_UNICODE_BREAK_BEFORE_AND_AFTER, 0x2014 }, |
131 | { G_UNICODE_BREAK_HYPHEN, 0x002D }, |
132 | { G_UNICODE_BREAK_NON_STARTER, 0x17D6 }, |
133 | { G_UNICODE_BREAK_OPEN_PUNCTUATION, 0x0028 }, |
134 | { G_UNICODE_BREAK_CLOSE_PARANTHESIS, 0x0029 }, |
135 | { G_UNICODE_BREAK_CLOSE_PUNCTUATION, 0x007D }, |
136 | { G_UNICODE_BREAK_QUOTATION, 0x0022 }, |
137 | { G_UNICODE_BREAK_EXCLAMATION, 0x0021 }, |
138 | { G_UNICODE_BREAK_IDEOGRAPHIC, 0x2E80 }, |
139 | { G_UNICODE_BREAK_NUMERIC, 0x0030 }, |
140 | { G_UNICODE_BREAK_INFIX_SEPARATOR, 0x002C }, |
141 | { G_UNICODE_BREAK_SYMBOL, 0x002F }, |
142 | { G_UNICODE_BREAK_ALPHABETIC, 0x0023 }, |
143 | { G_UNICODE_BREAK_PREFIX, 0x0024 }, |
144 | { G_UNICODE_BREAK_POSTFIX, 0x0025 }, |
145 | { G_UNICODE_BREAK_COMPLEX_CONTEXT, 0x0E01 }, |
146 | { G_UNICODE_BREAK_AMBIGUOUS, 0x00F7 }, |
147 | { G_UNICODE_BREAK_UNKNOWN, 0xE000 }, |
148 | { G_UNICODE_BREAK_NEXT_LINE, 0x0085 }, |
149 | { G_UNICODE_BREAK_WORD_JOINER, 0x2060 }, |
150 | { G_UNICODE_BREAK_HANGUL_L_JAMO, 0x1100 }, |
151 | { G_UNICODE_BREAK_HANGUL_V_JAMO, 0x1160 }, |
152 | { G_UNICODE_BREAK_HANGUL_T_JAMO, 0x11A8 }, |
153 | { G_UNICODE_BREAK_HANGUL_LV_SYLLABLE, 0xAC00 }, |
154 | { G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE, 0xAC01 }, |
155 | { G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER, 0x3041 }, |
156 | { G_UNICODE_BREAK_HEBREW_LETTER, 0x05D0 }, |
157 | { G_UNICODE_BREAK_REGIONAL_INDICATOR, 0x1F1F6 }, |
158 | { G_UNICODE_BREAK_EMOJI_BASE, 0x1F466 }, |
159 | { G_UNICODE_BREAK_EMOJI_MODIFIER, 0x1F3FB }, |
160 | { G_UNICODE_BREAK_ZERO_WIDTH_JOINER, 0x200D }, |
161 | }; |
162 | |
163 | for (i = 0; i < G_N_ELEMENTS (examples); i++) |
164 | { |
165 | g_assert_cmpint (g_unichar_break_type (examples[i].c), ==, examples[i].type); |
166 | } |
167 | } |
168 | |
169 | /* Test that g_unichar_get_script() returns the correct value for various |
170 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
171 | static void |
172 | test_unichar_script (void) |
173 | { |
174 | guint i; |
175 | struct { |
176 | GUnicodeScript script; |
177 | gunichar c; |
178 | } examples[] = { |
179 | { G_UNICODE_SCRIPT_COMMON, 0x002A }, |
180 | { G_UNICODE_SCRIPT_INHERITED, 0x1CED }, |
181 | { G_UNICODE_SCRIPT_INHERITED, 0x0670 }, |
182 | { G_UNICODE_SCRIPT_ARABIC, 0x060D }, |
183 | { G_UNICODE_SCRIPT_ARMENIAN, 0x0559 }, |
184 | { G_UNICODE_SCRIPT_BENGALI, 0x09CD }, |
185 | { G_UNICODE_SCRIPT_BOPOMOFO, 0x31B6 }, |
186 | { G_UNICODE_SCRIPT_CHEROKEE, 0x13A2 }, |
187 | { G_UNICODE_SCRIPT_COPTIC, 0x2CFD }, |
188 | { G_UNICODE_SCRIPT_CYRILLIC, 0x0482 }, |
189 | { G_UNICODE_SCRIPT_DESERET, 0x10401 }, |
190 | { G_UNICODE_SCRIPT_DEVANAGARI, 0x094D }, |
191 | { G_UNICODE_SCRIPT_ETHIOPIC, 0x1258 }, |
192 | { G_UNICODE_SCRIPT_GEORGIAN, 0x10FC }, |
193 | { G_UNICODE_SCRIPT_GOTHIC, 0x10341 }, |
194 | { G_UNICODE_SCRIPT_GREEK, 0x0375 }, |
195 | { G_UNICODE_SCRIPT_GUJARATI, 0x0A83 }, |
196 | { G_UNICODE_SCRIPT_GURMUKHI, 0x0A3C }, |
197 | { G_UNICODE_SCRIPT_HAN, 0x3005 }, |
198 | { G_UNICODE_SCRIPT_HANGUL, 0x1100 }, |
199 | { G_UNICODE_SCRIPT_HEBREW, 0x05BF }, |
200 | { G_UNICODE_SCRIPT_HIRAGANA, 0x309F }, |
201 | { G_UNICODE_SCRIPT_KANNADA, 0x0CBC }, |
202 | { G_UNICODE_SCRIPT_KATAKANA, 0x30FF }, |
203 | { G_UNICODE_SCRIPT_KHMER, 0x17DD }, |
204 | { G_UNICODE_SCRIPT_LAO, 0x0EDD }, |
205 | { G_UNICODE_SCRIPT_LATIN, 0x0061 }, |
206 | { G_UNICODE_SCRIPT_MALAYALAM, 0x0D3D }, |
207 | { G_UNICODE_SCRIPT_MONGOLIAN, 0x1843 }, |
208 | { G_UNICODE_SCRIPT_MYANMAR, 0x1031 }, |
209 | { G_UNICODE_SCRIPT_OGHAM, 0x169C }, |
210 | { G_UNICODE_SCRIPT_OLD_ITALIC, 0x10322 }, |
211 | { G_UNICODE_SCRIPT_ORIYA, 0x0B3C }, |
212 | { G_UNICODE_SCRIPT_RUNIC, 0x16EF }, |
213 | { G_UNICODE_SCRIPT_SINHALA, 0x0DBD }, |
214 | { G_UNICODE_SCRIPT_SYRIAC, 0x0711 }, |
215 | { G_UNICODE_SCRIPT_TAMIL, 0x0B82 }, |
216 | { G_UNICODE_SCRIPT_TELUGU, 0x0C03 }, |
217 | { G_UNICODE_SCRIPT_THAANA, 0x07B1 }, |
218 | { G_UNICODE_SCRIPT_THAI, 0x0E31 }, |
219 | { G_UNICODE_SCRIPT_TIBETAN, 0x0FD4 }, |
220 | { G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, 0x1400 }, |
221 | { G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, 0x1401 }, |
222 | { G_UNICODE_SCRIPT_YI, 0xA015 }, |
223 | { G_UNICODE_SCRIPT_TAGALOG, 0x1700 }, |
224 | { G_UNICODE_SCRIPT_HANUNOO, 0x1720 }, |
225 | { G_UNICODE_SCRIPT_BUHID, 0x1740 }, |
226 | { G_UNICODE_SCRIPT_TAGBANWA, 0x1760 }, |
227 | { G_UNICODE_SCRIPT_BRAILLE, 0x2800 }, |
228 | { G_UNICODE_SCRIPT_CYPRIOT, 0x10808 }, |
229 | { G_UNICODE_SCRIPT_LIMBU, 0x1932 }, |
230 | { G_UNICODE_SCRIPT_OSMANYA, 0x10480 }, |
231 | { G_UNICODE_SCRIPT_SHAVIAN, 0x10450 }, |
232 | { G_UNICODE_SCRIPT_LINEAR_B, 0x10000 }, |
233 | { G_UNICODE_SCRIPT_TAI_LE, 0x1950 }, |
234 | { G_UNICODE_SCRIPT_UGARITIC, 0x1039F }, |
235 | { G_UNICODE_SCRIPT_NEW_TAI_LUE, 0x1980 }, |
236 | { G_UNICODE_SCRIPT_BUGINESE, 0x1A1F }, |
237 | { G_UNICODE_SCRIPT_GLAGOLITIC, 0x2C00 }, |
238 | { G_UNICODE_SCRIPT_TIFINAGH, 0x2D6F }, |
239 | { G_UNICODE_SCRIPT_SYLOTI_NAGRI, 0xA800 }, |
240 | { G_UNICODE_SCRIPT_OLD_PERSIAN, 0x103D0 }, |
241 | { G_UNICODE_SCRIPT_KHAROSHTHI, 0x10A3F }, |
242 | { G_UNICODE_SCRIPT_UNKNOWN, 0x1111111 }, |
243 | { G_UNICODE_SCRIPT_BALINESE, 0x1B04 }, |
244 | { G_UNICODE_SCRIPT_CUNEIFORM, 0x12000 }, |
245 | { G_UNICODE_SCRIPT_PHOENICIAN, 0x10900 }, |
246 | { G_UNICODE_SCRIPT_PHAGS_PA, 0xA840 }, |
247 | { G_UNICODE_SCRIPT_NKO, 0x07C0 }, |
248 | { G_UNICODE_SCRIPT_KAYAH_LI, 0xA900 }, |
249 | { G_UNICODE_SCRIPT_LEPCHA, 0x1C00 }, |
250 | { G_UNICODE_SCRIPT_REJANG, 0xA930 }, |
251 | { G_UNICODE_SCRIPT_SUNDANESE, 0x1B80 }, |
252 | { G_UNICODE_SCRIPT_SAURASHTRA, 0xA880 }, |
253 | { G_UNICODE_SCRIPT_CHAM, 0xAA00 }, |
254 | { G_UNICODE_SCRIPT_OL_CHIKI, 0x1C50 }, |
255 | { G_UNICODE_SCRIPT_VAI, 0xA500 }, |
256 | { G_UNICODE_SCRIPT_CARIAN, 0x102A0 }, |
257 | { G_UNICODE_SCRIPT_LYCIAN, 0x10280 }, |
258 | { G_UNICODE_SCRIPT_LYDIAN, 0x1093F }, |
259 | { G_UNICODE_SCRIPT_AVESTAN, 0x10B00 }, |
260 | { G_UNICODE_SCRIPT_BAMUM, 0xA6A0 }, |
261 | { G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS, 0x13000 }, |
262 | { G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC, 0x10840 }, |
263 | { G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI, 0x10B60 }, |
264 | { G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN, 0x10B40 }, |
265 | { G_UNICODE_SCRIPT_JAVANESE, 0xA980 }, |
266 | { G_UNICODE_SCRIPT_KAITHI, 0x11082 }, |
267 | { G_UNICODE_SCRIPT_LISU, 0xA4D0 }, |
268 | { G_UNICODE_SCRIPT_MEETEI_MAYEK, 0xABE5 }, |
269 | { G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN, 0x10A60 }, |
270 | { G_UNICODE_SCRIPT_OLD_TURKIC, 0x10C00 }, |
271 | { G_UNICODE_SCRIPT_SAMARITAN, 0x0800 }, |
272 | { G_UNICODE_SCRIPT_TAI_THAM, 0x1A20 }, |
273 | { G_UNICODE_SCRIPT_TAI_VIET, 0xAA80 }, |
274 | { G_UNICODE_SCRIPT_BATAK, 0x1BC0 }, |
275 | { G_UNICODE_SCRIPT_BRAHMI, 0x11000 }, |
276 | { G_UNICODE_SCRIPT_MANDAIC, 0x0840 }, |
277 | { G_UNICODE_SCRIPT_CHAKMA, 0x11100 }, |
278 | { G_UNICODE_SCRIPT_MEROITIC_CURSIVE, 0x109A0 }, |
279 | { G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS, 0x10980 }, |
280 | { G_UNICODE_SCRIPT_MIAO, 0x16F00 }, |
281 | { G_UNICODE_SCRIPT_SHARADA, 0x11180 }, |
282 | { G_UNICODE_SCRIPT_SORA_SOMPENG, 0x110D0 }, |
283 | { G_UNICODE_SCRIPT_TAKRI, 0x11680 }, |
284 | { G_UNICODE_SCRIPT_BASSA_VAH, 0x16AD0 }, |
285 | { G_UNICODE_SCRIPT_CAUCASIAN_ALBANIAN, 0x10530 }, |
286 | { G_UNICODE_SCRIPT_DUPLOYAN, 0x1BC00 }, |
287 | { G_UNICODE_SCRIPT_ELBASAN, 0x10500 }, |
288 | { G_UNICODE_SCRIPT_GRANTHA, 0x11301 }, |
289 | { G_UNICODE_SCRIPT_KHOJKI, 0x11200 }, |
290 | { G_UNICODE_SCRIPT_KHUDAWADI, 0x112B0 }, |
291 | { G_UNICODE_SCRIPT_LINEAR_A, 0x10600 }, |
292 | { G_UNICODE_SCRIPT_MAHAJANI, 0x11150 }, |
293 | { G_UNICODE_SCRIPT_MANICHAEAN, 0x10AC0 }, |
294 | { G_UNICODE_SCRIPT_MENDE_KIKAKUI, 0x1E800 }, |
295 | { G_UNICODE_SCRIPT_MODI, 0x11600 }, |
296 | { G_UNICODE_SCRIPT_MRO, 0x16A40 }, |
297 | { G_UNICODE_SCRIPT_NABATAEAN, 0x10880 }, |
298 | { G_UNICODE_SCRIPT_OLD_NORTH_ARABIAN, 0x10A80 }, |
299 | { G_UNICODE_SCRIPT_OLD_PERMIC, 0x10350 }, |
300 | { G_UNICODE_SCRIPT_PAHAWH_HMONG, 0x16B00 }, |
301 | { G_UNICODE_SCRIPT_PALMYRENE, 0x10860 }, |
302 | { G_UNICODE_SCRIPT_PAU_CIN_HAU, 0x11AC0 }, |
303 | { G_UNICODE_SCRIPT_PSALTER_PAHLAVI, 0x10B80 }, |
304 | { G_UNICODE_SCRIPT_SIDDHAM, 0x11580 }, |
305 | { G_UNICODE_SCRIPT_TIRHUTA, 0x11480 }, |
306 | { G_UNICODE_SCRIPT_WARANG_CITI, 0x118A0 }, |
307 | { G_UNICODE_SCRIPT_CHEROKEE, 0x0AB71 }, |
308 | { G_UNICODE_SCRIPT_HATRAN, 0x108E0 }, |
309 | { G_UNICODE_SCRIPT_OLD_HUNGARIAN, 0x10C80 }, |
310 | { G_UNICODE_SCRIPT_MULTANI, 0x11280 }, |
311 | { G_UNICODE_SCRIPT_AHOM, 0x11700 }, |
312 | { G_UNICODE_SCRIPT_CUNEIFORM, 0x12480 }, |
313 | { G_UNICODE_SCRIPT_ANATOLIAN_HIEROGLYPHS, 0x14400 }, |
314 | { G_UNICODE_SCRIPT_SIGNWRITING, 0x1D800 }, |
315 | { G_UNICODE_SCRIPT_ADLAM, 0x1E900 }, |
316 | { G_UNICODE_SCRIPT_BHAIKSUKI, 0x11C00 }, |
317 | { G_UNICODE_SCRIPT_MARCHEN, 0x11C70 }, |
318 | { G_UNICODE_SCRIPT_NEWA, 0x11400 }, |
319 | { G_UNICODE_SCRIPT_OSAGE, 0x104B0 }, |
320 | { G_UNICODE_SCRIPT_TANGUT, 0x16FE0 }, |
321 | { G_UNICODE_SCRIPT_MASARAM_GONDI, 0x11D00 }, |
322 | { G_UNICODE_SCRIPT_NUSHU, 0x1B170 }, |
323 | { G_UNICODE_SCRIPT_SOYOMBO, 0x11A50 }, |
324 | { G_UNICODE_SCRIPT_ZANABAZAR_SQUARE, 0x11A00 }, |
325 | { G_UNICODE_SCRIPT_DOGRA, 0x11800 }, |
326 | { G_UNICODE_SCRIPT_GUNJALA_GONDI, 0x11D60 }, |
327 | { G_UNICODE_SCRIPT_HANIFI_ROHINGYA, 0x10D00 }, |
328 | { G_UNICODE_SCRIPT_MAKASAR, 0x11EE0 }, |
329 | { G_UNICODE_SCRIPT_MEDEFAIDRIN, 0x16E40 }, |
330 | { G_UNICODE_SCRIPT_OLD_SOGDIAN, 0x10F00 }, |
331 | { G_UNICODE_SCRIPT_SOGDIAN, 0x10F30 }, |
332 | { G_UNICODE_SCRIPT_ELYMAIC, 0x10FE0 }, |
333 | { G_UNICODE_SCRIPT_NANDINAGARI, 0x119A0 }, |
334 | { G_UNICODE_SCRIPT_NYIAKENG_PUACHUE_HMONG, 0x1E100 }, |
335 | { G_UNICODE_SCRIPT_WANCHO, 0x1E2C0 }, |
336 | { G_UNICODE_SCRIPT_CHORASMIAN, 0x10FB0 }, |
337 | { G_UNICODE_SCRIPT_DIVES_AKURU, 0x11900 }, |
338 | { G_UNICODE_SCRIPT_KHITAN_SMALL_SCRIPT, 0x18B00 }, |
339 | { G_UNICODE_SCRIPT_YEZIDI, 0x10E80 }, |
340 | }; |
341 | for (i = 0; i < G_N_ELEMENTS (examples); i++) |
342 | g_assert_cmpint (g_unichar_get_script (examples[i].c), ==, examples[i].script); |
343 | } |
344 | |
345 | /* Test that g_unichar_combining_class() returns the correct value for |
346 | * various ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
347 | static void |
348 | test_combining_class (void) |
349 | { |
350 | guint i; |
351 | struct { |
352 | gint class; |
353 | gunichar c; |
354 | } examples[] = { |
355 | { 0, 0x0020 }, |
356 | { 1, 0x0334 }, |
357 | { 7, 0x093C }, |
358 | { 8, 0x3099 }, |
359 | { 9, 0x094D }, |
360 | { 10, 0x05B0 }, |
361 | { 11, 0x05B1 }, |
362 | { 12, 0x05B2 }, |
363 | { 13, 0x05B3 }, |
364 | { 14, 0x05B4 }, |
365 | { 15, 0x05B5 }, |
366 | { 16, 0x05B6 }, |
367 | { 17, 0x05B7 }, |
368 | { 18, 0x05B8 }, |
369 | { 19, 0x05B9 }, |
370 | { 20, 0x05BB }, |
371 | { 21, 0x05BC }, |
372 | { 22, 0x05BD }, |
373 | { 23, 0x05BF }, |
374 | { 24, 0x05C1 }, |
375 | { 25, 0x05C2 }, |
376 | { 26, 0xFB1E }, |
377 | { 27, 0x064B }, |
378 | { 28, 0x064C }, |
379 | { 29, 0x064D }, |
380 | /* ... */ |
381 | { 228, 0x05AE }, |
382 | { 230, 0x0300 }, |
383 | { 232, 0x302C }, |
384 | { 233, 0x0362 }, |
385 | { 234, 0x0360 }, |
386 | { 234, 0x1DCD }, |
387 | { 240, 0x0345 } |
388 | }; |
389 | for (i = 0; i < G_N_ELEMENTS (examples); i++) |
390 | { |
391 | g_assert_cmpint (g_unichar_combining_class (examples[i].c), ==, examples[i].class); |
392 | } |
393 | } |
394 | |
395 | /* Test that g_unichar_get_mirror() returns the correct value for various |
396 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
397 | static void |
398 | test_mirror (void) |
399 | { |
400 | gunichar mirror; |
401 | |
402 | g_assert_true (g_unichar_get_mirror_char ('(', &mirror)); |
403 | g_assert_cmpint (mirror, ==, ')'); |
404 | g_assert_true (g_unichar_get_mirror_char (')', &mirror)); |
405 | g_assert_cmpint (mirror, ==, '('); |
406 | g_assert_true (g_unichar_get_mirror_char ('{', &mirror)); |
407 | g_assert_cmpint (mirror, ==, '}'); |
408 | g_assert_true (g_unichar_get_mirror_char ('}', &mirror)); |
409 | g_assert_cmpint (mirror, ==, '{'); |
410 | g_assert_true (g_unichar_get_mirror_char (0x208D, &mirror)); |
411 | g_assert_cmpint (mirror, ==, 0x208E); |
412 | g_assert_true (g_unichar_get_mirror_char (0x208E, &mirror)); |
413 | g_assert_cmpint (mirror, ==, 0x208D); |
414 | g_assert_false (g_unichar_get_mirror_char ('a', &mirror)); |
415 | } |
416 | |
417 | /* Test that g_utf8_strup() returns the correct value for various |
418 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
419 | static void |
420 | test_strup (void) |
421 | { |
422 | char *str_up = NULL; |
423 | const char *str = "AaZz09x;\x03\x45" |
424 | "\xEF\xBD\x81" /* Unichar 'A' (U+FF21) */ |
425 | "\xEF\xBC\xA1" ; /* Unichar 'a' (U+FF41) */ |
426 | |
427 | /* Testing degenerated cases */ |
428 | if (g_test_undefined ()) |
429 | { |
430 | g_test_expect_message (G_LOG_DOMAIN, log_level: G_LOG_LEVEL_CRITICAL, |
431 | pattern: "*assertion*!= NULL*" ); |
432 | str_up = g_utf8_strup (NULL, len: 0); |
433 | g_test_assert_expected_messages (); |
434 | } |
435 | |
436 | str_up = g_utf8_strup (str, len: strlen (s: str)); |
437 | /* Tricky, comparing two unicode strings with an ASCII function */ |
438 | g_assert_cmpstr (str_up, ==, "AAZZ09X;\003E\357\274\241\357\274\241" ); |
439 | g_free (mem: str_up); |
440 | } |
441 | |
442 | /* Test that g_utf8_strdown() returns the correct value for various |
443 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
444 | static void |
445 | test_strdown (void) |
446 | { |
447 | char *str_down = NULL; |
448 | const char *str = "AaZz09x;\x03\x07" |
449 | "\xEF\xBD\x81" /* Unichar 'A' (U+FF21) */ |
450 | "\xEF\xBC\xA1" ; /* Unichar 'a' (U+FF41) */ |
451 | |
452 | /* Testing degenerated cases */ |
453 | if (g_test_undefined ()) |
454 | { |
455 | g_test_expect_message (G_LOG_DOMAIN, log_level: G_LOG_LEVEL_CRITICAL, |
456 | pattern: "*assertion*!= NULL*" ); |
457 | str_down = g_utf8_strdown (NULL, len: 0); |
458 | g_test_assert_expected_messages (); |
459 | } |
460 | |
461 | str_down = g_utf8_strdown (str, len: strlen (s: str)); |
462 | /* Tricky, comparing two unicode strings with an ASCII function */ |
463 | g_assert_cmpstr (str_down, ==, "aazz09x;\003\007\357\275\201\357\275\201" ); |
464 | g_free (mem: str_down); |
465 | } |
466 | |
467 | /* Test that g_utf8_strup() and g_utf8_strdown() return the correct |
468 | * value for Turkish 'i' with and without dot above. */ |
469 | static void |
470 | test_turkish_strupdown (void) |
471 | { |
472 | char *str_up = NULL; |
473 | char *str_down = NULL; |
474 | const char *str = "iII" |
475 | "\xcc\x87" /* COMBINING DOT ABOVE (U+307) */ |
476 | "\xc4\xb1" /* LATIN SMALL LETTER DOTLESS I (U+131) */ |
477 | "\xc4\xb0" ; /* LATIN CAPITAL LETTER I WITH DOT ABOVE (U+130) */ |
478 | |
479 | char *oldlocale = g_strdup (str: setlocale (LC_ALL, locale: "tr_TR" )); |
480 | |
481 | if (oldlocale == NULL) |
482 | { |
483 | g_test_skip (msg: "locale tr_TR not available" ); |
484 | return; |
485 | } |
486 | |
487 | str_up = g_utf8_strup (str, len: strlen (s: str)); |
488 | str_down = g_utf8_strdown (str, len: strlen (s: str)); |
489 | /* i => LATIN CAPITAL LETTER I WITH DOT ABOVE, |
490 | * I => I, |
491 | * I + COMBINING DOT ABOVE => I + COMBINING DOT ABOVE, |
492 | * LATIN SMALL LETTER DOTLESS I => I, |
493 | * LATIN CAPITAL LETTER I WITH DOT ABOVE => LATIN CAPITAL LETTER I WITH DOT ABOVE */ |
494 | g_assert_cmpstr (str_up, ==, "\xc4\xb0II\xcc\x87I\xc4\xb0" ); |
495 | /* i => i, |
496 | * I => LATIN SMALL LETTER DOTLESS I, |
497 | * I + COMBINING DOT ABOVE => i, |
498 | * LATIN SMALL LETTER DOTLESS I => LATIN SMALL LETTER DOTLESS I, |
499 | * LATIN CAPITAL LETTER I WITH DOT ABOVE => i */ |
500 | g_assert_cmpstr (str_down, ==, "i\xc4\xb1i\xc4\xb1i" ); |
501 | g_free (mem: str_up); |
502 | g_free (mem: str_down); |
503 | |
504 | setlocale (LC_ALL, locale: oldlocale); |
505 | g_free (mem: oldlocale); |
506 | } |
507 | |
508 | /* Test that g_utf8_casefold() returns the correct value for various |
509 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
510 | static void |
511 | test_casefold (void) |
512 | { |
513 | char *str_casefold = NULL; |
514 | const char *str = "AaZz09x;" |
515 | "\xEF\xBD\x81" /* Unichar 'A' (U+FF21) */ |
516 | "\xEF\xBC\xA1" ; /* Unichar 'a' (U+FF41) */ |
517 | |
518 | /* Testing degenerated cases */ |
519 | if (g_test_undefined ()) |
520 | { |
521 | g_test_expect_message (G_LOG_DOMAIN, log_level: G_LOG_LEVEL_CRITICAL, |
522 | pattern: "*assertion*!= NULL*" ); |
523 | str_casefold = g_utf8_casefold (NULL, len: 0); |
524 | g_test_assert_expected_messages (); |
525 | } |
526 | |
527 | str_casefold = g_utf8_casefold (str, len: strlen (s: str)); |
528 | /* Tricky, comparing two unicode strings with an ASCII function */ |
529 | g_assert_cmpstr (str_casefold, ==, "aazz09x;\357\275\201\357\275\201" ); |
530 | g_free (mem: str_casefold); |
531 | } |
532 | |
533 | /* Test that g_unichar_ismark() returns the correct value for various |
534 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
535 | static void |
536 | test_mark (void) |
537 | { |
538 | g_assert_true (g_unichar_ismark (0x0903)); |
539 | g_assert_true (g_unichar_ismark (0x20DD)); |
540 | g_assert_true (g_unichar_ismark (0xA806)); |
541 | g_assert_false (g_unichar_ismark ('a')); |
542 | |
543 | /*** Testing TYPE() border cases ***/ |
544 | g_assert_false (g_unichar_ismark (0x3FF5)); |
545 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
546 | g_assert_false (g_unichar_ismark (0xFFEFF)); |
547 | /* U+E0001 Language Tag */ |
548 | g_assert_false (g_unichar_ismark (0xE0001)); |
549 | g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR)); |
550 | g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR + 1)); |
551 | g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR_PART1)); |
552 | g_assert_false (g_unichar_ismark (G_UNICODE_LAST_CHAR_PART1 + 1)); |
553 | } |
554 | |
555 | /* Test that g_unichar_isspace() returns the correct value for various |
556 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
557 | static void |
558 | test_space (void) |
559 | { |
560 | g_assert_false (g_unichar_isspace ('a')); |
561 | g_assert_true (g_unichar_isspace (' ')); |
562 | g_assert_true (g_unichar_isspace ('\t')); |
563 | g_assert_true (g_unichar_isspace ('\n')); |
564 | g_assert_true (g_unichar_isspace ('\r')); |
565 | g_assert_true (g_unichar_isspace ('\f')); |
566 | g_assert_false (g_unichar_isspace (0xff41)); /* Unicode fullwidth 'a' */ |
567 | g_assert_true (g_unichar_isspace (0x202F)); /* Unicode space separator */ |
568 | g_assert_true (g_unichar_isspace (0x2028)); /* Unicode line separator */ |
569 | g_assert_true (g_unichar_isspace (0x2029)); /* Unicode paragraph separator */ |
570 | |
571 | /*** Testing TYPE() border cases ***/ |
572 | g_assert_false (g_unichar_isspace (0x3FF5)); |
573 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
574 | g_assert_false (g_unichar_isspace (0xFFEFF)); |
575 | /* U+E0001 Language Tag */ |
576 | g_assert_false (g_unichar_isspace (0xE0001)); |
577 | g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR)); |
578 | g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR + 1)); |
579 | g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR_PART1)); |
580 | g_assert_false (g_unichar_isspace (G_UNICODE_LAST_CHAR_PART1 + 1)); |
581 | } |
582 | |
583 | /* Test that g_unichar_isalnum() returns the correct value for various |
584 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
585 | static void |
586 | test_alnum (void) |
587 | { |
588 | g_assert_false (g_unichar_isalnum (' ')); |
589 | g_assert_true (g_unichar_isalnum ('a')); |
590 | g_assert_true (g_unichar_isalnum ('z')); |
591 | g_assert_true (g_unichar_isalnum ('0')); |
592 | g_assert_true (g_unichar_isalnum ('9')); |
593 | g_assert_true (g_unichar_isalnum ('A')); |
594 | g_assert_true (g_unichar_isalnum ('Z')); |
595 | g_assert_false (g_unichar_isalnum ('-')); |
596 | g_assert_false (g_unichar_isalnum ('*')); |
597 | g_assert_true (g_unichar_isalnum (0xFF21)); /* Unichar fullwidth 'A' */ |
598 | g_assert_true (g_unichar_isalnum (0xFF3A)); /* Unichar fullwidth 'Z' */ |
599 | g_assert_true (g_unichar_isalnum (0xFF41)); /* Unichar fullwidth 'a' */ |
600 | g_assert_true (g_unichar_isalnum (0xFF5A)); /* Unichar fullwidth 'z' */ |
601 | g_assert_true (g_unichar_isalnum (0xFF10)); /* Unichar fullwidth '0' */ |
602 | g_assert_true (g_unichar_isalnum (0xFF19)); /* Unichar fullwidth '9' */ |
603 | g_assert_false (g_unichar_isalnum (0xFF0A)); /* Unichar fullwidth '*' */ |
604 | |
605 | /*** Testing TYPE() border cases ***/ |
606 | g_assert_true (g_unichar_isalnum (0x3FF5)); |
607 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
608 | g_assert_false (g_unichar_isalnum (0xFFEFF)); |
609 | /* U+E0001 Language Tag */ |
610 | g_assert_false (g_unichar_isalnum (0xE0001)); |
611 | g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR)); |
612 | g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR + 1)); |
613 | g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR_PART1)); |
614 | g_assert_false (g_unichar_isalnum (G_UNICODE_LAST_CHAR_PART1 + 1)); |
615 | } |
616 | |
617 | /* Test that g_unichar_isalpha() returns the correct value for various |
618 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
619 | static void |
620 | test_alpha (void) |
621 | { |
622 | g_assert_false (g_unichar_isalpha (' ')); |
623 | g_assert_true (g_unichar_isalpha ('a')); |
624 | g_assert_true (g_unichar_isalpha ('z')); |
625 | g_assert_false (g_unichar_isalpha ('0')); |
626 | g_assert_false (g_unichar_isalpha ('9')); |
627 | g_assert_true (g_unichar_isalpha ('A')); |
628 | g_assert_true (g_unichar_isalpha ('Z')); |
629 | g_assert_false (g_unichar_isalpha ('-')); |
630 | g_assert_false (g_unichar_isalpha ('*')); |
631 | g_assert_true (g_unichar_isalpha (0xFF21)); /* Unichar fullwidth 'A' */ |
632 | g_assert_true (g_unichar_isalpha (0xFF3A)); /* Unichar fullwidth 'Z' */ |
633 | g_assert_true (g_unichar_isalpha (0xFF41)); /* Unichar fullwidth 'a' */ |
634 | g_assert_true (g_unichar_isalpha (0xFF5A)); /* Unichar fullwidth 'z' */ |
635 | g_assert_false (g_unichar_isalpha (0xFF10)); /* Unichar fullwidth '0' */ |
636 | g_assert_false (g_unichar_isalpha (0xFF19)); /* Unichar fullwidth '9' */ |
637 | g_assert_false (g_unichar_isalpha (0xFF0A)); /* Unichar fullwidth '*' */ |
638 | |
639 | /*** Testing TYPE() border cases ***/ |
640 | g_assert_true (g_unichar_isalpha (0x3FF5)); |
641 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
642 | g_assert_false (g_unichar_isalpha (0xFFEFF)); |
643 | /* U+E0001 Language Tag */ |
644 | g_assert_false (g_unichar_isalpha (0xE0001)); |
645 | g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR)); |
646 | g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR + 1)); |
647 | g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR_PART1)); |
648 | g_assert_false (g_unichar_isalpha (G_UNICODE_LAST_CHAR_PART1 + 1)); |
649 | } |
650 | |
651 | /* Test that g_unichar_isdigit() returns the correct value for various |
652 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
653 | static void |
654 | test_digit (void) |
655 | { |
656 | g_assert_false (g_unichar_isdigit (' ')); |
657 | g_assert_false (g_unichar_isdigit ('a')); |
658 | g_assert_true (g_unichar_isdigit ('0')); |
659 | g_assert_true (g_unichar_isdigit ('9')); |
660 | g_assert_false (g_unichar_isdigit ('A')); |
661 | g_assert_false (g_unichar_isdigit ('-')); |
662 | g_assert_false (g_unichar_isdigit ('*')); |
663 | g_assert_false (g_unichar_isdigit (0xFF21)); /* Unichar fullwidth 'A' */ |
664 | g_assert_false (g_unichar_isdigit (0xFF3A)); /* Unichar fullwidth 'Z' */ |
665 | g_assert_false (g_unichar_isdigit (0xFF41)); /* Unichar fullwidth 'a' */ |
666 | g_assert_false (g_unichar_isdigit (0xFF5A)); /* Unichar fullwidth 'z' */ |
667 | g_assert_true (g_unichar_isdigit (0xFF10)); /* Unichar fullwidth '0' */ |
668 | g_assert_true (g_unichar_isdigit (0xFF19)); /* Unichar fullwidth '9' */ |
669 | g_assert_false (g_unichar_isdigit (0xFF0A)); /* Unichar fullwidth '*' */ |
670 | |
671 | /*** Testing TYPE() border cases ***/ |
672 | g_assert_false (g_unichar_isdigit (0x3FF5)); |
673 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
674 | g_assert_false (g_unichar_isdigit (0xFFEFF)); |
675 | /* U+E0001 Language Tag */ |
676 | g_assert_false (g_unichar_isdigit (0xE0001)); |
677 | g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR)); |
678 | g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR + 1)); |
679 | g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR_PART1)); |
680 | g_assert_false (g_unichar_isdigit (G_UNICODE_LAST_CHAR_PART1 + 1)); |
681 | } |
682 | |
683 | /* Test that g_unichar_digit_value() returns the correct value for various |
684 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
685 | static void |
686 | test_digit_value (void) |
687 | { |
688 | g_assert_cmpint (g_unichar_digit_value (' '), ==, -1); |
689 | g_assert_cmpint (g_unichar_digit_value ('a'), ==, -1); |
690 | g_assert_cmpint (g_unichar_digit_value ('0'), ==, 0); |
691 | g_assert_cmpint (g_unichar_digit_value ('9'), ==, 9); |
692 | g_assert_cmpint (g_unichar_digit_value ('A'), ==, -1); |
693 | g_assert_cmpint (g_unichar_digit_value ('-'), ==, -1); |
694 | g_assert_cmpint (g_unichar_digit_value (0xFF21), ==, -1); /* Unichar 'A' */ |
695 | g_assert_cmpint (g_unichar_digit_value (0xFF3A), ==, -1); /* Unichar 'Z' */ |
696 | g_assert_cmpint (g_unichar_digit_value (0xFF41), ==, -1); /* Unichar 'a' */ |
697 | g_assert_cmpint (g_unichar_digit_value (0xFF5A), ==, -1); /* Unichar 'z' */ |
698 | g_assert_cmpint (g_unichar_digit_value (0xFF10), ==, 0); /* Unichar '0' */ |
699 | g_assert_cmpint (g_unichar_digit_value (0xFF19), ==, 9); /* Unichar '9' */ |
700 | g_assert_cmpint (g_unichar_digit_value (0xFF0A), ==, -1); /* Unichar '*' */ |
701 | |
702 | /*** Testing TYPE() border cases ***/ |
703 | g_assert_cmpint (g_unichar_digit_value (0x3FF5), ==, -1); |
704 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
705 | g_assert_cmpint (g_unichar_digit_value (0xFFEFF), ==, -1); |
706 | /* U+E0001 Language Tag */ |
707 | g_assert_cmpint (g_unichar_digit_value (0xE0001), ==, -1); |
708 | g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR), ==, -1); |
709 | g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR + 1), ==, -1); |
710 | g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR_PART1), ==, -1); |
711 | g_assert_cmpint (g_unichar_digit_value (G_UNICODE_LAST_CHAR_PART1 + 1), ==, -1); |
712 | } |
713 | |
714 | /* Test that g_unichar_isxdigit() returns the correct value for various |
715 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
716 | static void |
717 | test_xdigit (void) |
718 | { |
719 | g_assert_false (g_unichar_isxdigit (' ')); |
720 | g_assert_true (g_unichar_isxdigit ('a')); |
721 | g_assert_true (g_unichar_isxdigit ('f')); |
722 | g_assert_false (g_unichar_isxdigit ('g')); |
723 | g_assert_false (g_unichar_isxdigit ('z')); |
724 | g_assert_true (g_unichar_isxdigit ('0')); |
725 | g_assert_true (g_unichar_isxdigit ('9')); |
726 | g_assert_true (g_unichar_isxdigit ('A')); |
727 | g_assert_true (g_unichar_isxdigit ('F')); |
728 | g_assert_false (g_unichar_isxdigit ('G')); |
729 | g_assert_false (g_unichar_isxdigit ('Z')); |
730 | g_assert_false (g_unichar_isxdigit ('-')); |
731 | g_assert_false (g_unichar_isxdigit ('*')); |
732 | g_assert_true (g_unichar_isxdigit (0xFF21)); /* Unichar fullwidth 'A' */ |
733 | g_assert_true (g_unichar_isxdigit (0xFF26)); /* Unichar fullwidth 'F' */ |
734 | g_assert_false (g_unichar_isxdigit (0xFF27)); /* Unichar fullwidth 'G' */ |
735 | g_assert_false (g_unichar_isxdigit (0xFF3A)); /* Unichar fullwidth 'Z' */ |
736 | g_assert_true (g_unichar_isxdigit (0xFF41)); /* Unichar fullwidth 'a' */ |
737 | g_assert_true (g_unichar_isxdigit (0xFF46)); /* Unichar fullwidth 'f' */ |
738 | g_assert_false (g_unichar_isxdigit (0xFF47)); /* Unichar fullwidth 'g' */ |
739 | g_assert_false (g_unichar_isxdigit (0xFF5A)); /* Unichar fullwidth 'z' */ |
740 | g_assert_true (g_unichar_isxdigit (0xFF10)); /* Unichar fullwidth '0' */ |
741 | g_assert_true (g_unichar_isxdigit (0xFF19)); /* Unichar fullwidth '9' */ |
742 | g_assert_false (g_unichar_isxdigit (0xFF0A)); /* Unichar fullwidth '*' */ |
743 | |
744 | /*** Testing TYPE() border cases ***/ |
745 | g_assert_false (g_unichar_isxdigit (0x3FF5)); |
746 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
747 | g_assert_false (g_unichar_isxdigit (0xFFEFF)); |
748 | /* U+E0001 Language Tag */ |
749 | g_assert_false (g_unichar_isxdigit (0xE0001)); |
750 | g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR)); |
751 | g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR + 1)); |
752 | g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR_PART1)); |
753 | g_assert_false (g_unichar_isxdigit (G_UNICODE_LAST_CHAR_PART1 + 1)); |
754 | } |
755 | |
756 | /* Test that g_unichar_xdigit_value() returns the correct value for various |
757 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
758 | static void |
759 | test_xdigit_value (void) |
760 | { |
761 | g_assert_cmpint (g_unichar_xdigit_value (' '), ==, -1); |
762 | g_assert_cmpint (g_unichar_xdigit_value ('a'), ==, 10); |
763 | g_assert_cmpint (g_unichar_xdigit_value ('f'), ==, 15); |
764 | g_assert_cmpint (g_unichar_xdigit_value ('g'), ==, -1); |
765 | g_assert_cmpint (g_unichar_xdigit_value ('0'), ==, 0); |
766 | g_assert_cmpint (g_unichar_xdigit_value ('9'), ==, 9); |
767 | g_assert_cmpint (g_unichar_xdigit_value ('A'), ==, 10); |
768 | g_assert_cmpint (g_unichar_xdigit_value ('F'), ==, 15); |
769 | g_assert_cmpint (g_unichar_xdigit_value ('G'), ==, -1); |
770 | g_assert_cmpint (g_unichar_xdigit_value ('-'), ==, -1); |
771 | g_assert_cmpint (g_unichar_xdigit_value (0xFF21), ==, 10); /* Unichar 'A' */ |
772 | g_assert_cmpint (g_unichar_xdigit_value (0xFF26), ==, 15); /* Unichar 'F' */ |
773 | g_assert_cmpint (g_unichar_xdigit_value (0xFF27), ==, -1); /* Unichar 'G' */ |
774 | g_assert_cmpint (g_unichar_xdigit_value (0xFF3A), ==, -1); /* Unichar 'Z' */ |
775 | g_assert_cmpint (g_unichar_xdigit_value (0xFF41), ==, 10); /* Unichar 'a' */ |
776 | g_assert_cmpint (g_unichar_xdigit_value (0xFF46), ==, 15); /* Unichar 'f' */ |
777 | g_assert_cmpint (g_unichar_xdigit_value (0xFF47), ==, -1); /* Unichar 'g' */ |
778 | g_assert_cmpint (g_unichar_xdigit_value (0xFF5A), ==, -1); /* Unichar 'z' */ |
779 | g_assert_cmpint (g_unichar_xdigit_value (0xFF10), ==, 0); /* Unichar '0' */ |
780 | g_assert_cmpint (g_unichar_xdigit_value (0xFF19), ==, 9); /* Unichar '9' */ |
781 | g_assert_cmpint (g_unichar_xdigit_value (0xFF0A), ==, -1); /* Unichar '*' */ |
782 | |
783 | /*** Testing TYPE() border cases ***/ |
784 | g_assert_cmpint (g_unichar_xdigit_value (0x3FF5), ==, -1); |
785 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
786 | g_assert_cmpint (g_unichar_xdigit_value (0xFFEFF), ==, -1); |
787 | /* U+E0001 Language Tag */ |
788 | g_assert_cmpint (g_unichar_xdigit_value (0xE0001), ==, -1); |
789 | g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR), ==, -1); |
790 | g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR + 1), ==, -1); |
791 | g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR_PART1), ==, -1); |
792 | g_assert_cmpint (g_unichar_xdigit_value (G_UNICODE_LAST_CHAR_PART1 + 1), ==, -1); |
793 | } |
794 | |
795 | /* Test that g_unichar_ispunct() returns the correct value for various |
796 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
797 | static void |
798 | test_punctuation (void) |
799 | { |
800 | g_assert_false (g_unichar_ispunct (' ')); |
801 | g_assert_false (g_unichar_ispunct ('a')); |
802 | g_assert_true (g_unichar_ispunct ('.')); |
803 | g_assert_true (g_unichar_ispunct (',')); |
804 | g_assert_true (g_unichar_ispunct (';')); |
805 | g_assert_true (g_unichar_ispunct (':')); |
806 | g_assert_true (g_unichar_ispunct ('-')); |
807 | |
808 | g_assert_false (g_unichar_ispunct (0xFF21)); /* Unichar fullwidth 'A' */ |
809 | g_assert_true (g_unichar_ispunct (0x005F)); /* Unichar fullwidth '.' */ |
810 | g_assert_true (g_unichar_ispunct (0x058A)); /* Unichar fullwidth '-' */ |
811 | |
812 | /*** Testing TYPE() border cases ***/ |
813 | g_assert_false (g_unichar_ispunct (0x3FF5)); |
814 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
815 | g_assert_false (g_unichar_ispunct (0xFFEFF)); |
816 | /* U+E0001 Language Tag */ |
817 | g_assert_false (g_unichar_ispunct (0xE0001)); |
818 | g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR)); |
819 | g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR + 1)); |
820 | g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR_PART1)); |
821 | g_assert_false (g_unichar_ispunct (G_UNICODE_LAST_CHAR_PART1 + 1)); |
822 | } |
823 | |
824 | /* Test that g_unichar_iscntrl() returns the correct value for various |
825 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
826 | static void |
827 | test_cntrl (void) |
828 | { |
829 | g_assert_true (g_unichar_iscntrl (0x08)); |
830 | g_assert_false (g_unichar_iscntrl ('a')); |
831 | g_assert_true (g_unichar_iscntrl (0x007F)); /* Unichar fullwidth <del> */ |
832 | g_assert_true (g_unichar_iscntrl (0x009F)); /* Unichar fullwidth control */ |
833 | |
834 | /*** Testing TYPE() border cases ***/ |
835 | g_assert_false (g_unichar_iscntrl (0x3FF5)); |
836 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
837 | g_assert_false (g_unichar_iscntrl (0xFFEFF)); |
838 | /* U+E0001 Language Tag */ |
839 | g_assert_false (g_unichar_iscntrl (0xE0001)); |
840 | g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR)); |
841 | g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR + 1)); |
842 | g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR_PART1)); |
843 | g_assert_false (g_unichar_iscntrl (G_UNICODE_LAST_CHAR_PART1 + 1)); |
844 | } |
845 | |
846 | /* Test that g_unichar_isgraph() returns the correct value for various |
847 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
848 | static void |
849 | test_graph (void) |
850 | { |
851 | g_assert_false (g_unichar_isgraph (0x08)); |
852 | g_assert_false (g_unichar_isgraph (' ')); |
853 | g_assert_true (g_unichar_isgraph ('a')); |
854 | g_assert_true (g_unichar_isgraph ('0')); |
855 | g_assert_true (g_unichar_isgraph ('9')); |
856 | g_assert_true (g_unichar_isgraph ('A')); |
857 | g_assert_true (g_unichar_isgraph ('-')); |
858 | g_assert_true (g_unichar_isgraph ('*')); |
859 | g_assert_true (g_unichar_isgraph (0xFF21)); /* Unichar fullwidth 'A' */ |
860 | g_assert_true (g_unichar_isgraph (0xFF3A)); /* Unichar fullwidth 'Z' */ |
861 | g_assert_true (g_unichar_isgraph (0xFF41)); /* Unichar fullwidth 'a' */ |
862 | g_assert_true (g_unichar_isgraph (0xFF5A)); /* Unichar fullwidth 'z' */ |
863 | g_assert_true (g_unichar_isgraph (0xFF10)); /* Unichar fullwidth '0' */ |
864 | g_assert_true (g_unichar_isgraph (0xFF19)); /* Unichar fullwidth '9' */ |
865 | g_assert_true (g_unichar_isgraph (0xFF0A)); /* Unichar fullwidth '*' */ |
866 | g_assert_false (g_unichar_isgraph (0x007F)); /* Unichar fullwidth <del> */ |
867 | g_assert_false (g_unichar_isgraph (0x009F)); /* Unichar fullwidth control */ |
868 | |
869 | /*** Testing TYPE() border cases ***/ |
870 | g_assert_true (g_unichar_isgraph (0x3FF5)); |
871 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
872 | g_assert_true (g_unichar_isgraph (0xFFEFF)); |
873 | /* U+E0001 Language Tag */ |
874 | g_assert_false (g_unichar_isgraph (0xE0001)); |
875 | g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR)); |
876 | g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR + 1)); |
877 | g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR_PART1)); |
878 | g_assert_false (g_unichar_isgraph (G_UNICODE_LAST_CHAR_PART1 + 1)); |
879 | } |
880 | |
881 | /* Test that g_unichar_iszerowidth() returns the correct value for various |
882 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
883 | static void |
884 | test_zerowidth (void) |
885 | { |
886 | g_assert_false (g_unichar_iszerowidth (0x00AD)); |
887 | g_assert_false (g_unichar_iszerowidth (0x115F)); |
888 | g_assert_true (g_unichar_iszerowidth (0x1160)); |
889 | g_assert_true (g_unichar_iszerowidth (0x11AA)); |
890 | g_assert_true (g_unichar_iszerowidth (0x11FF)); |
891 | g_assert_false (g_unichar_iszerowidth (0x1200)); |
892 | g_assert_false (g_unichar_iszerowidth (0x200A)); |
893 | g_assert_true (g_unichar_iszerowidth (0x200B)); |
894 | g_assert_true (g_unichar_iszerowidth (0x200C)); |
895 | g_assert_true (g_unichar_iszerowidth (0x591)); |
896 | |
897 | /*** Testing TYPE() border cases ***/ |
898 | g_assert_false (g_unichar_iszerowidth (0x3FF5)); |
899 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
900 | g_assert_false (g_unichar_iszerowidth (0xFFEFF)); |
901 | /* U+E0001 Language Tag */ |
902 | g_assert_true (g_unichar_iszerowidth (0xE0001)); |
903 | g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR)); |
904 | g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR + 1)); |
905 | g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR_PART1)); |
906 | g_assert_false (g_unichar_iszerowidth (G_UNICODE_LAST_CHAR_PART1 + 1)); |
907 | } |
908 | |
909 | /* Test that g_unichar_istitle() returns the correct value for various |
910 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
911 | static void |
912 | test_title (void) |
913 | { |
914 | g_assert_true (g_unichar_istitle (0x01c5)); |
915 | g_assert_true (g_unichar_istitle (0x1f88)); |
916 | g_assert_true (g_unichar_istitle (0x1fcc)); |
917 | g_assert_false (g_unichar_istitle ('a')); |
918 | g_assert_false (g_unichar_istitle ('A')); |
919 | g_assert_false (g_unichar_istitle (';')); |
920 | |
921 | /*** Testing TYPE() border cases ***/ |
922 | g_assert_false (g_unichar_istitle (0x3FF5)); |
923 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
924 | g_assert_false (g_unichar_istitle (0xFFEFF)); |
925 | /* U+E0001 Language Tag */ |
926 | g_assert_false (g_unichar_istitle (0xE0001)); |
927 | g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR)); |
928 | g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR + 1)); |
929 | g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR_PART1)); |
930 | g_assert_false (g_unichar_istitle (G_UNICODE_LAST_CHAR_PART1 + 1)); |
931 | |
932 | g_assert_cmphex (g_unichar_totitle (0x0000), ==, 0x0000); |
933 | g_assert_cmphex (g_unichar_totitle (0x01c6), ==, 0x01c5); |
934 | g_assert_cmphex (g_unichar_totitle (0x01c4), ==, 0x01c5); |
935 | g_assert_cmphex (g_unichar_totitle (0x01c5), ==, 0x01c5); |
936 | g_assert_cmphex (g_unichar_totitle (0x1f80), ==, 0x1f88); |
937 | g_assert_cmphex (g_unichar_totitle (0x1f88), ==, 0x1f88); |
938 | g_assert_cmphex (g_unichar_totitle ('a'), ==, 'A'); |
939 | g_assert_cmphex (g_unichar_totitle ('A'), ==, 'A'); |
940 | |
941 | /*** Testing TYPE() border cases ***/ |
942 | g_assert_cmphex (g_unichar_totitle (0x3FF5), ==, 0x3FF5); |
943 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
944 | g_assert_cmphex (g_unichar_totitle (0xFFEFF), ==, 0xFFEFF); |
945 | g_assert_cmphex (g_unichar_totitle (0xDFFFF), ==, 0xDFFFF); |
946 | /* U+E0001 Language Tag */ |
947 | g_assert_cmphex (g_unichar_totitle (0xE0001), ==, 0xE0001); |
948 | g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR), ==, |
949 | G_UNICODE_LAST_CHAR); |
950 | g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR + 1), ==, |
951 | (G_UNICODE_LAST_CHAR + 1)); |
952 | g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR_PART1), ==, |
953 | (G_UNICODE_LAST_CHAR_PART1)); |
954 | g_assert_cmphex (g_unichar_totitle (G_UNICODE_LAST_CHAR_PART1 + 1), ==, |
955 | (G_UNICODE_LAST_CHAR_PART1 + 1)); |
956 | } |
957 | |
958 | /* Test that g_unichar_isupper() returns the correct value for various |
959 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
960 | static void |
961 | test_upper (void) |
962 | { |
963 | g_assert_false (g_unichar_isupper (' ')); |
964 | g_assert_false (g_unichar_isupper ('0')); |
965 | g_assert_false (g_unichar_isupper ('a')); |
966 | g_assert_true (g_unichar_isupper ('A')); |
967 | g_assert_false (g_unichar_isupper (0xff41)); /* Unicode fullwidth 'a' */ |
968 | g_assert_true (g_unichar_isupper (0xff21)); /* Unicode fullwidth 'A' */ |
969 | |
970 | /*** Testing TYPE() border cases ***/ |
971 | g_assert_false (g_unichar_isupper (0x3FF5)); |
972 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
973 | g_assert_false (g_unichar_isupper (0xFFEFF)); |
974 | /* U+E0001 Language Tag */ |
975 | g_assert_false (g_unichar_isupper (0xE0001)); |
976 | g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR)); |
977 | g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR + 1)); |
978 | g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR_PART1)); |
979 | g_assert_false (g_unichar_isupper (G_UNICODE_LAST_CHAR_PART1 + 1)); |
980 | } |
981 | |
982 | /* Test that g_unichar_islower() returns the correct value for various |
983 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
984 | static void |
985 | test_lower (void) |
986 | { |
987 | g_assert_false (g_unichar_islower (' ')); |
988 | g_assert_false (g_unichar_islower ('0')); |
989 | g_assert_true (g_unichar_islower ('a')); |
990 | g_assert_false (g_unichar_islower ('A')); |
991 | g_assert_true (g_unichar_islower (0xff41)); /* Unicode fullwidth 'a' */ |
992 | g_assert_false (g_unichar_islower (0xff21)); /* Unicode fullwidth 'A' */ |
993 | |
994 | /*** Testing TYPE() border cases ***/ |
995 | g_assert_false (g_unichar_islower (0x3FF5)); |
996 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
997 | g_assert_false (g_unichar_islower (0xFFEFF)); |
998 | /* U+E0001 Language Tag */ |
999 | g_assert_false (g_unichar_islower (0xE0001)); |
1000 | g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR)); |
1001 | g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR + 1)); |
1002 | g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR_PART1)); |
1003 | g_assert_false (g_unichar_islower (G_UNICODE_LAST_CHAR_PART1 + 1)); |
1004 | } |
1005 | |
1006 | /* Test that g_unichar_isprint() returns the correct value for various |
1007 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
1008 | static void |
1009 | test_print (void) |
1010 | { |
1011 | g_assert_true (g_unichar_isprint (' ')); |
1012 | g_assert_true (g_unichar_isprint ('0')); |
1013 | g_assert_true (g_unichar_isprint ('a')); |
1014 | g_assert_true (g_unichar_isprint ('A')); |
1015 | g_assert_true (g_unichar_isprint (0xff41)); /* Unicode fullwidth 'a' */ |
1016 | g_assert_true (g_unichar_isprint (0xff21)); /* Unicode fullwidth 'A' */ |
1017 | |
1018 | /*** Testing TYPE() border cases ***/ |
1019 | g_assert_true (g_unichar_isprint (0x3FF5)); |
1020 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
1021 | g_assert_true (g_unichar_isprint (0xFFEFF)); |
1022 | /* U+E0001 Language Tag */ |
1023 | g_assert_false (g_unichar_isprint (0xE0001)); |
1024 | g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR)); |
1025 | g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR + 1)); |
1026 | g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR_PART1)); |
1027 | g_assert_false (g_unichar_isprint (G_UNICODE_LAST_CHAR_PART1 + 1)); |
1028 | } |
1029 | |
1030 | /* Test that g_unichar_toupper() and g_unichar_tolower() return the |
1031 | * correct values for various ASCII and Unicode alphabetic, numeric, |
1032 | * and other, codepoints. */ |
1033 | static void |
1034 | test_cases (void) |
1035 | { |
1036 | g_assert_cmphex (g_unichar_toupper (0x0), ==, 0x0); |
1037 | g_assert_cmphex (g_unichar_tolower (0x0), ==, 0x0); |
1038 | g_assert_cmphex (g_unichar_toupper ('a'), ==, 'A'); |
1039 | g_assert_cmphex (g_unichar_toupper ('A'), ==, 'A'); |
1040 | /* Unicode fullwidth 'a' == 'A' */ |
1041 | g_assert_cmphex (g_unichar_toupper (0xff41), ==, 0xff21); |
1042 | /* Unicode fullwidth 'A' == 'A' */ |
1043 | g_assert_cmphex (g_unichar_toupper (0xff21), ==, 0xff21); |
1044 | g_assert_cmphex (g_unichar_toupper (0x01C5), ==, 0x01C4); |
1045 | g_assert_cmphex (g_unichar_toupper (0x01C6), ==, 0x01C4); |
1046 | g_assert_cmphex (g_unichar_tolower ('A'), ==, 'a'); |
1047 | g_assert_cmphex (g_unichar_tolower ('a'), ==, 'a'); |
1048 | /* Unicode fullwidth 'A' == 'a' */ |
1049 | g_assert_cmphex (g_unichar_tolower (0xff21), ==, 0xff41); |
1050 | /* Unicode fullwidth 'a' == 'a' */ |
1051 | g_assert_cmphex (g_unichar_tolower (0xff41), ==, 0xff41); |
1052 | g_assert_cmphex (g_unichar_tolower (0x01C4), ==, 0x01C6); |
1053 | g_assert_cmphex (g_unichar_tolower (0x01C5), ==, 0x01C6); |
1054 | g_assert_cmphex (g_unichar_tolower (0x1F8A), ==, 0x1F82); |
1055 | g_assert_cmphex (g_unichar_totitle (0x1F8A), ==, 0x1F8A); |
1056 | g_assert_cmphex (g_unichar_toupper (0x1F8A), ==, 0x1F8A); |
1057 | g_assert_cmphex (g_unichar_tolower (0x1FB2), ==, 0x1FB2); |
1058 | g_assert_cmphex (g_unichar_toupper (0x1FB2), ==, 0x1FB2); |
1059 | |
1060 | /* U+130 is a special case, it's a 'I' with a dot on top */ |
1061 | g_assert_cmphex (g_unichar_tolower (0x130), ==, 0x69); |
1062 | |
1063 | /* Testing ATTTABLE() border cases */ |
1064 | g_assert_cmphex (g_unichar_toupper (0x1D6FE), ==, 0x1D6FE); |
1065 | |
1066 | /*** Testing TYPE() border cases ***/ |
1067 | g_assert_cmphex (g_unichar_toupper (0x3FF5), ==, 0x3FF5); |
1068 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
1069 | g_assert_cmphex (g_unichar_toupper (0xFFEFF), ==, 0xFFEFF); |
1070 | g_assert_cmphex (g_unichar_toupper (0xDFFFF), ==, 0xDFFFF); |
1071 | /* U+E0001 Language Tag */ |
1072 | g_assert_cmphex (g_unichar_toupper (0xE0001), ==, 0xE0001); |
1073 | g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR), ==, |
1074 | G_UNICODE_LAST_CHAR); |
1075 | g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR + 1), ==, |
1076 | (G_UNICODE_LAST_CHAR + 1)); |
1077 | g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR_PART1), ==, |
1078 | (G_UNICODE_LAST_CHAR_PART1)); |
1079 | g_assert_cmphex (g_unichar_toupper (G_UNICODE_LAST_CHAR_PART1 + 1), ==, |
1080 | (G_UNICODE_LAST_CHAR_PART1 + 1)); |
1081 | |
1082 | /* Testing ATTTABLE() border cases */ |
1083 | g_assert_cmphex (g_unichar_tolower (0x1D6FA), ==, 0x1D6FA); |
1084 | |
1085 | /*** Testing TYPE() border cases ***/ |
1086 | g_assert_cmphex (g_unichar_tolower (0x3FF5), ==, 0x3FF5); |
1087 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
1088 | g_assert_cmphex (g_unichar_tolower (0xFFEFF), ==, 0xFFEFF); |
1089 | g_assert_cmphex (g_unichar_tolower (0xDFFFF), ==, 0xDFFFF); |
1090 | /* U+E0001 Language Tag */ |
1091 | g_assert_cmphex (g_unichar_tolower (0xE0001), ==, 0xE0001); |
1092 | g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR), ==, |
1093 | G_UNICODE_LAST_CHAR); |
1094 | g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR + 1), ==, |
1095 | (G_UNICODE_LAST_CHAR + 1)); |
1096 | g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR_PART1), ==, |
1097 | G_UNICODE_LAST_CHAR_PART1); |
1098 | g_assert_cmphex (g_unichar_tolower (G_UNICODE_LAST_CHAR_PART1 + 1), ==, |
1099 | (G_UNICODE_LAST_CHAR_PART1 + 1)); |
1100 | } |
1101 | |
1102 | /* Test that g_unichar_isdefined() returns the correct value for various |
1103 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
1104 | static void |
1105 | test_defined (void) |
1106 | { |
1107 | g_assert_true (g_unichar_isdefined (0x0903)); |
1108 | g_assert_true (g_unichar_isdefined (0x20DD)); |
1109 | g_assert_true (g_unichar_isdefined (0x20BA)); |
1110 | g_assert_true (g_unichar_isdefined (0xA806)); |
1111 | g_assert_true (g_unichar_isdefined ('a')); |
1112 | g_assert_false (g_unichar_isdefined (0x10C49)); |
1113 | g_assert_false (g_unichar_isdefined (0x169D)); |
1114 | |
1115 | /*** Testing TYPE() border cases ***/ |
1116 | g_assert_true (g_unichar_isdefined (0x3FF5)); |
1117 | /* U+FFEFF Plane 15 Private Use (needed to be > G_UNICODE_MAX_TABLE_INDEX) */ |
1118 | g_assert_true (g_unichar_isdefined (0xFFEFF)); |
1119 | g_assert_false (g_unichar_isdefined (0xDFFFF)); |
1120 | /* U+E0001 Language Tag */ |
1121 | g_assert_true (g_unichar_isdefined (0xE0001)); |
1122 | g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR)); |
1123 | g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR + 1)); |
1124 | g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR_PART1)); |
1125 | g_assert_false (g_unichar_isdefined (G_UNICODE_LAST_CHAR_PART1 + 1)); |
1126 | } |
1127 | |
1128 | /* Test that g_unichar_iswide() returns the correct value for various |
1129 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
1130 | static void |
1131 | test_wide (void) |
1132 | { |
1133 | guint i; |
1134 | struct { |
1135 | gunichar c; |
1136 | enum { |
1137 | NOT_WIDE, |
1138 | WIDE_CJK, |
1139 | WIDE |
1140 | } wide; |
1141 | } examples[] = { |
1142 | /* Neutral */ |
1143 | { 0x0000, NOT_WIDE }, |
1144 | { 0x0483, NOT_WIDE }, |
1145 | { 0x0641, NOT_WIDE }, |
1146 | { 0xFFFC, NOT_WIDE }, |
1147 | { 0x10000, NOT_WIDE }, |
1148 | { 0xE0001, NOT_WIDE }, |
1149 | { 0x2FFFE, NOT_WIDE }, |
1150 | { 0x3FFFE, NOT_WIDE }, |
1151 | |
1152 | /* Narrow */ |
1153 | { 0x0020, NOT_WIDE }, |
1154 | { 0x0041, NOT_WIDE }, |
1155 | { 0x27E6, NOT_WIDE }, |
1156 | |
1157 | /* Halfwidth */ |
1158 | { 0x20A9, NOT_WIDE }, |
1159 | { 0xFF61, NOT_WIDE }, |
1160 | { 0xFF69, NOT_WIDE }, |
1161 | { 0xFFEE, NOT_WIDE }, |
1162 | |
1163 | /* Ambiguous */ |
1164 | { 0x00A1, WIDE_CJK }, |
1165 | { 0x00BE, WIDE_CJK }, |
1166 | { 0x02DD, WIDE_CJK }, |
1167 | { 0x2020, WIDE_CJK }, |
1168 | { 0xFFFD, WIDE_CJK }, |
1169 | { 0x00A1, WIDE_CJK }, |
1170 | { 0x1F100, WIDE_CJK }, |
1171 | { 0xE0100, WIDE_CJK }, |
1172 | { 0x100000, WIDE_CJK }, |
1173 | { 0x10FFFD, WIDE_CJK }, |
1174 | |
1175 | /* Fullwidth */ |
1176 | { 0x3000, WIDE }, |
1177 | { 0xFF60, WIDE }, |
1178 | |
1179 | /* Wide */ |
1180 | { 0x2329, WIDE }, |
1181 | { 0x3001, WIDE }, |
1182 | { 0xFE69, WIDE }, |
1183 | { 0x30000, WIDE }, |
1184 | { 0x3FFFD, WIDE }, |
1185 | |
1186 | /* Default Wide blocks */ |
1187 | { 0x4DBF, WIDE }, |
1188 | { 0x9FFF, WIDE }, |
1189 | { 0xFAFF, WIDE }, |
1190 | { 0x2A6DF, WIDE }, |
1191 | { 0x2B73F, WIDE }, |
1192 | { 0x2B81F, WIDE }, |
1193 | { 0x2FA1F, WIDE }, |
1194 | |
1195 | /* Uniode-5.2 character additions */ |
1196 | /* Wide */ |
1197 | { 0x115F, WIDE }, |
1198 | |
1199 | /* Uniode-6.0 character additions */ |
1200 | /* Wide */ |
1201 | { 0x2B740, WIDE }, |
1202 | { 0x1B000, WIDE }, |
1203 | |
1204 | { 0x111111, NOT_WIDE } |
1205 | }; |
1206 | |
1207 | for (i = 0; i < G_N_ELEMENTS (examples); i++) |
1208 | { |
1209 | g_assert_cmpint (g_unichar_iswide (examples[i].c), ==, |
1210 | (examples[i].wide == WIDE)); |
1211 | g_assert_cmpint (g_unichar_iswide_cjk (examples[i].c), ==, |
1212 | (examples[i].wide != NOT_WIDE)); |
1213 | } |
1214 | }; |
1215 | |
1216 | /* Test that g_unichar_compose() returns the correct value for various |
1217 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
1218 | static void |
1219 | test_compose (void) |
1220 | { |
1221 | gunichar ch; |
1222 | |
1223 | /* Not composable */ |
1224 | g_assert_false (g_unichar_compose (0x0041, 0x0042, &ch) && ch == 0); |
1225 | g_assert_false (g_unichar_compose (0x0041, 0, &ch) && ch == 0); |
1226 | g_assert_false (g_unichar_compose (0x0066, 0x0069, &ch) && ch == 0); |
1227 | |
1228 | /* Tricky non-composable */ |
1229 | g_assert_false (g_unichar_compose (0x0308, 0x0301, &ch) && ch == 0); /* !0x0344 */ |
1230 | g_assert_false (g_unichar_compose (0x0F71, 0x0F72, &ch) && ch == 0); /* !0x0F73 */ |
1231 | |
1232 | /* Singletons should not compose */ |
1233 | g_assert_false (g_unichar_compose (0x212B, 0, &ch) && ch == 0); |
1234 | g_assert_false (g_unichar_compose (0x00C5, 0, &ch) && ch == 0); |
1235 | g_assert_false (g_unichar_compose (0x2126, 0, &ch) && ch == 0); |
1236 | g_assert_false (g_unichar_compose (0x03A9, 0, &ch) && ch == 0); |
1237 | |
1238 | /* Pairs */ |
1239 | g_assert_true (g_unichar_compose (0x0041, 0x030A, &ch) && ch == 0x00C5); |
1240 | g_assert_true (g_unichar_compose (0x006F, 0x0302, &ch) && ch == 0x00F4); |
1241 | g_assert_true (g_unichar_compose (0x1E63, 0x0307, &ch) && ch == 0x1E69); |
1242 | g_assert_true (g_unichar_compose (0x0073, 0x0323, &ch) && ch == 0x1E63); |
1243 | g_assert_true (g_unichar_compose (0x0064, 0x0307, &ch) && ch == 0x1E0B); |
1244 | g_assert_true (g_unichar_compose (0x0064, 0x0323, &ch) && ch == 0x1E0D); |
1245 | |
1246 | /* Hangul */ |
1247 | g_assert_true (g_unichar_compose (0xD4CC, 0x11B6, &ch) && ch == 0xD4DB); |
1248 | g_assert_true (g_unichar_compose (0x1111, 0x1171, &ch) && ch == 0xD4CC); |
1249 | g_assert_true (g_unichar_compose (0xCE20, 0x11B8, &ch) && ch == 0xCE31); |
1250 | g_assert_true (g_unichar_compose (0x110E, 0x1173, &ch) && ch == 0xCE20); |
1251 | } |
1252 | |
1253 | /* Test that g_unichar_decompose() returns the correct value for various |
1254 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
1255 | static void |
1256 | test_decompose (void) |
1257 | { |
1258 | gunichar a, b; |
1259 | |
1260 | /* Not decomposable */ |
1261 | g_assert_false (g_unichar_decompose (0x0041, &a, &b) && a == 0x0041 && b == 0); |
1262 | g_assert_false (g_unichar_decompose (0xFB01, &a, &b) && a == 0xFB01 && b == 0); |
1263 | |
1264 | /* Singletons */ |
1265 | g_assert_true (g_unichar_decompose (0x212B, &a, &b) && a == 0x00C5 && b == 0); |
1266 | g_assert_true (g_unichar_decompose (0x2126, &a, &b) && a == 0x03A9 && b == 0); |
1267 | |
1268 | /* Tricky pairs */ |
1269 | g_assert_true (g_unichar_decompose (0x0344, &a, &b) && a == 0x0308 && b == 0x0301); |
1270 | g_assert_true (g_unichar_decompose (0x0F73, &a, &b) && a == 0x0F71 && b == 0x0F72); |
1271 | |
1272 | /* Pairs */ |
1273 | g_assert_true (g_unichar_decompose (0x00C5, &a, &b) && a == 0x0041 && b == 0x030A); |
1274 | g_assert_true (g_unichar_decompose (0x00F4, &a, &b) && a == 0x006F && b == 0x0302); |
1275 | g_assert_true (g_unichar_decompose (0x1E69, &a, &b) && a == 0x1E63 && b == 0x0307); |
1276 | g_assert_true (g_unichar_decompose (0x1E63, &a, &b) && a == 0x0073 && b == 0x0323); |
1277 | g_assert_true (g_unichar_decompose (0x1E0B, &a, &b) && a == 0x0064 && b == 0x0307); |
1278 | g_assert_true (g_unichar_decompose (0x1E0D, &a, &b) && a == 0x0064 && b == 0x0323); |
1279 | |
1280 | /* Hangul */ |
1281 | g_assert_true (g_unichar_decompose (0xD4DB, &a, &b) && a == 0xD4CC && b == 0x11B6); |
1282 | g_assert_true (g_unichar_decompose (0xD4CC, &a, &b) && a == 0x1111 && b == 0x1171); |
1283 | g_assert_true (g_unichar_decompose (0xCE31, &a, &b) && a == 0xCE20 && b == 0x11B8); |
1284 | g_assert_true (g_unichar_decompose (0xCE20, &a, &b) && a == 0x110E && b == 0x1173); |
1285 | } |
1286 | |
1287 | /* Test that g_unichar_fully_decompose() returns the correct value for |
1288 | * various ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
1289 | static void |
1290 | test_fully_decompose_canonical (void) |
1291 | { |
1292 | gunichar decomp[5]; |
1293 | gsize len; |
1294 | |
1295 | #define TEST_DECOMP(ch, expected_len, a, b, c, d) \ |
1296 | len = g_unichar_fully_decompose (ch, FALSE, decomp, G_N_ELEMENTS (decomp)); \ |
1297 | g_assert_cmpint (expected_len, ==, len); \ |
1298 | if (expected_len >= 1) g_assert_cmphex (decomp[0], ==, a); \ |
1299 | if (expected_len >= 2) g_assert_cmphex (decomp[1], ==, b); \ |
1300 | if (expected_len >= 3) g_assert_cmphex (decomp[2], ==, c); \ |
1301 | if (expected_len >= 4) g_assert_cmphex (decomp[3], ==, d); \ |
1302 | |
1303 | #define TEST0(ch) TEST_DECOMP (ch, 1, ch, 0, 0, 0) |
1304 | #define TEST1(ch, a) TEST_DECOMP (ch, 1, a, 0, 0, 0) |
1305 | #define TEST2(ch, a, b) TEST_DECOMP (ch, 2, a, b, 0, 0) |
1306 | #define TEST3(ch, a, b, c) TEST_DECOMP (ch, 3, a, b, c, 0) |
1307 | #define TEST4(ch, a, b, c, d) TEST_DECOMP (ch, 4, a, b, c, d) |
1308 | |
1309 | /* Not decomposable */ |
1310 | TEST0 (0x0041); |
1311 | TEST0 (0xFB01); |
1312 | |
1313 | /* Singletons */ |
1314 | TEST2 (0x212B, 0x0041, 0x030A); |
1315 | TEST1 (0x2126, 0x03A9); |
1316 | |
1317 | /* Tricky pairs */ |
1318 | TEST2 (0x0344, 0x0308, 0x0301); |
1319 | TEST2 (0x0F73, 0x0F71, 0x0F72); |
1320 | |
1321 | /* General */ |
1322 | TEST2 (0x00C5, 0x0041, 0x030A); |
1323 | TEST2 (0x00F4, 0x006F, 0x0302); |
1324 | TEST3 (0x1E69, 0x0073, 0x0323, 0x0307); |
1325 | TEST2 (0x1E63, 0x0073, 0x0323); |
1326 | TEST2 (0x1E0B, 0x0064, 0x0307); |
1327 | TEST2 (0x1E0D, 0x0064, 0x0323); |
1328 | |
1329 | /* Hangul */ |
1330 | TEST3 (0xD4DB, 0x1111, 0x1171, 0x11B6); |
1331 | TEST2 (0xD4CC, 0x1111, 0x1171); |
1332 | TEST3 (0xCE31, 0x110E, 0x1173, 0x11B8); |
1333 | TEST2 (0xCE20, 0x110E, 0x1173); |
1334 | |
1335 | #undef TEST_DECOMP |
1336 | } |
1337 | |
1338 | /* Test that g_unicode_canonical_decomposition() returns the correct |
1339 | * value for various ASCII and Unicode alphabetic, numeric, and other, |
1340 | * codepoints. */ |
1341 | static void |
1342 | test_canonical_decomposition (void) |
1343 | { |
1344 | gunichar *decomp; |
1345 | gsize len; |
1346 | |
1347 | #define TEST_DECOMP(ch, expected_len, a, b, c, d) \ |
1348 | decomp = g_unicode_canonical_decomposition (ch, &len); \ |
1349 | g_assert_cmpint (expected_len, ==, len); \ |
1350 | if (expected_len >= 1) g_assert_cmphex (decomp[0], ==, a); \ |
1351 | if (expected_len >= 2) g_assert_cmphex (decomp[1], ==, b); \ |
1352 | if (expected_len >= 3) g_assert_cmphex (decomp[2], ==, c); \ |
1353 | if (expected_len >= 4) g_assert_cmphex (decomp[3], ==, d); \ |
1354 | g_free (decomp); |
1355 | |
1356 | #define TEST0(ch) TEST_DECOMP (ch, 1, ch, 0, 0, 0) |
1357 | #define TEST1(ch, a) TEST_DECOMP (ch, 1, a, 0, 0, 0) |
1358 | #define TEST2(ch, a, b) TEST_DECOMP (ch, 2, a, b, 0, 0) |
1359 | #define TEST3(ch, a, b, c) TEST_DECOMP (ch, 3, a, b, c, 0) |
1360 | #define TEST4(ch, a, b, c, d) TEST_DECOMP (ch, 4, a, b, c, d) |
1361 | |
1362 | /* Not decomposable */ |
1363 | TEST0 (0x0041); |
1364 | TEST0 (0xFB01); |
1365 | |
1366 | /* Singletons */ |
1367 | TEST2 (0x212B, 0x0041, 0x030A); |
1368 | TEST1 (0x2126, 0x03A9); |
1369 | |
1370 | /* Tricky pairs */ |
1371 | TEST2 (0x0344, 0x0308, 0x0301); |
1372 | TEST2 (0x0F73, 0x0F71, 0x0F72); |
1373 | |
1374 | /* General */ |
1375 | TEST2 (0x00C5, 0x0041, 0x030A); |
1376 | TEST2 (0x00F4, 0x006F, 0x0302); |
1377 | TEST3 (0x1E69, 0x0073, 0x0323, 0x0307); |
1378 | TEST2 (0x1E63, 0x0073, 0x0323); |
1379 | TEST2 (0x1E0B, 0x0064, 0x0307); |
1380 | TEST2 (0x1E0D, 0x0064, 0x0323); |
1381 | |
1382 | /* Hangul */ |
1383 | TEST3 (0xD4DB, 0x1111, 0x1171, 0x11B6); |
1384 | TEST2 (0xD4CC, 0x1111, 0x1171); |
1385 | TEST3 (0xCE31, 0x110E, 0x1173, 0x11B8); |
1386 | TEST2 (0xCE20, 0x110E, 0x1173); |
1387 | |
1388 | #undef TEST_DECOMP |
1389 | } |
1390 | |
1391 | /* Test that g_unichar_decompose() whenever encouttering a char ch |
1392 | * decomposes into a and b, b itself won't decompose any further. */ |
1393 | static void |
1394 | test_decompose_tail (void) |
1395 | { |
1396 | gunichar ch, a, b, c, d; |
1397 | |
1398 | /* Test that whenever a char ch decomposes into a and b, b itself |
1399 | * won't decompose any further. */ |
1400 | |
1401 | for (ch = 0; ch < 0x110000; ch++) |
1402 | if (g_unichar_decompose (ch, a: &a, b: &b)) |
1403 | g_assert_false (g_unichar_decompose (b, &c, &d)); |
1404 | else |
1405 | { |
1406 | g_assert_cmpuint (a, ==, ch); |
1407 | g_assert_cmpuint (b, ==, 0); |
1408 | } |
1409 | } |
1410 | |
1411 | /* Test that all canonical decompositions of g_unichar_fully_decompose() |
1412 | * are at most 4 in length, and compatibility decompositions are |
1413 | * at most 18 in length. */ |
1414 | static void |
1415 | test_fully_decompose_len (void) |
1416 | { |
1417 | gunichar ch; |
1418 | |
1419 | /* Test that all canonical decompositions are at most 4 in length, |
1420 | * and compatibility decompositions are at most 18 in length. |
1421 | */ |
1422 | |
1423 | for (ch = 0; ch < 0x110000; ch++) { |
1424 | g_assert_cmpint (g_unichar_fully_decompose (ch, FALSE, NULL, 0), <=, 4); |
1425 | g_assert_cmpint (g_unichar_fully_decompose (ch, TRUE, NULL, 0), <=, 18); |
1426 | } |
1427 | } |
1428 | |
1429 | /* Test that g_unichar_decompose() returns the correct value for various |
1430 | * ASCII and Unicode alphabetic, numeric, and other, codepoints. */ |
1431 | static void |
1432 | test_iso15924 (void) |
1433 | { |
1434 | const struct { |
1435 | GUnicodeScript script; |
1436 | char four_letter_code[5]; |
1437 | } data[] = { |
1438 | { G_UNICODE_SCRIPT_COMMON, "Zyyy" }, |
1439 | { G_UNICODE_SCRIPT_INHERITED, "Zinh" }, |
1440 | { G_UNICODE_SCRIPT_ARABIC, "Arab" }, |
1441 | { G_UNICODE_SCRIPT_ARMENIAN, "Armn" }, |
1442 | { G_UNICODE_SCRIPT_BENGALI, "Beng" }, |
1443 | { G_UNICODE_SCRIPT_BOPOMOFO, "Bopo" }, |
1444 | { G_UNICODE_SCRIPT_CHEROKEE, "Cher" }, |
1445 | { G_UNICODE_SCRIPT_COPTIC, "Copt" }, |
1446 | { G_UNICODE_SCRIPT_CYRILLIC, "Cyrl" }, |
1447 | { G_UNICODE_SCRIPT_DESERET, "Dsrt" }, |
1448 | { G_UNICODE_SCRIPT_DEVANAGARI, "Deva" }, |
1449 | { G_UNICODE_SCRIPT_ETHIOPIC, "Ethi" }, |
1450 | { G_UNICODE_SCRIPT_GEORGIAN, "Geor" }, |
1451 | { G_UNICODE_SCRIPT_GOTHIC, "Goth" }, |
1452 | { G_UNICODE_SCRIPT_GREEK, "Grek" }, |
1453 | { G_UNICODE_SCRIPT_GUJARATI, "Gujr" }, |
1454 | { G_UNICODE_SCRIPT_GURMUKHI, "Guru" }, |
1455 | { G_UNICODE_SCRIPT_HAN, "Hani" }, |
1456 | { G_UNICODE_SCRIPT_HANGUL, "Hang" }, |
1457 | { G_UNICODE_SCRIPT_HEBREW, "Hebr" }, |
1458 | { G_UNICODE_SCRIPT_HIRAGANA, "Hira" }, |
1459 | { G_UNICODE_SCRIPT_KANNADA, "Knda" }, |
1460 | { G_UNICODE_SCRIPT_KATAKANA, "Kana" }, |
1461 | { G_UNICODE_SCRIPT_KHMER, "Khmr" }, |
1462 | { G_UNICODE_SCRIPT_LAO, "Laoo" }, |
1463 | { G_UNICODE_SCRIPT_LATIN, "Latn" }, |
1464 | { G_UNICODE_SCRIPT_MALAYALAM, "Mlym" }, |
1465 | { G_UNICODE_SCRIPT_MONGOLIAN, "Mong" }, |
1466 | { G_UNICODE_SCRIPT_MYANMAR, "Mymr" }, |
1467 | { G_UNICODE_SCRIPT_OGHAM, "Ogam" }, |
1468 | { G_UNICODE_SCRIPT_OLD_ITALIC, "Ital" }, |
1469 | { G_UNICODE_SCRIPT_ORIYA, "Orya" }, |
1470 | { G_UNICODE_SCRIPT_RUNIC, "Runr" }, |
1471 | { G_UNICODE_SCRIPT_SINHALA, "Sinh" }, |
1472 | { G_UNICODE_SCRIPT_SYRIAC, "Syrc" }, |
1473 | { G_UNICODE_SCRIPT_TAMIL, "Taml" }, |
1474 | { G_UNICODE_SCRIPT_TELUGU, "Telu" }, |
1475 | { G_UNICODE_SCRIPT_THAANA, "Thaa" }, |
1476 | { G_UNICODE_SCRIPT_THAI, "Thai" }, |
1477 | { G_UNICODE_SCRIPT_TIBETAN, "Tibt" }, |
1478 | { G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, "Cans" }, |
1479 | { G_UNICODE_SCRIPT_YI, "Yiii" }, |
1480 | { G_UNICODE_SCRIPT_TAGALOG, "Tglg" }, |
1481 | { G_UNICODE_SCRIPT_HANUNOO, "Hano" }, |
1482 | { G_UNICODE_SCRIPT_BUHID, "Buhd" }, |
1483 | { G_UNICODE_SCRIPT_TAGBANWA, "Tagb" }, |
1484 | |
1485 | /* Unicode-4.0 additions */ |
1486 | { G_UNICODE_SCRIPT_BRAILLE, "Brai" }, |
1487 | { G_UNICODE_SCRIPT_CYPRIOT, "Cprt" }, |
1488 | { G_UNICODE_SCRIPT_LIMBU, "Limb" }, |
1489 | { G_UNICODE_SCRIPT_OSMANYA, "Osma" }, |
1490 | { G_UNICODE_SCRIPT_SHAVIAN, "Shaw" }, |
1491 | { G_UNICODE_SCRIPT_LINEAR_B, "Linb" }, |
1492 | { G_UNICODE_SCRIPT_TAI_LE, "Tale" }, |
1493 | { G_UNICODE_SCRIPT_UGARITIC, "Ugar" }, |
1494 | |
1495 | /* Unicode-4.1 additions */ |
1496 | { G_UNICODE_SCRIPT_NEW_TAI_LUE, "Talu" }, |
1497 | { G_UNICODE_SCRIPT_BUGINESE, "Bugi" }, |
1498 | { G_UNICODE_SCRIPT_GLAGOLITIC, "Glag" }, |
1499 | { G_UNICODE_SCRIPT_TIFINAGH, "Tfng" }, |
1500 | { G_UNICODE_SCRIPT_SYLOTI_NAGRI, "Sylo" }, |
1501 | { G_UNICODE_SCRIPT_OLD_PERSIAN, "Xpeo" }, |
1502 | { G_UNICODE_SCRIPT_KHAROSHTHI, "Khar" }, |
1503 | |
1504 | /* Unicode-5.0 additions */ |
1505 | { G_UNICODE_SCRIPT_UNKNOWN, "Zzzz" }, |
1506 | { G_UNICODE_SCRIPT_BALINESE, "Bali" }, |
1507 | { G_UNICODE_SCRIPT_CUNEIFORM, "Xsux" }, |
1508 | { G_UNICODE_SCRIPT_PHOENICIAN, "Phnx" }, |
1509 | { G_UNICODE_SCRIPT_PHAGS_PA, "Phag" }, |
1510 | { G_UNICODE_SCRIPT_NKO, "Nkoo" }, |
1511 | |
1512 | /* Unicode-5.1 additions */ |
1513 | { G_UNICODE_SCRIPT_KAYAH_LI, "Kali" }, |
1514 | { G_UNICODE_SCRIPT_LEPCHA, "Lepc" }, |
1515 | { G_UNICODE_SCRIPT_REJANG, "Rjng" }, |
1516 | { G_UNICODE_SCRIPT_SUNDANESE, "Sund" }, |
1517 | { G_UNICODE_SCRIPT_SAURASHTRA, "Saur" }, |
1518 | { G_UNICODE_SCRIPT_CHAM, "Cham" }, |
1519 | { G_UNICODE_SCRIPT_OL_CHIKI, "Olck" }, |
1520 | { G_UNICODE_SCRIPT_VAI, "Vaii" }, |
1521 | { G_UNICODE_SCRIPT_CARIAN, "Cari" }, |
1522 | { G_UNICODE_SCRIPT_LYCIAN, "Lyci" }, |
1523 | { G_UNICODE_SCRIPT_LYDIAN, "Lydi" }, |
1524 | |
1525 | /* Unicode-5.2 additions */ |
1526 | { G_UNICODE_SCRIPT_AVESTAN, "Avst" }, |
1527 | { G_UNICODE_SCRIPT_BAMUM, "Bamu" }, |
1528 | { G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS, "Egyp" }, |
1529 | { G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC, "Armi" }, |
1530 | { G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI, "Phli" }, |
1531 | { G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN, "Prti" }, |
1532 | { G_UNICODE_SCRIPT_JAVANESE, "Java" }, |
1533 | { G_UNICODE_SCRIPT_KAITHI, "Kthi" }, |
1534 | { G_UNICODE_SCRIPT_LISU, "Lisu" }, |
1535 | { G_UNICODE_SCRIPT_MEETEI_MAYEK, "Mtei" }, |
1536 | { G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN, "Sarb" }, |
1537 | { G_UNICODE_SCRIPT_OLD_TURKIC, "Orkh" }, |
1538 | { G_UNICODE_SCRIPT_SAMARITAN, "Samr" }, |
1539 | { G_UNICODE_SCRIPT_TAI_THAM, "Lana" }, |
1540 | { G_UNICODE_SCRIPT_TAI_VIET, "Tavt" }, |
1541 | |
1542 | /* Unicode-6.0 additions */ |
1543 | { G_UNICODE_SCRIPT_BATAK, "Batk" }, |
1544 | { G_UNICODE_SCRIPT_BRAHMI, "Brah" }, |
1545 | { G_UNICODE_SCRIPT_MANDAIC, "Mand" }, |
1546 | |
1547 | /* Unicode-6.1 additions */ |
1548 | { G_UNICODE_SCRIPT_CHAKMA, "Cakm" }, |
1549 | { G_UNICODE_SCRIPT_MEROITIC_CURSIVE, "Merc" }, |
1550 | { G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS, "Mero" }, |
1551 | { G_UNICODE_SCRIPT_MIAO, "Plrd" }, |
1552 | { G_UNICODE_SCRIPT_SHARADA, "Shrd" }, |
1553 | { G_UNICODE_SCRIPT_SORA_SOMPENG, "Sora" }, |
1554 | { G_UNICODE_SCRIPT_TAKRI, "Takr" }, |
1555 | |
1556 | /* Unicode 7.0 additions */ |
1557 | { G_UNICODE_SCRIPT_BASSA_VAH, "Bass" }, |
1558 | { G_UNICODE_SCRIPT_CAUCASIAN_ALBANIAN, "Aghb" }, |
1559 | { G_UNICODE_SCRIPT_DUPLOYAN, "Dupl" }, |
1560 | { G_UNICODE_SCRIPT_ELBASAN, "Elba" }, |
1561 | { G_UNICODE_SCRIPT_GRANTHA, "Gran" }, |
1562 | { G_UNICODE_SCRIPT_KHOJKI, "Khoj" }, |
1563 | { G_UNICODE_SCRIPT_KHUDAWADI, "Sind" }, |
1564 | { G_UNICODE_SCRIPT_LINEAR_A, "Lina" }, |
1565 | { G_UNICODE_SCRIPT_MAHAJANI, "Mahj" }, |
1566 | { G_UNICODE_SCRIPT_MANICHAEAN, "Mani" }, |
1567 | { G_UNICODE_SCRIPT_MENDE_KIKAKUI, "Mend" }, |
1568 | { G_UNICODE_SCRIPT_MODI, "Modi" }, |
1569 | { G_UNICODE_SCRIPT_MRO, "Mroo" }, |
1570 | { G_UNICODE_SCRIPT_NABATAEAN, "Nbat" }, |
1571 | { G_UNICODE_SCRIPT_OLD_NORTH_ARABIAN, "Narb" }, |
1572 | { G_UNICODE_SCRIPT_OLD_PERMIC, "Perm" }, |
1573 | { G_UNICODE_SCRIPT_PAHAWH_HMONG, "Hmng" }, |
1574 | { G_UNICODE_SCRIPT_PALMYRENE, "Palm" }, |
1575 | { G_UNICODE_SCRIPT_PAU_CIN_HAU, "Pauc" }, |
1576 | { G_UNICODE_SCRIPT_PSALTER_PAHLAVI, "Phlp" }, |
1577 | { G_UNICODE_SCRIPT_SIDDHAM, "Sidd" }, |
1578 | { G_UNICODE_SCRIPT_TIRHUTA, "Tirh" }, |
1579 | { G_UNICODE_SCRIPT_WARANG_CITI, "Wara" }, |
1580 | |
1581 | /* Unicode 8.0 additions */ |
1582 | { G_UNICODE_SCRIPT_AHOM, "Ahom" }, |
1583 | { G_UNICODE_SCRIPT_ANATOLIAN_HIEROGLYPHS, "Hluw" }, |
1584 | { G_UNICODE_SCRIPT_HATRAN, "Hatr" }, |
1585 | { G_UNICODE_SCRIPT_MULTANI, "Mult" }, |
1586 | { G_UNICODE_SCRIPT_OLD_HUNGARIAN, "Hung" }, |
1587 | { G_UNICODE_SCRIPT_SIGNWRITING, "Sgnw" }, |
1588 | |
1589 | /* Unicode 9.0 additions */ |
1590 | { G_UNICODE_SCRIPT_ADLAM, "Adlm" }, |
1591 | { G_UNICODE_SCRIPT_BHAIKSUKI, "Bhks" }, |
1592 | { G_UNICODE_SCRIPT_MARCHEN, "Marc" }, |
1593 | { G_UNICODE_SCRIPT_NEWA, "Newa" }, |
1594 | { G_UNICODE_SCRIPT_OSAGE, "Osge" }, |
1595 | { G_UNICODE_SCRIPT_TANGUT, "Tang" }, |
1596 | |
1597 | /* Unicode 10.0 additions */ |
1598 | { G_UNICODE_SCRIPT_MASARAM_GONDI, "Gonm" }, |
1599 | { G_UNICODE_SCRIPT_NUSHU, "Nshu" }, |
1600 | { G_UNICODE_SCRIPT_SOYOMBO, "Soyo" }, |
1601 | { G_UNICODE_SCRIPT_ZANABAZAR_SQUARE, "Zanb" }, |
1602 | |
1603 | /* Unicode 11.0 additions */ |
1604 | { G_UNICODE_SCRIPT_DOGRA, "Dogr" }, |
1605 | { G_UNICODE_SCRIPT_GUNJALA_GONDI, "Gong" }, |
1606 | { G_UNICODE_SCRIPT_HANIFI_ROHINGYA, "Rohg" }, |
1607 | { G_UNICODE_SCRIPT_MAKASAR, "Maka" }, |
1608 | { G_UNICODE_SCRIPT_MEDEFAIDRIN, "Medf" }, |
1609 | { G_UNICODE_SCRIPT_OLD_SOGDIAN, "Sogo" }, |
1610 | { G_UNICODE_SCRIPT_SOGDIAN, "Sogd" }, |
1611 | |
1612 | /* Unicode 12.0 additions */ |
1613 | { G_UNICODE_SCRIPT_ELYMAIC, "Elym" }, |
1614 | { G_UNICODE_SCRIPT_NANDINAGARI, "Nand" }, |
1615 | { G_UNICODE_SCRIPT_NYIAKENG_PUACHUE_HMONG, "Hmnp" }, |
1616 | { G_UNICODE_SCRIPT_WANCHO, "Wcho" }, |
1617 | |
1618 | /* Unicode 13.0 additions */ |
1619 | { G_UNICODE_SCRIPT_CHORASMIAN, "Chrs" }, |
1620 | { G_UNICODE_SCRIPT_DIVES_AKURU, "Diak" }, |
1621 | { G_UNICODE_SCRIPT_KHITAN_SMALL_SCRIPT, "Kits" }, |
1622 | { G_UNICODE_SCRIPT_YEZIDI, "Yezi" }, |
1623 | }; |
1624 | guint i; |
1625 | |
1626 | g_assert_cmphex (0, ==, |
1627 | g_unicode_script_to_iso15924 (G_UNICODE_SCRIPT_INVALID_CODE)); |
1628 | g_assert_cmphex (0x5A7A7A7A, ==, g_unicode_script_to_iso15924 (1000)); |
1629 | g_assert_cmphex (0x41726162, ==, |
1630 | g_unicode_script_to_iso15924 (G_UNICODE_SCRIPT_ARABIC)); |
1631 | |
1632 | g_assert_cmphex (G_UNICODE_SCRIPT_INVALID_CODE, ==, |
1633 | g_unicode_script_from_iso15924 (0)); |
1634 | g_assert_cmphex (G_UNICODE_SCRIPT_UNKNOWN, ==, |
1635 | g_unicode_script_from_iso15924 (0x12345678)); |
1636 | |
1637 | #define PACK(a,b,c,d) \ |
1638 | ((guint32)((((guint8)(a))<<24)|(((guint8)(b))<<16)|(((guint8)(c))<<8)|((guint8)(d)))) |
1639 | |
1640 | for (i = 0; i < G_N_ELEMENTS (data); i++) |
1641 | { |
1642 | guint32 code = PACK (data[i].four_letter_code[0], |
1643 | data[i].four_letter_code[1], |
1644 | data[i].four_letter_code[2], |
1645 | data[i].four_letter_code[3]); |
1646 | |
1647 | g_assert_cmphex (g_unicode_script_to_iso15924 (data[i].script), ==, code); |
1648 | g_assert_cmpint (g_unicode_script_from_iso15924 (code), ==, data[i].script); |
1649 | } |
1650 | |
1651 | #undef PACK |
1652 | } |
1653 | |
1654 | int |
1655 | main (int argc, |
1656 | char *argv[]) |
1657 | { |
1658 | g_test_init (argc: &argc, argv: &argv, NULL); |
1659 | |
1660 | g_test_add_func (testpath: "/unicode/alnum" , test_func: test_alnum); |
1661 | g_test_add_func (testpath: "/unicode/alpha" , test_func: test_alpha); |
1662 | g_test_add_func (testpath: "/unicode/break-type" , test_func: test_unichar_break_type); |
1663 | g_test_add_func (testpath: "/unicode/canonical-decomposition" , test_func: test_canonical_decomposition); |
1664 | g_test_add_func (testpath: "/unicode/casefold" , test_func: test_casefold); |
1665 | g_test_add_func (testpath: "/unicode/cases" , test_func: test_cases); |
1666 | g_test_add_func (testpath: "/unicode/character-type" , test_func: test_unichar_character_type); |
1667 | g_test_add_func (testpath: "/unicode/cntrl" , test_func: test_cntrl); |
1668 | g_test_add_func (testpath: "/unicode/combining-class" , test_func: test_combining_class); |
1669 | g_test_add_func (testpath: "/unicode/compose" , test_func: test_compose); |
1670 | g_test_add_func (testpath: "/unicode/decompose" , test_func: test_decompose); |
1671 | g_test_add_func (testpath: "/unicode/decompose-tail" , test_func: test_decompose_tail); |
1672 | g_test_add_func (testpath: "/unicode/defined" , test_func: test_defined); |
1673 | g_test_add_func (testpath: "/unicode/digit" , test_func: test_digit); |
1674 | g_test_add_func (testpath: "/unicode/digit-value" , test_func: test_digit_value); |
1675 | g_test_add_func (testpath: "/unicode/fully-decompose-canonical" , test_func: test_fully_decompose_canonical); |
1676 | g_test_add_func (testpath: "/unicode/fully-decompose-len" , test_func: test_fully_decompose_len); |
1677 | g_test_add_func (testpath: "/unicode/graph" , test_func: test_graph); |
1678 | g_test_add_func (testpath: "/unicode/iso15924" , test_func: test_iso15924); |
1679 | g_test_add_func (testpath: "/unicode/lower" , test_func: test_lower); |
1680 | g_test_add_func (testpath: "/unicode/mark" , test_func: test_mark); |
1681 | g_test_add_func (testpath: "/unicode/mirror" , test_func: test_mirror); |
1682 | g_test_add_func (testpath: "/unicode/print" , test_func: test_print); |
1683 | g_test_add_func (testpath: "/unicode/punctuation" , test_func: test_punctuation); |
1684 | g_test_add_func (testpath: "/unicode/script" , test_func: test_unichar_script); |
1685 | g_test_add_func (testpath: "/unicode/space" , test_func: test_space); |
1686 | g_test_add_func (testpath: "/unicode/strdown" , test_func: test_strdown); |
1687 | g_test_add_func (testpath: "/unicode/strup" , test_func: test_strup); |
1688 | g_test_add_func (testpath: "/unicode/turkish-strupdown" , test_func: test_turkish_strupdown); |
1689 | g_test_add_func (testpath: "/unicode/title" , test_func: test_title); |
1690 | g_test_add_func (testpath: "/unicode/upper" , test_func: test_upper); |
1691 | g_test_add_func (testpath: "/unicode/validate" , test_func: test_unichar_validate); |
1692 | g_test_add_func (testpath: "/unicode/wide" , test_func: test_wide); |
1693 | g_test_add_func (testpath: "/unicode/xdigit" , test_func: test_xdigit); |
1694 | g_test_add_func (testpath: "/unicode/xdigit-value" , test_func: test_xdigit_value); |
1695 | g_test_add_func (testpath: "/unicode/zero-width" , test_func: test_zerowidth); |
1696 | |
1697 | return g_test_run(); |
1698 | } |
1699 | |