1 | /* Pango |
2 | * testboundaries_ucd.c: Test text boundary algorithms with test data from |
3 | * Unicode Character Database. |
4 | * |
5 | * Copyright (C) 2003 Noah Levitt |
6 | * |
7 | * This library is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Library General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2 of the License, or (at your option) any later version. |
11 | * |
12 | * This library is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Library General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Library General Public |
18 | * License along with this library; if not, write to the |
19 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
20 | * Boston, MA 02111-1307, USA. |
21 | */ |
22 | |
23 | #include <pango/pango.h> |
24 | #include <stdlib.h> |
25 | #include <string.h> |
26 | #include <locale.h> |
27 | |
/* Set to TRUE by do_test() when a line's computed attributes differ from the
 * expected ones (or a read error is reported); do_test() asserts it is still
 * FALSE before returning.  It is never reset, so it stays set for the rest
 * of the process once any test has tripped it. */
static gboolean failed = FALSE;
29 | |
30 | /* PangoLogAttr has to be the same size as guint or this hack breaks */ |
31 | typedef union |
32 | { |
33 | PangoLogAttr attr; |
34 | guint bits; |
35 | } |
36 | AttrBits; |
37 | |
38 | /* counts the number of multiplication and divison signs up to the first |
39 | * '#' or null character */ |
40 | static gint |
41 | count_attrs (gchar *line) |
42 | { |
43 | gunichar ch; |
44 | gchar *p = line; |
45 | gint count = 0; |
46 | |
47 | for (;;) |
48 | { |
49 | ch = g_utf8_get_char (p); |
50 | |
51 | switch (ch) |
52 | { |
53 | /* MULTIPLICATION SIGN, DIVISION SIGN */ |
54 | case 0x00d7: case 0x00f7: |
55 | count++; |
56 | break; |
57 | |
58 | /* null char, NUMBER SIGN */ |
59 | case 0x0000: case 0x0023: |
60 | return count; |
61 | |
62 | default: |
63 | break; |
64 | } |
65 | |
66 | p = g_utf8_next_char (p); |
67 | } |
68 | /* not reached */ |
69 | } |
70 | |
71 | static gboolean |
72 | parse_line (gchar *line, |
73 | AttrBits bits, |
74 | gchar **str_return, |
75 | PangoLogAttr **attr_return, |
76 | gint *num_attrs) |
77 | { |
78 | GString *gs; |
79 | gunichar ch, character; |
80 | gchar *p, *q; |
81 | gint i; |
82 | AttrBits temp_attr; |
83 | |
84 | *num_attrs = count_attrs (line); |
85 | *attr_return = g_new (PangoLogAttr, *num_attrs); |
86 | |
87 | p = line; |
88 | i = 0; |
89 | gs = g_string_new (NULL); |
90 | |
91 | for (;;) |
92 | { |
93 | temp_attr.bits = 0; |
94 | |
95 | /* skip white space */ |
96 | do |
97 | { |
98 | ch = g_utf8_get_char (p); |
99 | p = g_utf8_next_char (p); |
100 | } |
101 | while (g_unichar_isspace (c: ch)); |
102 | |
103 | switch (ch) |
104 | { |
105 | case 0x00f7: /* DIVISION SIGN: boundary here */ |
106 | temp_attr.bits |= bits.bits; |
107 | G_GNUC_FALLTHROUGH; |
108 | |
109 | case 0x00d7: /* MULTIPLICATION SIGN: no boundary here */ |
110 | break; |
111 | |
112 | case 0x0000: |
113 | case 0x0023: |
114 | *str_return = g_string_free (string: gs, FALSE); |
115 | return TRUE; |
116 | |
117 | default: /* unexpected character */ |
118 | g_free (mem: *attr_return); |
119 | return FALSE; |
120 | } |
121 | |
122 | (*attr_return)[i] = temp_attr.attr; |
123 | |
124 | /* skip white space */ |
125 | do |
126 | { |
127 | ch = g_utf8_get_char (p); |
128 | p = g_utf8_next_char (p); |
129 | } |
130 | while (g_unichar_isspace (c: ch)); |
131 | p = g_utf8_prev_char (p); |
132 | |
133 | if (ch == 0x0023 || ch == 0x0000) |
134 | { |
135 | *str_return = g_string_free (string: gs, FALSE); |
136 | return TRUE; |
137 | } |
138 | |
139 | character = strtoul (nptr: p, endptr: &q, base: 16); |
140 | if (q < p + 4 || q > p + 6 || character > 0x10ffff) |
141 | { |
142 | g_free (mem: *attr_return); |
143 | return FALSE; |
144 | } |
145 | |
146 | p = q; |
147 | |
148 | gs = g_string_append_unichar (string: gs, wc: character); |
149 | |
150 | i++; |
151 | } |
152 | } |
153 | |
154 | static gboolean |
155 | attrs_equal (PangoLogAttr *attrs1, |
156 | PangoLogAttr *attrs2, |
157 | gint len, |
158 | AttrBits bits) |
159 | { |
160 | AttrBits a, b; |
161 | gint i; |
162 | |
163 | for (i = 0; i < len; i++) |
164 | { |
165 | a.bits = 0; |
166 | a.attr = attrs1[i]; |
167 | |
168 | b.bits = 0; |
169 | b.attr = attrs2[i]; |
170 | |
171 | /* can't do a straight comparison because the bitmask may have |
172 | * multiple bits set, and as long as attr&bitmask is not zero, it |
173 | * counts as being set */ |
174 | if (((a.bits & bits.bits) && !(b.bits & bits.bits)) || |
175 | (!(a.bits & bits.bits) && (b.bits & bits.bits))) |
176 | return FALSE; |
177 | } |
178 | |
179 | return TRUE; |
180 | } |
181 | |
182 | static gchar * |
183 | make_test_string (gchar *string, |
184 | PangoLogAttr *attrs, |
185 | AttrBits bits) |
186 | { |
187 | GString *gs = g_string_new (NULL); |
188 | gint i = 0; |
189 | AttrBits a; |
190 | gchar *p = string; |
191 | gunichar ch; |
192 | |
193 | for (;;) |
194 | { |
195 | a.bits = 0; |
196 | a.attr = attrs[i]; |
197 | if ((a.bits & bits.bits) != 0) |
198 | gs = g_string_append_unichar (string: gs, wc: 0x00f7); |
199 | else |
200 | gs = g_string_append_unichar (string: gs, wc: 0x00d7); |
201 | |
202 | g_string_append_c (gs, ' '); |
203 | |
204 | if (*p == '\0') |
205 | break; |
206 | |
207 | ch = g_utf8_get_char (p); |
208 | g_string_append_printf (string: gs, format: "%04X " , ch); |
209 | |
210 | p = g_utf8_next_char (p); |
211 | i++; |
212 | } |
213 | |
214 | return g_string_free (string: gs, FALSE); |
215 | } |
216 | |
217 | static void |
218 | do_test (const gchar *filename, |
219 | AttrBits bits) |
220 | { |
221 | GIOChannel *channel; |
222 | GIOStatus status; |
223 | gchar *line; |
224 | gsize length, terminator_pos; |
225 | GError *error; |
226 | gchar *string; |
227 | PangoLogAttr *expected_attrs; |
228 | gint num_attrs; |
229 | gint i; |
230 | |
231 | error = NULL; |
232 | channel = g_io_channel_new_file (filename, mode: "r" , error: &error); |
233 | if (g_error_matches (error, G_FILE_ERROR, code: G_FILE_ERROR_NOENT)) |
234 | { |
235 | g_test_skip (msg: "Test file not found" ); |
236 | g_error_free (error); |
237 | return; |
238 | } |
239 | |
240 | g_assert_no_error (error); |
241 | |
242 | if (g_test_verbose ()) g_test_message (format: "Filename: %s" , filename); |
243 | |
244 | i = 1; |
245 | for (;;) |
246 | { |
247 | error = NULL; |
248 | status = g_io_channel_read_line (channel, str_return: &line, length: &length, terminator_pos: &terminator_pos, error: &error); |
249 | g_assert_no_error (error); |
250 | |
251 | switch (status) |
252 | { |
253 | case G_IO_STATUS_ERROR: |
254 | failed = TRUE; |
255 | goto done; |
256 | |
257 | case G_IO_STATUS_EOF: |
258 | goto done; |
259 | |
260 | case G_IO_STATUS_AGAIN: |
261 | continue; |
262 | |
263 | case G_IO_STATUS_NORMAL: |
264 | line[terminator_pos] = '\0'; |
265 | break; |
266 | |
267 | default: |
268 | break; |
269 | } |
270 | |
271 | if (g_test_verbose ()) g_test_message (format: "Parsing line: %s" , line); |
272 | g_assert_true (parse_line (line, bits, &string, &expected_attrs, &num_attrs)); |
273 | |
274 | if (num_attrs > 0) |
275 | { |
276 | PangoLogAttr *attrs = g_new0 (PangoLogAttr, num_attrs); |
277 | pango_get_log_attrs (text: string, length: -1, level: 0, language: pango_language_from_string (language: "C" ), attrs, attrs_len: num_attrs); |
278 | |
279 | if (! attrs_equal (attrs1: attrs, attrs2: expected_attrs, len: num_attrs, bits)) |
280 | { |
281 | gchar *str = make_test_string (string, attrs, bits); |
282 | char * = strchr (s: line, c: '#'); |
283 | if (comments) /* don't print the # comment in the error message. print it separately */ |
284 | { |
285 | *comments = '\0'; |
286 | comments++; |
287 | } |
288 | else |
289 | { |
290 | comments = (char *)"" ; |
291 | } |
292 | |
293 | if (g_test_verbose ()) g_test_message (format: "%s: line %d failed" , filename, i); |
294 | if (g_test_verbose ()) g_test_message (format: " expected: %s" , line); |
295 | if (g_test_verbose ()) g_test_message (format: " returned: %s" , str); |
296 | if (g_test_verbose ()) g_test_message (format: " comments: %s" , comments); |
297 | |
298 | g_free (mem: str); |
299 | failed = TRUE; |
300 | } |
301 | g_free (mem: attrs); |
302 | } |
303 | g_free (mem: string); |
304 | g_free (mem: expected_attrs); |
305 | g_free (mem: line); |
306 | |
307 | i++; |
308 | } |
309 | |
310 | done: |
311 | if (channel) |
312 | g_io_channel_unref (channel); |
313 | if (error) |
314 | g_error_free (error); |
315 | |
316 | g_assert_true (!failed); |
317 | } |
318 | |
319 | static void |
320 | test_grapheme_break (void) |
321 | { |
322 | const char *filename; |
323 | AttrBits bits; |
324 | |
325 | filename = g_test_get_filename (file_type: G_TEST_DIST, first_path: "GraphemeBreakTest.txt" , NULL); |
326 | bits.bits = 0; |
327 | bits.attr.is_cursor_position = 1; |
328 | do_test (filename, bits); |
329 | } |
330 | |
331 | static void |
332 | test_emoji_break (void) |
333 | { |
334 | const char *filename; |
335 | AttrBits bits; |
336 | |
337 | filename = g_test_get_filename (file_type: G_TEST_DIST, first_path: "EmojiBreakTest.txt" , NULL); |
338 | bits.bits = 0; |
339 | bits.attr.is_cursor_position = 1; |
340 | do_test (filename, bits); |
341 | } |
342 | |
343 | static void |
344 | test_char_break (void) |
345 | { |
346 | const char *filename; |
347 | AttrBits bits; |
348 | |
349 | filename = g_test_get_filename (file_type: G_TEST_DIST, first_path: "CharBreakTest.txt" , NULL); |
350 | bits.bits = 0; |
351 | bits.attr.is_char_break = 1; |
352 | do_test (filename, bits); |
353 | } |
354 | |
355 | static void |
356 | test_word_break (void) |
357 | { |
358 | const char *filename; |
359 | AttrBits bits; |
360 | |
361 | filename = g_test_get_filename (file_type: G_TEST_DIST, first_path: "WordBreakTest.txt" , NULL); |
362 | bits.bits = 0; |
363 | bits.attr.is_word_boundary = 1; |
364 | do_test (filename, bits); |
365 | } |
366 | |
367 | static void |
368 | test_sentence_break (void) |
369 | { |
370 | const char *filename; |
371 | AttrBits bits; |
372 | |
373 | filename = g_test_get_filename (file_type: G_TEST_DIST, first_path: "SentenceBreakTest.txt" , NULL); |
374 | bits.bits = 0; |
375 | bits.attr.is_sentence_boundary = 1; |
376 | do_test (filename, bits); |
377 | } |
378 | |
379 | static void |
380 | test_line_break (void) |
381 | { |
382 | const char *filename; |
383 | AttrBits bits; |
384 | |
385 | |
386 | filename = g_test_get_filename (file_type: G_TEST_DIST, first_path: "LineBreakTest.txt" , NULL); |
387 | bits.bits = 0; |
388 | bits.attr.is_line_break = 1; |
389 | bits.attr.is_mandatory_break = 1; |
390 | |
391 | do_test (filename, bits); |
392 | } |
393 | |
394 | |
395 | gint |
396 | main (gint argc, |
397 | gchar **argv) |
398 | { |
399 | setlocale (LC_ALL, locale: "" ); |
400 | |
401 | g_test_init (argc: &argc, argv: &argv, NULL); |
402 | |
403 | g_test_add_func (testpath: "/text/break/grapheme" , test_func: test_grapheme_break); |
404 | g_test_add_func (testpath: "/text/break/word" , test_func: test_word_break); |
405 | g_test_add_func (testpath: "/text/break/sentence" , test_func: test_sentence_break); |
406 | g_test_add_func (testpath: "/text/break/line" , test_func: test_line_break); |
407 | g_test_add_func (testpath: "/text/break/emoji" , test_func: test_emoji_break); |
408 | g_test_add_func (testpath: "/text/break/char" , test_func: test_char_break); |
409 | |
410 | return g_test_run (); |
411 | } |
412 | |