1/* Pango
2 * testboundaries_ucd.c: Test text boundary algorithms with test data from
3 * Unicode Character Database.
4 *
5 * Copyright (C) 2003 Noah Levitt
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public
18 * License along with this library; if not, write to the
19 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
21 */
22
23#include <pango/pango.h>
24#include <stdlib.h>
25#include <string.h>
26#include <locale.h>
27
28static gboolean failed = FALSE;
29
30/* PangoLogAttr has to be the same size as guint or this hack breaks */
31typedef union
32{
33 PangoLogAttr attr;
34 guint bits;
35}
36AttrBits;
37
38/* counts the number of multiplication and divison signs up to the first
39 * '#' or null character */
40static gint
41count_attrs (gchar *line)
42{
43 gunichar ch;
44 gchar *p = line;
45 gint count = 0;
46
47 for (;;)
48 {
49 ch = g_utf8_get_char (p);
50
51 switch (ch)
52 {
53 /* MULTIPLICATION SIGN, DIVISION SIGN */
54 case 0x00d7: case 0x00f7:
55 count++;
56 break;
57
58 /* null char, NUMBER SIGN */
59 case 0x0000: case 0x0023:
60 return count;
61
62 default:
63 break;
64 }
65
66 p = g_utf8_next_char (p);
67 }
68 /* not reached */
69}
70
71static gboolean
72parse_line (gchar *line,
73 AttrBits bits,
74 gchar **str_return,
75 PangoLogAttr **attr_return,
76 gint *num_attrs)
77{
78 GString *gs;
79 gunichar ch, character;
80 gchar *p, *q;
81 gint i;
82 AttrBits temp_attr;
83
84 *num_attrs = count_attrs (line);
85 *attr_return = g_new (PangoLogAttr, *num_attrs);
86
87 p = line;
88 i = 0;
89 gs = g_string_new (NULL);
90
91 for (;;)
92 {
93 temp_attr.bits = 0;
94
95 /* skip white space */
96 do
97 {
98 ch = g_utf8_get_char (p);
99 p = g_utf8_next_char (p);
100 }
101 while (g_unichar_isspace (c: ch));
102
103 switch (ch)
104 {
105 case 0x00f7: /* DIVISION SIGN: boundary here */
106 temp_attr.bits |= bits.bits;
107 G_GNUC_FALLTHROUGH;
108
109 case 0x00d7: /* MULTIPLICATION SIGN: no boundary here */
110 break;
111
112 case 0x0000:
113 case 0x0023:
114 *str_return = g_string_free (string: gs, FALSE);
115 return TRUE;
116
117 default: /* unexpected character */
118 g_free (mem: *attr_return);
119 return FALSE;
120 }
121
122 (*attr_return)[i] = temp_attr.attr;
123
124 /* skip white space */
125 do
126 {
127 ch = g_utf8_get_char (p);
128 p = g_utf8_next_char (p);
129 }
130 while (g_unichar_isspace (c: ch));
131 p = g_utf8_prev_char (p);
132
133 if (ch == 0x0023 || ch == 0x0000)
134 {
135 *str_return = g_string_free (string: gs, FALSE);
136 return TRUE;
137 }
138
139 character = strtoul (nptr: p, endptr: &q, base: 16);
140 if (q < p + 4 || q > p + 6 || character > 0x10ffff)
141 {
142 g_free (mem: *attr_return);
143 return FALSE;
144 }
145
146 p = q;
147
148 gs = g_string_append_unichar (string: gs, wc: character);
149
150 i++;
151 }
152}
153
154static gboolean
155attrs_equal (PangoLogAttr *attrs1,
156 PangoLogAttr *attrs2,
157 gint len,
158 AttrBits bits)
159{
160 AttrBits a, b;
161 gint i;
162
163 for (i = 0; i < len; i++)
164 {
165 a.bits = 0;
166 a.attr = attrs1[i];
167
168 b.bits = 0;
169 b.attr = attrs2[i];
170
171 /* can't do a straight comparison because the bitmask may have
172 * multiple bits set, and as long as attr&bitmask is not zero, it
173 * counts as being set */
174 if (((a.bits & bits.bits) && !(b.bits & bits.bits)) ||
175 (!(a.bits & bits.bits) && (b.bits & bits.bits)))
176 return FALSE;
177 }
178
179 return TRUE;
180}
181
182static gchar *
183make_test_string (gchar *string,
184 PangoLogAttr *attrs,
185 AttrBits bits)
186{
187 GString *gs = g_string_new (NULL);
188 gint i = 0;
189 AttrBits a;
190 gchar *p = string;
191 gunichar ch;
192
193 for (;;)
194 {
195 a.bits = 0;
196 a.attr = attrs[i];
197 if ((a.bits & bits.bits) != 0)
198 gs = g_string_append_unichar (string: gs, wc: 0x00f7);
199 else
200 gs = g_string_append_unichar (string: gs, wc: 0x00d7);
201
202 g_string_append_c (gs, ' ');
203
204 if (*p == '\0')
205 break;
206
207 ch = g_utf8_get_char (p);
208 g_string_append_printf (string: gs, format: "%04X ", ch);
209
210 p = g_utf8_next_char (p);
211 i++;
212 }
213
214 return g_string_free (string: gs, FALSE);
215}
216
217static void
218do_test (const gchar *filename,
219 AttrBits bits)
220{
221 GIOChannel *channel;
222 GIOStatus status;
223 gchar *line;
224 gsize length, terminator_pos;
225 GError *error;
226 gchar *string;
227 PangoLogAttr *expected_attrs;
228 gint num_attrs;
229 gint i;
230
231 error = NULL;
232 channel = g_io_channel_new_file (filename, mode: "r", error: &error);
233 if (g_error_matches (error, G_FILE_ERROR, code: G_FILE_ERROR_NOENT))
234 {
235 g_test_skip (msg: "Test file not found");
236 g_error_free (error);
237 return;
238 }
239
240 g_assert_no_error (error);
241
242 if (g_test_verbose ()) g_test_message (format: "Filename: %s", filename);
243
244 i = 1;
245 for (;;)
246 {
247 error = NULL;
248 status = g_io_channel_read_line (channel, str_return: &line, length: &length, terminator_pos: &terminator_pos, error: &error);
249 g_assert_no_error (error);
250
251 switch (status)
252 {
253 case G_IO_STATUS_ERROR:
254 failed = TRUE;
255 goto done;
256
257 case G_IO_STATUS_EOF:
258 goto done;
259
260 case G_IO_STATUS_AGAIN:
261 continue;
262
263 case G_IO_STATUS_NORMAL:
264 line[terminator_pos] = '\0';
265 break;
266
267 default:
268 break;
269 }
270
271 if (g_test_verbose ()) g_test_message (format: "Parsing line: %s", line);
272 g_assert_true (parse_line (line, bits, &string, &expected_attrs, &num_attrs));
273
274 if (num_attrs > 0)
275 {
276 PangoLogAttr *attrs = g_new0 (PangoLogAttr, num_attrs);
277 pango_get_log_attrs (text: string, length: -1, level: 0, language: pango_language_from_string (language: "C"), attrs, attrs_len: num_attrs);
278
279 if (! attrs_equal (attrs1: attrs, attrs2: expected_attrs, len: num_attrs, bits))
280 {
281 gchar *str = make_test_string (string, attrs, bits);
282 char *comments = strchr (s: line, c: '#');
283 if (comments) /* don't print the # comment in the error message. print it separately */
284 {
285 *comments = '\0';
286 comments++;
287 }
288 else
289 {
290 comments = (char *)"";
291 }
292
293 if (g_test_verbose ()) g_test_message (format: "%s: line %d failed", filename, i);
294 if (g_test_verbose ()) g_test_message (format: " expected: %s", line);
295 if (g_test_verbose ()) g_test_message (format: " returned: %s", str);
296 if (g_test_verbose ()) g_test_message (format: " comments: %s", comments);
297
298 g_free (mem: str);
299 failed = TRUE;
300 }
301 g_free (mem: attrs);
302 }
303 g_free (mem: string);
304 g_free (mem: expected_attrs);
305 g_free (mem: line);
306
307 i++;
308 }
309
310done:
311 if (channel)
312 g_io_channel_unref (channel);
313 if (error)
314 g_error_free (error);
315
316 g_assert_true (!failed);
317}
318
319static void
320test_grapheme_break (void)
321{
322 const char *filename;
323 AttrBits bits;
324
325 filename = g_test_get_filename (file_type: G_TEST_DIST, first_path: "GraphemeBreakTest.txt", NULL);
326 bits.bits = 0;
327 bits.attr.is_cursor_position = 1;
328 do_test (filename, bits);
329}
330
331static void
332test_emoji_break (void)
333{
334 const char *filename;
335 AttrBits bits;
336
337 filename = g_test_get_filename (file_type: G_TEST_DIST, first_path: "EmojiBreakTest.txt", NULL);
338 bits.bits = 0;
339 bits.attr.is_cursor_position = 1;
340 do_test (filename, bits);
341}
342
343static void
344test_char_break (void)
345{
346 const char *filename;
347 AttrBits bits;
348
349 filename = g_test_get_filename (file_type: G_TEST_DIST, first_path: "CharBreakTest.txt", NULL);
350 bits.bits = 0;
351 bits.attr.is_char_break = 1;
352 do_test (filename, bits);
353}
354
355static void
356test_word_break (void)
357{
358 const char *filename;
359 AttrBits bits;
360
361 filename = g_test_get_filename (file_type: G_TEST_DIST, first_path: "WordBreakTest.txt", NULL);
362 bits.bits = 0;
363 bits.attr.is_word_boundary = 1;
364 do_test (filename, bits);
365}
366
367static void
368test_sentence_break (void)
369{
370 const char *filename;
371 AttrBits bits;
372
373 filename = g_test_get_filename (file_type: G_TEST_DIST, first_path: "SentenceBreakTest.txt", NULL);
374 bits.bits = 0;
375 bits.attr.is_sentence_boundary = 1;
376 do_test (filename, bits);
377}
378
379static void
380test_line_break (void)
381{
382 const char *filename;
383 AttrBits bits;
384
385
386 filename = g_test_get_filename (file_type: G_TEST_DIST, first_path: "LineBreakTest.txt", NULL);
387 bits.bits = 0;
388 bits.attr.is_line_break = 1;
389 bits.attr.is_mandatory_break = 1;
390
391 do_test (filename, bits);
392}
393
394
395gint
396main (gint argc,
397 gchar **argv)
398{
399 setlocale (LC_ALL, locale: "");
400
401 g_test_init (argc: &argc, argv: &argv, NULL);
402
403 g_test_add_func (testpath: "/text/break/grapheme", test_func: test_grapheme_break);
404 g_test_add_func (testpath: "/text/break/word", test_func: test_word_break);
405 g_test_add_func (testpath: "/text/break/sentence", test_func: test_sentence_break);
406 g_test_add_func (testpath: "/text/break/line", test_func: test_line_break);
407 g_test_add_func (testpath: "/text/break/emoji", test_func: test_emoji_break);
408 g_test_add_func (testpath: "/text/break/char", test_func: test_char_break);
409
410 return g_test_run ();
411}
412

/* Source: gtk/subprojects/pango/tests/testboundaries_ucd.c */