/* Pango
 * testboundaries.c: Test text boundary algorithms
 *
 * Copyright (C) 1999-2000 Red Hat Software
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
21
22#include <string.h>
23#include <stdlib.h>
24#include <stdio.h>
25
26#include <glib.h>
27#include <pango/pango.h>
28
29#ifndef G_OS_WIN32
30#include <unistd.h>
31#endif
32
33#define CHFORMAT "%0#6x"
34
/* FIXME: for now this just tests that the breaking of some sample
 * text conforms to certain rules and invariants. Eventually we
 * should also have test-result pairs, i.e. a string and some
 * encoding of the correct way to break the string, to check
 * more precisely that things worked.
 */
41
42
43static int offset = 0;
44static int line = 0;
45static gunichar current_wc = 0;
46static const char *line_start = NULL;
47static const char *line_end = NULL;
48
49typedef void (* CharForeachFunc) (gunichar wc,
50 gunichar prev_wc,
51 gunichar next_wc,
52 GUnicodeType type,
53 GUnicodeType prev_type,
54 GUnicodeType next_type,
55 PangoLogAttr *attr,
56 PangoLogAttr *prev_attr,
57 PangoLogAttr *next_attr,
58 gpointer data);
59
60static void
61log_attr_foreach (const char *text,
62 PangoLogAttr *attrs,
63 CharForeachFunc func,
64 gpointer data)
65{
66 const gchar *next = text;
67 gint length = strlen (s: text);
68 const gchar *end = text + length;
69 gint i = 0;
70 gunichar prev_wc;
71 gunichar next_wc;
72 GUnicodeType prev_type;
73 GUnicodeType next_type;
74
75 if (next == end)
76 return;
77
78 offset = 0;
79 line = 1;
80
81 prev_type = (GUnicodeType) -1;
82 prev_wc = 0;
83
84 next_wc = g_utf8_get_char (p: next);
85 next_type = g_unichar_type (c: next_wc);
86
87 line_start = text;
88 line_end = text;
89
90 while (next_wc != 0)
91 {
92 GUnicodeType type;
93 gunichar wc;
94
95 wc = next_wc;
96 type = next_type;
97
98 current_wc = wc;
99
100 next = g_utf8_next_char (next);
101 line_end = next;
102
103 if (next >= end)
104 next_wc = 0;
105 else
106 next_wc = g_utf8_get_char (p: next);
107
108 if (next_wc)
109 next_type = g_unichar_type (c: next_wc);
110
111 (* func) (wc, prev_wc, next_wc,
112 type, prev_type, next_type,
113 &attrs[i],
114 i != 0 ? &attrs[i-1] : NULL,
115 next_wc != 0 ? &attrs[i+1] : NULL,
116 data);
117
118 prev_type = type;
119 prev_wc = wc;
120 ++i;
121 ++offset;
122 if (wc == '\n')
123 {
124 ++line;
125 offset = 0;
126 line_start = next;
127 line_end = next;
128 }
129 }
130}
131
132static void
133check_line_char (gunichar wc,
134 gunichar prev_wc,
135 gunichar next_wc,
136 GUnicodeType type,
137 GUnicodeType prev_type,
138 GUnicodeType next_type,
139 PangoLogAttr *attr,
140 PangoLogAttr *prev_attr,
141 PangoLogAttr *next_attr,
142 gpointer data)
143{
144 GUnicodeBreakType break_type;
145 GUnicodeBreakType prev_break_type;
146
147 break_type = g_unichar_break_type (c: wc);
148 if (prev_wc)
149 prev_break_type = g_unichar_break_type (c: prev_wc);
150 else
151 prev_break_type = G_UNICODE_BREAK_UNKNOWN;
152
153 if (wc == '\n')
154 {
155 if (prev_wc == '\r')
156 {
157 if (g_test_verbose ()) if (g_test_verbose ()) g_test_message (format: "Do not line break between \\r and \\n");
158 g_assert_false (attr->is_line_break);
159 }
160
161 if (next_attr != NULL)
162 {
163 if (g_test_verbose ()) g_test_message (format: "Line break after \\n");
164 g_assert_true (next_attr->is_line_break);
165 }
166 }
167
168 if (attr->is_line_break)
169 {
170 if (g_test_verbose ()) g_test_message (format: "first char in string should not be marked as a line break");
171 g_assert_false (prev_wc == 0);
172 }
173
174 if (break_type == G_UNICODE_BREAK_SPACE)
175 {
176 if (g_test_verbose ()) g_test_message (format: "can't break lines before a space unless a mandatory break char precedes it or a combining mark follows; prev char was: " CHFORMAT, prev_wc);
177 g_assert_false (attr->is_line_break && prev_attr != NULL &&
178 !attr->is_mandatory_break &&
179 !(next_wc && g_unichar_break_type (next_wc) == G_UNICODE_BREAK_COMBINING_MARK));
180 }
181
182 if (attr->is_mandatory_break)
183 {
184 if (g_test_verbose ()) g_test_message (format: "mandatory breaks must also be marked as regular breaks");
185 g_assert_true (attr->is_line_break);
186 }
187
188
189 /* FIXME use the break tables from break.c to automatically
190 * check invariants for each cell in the table. Shouldn't
191 * be that hard to do.
192 */
193
194 if (g_test_verbose ()) g_test_message (format: "can't break between two open punctuation chars");
195 g_assert_false (break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION &&
196 prev_break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION &&
197 attr->is_line_break &&
198 !attr->is_mandatory_break);
199
200 if (g_test_verbose ()) g_test_message (format: "can't break between two close punctuation chars");
201 g_assert_false (break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION &&
202 prev_break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION &&
203 attr->is_line_break &&
204 !attr->is_mandatory_break);
205
206 if (g_test_verbose ()) g_test_message (format: "can't break letter-quotemark sequence");
207 g_assert_false (break_type == G_UNICODE_BREAK_QUOTATION &&
208 prev_break_type == G_UNICODE_BREAK_ALPHABETIC &&
209 attr->is_line_break &&
210 !attr->is_mandatory_break);
211}
212
213static void
214check_line_invariants (const char *text,
215 PangoLogAttr *attrs)
216{
217 log_attr_foreach (text, attrs, func: check_line_char, NULL);
218}
219
220static void
221check_word_invariants (const char *text,
222 PangoLogAttr *attrs)
223{
224
225
226}
227
228static void
229check_sentence_invariants (const char *text,
230 PangoLogAttr *attrs)
231{
232
233
234}
235
236static void
237check_grapheme_invariants (const char *text,
238 PangoLogAttr *attrs)
239{
240
241
242}
243
#if 0
/* Debugging aid (compiled out): print @text split at every position
 * that @attrs marks as a sentence boundary, one piece per line.
 */
static void print_sentences (const char   *text,
                             PangoLogAttr *attrs);
static void
print_sentences (const char   *text,
                 PangoLogAttr *attrs)
{
  const char *p;
  const char *last;
  int i = 0;

  last = text;
  p = text;

  while (*p)
    {
      if (attrs[i].is_sentence_boundary)
        {
          /* Emit everything since the previous boundary. */
          char *s = g_strndup (last, p - last);
          printf ("%s\n", s);
          g_free (s);
          last = p;
        }

      p = g_utf8_next_char (p);
      ++i;
    }
}
#endif
273
274static void
275check_invariants (const char *text)
276{
277 int len;
278 PangoLogAttr *attrs;
279
280 g_assert_true (g_utf8_validate (text, -1, NULL));
281
282 len = g_utf8_strlen (p: text, max: -1);
283 attrs = g_new0 (PangoLogAttr, len + 1);
284
285 pango_get_log_attrs (text,
286 length: -1,
287 level: 0,
288 language: pango_language_from_string (language: "C"),
289 attrs,
290 attrs_len: len + 1);
291
292 check_line_invariants (text, attrs);
293 check_sentence_invariants (text, attrs);
294 check_grapheme_invariants (text, attrs);
295 check_word_invariants (text, attrs);
296
297#if 0
298 print_sentences (text, attrs);
299#endif
300
301 g_free (mem: attrs);
302}
303
304static void
305test_boundaries (void)
306{
307 const char *filename;
308 GError *error = NULL;
309 char *text;
310
311 filename = g_test_get_filename (file_type: G_TEST_DIST, first_path: "boundaries.utf8", NULL);
312
313 if (g_test_verbose ()) g_test_message (format: "sample file: %s\n", filename);
314
315 g_file_get_contents (filename, contents: &text, NULL, error: &error);
316 g_assert_no_error (error);
317
318 check_invariants (text);
319
320 g_free (mem: text);
321}
322
323int
324main (int argc, char *argv[])
325{
326 g_test_init (argc: &argc, argv: &argv, NULL);
327
328 g_test_add_func (testpath: "/text/boundaries", test_func: test_boundaries);
329
330 return g_test_run ();
331}
/* Source: gtk/subprojects/pango/tests/testboundaries.c */