1/* Pango
2 *
3 * Copyright (C) 1999 Red Hat Software
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public
16 * License along with this library; if not, write to the
17 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 * Boston, MA 02111-1307, USA.
19 */
20
21#include "config.h"
22
23#include "validate-log-attrs.h"
24#include "pango.h"
25#include <string.h>
26
27/* {{{ Validation */
28
29G_DEFINE_QUARK(pango-validate-error-quark, pango_validate_error)
30
31typedef gboolean (* CharForeachFunc) (int pos,
32 gunichar wc,
33 gunichar prev_wc,
34 gunichar next_wc,
35 GUnicodeType type,
36 GUnicodeType prev_type,
37 GUnicodeType next_type,
38 const PangoLogAttr *attr,
39 const PangoLogAttr *prev_attr,
40 const PangoLogAttr *next_attr,
41 gboolean *after_zws,
42 GError **error);
43
44static gboolean
45log_attr_foreach (const char *text,
46 int length,
47 const PangoLogAttr *attrs,
48 int attrs_len,
49 CharForeachFunc func,
50 GError **error)
51{
52 const gchar *next = text;
53 const gchar *end = text + length;
54 gint i = 0;
55 gunichar prev_wc;
56 gunichar next_wc;
57 GUnicodeType prev_type;
58 GUnicodeType next_type;
59 gboolean after_zws;
60
61 if (next == end)
62 goto done;
63
64 prev_type = (GUnicodeType) -1;
65 prev_wc = 0;
66
67 next_wc = g_utf8_get_char (p: next);
68 next_type = g_unichar_type (c: next_wc);
69
70 after_zws = FALSE;
71
72 while (next_wc != 0)
73 {
74 GUnicodeType type;
75 gunichar wc;
76
77 wc = next_wc;
78 type = next_type;
79
80 next = g_utf8_next_char (next);
81
82 if (next >= end)
83 next_wc = 0;
84 else
85 next_wc = g_utf8_get_char (p: next);
86
87 if (next_wc)
88 next_type = g_unichar_type (c: next_wc);
89
90 if (!func (i,
91 wc, prev_wc, next_wc,
92 type, prev_type, next_type,
93 &attrs[i],
94 i != 0 ? &attrs[i - 1] : NULL,
95 &attrs[i + 1],
96 &after_zws,
97 error))
98 return FALSE;
99
100 prev_type = type;
101 prev_wc = wc;
102 i++;
103 }
104
105done:
106 return TRUE;
107}
108
109static gboolean
110check_line_char (int pos,
111 gunichar wc,
112 gunichar prev_wc,
113 gunichar next_wc,
114 GUnicodeType type,
115 GUnicodeType prev_type,
116 GUnicodeType next_type,
117 const PangoLogAttr *attr,
118 const PangoLogAttr *prev_attr,
119 const PangoLogAttr *next_attr,
120 gboolean *after_zws,
121 GError **error)
122{
123 GUnicodeBreakType break_type;
124 GUnicodeBreakType prev_break_type;
125
126 break_type = g_unichar_break_type (c: wc);
127
128 if (prev_wc)
129 prev_break_type = g_unichar_break_type (c: prev_wc);
130 else
131 prev_break_type = G_UNICODE_BREAK_UNKNOWN;
132
133 if (prev_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE ||
134 (prev_break_type == G_UNICODE_BREAK_SPACE && *after_zws))
135 *after_zws = TRUE;
136 else
137 *after_zws = FALSE;
138
139 if (wc == '\n' && prev_wc == '\r')
140 {
141 if (attr->is_line_break)
142 {
143 g_set_error (err: error,
144 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK,
145 format: "char %#x %d: Do not break between \\r and \\n (LB5)", wc, pos);
146 return FALSE;
147 }
148 }
149
150 if (prev_wc == 0 && wc != 0)
151 {
152 if (attr->is_line_break)
153 {
154 g_set_error (err: error,
155 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK,
156 format: "char %#x %d: Do not break before first char (LB2)", wc, pos);
157 return FALSE;
158 }
159 }
160
161 if (next_wc == 0)
162 {
163 if (!next_attr->is_line_break)
164 {
165 g_set_error (err: error,
166 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK,
167 format: "char %#x %d: Always break after the last char (LB3)", wc, pos);
168 return FALSE;
169 }
170 }
171
172 if (prev_break_type == G_UNICODE_BREAK_MANDATORY)
173 {
174 if (!attr->is_mandatory_break)
175 {
176 g_set_error (err: error,
177 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK,
178 format: "char %#x %d: Always break after hard line breaks (LB4)", wc, pos);
179 return FALSE;
180 }
181 }
182
183 if (prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
184 prev_break_type == G_UNICODE_BREAK_LINE_FEED ||
185 prev_break_type == G_UNICODE_BREAK_NEXT_LINE)
186 {
187 if (!attr->is_mandatory_break)
188 {
189 g_set_error (err: error,
190 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK,
191 format: "char %#x %d: Always break after CR, LF and NL (LB5)", wc, pos);
192 return FALSE;
193 }
194 }
195
196 if (break_type == G_UNICODE_BREAK_MANDATORY ||
197 break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
198 break_type == G_UNICODE_BREAK_LINE_FEED ||
199 break_type == G_UNICODE_BREAK_NEXT_LINE)
200 {
201 if (attr->is_line_break)
202 {
203 g_set_error (err: error,
204 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK,
205 format: "char %#x %d: Do not break before hard line beaks (LB6)", wc, pos);
206 return FALSE;
207 }
208 }
209
210 if (break_type == G_UNICODE_BREAK_SPACE ||
211 break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE)
212 {
213 if (attr->is_line_break && prev_attr != NULL &&
214 !attr->is_mandatory_break &&
215 !(next_wc && g_unichar_break_type (c: next_wc) == G_UNICODE_BREAK_COMBINING_MARK))
216 {
217 g_set_error (err: error,
218 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK,
219 format: "char %#x %d: Can't break before a space unless mandatory precedes or combining mark follows (LB7)", wc, pos);
220 return FALSE;
221 }
222 }
223
224 if (break_type != G_UNICODE_BREAK_ZERO_WIDTH_SPACE &&
225 break_type != G_UNICODE_BREAK_SPACE &&
226 *after_zws)
227 {
228 if (!attr->is_line_break)
229 {
230 g_set_error (err: error,
231 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK,
232 format: "char %#x %d: Break before a char following ZWS, even if spaces intervene (LB8)", wc, pos);
233 return FALSE;
234 }
235 }
236
237 if (break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER)
238 {
239 if (attr->is_line_break)
240 {
241 g_set_error (err: error,
242 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK,
243 format: "char %#x %d: Do not break after ZWJ (LB8a)", wc, pos);
244 return FALSE;
245 }
246 }
247
248 /* TODO: check LB9 */
249
250 if (prev_break_type == G_UNICODE_BREAK_WORD_JOINER ||
251 break_type == G_UNICODE_BREAK_WORD_JOINER)
252 {
253 if (attr->is_line_break)
254 {
255 g_set_error (err: error,
256 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK,
257 format: "char %#x %d: Do not break before or after WJ (LB11)", wc, pos);
258 return FALSE;
259 }
260 }
261
262 if (prev_break_type == G_UNICODE_BREAK_NON_BREAKING_GLUE)
263 {
264 g_set_error (err: error,
265 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK,
266 format: "char %#x %d: Do not break after GL (LB12)", wc, pos);
267 return FALSE;
268 }
269
270 /* internal consistency */
271
272 if (attr->is_mandatory_break && !attr->is_line_break)
273 {
274 g_set_error (err: error,
275 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK,
276 format: "char %#x %d: Mandatory breaks must also be marked as regular breaks", wc, pos);
277 return FALSE;
278 }
279
280 return TRUE;
281}
282
283static gboolean
284check_line_invariants (const char *text,
285 int length,
286 const PangoLogAttr *attrs,
287 int attrs_len,
288 GError **error)
289{
290 return log_attr_foreach (text, length,
291 attrs, attrs_len,
292 func: check_line_char, error);
293}
294
295static gboolean
296check_grapheme_invariants (const char *text,
297 int length,
298 const PangoLogAttr *attrs,
299 int attrs_len,
300 GError **error)
301{
302 return TRUE;
303}
304
305static gboolean
306check_word_invariants (const char *text,
307 int length,
308 const PangoLogAttr *attrs,
309 int attrs_len,
310 GError **error)
311{
312 enum {
313 AFTER_START,
314 AFTER_END
315 } state = AFTER_END;
316
317 for (int i = 0; i < attrs_len; i++)
318 {
319 /* Check that word starts and ends are alternating */
320 switch (state)
321 {
322 case AFTER_END:
323 if (attrs[i].is_word_start)
324 {
325 if (attrs[i].is_word_end)
326 state = AFTER_END;
327 else
328 state = AFTER_START;
329 break;
330 }
331 if (attrs[i].is_word_end)
332 {
333 g_set_error (err: error,
334 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_WORD,
335 format: "char %d: Unexpected word end", i);
336 return FALSE;
337 }
338 break;
339
340 case AFTER_START:
341 if (attrs[i].is_word_end)
342 {
343 if (attrs[i].is_word_start)
344 state = AFTER_START;
345 else
346 state = AFTER_END;
347 break;
348 }
349 if (attrs[i].is_word_start)
350 {
351 g_set_error (err: error,
352 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_WORD,
353 format: "char %d: Unexpected word start", i);
354 return FALSE;
355 }
356 break;
357
358 default:
359 g_assert_not_reached ();
360 }
361
362 /* Check that words don't end in the middle of graphemes */
363 if (attrs[i].is_word_boundary && !attrs[i].is_cursor_position)
364 {
365 g_set_error (err: error,
366 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_SENTENCE,
367 format: "char %d: Word ends inside a grapheme", i);
368 return FALSE;
369 }
370 }
371
372 return TRUE;
373}
374
375static gboolean
376check_sentence_invariants (const char *text,
377 int length,
378 const PangoLogAttr *attrs,
379 int attrs_len,
380 GError **error)
381{
382 enum {
383 AFTER_START,
384 AFTER_END
385 } state = AFTER_END;
386
387 for (int i = 0; i < attrs_len; i++)
388 {
389 /* Check that word starts and ends are alternating */
390 switch (state)
391 {
392 case AFTER_END:
393 if (attrs[i].is_sentence_start)
394 {
395 if (attrs[i].is_sentence_end)
396 state = AFTER_END;
397 else
398 state = AFTER_START;
399 break;
400 }
401 if (attrs[i].is_sentence_end)
402 {
403 g_set_error (err: error,
404 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_SENTENCE,
405 format: "char %d: Unexpected sentence end", i);
406 return FALSE;
407 }
408 break;
409
410 case AFTER_START:
411 if (attrs[i].is_sentence_end)
412 {
413 if (attrs[i].is_sentence_start)
414 state = AFTER_START;
415 else
416 state = AFTER_END;
417 break;
418 }
419 if (attrs[i].is_sentence_start)
420 {
421 g_set_error (err: error,
422 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_SENTENCE,
423 format: "char %d: Unexpected sentence start", i);
424 return FALSE;
425 }
426 break;
427
428 default:
429 g_assert_not_reached ();
430 }
431 }
432
433 return TRUE;
434}
435
436static gboolean
437check_space_invariants (const char *text,
438 int length,
439 const PangoLogAttr *log_attrs,
440 int attrs_len,
441 GError **error)
442{
443 for (int i = 0; i < attrs_len; i++)
444 {
445 if (log_attrs[i].is_expandable_space && !log_attrs[i].is_white)
446 {
447 g_set_error (err: error,
448 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_SPACE,
449 format: "char %d: Expandable space must be space", i);
450 return FALSE;
451 }
452 }
453
454 return TRUE;
455}
456
457/* }}} */
458/* {{{ Public API */
459
460/*
461 * pango_validate_log_attrs:
462 * @text: text to which @log_attrs belong
463 * @length: length of @text
464 * @log_attrs: `PangoLogAttr` array to validate
465 * @attrs_len: length of @log_attrs
466 *
467 * Apply sanity checks to @log_attrs.
468 *
469 * This function checks some conditions that Pango
470 * relies on. It is not guaranteed to be an exhaustive
471 * validity test. Currentlty, it checks that
472 *
473 * - There's no break before the first char
474 * - Mandatory breaks are line breaks
475 * - Line breaks are char breaks
476 * - Lines aren't broken between \\r and \\n
477 * - Lines aren't broken before a space (unless the break
478 * is mandatory, or the space precedes a combining mark)
479 * - Lines aren't broken between two open punctuation
480 * or between two close punctuation characters
481 * - Lines aren't broken between a letter and a quotation mark
482 * - Word starts and ends alternate
483 * - Sentence starts and ends alternate
484 * - Expandable spaces are spaces
485 * - Words don't end in the middle of graphemes
486 * - Sentences don't end in the middle of words
487 *
488 * Returns: %TRUE if @log_attrs are valid
489 */
490gboolean
491pango_validate_log_attrs (const char *text,
492 int length,
493 const PangoLogAttr *log_attrs,
494 int attrs_len,
495 GError **error)
496{
497 int n_chars;
498
499 n_chars = g_utf8_strlen (p: text, max: length);
500 if (attrs_len != n_chars + 1)
501 {
502 g_set_error_literal (err: error,
503 PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_FAILED,
504 message: "Array has wrong length");
505 return FALSE;
506 }
507
508 if (!check_line_invariants (text, length, attrs: log_attrs, attrs_len, error))
509 return FALSE;
510
511 if (!check_grapheme_invariants (text, length, attrs: log_attrs, attrs_len, error))
512 return FALSE;
513
514 if (!check_word_invariants (text, length, attrs: log_attrs, attrs_len, error))
515 return FALSE;
516
517 if (!check_sentence_invariants (text, length, attrs: log_attrs, attrs_len, error))
518 return FALSE;
519
520 if (!check_space_invariants (text, length, log_attrs, attrs_len, error))
521 return FALSE;
522
523 return TRUE;
524}
525
526 /* }}} */
527
528/* vim:set foldmethod=marker expandtab: */
529

/* Source: gtk/subprojects/pango/tests/validate-log-attrs.c */