1 | /* Pango |
2 | * |
3 | * Copyright (C) 1999 Red Hat Software |
4 | * |
5 | * This library is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU Library General Public |
7 | * License as published by the Free Software Foundation; either |
8 | * version 2 of the License, or (at your option) any later version. |
9 | * |
10 | * This library is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | * Library General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU Library General Public |
16 | * License along with this library; if not, write to the |
17 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
18 | * Boston, MA 02111-1307, USA. |
19 | */ |
20 | |
21 | #include "config.h" |
22 | |
23 | #include "validate-log-attrs.h" |
24 | #include "pango.h" |
25 | #include <string.h> |
26 | |
27 | /* {{{ Validation */ |
28 | |
29 | G_DEFINE_QUARK(pango-validate-error-quark, pango_validate_error) |
30 | |
31 | typedef gboolean (* CharForeachFunc) (int pos, |
32 | gunichar wc, |
33 | gunichar prev_wc, |
34 | gunichar next_wc, |
35 | GUnicodeType type, |
36 | GUnicodeType prev_type, |
37 | GUnicodeType next_type, |
38 | const PangoLogAttr *attr, |
39 | const PangoLogAttr *prev_attr, |
40 | const PangoLogAttr *next_attr, |
41 | gboolean *after_zws, |
42 | GError **error); |
43 | |
44 | static gboolean |
45 | log_attr_foreach (const char *text, |
46 | int length, |
47 | const PangoLogAttr *attrs, |
48 | int attrs_len, |
49 | CharForeachFunc func, |
50 | GError **error) |
51 | { |
52 | const gchar *next = text; |
53 | const gchar *end = text + length; |
54 | gint i = 0; |
55 | gunichar prev_wc; |
56 | gunichar next_wc; |
57 | GUnicodeType prev_type; |
58 | GUnicodeType next_type; |
59 | gboolean after_zws; |
60 | |
61 | if (next == end) |
62 | goto done; |
63 | |
64 | prev_type = (GUnicodeType) -1; |
65 | prev_wc = 0; |
66 | |
67 | next_wc = g_utf8_get_char (p: next); |
68 | next_type = g_unichar_type (c: next_wc); |
69 | |
70 | after_zws = FALSE; |
71 | |
72 | while (next_wc != 0) |
73 | { |
74 | GUnicodeType type; |
75 | gunichar wc; |
76 | |
77 | wc = next_wc; |
78 | type = next_type; |
79 | |
80 | next = g_utf8_next_char (next); |
81 | |
82 | if (next >= end) |
83 | next_wc = 0; |
84 | else |
85 | next_wc = g_utf8_get_char (p: next); |
86 | |
87 | if (next_wc) |
88 | next_type = g_unichar_type (c: next_wc); |
89 | |
90 | if (!func (i, |
91 | wc, prev_wc, next_wc, |
92 | type, prev_type, next_type, |
93 | &attrs[i], |
94 | i != 0 ? &attrs[i - 1] : NULL, |
95 | &attrs[i + 1], |
96 | &after_zws, |
97 | error)) |
98 | return FALSE; |
99 | |
100 | prev_type = type; |
101 | prev_wc = wc; |
102 | i++; |
103 | } |
104 | |
105 | done: |
106 | return TRUE; |
107 | } |
108 | |
109 | static gboolean |
110 | check_line_char (int pos, |
111 | gunichar wc, |
112 | gunichar prev_wc, |
113 | gunichar next_wc, |
114 | GUnicodeType type, |
115 | GUnicodeType prev_type, |
116 | GUnicodeType next_type, |
117 | const PangoLogAttr *attr, |
118 | const PangoLogAttr *prev_attr, |
119 | const PangoLogAttr *next_attr, |
120 | gboolean *after_zws, |
121 | GError **error) |
122 | { |
123 | GUnicodeBreakType break_type; |
124 | GUnicodeBreakType prev_break_type; |
125 | |
126 | break_type = g_unichar_break_type (c: wc); |
127 | |
128 | if (prev_wc) |
129 | prev_break_type = g_unichar_break_type (c: prev_wc); |
130 | else |
131 | prev_break_type = G_UNICODE_BREAK_UNKNOWN; |
132 | |
133 | if (prev_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE || |
134 | (prev_break_type == G_UNICODE_BREAK_SPACE && *after_zws)) |
135 | *after_zws = TRUE; |
136 | else |
137 | *after_zws = FALSE; |
138 | |
139 | if (wc == '\n' && prev_wc == '\r') |
140 | { |
141 | if (attr->is_line_break) |
142 | { |
143 | g_set_error (err: error, |
144 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK, |
145 | format: "char %#x %d: Do not break between \\r and \\n (LB5)" , wc, pos); |
146 | return FALSE; |
147 | } |
148 | } |
149 | |
150 | if (prev_wc == 0 && wc != 0) |
151 | { |
152 | if (attr->is_line_break) |
153 | { |
154 | g_set_error (err: error, |
155 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK, |
156 | format: "char %#x %d: Do not break before first char (LB2)" , wc, pos); |
157 | return FALSE; |
158 | } |
159 | } |
160 | |
161 | if (next_wc == 0) |
162 | { |
163 | if (!next_attr->is_line_break) |
164 | { |
165 | g_set_error (err: error, |
166 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK, |
167 | format: "char %#x %d: Always break after the last char (LB3)" , wc, pos); |
168 | return FALSE; |
169 | } |
170 | } |
171 | |
172 | if (prev_break_type == G_UNICODE_BREAK_MANDATORY) |
173 | { |
174 | if (!attr->is_mandatory_break) |
175 | { |
176 | g_set_error (err: error, |
177 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK, |
178 | format: "char %#x %d: Always break after hard line breaks (LB4)" , wc, pos); |
179 | return FALSE; |
180 | } |
181 | } |
182 | |
183 | if (prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN || |
184 | prev_break_type == G_UNICODE_BREAK_LINE_FEED || |
185 | prev_break_type == G_UNICODE_BREAK_NEXT_LINE) |
186 | { |
187 | if (!attr->is_mandatory_break) |
188 | { |
189 | g_set_error (err: error, |
190 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK, |
191 | format: "char %#x %d: Always break after CR, LF and NL (LB5)" , wc, pos); |
192 | return FALSE; |
193 | } |
194 | } |
195 | |
196 | if (break_type == G_UNICODE_BREAK_MANDATORY || |
197 | break_type == G_UNICODE_BREAK_CARRIAGE_RETURN || |
198 | break_type == G_UNICODE_BREAK_LINE_FEED || |
199 | break_type == G_UNICODE_BREAK_NEXT_LINE) |
200 | { |
201 | if (attr->is_line_break) |
202 | { |
203 | g_set_error (err: error, |
204 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK, |
205 | format: "char %#x %d: Do not break before hard line beaks (LB6)" , wc, pos); |
206 | return FALSE; |
207 | } |
208 | } |
209 | |
210 | if (break_type == G_UNICODE_BREAK_SPACE || |
211 | break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE) |
212 | { |
213 | if (attr->is_line_break && prev_attr != NULL && |
214 | !attr->is_mandatory_break && |
215 | !(next_wc && g_unichar_break_type (c: next_wc) == G_UNICODE_BREAK_COMBINING_MARK)) |
216 | { |
217 | g_set_error (err: error, |
218 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK, |
219 | format: "char %#x %d: Can't break before a space unless mandatory precedes or combining mark follows (LB7)" , wc, pos); |
220 | return FALSE; |
221 | } |
222 | } |
223 | |
224 | if (break_type != G_UNICODE_BREAK_ZERO_WIDTH_SPACE && |
225 | break_type != G_UNICODE_BREAK_SPACE && |
226 | *after_zws) |
227 | { |
228 | if (!attr->is_line_break) |
229 | { |
230 | g_set_error (err: error, |
231 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK, |
232 | format: "char %#x %d: Break before a char following ZWS, even if spaces intervene (LB8)" , wc, pos); |
233 | return FALSE; |
234 | } |
235 | } |
236 | |
237 | if (break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER) |
238 | { |
239 | if (attr->is_line_break) |
240 | { |
241 | g_set_error (err: error, |
242 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK, |
243 | format: "char %#x %d: Do not break after ZWJ (LB8a)" , wc, pos); |
244 | return FALSE; |
245 | } |
246 | } |
247 | |
248 | /* TODO: check LB9 */ |
249 | |
250 | if (prev_break_type == G_UNICODE_BREAK_WORD_JOINER || |
251 | break_type == G_UNICODE_BREAK_WORD_JOINER) |
252 | { |
253 | if (attr->is_line_break) |
254 | { |
255 | g_set_error (err: error, |
256 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK, |
257 | format: "char %#x %d: Do not break before or after WJ (LB11)" , wc, pos); |
258 | return FALSE; |
259 | } |
260 | } |
261 | |
262 | if (prev_break_type == G_UNICODE_BREAK_NON_BREAKING_GLUE) |
263 | { |
264 | g_set_error (err: error, |
265 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK, |
266 | format: "char %#x %d: Do not break after GL (LB12)" , wc, pos); |
267 | return FALSE; |
268 | } |
269 | |
270 | /* internal consistency */ |
271 | |
272 | if (attr->is_mandatory_break && !attr->is_line_break) |
273 | { |
274 | g_set_error (err: error, |
275 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_BREAK, |
276 | format: "char %#x %d: Mandatory breaks must also be marked as regular breaks" , wc, pos); |
277 | return FALSE; |
278 | } |
279 | |
280 | return TRUE; |
281 | } |
282 | |
283 | static gboolean |
284 | check_line_invariants (const char *text, |
285 | int length, |
286 | const PangoLogAttr *attrs, |
287 | int attrs_len, |
288 | GError **error) |
289 | { |
290 | return log_attr_foreach (text, length, |
291 | attrs, attrs_len, |
292 | func: check_line_char, error); |
293 | } |
294 | |
295 | static gboolean |
296 | check_grapheme_invariants (const char *text, |
297 | int length, |
298 | const PangoLogAttr *attrs, |
299 | int attrs_len, |
300 | GError **error) |
301 | { |
302 | return TRUE; |
303 | } |
304 | |
305 | static gboolean |
306 | check_word_invariants (const char *text, |
307 | int length, |
308 | const PangoLogAttr *attrs, |
309 | int attrs_len, |
310 | GError **error) |
311 | { |
312 | enum { |
313 | AFTER_START, |
314 | AFTER_END |
315 | } state = AFTER_END; |
316 | |
317 | for (int i = 0; i < attrs_len; i++) |
318 | { |
319 | /* Check that word starts and ends are alternating */ |
320 | switch (state) |
321 | { |
322 | case AFTER_END: |
323 | if (attrs[i].is_word_start) |
324 | { |
325 | if (attrs[i].is_word_end) |
326 | state = AFTER_END; |
327 | else |
328 | state = AFTER_START; |
329 | break; |
330 | } |
331 | if (attrs[i].is_word_end) |
332 | { |
333 | g_set_error (err: error, |
334 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_WORD, |
335 | format: "char %d: Unexpected word end" , i); |
336 | return FALSE; |
337 | } |
338 | break; |
339 | |
340 | case AFTER_START: |
341 | if (attrs[i].is_word_end) |
342 | { |
343 | if (attrs[i].is_word_start) |
344 | state = AFTER_START; |
345 | else |
346 | state = AFTER_END; |
347 | break; |
348 | } |
349 | if (attrs[i].is_word_start) |
350 | { |
351 | g_set_error (err: error, |
352 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_WORD, |
353 | format: "char %d: Unexpected word start" , i); |
354 | return FALSE; |
355 | } |
356 | break; |
357 | |
358 | default: |
359 | g_assert_not_reached (); |
360 | } |
361 | |
362 | /* Check that words don't end in the middle of graphemes */ |
363 | if (attrs[i].is_word_boundary && !attrs[i].is_cursor_position) |
364 | { |
365 | g_set_error (err: error, |
366 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_SENTENCE, |
367 | format: "char %d: Word ends inside a grapheme" , i); |
368 | return FALSE; |
369 | } |
370 | } |
371 | |
372 | return TRUE; |
373 | } |
374 | |
375 | static gboolean |
376 | check_sentence_invariants (const char *text, |
377 | int length, |
378 | const PangoLogAttr *attrs, |
379 | int attrs_len, |
380 | GError **error) |
381 | { |
382 | enum { |
383 | AFTER_START, |
384 | AFTER_END |
385 | } state = AFTER_END; |
386 | |
387 | for (int i = 0; i < attrs_len; i++) |
388 | { |
389 | /* Check that word starts and ends are alternating */ |
390 | switch (state) |
391 | { |
392 | case AFTER_END: |
393 | if (attrs[i].is_sentence_start) |
394 | { |
395 | if (attrs[i].is_sentence_end) |
396 | state = AFTER_END; |
397 | else |
398 | state = AFTER_START; |
399 | break; |
400 | } |
401 | if (attrs[i].is_sentence_end) |
402 | { |
403 | g_set_error (err: error, |
404 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_SENTENCE, |
405 | format: "char %d: Unexpected sentence end" , i); |
406 | return FALSE; |
407 | } |
408 | break; |
409 | |
410 | case AFTER_START: |
411 | if (attrs[i].is_sentence_end) |
412 | { |
413 | if (attrs[i].is_sentence_start) |
414 | state = AFTER_START; |
415 | else |
416 | state = AFTER_END; |
417 | break; |
418 | } |
419 | if (attrs[i].is_sentence_start) |
420 | { |
421 | g_set_error (err: error, |
422 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_SENTENCE, |
423 | format: "char %d: Unexpected sentence start" , i); |
424 | return FALSE; |
425 | } |
426 | break; |
427 | |
428 | default: |
429 | g_assert_not_reached (); |
430 | } |
431 | } |
432 | |
433 | return TRUE; |
434 | } |
435 | |
436 | static gboolean |
437 | check_space_invariants (const char *text, |
438 | int length, |
439 | const PangoLogAttr *log_attrs, |
440 | int attrs_len, |
441 | GError **error) |
442 | { |
443 | for (int i = 0; i < attrs_len; i++) |
444 | { |
445 | if (log_attrs[i].is_expandable_space && !log_attrs[i].is_white) |
446 | { |
447 | g_set_error (err: error, |
448 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_SPACE, |
449 | format: "char %d: Expandable space must be space" , i); |
450 | return FALSE; |
451 | } |
452 | } |
453 | |
454 | return TRUE; |
455 | } |
456 | |
457 | /* }}} */ |
458 | /* {{{ Public API */ |
459 | |
460 | /* |
461 | * pango_validate_log_attrs: |
462 | * @text: text to which @log_attrs belong |
463 | * @length: length of @text |
464 | * @log_attrs: `PangoLogAttr` array to validate |
465 | * @attrs_len: length of @log_attrs |
466 | * |
467 | * Apply sanity checks to @log_attrs. |
468 | * |
469 | * This function checks some conditions that Pango |
470 | * relies on. It is not guaranteed to be an exhaustive |
471 | * validity test. Currentlty, it checks that |
472 | * |
473 | * - There's no break before the first char |
474 | * - Mandatory breaks are line breaks |
475 | * - Line breaks are char breaks |
476 | * - Lines aren't broken between \\r and \\n |
477 | * - Lines aren't broken before a space (unless the break |
478 | * is mandatory, or the space precedes a combining mark) |
479 | * - Lines aren't broken between two open punctuation |
480 | * or between two close punctuation characters |
481 | * - Lines aren't broken between a letter and a quotation mark |
482 | * - Word starts and ends alternate |
483 | * - Sentence starts and ends alternate |
484 | * - Expandable spaces are spaces |
485 | * - Words don't end in the middle of graphemes |
486 | * - Sentences don't end in the middle of words |
487 | * |
488 | * Returns: %TRUE if @log_attrs are valid |
489 | */ |
490 | gboolean |
491 | pango_validate_log_attrs (const char *text, |
492 | int length, |
493 | const PangoLogAttr *log_attrs, |
494 | int attrs_len, |
495 | GError **error) |
496 | { |
497 | int n_chars; |
498 | |
499 | n_chars = g_utf8_strlen (p: text, max: length); |
500 | if (attrs_len != n_chars + 1) |
501 | { |
502 | g_set_error_literal (err: error, |
503 | PANGO_VALIDATE_ERROR, code: PANGO_VALIDATE_ERROR_FAILED, |
504 | message: "Array has wrong length" ); |
505 | return FALSE; |
506 | } |
507 | |
508 | if (!check_line_invariants (text, length, attrs: log_attrs, attrs_len, error)) |
509 | return FALSE; |
510 | |
511 | if (!check_grapheme_invariants (text, length, attrs: log_attrs, attrs_len, error)) |
512 | return FALSE; |
513 | |
514 | if (!check_word_invariants (text, length, attrs: log_attrs, attrs_len, error)) |
515 | return FALSE; |
516 | |
517 | if (!check_sentence_invariants (text, length, attrs: log_attrs, attrs_len, error)) |
518 | return FALSE; |
519 | |
520 | if (!check_space_invariants (text, length, log_attrs, attrs_len, error)) |
521 | return FALSE; |
522 | |
523 | return TRUE; |
524 | } |
525 | |
526 | /* }}} */ |
527 | |
528 | /* vim:set foldmethod=marker expandtab: */ |
529 | |