1 | /* |
2 | * Copyright © 2009 Red Hat, Inc. |
3 | * Copyright © 2011 Codethink Limited |
4 | * Copyright © 2011,2012 Google, Inc. |
5 | * |
6 | * This is part of HarfBuzz, a text shaping library. |
7 | * |
8 | * Permission is hereby granted, without written agreement and without |
9 | * license or royalty fees, to use, copy, modify, and distribute this |
10 | * software and its documentation for any purpose, provided that the |
11 | * above copyright notice and the following two paragraphs appear in |
12 | * all copies of this software. |
13 | * |
14 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
15 | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
16 | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
17 | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
18 | * DAMAGE. |
19 | * |
20 | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
21 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
22 | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
23 | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
24 | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
25 | * |
26 | * Red Hat Author(s): Behdad Esfahbod |
27 | * Codethink Author(s): Ryan Lortie |
28 | * Google Author(s): Behdad Esfahbod |
29 | */ |
30 | |
31 | #ifndef HB_H_IN |
32 | #error "Include <hb.h> instead." |
33 | #endif |
34 | |
35 | #ifndef HB_UNICODE_H |
36 | #define HB_UNICODE_H |
37 | |
38 | #include "hb-common.h" |
39 | |
40 | HB_BEGIN_DECLS |
41 | |
42 | |
43 | /* hb_unicode_general_category_t */ |
44 | |
/*
 * Unicode Character Database property: General_Category (gc).
 *
 * Each enumerator carries its two-letter UCD abbreviation in the trailing
 * comment.  The numeric values are written out explicitly; they are the
 * same consecutive values (starting at 0) that the compiler would assign
 * implicitly.
 */
typedef enum
{
  HB_UNICODE_GENERAL_CATEGORY_CONTROL             =  0, /* Cc */
  HB_UNICODE_GENERAL_CATEGORY_FORMAT              =  1, /* Cf */
  HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED          =  2, /* Cn */
  HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE         =  3, /* Co */
  HB_UNICODE_GENERAL_CATEGORY_SURROGATE           =  4, /* Cs */
  HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER    =  5, /* Ll */
  HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER     =  6, /* Lm */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER        =  7, /* Lo */
  HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER    =  8, /* Lt */
  HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER    =  9, /* Lu */
  HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK        = 10, /* Mc */
  HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK      = 11, /* Me */
  HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK    = 12, /* Mn */
  HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER      = 13, /* Nd */
  HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER       = 14, /* Nl */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER        = 15, /* No */
  HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION = 16, /* Pc */
  HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION    = 17, /* Pd */
  HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION   = 18, /* Pe */
  HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION   = 19, /* Pf */
  HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION = 20, /* Pi */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION   = 21, /* Po */
  HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION    = 22, /* Ps */
  HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL     = 23, /* Sc */
  HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL     = 24, /* Sk */
  HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL         = 25, /* Sm */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL        = 26, /* So */
  HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR      = 27, /* Zl */
  HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR = 28, /* Zp */
  HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR     = 29  /* Zs */
} hb_unicode_general_category_t;
79 | |
80 | /* hb_unicode_combining_class_t */ |
81 | |
82 | /* Note: newer versions of Unicode may add new values. Clients should be ready to handle |
83 | * any value in the 0..254 range being returned from hb_unicode_combining_class(). |
84 | */ |
85 | |
/*
 * Unicode Character Database property: Canonical_Combining_Class (ccc).
 *
 * Every enumerator's value is the numeric ccc value itself.  Classes with no
 * descriptive name are spelled CCC<n>, where <n> is that value; the script
 * comments group them by the script they are listed under here.
 */
typedef enum
{
  HB_UNICODE_COMBINING_CLASS_NOT_REORDERED	= 0,
  HB_UNICODE_COMBINING_CLASS_OVERLAY		= 1,
  HB_UNICODE_COMBINING_CLASS_NUKTA		= 7,
  HB_UNICODE_COMBINING_CLASS_KANA_VOICING	= 8,
  HB_UNICODE_COMBINING_CLASS_VIRAMA		= 9,

  /* Hebrew */
  HB_UNICODE_COMBINING_CLASS_CCC10	=  10,
  HB_UNICODE_COMBINING_CLASS_CCC11	=  11,
  HB_UNICODE_COMBINING_CLASS_CCC12	=  12,
  HB_UNICODE_COMBINING_CLASS_CCC13	=  13,
  HB_UNICODE_COMBINING_CLASS_CCC14	=  14,
  HB_UNICODE_COMBINING_CLASS_CCC15	=  15,
  HB_UNICODE_COMBINING_CLASS_CCC16	=  16,
  HB_UNICODE_COMBINING_CLASS_CCC17	=  17,
  HB_UNICODE_COMBINING_CLASS_CCC18	=  18,
  HB_UNICODE_COMBINING_CLASS_CCC19	=  19,
  HB_UNICODE_COMBINING_CLASS_CCC20	=  20,
  HB_UNICODE_COMBINING_CLASS_CCC21	=  21,
  HB_UNICODE_COMBINING_CLASS_CCC22	=  22,
  HB_UNICODE_COMBINING_CLASS_CCC23	=  23,
  HB_UNICODE_COMBINING_CLASS_CCC24	=  24,
  HB_UNICODE_COMBINING_CLASS_CCC25	=  25,
  HB_UNICODE_COMBINING_CLASS_CCC26	=  26,

  /* Arabic */
  HB_UNICODE_COMBINING_CLASS_CCC27	=  27,
  HB_UNICODE_COMBINING_CLASS_CCC28	=  28,
  HB_UNICODE_COMBINING_CLASS_CCC29	=  29,
  HB_UNICODE_COMBINING_CLASS_CCC30	=  30,
  HB_UNICODE_COMBINING_CLASS_CCC31	=  31,
  HB_UNICODE_COMBINING_CLASS_CCC32	=  32,
  HB_UNICODE_COMBINING_CLASS_CCC33	=  33,
  HB_UNICODE_COMBINING_CLASS_CCC34	=  34,
  HB_UNICODE_COMBINING_CLASS_CCC35	=  35,

  /* Syriac */
  HB_UNICODE_COMBINING_CLASS_CCC36	=  36,

  /* Telugu */
  HB_UNICODE_COMBINING_CLASS_CCC84	=  84,
  HB_UNICODE_COMBINING_CLASS_CCC91	=  91,

  /* Thai */
  HB_UNICODE_COMBINING_CLASS_CCC103	= 103,
  HB_UNICODE_COMBINING_CLASS_CCC107	= 107,

  /* Lao */
  HB_UNICODE_COMBINING_CLASS_CCC118	= 118,
  HB_UNICODE_COMBINING_CLASS_CCC122	= 122,

  /* Tibetan */
  HB_UNICODE_COMBINING_CLASS_CCC129	= 129,
  HB_UNICODE_COMBINING_CLASS_CCC130	= 130,
  HB_UNICODE_COMBINING_CLASS_CCC132	= 132,
  /* Misnamed: the name says 133 but the value is, and always was, 132.
   * Kept with its original value so existing callers keep compiling and
   * behaving the same; new code should use ..._CCC132. */
  HB_UNICODE_COMBINING_CLASS_CCC133	= 132,


  HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT	= 200,
  HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW		= 202,
  HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE		= 214,
  HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT	= 216,
  HB_UNICODE_COMBINING_CLASS_BELOW_LEFT			= 218,
  HB_UNICODE_COMBINING_CLASS_BELOW			= 220,
  HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT		= 222,
  HB_UNICODE_COMBINING_CLASS_LEFT			= 224,
  HB_UNICODE_COMBINING_CLASS_RIGHT			= 226,
  HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT			= 228,
  HB_UNICODE_COMBINING_CLASS_ABOVE			= 230,
  HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT		= 232,
  HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW		= 233,
  HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE		= 234,

  HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT		= 240,

  /* One past the 0..254 range of values a lookup may return. */
  HB_UNICODE_COMBINING_CLASS_INVALID	= 255
} hb_unicode_combining_class_t;
165 | |
166 | |
/*
 * hb_unicode_funcs_t
 *
 * Handle for a set of Unicode callbacks.  Only the struct tag is declared in
 * this header, so the functions declared here all take or return it by
 * pointer.
 */

typedef struct hb_unicode_funcs_t hb_unicode_funcs_t;
172 | |
173 | |
174 | /* |
175 | * just give me the best implementation you've got there. |
176 | */ |
177 | HB_EXTERN hb_unicode_funcs_t * |
178 | hb_unicode_funcs_get_default (void); |
179 | |
180 | |
181 | HB_EXTERN hb_unicode_funcs_t * |
182 | hb_unicode_funcs_create (hb_unicode_funcs_t *parent); |
183 | |
184 | HB_EXTERN hb_unicode_funcs_t * |
185 | hb_unicode_funcs_get_empty (void); |
186 | |
187 | HB_EXTERN hb_unicode_funcs_t * |
188 | hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs); |
189 | |
190 | HB_EXTERN void |
191 | hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs); |
192 | |
193 | HB_EXTERN hb_bool_t |
194 | hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs, |
195 | hb_user_data_key_t *key, |
196 | void * data, |
197 | hb_destroy_func_t destroy, |
198 | hb_bool_t replace); |
199 | |
200 | |
201 | HB_EXTERN void * |
202 | hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs, |
203 | hb_user_data_key_t *key); |
204 | |
205 | |
206 | HB_EXTERN void |
207 | hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs); |
208 | |
209 | HB_EXTERN hb_bool_t |
210 | hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs); |
211 | |
212 | HB_EXTERN hb_unicode_funcs_t * |
213 | hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs); |
214 | |
215 | |
216 | /* |
217 | * funcs |
218 | */ |
219 | |
220 | /* typedefs */ |
221 | |
222 | typedef hb_unicode_combining_class_t (*hb_unicode_combining_class_func_t) (hb_unicode_funcs_t *ufuncs, |
223 | hb_codepoint_t unicode, |
224 | void *user_data); |
225 | typedef unsigned int (*hb_unicode_eastasian_width_func_t) (hb_unicode_funcs_t *ufuncs, |
226 | hb_codepoint_t unicode, |
227 | void *user_data); |
228 | typedef hb_unicode_general_category_t (*hb_unicode_general_category_func_t) (hb_unicode_funcs_t *ufuncs, |
229 | hb_codepoint_t unicode, |
230 | void *user_data); |
231 | typedef hb_codepoint_t (*hb_unicode_mirroring_func_t) (hb_unicode_funcs_t *ufuncs, |
232 | hb_codepoint_t unicode, |
233 | void *user_data); |
234 | typedef hb_script_t (*hb_unicode_script_func_t) (hb_unicode_funcs_t *ufuncs, |
235 | hb_codepoint_t unicode, |
236 | void *user_data); |
237 | |
238 | typedef hb_bool_t (*hb_unicode_compose_func_t) (hb_unicode_funcs_t *ufuncs, |
239 | hb_codepoint_t a, |
240 | hb_codepoint_t b, |
241 | hb_codepoint_t *ab, |
242 | void *user_data); |
243 | typedef hb_bool_t (*hb_unicode_decompose_func_t) (hb_unicode_funcs_t *ufuncs, |
244 | hb_codepoint_t ab, |
245 | hb_codepoint_t *a, |
246 | hb_codepoint_t *b, |
247 | void *user_data); |
248 | |
249 | /** |
250 | * hb_unicode_decompose_compatibility_func_t: |
251 | * @ufuncs: a Unicode function structure |
252 | * @u: codepoint to decompose |
253 | * @decomposed: address of codepoint array (of length %HB_UNICODE_MAX_DECOMPOSITION_LEN) to write decomposition into |
254 | * @user_data: user data pointer as passed to hb_unicode_funcs_set_decompose_compatibility_func() |
255 | * |
256 | * Fully decompose @u to its Unicode compatibility decomposition. The codepoints of the decomposition will be written to @decomposed. |
257 | * The complete length of the decomposition will be returned. |
258 | * |
259 | * If @u has no compatibility decomposition, zero should be returned. |
260 | * |
261 | * The Unicode standard guarantees that a buffer of length %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints will always be sufficient for any |
262 | * compatibility decomposition plus an terminating value of 0. Consequently, @decompose must be allocated by the caller to be at least this length. Implementations |
263 | * of this function type must ensure that they do not write past the provided array. |
264 | * |
265 | * Return value: number of codepoints in the full compatibility decomposition of @u, or 0 if no decomposition available. |
266 | */ |
267 | typedef unsigned int (*hb_unicode_decompose_compatibility_func_t) (hb_unicode_funcs_t *ufuncs, |
268 | hb_codepoint_t u, |
269 | hb_codepoint_t *decomposed, |
270 | void *user_data); |
271 | |
/* Length, in codepoints, of the buffer handed to compatibility-decomposition
 * callbacks: the longest decomposition (18) plus one slot for a terminating
 * 0.  See Unicode 6.1 for details on the maximum decomposition length. */
#define HB_UNICODE_MAX_DECOMPOSITION_LEN (18+1) /* codepoints */
274 | |
275 | /* setters */ |
276 | |
277 | /** |
278 | * hb_unicode_funcs_set_combining_class_func: |
279 | * @ufuncs: a Unicode function structure |
280 | * @func: (closure user_data) (destroy destroy) (scope notified): |
281 | * @user_data: |
282 | * @destroy: |
283 | * |
284 | * |
285 | * |
286 | * Since: 0.9.2 |
287 | **/ |
288 | HB_EXTERN void |
289 | hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs, |
290 | hb_unicode_combining_class_func_t func, |
291 | void *user_data, hb_destroy_func_t destroy); |
292 | |
293 | /** |
294 | * hb_unicode_funcs_set_eastasian_width_func: |
295 | * @ufuncs: a Unicode function structure |
296 | * @func: (closure user_data) (destroy destroy) (scope notified): |
297 | * @user_data: |
298 | * @destroy: |
299 | * |
300 | * |
301 | * |
302 | * Since: 0.9.2 |
303 | **/ |
304 | HB_EXTERN void |
305 | hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs, |
306 | hb_unicode_eastasian_width_func_t func, |
307 | void *user_data, hb_destroy_func_t destroy); |
308 | |
309 | /** |
310 | * hb_unicode_funcs_set_general_category_func: |
311 | * @ufuncs: a Unicode function structure |
312 | * @func: (closure user_data) (destroy destroy) (scope notified): |
313 | * @user_data: |
314 | * @destroy: |
315 | * |
316 | * |
317 | * |
318 | * Since: 0.9.2 |
319 | **/ |
320 | HB_EXTERN void |
321 | hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs, |
322 | hb_unicode_general_category_func_t func, |
323 | void *user_data, hb_destroy_func_t destroy); |
324 | |
325 | /** |
326 | * hb_unicode_funcs_set_mirroring_func: |
327 | * @ufuncs: a Unicode function structure |
328 | * @func: (closure user_data) (destroy destroy) (scope notified): |
329 | * @user_data: |
330 | * @destroy: |
331 | * |
332 | * |
333 | * |
334 | * Since: 0.9.2 |
335 | **/ |
336 | HB_EXTERN void |
337 | hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs, |
338 | hb_unicode_mirroring_func_t func, |
339 | void *user_data, hb_destroy_func_t destroy); |
340 | |
341 | /** |
342 | * hb_unicode_funcs_set_script_func: |
343 | * @ufuncs: a Unicode function structure |
344 | * @func: (closure user_data) (destroy destroy) (scope notified): |
345 | * @user_data: |
346 | * @destroy: |
347 | * |
348 | * |
349 | * |
350 | * Since: 0.9.2 |
351 | **/ |
352 | HB_EXTERN void |
353 | hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs, |
354 | hb_unicode_script_func_t func, |
355 | void *user_data, hb_destroy_func_t destroy); |
356 | |
357 | /** |
358 | * hb_unicode_funcs_set_compose_func: |
359 | * @ufuncs: a Unicode function structure |
360 | * @func: (closure user_data) (destroy destroy) (scope notified): |
361 | * @user_data: |
362 | * @destroy: |
363 | * |
364 | * |
365 | * |
366 | * Since: 0.9.2 |
367 | **/ |
368 | HB_EXTERN void |
369 | hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs, |
370 | hb_unicode_compose_func_t func, |
371 | void *user_data, hb_destroy_func_t destroy); |
372 | |
373 | /** |
374 | * hb_unicode_funcs_set_decompose_func: |
375 | * @ufuncs: a Unicode function structure |
376 | * @func: (closure user_data) (destroy destroy) (scope notified): |
377 | * @user_data: |
378 | * @destroy: |
379 | * |
380 | * |
381 | * |
382 | * Since: 0.9.2 |
383 | **/ |
384 | HB_EXTERN void |
385 | hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs, |
386 | hb_unicode_decompose_func_t func, |
387 | void *user_data, hb_destroy_func_t destroy); |
388 | |
389 | /** |
390 | * hb_unicode_funcs_set_decompose_compatibility_func: |
391 | * @ufuncs: a Unicode function structure |
392 | * @func: (closure user_data) (destroy destroy) (scope notified): |
393 | * @user_data: |
394 | * @destroy: |
395 | * |
396 | * |
397 | * |
398 | * Since: 0.9.2 |
399 | **/ |
400 | HB_EXTERN void |
401 | hb_unicode_funcs_set_decompose_compatibility_func (hb_unicode_funcs_t *ufuncs, |
402 | hb_unicode_decompose_compatibility_func_t func, |
403 | void *user_data, hb_destroy_func_t destroy); |
404 | |
405 | /* accessors */ |
406 | |
407 | /** |
408 | * hb_unicode_combining_class: |
409 | * |
410 | * Since: 0.9.2 |
411 | **/ |
412 | HB_EXTERN hb_unicode_combining_class_t |
413 | hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs, |
414 | hb_codepoint_t unicode); |
415 | |
416 | /** |
417 | * hb_unicode_eastasian_width: |
418 | * |
419 | * Since: 0.9.2 |
420 | **/ |
421 | HB_EXTERN unsigned int |
422 | hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs, |
423 | hb_codepoint_t unicode); |
424 | |
425 | /** |
426 | * hb_unicode_general_category: |
427 | * |
428 | * Since: 0.9.2 |
429 | **/ |
430 | HB_EXTERN hb_unicode_general_category_t |
431 | hb_unicode_general_category (hb_unicode_funcs_t *ufuncs, |
432 | hb_codepoint_t unicode); |
433 | |
434 | /** |
435 | * hb_unicode_mirroring: |
436 | * |
437 | * Since: 0.9.2 |
438 | **/ |
439 | HB_EXTERN hb_codepoint_t |
440 | hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs, |
441 | hb_codepoint_t unicode); |
442 | |
443 | /** |
444 | * hb_unicode_script: |
445 | * |
446 | * Since: 0.9.2 |
447 | **/ |
448 | HB_EXTERN hb_script_t |
449 | hb_unicode_script (hb_unicode_funcs_t *ufuncs, |
450 | hb_codepoint_t unicode); |
451 | |
452 | HB_EXTERN hb_bool_t |
453 | hb_unicode_compose (hb_unicode_funcs_t *ufuncs, |
454 | hb_codepoint_t a, |
455 | hb_codepoint_t b, |
456 | hb_codepoint_t *ab); |
457 | |
458 | HB_EXTERN hb_bool_t |
459 | hb_unicode_decompose (hb_unicode_funcs_t *ufuncs, |
460 | hb_codepoint_t ab, |
461 | hb_codepoint_t *a, |
462 | hb_codepoint_t *b); |
463 | |
464 | HB_EXTERN unsigned int |
465 | hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs, |
466 | hb_codepoint_t u, |
467 | hb_codepoint_t *decomposed); |
468 | |
469 | HB_END_DECLS |
470 | |
471 | #endif /* HB_UNICODE_H */ |
472 | |