1use core::convert::TryFrom;
2
3pub use unicode_ccc::CanonicalCombiningClass;
4// TODO: prefer unic-ucd-normal::CanonicalCombiningClass
5pub use unicode_properties::GeneralCategory as hb_unicode_general_category_t;
6
7use crate::Script;
8
9// Space estimates based on:
10// https://unicode.org/charts/PDF/U2000.pdf
11// https://docs.microsoft.com/en-us/typography/develop/character-design-standards/whitespace
pub mod hb_unicode_funcs_t {
    /// Numeric tag naming the fallback class of a space character, as
    /// returned by `CharExt::space_fallback`.
    pub type space_t = u8;

    /// The character has no space fallback.
    pub const NOT_SPACE: space_t = 0;

    /// Assigned to EM QUAD, EM SPACE and IDEOGRAPHIC SPACE.
    pub const SPACE_EM: space_t = 1;
    /// Assigned to EN QUAD and EN SPACE.
    pub const SPACE_EM_2: space_t = 2;
    /// Assigned to THREE-PER-EM SPACE.
    pub const SPACE_EM_3: space_t = 3;
    /// Assigned to FOUR-PER-EM SPACE.
    pub const SPACE_EM_4: space_t = 4;
    /// Assigned to THIN SPACE.
    pub const SPACE_EM_5: space_t = 5;
    /// Assigned to SIX-PER-EM SPACE.
    pub const SPACE_EM_6: space_t = 6;
    /// Assigned to HAIR SPACE.
    pub const SPACE_EM_16: space_t = 16;

    /// 4/18th of an EM! Assigned to MEDIUM MATHEMATICAL SPACE.
    pub const SPACE_4_EM_18: space_t = 17;
    /// Assigned to SPACE and NO-BREAK SPACE.
    pub const SPACE: space_t = 18;
    /// Assigned to FIGURE SPACE.
    pub const SPACE_FIGURE: space_t = 19;
    /// Assigned to PUNCTUATION SPACE.
    pub const SPACE_PUNCTUATION: space_t = 20;
    /// Assigned to NARROW NO-BREAK SPACE.
    pub const SPACE_NARROW: space_t = 21;
}
28
/// Replacement values for selected canonical combining classes.
///
/// Each `CCCn` constant is the value stored at index `n` of the
/// `MODIFIED_COMBINING_CLASS` table, i.e. the class reported instead of the
/// canonical class `n`. A constant equal to its own index leaves that class
/// unchanged.
#[allow(dead_code)]
pub mod modified_combining_class {
    // Hebrew
    //
    // We permute the "fixed-position" classes 10-26 into the order
    // described in the SBL Hebrew manual:
    //
    // https://www.sbl-site.org/Fonts/SBLHebrewUserManual1.5x.pdf
    //
    // (as recommended by:
    //  https://forum.fontlab.com/archive-old-microsoft-volt-group/vista-and-diacritic-ordering/msg22823/)
    //
    // More details here:
    // https://bugzilla.mozilla.org/show_bug.cgi?id=662055
    pub const CCC10: u8 = 22; // sheva
    pub const CCC11: u8 = 15; // hataf segol
    pub const CCC12: u8 = 16; // hataf patah
    pub const CCC13: u8 = 17; // hataf qamats
    pub const CCC14: u8 = 23; // hiriq
    pub const CCC15: u8 = 18; // tsere
    pub const CCC16: u8 = 19; // segol
    pub const CCC17: u8 = 20; // patah
    pub const CCC18: u8 = 21; // qamats & qamats qatan
    pub const CCC19: u8 = 14; // holam & holam haser for vav
    pub const CCC20: u8 = 24; // qubuts
    pub const CCC21: u8 = 12; // dagesh
    pub const CCC22: u8 = 25; // meteg
    pub const CCC23: u8 = 13; // rafe
    pub const CCC24: u8 = 10; // shin dot
    pub const CCC25: u8 = 11; // sin dot
    pub const CCC26: u8 = 26; // point varika

    // Arabic
    //
    // Modify to move Shadda (ccc=33) before other marks. See:
    // https://unicode.org/faq/normalization.html#8
    // https://unicode.org/faq/normalization.html#9
    pub const CCC27: u8 = 28; // fathatan
    pub const CCC28: u8 = 29; // dammatan
    pub const CCC29: u8 = 30; // kasratan
    pub const CCC30: u8 = 31; // fatha
    pub const CCC31: u8 = 32; // damma
    pub const CCC32: u8 = 33; // kasra
    pub const CCC33: u8 = 27; // shadda
    pub const CCC34: u8 = 34; // sukun
    pub const CCC35: u8 = 35; // superscript alef

    // Syriac
    pub const CCC36: u8 = 36; // superscript alaph

    // Telugu
    //
    // Modify Telugu length marks (ccc=84, ccc=91).
    // These are the only matras in the main Indic scripts range that have
    // a non-zero ccc. That makes them reorder with the Halant that is
    // ccc=9. Just zero them, we don't need them in our Indic shaper.
    pub const CCC84: u8 = 0; // length mark
    pub const CCC91: u8 = 0; // ai length mark

    // Thai
    //
    // Modify U+0E38 and U+0E39 (ccc=103) to be reordered before U+0E3A (ccc=9).
    // Assign 3, which is unassigned otherwise.
    // Uniscribe does this reordering too.
    pub const CCC103: u8 = 3; // sara u / sara uu
    pub const CCC107: u8 = 107; // mai *

    // Lao
    pub const CCC118: u8 = 118; // sign u / sign uu
    pub const CCC122: u8 = 122; // mai *

    // Tibetan
    //
    // In case of multiple vowel-signs, use u first (but after achung);
    // this allows Dzongkha multi-vowel shortcuts to render correctly.
    // Note that classes 130 and 132 are deliberately swapped around 131.
    pub const CCC129: u8 = 129; // sign aa
    pub const CCC130: u8 = 132; // sign i
    pub const CCC132: u8 = 131; // sign u
}
108
/// Lookup table from canonical combining class to modified combining class.
///
/// The table has exactly 256 entries and is indexed by the canonical
/// combining class value (see `CharExt::modified_combining_class`). Entries
/// written as plain numbers or as `CanonicalCombiningClass::* as u8` sit at
/// their own index, i.e. they leave the class unchanged; entries taken from
/// the `modified_combining_class` module carry the script-specific overrides.
#[rustfmt::skip]
const MODIFIED_COMBINING_CLASS: &[u8; 256] = &[
    // Classes 0..=9.
    CanonicalCombiningClass::NotReordered as u8,
    CanonicalCombiningClass::Overlay as u8,
    2, 3, 4, 5, 6,
    CanonicalCombiningClass::Nukta as u8,
    CanonicalCombiningClass::KanaVoicing as u8,
    CanonicalCombiningClass::Virama as u8,

    // Hebrew
    modified_combining_class::CCC10,
    modified_combining_class::CCC11,
    modified_combining_class::CCC12,
    modified_combining_class::CCC13,
    modified_combining_class::CCC14,
    modified_combining_class::CCC15,
    modified_combining_class::CCC16,
    modified_combining_class::CCC17,
    modified_combining_class::CCC18,
    modified_combining_class::CCC19,
    modified_combining_class::CCC20,
    modified_combining_class::CCC21,
    modified_combining_class::CCC22,
    modified_combining_class::CCC23,
    modified_combining_class::CCC24,
    modified_combining_class::CCC25,
    modified_combining_class::CCC26,

    // Arabic
    modified_combining_class::CCC27,
    modified_combining_class::CCC28,
    modified_combining_class::CCC29,
    modified_combining_class::CCC30,
    modified_combining_class::CCC31,
    modified_combining_class::CCC32,
    modified_combining_class::CCC33,
    modified_combining_class::CCC34,
    modified_combining_class::CCC35,

    // Syriac
    modified_combining_class::CCC36,

    37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
    60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83,

    // Telugu
    modified_combining_class::CCC84,
    85, 86, 87, 88, 89, 90,
    modified_combining_class::CCC91,
    92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,

    // Thai
    modified_combining_class::CCC103,
    104, 105, 106,
    modified_combining_class::CCC107,
    108, 109, 110, 111, 112, 113, 114, 115, 116, 117,

    // Lao
    modified_combining_class::CCC118,
    119, 120, 121,
    modified_combining_class::CCC122,
    123, 124, 125, 126, 127, 128,

    // Tibetan
    modified_combining_class::CCC129,
    modified_combining_class::CCC130,
    131,
    modified_combining_class::CCC132,
    133, 134, 135, 136, 137, 138, 139,


    140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
    150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
    170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
    180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
    190, 191, 192, 193, 194, 195, 196, 197, 198, 199,

    // Classes 200..=240: named positional classes interleaved with the
    // unnamed values between them.
    CanonicalCombiningClass::AttachedBelowLeft as u8,
    201,
    CanonicalCombiningClass::AttachedBelow as u8,
    203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
    CanonicalCombiningClass::AttachedAbove as u8,
    215,
    CanonicalCombiningClass::AttachedAboveRight as u8,
    217,
    CanonicalCombiningClass::BelowLeft as u8,
    219,
    CanonicalCombiningClass::Below as u8,
    221,
    CanonicalCombiningClass::BelowRight as u8,
    223,
    CanonicalCombiningClass::Left as u8,
    225,
    CanonicalCombiningClass::Right as u8,
    227,
    CanonicalCombiningClass::AboveLeft as u8,
    229,
    CanonicalCombiningClass::Above as u8,
    231,
    CanonicalCombiningClass::AboveRight as u8,
    CanonicalCombiningClass::DoubleBelow as u8,
    CanonicalCombiningClass::DoubleAbove as u8,
    235, 236, 237, 238, 239,
    CanonicalCombiningClass::IotaSubscript as u8,
    241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
    255, // RB_UNICODE_COMBINING_CLASS_INVALID
];
219
/// Extension methods for a Unicode general category value.
pub trait GeneralCategoryExt {
    /// Converts the category into its `u32` code.
    fn to_rb(&self) -> u32;

    /// Converts a `u32` code back into a category.
    ///
    /// NOTE(review): the implementation in this file hits `unreachable!()`
    /// for a code that matches no category, so callers must only pass codes
    /// produced by `to_rb`.
    fn from_rb(gc: u32) -> Self;

    /// Returns `true` for the mark categories (spacing, enclosing,
    /// nonspacing).
    fn is_mark(&self) -> bool;

    /// Returns `true` for the letter categories (lowercase, modifier, other,
    /// titlecase, uppercase).
    fn is_letter(&self) -> bool;
}
226
227#[rustfmt::skip]
228impl GeneralCategoryExt for hb_unicode_general_category_t {
229 fn to_rb(&self) -> u32 {
230 match *self {
231 hb_unicode_general_category_t::ClosePunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION,
232 hb_unicode_general_category_t::ConnectorPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION,
233 hb_unicode_general_category_t::Control => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONTROL,
234 hb_unicode_general_category_t::CurrencySymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL,
235 hb_unicode_general_category_t::DashPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION,
236 hb_unicode_general_category_t::DecimalNumber => hb_gc::RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER,
237 hb_unicode_general_category_t::EnclosingMark => hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK,
238 hb_unicode_general_category_t::FinalPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION,
239 hb_unicode_general_category_t::Format => hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT,
240 hb_unicode_general_category_t::InitialPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION,
241 hb_unicode_general_category_t::LetterNumber => hb_gc::RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER,
242 hb_unicode_general_category_t::LineSeparator => hb_gc::RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR,
243 hb_unicode_general_category_t::LowercaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER,
244 hb_unicode_general_category_t::MathSymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL,
245 hb_unicode_general_category_t::ModifierLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER,
246 hb_unicode_general_category_t::ModifierSymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL,
247 hb_unicode_general_category_t::NonspacingMark => hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,
248 hb_unicode_general_category_t::OpenPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION,
249 hb_unicode_general_category_t::OtherLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER,
250 hb_unicode_general_category_t::OtherNumber => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER,
251 hb_unicode_general_category_t::OtherPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION,
252 hb_unicode_general_category_t::OtherSymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL,
253 hb_unicode_general_category_t::ParagraphSeparator => hb_gc::RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR,
254 hb_unicode_general_category_t::PrivateUse => hb_gc::RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE,
255 hb_unicode_general_category_t::SpaceSeparator => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR,
256 hb_unicode_general_category_t::SpacingMark => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK,
257 hb_unicode_general_category_t::Surrogate => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SURROGATE,
258 hb_unicode_general_category_t::TitlecaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER,
259 hb_unicode_general_category_t::Unassigned => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED,
260 hb_unicode_general_category_t::UppercaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER
261 }
262 }
263
264 fn from_rb(gc: u32) -> Self {
265 match gc {
266 hb_gc::RB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION => hb_unicode_general_category_t::ClosePunctuation,
267 hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION => hb_unicode_general_category_t::ConnectorPunctuation,
268 hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONTROL => hb_unicode_general_category_t::Control,
269 hb_gc::RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL => hb_unicode_general_category_t::CurrencySymbol,
270 hb_gc::RB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION => hb_unicode_general_category_t::DashPunctuation,
271 hb_gc::RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER => hb_unicode_general_category_t::DecimalNumber,
272 hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK => hb_unicode_general_category_t::EnclosingMark,
273 hb_gc::RB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION => hb_unicode_general_category_t::FinalPunctuation,
274 hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT => hb_unicode_general_category_t::Format,
275 hb_gc::RB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION => hb_unicode_general_category_t::InitialPunctuation,
276 hb_gc::RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER => hb_unicode_general_category_t::LetterNumber,
277 hb_gc::RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR => hb_unicode_general_category_t::LineSeparator,
278 hb_gc::RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER => hb_unicode_general_category_t::LowercaseLetter,
279 hb_gc::RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL => hb_unicode_general_category_t::MathSymbol,
280 hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER => hb_unicode_general_category_t::ModifierLetter,
281 hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL => hb_unicode_general_category_t::ModifierSymbol,
282 hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK => hb_unicode_general_category_t::NonspacingMark,
283 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION => hb_unicode_general_category_t::OpenPunctuation,
284 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER => hb_unicode_general_category_t::OtherLetter,
285 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER => hb_unicode_general_category_t::OtherNumber,
286 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION => hb_unicode_general_category_t::OtherPunctuation,
287 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL => hb_unicode_general_category_t::OtherSymbol,
288 hb_gc::RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR => hb_unicode_general_category_t::ParagraphSeparator,
289 hb_gc::RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE => hb_unicode_general_category_t::PrivateUse,
290 hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR => hb_unicode_general_category_t::SpaceSeparator,
291 hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK => hb_unicode_general_category_t::SpacingMark,
292 hb_gc::RB_UNICODE_GENERAL_CATEGORY_SURROGATE => hb_unicode_general_category_t::Surrogate,
293 hb_gc::RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER => hb_unicode_general_category_t::TitlecaseLetter,
294 hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED => hb_unicode_general_category_t::Unassigned,
295 hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER => hb_unicode_general_category_t::UppercaseLetter,
296 _ => unreachable!()
297 }
298 }
299
300 fn is_mark(&self) -> bool {
301 matches!(*self,
302 hb_unicode_general_category_t::SpacingMark |
303 hb_unicode_general_category_t::EnclosingMark |
304 hb_unicode_general_category_t::NonspacingMark)
305 }
306
307 fn is_letter(&self) -> bool {
308 matches!(*self,
309 hb_unicode_general_category_t::LowercaseLetter |
310 hb_unicode_general_category_t::ModifierLetter |
311 hb_unicode_general_category_t::OtherLetter |
312 hb_unicode_general_category_t::TitlecaseLetter |
313 hb_unicode_general_category_t::UppercaseLetter)
314 }
315}
316
/// Unicode property lookups used by the shaper, implemented for `char`.
pub trait CharExt {
    /// Returns the crate-level `Script` of the character.
    fn script(self) -> Script;

    /// Returns the Unicode general category of the character.
    fn general_category(self) -> hb_unicode_general_category_t;

    /// Returns the space-fallback class of the character
    /// (one of the `hb_unicode_funcs_t` constants).
    fn space_fallback(self) -> hb_unicode_funcs_t::space_t;

    /// Returns the combining class of the character after the
    /// shaper-specific reordering overrides have been applied.
    fn modified_combining_class(self) -> u8;

    /// Returns the mirrored counterpart of the character, if it has one.
    fn mirrored(self) -> Option<char>;

    /// Returns `true` when the character is in the Extended_Pictographic set.
    fn is_emoji_extended_pictographic(self) -> bool;

    /// Returns `true` when the character should be treated as
    /// default-ignorable.
    fn is_default_ignorable(self) -> bool;

    /// Returns `true` for VARIATION SELECTOR-1..=256.
    fn is_variation_selector(self) -> bool;

    /// Returns the vertical presentation form of the character, if it has one.
    fn vertical(self) -> Option<char>;
}
328
329impl CharExt for char {
330 fn script(self) -> Script {
331 use crate::script;
332 use unicode_script as us;
333
334 match unicode_script::UnicodeScript::script(&self) {
335 us::Script::Common => script::COMMON,
336 us::Script::Inherited => script::INHERITED,
337 us::Script::Adlam => script::ADLAM,
338 us::Script::Ahom => script::AHOM,
339 us::Script::Anatolian_Hieroglyphs => script::ANATOLIAN_HIEROGLYPHS,
340 us::Script::Arabic => script::ARABIC,
341 us::Script::Armenian => script::ARMENIAN,
342 us::Script::Avestan => script::AVESTAN,
343 us::Script::Balinese => script::BALINESE,
344 us::Script::Bamum => script::BAMUM,
345 us::Script::Bassa_Vah => script::BASSA_VAH,
346 us::Script::Batak => script::BATAK,
347 us::Script::Bengali => script::BENGALI,
348 us::Script::Bhaiksuki => script::BHAIKSUKI,
349 us::Script::Bopomofo => script::BOPOMOFO,
350 us::Script::Brahmi => script::BRAHMI,
351 us::Script::Braille => script::BRAILLE,
352 us::Script::Buginese => script::BUGINESE,
353 us::Script::Buhid => script::BUHID,
354 us::Script::Canadian_Aboriginal => script::CANADIAN_SYLLABICS,
355 us::Script::Carian => script::CARIAN,
356 us::Script::Caucasian_Albanian => script::CAUCASIAN_ALBANIAN,
357 us::Script::Chakma => script::CHAKMA,
358 us::Script::Cham => script::CHAM,
359 us::Script::Cherokee => script::CHEROKEE,
360 us::Script::Chorasmian => script::CHORASMIAN,
361 us::Script::Coptic => script::COPTIC,
362 us::Script::Cuneiform => script::CUNEIFORM,
363 us::Script::Cypriot => script::CYPRIOT,
364 us::Script::Cyrillic => script::CYRILLIC,
365 us::Script::Deseret => script::DESERET,
366 us::Script::Devanagari => script::DEVANAGARI,
367 us::Script::Dives_Akuru => script::DIVES_AKURU,
368 us::Script::Dogra => script::DOGRA,
369 us::Script::Duployan => script::DUPLOYAN,
370 us::Script::Egyptian_Hieroglyphs => script::EGYPTIAN_HIEROGLYPHS,
371 us::Script::Elbasan => script::ELBASAN,
372 us::Script::Elymaic => script::ELYMAIC,
373 us::Script::Ethiopic => script::ETHIOPIC,
374 us::Script::Georgian => script::GEORGIAN,
375 us::Script::Glagolitic => script::GLAGOLITIC,
376 us::Script::Gothic => script::GOTHIC,
377 us::Script::Grantha => script::GRANTHA,
378 us::Script::Greek => script::GREEK,
379 us::Script::Gujarati => script::GUJARATI,
380 us::Script::Gunjala_Gondi => script::GUNJALA_GONDI,
381 us::Script::Gurmukhi => script::GURMUKHI,
382 us::Script::Han => script::HAN,
383 us::Script::Hangul => script::HANGUL,
384 us::Script::Hanifi_Rohingya => script::HANIFI_ROHINGYA,
385 us::Script::Hanunoo => script::HANUNOO,
386 us::Script::Hatran => script::HATRAN,
387 us::Script::Hebrew => script::HEBREW,
388 us::Script::Hiragana => script::HIRAGANA,
389 us::Script::Imperial_Aramaic => script::IMPERIAL_ARAMAIC,
390 us::Script::Inscriptional_Pahlavi => script::INSCRIPTIONAL_PAHLAVI,
391 us::Script::Inscriptional_Parthian => script::INSCRIPTIONAL_PARTHIAN,
392 us::Script::Javanese => script::JAVANESE,
393 us::Script::Kaithi => script::KAITHI,
394 us::Script::Kannada => script::KANNADA,
395 us::Script::Katakana => script::KATAKANA,
396 us::Script::Kayah_Li => script::KAYAH_LI,
397 us::Script::Kharoshthi => script::KHAROSHTHI,
398 us::Script::Khitan_Small_Script => script::KHITAN_SMALL_SCRIPT,
399 us::Script::Khmer => script::KHMER,
400 us::Script::Khojki => script::KHOJKI,
401 us::Script::Khudawadi => script::KHUDAWADI,
402 us::Script::Lao => script::LAO,
403 us::Script::Latin => script::LATIN,
404 us::Script::Lepcha => script::LEPCHA,
405 us::Script::Limbu => script::LIMBU,
406 us::Script::Linear_A => script::LINEAR_A,
407 us::Script::Linear_B => script::LINEAR_B,
408 us::Script::Lisu => script::LISU,
409 us::Script::Lycian => script::LYCIAN,
410 us::Script::Lydian => script::LYDIAN,
411 us::Script::Mahajani => script::MAHAJANI,
412 us::Script::Makasar => script::MAKASAR,
413 us::Script::Malayalam => script::MALAYALAM,
414 us::Script::Mandaic => script::MANDAIC,
415 us::Script::Manichaean => script::MANICHAEAN,
416 us::Script::Marchen => script::MARCHEN,
417 us::Script::Masaram_Gondi => script::MASARAM_GONDI,
418 us::Script::Medefaidrin => script::MEDEFAIDRIN,
419 us::Script::Meetei_Mayek => script::MEETEI_MAYEK,
420 us::Script::Mende_Kikakui => script::MENDE_KIKAKUI,
421 us::Script::Meroitic_Cursive => script::MEROITIC_CURSIVE,
422 us::Script::Meroitic_Hieroglyphs => script::MEROITIC_HIEROGLYPHS,
423 us::Script::Miao => script::MIAO,
424 us::Script::Modi => script::MODI,
425 us::Script::Mongolian => script::MONGOLIAN,
426 us::Script::Mro => script::MRO,
427 us::Script::Multani => script::MULTANI,
428 us::Script::Myanmar => script::MYANMAR,
429 us::Script::Nabataean => script::NABATAEAN,
430 us::Script::Nandinagari => script::NANDINAGARI,
431 us::Script::New_Tai_Lue => script::NEW_TAI_LUE,
432 us::Script::Newa => script::NEWA,
433 us::Script::Nko => script::NKO,
434 us::Script::Nushu => script::NUSHU,
435 us::Script::Nyiakeng_Puachue_Hmong => script::NYIAKENG_PUACHUE_HMONG,
436 us::Script::Ogham => script::OGHAM,
437 us::Script::Ol_Chiki => script::OL_CHIKI,
438 us::Script::Old_Hungarian => script::OLD_HUNGARIAN,
439 us::Script::Old_Italic => script::OLD_ITALIC,
440 us::Script::Old_North_Arabian => script::OLD_NORTH_ARABIAN,
441 us::Script::Old_Permic => script::OLD_PERMIC,
442 us::Script::Old_Persian => script::OLD_PERSIAN,
443 us::Script::Old_Sogdian => script::OLD_SOGDIAN,
444 us::Script::Old_South_Arabian => script::OLD_SOUTH_ARABIAN,
445 us::Script::Old_Turkic => script::OLD_TURKIC,
446 us::Script::Oriya => script::ORIYA,
447 us::Script::Osage => script::OSAGE,
448 us::Script::Osmanya => script::OSMANYA,
449 us::Script::Pahawh_Hmong => script::PAHAWH_HMONG,
450 us::Script::Palmyrene => script::PALMYRENE,
451 us::Script::Pau_Cin_Hau => script::PAU_CIN_HAU,
452 us::Script::Phags_Pa => script::PHAGS_PA,
453 us::Script::Phoenician => script::PHOENICIAN,
454 us::Script::Psalter_Pahlavi => script::PSALTER_PAHLAVI,
455 us::Script::Rejang => script::REJANG,
456 us::Script::Runic => script::RUNIC,
457 us::Script::Samaritan => script::SAMARITAN,
458 us::Script::Saurashtra => script::SAURASHTRA,
459 us::Script::Sharada => script::SHARADA,
460 us::Script::Shavian => script::SHAVIAN,
461 us::Script::Siddham => script::SIDDHAM,
462 us::Script::SignWriting => script::SIGNWRITING,
463 us::Script::Sinhala => script::SINHALA,
464 us::Script::Sogdian => script::SOGDIAN,
465 us::Script::Sora_Sompeng => script::SORA_SOMPENG,
466 us::Script::Soyombo => script::SOYOMBO,
467 us::Script::Sundanese => script::SUNDANESE,
468 us::Script::Syloti_Nagri => script::SYLOTI_NAGRI,
469 us::Script::Syriac => script::SYRIAC,
470 us::Script::Tagalog => script::TAGALOG,
471 us::Script::Tagbanwa => script::TAGBANWA,
472 us::Script::Tai_Le => script::TAI_LE,
473 us::Script::Tai_Tham => script::TAI_THAM,
474 us::Script::Tai_Viet => script::TAI_VIET,
475 us::Script::Takri => script::TAKRI,
476 us::Script::Tamil => script::TAMIL,
477 us::Script::Tangut => script::TANGUT,
478 us::Script::Telugu => script::TELUGU,
479 us::Script::Thaana => script::THAANA,
480 us::Script::Thai => script::THAI,
481 us::Script::Tibetan => script::TIBETAN,
482 us::Script::Tifinagh => script::TIFINAGH,
483 us::Script::Tirhuta => script::TIRHUTA,
484 us::Script::Ugaritic => script::UGARITIC,
485 us::Script::Vai => script::VAI,
486 us::Script::Wancho => script::WANCHO,
487 us::Script::Warang_Citi => script::WARANG_CITI,
488 us::Script::Yezidi => script::YEZIDI,
489 us::Script::Yi => script::YI,
490 us::Script::Zanabazar_Square => script::ZANABAZAR_SQUARE,
491 _ => script::UNKNOWN,
492 }
493 }
494
495 fn general_category(self) -> hb_unicode_general_category_t {
496 unicode_properties::general_category::UnicodeGeneralCategory::general_category(self)
497 }
498
499 fn space_fallback(self) -> hb_unicode_funcs_t::space_t {
500 use hb_unicode_funcs_t::*;
501
502 // All GC=Zs chars that can use a fallback.
503 match self {
504 '\u{0020}' => SPACE, // SPACE
505 '\u{00A0}' => SPACE, // NO-BREAK SPACE
506 '\u{2000}' => SPACE_EM_2, // EN QUAD
507 '\u{2001}' => SPACE_EM, // EM QUAD
508 '\u{2002}' => SPACE_EM_2, // EN SPACE
509 '\u{2003}' => SPACE_EM, // EM SPACE
510 '\u{2004}' => SPACE_EM_3, // THREE-PER-EM SPACE
511 '\u{2005}' => SPACE_EM_4, // FOUR-PER-EM SPACE
512 '\u{2006}' => SPACE_EM_6, // SIX-PER-EM SPACE
513 '\u{2007}' => SPACE_FIGURE, // FIGURE SPACE
514 '\u{2008}' => SPACE_PUNCTUATION, // PUNCTUATION SPACE
515 '\u{2009}' => SPACE_EM_5, // THIN SPACE
516 '\u{200A}' => SPACE_EM_16, // HAIR SPACE
517 '\u{202F}' => SPACE_NARROW, // NARROW NO-BREAK SPACE
518 '\u{205F}' => SPACE_4_EM_18, // MEDIUM MATHEMATICAL SPACE
519 '\u{3000}' => SPACE_EM, // IDEOGRAPHIC SPACE
520 _ => NOT_SPACE, // OGHAM SPACE MARK
521 }
522 }
523
524 fn modified_combining_class(self) -> u8 {
525 let u = self;
526
527 // Reorder SAKOT to ensure it comes after any tone marks.
528 if u == '\u{1A60}' {
529 return 254;
530 }
531
532 // Reorder PADMA to ensure it comes after any vowel marks.
533 if u == '\u{0FC6}' {
534 return 254;
535 }
536
537 // Reorder TSA -PHRU to reorder before U+0F74
538 if u == '\u{0F39}' {
539 return 127;
540 }
541
542 let k = unicode_ccc::get_canonical_combining_class(u);
543 MODIFIED_COMBINING_CLASS[k as usize]
544 }
545
546 fn mirrored(self) -> Option<char> {
547 unicode_bidi_mirroring::get_mirrored(self)
548 }
549
550 fn is_emoji_extended_pictographic(self) -> bool {
551 // Generated by scripts/gen-unicode-is-emoji-ext-pict.py
552 match self as u32 {
553 0x00A9 => true,
554 0x00AE => true,
555 0x203C => true,
556 0x2049 => true,
557 0x2122 => true,
558 0x2139 => true,
559 0x2194..=0x2199 => true,
560 0x21A9..=0x21AA => true,
561 0x231A..=0x231B => true,
562 0x2328 => true,
563 0x2388 => true,
564 0x23CF => true,
565 0x23E9..=0x23F3 => true,
566 0x23F8..=0x23FA => true,
567 0x24C2 => true,
568 0x25AA..=0x25AB => true,
569 0x25B6 => true,
570 0x25C0 => true,
571 0x25FB..=0x25FE => true,
572 0x2600..=0x2605 => true,
573 0x2607..=0x2612 => true,
574 0x2614..=0x2685 => true,
575 0x2690..=0x2705 => true,
576 0x2708..=0x2712 => true,
577 0x2714 => true,
578 0x2716 => true,
579 0x271D => true,
580 0x2721 => true,
581 0x2728 => true,
582 0x2733..=0x2734 => true,
583 0x2744 => true,
584 0x2747 => true,
585 0x274C => true,
586 0x274E => true,
587 0x2753..=0x2755 => true,
588 0x2757 => true,
589 0x2763..=0x2767 => true,
590 0x2795..=0x2797 => true,
591 0x27A1 => true,
592 0x27B0 => true,
593 0x27BF => true,
594 0x2934..=0x2935 => true,
595 0x2B05..=0x2B07 => true,
596 0x2B1B..=0x2B1C => true,
597 0x2B50 => true,
598 0x2B55 => true,
599 0x3030 => true,
600 0x303D => true,
601 0x3297 => true,
602 0x3299 => true,
603 0x1F000..=0x1F0FF => true,
604 0x1F10D..=0x1F10F => true,
605 0x1F12F => true,
606 0x1F16C..=0x1F171 => true,
607 0x1F17E..=0x1F17F => true,
608 0x1F18E => true,
609 0x1F191..=0x1F19A => true,
610 0x1F1AD..=0x1F1E5 => true,
611 0x1F201..=0x1F20F => true,
612 0x1F21A => true,
613 0x1F22F => true,
614 0x1F232..=0x1F23A => true,
615 0x1F23C..=0x1F23F => true,
616 0x1F249..=0x1F3FA => true,
617 0x1F400..=0x1F53D => true,
618 0x1F546..=0x1F64F => true,
619 0x1F680..=0x1F6FF => true,
620 0x1F774..=0x1F77F => true,
621 0x1F7D5..=0x1F7FF => true,
622 0x1F80C..=0x1F80F => true,
623 0x1F848..=0x1F84F => true,
624 0x1F85A..=0x1F85F => true,
625 0x1F888..=0x1F88F => true,
626 0x1F8AE..=0x1F8FF => true,
627 0x1F90C..=0x1F93A => true,
628 0x1F93C..=0x1F945 => true,
629 0x1F947..=0x1FAFF => true,
630 0x1FC00..=0x1FFFD => true,
631 _ => false,
632 }
633 }
634
635 /// Default_Ignorable codepoints:
636 ///
637 /// Note: While U+115F, U+1160, U+3164 and U+FFA0 are Default_Ignorable,
638 /// we do NOT want to hide them, as the way Uniscribe has implemented them
639 /// is with regular spacing glyphs, and that's the way fonts are made to work.
640 /// As such, we make exceptions for those four.
641 /// Also ignoring U+1BCA0..1BCA3. https://github.com/harfbuzz/harfbuzz/issues/503
642 ///
643 /// Unicode 14.0:
644 /// $ grep '; Default_Ignorable_Code_Point ' DerivedCoreProperties.txt | sed 's/;.*#/#/'
645 /// 00AD # Cf SOFT HYPHEN
646 /// 034F # Mn COMBINING GRAPHEME JOINER
647 /// 061C # Cf ARABIC LETTER MARK
648 /// 115F..1160 # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER
649 /// 17B4..17B5 # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
650 /// 180B..180D # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
651 /// 180E # Cf MONGOLIAN VOWEL SEPARATOR
652 /// 180F # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR
653 /// 200B..200F # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK
654 /// 202A..202E # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
655 /// 2060..2064 # Cf [5] WORD JOINER..INVISIBLE PLUS
656 /// 2065 # Cn <reserved-2065>
657 /// 2066..206F # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
658 /// 3164 # Lo HANGUL FILLER
659 /// FE00..FE0F # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
660 /// FEFF # Cf ZERO WIDTH NO-BREAK SPACE
661 /// FFA0 # Lo HALFWIDTH HANGUL FILLER
662 /// FFF0..FFF8 # Cn [9] <reserved-FFF0>..<reserved-FFF8>
663 /// 1BCA0..1BCA3 # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
664 /// 1D173..1D17A # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
665 /// E0000 # Cn <reserved-E0000>
666 /// E0001 # Cf LANGUAGE TAG
667 /// E0002..E001F # Cn [30] <reserved-E0002>..<reserved-E001F>
668 /// E0020..E007F # Cf [96] TAG SPACE..CANCEL TAG
669 /// E0080..E00FF # Cn [128] <reserved-E0080>..<reserved-E00FF>
670 /// E0100..E01EF # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
671 /// E01F0..E0FFF # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
672 fn is_default_ignorable(self) -> bool {
673 let ch = u32::from(self);
674 let plane = ch >> 16;
675 if plane == 0 {
676 // BMP
677 let page = ch >> 8;
678 match page {
679 0x00 => ch == 0x00AD,
680 0x03 => ch == 0x034F,
681 0x06 => ch == 0x061C,
682 0x17 => (0x17B4..=0x17B5).contains(&ch),
683 0x18 => (0x180B..=0x180E).contains(&ch),
684 0x20 => {
685 (0x200B..=0x200F).contains(&ch)
686 || (0x202A..=0x202E).contains(&ch)
687 || (0x2060..=0x206F).contains(&ch)
688 }
689 0xFE => (0xFE00..=0xFE0F).contains(&ch) || ch == 0xFEFF,
690 0xFF => (0xFFF0..=0xFFF8).contains(&ch),
691 _ => false,
692 }
693 } else {
694 // Other planes
695 match plane {
696 0x01 => (0x1D173..=0x1D17A).contains(&ch),
697 0x0E => (0xE0000..=0xE0FFF).contains(&ch),
698 _ => false,
699 }
700 }
701 }
702
703 fn is_variation_selector(self) -> bool {
704 // U+180B..180D, U+180F MONGOLIAN FREE VARIATION SELECTORs are handled in the
705 //Arabic shaper. No need to match them here.
706 let ch = u32::from(self);
707 (0x0FE00..=0x0FE0F).contains(&ch) || // VARIATION SELECTOR - 1..16
708 (0xE0100..=0xE01EF).contains(&ch) // VARIATION SELECTOR - 17..256
709 }
710
711 fn vertical(self) -> Option<char> {
712 Some(match u32::from(self) >> 8 {
713 0x20 => match self {
714 '\u{2013}' => '\u{fe32}', // EN DASH
715 '\u{2014}' => '\u{fe31}', // EM DASH
716 '\u{2025}' => '\u{fe30}', // TWO DOT LEADER
717 '\u{2026}' => '\u{fe19}', // HORIZONTAL ELLIPSIS
718 _ => return None,
719 },
720 0x30 => match self {
721 '\u{3001}' => '\u{fe11}', // IDEOGRAPHIC COMMA
722 '\u{3002}' => '\u{fe12}', // IDEOGRAPHIC FULL STOP
723 '\u{3008}' => '\u{fe3f}', // LEFT ANGLE BRACKET
724 '\u{3009}' => '\u{fe40}', // RIGHT ANGLE BRACKET
725 '\u{300a}' => '\u{fe3d}', // LEFT DOUBLE ANGLE BRACKET
726 '\u{300b}' => '\u{fe3e}', // RIGHT DOUBLE ANGLE BRACKET
727 '\u{300c}' => '\u{fe41}', // LEFT CORNER BRACKET
728 '\u{300d}' => '\u{fe42}', // RIGHT CORNER BRACKET
729 '\u{300e}' => '\u{fe43}', // LEFT WHITE CORNER BRACKET
730 '\u{300f}' => '\u{fe44}', // RIGHT WHITE CORNER BRACKET
731 '\u{3010}' => '\u{fe3b}', // LEFT BLACK LENTICULAR BRACKET
732 '\u{3011}' => '\u{fe3c}', // RIGHT BLACK LENTICULAR BRACKET
733 '\u{3014}' => '\u{fe39}', // LEFT TORTOISE SHELL BRACKET
734 '\u{3015}' => '\u{fe3a}', // RIGHT TORTOISE SHELL BRACKET
735 '\u{3016}' => '\u{fe17}', // LEFT WHITE LENTICULAR BRACKET
736 '\u{3017}' => '\u{fe18}', // RIGHT WHITE LENTICULAR BRACKET
737 _ => return None,
738 },
739 0xfe => match self {
740 '\u{fe4f}' => '\u{fe34}', // WAVY LOW LINE
741 _ => return None,
742 },
743 0xff => match self {
744 '\u{ff01}' => '\u{fe15}', // FULLWIDTH EXCLAMATION MARK
745 '\u{ff08}' => '\u{fe35}', // FULLWIDTH LEFT PARENTHESIS
746 '\u{ff09}' => '\u{fe36}', // FULLWIDTH RIGHT PARENTHESIS
747 '\u{ff0c}' => '\u{fe10}', // FULLWIDTH COMMA
748 '\u{ff1a}' => '\u{fe13}', // FULLWIDTH COLON
749 '\u{ff1b}' => '\u{fe14}', // FULLWIDTH SEMICOLON
750 '\u{ff1f}' => '\u{fe16}', // FULLWIDTH QUESTION MARK
751 '\u{ff3b}' => '\u{fe47}', // FULLWIDTH LEFT SQUARE BRACKET
752 '\u{ff3d}' => '\u{fe48}', // FULLWIDTH RIGHT SQUARE BRACKET
753 '\u{ff3f}' => '\u{fe33}', // FULLWIDTH LOW LINE
754 '\u{ff5b}' => '\u{fe37}', // FULLWIDTH LEFT CURLY BRACKET
755 '\u{ff5d}' => '\u{fe38}', // FULLWIDTH RIGHT CURLY BRACKET
756 _ => return None,
757 },
758 _ => return None,
759 })
760 }
761}
762
// Constants of the Hangul syllable composition/decomposition arithmetic
// (named after SBase, LBase, ... in the Unicode Standard, section 3.12).

/// First precomposed Hangul syllable.
const S_BASE: u32 = 0xAC00;
/// First leading consonant (choseong) jamo.
const L_BASE: u32 = 0x1100;
/// First vowel (jungseong) jamo.
const V_BASE: u32 = 0x1161;
/// One less than the first trailing consonant (jongseong) jamo, so that a
/// trailing index of 0 means "no trailing consonant".
const T_BASE: u32 = 0x11A7;
/// Number of leading consonants.
const L_COUNT: u32 = 19;
/// Number of vowels.
const V_COUNT: u32 = 21;
/// Number of trailing-consonant slots, including the "none" slot.
const T_COUNT: u32 = 28;
/// Number of syllables sharing one leading consonant.
const N_COUNT: u32 = V_COUNT * T_COUNT;
/// Total number of precomposed Hangul syllables.
const S_COUNT: u32 = L_COUNT * N_COUNT;
772
773pub fn compose(a: char, b: char) -> Option<char> {
774 if let Some(ab: char) = compose_hangul(a, b) {
775 return Some(ab);
776 }
777
778 let needle: u64 = (a as u64) << 32 | (b as u64);
779 superResult::unicode_norm::COMPOSITION_TABLE
780 .binary_search_by(|item| item.0.cmp(&needle))
781 .map(|idx: usize| super::unicode_norm::COMPOSITION_TABLE[idx].1)
782 .ok()
783}
784
785fn compose_hangul(a: char, b: char) -> Option<char> {
786 let l: u32 = u32::from(a);
787 let v: u32 = u32::from(b);
788 if L_BASE <= l && l < (L_BASE + L_COUNT) && V_BASE <= v && v < (V_BASE + V_COUNT) {
789 let r: u32 = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT;
790 Some(char::try_from(r).unwrap())
791 } else if S_BASE <= l
792 && l <= (S_BASE + S_COUNT - T_COUNT)
793 && T_BASE <= v
794 && v < (T_BASE + T_COUNT)
795 && (l - S_BASE) % T_COUNT == 0
796 {
797 let r: u32 = l + (v - T_BASE);
798 Some(char::try_from(r).unwrap())
799 } else {
800 None
801 }
802}
803
804pub fn decompose(ab: char) -> Option<(char, char)> {
805 if let Some(ab: (char, char)) = decompose_hangul(ab) {
806 return Some(ab);
807 }
808
809 superResult<(char, char), usize>::unicode_norm::DECOMPOSITION_TABLE
810 .binary_search_by(|item| item.0.cmp(&ab))
811 .map(|idx: usize| {
812 let chars: &(char, char, Option) = &super::unicode_norm::DECOMPOSITION_TABLE[idx];
813 (chars.1, chars.2.unwrap_or(default:'\0'))
814 })
815 .ok()
816}
817
818pub fn decompose_hangul(ab: char) -> Option<(char, char)> {
819 let si: u32 = u32::from(ab).wrapping_sub(S_BASE);
820 if si >= S_COUNT {
821 return None;
822 }
823
824 let (a: u32, b: u32) = if si % T_COUNT != 0 {
825 // LV,T
826 (S_BASE + (si / T_COUNT) * T_COUNT, T_BASE + (si % T_COUNT))
827 } else {
828 // L,V
829 (L_BASE + (si / N_COUNT), V_BASE + (si % N_COUNT) / T_COUNT)
830 };
831
832 Some((char::try_from(a).unwrap(), char::try_from(b).unwrap()))
833}
834
#[cfg(test)]
mod tests {
    // Asserts that each listed Unicode data crate, and this crate's own
    // `hb::unicode_norm` module, reports Unicode version 16.0.0, so a version
    // bump in only one of them is caught here.
    #[test]
    fn check_unicode_version() {
        assert_eq!(unicode_bidi_mirroring::UNICODE_VERSION, (16, 0, 0));
        assert_eq!(unicode_ccc::UNICODE_VERSION, (16, 0, 0));
        assert_eq!(unicode_properties::UNICODE_VERSION, (16, 0, 0));
        assert_eq!(unicode_script::UNICODE_VERSION, (16, 0, 0));
        assert_eq!(crate::hb::unicode_norm::UNICODE_VERSION, (16, 0, 0));
    }
}
846
// TODO: remove
/// Numeric identifiers for the Unicode general categories, exposed as plain
/// `u32` constants numbered consecutively from 0 to 29.
pub mod hb_gc {
    // Expands each `NAME = value` entry into `pub const NAME: u32 = value;`.
    macro_rules! general_categories {
        ($($name:ident = $value:expr),* $(,)?) => {
            $(pub const $name: u32 = $value;)*
        };
    }

    general_categories! {
        RB_UNICODE_GENERAL_CATEGORY_CONTROL = 0,
        RB_UNICODE_GENERAL_CATEGORY_FORMAT = 1,
        RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED = 2,
        RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE = 3,
        RB_UNICODE_GENERAL_CATEGORY_SURROGATE = 4,
        RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER = 5,
        RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER = 6,
        RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER = 7,
        RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER = 8,
        RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER = 9,
        RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK = 10,
        RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK = 11,
        RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK = 12,
        RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER = 13,
        RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER = 14,
        RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER = 15,
        RB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION = 16,
        RB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION = 17,
        RB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION = 18,
        RB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION = 19,
        RB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION = 20,
        RB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION = 21,
        RB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION = 22,
        RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL = 23,
        RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL = 24,
        RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL = 25,
        RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL = 26,
        RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR = 27,
        RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR = 28,
        RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR = 29,
    }
}
880