1use core::convert::TryFrom;
2
3pub use unicode_ccc::CanonicalCombiningClass;
4pub use unicode_properties::GeneralCategory; // TODO: prefer unic-ucd-normal::CanonicalCombiningClass
5
6use crate::Script;
7
8// Space estimates based on:
9// https://unicode.org/charts/PDF/U2000.pdf
10// https://docs.microsoft.com/en-us/typography/develop/character-design-standards/whitespace
/// Identifier of a space-width class; one of the constants in [`space`].
pub type Space = u8;

/// Space-width classes produced by `CharExt::space_fallback`.
///
/// `SPACE_EM_<n>` is the class used for the "<n>-per-em" spaces.
pub mod space {
    use super::Space;

    /// Used for EM QUAD, EM SPACE and IDEOGRAPHIC SPACE.
    pub const SPACE_EM: Space = 1;
    /// Used for EN QUAD and EN SPACE.
    pub const SPACE_EM_2: Space = 2;
    /// Used for THREE-PER-EM SPACE.
    pub const SPACE_EM_3: Space = 3;
    /// Used for FOUR-PER-EM SPACE.
    pub const SPACE_EM_4: Space = 4;
    /// Used for THIN SPACE.
    pub const SPACE_EM_5: Space = 5;
    /// Used for SIX-PER-EM SPACE.
    pub const SPACE_EM_6: Space = 6;
    /// Used for HAIR SPACE.
    pub const SPACE_EM_16: Space = 16;
    /// 4/18th of an EM! Used for MEDIUM MATHEMATICAL SPACE.
    pub const SPACE_4_EM_18: Space = 17;
    /// Used for SPACE and NO-BREAK SPACE.
    pub const SPACE: Space = 18;
    /// Used for FIGURE SPACE.
    pub const SPACE_FIGURE: Space = 19;
    /// Used for PUNCTUATION SPACE.
    pub const SPACE_PUNCTUATION: Space = 20;
    /// Used for NARROW NO-BREAK SPACE.
    pub const SPACE_NARROW: Space = 21;
}
26
/// Replacement values for selected canonical combining classes.
///
/// `CCC<n>` holds the value that combining class `n` is mapped to. A constant
/// equal to its own number leaves that class unchanged.
#[allow(dead_code)]
pub mod modified_combining_class {
    // ---------------------------------------------------------------------
    // Hebrew
    //
    // The "fixed-position" classes 10-26 are permuted into the order
    // described in the SBL Hebrew manual:
    //
    //   https://www.sbl-site.org/Fonts/SBLHebrewUserManual1.5x.pdf
    //
    // (as recommended by:
    //   https://forum.fontlab.com/archive-old-microsoft-volt-group/vista-and-diacritic-ordering/msg22823/)
    //
    // More details here:
    //   https://bugzilla.mozilla.org/show_bug.cgi?id=662055
    // ---------------------------------------------------------------------

    /// sheva
    pub const CCC10: u8 = 22;
    /// hataf segol
    pub const CCC11: u8 = 15;
    /// hataf patah
    pub const CCC12: u8 = 16;
    /// hataf qamats
    pub const CCC13: u8 = 17;
    /// hiriq
    pub const CCC14: u8 = 23;
    /// tsere
    pub const CCC15: u8 = 18;
    /// segol
    pub const CCC16: u8 = 19;
    /// patah
    pub const CCC17: u8 = 20;
    /// qamats & qamats qatan
    pub const CCC18: u8 = 21;
    /// holam & holam haser for vav
    pub const CCC19: u8 = 14;
    /// qubuts
    pub const CCC20: u8 = 24;
    /// dagesh
    pub const CCC21: u8 = 12;
    /// meteg
    pub const CCC22: u8 = 25;
    /// rafe
    pub const CCC23: u8 = 13;
    /// shin dot
    pub const CCC24: u8 = 10;
    /// sin dot
    pub const CCC25: u8 = 11;
    /// point varika
    pub const CCC26: u8 = 26;

    // ---------------------------------------------------------------------
    // Arabic
    //
    // Shadda (ccc=33) is moved before the other marks. See:
    //   https://unicode.org/faq/normalization.html#8
    //   https://unicode.org/faq/normalization.html#9
    // ---------------------------------------------------------------------

    /// fathatan
    pub const CCC27: u8 = 28;
    /// dammatan
    pub const CCC28: u8 = 29;
    /// kasratan
    pub const CCC29: u8 = 30;
    /// fatha
    pub const CCC30: u8 = 31;
    /// damma
    pub const CCC31: u8 = 32;
    /// kasra
    pub const CCC32: u8 = 33;
    /// shadda
    pub const CCC33: u8 = 27;
    /// sukun
    pub const CCC34: u8 = 34;
    /// superscript alef
    pub const CCC35: u8 = 35;

    // ---------------------------------------------------------------------
    // Syriac
    // ---------------------------------------------------------------------

    /// superscript alaph
    pub const CCC36: u8 = 36;

    // ---------------------------------------------------------------------
    // Telugu
    //
    // The Telugu length marks (ccc=84, ccc=91) are the only matras in the
    // main Indic scripts range that have a non-zero ccc. That makes them
    // reorder with the Halant that is ccc=9. Just zero them, we don't need
    // them in our Indic shaper.
    // ---------------------------------------------------------------------

    /// length mark
    pub const CCC84: u8 = 0;
    /// ai length mark
    pub const CCC91: u8 = 0;

    // ---------------------------------------------------------------------
    // Thai
    //
    // U+0E38 and U+0E39 (ccc=103) are reordered before U+0E3A (ccc=9) by
    // assigning them 3, which is unassigned otherwise.
    // Uniscribe does this reordering too.
    // ---------------------------------------------------------------------

    /// sara u / sara uu
    pub const CCC103: u8 = 3;
    /// mai *
    pub const CCC107: u8 = 107;

    // ---------------------------------------------------------------------
    // Lao
    // ---------------------------------------------------------------------

    /// sign u / sign uu
    pub const CCC118: u8 = 118;
    /// mai *
    pub const CCC122: u8 = 122;

    // ---------------------------------------------------------------------
    // Tibetan
    //
    // In case of multiple vowel-signs, use u first (but after achung);
    // this allows Dzongkha multi-vowel shortcuts to render correctly.
    // ---------------------------------------------------------------------

    /// sign aa
    pub const CCC129: u8 = 129;
    /// sign i
    pub const CCC130: u8 = 132;
    /// sign u
    pub const CCC132: u8 = 131;
}
106
// Lookup table from canonical combining class to modified combining class.
//
// The table is indexed by the numeric value of the canonical combining class
// (see `CharExt::modified_combining_class`, which indexes it with
// `k as usize`), so the position of every entry matters: entry `n` is the
// value class `n` is mapped to. Entries written as plain numbers map a class
// to itself; entries that name a `modified_combining_class::CCC<n>` constant
// take their value from that module.
#[rustfmt::skip]
const MODIFIED_COMBINING_CLASS: &[u8; 256] = &[
    CanonicalCombiningClass::NotReordered as u8,
    CanonicalCombiningClass::Overlay as u8,
    2, 3, 4, 5, 6,
    CanonicalCombiningClass::Nukta as u8,
    CanonicalCombiningClass::KanaVoicing as u8,
    CanonicalCombiningClass::Virama as u8,

    // Hebrew
    modified_combining_class::CCC10,
    modified_combining_class::CCC11,
    modified_combining_class::CCC12,
    modified_combining_class::CCC13,
    modified_combining_class::CCC14,
    modified_combining_class::CCC15,
    modified_combining_class::CCC16,
    modified_combining_class::CCC17,
    modified_combining_class::CCC18,
    modified_combining_class::CCC19,
    modified_combining_class::CCC20,
    modified_combining_class::CCC21,
    modified_combining_class::CCC22,
    modified_combining_class::CCC23,
    modified_combining_class::CCC24,
    modified_combining_class::CCC25,
    modified_combining_class::CCC26,

    // Arabic
    modified_combining_class::CCC27,
    modified_combining_class::CCC28,
    modified_combining_class::CCC29,
    modified_combining_class::CCC30,
    modified_combining_class::CCC31,
    modified_combining_class::CCC32,
    modified_combining_class::CCC33,
    modified_combining_class::CCC34,
    modified_combining_class::CCC35,

    // Syriac
    modified_combining_class::CCC36,

    37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
    60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 83,

    // Telugu
    modified_combining_class::CCC84,
    85, 86, 87, 88, 89, 90,
    modified_combining_class::CCC91,
    92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,

    // Thai
    modified_combining_class::CCC103,
    104, 105, 106,
    modified_combining_class::CCC107,
    108, 109, 110, 111, 112, 113, 114, 115, 116, 117,

    // Lao
    modified_combining_class::CCC118,
    119, 120, 121,
    modified_combining_class::CCC122,
    123, 124, 125, 126, 127, 128,

    // Tibetan
    modified_combining_class::CCC129,
    modified_combining_class::CCC130,
    131, // class 131 itself is left unchanged
    modified_combining_class::CCC132,
    133, 134, 135, 136, 137, 138, 139,


    140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
    150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
    170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
    180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
    190, 191, 192, 193, 194, 195, 196, 197, 198, 199,

    // Classes 200 and up: named classes are spelled via the enum, the gaps
    // between them are written as plain numbers.
    CanonicalCombiningClass::AttachedBelowLeft as u8,
    201,
    CanonicalCombiningClass::AttachedBelow as u8,
    203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
    CanonicalCombiningClass::AttachedAbove as u8,
    215,
    CanonicalCombiningClass::AttachedAboveRight as u8,
    217,
    CanonicalCombiningClass::BelowLeft as u8,
    219,
    CanonicalCombiningClass::Below as u8,
    221,
    CanonicalCombiningClass::BelowRight as u8,
    223,
    CanonicalCombiningClass::Left as u8,
    225,
    CanonicalCombiningClass::Right as u8,
    227,
    CanonicalCombiningClass::AboveLeft as u8,
    229,
    CanonicalCombiningClass::Above as u8,
    231,
    CanonicalCombiningClass::AboveRight as u8,
    CanonicalCombiningClass::DoubleBelow as u8,
    CanonicalCombiningClass::DoubleAbove as u8,
    235, 236, 237, 238, 239,
    CanonicalCombiningClass::IotaSubscript as u8,
    241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
    255, // RB_UNICODE_COMBINING_CLASS_INVALID
];
217
/// Extension methods for a Unicode general category value.
///
/// Implemented in this file for [`GeneralCategory`].
pub trait GeneralCategoryExt {
    /// Returns the numeric `RB_UNICODE_GENERAL_CATEGORY_*` code for this
    /// category.
    fn to_rb(&self) -> u32;
    /// Builds a category from a numeric `RB_UNICODE_GENERAL_CATEGORY_*` code
    /// (the inverse of [`to_rb`](Self::to_rb)).
    ///
    /// The `GeneralCategory` implementation panics on a code it does not
    /// recognise.
    fn from_rb(gc: u32) -> Self;
    /// Returns `true` for the mark categories (spacing, enclosing and
    /// nonspacing marks in the `GeneralCategory` implementation).
    fn is_mark(&self) -> bool;
    /// Returns `true` for the letter categories (lowercase, modifier, other,
    /// titlecase and uppercase letters in the `GeneralCategory`
    /// implementation).
    fn is_letter(&self) -> bool;
}
224
225#[rustfmt::skip]
226impl GeneralCategoryExt for GeneralCategory {
227 fn to_rb(&self) -> u32 {
228 match *self {
229 GeneralCategory::ClosePunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION,
230 GeneralCategory::ConnectorPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION,
231 GeneralCategory::Control => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONTROL,
232 GeneralCategory::CurrencySymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL,
233 GeneralCategory::DashPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION,
234 GeneralCategory::DecimalNumber => hb_gc::RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER,
235 GeneralCategory::EnclosingMark => hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK,
236 GeneralCategory::FinalPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION,
237 GeneralCategory::Format => hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT,
238 GeneralCategory::InitialPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION,
239 GeneralCategory::LetterNumber => hb_gc::RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER,
240 GeneralCategory::LineSeparator => hb_gc::RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR,
241 GeneralCategory::LowercaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER,
242 GeneralCategory::MathSymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL,
243 GeneralCategory::ModifierLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER,
244 GeneralCategory::ModifierSymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL,
245 GeneralCategory::NonspacingMark => hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,
246 GeneralCategory::OpenPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION,
247 GeneralCategory::OtherLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER,
248 GeneralCategory::OtherNumber => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER,
249 GeneralCategory::OtherPunctuation => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION,
250 GeneralCategory::OtherSymbol => hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL,
251 GeneralCategory::ParagraphSeparator => hb_gc::RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR,
252 GeneralCategory::PrivateUse => hb_gc::RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE,
253 GeneralCategory::SpaceSeparator => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR,
254 GeneralCategory::SpacingMark => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK,
255 GeneralCategory::Surrogate => hb_gc::RB_UNICODE_GENERAL_CATEGORY_SURROGATE,
256 GeneralCategory::TitlecaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER,
257 GeneralCategory::Unassigned => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED,
258 GeneralCategory::UppercaseLetter => hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER,
259 }
260 }
261
262 fn from_rb(gc: u32) -> Self {
263 match gc {
264 hb_gc::RB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION => GeneralCategory::ClosePunctuation,
265 hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION => GeneralCategory::ConnectorPunctuation,
266 hb_gc::RB_UNICODE_GENERAL_CATEGORY_CONTROL => GeneralCategory::Control,
267 hb_gc::RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL => GeneralCategory::CurrencySymbol,
268 hb_gc::RB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION => GeneralCategory::DashPunctuation,
269 hb_gc::RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER => GeneralCategory::DecimalNumber,
270 hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK => GeneralCategory::EnclosingMark,
271 hb_gc::RB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION => GeneralCategory::FinalPunctuation,
272 hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT => GeneralCategory::Format,
273 hb_gc::RB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION => GeneralCategory::InitialPunctuation,
274 hb_gc::RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER => GeneralCategory::LetterNumber,
275 hb_gc::RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR => GeneralCategory::LineSeparator,
276 hb_gc::RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER => GeneralCategory::LowercaseLetter,
277 hb_gc::RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL => GeneralCategory::MathSymbol,
278 hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER => GeneralCategory::ModifierLetter,
279 hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL => GeneralCategory::ModifierSymbol,
280 hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK => GeneralCategory::NonspacingMark,
281 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION => GeneralCategory::OpenPunctuation,
282 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER => GeneralCategory::OtherLetter,
283 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER => GeneralCategory::OtherNumber,
284 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION => GeneralCategory::OtherPunctuation,
285 hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL => GeneralCategory::OtherSymbol,
286 hb_gc::RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR => GeneralCategory::ParagraphSeparator,
287 hb_gc::RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE => GeneralCategory::PrivateUse,
288 hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR => GeneralCategory::SpaceSeparator,
289 hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK => GeneralCategory::SpacingMark,
290 hb_gc::RB_UNICODE_GENERAL_CATEGORY_SURROGATE => GeneralCategory::Surrogate,
291 hb_gc::RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER => GeneralCategory::TitlecaseLetter,
292 hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED => GeneralCategory::Unassigned,
293 hb_gc::RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER => GeneralCategory::UppercaseLetter,
294 _ => unreachable!(),
295 }
296 }
297
298 fn is_mark(&self) -> bool {
299 match *self {
300 GeneralCategory::SpacingMark |
301 GeneralCategory::EnclosingMark |
302 GeneralCategory::NonspacingMark => true,
303 _ => false,
304 }
305 }
306
307 fn is_letter(&self) -> bool {
308 match *self {
309 GeneralCategory::LowercaseLetter |
310 GeneralCategory::ModifierLetter |
311 GeneralCategory::OtherLetter |
312 GeneralCategory::TitlecaseLetter |
313 GeneralCategory::UppercaseLetter => true,
314 _ => false,
315 }
316 }
317}
318
/// Unicode property lookups used by the shaper; implemented for `char` in
/// this file.
pub trait CharExt {
    /// Returns the script of the character as a crate [`Script`].
    fn script(self) -> Script;
    /// Returns the general category of the character.
    fn general_category(self) -> GeneralCategory;
    /// Returns the canonical combining class of the character.
    fn combining_class(self) -> CanonicalCombiningClass;
    /// Returns the fallback space-width class (a `space::*` constant) for
    /// space characters that have one, `None` otherwise.
    fn space_fallback(self) -> Option<Space>;
    /// Returns the combining class after the remapping described in the
    /// `modified_combining_class` module, plus a few per-character overrides.
    fn modified_combining_class(self) -> u8;
    /// Returns the bidi-mirrored counterpart of the character, if any.
    fn mirrored(self) -> Option<char>;
    /// Returns `true` when the character is in the generated
    /// Extended_Pictographic table.
    fn is_emoji_extended_pictographic(self) -> bool;
    /// Returns `true` when the character is treated as default-ignorable.
    fn is_default_ignorable(self) -> bool;
    /// Returns `true` for VARIATION SELECTOR-1..256.
    fn is_variation_selector(self) -> bool;
    /// Returns the vertical presentation form of the character, if one is
    /// listed.
    fn vertical(self) -> Option<char>;
}
331
332impl CharExt for char {
    /// Looks the character's script up with the `unicode_script` crate and
    /// translates it to the matching `crate::script` constant.
    ///
    /// Any script value not listed below maps to `script::UNKNOWN`.
    fn script(self) -> Script {
        use crate::script;
        use unicode_script as us;

        match unicode_script::UnicodeScript::script(&self) {
            us::Script::Common => script::COMMON,
            us::Script::Inherited => script::INHERITED,
            us::Script::Adlam => script::ADLAM,
            us::Script::Ahom => script::AHOM,
            us::Script::Anatolian_Hieroglyphs => script::ANATOLIAN_HIEROGLYPHS,
            us::Script::Arabic => script::ARABIC,
            us::Script::Armenian => script::ARMENIAN,
            us::Script::Avestan => script::AVESTAN,
            us::Script::Balinese => script::BALINESE,
            us::Script::Bamum => script::BAMUM,
            us::Script::Bassa_Vah => script::BASSA_VAH,
            us::Script::Batak => script::BATAK,
            us::Script::Bengali => script::BENGALI,
            us::Script::Bhaiksuki => script::BHAIKSUKI,
            us::Script::Bopomofo => script::BOPOMOFO,
            us::Script::Brahmi => script::BRAHMI,
            us::Script::Braille => script::BRAILLE,
            us::Script::Buginese => script::BUGINESE,
            us::Script::Buhid => script::BUHID,
            // The crate-side constant is named differently from the
            // `unicode_script` variant for this one.
            us::Script::Canadian_Aboriginal => script::CANADIAN_SYLLABICS,
            us::Script::Carian => script::CARIAN,
            us::Script::Caucasian_Albanian => script::CAUCASIAN_ALBANIAN,
            us::Script::Chakma => script::CHAKMA,
            us::Script::Cham => script::CHAM,
            us::Script::Cherokee => script::CHEROKEE,
            us::Script::Chorasmian => script::CHORASMIAN,
            us::Script::Coptic => script::COPTIC,
            us::Script::Cuneiform => script::CUNEIFORM,
            us::Script::Cypriot => script::CYPRIOT,
            us::Script::Cyrillic => script::CYRILLIC,
            us::Script::Deseret => script::DESERET,
            us::Script::Devanagari => script::DEVANAGARI,
            us::Script::Dives_Akuru => script::DIVES_AKURU,
            us::Script::Dogra => script::DOGRA,
            us::Script::Duployan => script::DUPLOYAN,
            us::Script::Egyptian_Hieroglyphs => script::EGYPTIAN_HIEROGLYPHS,
            us::Script::Elbasan => script::ELBASAN,
            us::Script::Elymaic => script::ELYMAIC,
            us::Script::Ethiopic => script::ETHIOPIC,
            us::Script::Georgian => script::GEORGIAN,
            us::Script::Glagolitic => script::GLAGOLITIC,
            us::Script::Gothic => script::GOTHIC,
            us::Script::Grantha => script::GRANTHA,
            us::Script::Greek => script::GREEK,
            us::Script::Gujarati => script::GUJARATI,
            us::Script::Gunjala_Gondi => script::GUNJALA_GONDI,
            us::Script::Gurmukhi => script::GURMUKHI,
            us::Script::Han => script::HAN,
            us::Script::Hangul => script::HANGUL,
            us::Script::Hanifi_Rohingya => script::HANIFI_ROHINGYA,
            us::Script::Hanunoo => script::HANUNOO,
            us::Script::Hatran => script::HATRAN,
            us::Script::Hebrew => script::HEBREW,
            us::Script::Hiragana => script::HIRAGANA,
            us::Script::Imperial_Aramaic => script::IMPERIAL_ARAMAIC,
            us::Script::Inscriptional_Pahlavi => script::INSCRIPTIONAL_PAHLAVI,
            us::Script::Inscriptional_Parthian => script::INSCRIPTIONAL_PARTHIAN,
            us::Script::Javanese => script::JAVANESE,
            us::Script::Kaithi => script::KAITHI,
            us::Script::Kannada => script::KANNADA,
            us::Script::Katakana => script::KATAKANA,
            us::Script::Kayah_Li => script::KAYAH_LI,
            us::Script::Kharoshthi => script::KHAROSHTHI,
            us::Script::Khitan_Small_Script => script::KHITAN_SMALL_SCRIPT,
            us::Script::Khmer => script::KHMER,
            us::Script::Khojki => script::KHOJKI,
            us::Script::Khudawadi => script::KHUDAWADI,
            us::Script::Lao => script::LAO,
            us::Script::Latin => script::LATIN,
            us::Script::Lepcha => script::LEPCHA,
            us::Script::Limbu => script::LIMBU,
            us::Script::Linear_A => script::LINEAR_A,
            us::Script::Linear_B => script::LINEAR_B,
            us::Script::Lisu => script::LISU,
            us::Script::Lycian => script::LYCIAN,
            us::Script::Lydian => script::LYDIAN,
            us::Script::Mahajani => script::MAHAJANI,
            us::Script::Makasar => script::MAKASAR,
            us::Script::Malayalam => script::MALAYALAM,
            us::Script::Mandaic => script::MANDAIC,
            us::Script::Manichaean => script::MANICHAEAN,
            us::Script::Marchen => script::MARCHEN,
            us::Script::Masaram_Gondi => script::MASARAM_GONDI,
            us::Script::Medefaidrin => script::MEDEFAIDRIN,
            us::Script::Meetei_Mayek => script::MEETEI_MAYEK,
            us::Script::Mende_Kikakui => script::MENDE_KIKAKUI,
            us::Script::Meroitic_Cursive => script::MEROITIC_CURSIVE,
            us::Script::Meroitic_Hieroglyphs => script::MEROITIC_HIEROGLYPHS,
            us::Script::Miao => script::MIAO,
            us::Script::Modi => script::MODI,
            us::Script::Mongolian => script::MONGOLIAN,
            us::Script::Mro => script::MRO,
            us::Script::Multani => script::MULTANI,
            us::Script::Myanmar => script::MYANMAR,
            us::Script::Nabataean => script::NABATAEAN,
            us::Script::Nandinagari => script::NANDINAGARI,
            us::Script::New_Tai_Lue => script::NEW_TAI_LUE,
            us::Script::Newa => script::NEWA,
            us::Script::Nko => script::NKO,
            us::Script::Nushu => script::NUSHU,
            us::Script::Nyiakeng_Puachue_Hmong => script::NYIAKENG_PUACHUE_HMONG,
            us::Script::Ogham => script::OGHAM,
            us::Script::Ol_Chiki => script::OL_CHIKI,
            us::Script::Old_Hungarian => script::OLD_HUNGARIAN,
            us::Script::Old_Italic => script::OLD_ITALIC,
            us::Script::Old_North_Arabian => script::OLD_NORTH_ARABIAN,
            us::Script::Old_Permic => script::OLD_PERMIC,
            us::Script::Old_Persian => script::OLD_PERSIAN,
            us::Script::Old_Sogdian => script::OLD_SOGDIAN,
            us::Script::Old_South_Arabian => script::OLD_SOUTH_ARABIAN,
            us::Script::Old_Turkic => script::OLD_TURKIC,
            us::Script::Oriya => script::ORIYA,
            us::Script::Osage => script::OSAGE,
            us::Script::Osmanya => script::OSMANYA,
            us::Script::Pahawh_Hmong => script::PAHAWH_HMONG,
            us::Script::Palmyrene => script::PALMYRENE,
            us::Script::Pau_Cin_Hau => script::PAU_CIN_HAU,
            us::Script::Phags_Pa => script::PHAGS_PA,
            us::Script::Phoenician => script::PHOENICIAN,
            us::Script::Psalter_Pahlavi => script::PSALTER_PAHLAVI,
            us::Script::Rejang => script::REJANG,
            us::Script::Runic => script::RUNIC,
            us::Script::Samaritan => script::SAMARITAN,
            us::Script::Saurashtra => script::SAURASHTRA,
            us::Script::Sharada => script::SHARADA,
            us::Script::Shavian => script::SHAVIAN,
            us::Script::Siddham => script::SIDDHAM,
            us::Script::SignWriting => script::SIGNWRITING,
            us::Script::Sinhala => script::SINHALA,
            us::Script::Sogdian => script::SOGDIAN,
            us::Script::Sora_Sompeng => script::SORA_SOMPENG,
            us::Script::Soyombo => script::SOYOMBO,
            us::Script::Sundanese => script::SUNDANESE,
            us::Script::Syloti_Nagri => script::SYLOTI_NAGRI,
            us::Script::Syriac => script::SYRIAC,
            us::Script::Tagalog => script::TAGALOG,
            us::Script::Tagbanwa => script::TAGBANWA,
            us::Script::Tai_Le => script::TAI_LE,
            us::Script::Tai_Tham => script::TAI_THAM,
            us::Script::Tai_Viet => script::TAI_VIET,
            us::Script::Takri => script::TAKRI,
            us::Script::Tamil => script::TAMIL,
            us::Script::Tangut => script::TANGUT,
            us::Script::Telugu => script::TELUGU,
            us::Script::Thaana => script::THAANA,
            us::Script::Thai => script::THAI,
            us::Script::Tibetan => script::TIBETAN,
            us::Script::Tifinagh => script::TIFINAGH,
            us::Script::Tirhuta => script::TIRHUTA,
            us::Script::Ugaritic => script::UGARITIC,
            us::Script::Vai => script::VAI,
            us::Script::Wancho => script::WANCHO,
            us::Script::Warang_Citi => script::WARANG_CITI,
            us::Script::Yezidi => script::YEZIDI,
            us::Script::Yi => script::YI,
            us::Script::Zanabazar_Square => script::ZANABAZAR_SQUARE,
            // Fallback for every `unicode_script` value without an arm above.
            _ => script::UNKNOWN,
        }
    }
497
    /// Returns the general category reported by the `unicode_properties`
    /// crate for this character.
    ///
    /// The trait method is called by its full path because this `impl` also
    /// defines a method named `general_category` on `char`.
    fn general_category(self) -> GeneralCategory {
        unicode_properties::general_category::UnicodeGeneralCategory::general_category(self)
    }
501
    /// Returns the canonical combining class reported by the `unicode_ccc`
    /// crate for this character (unmodified; see `modified_combining_class`
    /// for the remapped value).
    fn combining_class(self) -> CanonicalCombiningClass {
        unicode_ccc::get_canonical_combining_class(self)
    }
505
506 fn space_fallback(self) -> Option<Space> {
507 // All GC=Zs chars that can use a fallback.
508 match self {
509 '\u{0020}' => Some(space::SPACE), // SPACE
510 '\u{00A0}' => Some(space::SPACE), // NO-BREAK SPACE
511 '\u{2000}' => Some(space::SPACE_EM_2), // EN QUAD
512 '\u{2001}' => Some(space::SPACE_EM), // EM QUAD
513 '\u{2002}' => Some(space::SPACE_EM_2), // EN SPACE
514 '\u{2003}' => Some(space::SPACE_EM), // EM SPACE
515 '\u{2004}' => Some(space::SPACE_EM_3), // THREE-PER-EM SPACE
516 '\u{2005}' => Some(space::SPACE_EM_4), // FOUR-PER-EM SPACE
517 '\u{2006}' => Some(space::SPACE_EM_6), // SIX-PER-EM SPACE
518 '\u{2007}' => Some(space::SPACE_FIGURE), // FIGURE SPACE
519 '\u{2008}' => Some(space::SPACE_PUNCTUATION), // PUNCTUATION SPACE
520 '\u{2009}' => Some(space::SPACE_EM_5), // THIN SPACE
521 '\u{200A}' => Some(space::SPACE_EM_16), // HAIR SPACE
522 '\u{202F}' => Some(space::SPACE_NARROW), // NARROW NO-BREAK SPACE
523 '\u{205F}' => Some(space::SPACE_4_EM_18), // MEDIUM MATHEMATICAL SPACE
524 '\u{3000}' => Some(space::SPACE_EM), // IDEOGRAPHIC SPACE
525 _ => None, // OGHAM SPACE MARK
526 }
527 }
528
529 fn modified_combining_class(self) -> u8 {
530 let mut u = self;
531
532 // XXX This hack belongs to the Myanmar shaper.
533 if u == '\u{1037}' {
534 u = '\u{103A}';
535 }
536
537 // XXX This hack belongs to the USE shaper (for Tai Tham):
538 // Reorder SAKOT to ensure it comes after any tone marks.
539 if u == '\u{1A60}' {
540 return 254;
541 }
542
543 // XXX This hack belongs to the Tibetan shaper:
544 // Reorder PADMA to ensure it comes after any vowel marks.
545 if u == '\u{0FC6}' {
546 return 254;
547 }
548
549 // Reorder TSA -PHRU to reorder before U+0F74
550 if u == '\u{0F39}' {
551 return 127;
552 }
553
554 let k = unicode_ccc::get_canonical_combining_class(u);
555 MODIFIED_COMBINING_CLASS[k as usize]
556 }
557
    /// Returns the mirrored counterpart of the character as reported by the
    /// `unicode_bidi_mirroring` crate, or `None` when it reports none.
    fn mirrored(self) -> Option<char> {
        unicode_bidi_mirroring::get_mirrored(self)
    }
561
562 fn is_emoji_extended_pictographic(self) -> bool {
563 // Generated by scripts/gen-unicode-is-emoji-ext-pict.py
564 match self as u32 {
565 0x00A9 => true,
566 0x00AE => true,
567 0x203C => true,
568 0x2049 => true,
569 0x2122 => true,
570 0x2139 => true,
571 0x2194..=0x2199 => true,
572 0x21A9..=0x21AA => true,
573 0x231A..=0x231B => true,
574 0x2328 => true,
575 0x2388 => true,
576 0x23CF => true,
577 0x23E9..=0x23F3 => true,
578 0x23F8..=0x23FA => true,
579 0x24C2 => true,
580 0x25AA..=0x25AB => true,
581 0x25B6 => true,
582 0x25C0 => true,
583 0x25FB..=0x25FE => true,
584 0x2600..=0x2605 => true,
585 0x2607..=0x2612 => true,
586 0x2614..=0x2685 => true,
587 0x2690..=0x2705 => true,
588 0x2708..=0x2712 => true,
589 0x2714 => true,
590 0x2716 => true,
591 0x271D => true,
592 0x2721 => true,
593 0x2728 => true,
594 0x2733..=0x2734 => true,
595 0x2744 => true,
596 0x2747 => true,
597 0x274C => true,
598 0x274E => true,
599 0x2753..=0x2755 => true,
600 0x2757 => true,
601 0x2763..=0x2767 => true,
602 0x2795..=0x2797 => true,
603 0x27A1 => true,
604 0x27B0 => true,
605 0x27BF => true,
606 0x2934..=0x2935 => true,
607 0x2B05..=0x2B07 => true,
608 0x2B1B..=0x2B1C => true,
609 0x2B50 => true,
610 0x2B55 => true,
611 0x3030 => true,
612 0x303D => true,
613 0x3297 => true,
614 0x3299 => true,
615 0x1F000..=0x1F0FF => true,
616 0x1F10D..=0x1F10F => true,
617 0x1F12F => true,
618 0x1F16C..=0x1F171 => true,
619 0x1F17E..=0x1F17F => true,
620 0x1F18E => true,
621 0x1F191..=0x1F19A => true,
622 0x1F1AD..=0x1F1E5 => true,
623 0x1F201..=0x1F20F => true,
624 0x1F21A => true,
625 0x1F22F => true,
626 0x1F232..=0x1F23A => true,
627 0x1F23C..=0x1F23F => true,
628 0x1F249..=0x1F3FA => true,
629 0x1F400..=0x1F53D => true,
630 0x1F546..=0x1F64F => true,
631 0x1F680..=0x1F6FF => true,
632 0x1F774..=0x1F77F => true,
633 0x1F7D5..=0x1F7FF => true,
634 0x1F80C..=0x1F80F => true,
635 0x1F848..=0x1F84F => true,
636 0x1F85A..=0x1F85F => true,
637 0x1F888..=0x1F88F => true,
638 0x1F8AE..=0x1F8FF => true,
639 0x1F90C..=0x1F93A => true,
640 0x1F93C..=0x1F945 => true,
641 0x1F947..=0x1FFFD => true,
642 _ => false,
643 }
644 }
645
646 /// Default_Ignorable codepoints:
647 ///
648 /// Note: While U+115F, U+1160, U+3164 and U+FFA0 are Default_Ignorable,
649 /// we do NOT want to hide them, as the way Uniscribe has implemented them
650 /// is with regular spacing glyphs, and that's the way fonts are made to work.
651 /// As such, we make exceptions for those four.
652 /// Also ignoring U+1BCA0..1BCA3. https://github.com/harfbuzz/harfbuzz/issues/503
653 ///
654 /// Unicode 14.0:
655 /// $ grep '; Default_Ignorable_Code_Point ' DerivedCoreProperties.txt | sed 's/;.*#/#/'
656 /// 00AD # Cf SOFT HYPHEN
657 /// 034F # Mn COMBINING GRAPHEME JOINER
658 /// 061C # Cf ARABIC LETTER MARK
659 /// 115F..1160 # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER
660 /// 17B4..17B5 # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
661 /// 180B..180D # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
662 /// 180E # Cf MONGOLIAN VOWEL SEPARATOR
663 /// 180F # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR
664 /// 200B..200F # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK
665 /// 202A..202E # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
666 /// 2060..2064 # Cf [5] WORD JOINER..INVISIBLE PLUS
667 /// 2065 # Cn <reserved-2065>
668 /// 2066..206F # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
669 /// 3164 # Lo HANGUL FILLER
670 /// FE00..FE0F # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
671 /// FEFF # Cf ZERO WIDTH NO-BREAK SPACE
672 /// FFA0 # Lo HALFWIDTH HANGUL FILLER
673 /// FFF0..FFF8 # Cn [9] <reserved-FFF0>..<reserved-FFF8>
674 /// 1BCA0..1BCA3 # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
675 /// 1D173..1D17A # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
676 /// E0000 # Cn <reserved-E0000>
677 /// E0001 # Cf LANGUAGE TAG
678 /// E0002..E001F # Cn [30] <reserved-E0002>..<reserved-E001F>
679 /// E0020..E007F # Cf [96] TAG SPACE..CANCEL TAG
680 /// E0080..E00FF # Cn [128] <reserved-E0080>..<reserved-E00FF>
681 /// E0100..E01EF # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
682 /// E01F0..E0FFF # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
683 fn is_default_ignorable(self) -> bool {
684 let ch = u32::from(self);
685 let plane = ch >> 16;
686 if plane == 0 {
687 // BMP
688 let page = ch >> 8;
689 match page {
690 0x00 => ch == 0x00AD,
691 0x03 => ch == 0x034F,
692 0x06 => ch == 0x061C,
693 0x17 => (0x17B4..=0x17B5).contains(&ch),
694 0x18 => (0x180B..=0x180E).contains(&ch),
695 0x20 => {
696 (0x200B..=0x200F).contains(&ch)
697 || (0x202A..=0x202E).contains(&ch)
698 || (0x2060..=0x206F).contains(&ch)
699 }
700 0xFE => (0xFE00..=0xFE0F).contains(&ch) || ch == 0xFEFF,
701 0xFF => (0xFFF0..=0xFFF8).contains(&ch),
702 _ => false,
703 }
704 } else {
705 // Other planes
706 match plane {
707 0x01 => (0x1D173..=0x1D17A).contains(&ch),
708 0x0E => (0xE0000..=0xE0FFF).contains(&ch),
709 _ => false,
710 }
711 }
712 }
713
714 fn is_variation_selector(self) -> bool {
715 // U+180B..180D, U+180F MONGOLIAN FREE VARIATION SELECTORs are handled in the
716 //Arabic shaper. No need to match them here.
717 let ch = u32::from(self);
718 (0x0FE00..=0x0FE0F).contains(&ch) || // VARIATION SELECTOR - 1..16
719 (0xE0100..=0xE01EF).contains(&ch) // VARIATION SELECTOR - 17..256
720 }
721
722 fn vertical(self) -> Option<char> {
723 Some(match u32::from(self) >> 8 {
724 0x20 => match self {
725 '\u{2013}' => '\u{fe32}', // EN DASH
726 '\u{2014}' => '\u{fe31}', // EM DASH
727 '\u{2025}' => '\u{fe30}', // TWO DOT LEADER
728 '\u{2026}' => '\u{fe19}', // HORIZONTAL ELLIPSIS
729 _ => return None,
730 },
731 0x30 => match self {
732 '\u{3001}' => '\u{fe11}', // IDEOGRAPHIC COMMA
733 '\u{3002}' => '\u{fe12}', // IDEOGRAPHIC FULL STOP
734 '\u{3008}' => '\u{fe3f}', // LEFT ANGLE BRACKET
735 '\u{3009}' => '\u{fe40}', // RIGHT ANGLE BRACKET
736 '\u{300a}' => '\u{fe3d}', // LEFT DOUBLE ANGLE BRACKET
737 '\u{300b}' => '\u{fe3e}', // RIGHT DOUBLE ANGLE BRACKET
738 '\u{300c}' => '\u{fe41}', // LEFT CORNER BRACKET
739 '\u{300d}' => '\u{fe42}', // RIGHT CORNER BRACKET
740 '\u{300e}' => '\u{fe43}', // LEFT WHITE CORNER BRACKET
741 '\u{300f}' => '\u{fe44}', // RIGHT WHITE CORNER BRACKET
742 '\u{3010}' => '\u{fe3b}', // LEFT BLACK LENTICULAR BRACKET
743 '\u{3011}' => '\u{fe3c}', // RIGHT BLACK LENTICULAR BRACKET
744 '\u{3014}' => '\u{fe39}', // LEFT TORTOISE SHELL BRACKET
745 '\u{3015}' => '\u{fe3a}', // RIGHT TORTOISE SHELL BRACKET
746 '\u{3016}' => '\u{fe17}', // LEFT WHITE LENTICULAR BRACKET
747 '\u{3017}' => '\u{fe18}', // RIGHT WHITE LENTICULAR BRACKET
748 _ => return None,
749 },
750 0xfe => match self {
751 '\u{fe4f}' => '\u{fe34}', // WAVY LOW LINE
752 _ => return None,
753 },
754 0xff => match self {
755 '\u{ff01}' => '\u{fe15}', // FULLWIDTH EXCLAMATION MARK
756 '\u{ff08}' => '\u{fe35}', // FULLWIDTH LEFT PARENTHESIS
757 '\u{ff09}' => '\u{fe36}', // FULLWIDTH RIGHT PARENTHESIS
758 '\u{ff0c}' => '\u{fe10}', // FULLWIDTH COMMA
759 '\u{ff1a}' => '\u{fe13}', // FULLWIDTH COLON
760 '\u{ff1b}' => '\u{fe14}', // FULLWIDTH SEMICOLON
761 '\u{ff1f}' => '\u{fe16}', // FULLWIDTH QUESTION MARK
762 '\u{ff3b}' => '\u{fe47}', // FULLWIDTH LEFT SQUARE BRACKET
763 '\u{ff3d}' => '\u{fe48}', // FULLWIDTH RIGHT SQUARE BRACKET
764 '\u{ff3f}' => '\u{fe33}', // FULLWIDTH LOW LINE
765 '\u{ff5b}' => '\u{fe37}', // FULLWIDTH LEFT CURLY BRACKET
766 '\u{ff5d}' => '\u{fe38}', // FULLWIDTH RIGHT CURLY BRACKET
767 _ => return None,
768 },
769 _ => return None,
770 })
771 }
772}
773
// Constants for the arithmetic Hangul composition / decomposition done by
// `compose_hangul` and `decompose_hangul` (L = leading, V = vowel,
// T = trailing, S = precomposed syllable).

// Code point of the first precomposed syllable.
const S_BASE: u32 = 0xAC00;
// Code point of the first leading jamo.
const L_BASE: u32 = 0x1100;
// Code point of the first vowel jamo.
const V_BASE: u32 = 0x1161;
// One below the first trailing jamo: trailing index 0 means "no trailing
// jamo" (an LV syllable), so real trailing jamo start at T_BASE + 1.
const T_BASE: u32 = 0x11A7;
// Number of leading jamo.
const L_COUNT: u32 = 19;
// Number of vowel jamo.
const V_COUNT: u32 = 21;
// Number of trailing indices, including index 0 for "none".
const T_COUNT: u32 = 28;
// Number of syllables per leading jamo.
const N_COUNT: u32 = V_COUNT * T_COUNT;
// Total number of precomposed syllables.
const S_COUNT: u32 = L_COUNT * N_COUNT;
783
784pub fn compose(a: char, b: char) -> Option<char> {
785 if let Some(ab: char) = compose_hangul(a, b) {
786 return Some(ab);
787 }
788
789 let needle: u64 = (a as u64) << 32 | (b as u64);
790 crateResult::unicode_norm::COMPOSITION_TABLE
791 .binary_search_by(|item| item.0.cmp(&needle))
792 .map(|idx: usize| crate::unicode_norm::COMPOSITION_TABLE[idx].1)
793 .ok()
794}
795
796fn compose_hangul(a: char, b: char) -> Option<char> {
797 let l: u32 = u32::from(a);
798 let v: u32 = u32::from(b);
799 if L_BASE <= l && l < (L_BASE + L_COUNT) && V_BASE <= v && v < (V_BASE + V_COUNT) {
800 let r: u32 = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT;
801 Some(char::try_from(r).unwrap())
802 } else if S_BASE <= l
803 && l <= (S_BASE + S_COUNT - T_COUNT)
804 && T_BASE <= v
805 && v < (T_BASE + T_COUNT)
806 && (l - S_BASE) % T_COUNT == 0
807 {
808 let r: u32 = l + (v - T_BASE);
809 Some(char::try_from(r).unwrap())
810 } else {
811 None
812 }
813}
814
815pub fn decompose(ab: char) -> Option<(char, char)> {
816 if let Some(ab: (char, char)) = decompose_hangul(ab) {
817 return Some(ab);
818 }
819
820 crateResult<(char, char), usize>::unicode_norm::DECOMPOSITION_TABLE
821 .binary_search_by(|item| item.0.cmp(&ab))
822 .map(|idx: usize| {
823 let chars: &(char, char, Option) = &crate::unicode_norm::DECOMPOSITION_TABLE[idx];
824 (chars.1, chars.2.unwrap_or(default:'\0'))
825 })
826 .ok()
827}
828
829pub fn decompose_hangul(ab: char) -> Option<(char, char)> {
830 let si: u32 = u32::from(ab).wrapping_sub(S_BASE);
831 if si >= S_COUNT {
832 return None;
833 }
834
835 let (a: u32, b: u32) = if si % T_COUNT != 0 {
836 // LV,T
837 (S_BASE + (si / T_COUNT) * T_COUNT, T_BASE + (si % T_COUNT))
838 } else {
839 // L,V
840 (L_BASE + (si / N_COUNT), V_BASE + (si % N_COUNT) / T_COUNT)
841 };
842
843 Some((char::try_from(a).unwrap(), char::try_from(b).unwrap()))
844}
845
#[cfg(test)]
mod tests {
    /// Pins the Unicode version reported by each Unicode data source used by
    /// this crate, so that a dependency or table update which changes a
    /// version makes this test fail.
    ///
    /// NOTE(review): the expected versions are not uniform (14.0.0 for
    /// bidi-mirroring, ccc and the local normalization tables; 15.0.0 for
    /// properties and script) — confirm the mix is intentional.
    #[test]
    fn check_unicode_version() {
        assert_eq!(unicode_bidi_mirroring::UNICODE_VERSION, (14, 0, 0));
        assert_eq!(unicode_ccc::UNICODE_VERSION, (14, 0, 0));
        assert_eq!(unicode_properties::UNICODE_VERSION, (15, 0, 0));
        assert_eq!(unicode_script::UNICODE_VERSION, (15, 0, 0));
        assert_eq!(crate::unicode_norm::UNICODE_VERSION, (14, 0, 0));
    }
}
857
// TODO: remove
/// Numeric identifiers (0 through 29) for Unicode general categories.
///
/// NOTE(review): the module name and the numbering presumably mirror
/// HarfBuzz's general-category enumeration — verify against the callers
/// before changing any value.
pub mod hb_gc {
    pub const RB_UNICODE_GENERAL_CATEGORY_CONTROL: u32 = 0;
    pub const RB_UNICODE_GENERAL_CATEGORY_FORMAT: u32 = 1;
    pub const RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED: u32 = 2;
    pub const RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE: u32 = 3;
    pub const RB_UNICODE_GENERAL_CATEGORY_SURROGATE: u32 = 4;
    pub const RB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER: u32 = 5;
    pub const RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER: u32 = 6;
    pub const RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER: u32 = 7;
    pub const RB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER: u32 = 8;
    pub const RB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER: u32 = 9;
    pub const RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK: u32 = 10;
    pub const RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK: u32 = 11;
    pub const RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK: u32 = 12;
    pub const RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER: u32 = 13;
    pub const RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER: u32 = 14;
    pub const RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER: u32 = 15;
    pub const RB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION: u32 = 16;
    pub const RB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION: u32 = 17;
    pub const RB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION: u32 = 18;
    pub const RB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION: u32 = 19;
    pub const RB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION: u32 = 20;
    pub const RB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION: u32 = 21;
    pub const RB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION: u32 = 22;
    pub const RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL: u32 = 23;
    pub const RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL: u32 = 24;
    pub const RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL: u32 = 25;
    pub const RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL: u32 = 26;
    pub const RB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR: u32 = 27;
    pub const RB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR: u32 = 28;
    pub const RB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR: u32 = 29;
}
891