1 | use super::arabic::ArabicShapePlan; |
2 | use super::*; |
3 | use crate::buffer::Buffer; |
4 | use crate::ot::{feature, FeatureFlags}; |
5 | use crate::plan::{ShapePlan, ShapePlanner}; |
6 | use crate::unicode::{CharExt, GeneralCategoryExt}; |
7 | use crate::{script, Face, GlyphInfo, Mask, Script, Tag}; |
8 | |
9 | pub const UNIVERSAL_SHAPER: ComplexShaper = ComplexShaper { |
10 | collect_features: Some(collect_features), |
11 | override_features: None, |
12 | create_data: Some(|plan: &ShapePlan| Box::new(UniversalShapePlan::new(plan))), |
13 | preprocess_text: Some(preprocess_text), |
14 | postprocess_glyphs: None, |
15 | normalization_mode: Some(ShapeNormalizationMode::ComposedDiacriticsNoShortCircuit), |
16 | decompose: None, |
17 | compose: Some(compose), |
18 | setup_masks: Some(setup_masks), |
19 | gpos_tag: None, |
20 | reorder_marks: None, |
21 | zero_width_marks: Some(ZeroWidthMarksMode::ByGdefEarly), |
22 | fallback_position: false, |
23 | }; |
24 | |
/// Numeric type of the per-glyph category stored by `set_use_category`.
pub type Category = u8;

/// Category codes used by this shaper, listed in numeric order.
///
/// Codes that appear only as comments are not defined by this module; they
/// are kept so the numbering gaps are self-explanatory.
#[allow(dead_code)] // Some codes may not be referenced from Rust code.
pub mod category {
    use super::Category;

    pub const O: Category = 0; // OTHER
    pub const B: Category = 1; // BASE
    // IND = 3 (BASE_IND)
    pub const N: Category = 4; // BASE_NUM
    pub const GB: Category = 5; // BASE_OTHER
    pub const CGJ: Category = 6; // CGJ
    // F = 7 (CONS_FINAL)
    // FM = 8 (CONS_FINAL_MOD)
    // M = 9 (CONS_MED)
    // CM = 10 (CONS_MOD)
    pub const SUB: Category = 11; // CONS_SUB
    pub const H: Category = 12; // HALANT
    pub const HN: Category = 13; // HALANT_NUM
    pub const ZWNJ: Category = 14; // Zero width non-joiner
    // ZWJ = 15 (Zero width joiner)
    pub const WJ: Category = 16; // Word joiner
    pub const RSV: Category = 17; // Reserved characters
    pub const R: Category = 18; // REPHA
    pub const S: Category = 19; // SYM
    // SM = 20 (SYM_MOD)
    // VS = 21 (VARIATION_SELECTOR)
    pub const VPRE: Category = 22; // VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
    pub const VMPRE: Category = 23; // VOWEL_MOD_PRE
    pub const FABV: Category = 24; // CONS_FINAL_ABOVE
    pub const FBLW: Category = 25; // CONS_FINAL_BELOW
    pub const FPST: Category = 26; // CONS_FINAL_POST
    pub const MABV: Category = 27; // CONS_MED_ABOVE
    pub const MBLW: Category = 28; // CONS_MED_BELOW
    pub const MPST: Category = 29; // CONS_MED_POST
    pub const MPRE: Category = 30; // CONS_MED_PRE
    pub const CMABV: Category = 31; // CONS_MOD_ABOVE
    pub const CMBLW: Category = 32; // CONS_MOD_BELOW
    pub const VABV: Category = 33; // VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
    pub const VBLW: Category = 34; // VOWEL_BELOW / VOWEL_BELOW_POST
    pub const VPST: Category = 35; // VOWEL_POST UIPC = Right
    // V = 36 (VOWEL)
    pub const VMABV: Category = 37; // VOWEL_MOD_ABOVE
    pub const VMBLW: Category = 38; // VOWEL_MOD_BELOW
    pub const VMPST: Category = 39; // VOWEL_MOD_POST
    // VM = 40 (VOWEL_MOD)
    pub const SMABV: Category = 41; // SYM_MOD_ABOVE
    pub const SMBLW: Category = 42; // SYM_MOD_BELOW
    pub const CS: Category = 43; // CONS_WITH_STACKER

    // https://github.com/harfbuzz/harfbuzz/issues/1102
    pub const IS: Category = 44; // HALANT_OR_VOWEL_MODIFIER

    pub const FMABV: Category = 45; // CONS_FINAL_MOD UIPC = Top
    pub const FMBLW: Category = 46; // CONS_FINAL_MOD UIPC = Bottom
    pub const FMPST: Category = 47; // CONS_FINAL_MOD UIPC = Not_Applicable
    pub const SK: Category = 48; // SAKOT
    pub const G: Category = 49; // HIEROGLYPH
    pub const J: Category = 50; // HIEROGLYPH_JOINER
    pub const SB: Category = 51; // HIEROGLYPH_SEGMENT_BEGIN
    pub const SE: Category = 52; // HIEROGLYPH_SEGMENT_END
}
96 | |
97 | // These features are applied all at once, before reordering, |
98 | // constrained to the syllable. |
99 | const BASIC_FEATURES: &[Tag] = &[ |
100 | feature::RAKAR_FORMS, |
101 | feature::ABOVE_BASE_FORMS, |
102 | feature::BELOW_BASE_FORMS, |
103 | feature::HALF_FORMS, |
104 | feature::POST_BASE_FORMS, |
105 | feature::VATTU_VARIANTS, |
106 | feature::CONJUNCT_FORMS, |
107 | ]; |
108 | |
109 | const TOPOGRAPHICAL_FEATURES: &[Tag] = &[ |
110 | feature::ISOLATED_FORMS, |
111 | feature::INITIAL_FORMS, |
112 | feature::MEDIAL_FORMS_1, |
113 | feature::TERMINAL_FORMS_1, |
114 | ]; |
115 | |
/// Joining form assigned to a syllable by `setup_topographical_masks`.
///
/// The discriminants are used as indices into the mask table built from
/// `TOPOGRAPHICAL_FEATURES`, so the variants must stay in the same order as
/// that list.
#[derive(Clone, Copy, PartialEq)]
enum JoiningForm {
    Isolated = 0,
    Initial = 1,
    Medial = 2,
    Terminal = 3,
}
124 | |
125 | // These features are applied all at once, after reordering and clearing syllables. |
126 | const OTHER_FEATURES: &[Tag] = &[ |
127 | feature::ABOVE_BASE_SUBSTITUTIONS, |
128 | feature::BELOW_BASE_SUBSTITUTIONS, |
129 | feature::HALANT_FORMS, |
130 | feature::PRE_BASE_SUBSTITUTIONS, |
131 | feature::POST_BASE_SUBSTITUTIONS, |
132 | ]; |
133 | |
134 | impl GlyphInfo { |
135 | pub(crate) fn use_category(&self) -> Category { |
136 | self.complex_var_u8_category() |
137 | } |
138 | |
139 | fn set_use_category(&mut self, c: Category) { |
140 | self.set_complex_var_u8_category(c) |
141 | } |
142 | |
143 | fn is_halant_use(&self) -> bool { |
144 | matches!(self.use_category(), category::H | category::IS) && !self.is_ligated() |
145 | } |
146 | } |
147 | |
148 | struct UniversalShapePlan { |
149 | rphf_mask: Mask, |
150 | arabic_plan: Option<ArabicShapePlan>, |
151 | } |
152 | |
153 | impl UniversalShapePlan { |
154 | fn new(plan: &ShapePlan) -> UniversalShapePlan { |
155 | let mut arabic_plan: Option = None; |
156 | |
157 | if plan.script.map_or(default:false, f:has_arabic_joining) { |
158 | arabic_plan = Some(super::arabic::ArabicShapePlan::new(plan)); |
159 | } |
160 | |
161 | UniversalShapePlan { |
162 | rphf_mask: plan.ot_map.one_mask(feature_tag:feature::REPH_FORMS), |
163 | arabic_plan, |
164 | } |
165 | } |
166 | } |
167 | |
168 | fn collect_features(planner: &mut ShapePlanner) { |
169 | // Do this before any lookups have been applied. |
170 | planner.ot_map.add_gsub_pause(Some(setup_syllables)); |
171 | |
172 | // Default glyph pre-processing group |
173 | planner |
174 | .ot_map |
175 | .enable_feature(feature::LOCALIZED_FORMS, FeatureFlags::empty(), 1); |
176 | planner.ot_map.enable_feature( |
177 | feature::GLYPH_COMPOSITION_DECOMPOSITION, |
178 | FeatureFlags::empty(), |
179 | 1, |
180 | ); |
181 | planner |
182 | .ot_map |
183 | .enable_feature(feature::NUKTA_FORMS, FeatureFlags::empty(), 1); |
184 | planner |
185 | .ot_map |
186 | .enable_feature(feature::AKHANDS, FeatureFlags::MANUAL_ZWJ, 1); |
187 | |
188 | // Reordering group |
189 | planner |
190 | .ot_map |
191 | .add_gsub_pause(Some(crate::ot::clear_substitution_flags)); |
192 | planner |
193 | .ot_map |
194 | .add_feature(feature::REPH_FORMS, FeatureFlags::MANUAL_ZWJ, 1); |
195 | planner.ot_map.add_gsub_pause(Some(record_rphf)); |
196 | planner |
197 | .ot_map |
198 | .add_gsub_pause(Some(crate::ot::clear_substitution_flags)); |
199 | planner |
200 | .ot_map |
201 | .enable_feature(feature::PRE_BASE_FORMS, FeatureFlags::MANUAL_ZWJ, 1); |
202 | planner.ot_map.add_gsub_pause(Some(record_pref)); |
203 | |
204 | // Orthographic unit shaping group |
205 | for feature in BASIC_FEATURES { |
206 | planner |
207 | .ot_map |
208 | .enable_feature(*feature, FeatureFlags::MANUAL_ZWJ, 1); |
209 | } |
210 | |
211 | planner.ot_map.add_gsub_pause(Some(reorder)); |
212 | planner |
213 | .ot_map |
214 | .add_gsub_pause(Some(crate::ot::clear_syllables)); |
215 | |
216 | // Topographical features |
217 | for feature in TOPOGRAPHICAL_FEATURES { |
218 | planner |
219 | .ot_map |
220 | .add_feature(*feature, FeatureFlags::empty(), 1); |
221 | } |
222 | planner.ot_map.add_gsub_pause(None); |
223 | |
224 | // Standard typographic presentation |
225 | for feature in OTHER_FEATURES { |
226 | planner |
227 | .ot_map |
228 | .enable_feature(*feature, FeatureFlags::empty(), 1); |
229 | } |
230 | } |
231 | |
232 | fn setup_syllables(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
233 | super::universal_machine::find_syllables(buffer); |
234 | |
235 | foreach_syllable!(buffer, start, end, { |
236 | buffer.unsafe_to_break(Some(start), Some(end)); |
237 | }); |
238 | |
239 | setup_rphf_mask(plan, buffer); |
240 | setup_topographical_masks(plan, buffer); |
241 | } |
242 | |
243 | fn setup_rphf_mask(plan: &ShapePlan, buffer: &mut Buffer) { |
244 | let universal_plan = plan.data::<UniversalShapePlan>(); |
245 | |
246 | let mask = universal_plan.rphf_mask; |
247 | if mask == 0 { |
248 | return; |
249 | } |
250 | |
251 | let mut start = 0; |
252 | let mut end = buffer.next_syllable(0); |
253 | while start < buffer.len { |
254 | let limit = if buffer.info[start].use_category() == category::R { |
255 | 1 |
256 | } else { |
257 | core::cmp::min(3, end - start) |
258 | }; |
259 | |
260 | for i in start..start + limit { |
261 | buffer.info[i].mask |= mask; |
262 | } |
263 | |
264 | start = end; |
265 | end = buffer.next_syllable(start); |
266 | } |
267 | } |
268 | |
269 | fn setup_topographical_masks(plan: &ShapePlan, buffer: &mut Buffer) { |
270 | use super::universal_machine::SyllableType; |
271 | |
272 | if plan.data::<UniversalShapePlan>().arabic_plan.is_some() { |
273 | return; |
274 | } |
275 | |
276 | let mut masks = [0; 4]; |
277 | let mut all_masks = 0; |
278 | for i in 0..4 { |
279 | masks[i] = plan.ot_map.one_mask(TOPOGRAPHICAL_FEATURES[i]); |
280 | if masks[i] == plan.ot_map.global_mask() { |
281 | masks[i] = 0; |
282 | } |
283 | |
284 | all_masks |= masks[i]; |
285 | } |
286 | |
287 | if all_masks == 0 { |
288 | return; |
289 | } |
290 | |
291 | let other_masks = !all_masks; |
292 | |
293 | let mut last_start = 0; |
294 | let mut last_form = None; |
295 | let mut start = 0; |
296 | let mut end = buffer.next_syllable(0); |
297 | while start < buffer.len { |
298 | let syllable = buffer.info[start].syllable() & 0x0F; |
299 | if syllable == SyllableType::HieroglyphCluster as u8 |
300 | || syllable == SyllableType::NonCluster as u8 |
301 | { |
302 | last_form = None; |
303 | } else { |
304 | let join = last_form == Some(JoiningForm::Terminal) |
305 | || last_form == Some(JoiningForm::Isolated); |
306 | |
307 | if join { |
308 | // Fixup previous syllable's form. |
309 | let form = if last_form == Some(JoiningForm::Terminal) { |
310 | JoiningForm::Medial |
311 | } else { |
312 | JoiningForm::Initial |
313 | }; |
314 | |
315 | for i in last_start..start { |
316 | buffer.info[i].mask = |
317 | (buffer.info[i].mask & other_masks) | masks[form as usize]; |
318 | } |
319 | } |
320 | |
321 | // Form for this syllable. |
322 | let form = if join { |
323 | JoiningForm::Terminal |
324 | } else { |
325 | JoiningForm::Isolated |
326 | }; |
327 | last_form = Some(form); |
328 | for i in start..end { |
329 | buffer.info[i].mask = (buffer.info[i].mask & other_masks) | masks[form as usize]; |
330 | } |
331 | } |
332 | |
333 | last_start = start; |
334 | start = end; |
335 | end = buffer.next_syllable(start); |
336 | } |
337 | } |
338 | |
339 | fn record_rphf(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
340 | let universal_plan = plan.data::<UniversalShapePlan>(); |
341 | |
342 | let mask = universal_plan.rphf_mask; |
343 | if mask == 0 { |
344 | return; |
345 | } |
346 | |
347 | let mut start = 0; |
348 | let mut end = buffer.next_syllable(0); |
349 | while start < buffer.len { |
350 | // Mark a substituted repha as USE_R. |
351 | for i in start..end { |
352 | if buffer.info[i].mask & mask == 0 { |
353 | break; |
354 | } |
355 | |
356 | if buffer.info[i].is_substituted() { |
357 | buffer.info[i].set_use_category(category::R); |
358 | break; |
359 | } |
360 | } |
361 | |
362 | start = end; |
363 | end = buffer.next_syllable(start); |
364 | } |
365 | } |
366 | |
367 | fn reorder(_: &ShapePlan, face: &Face, buffer: &mut Buffer) { |
368 | use super::universal_machine::SyllableType; |
369 | |
370 | syllabic::insert_dotted_circles( |
371 | face, |
372 | buffer, |
373 | broken_syllable_type:SyllableType::BrokenCluster as u8, |
374 | dottedcircle_category:category::B, |
375 | repha_category:Some(category::R), |
376 | dottedcircle_position:None, |
377 | ); |
378 | |
379 | let mut start: usize = 0; |
380 | let mut end: usize = buffer.next_syllable(start:0); |
381 | while start < buffer.len { |
382 | reorder_syllable(start, end, buffer); |
383 | start = end; |
384 | end = buffer.next_syllable(start); |
385 | } |
386 | } |
387 | |
388 | const fn category_flag(c: Category) -> u32 { |
389 | rb_flag(c as u32) |
390 | } |
391 | |
392 | const fn category_flag64(c: Category) -> u64 { |
393 | rb_flag64(c as u32) |
394 | } |
395 | |
396 | const BASE_FLAGS: u64 = category_flag64(category::FABV) |
397 | | category_flag64(category::FBLW) |
398 | | category_flag64(category::FPST) |
399 | | category_flag64(category::MABV) |
400 | | category_flag64(category::MBLW) |
401 | | category_flag64(category::MPST) |
402 | | category_flag64(category::MPRE) |
403 | | category_flag64(category::VABV) |
404 | | category_flag64(category::VBLW) |
405 | | category_flag64(category::VPST) |
406 | | category_flag64(category::VPRE) |
407 | | category_flag64(category::VMABV) |
408 | | category_flag64(category::VMBLW) |
409 | | category_flag64(category::VMPST) |
410 | | category_flag64(category::VMPRE); |
411 | |
412 | fn reorder_syllable(start: usize, end: usize, buffer: &mut Buffer) { |
413 | use super::universal_machine::SyllableType; |
414 | |
415 | let syllable_type = (buffer.info[start].syllable() & 0x0F) as u32; |
416 | // Only a few syllable types need reordering. |
417 | if (rb_flag_unsafe(syllable_type) |
418 | & (rb_flag(SyllableType::ViramaTerminatedCluster as u32) |
419 | | rb_flag(SyllableType::SakotTerminatedCluster as u32) |
420 | | rb_flag(SyllableType::StandardCluster as u32) |
421 | | rb_flag(SyllableType::BrokenCluster as u32) |
422 | | 0)) |
423 | == 0 |
424 | { |
425 | return; |
426 | } |
427 | |
428 | // Move things forward. |
429 | if buffer.info[start].use_category() == category::R && end - start > 1 { |
430 | // Got a repha. Reorder it towards the end, but before the first post-base glyph. |
431 | for i in start + 1..end { |
432 | let is_post_base_glyph = |
433 | (rb_flag64_unsafe(buffer.info[i].use_category() as u32) & BASE_FLAGS) != 0 |
434 | || buffer.info[i].is_halant_use(); |
435 | |
436 | if is_post_base_glyph || i == end - 1 { |
437 | // If we hit a post-base glyph, move before it; otherwise move to the |
438 | // end. Shift things in between backward. |
439 | |
440 | let mut i = i; |
441 | if is_post_base_glyph { |
442 | i -= 1; |
443 | } |
444 | |
445 | buffer.merge_clusters(start, i + 1); |
446 | let t = buffer.info[start]; |
447 | for k in 0..i - start { |
448 | buffer.info[k + start] = buffer.info[k + start + 1]; |
449 | } |
450 | buffer.info[i] = t; |
451 | |
452 | break; |
453 | } |
454 | } |
455 | } |
456 | |
457 | // Move things back. |
458 | let mut j = start; |
459 | for i in start..end { |
460 | let flag = rb_flag_unsafe(buffer.info[i].use_category() as u32); |
461 | if buffer.info[i].is_halant_use() { |
462 | // If we hit a halant, move after it; otherwise move to the beginning, and |
463 | // shift things in between forward. |
464 | j = i + 1; |
465 | } else if (flag & (category_flag(category::VPRE) | category_flag(category::VMPRE))) != 0 |
466 | && buffer.info[i].lig_comp() == 0 |
467 | && j < i |
468 | { |
469 | // Only move the first component of a MultipleSubst. |
470 | buffer.merge_clusters(j, i + 1); |
471 | let t = buffer.info[i]; |
472 | for k in (0..i - j).rev() { |
473 | buffer.info[k + j + 1] = buffer.info[k + j]; |
474 | } |
475 | buffer.info[j] = t; |
476 | } |
477 | } |
478 | } |
479 | |
480 | fn record_pref(_: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
481 | let mut start: usize = 0; |
482 | let mut end: usize = buffer.next_syllable(start:0); |
483 | while start < buffer.len { |
484 | // Mark a substituted pref as VPre, as they behave the same way. |
485 | for i: usize in start..end { |
486 | if buffer.info[i].is_substituted() { |
487 | buffer.info[i].set_use_category(category::VPRE); |
488 | break; |
489 | } |
490 | } |
491 | |
492 | start = end; |
493 | end = buffer.next_syllable(start); |
494 | } |
495 | } |
496 | |
497 | fn has_arabic_joining(script: Script) -> bool { |
498 | // List of scripts that have data in arabic-table. |
499 | matches!( |
500 | script, |
501 | script::ADLAM |
502 | | script::ARABIC |
503 | | script::CHORASMIAN |
504 | | script::HANIFI_ROHINGYA |
505 | | script::MANDAIC |
506 | | script::MANICHAEAN |
507 | | script::MONGOLIAN |
508 | | script::NKO |
509 | | script::OLD_UYGHUR |
510 | | script::PHAGS_PA |
511 | | script::PSALTER_PAHLAVI |
512 | | script::SOGDIAN |
513 | | script::SYRIAC |
514 | ) |
515 | } |
516 | |
517 | fn preprocess_text(_: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
518 | super::vowel_constraints::preprocess_text_vowel_constraints(buffer); |
519 | } |
520 | |
521 | fn compose(_: &ShapeNormalizeContext, a: char, b: char) -> Option<char> { |
522 | // Avoid recomposing split matras. |
523 | if a.general_category().is_mark() { |
524 | return None; |
525 | } |
526 | |
527 | crate::unicode::compose(a, b) |
528 | } |
529 | |
530 | fn setup_masks(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
531 | let universal_plan: &UniversalShapePlan = plan.data::<UniversalShapePlan>(); |
532 | |
533 | // Do this before allocating use_category(). |
534 | if let Some(ref arabic_plan: &ArabicShapePlan) = universal_plan.arabic_plan { |
535 | super::arabic::setup_masks_inner(arabic_plan, plan.script, buffer); |
536 | } |
537 | |
538 | // We cannot setup masks here. We save information about characters |
539 | // and setup masks later on in a pause-callback. |
540 | for info: &mut GlyphInfo in buffer.info_slice_mut() { |
541 | info.set_use_category(super::universal_table::get_category(info)); |
542 | } |
543 | } |
544 | |