1 | use super::arabic::ArabicShapePlan; |
2 | use super::*; |
3 | use crate::buffer::Buffer; |
4 | use crate::ot::{feature, FeatureFlags}; |
5 | use crate::plan::{ShapePlan, ShapePlanner}; |
6 | use crate::unicode::{CharExt, GeneralCategoryExt}; |
7 | use crate::{script, Face, GlyphInfo, Mask, Script, Tag}; |
8 | |
9 | pub const UNIVERSAL_SHAPER: ComplexShaper = ComplexShaper { |
10 | collect_features: Some(collect_features), |
11 | override_features: None, |
12 | create_data: Some(|plan: &ShapePlan| Box::new(UniversalShapePlan::new(plan))), |
13 | preprocess_text: Some(preprocess_text), |
14 | postprocess_glyphs: None, |
15 | normalization_mode: Some(ShapeNormalizationMode::ComposedDiacriticsNoShortCircuit), |
16 | decompose: None, |
17 | compose: Some(compose), |
18 | setup_masks: Some(setup_masks), |
19 | gpos_tag: None, |
20 | reorder_marks: None, |
21 | zero_width_marks: Some(ZeroWidthMarksMode::ByGdefEarly), |
22 | fallback_position: false, |
23 | }; |
24 | |
/// Numeric USE category of a glyph; the valid values are the constants in [`category`].
pub type Category = u8;

/// USE category values, listed in ascending numeric order.
///
/// Values with no constant defined in this module (kept for reference):
/// IND = 3 (BASE_IND), CGJ = 6, F = 7 (CONS_FINAL), FM = 8 (CONS_FINAL_MOD),
/// M = 9 (CONS_MED), CM = 10 (CONS_MOD), ZWJ = 15 (zero width joiner),
/// WJ = 16 (word joiner), SM = 20 (SYM_MOD), VS = 21 (VARIATION_SELECTOR),
/// V = 36 (VOWEL), VM = 40 (VOWEL_MOD).
pub mod category {
    /// OTHER
    pub const O: u8 = 0;
    /// BASE
    pub const B: u8 = 1;
    /// BASE_NUM
    pub const N: u8 = 4;
    /// BASE_OTHER
    pub const GB: u8 = 5;
    /// CONS_SUB
    pub const SUB: u8 = 11;
    /// HALANT
    pub const H: u8 = 12;
    /// HALANT_NUM
    pub const HN: u8 = 13;
    /// Zero width non-joiner
    pub const ZWNJ: u8 = 14;
    /// Reserved characters
    pub const RSV: u8 = 17;
    /// REPHA
    pub const R: u8 = 18;
    /// SYM
    pub const S: u8 = 19;
    /// VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
    pub const VPRE: u8 = 22;
    /// VOWEL_MOD_PRE
    pub const VMPRE: u8 = 23;
    /// CONS_FINAL_ABOVE
    pub const FABV: u8 = 24;
    /// CONS_FINAL_BELOW
    pub const FBLW: u8 = 25;
    /// CONS_FINAL_POST
    pub const FPST: u8 = 26;
    /// CONS_MED_ABOVE
    pub const MABV: u8 = 27;
    /// CONS_MED_BELOW
    pub const MBLW: u8 = 28;
    /// CONS_MED_POST
    pub const MPST: u8 = 29;
    /// CONS_MED_PRE
    pub const MPRE: u8 = 30;
    /// CONS_MOD_ABOVE
    pub const CMABV: u8 = 31;
    /// CONS_MOD_BELOW
    pub const CMBLW: u8 = 32;
    /// VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
    pub const VABV: u8 = 33;
    /// VOWEL_BELOW / VOWEL_BELOW_POST
    pub const VBLW: u8 = 34;
    /// VOWEL_POST (UIPC = Right)
    pub const VPST: u8 = 35;
    /// VOWEL_MOD_ABOVE
    pub const VMABV: u8 = 37;
    /// VOWEL_MOD_BELOW
    pub const VMBLW: u8 = 38;
    /// VOWEL_MOD_POST
    pub const VMPST: u8 = 39;
    /// SYM_MOD_ABOVE
    pub const SMABV: u8 = 41;
    /// SYM_MOD_BELOW
    pub const SMBLW: u8 = 42;
    /// CONS_WITH_STACKER
    pub const CS: u8 = 43;
    /// HALANT_OR_VOWEL_MODIFIER
    ///
    /// See https://github.com/harfbuzz/harfbuzz/issues/1102
    pub const HVM: u8 = 44;
    /// CONS_FINAL_MOD (UIPC = Top)
    pub const FMABV: u8 = 45;
    /// CONS_FINAL_MOD (UIPC = Bottom)
    pub const FMBLW: u8 = 46;
    /// CONS_FINAL_MOD (UIPC = Not_Applicable)
    pub const FMPST: u8 = 47;
    /// SAKOT
    pub const SK: u8 = 48;
    /// HIEROGLYPH
    pub const G: u8 = 49;
    /// HIEROGLYPH_JOINER
    pub const J: u8 = 50;
    /// HIEROGLYPH_SEGMENT_BEGIN
    pub const SB: u8 = 51;
    /// HIEROGLYPH_SEGMENT_END
    pub const SE: u8 = 52;
}
94 | |
95 | // These features are applied all at once, before reordering, |
96 | // constrained to the syllable. |
97 | const BASIC_FEATURES: &[Tag] = &[ |
98 | feature::RAKAR_FORMS, |
99 | feature::ABOVE_BASE_FORMS, |
100 | feature::BELOW_BASE_FORMS, |
101 | feature::HALF_FORMS, |
102 | feature::POST_BASE_FORMS, |
103 | feature::VATTU_VARIANTS, |
104 | feature::CONJUNCT_FORMS, |
105 | ]; |
106 | |
107 | const TOPOGRAPHICAL_FEATURES: &[Tag] = &[ |
108 | feature::ISOLATED_FORMS, |
109 | feature::INITIAL_FORMS, |
110 | feature::MEDIAL_FORMS_1, |
111 | feature::TERMINAL_FORMS_1, |
112 | ]; |
113 | |
/// Positional form assigned to a syllable.
///
/// The discriminants index the per-form mask table, so the variants must stay
/// in the same order as `TOPOGRAPHICAL_FEATURES`.
#[derive(Clone, Copy, PartialEq)]
enum JoiningForm {
    Isolated = 0,
    Initial = 1,
    Medial = 2,
    Terminal = 3,
}
122 | |
123 | // These features are applied all at once, after reordering and clearing syllables. |
124 | const OTHER_FEATURES: &[Tag] = &[ |
125 | feature::ABOVE_BASE_SUBSTITUTIONS, |
126 | feature::BELOW_BASE_SUBSTITUTIONS, |
127 | feature::HALANT_FORMS, |
128 | feature::PRE_BASE_SUBSTITUTIONS, |
129 | feature::POST_BASE_SUBSTITUTIONS, |
130 | ]; |
131 | |
132 | impl GlyphInfo { |
133 | pub(crate) fn use_category(&self) -> Category { |
134 | self.complex_var_u8_category() |
135 | } |
136 | |
137 | fn set_use_category(&mut self, c: Category) { |
138 | self.set_complex_var_u8_category(c) |
139 | } |
140 | |
141 | fn is_halant_use(&self) -> bool { |
142 | matches!(self.use_category(), category::H | category::HVM) && !self.is_ligated() |
143 | } |
144 | } |
145 | |
146 | struct UniversalShapePlan { |
147 | rphf_mask: Mask, |
148 | arabic_plan: Option<ArabicShapePlan>, |
149 | } |
150 | |
151 | impl UniversalShapePlan { |
152 | fn new(plan: &ShapePlan) -> UniversalShapePlan { |
153 | let mut arabic_plan: Option = None; |
154 | |
155 | if plan.script.map_or(default:false, f:has_arabic_joining) { |
156 | arabic_plan = Some(super::arabic::ArabicShapePlan::new(plan)); |
157 | } |
158 | |
159 | UniversalShapePlan { |
160 | rphf_mask: plan.ot_map.one_mask(feature_tag:feature::REPH_FORMS), |
161 | arabic_plan, |
162 | } |
163 | } |
164 | } |
165 | |
166 | fn collect_features(planner: &mut ShapePlanner) { |
167 | // Do this before any lookups have been applied. |
168 | planner.ot_map.add_gsub_pause(Some(setup_syllables)); |
169 | |
170 | // Default glyph pre-processing group |
171 | planner |
172 | .ot_map |
173 | .enable_feature(feature::LOCALIZED_FORMS, FeatureFlags::empty(), 1); |
174 | planner.ot_map.enable_feature( |
175 | feature::GLYPH_COMPOSITION_DECOMPOSITION, |
176 | FeatureFlags::empty(), |
177 | 1, |
178 | ); |
179 | planner |
180 | .ot_map |
181 | .enable_feature(feature::NUKTA_FORMS, FeatureFlags::empty(), 1); |
182 | planner |
183 | .ot_map |
184 | .enable_feature(feature::AKHANDS, FeatureFlags::MANUAL_ZWJ, 1); |
185 | |
186 | // Reordering group |
187 | planner |
188 | .ot_map |
189 | .add_gsub_pause(Some(crate::ot::clear_substitution_flags)); |
190 | planner |
191 | .ot_map |
192 | .add_feature(feature::REPH_FORMS, FeatureFlags::MANUAL_ZWJ, 1); |
193 | planner.ot_map.add_gsub_pause(Some(record_rphf)); |
194 | planner |
195 | .ot_map |
196 | .add_gsub_pause(Some(crate::ot::clear_substitution_flags)); |
197 | planner |
198 | .ot_map |
199 | .enable_feature(feature::PRE_BASE_FORMS, FeatureFlags::MANUAL_ZWJ, 1); |
200 | planner.ot_map.add_gsub_pause(Some(record_pref)); |
201 | |
202 | // Orthographic unit shaping group |
203 | for feature in BASIC_FEATURES { |
204 | planner |
205 | .ot_map |
206 | .enable_feature(*feature, FeatureFlags::MANUAL_ZWJ, 1); |
207 | } |
208 | |
209 | planner.ot_map.add_gsub_pause(Some(reorder)); |
210 | planner |
211 | .ot_map |
212 | .add_gsub_pause(Some(crate::ot::clear_syllables)); |
213 | |
214 | // Topographical features |
215 | for feature in TOPOGRAPHICAL_FEATURES { |
216 | planner |
217 | .ot_map |
218 | .add_feature(*feature, FeatureFlags::empty(), 1); |
219 | } |
220 | planner.ot_map.add_gsub_pause(None); |
221 | |
222 | // Standard typographic presentation |
223 | for feature in OTHER_FEATURES { |
224 | planner |
225 | .ot_map |
226 | .enable_feature(*feature, FeatureFlags::empty(), 1); |
227 | } |
228 | } |
229 | |
230 | fn setup_syllables(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
231 | super::universal_machine::find_syllables(buffer); |
232 | |
233 | foreach_syllable!(buffer, start, end, { |
234 | buffer.unsafe_to_break(start, end); |
235 | }); |
236 | |
237 | setup_rphf_mask(plan, buffer); |
238 | setup_topographical_masks(plan, buffer); |
239 | } |
240 | |
241 | fn setup_rphf_mask(plan: &ShapePlan, buffer: &mut Buffer) { |
242 | let universal_plan = plan.data::<UniversalShapePlan>(); |
243 | |
244 | let mask = universal_plan.rphf_mask; |
245 | if mask == 0 { |
246 | return; |
247 | } |
248 | |
249 | let mut start = 0; |
250 | let mut end = buffer.next_syllable(0); |
251 | while start < buffer.len { |
252 | let limit = if buffer.info[start].use_category() == category::R { |
253 | 1 |
254 | } else { |
255 | core::cmp::min(3, end - start) |
256 | }; |
257 | |
258 | for i in start..start + limit { |
259 | buffer.info[i].mask |= mask; |
260 | } |
261 | |
262 | start = end; |
263 | end = buffer.next_syllable(start); |
264 | } |
265 | } |
266 | |
267 | fn setup_topographical_masks(plan: &ShapePlan, buffer: &mut Buffer) { |
268 | use super::universal_machine::SyllableType; |
269 | |
270 | if plan.data::<UniversalShapePlan>().arabic_plan.is_some() { |
271 | return; |
272 | } |
273 | |
274 | let mut masks = [0; 4]; |
275 | let mut all_masks = 0; |
276 | for i in 0..4 { |
277 | masks[i] = plan.ot_map.one_mask(TOPOGRAPHICAL_FEATURES[i]); |
278 | if masks[i] == plan.ot_map.global_mask() { |
279 | masks[i] = 0; |
280 | } |
281 | |
282 | all_masks |= masks[i]; |
283 | } |
284 | |
285 | if all_masks == 0 { |
286 | return; |
287 | } |
288 | |
289 | let other_masks = !all_masks; |
290 | |
291 | let mut last_start = 0; |
292 | let mut last_form = None; |
293 | let mut start = 0; |
294 | let mut end = buffer.next_syllable(0); |
295 | while start < buffer.len { |
296 | let syllable = buffer.info[start].syllable() & 0x0F; |
297 | if syllable == SyllableType::IndependentCluster as u8 |
298 | || syllable == SyllableType::SymbolCluster as u8 |
299 | || syllable == SyllableType::HieroglyphCluster as u8 |
300 | || syllable == SyllableType::NonCluster as u8 |
301 | { |
302 | last_form = None; |
303 | } else { |
304 | let join = last_form == Some(JoiningForm::Terminal) |
305 | || last_form == Some(JoiningForm::Isolated); |
306 | |
307 | if join { |
308 | // Fixup previous syllable's form. |
309 | let form = if last_form == Some(JoiningForm::Terminal) { |
310 | JoiningForm::Medial |
311 | } else { |
312 | JoiningForm::Initial |
313 | }; |
314 | |
315 | for i in last_start..start { |
316 | buffer.info[i].mask = |
317 | (buffer.info[i].mask & other_masks) | masks[form as usize]; |
318 | } |
319 | } |
320 | |
321 | // Form for this syllable. |
322 | let form = if join { |
323 | JoiningForm::Terminal |
324 | } else { |
325 | JoiningForm::Isolated |
326 | }; |
327 | last_form = Some(form); |
328 | for i in start..end { |
329 | buffer.info[i].mask = (buffer.info[i].mask & other_masks) | masks[form as usize]; |
330 | } |
331 | } |
332 | |
333 | last_start = start; |
334 | start = end; |
335 | end = buffer.next_syllable(start); |
336 | } |
337 | } |
338 | |
339 | fn record_rphf(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
340 | let universal_plan = plan.data::<UniversalShapePlan>(); |
341 | |
342 | let mask = universal_plan.rphf_mask; |
343 | if mask == 0 { |
344 | return; |
345 | } |
346 | |
347 | let mut start = 0; |
348 | let mut end = buffer.next_syllable(0); |
349 | while start < buffer.len { |
350 | // Mark a substituted repha as USE_R. |
351 | for i in start..end { |
352 | if buffer.info[i].mask & mask == 0 { |
353 | break; |
354 | } |
355 | |
356 | if buffer.info[i].is_substituted() { |
357 | buffer.info[i].set_use_category(category::R); |
358 | break; |
359 | } |
360 | } |
361 | |
362 | start = end; |
363 | end = buffer.next_syllable(start); |
364 | } |
365 | } |
366 | |
367 | fn reorder(_: &ShapePlan, face: &Face, buffer: &mut Buffer) { |
368 | use super::universal_machine::SyllableType; |
369 | |
370 | syllabic::insert_dotted_circles( |
371 | face, |
372 | buffer, |
373 | broken_syllable_type:SyllableType::BrokenCluster as u8, |
374 | dottedcircle_category:category::B, |
375 | repha_category:Some(category::R), |
376 | dottedcircle_position:None, |
377 | ); |
378 | |
379 | let mut start: usize = 0; |
380 | let mut end: usize = buffer.next_syllable(start:0); |
381 | while start < buffer.len { |
382 | reorder_syllable(start, end, buffer); |
383 | start = end; |
384 | end = buffer.next_syllable(start); |
385 | } |
386 | } |
387 | |
388 | const fn category_flag(c: Category) -> u32 { |
389 | rb_flag(c as u32) |
390 | } |
391 | |
392 | const fn category_flag64(c: Category) -> u64 { |
393 | rb_flag64(c as u32) |
394 | } |
395 | |
396 | const BASE_FLAGS: u64 = category_flag64(category::FABV) |
397 | | category_flag64(category::FBLW) |
398 | | category_flag64(category::FPST) |
399 | | category_flag64(category::MABV) |
400 | | category_flag64(category::MBLW) |
401 | | category_flag64(category::MPST) |
402 | | category_flag64(category::MPRE) |
403 | | category_flag64(category::VABV) |
404 | | category_flag64(category::VBLW) |
405 | | category_flag64(category::VPST) |
406 | | category_flag64(category::VPRE) |
407 | | category_flag64(category::VMABV) |
408 | | category_flag64(category::VMBLW) |
409 | | category_flag64(category::VMPST) |
410 | | category_flag64(category::VMPRE); |
411 | |
412 | fn reorder_syllable(start: usize, end: usize, buffer: &mut Buffer) { |
413 | use super::universal_machine::SyllableType; |
414 | |
415 | let syllable_type = (buffer.info[start].syllable() & 0x0F) as u32; |
416 | // Only a few syllable types need reordering. |
417 | if (rb_flag_unsafe(syllable_type) |
418 | & (rb_flag(SyllableType::ViramaTerminatedCluster as u32) |
419 | | rb_flag(SyllableType::SakotTerminatedCluster as u32) |
420 | | rb_flag(SyllableType::StandardCluster as u32) |
421 | | rb_flag(SyllableType::BrokenCluster as u32) |
422 | | 0)) |
423 | == 0 |
424 | { |
425 | return; |
426 | } |
427 | |
428 | // Move things forward. |
429 | if buffer.info[start].use_category() == category::R && end - start > 1 { |
430 | // Got a repha. Reorder it towards the end, but before the first post-base glyph. |
431 | for i in start + 1..end { |
432 | let is_post_base_glyph = |
433 | (rb_flag64_unsafe(buffer.info[i].use_category() as u32) & BASE_FLAGS) != 0 |
434 | || buffer.info[i].is_halant_use(); |
435 | |
436 | if is_post_base_glyph || i == end - 1 { |
437 | // If we hit a post-base glyph, move before it; otherwise move to the |
438 | // end. Shift things in between backward. |
439 | |
440 | let mut i = i; |
441 | if is_post_base_glyph { |
442 | i -= 1; |
443 | } |
444 | |
445 | buffer.merge_clusters(start, i + 1); |
446 | let t = buffer.info[start]; |
447 | for k in 0..i - start { |
448 | buffer.info[k + start] = buffer.info[k + start + 1]; |
449 | } |
450 | buffer.info[i] = t; |
451 | |
452 | break; |
453 | } |
454 | } |
455 | } |
456 | |
457 | // Move things back. |
458 | let mut j = start; |
459 | for i in start..end { |
460 | let flag = rb_flag_unsafe(buffer.info[i].use_category() as u32); |
461 | if buffer.info[i].is_halant_use() { |
462 | // If we hit a halant, move after it; otherwise move to the beginning, and |
463 | // shift things in between forward. |
464 | j = i + 1; |
465 | } else if (flag & (category_flag(category::VPRE) | category_flag(category::VMPRE))) != 0 |
466 | && buffer.info[i].lig_comp() == 0 |
467 | && j < i |
468 | { |
469 | // Only move the first component of a MultipleSubst. |
470 | buffer.merge_clusters(j, i + 1); |
471 | let t = buffer.info[i]; |
472 | for k in (0..i - j).rev() { |
473 | buffer.info[k + j + 1] = buffer.info[k + j]; |
474 | } |
475 | buffer.info[j] = t; |
476 | } |
477 | } |
478 | } |
479 | |
480 | fn record_pref(_: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
481 | let mut start: usize = 0; |
482 | let mut end: usize = buffer.next_syllable(start:0); |
483 | while start < buffer.len { |
484 | // Mark a substituted pref as VPre, as they behave the same way. |
485 | for i: usize in start..end { |
486 | if buffer.info[i].is_substituted() { |
487 | buffer.info[i].set_use_category(category::VPRE); |
488 | break; |
489 | } |
490 | } |
491 | |
492 | start = end; |
493 | end = buffer.next_syllable(start); |
494 | } |
495 | } |
496 | |
497 | fn has_arabic_joining(script: Script) -> bool { |
498 | // List of scripts that have data in arabic-table. |
499 | matches!( |
500 | script, |
501 | script::ADLAM |
502 | | script::ARABIC |
503 | | script::CHORASMIAN |
504 | | script::HANIFI_ROHINGYA |
505 | | script::MANDAIC |
506 | | script::MANICHAEAN |
507 | | script::MONGOLIAN |
508 | | script::NKO |
509 | | script::PHAGS_PA |
510 | | script::PSALTER_PAHLAVI |
511 | | script::SOGDIAN |
512 | | script::SYRIAC |
513 | ) |
514 | } |
515 | |
516 | fn preprocess_text(_: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
517 | super::vowel_constraints::preprocess_text_vowel_constraints(buffer); |
518 | } |
519 | |
520 | fn compose(_: &ShapeNormalizeContext, a: char, b: char) -> Option<char> { |
521 | // Avoid recomposing split matras. |
522 | if a.general_category().is_mark() { |
523 | return None; |
524 | } |
525 | |
526 | crate::unicode::compose(a, b) |
527 | } |
528 | |
529 | fn setup_masks(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
530 | let universal_plan: &UniversalShapePlan = plan.data::<UniversalShapePlan>(); |
531 | |
532 | // Do this before allocating use_category(). |
533 | if let Some(ref arabic_plan: &ArabicShapePlan) = universal_plan.arabic_plan { |
534 | super::arabic::setup_masks_inner(arabic_plan, plan.script, buffer); |
535 | } |
536 | |
537 | // We cannot setup masks here. We save information about characters |
538 | // and setup masks later on in a pause-callback. |
539 | for info: &mut GlyphInfo in buffer.info_slice_mut() { |
540 | info.set_use_category(super::universal_table::get_category(info.glyph_id)); |
541 | } |
542 | } |
543 | |