1use super::arabic::ArabicShapePlan;
2use super::*;
3use crate::buffer::Buffer;
4use crate::ot::{feature, FeatureFlags};
5use crate::plan::{ShapePlan, ShapePlanner};
6use crate::unicode::{CharExt, GeneralCategoryExt};
7use crate::{script, Face, GlyphInfo, Mask, Script, Tag};
8
9pub const UNIVERSAL_SHAPER: ComplexShaper = ComplexShaper {
10 collect_features: Some(collect_features),
11 override_features: None,
12 create_data: Some(|plan: &ShapePlan| Box::new(UniversalShapePlan::new(plan))),
13 preprocess_text: Some(preprocess_text),
14 postprocess_glyphs: None,
15 normalization_mode: Some(ShapeNormalizationMode::ComposedDiacriticsNoShortCircuit),
16 decompose: None,
17 compose: Some(compose),
18 setup_masks: Some(setup_masks),
19 gpos_tag: None,
20 reorder_marks: None,
21 zero_width_marks: Some(ZeroWidthMarksMode::ByGdefEarly),
22 fallback_position: false,
23};
24
// Numeric category code; `GlyphInfo::use_category` returns a value of this type.
pub type Category = u8;

// Category codes, listed in ascending numeric order.
//
// Codes that are named but have no constant defined here are kept as
// commented-out entries at their numeric position.
#[allow(dead_code)]
pub mod category {
    pub const O: u8 = 0; // OTHER
    pub const B: u8 = 1; // BASE
    // pub const IND: u8 = 3; // BASE_IND
    pub const N: u8 = 4; // BASE_NUM
    pub const GB: u8 = 5; // BASE_OTHER
    pub const CGJ: u8 = 6; // CGJ
    // pub const F: u8 = 7; // CONS_FINAL
    // pub const FM: u8 = 8; // CONS_FINAL_MOD
    // pub const M: u8 = 9; // CONS_MED
    // pub const CM: u8 = 10; // CONS_MOD
    pub const SUB: u8 = 11; // CONS_SUB
    pub const H: u8 = 12; // HALANT
    pub const HN: u8 = 13; // HALANT_NUM
    pub const ZWNJ: u8 = 14; // Zero width non-joiner
    // pub const ZWJ: u8 = 15; // Zero width joiner
    pub const WJ: u8 = 16; // Word joiner
    pub const RSV: u8 = 17; // Reserved characters
    pub const R: u8 = 18; // REPHA
    pub const S: u8 = 19; // SYM
    // pub const SM: u8 = 20; // SYM_MOD
    // pub const VS: u8 = 21; // VARIATION_SELECTOR
    pub const VPRE: u8 = 22; // VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
    pub const VMPRE: u8 = 23; // VOWEL_MOD_PRE
    pub const FABV: u8 = 24; // CONS_FINAL_ABOVE
    pub const FBLW: u8 = 25; // CONS_FINAL_BELOW
    pub const FPST: u8 = 26; // CONS_FINAL_POST
    pub const MABV: u8 = 27; // CONS_MED_ABOVE
    pub const MBLW: u8 = 28; // CONS_MED_BELOW
    pub const MPST: u8 = 29; // CONS_MED_POST
    pub const MPRE: u8 = 30; // CONS_MED_PRE
    pub const CMABV: u8 = 31; // CONS_MOD_ABOVE
    pub const CMBLW: u8 = 32; // CONS_MOD_BELOW
    pub const VABV: u8 = 33; // VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
    pub const VBLW: u8 = 34; // VOWEL_BELOW / VOWEL_BELOW_POST
    pub const VPST: u8 = 35; // VOWEL_POST UIPC = Right
    // pub const V: u8 = 36; // VOWEL
    pub const VMABV: u8 = 37; // VOWEL_MOD_ABOVE
    pub const VMBLW: u8 = 38; // VOWEL_MOD_BELOW
    pub const VMPST: u8 = 39; // VOWEL_MOD_POST
    // pub const VM: u8 = 40; // VOWEL_MOD
    pub const SMABV: u8 = 41; // SYM_MOD_ABOVE
    pub const SMBLW: u8 = 42; // SYM_MOD_BELOW
    pub const CS: u8 = 43; // CONS_WITH_STACKER

    // https://github.com/harfbuzz/harfbuzz/issues/1102
    pub const IS: u8 = 44; // HALANT_OR_VOWEL_MODIFIER

    pub const FMABV: u8 = 45; // CONS_FINAL_MOD UIPC = Top
    pub const FMBLW: u8 = 46; // CONS_FINAL_MOD UIPC = Bottom
    pub const FMPST: u8 = 47; // CONS_FINAL_MOD UIPC = Not_Applicable
    pub const SK: u8 = 48; // SAKOT
    pub const G: u8 = 49; // HIEROGLYPH
    pub const J: u8 = 50; // HIEROGLYPH_JOINER
    pub const SB: u8 = 51; // HIEROGLYPH_SEGMENT_BEGIN
    pub const SE: u8 = 52; // HIEROGLYPH_SEGMENT_END
}
96
// These features are applied all at once, before reordering,
// constrained to the syllable.
//
// `collect_features` enables each entry, in the order listed here, with
// `FeatureFlags::MANUAL_ZWJ`, and only afterwards registers the `reorder`
// pause.
const BASIC_FEATURES: &[Tag] = &[
    feature::RAKAR_FORMS,
    feature::ABOVE_BASE_FORMS,
    feature::BELOW_BASE_FORMS,
    feature::HALF_FORMS,
    feature::POST_BASE_FORMS,
    feature::VATTU_VARIANTS,
    feature::CONJUNCT_FORMS,
];
108
// Positional-form features, one per `JoiningForm` variant.
//
// The order is significant: `setup_topographical_masks` looks up the mask of
// entry `i` and later selects it with `JoiningForm as usize`, so entry `i`
// must belong to the form whose discriminant is `i`.
const TOPOGRAPHICAL_FEATURES: &[Tag] = &[
    feature::ISOLATED_FORMS,
    feature::INITIAL_FORMS,
    feature::MEDIAL_FORMS_1,
    feature::TERMINAL_FORMS_1,
];
115
// Positional form of a syllable. Each discriminant is the index of the
// matching entry in `TOPOGRAPHICAL_FEATURES` (the value is used as an array
// index via `as usize`), so both lists must stay in the same order.
#[derive(Clone, Copy, PartialEq)]
enum JoiningForm {
    Isolated = 0,
    Initial = 1,
    Medial = 2,
    Terminal = 3,
}
124
// These features are applied all at once, after reordering and clearing syllables.
//
// `collect_features` enables them last, with empty feature flags, after the
// pause that follows the topographical features.
const OTHER_FEATURES: &[Tag] = &[
    feature::ABOVE_BASE_SUBSTITUTIONS,
    feature::BELOW_BASE_SUBSTITUTIONS,
    feature::HALANT_FORMS,
    feature::PRE_BASE_SUBSTITUTIONS,
    feature::POST_BASE_SUBSTITUTIONS,
];
133
134impl GlyphInfo {
135 pub(crate) fn use_category(&self) -> Category {
136 self.complex_var_u8_category()
137 }
138
139 fn set_use_category(&mut self, c: Category) {
140 self.set_complex_var_u8_category(c)
141 }
142
143 fn is_halant_use(&self) -> bool {
144 matches!(self.use_category(), category::H | category::IS) && !self.is_ligated()
145 }
146}
147
/// Shaper-specific plan data, built by `UniversalShapePlan::new` and read
/// back through `plan.data::<UniversalShapePlan>()`.
struct UniversalShapePlan {
    /// Mask obtained from `ot_map.one_mask(feature::REPH_FORMS)`. The
    /// functions in this file that use it return early when it is `0`.
    rphf_mask: Mask,
    /// Arabic shaping plan; `Some` only when the plan's script satisfies
    /// `has_arabic_joining`.
    arabic_plan: Option<ArabicShapePlan>,
}
152
153impl UniversalShapePlan {
154 fn new(plan: &ShapePlan) -> UniversalShapePlan {
155 let mut arabic_plan: Option = None;
156
157 if plan.script.map_or(default:false, f:has_arabic_joining) {
158 arabic_plan = Some(super::arabic::ArabicShapePlan::new(plan));
159 }
160
161 UniversalShapePlan {
162 rphf_mask: plan.ot_map.one_mask(feature_tag:feature::REPH_FORMS),
163 arabic_plan,
164 }
165 }
166}
167
168fn collect_features(planner: &mut ShapePlanner) {
169 // Do this before any lookups have been applied.
170 planner.ot_map.add_gsub_pause(Some(setup_syllables));
171
172 // Default glyph pre-processing group
173 planner
174 .ot_map
175 .enable_feature(feature::LOCALIZED_FORMS, FeatureFlags::empty(), 1);
176 planner.ot_map.enable_feature(
177 feature::GLYPH_COMPOSITION_DECOMPOSITION,
178 FeatureFlags::empty(),
179 1,
180 );
181 planner
182 .ot_map
183 .enable_feature(feature::NUKTA_FORMS, FeatureFlags::empty(), 1);
184 planner
185 .ot_map
186 .enable_feature(feature::AKHANDS, FeatureFlags::MANUAL_ZWJ, 1);
187
188 // Reordering group
189 planner
190 .ot_map
191 .add_gsub_pause(Some(crate::ot::clear_substitution_flags));
192 planner
193 .ot_map
194 .add_feature(feature::REPH_FORMS, FeatureFlags::MANUAL_ZWJ, 1);
195 planner.ot_map.add_gsub_pause(Some(record_rphf));
196 planner
197 .ot_map
198 .add_gsub_pause(Some(crate::ot::clear_substitution_flags));
199 planner
200 .ot_map
201 .enable_feature(feature::PRE_BASE_FORMS, FeatureFlags::MANUAL_ZWJ, 1);
202 planner.ot_map.add_gsub_pause(Some(record_pref));
203
204 // Orthographic unit shaping group
205 for feature in BASIC_FEATURES {
206 planner
207 .ot_map
208 .enable_feature(*feature, FeatureFlags::MANUAL_ZWJ, 1);
209 }
210
211 planner.ot_map.add_gsub_pause(Some(reorder));
212 planner
213 .ot_map
214 .add_gsub_pause(Some(crate::ot::clear_syllables));
215
216 // Topographical features
217 for feature in TOPOGRAPHICAL_FEATURES {
218 planner
219 .ot_map
220 .add_feature(*feature, FeatureFlags::empty(), 1);
221 }
222 planner.ot_map.add_gsub_pause(None);
223
224 // Standard typographic presentation
225 for feature in OTHER_FEATURES {
226 planner
227 .ot_map
228 .enable_feature(*feature, FeatureFlags::empty(), 1);
229 }
230}
231
/// GSUB pause callback, registered first by `collect_features`: runs syllable
/// detection on the buffer and then sets up the masks that depend on it.
///
/// The face argument is unused.
fn setup_syllables(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) {
    // Syllable detection itself lives in the `universal_machine` module.
    super::universal_machine::find_syllables(buffer);

    // Call `unsafe_to_break` on the glyph range of every syllable.
    foreach_syllable!(buffer, start, end, {
        buffer.unsafe_to_break(Some(start), Some(end));
    });

    // Both helpers walk the buffer syllable by syllable via `next_syllable`.
    setup_rphf_mask(plan, buffer);
    setup_topographical_masks(plan, buffer);
}
242
243fn setup_rphf_mask(plan: &ShapePlan, buffer: &mut Buffer) {
244 let universal_plan = plan.data::<UniversalShapePlan>();
245
246 let mask = universal_plan.rphf_mask;
247 if mask == 0 {
248 return;
249 }
250
251 let mut start = 0;
252 let mut end = buffer.next_syllable(0);
253 while start < buffer.len {
254 let limit = if buffer.info[start].use_category() == category::R {
255 1
256 } else {
257 core::cmp::min(3, end - start)
258 };
259
260 for i in start..start + limit {
261 buffer.info[i].mask |= mask;
262 }
263
264 start = end;
265 end = buffer.next_syllable(start);
266 }
267}
268
269fn setup_topographical_masks(plan: &ShapePlan, buffer: &mut Buffer) {
270 use super::universal_machine::SyllableType;
271
272 if plan.data::<UniversalShapePlan>().arabic_plan.is_some() {
273 return;
274 }
275
276 let mut masks = [0; 4];
277 let mut all_masks = 0;
278 for i in 0..4 {
279 masks[i] = plan.ot_map.one_mask(TOPOGRAPHICAL_FEATURES[i]);
280 if masks[i] == plan.ot_map.global_mask() {
281 masks[i] = 0;
282 }
283
284 all_masks |= masks[i];
285 }
286
287 if all_masks == 0 {
288 return;
289 }
290
291 let other_masks = !all_masks;
292
293 let mut last_start = 0;
294 let mut last_form = None;
295 let mut start = 0;
296 let mut end = buffer.next_syllable(0);
297 while start < buffer.len {
298 let syllable = buffer.info[start].syllable() & 0x0F;
299 if syllable == SyllableType::HieroglyphCluster as u8
300 || syllable == SyllableType::NonCluster as u8
301 {
302 last_form = None;
303 } else {
304 let join = last_form == Some(JoiningForm::Terminal)
305 || last_form == Some(JoiningForm::Isolated);
306
307 if join {
308 // Fixup previous syllable's form.
309 let form = if last_form == Some(JoiningForm::Terminal) {
310 JoiningForm::Medial
311 } else {
312 JoiningForm::Initial
313 };
314
315 for i in last_start..start {
316 buffer.info[i].mask =
317 (buffer.info[i].mask & other_masks) | masks[form as usize];
318 }
319 }
320
321 // Form for this syllable.
322 let form = if join {
323 JoiningForm::Terminal
324 } else {
325 JoiningForm::Isolated
326 };
327 last_form = Some(form);
328 for i in start..end {
329 buffer.info[i].mask = (buffer.info[i].mask & other_masks) | masks[form as usize];
330 }
331 }
332
333 last_start = start;
334 start = end;
335 end = buffer.next_syllable(start);
336 }
337}
338
339fn record_rphf(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) {
340 let universal_plan = plan.data::<UniversalShapePlan>();
341
342 let mask = universal_plan.rphf_mask;
343 if mask == 0 {
344 return;
345 }
346
347 let mut start = 0;
348 let mut end = buffer.next_syllable(0);
349 while start < buffer.len {
350 // Mark a substituted repha as USE_R.
351 for i in start..end {
352 if buffer.info[i].mask & mask == 0 {
353 break;
354 }
355
356 if buffer.info[i].is_substituted() {
357 buffer.info[i].set_use_category(category::R);
358 break;
359 }
360 }
361
362 start = end;
363 end = buffer.next_syllable(start);
364 }
365}
366
367fn reorder(_: &ShapePlan, face: &Face, buffer: &mut Buffer) {
368 use super::universal_machine::SyllableType;
369
370 syllabic::insert_dotted_circles(
371 face,
372 buffer,
373 broken_syllable_type:SyllableType::BrokenCluster as u8,
374 dottedcircle_category:category::B,
375 repha_category:Some(category::R),
376 dottedcircle_position:None,
377 );
378
379 let mut start: usize = 0;
380 let mut end: usize = buffer.next_syllable(start:0);
381 while start < buffer.len {
382 reorder_syllable(start, end, buffer);
383 start = end;
384 end = buffer.next_syllable(start);
385 }
386}
387
/// Returns the 32-bit flag word `rb_flag` produces for category `c`.
///
/// NOTE(review): `rb_flag` is defined elsewhere; presumably it yields the
/// single bit `1 << c`, which would only fit categories below 32 — confirm.
const fn category_flag(c: Category) -> u32 {
    rb_flag(c as u32)
}
391
/// Returns the 64-bit flag word `rb_flag64` produces for category `c`.
///
/// NOTE(review): `rb_flag64` is defined elsewhere; presumably it yields the
/// single bit `1 << c` in a `u64` — confirm.
const fn category_flag64(c: Category) -> u64 {
    rb_flag64(c as u32)
}
395
396const BASE_FLAGS: u64 = category_flag64(category::FABV)
397 | category_flag64(category::FBLW)
398 | category_flag64(category::FPST)
399 | category_flag64(category::MABV)
400 | category_flag64(category::MBLW)
401 | category_flag64(category::MPST)
402 | category_flag64(category::MPRE)
403 | category_flag64(category::VABV)
404 | category_flag64(category::VBLW)
405 | category_flag64(category::VPST)
406 | category_flag64(category::VPRE)
407 | category_flag64(category::VMABV)
408 | category_flag64(category::VMBLW)
409 | category_flag64(category::VMPST)
410 | category_flag64(category::VMPRE);
411
412fn reorder_syllable(start: usize, end: usize, buffer: &mut Buffer) {
413 use super::universal_machine::SyllableType;
414
415 let syllable_type = (buffer.info[start].syllable() & 0x0F) as u32;
416 // Only a few syllable types need reordering.
417 if (rb_flag_unsafe(syllable_type)
418 & (rb_flag(SyllableType::ViramaTerminatedCluster as u32)
419 | rb_flag(SyllableType::SakotTerminatedCluster as u32)
420 | rb_flag(SyllableType::StandardCluster as u32)
421 | rb_flag(SyllableType::BrokenCluster as u32)
422 | 0))
423 == 0
424 {
425 return;
426 }
427
428 // Move things forward.
429 if buffer.info[start].use_category() == category::R && end - start > 1 {
430 // Got a repha. Reorder it towards the end, but before the first post-base glyph.
431 for i in start + 1..end {
432 let is_post_base_glyph =
433 (rb_flag64_unsafe(buffer.info[i].use_category() as u32) & BASE_FLAGS) != 0
434 || buffer.info[i].is_halant_use();
435
436 if is_post_base_glyph || i == end - 1 {
437 // If we hit a post-base glyph, move before it; otherwise move to the
438 // end. Shift things in between backward.
439
440 let mut i = i;
441 if is_post_base_glyph {
442 i -= 1;
443 }
444
445 buffer.merge_clusters(start, i + 1);
446 let t = buffer.info[start];
447 for k in 0..i - start {
448 buffer.info[k + start] = buffer.info[k + start + 1];
449 }
450 buffer.info[i] = t;
451
452 break;
453 }
454 }
455 }
456
457 // Move things back.
458 let mut j = start;
459 for i in start..end {
460 let flag = rb_flag_unsafe(buffer.info[i].use_category() as u32);
461 if buffer.info[i].is_halant_use() {
462 // If we hit a halant, move after it; otherwise move to the beginning, and
463 // shift things in between forward.
464 j = i + 1;
465 } else if (flag & (category_flag(category::VPRE) | category_flag(category::VMPRE))) != 0
466 && buffer.info[i].lig_comp() == 0
467 && j < i
468 {
469 // Only move the first component of a MultipleSubst.
470 buffer.merge_clusters(j, i + 1);
471 let t = buffer.info[i];
472 for k in (0..i - j).rev() {
473 buffer.info[k + j + 1] = buffer.info[k + j];
474 }
475 buffer.info[j] = t;
476 }
477 }
478}
479
480fn record_pref(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
481 let mut start: usize = 0;
482 let mut end: usize = buffer.next_syllable(start:0);
483 while start < buffer.len {
484 // Mark a substituted pref as VPre, as they behave the same way.
485 for i: usize in start..end {
486 if buffer.info[i].is_substituted() {
487 buffer.info[i].set_use_category(category::VPRE);
488 break;
489 }
490 }
491
492 start = end;
493 end = buffer.next_syllable(start);
494 }
495}
496
497fn has_arabic_joining(script: Script) -> bool {
498 // List of scripts that have data in arabic-table.
499 matches!(
500 script,
501 script::ADLAM
502 | script::ARABIC
503 | script::CHORASMIAN
504 | script::HANIFI_ROHINGYA
505 | script::MANDAIC
506 | script::MANICHAEAN
507 | script::MONGOLIAN
508 | script::NKO
509 | script::OLD_UYGHUR
510 | script::PHAGS_PA
511 | script::PSALTER_PAHLAVI
512 | script::SOGDIAN
513 | script::SYRIAC
514 )
515}
516
/// Text pre-processing hook of the universal shaper: forwards the buffer to
/// `vowel_constraints::preprocess_text_vowel_constraints`.
///
/// The plan and face arguments are unused.
fn preprocess_text(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
    super::vowel_constraints::preprocess_text_vowel_constraints(buffer);
}
520
521fn compose(_: &ShapeNormalizeContext, a: char, b: char) -> Option<char> {
522 // Avoid recomposing split matras.
523 if a.general_category().is_mark() {
524 return None;
525 }
526
527 crate::unicode::compose(a, b)
528}
529
530fn setup_masks(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) {
531 let universal_plan: &UniversalShapePlan = plan.data::<UniversalShapePlan>();
532
533 // Do this before allocating use_category().
534 if let Some(ref arabic_plan: &ArabicShapePlan) = universal_plan.arabic_plan {
535 super::arabic::setup_masks_inner(arabic_plan, plan.script, buffer);
536 }
537
538 // We cannot setup masks here. We save information about characters
539 // and setup masks later on in a pause-callback.
540 for info: &mut GlyphInfo in buffer.info_slice_mut() {
541 info.set_use_category(super::universal_table::get_category(info));
542 }
543}
544