1use super::arabic::ArabicShapePlan;
2use super::*;
3use crate::buffer::Buffer;
4use crate::ot::{feature, FeatureFlags};
5use crate::plan::{ShapePlan, ShapePlanner};
6use crate::unicode::{CharExt, GeneralCategoryExt};
7use crate::{script, Face, GlyphInfo, Mask, Script, Tag};
8
/// Shaper descriptor for the universal (USE) shaper.
///
/// Wires the callbacks defined in this file into a `ComplexShaper` table.
/// Fields left as `None` supply no callback from this file.
pub const UNIVERSAL_SHAPER: ComplexShaper = ComplexShaper {
    collect_features: Some(collect_features),
    override_features: None,
    // Per-plan data: the `rphf` mask plus an optional Arabic plan
    // (see `UniversalShapePlan::new`).
    create_data: Some(|plan: &ShapePlan| Box::new(UniversalShapePlan::new(plan))),
    preprocess_text: Some(preprocess_text),
    postprocess_glyphs: None,
    normalization_mode: Some(ShapeNormalizationMode::ComposedDiacriticsNoShortCircuit),
    decompose: None,
    // Composition hook that refuses to compose when the first character is a mark.
    compose: Some(compose),
    // Records the USE category of every glyph; the feature masks themselves
    // are assigned later, in the `setup_syllables` pause callback.
    setup_masks: Some(setup_masks),
    gpos_tag: None,
    reorder_marks: None,
    zero_width_marks: Some(ZeroWidthMarksMode::ByGdefEarly),
    fallback_position: false,
};
24
/// Numeric USE category assigned to a glyph; the known values live in [`category`].
pub type Category = u8;

/// USE category values referenced by the universal shaper.
///
/// Entries that are commented out are not defined by this module; their
/// values are listed for reference only.
pub mod category {
    use super::Category;

    pub const O: Category = 0; // OTHER

    pub const B: Category = 1; // BASE

    // pub const IND: Category = 3; // BASE_IND

    pub const N: Category = 4; // BASE_NUM
    pub const GB: Category = 5; // BASE_OTHER

    // pub const CGJ: Category = 6; // CGJ
    // pub const F: Category = 7; // CONS_FINAL
    // pub const FM: Category = 8; // CONS_FINAL_MOD
    // pub const M: Category = 9; // CONS_MED
    // pub const CM: Category = 10; // CONS_MOD

    pub const SUB: Category = 11; // CONS_SUB
    pub const H: Category = 12; // HALANT

    pub const HN: Category = 13; // HALANT_NUM
    pub const ZWNJ: Category = 14; // Zero width non-joiner

    // pub const ZWJ: Category = 15; // Zero width joiner
    // pub const WJ: Category = 16; // Word joiner

    pub const RSV: Category = 17; // Reserved characters
    pub const R: Category = 18; // REPHA
    pub const S: Category = 19; // SYM

    // pub const SM: Category = 20; // SYM_MOD
    // pub const VS: Category = 21; // VARIATION_SELECTOR
    // pub const V: Category = 36; // VOWEL
    // pub const VM: Category = 40; // VOWEL_MOD

    pub const CS: Category = 43; // CONS_WITH_STACKER

    // https://github.com/harfbuzz/harfbuzz/issues/1102
    pub const HVM: Category = 44; // HALANT_OR_VOWEL_MODIFIER

    pub const SK: Category = 48; // SAKOT

    pub const FABV: Category = 24; // CONS_FINAL_ABOVE
    pub const FBLW: Category = 25; // CONS_FINAL_BELOW
    pub const FPST: Category = 26; // CONS_FINAL_POST
    pub const MABV: Category = 27; // CONS_MED_ABOVE
    pub const MBLW: Category = 28; // CONS_MED_BELOW
    pub const MPST: Category = 29; // CONS_MED_POST
    pub const MPRE: Category = 30; // CONS_MED_PRE
    pub const CMABV: Category = 31; // CONS_MOD_ABOVE
    pub const CMBLW: Category = 32; // CONS_MOD_BELOW
    pub const VABV: Category = 33; // VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
    pub const VBLW: Category = 34; // VOWEL_BELOW / VOWEL_BELOW_POST
    pub const VPST: Category = 35; // VOWEL_POST UIPC = Right
    pub const VPRE: Category = 22; // VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
    pub const VMABV: Category = 37; // VOWEL_MOD_ABOVE
    pub const VMBLW: Category = 38; // VOWEL_MOD_BELOW
    pub const VMPST: Category = 39; // VOWEL_MOD_POST
    pub const VMPRE: Category = 23; // VOWEL_MOD_PRE
    pub const SMABV: Category = 41; // SYM_MOD_ABOVE
    pub const SMBLW: Category = 42; // SYM_MOD_BELOW
    pub const FMABV: Category = 45; // CONS_FINAL_MOD UIPC = Top
    pub const FMBLW: Category = 46; // CONS_FINAL_MOD UIPC = Bottom
    pub const FMPST: Category = 47; // CONS_FINAL_MOD UIPC = Not_Applicable
    pub const G: Category = 49; // HIEROGLYPH
    pub const J: Category = 50; // HIEROGLYPH_JOINER
    pub const SB: Category = 51; // HIEROGLYPH_SEGMENT_BEGIN
    pub const SE: Category = 52; // HIEROGLYPH_SEGMENT_END
}
94
// These features are applied all at once, before reordering,
// constrained to the syllable.
//
// `collect_features` enables each of them with `FeatureFlags::MANUAL_ZWJ`,
// ahead of the `reorder` pause.
const BASIC_FEATURES: &[Tag] = &[
    feature::RAKAR_FORMS,
    feature::ABOVE_BASE_FORMS,
    feature::BELOW_BASE_FORMS,
    feature::HALF_FORMS,
    feature::POST_BASE_FORMS,
    feature::VATTU_VARIANTS,
    feature::CONJUNCT_FORMS,
];
106
// Positional (topographical) form features.
//
// The order is significant: `setup_topographical_masks` builds one mask per
// entry and indexes that array with `JoiningForm as usize`.
const TOPOGRAPHICAL_FEATURES: &[Tag] = &[
    feature::ISOLATED_FORMS,
    feature::INITIAL_FORMS,
    feature::MEDIAL_FORMS_1,
    feature::TERMINAL_FORMS_1,
];
113
/// Joining form chosen for a syllable.
///
/// The discriminants are used as indices into the mask array built from
/// `TOPOGRAPHICAL_FEATURES`, so the variants must stay in the same order as
/// that list.
#[derive(Clone, Copy, PartialEq)]
enum JoiningForm {
    Isolated = 0,
    Initial = 1,
    Medial = 2,
    Terminal = 3,
}
122
// These features are applied all at once, after reordering and clearing syllables.
//
// `collect_features` enables each of them with empty feature flags as the
// last group it registers.
const OTHER_FEATURES: &[Tag] = &[
    feature::ABOVE_BASE_SUBSTITUTIONS,
    feature::BELOW_BASE_SUBSTITUTIONS,
    feature::HALANT_FORMS,
    feature::PRE_BASE_SUBSTITUTIONS,
    feature::POST_BASE_SUBSTITUTIONS,
];
131
132impl GlyphInfo {
133 pub(crate) fn use_category(&self) -> Category {
134 self.complex_var_u8_category()
135 }
136
137 fn set_use_category(&mut self, c: Category) {
138 self.set_complex_var_u8_category(c)
139 }
140
141 fn is_halant_use(&self) -> bool {
142 matches!(self.use_category(), category::H | category::HVM) && !self.is_ligated()
143 }
144}
145
/// Per-plan data of the universal shaper, built by `UniversalShapePlan::new`.
struct UniversalShapePlan {
    /// Mask of the `feature::REPH_FORMS` feature; callers skip all repha
    /// handling when it is zero.
    rphf_mask: Mask,
    /// Arabic shaping plan, present only when the plan's script satisfies
    /// `has_arabic_joining`.
    arabic_plan: Option<ArabicShapePlan>,
}
150
151impl UniversalShapePlan {
152 fn new(plan: &ShapePlan) -> UniversalShapePlan {
153 let mut arabic_plan: Option = None;
154
155 if plan.script.map_or(default:false, f:has_arabic_joining) {
156 arabic_plan = Some(super::arabic::ArabicShapePlan::new(plan));
157 }
158
159 UniversalShapePlan {
160 rphf_mask: plan.ot_map.one_mask(feature_tag:feature::REPH_FORMS),
161 arabic_plan,
162 }
163 }
164}
165
166fn collect_features(planner: &mut ShapePlanner) {
167 // Do this before any lookups have been applied.
168 planner.ot_map.add_gsub_pause(Some(setup_syllables));
169
170 // Default glyph pre-processing group
171 planner
172 .ot_map
173 .enable_feature(feature::LOCALIZED_FORMS, FeatureFlags::empty(), 1);
174 planner.ot_map.enable_feature(
175 feature::GLYPH_COMPOSITION_DECOMPOSITION,
176 FeatureFlags::empty(),
177 1,
178 );
179 planner
180 .ot_map
181 .enable_feature(feature::NUKTA_FORMS, FeatureFlags::empty(), 1);
182 planner
183 .ot_map
184 .enable_feature(feature::AKHANDS, FeatureFlags::MANUAL_ZWJ, 1);
185
186 // Reordering group
187 planner
188 .ot_map
189 .add_gsub_pause(Some(crate::ot::clear_substitution_flags));
190 planner
191 .ot_map
192 .add_feature(feature::REPH_FORMS, FeatureFlags::MANUAL_ZWJ, 1);
193 planner.ot_map.add_gsub_pause(Some(record_rphf));
194 planner
195 .ot_map
196 .add_gsub_pause(Some(crate::ot::clear_substitution_flags));
197 planner
198 .ot_map
199 .enable_feature(feature::PRE_BASE_FORMS, FeatureFlags::MANUAL_ZWJ, 1);
200 planner.ot_map.add_gsub_pause(Some(record_pref));
201
202 // Orthographic unit shaping group
203 for feature in BASIC_FEATURES {
204 planner
205 .ot_map
206 .enable_feature(*feature, FeatureFlags::MANUAL_ZWJ, 1);
207 }
208
209 planner.ot_map.add_gsub_pause(Some(reorder));
210 planner
211 .ot_map
212 .add_gsub_pause(Some(crate::ot::clear_syllables));
213
214 // Topographical features
215 for feature in TOPOGRAPHICAL_FEATURES {
216 planner
217 .ot_map
218 .add_feature(*feature, FeatureFlags::empty(), 1);
219 }
220 planner.ot_map.add_gsub_pause(None);
221
222 // Standard typographic presentation
223 for feature in OTHER_FEATURES {
224 planner
225 .ot_map
226 .enable_feature(*feature, FeatureFlags::empty(), 1);
227 }
228}
229
/// GSUB pause callback registered first by `collect_features`, so it runs
/// before any lookups have been applied.
///
/// Runs the syllable finder over the buffer, flags syllable ranges as unsafe
/// to break, then assigns the `rphf` and topographical feature masks.
fn setup_syllables(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) {
    super::universal_machine::find_syllables(buffer);

    // NOTE(review): `foreach_syllable!` presumably binds `start`/`end` to each
    // syllable's range in turn -- confirm against the macro definition.
    foreach_syllable!(buffer, start, end, {
        buffer.unsafe_to_break(start, end);
    });

    setup_rphf_mask(plan, buffer);
    setup_topographical_masks(plan, buffer);
}
240
241fn setup_rphf_mask(plan: &ShapePlan, buffer: &mut Buffer) {
242 let universal_plan = plan.data::<UniversalShapePlan>();
243
244 let mask = universal_plan.rphf_mask;
245 if mask == 0 {
246 return;
247 }
248
249 let mut start = 0;
250 let mut end = buffer.next_syllable(0);
251 while start < buffer.len {
252 let limit = if buffer.info[start].use_category() == category::R {
253 1
254 } else {
255 core::cmp::min(3, end - start)
256 };
257
258 for i in start..start + limit {
259 buffer.info[i].mask |= mask;
260 }
261
262 start = end;
263 end = buffer.next_syllable(start);
264 }
265}
266
267fn setup_topographical_masks(plan: &ShapePlan, buffer: &mut Buffer) {
268 use super::universal_machine::SyllableType;
269
270 if plan.data::<UniversalShapePlan>().arabic_plan.is_some() {
271 return;
272 }
273
274 let mut masks = [0; 4];
275 let mut all_masks = 0;
276 for i in 0..4 {
277 masks[i] = plan.ot_map.one_mask(TOPOGRAPHICAL_FEATURES[i]);
278 if masks[i] == plan.ot_map.global_mask() {
279 masks[i] = 0;
280 }
281
282 all_masks |= masks[i];
283 }
284
285 if all_masks == 0 {
286 return;
287 }
288
289 let other_masks = !all_masks;
290
291 let mut last_start = 0;
292 let mut last_form = None;
293 let mut start = 0;
294 let mut end = buffer.next_syllable(0);
295 while start < buffer.len {
296 let syllable = buffer.info[start].syllable() & 0x0F;
297 if syllable == SyllableType::IndependentCluster as u8
298 || syllable == SyllableType::SymbolCluster as u8
299 || syllable == SyllableType::HieroglyphCluster as u8
300 || syllable == SyllableType::NonCluster as u8
301 {
302 last_form = None;
303 } else {
304 let join = last_form == Some(JoiningForm::Terminal)
305 || last_form == Some(JoiningForm::Isolated);
306
307 if join {
308 // Fixup previous syllable's form.
309 let form = if last_form == Some(JoiningForm::Terminal) {
310 JoiningForm::Medial
311 } else {
312 JoiningForm::Initial
313 };
314
315 for i in last_start..start {
316 buffer.info[i].mask =
317 (buffer.info[i].mask & other_masks) | masks[form as usize];
318 }
319 }
320
321 // Form for this syllable.
322 let form = if join {
323 JoiningForm::Terminal
324 } else {
325 JoiningForm::Isolated
326 };
327 last_form = Some(form);
328 for i in start..end {
329 buffer.info[i].mask = (buffer.info[i].mask & other_masks) | masks[form as usize];
330 }
331 }
332
333 last_start = start;
334 start = end;
335 end = buffer.next_syllable(start);
336 }
337}
338
339fn record_rphf(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) {
340 let universal_plan = plan.data::<UniversalShapePlan>();
341
342 let mask = universal_plan.rphf_mask;
343 if mask == 0 {
344 return;
345 }
346
347 let mut start = 0;
348 let mut end = buffer.next_syllable(0);
349 while start < buffer.len {
350 // Mark a substituted repha as USE_R.
351 for i in start..end {
352 if buffer.info[i].mask & mask == 0 {
353 break;
354 }
355
356 if buffer.info[i].is_substituted() {
357 buffer.info[i].set_use_category(category::R);
358 break;
359 }
360 }
361
362 start = end;
363 end = buffer.next_syllable(start);
364 }
365}
366
367fn reorder(_: &ShapePlan, face: &Face, buffer: &mut Buffer) {
368 use super::universal_machine::SyllableType;
369
370 syllabic::insert_dotted_circles(
371 face,
372 buffer,
373 broken_syllable_type:SyllableType::BrokenCluster as u8,
374 dottedcircle_category:category::B,
375 repha_category:Some(category::R),
376 dottedcircle_position:None,
377 );
378
379 let mut start: usize = 0;
380 let mut end: usize = buffer.next_syllable(start:0);
381 while start < buffer.len {
382 reorder_syllable(start, end, buffer);
383 start = end;
384 end = buffer.next_syllable(start);
385 }
386}
387
/// Returns the 32-bit flag for category `c`, as computed by `rb_flag`.
// NOTE(review): presumably this is bit `c`; categories of 32 and above cannot
// be represented in a `u32` -- confirm how `rb_flag` treats them.
const fn category_flag(c: Category) -> u32 {
    rb_flag(c as u32)
}
391
/// Returns the 64-bit flag for category `c`, as computed by `rb_flag64`.
// NOTE(review): presumably this is bit `c` of a `u64` -- confirm against `rb_flag64`.
const fn category_flag64(c: Category) -> u64 {
    rb_flag64(c as u32)
}
395
// Combined 64-bit flags of the categories that `reorder_syllable` treats as
// post-base glyphs when deciding where a leading repha stops.
const BASE_FLAGS: u64 = category_flag64(category::FABV)
    | category_flag64(category::FBLW)
    | category_flag64(category::FPST)
    | category_flag64(category::MABV)
    | category_flag64(category::MBLW)
    | category_flag64(category::MPST)
    | category_flag64(category::MPRE)
    | category_flag64(category::VABV)
    | category_flag64(category::VBLW)
    | category_flag64(category::VPST)
    | category_flag64(category::VPRE)
    | category_flag64(category::VMABV)
    | category_flag64(category::VMBLW)
    | category_flag64(category::VMPST)
    | category_flag64(category::VMPRE);
411
412fn reorder_syllable(start: usize, end: usize, buffer: &mut Buffer) {
413 use super::universal_machine::SyllableType;
414
415 let syllable_type = (buffer.info[start].syllable() & 0x0F) as u32;
416 // Only a few syllable types need reordering.
417 if (rb_flag_unsafe(syllable_type)
418 & (rb_flag(SyllableType::ViramaTerminatedCluster as u32)
419 | rb_flag(SyllableType::SakotTerminatedCluster as u32)
420 | rb_flag(SyllableType::StandardCluster as u32)
421 | rb_flag(SyllableType::BrokenCluster as u32)
422 | 0))
423 == 0
424 {
425 return;
426 }
427
428 // Move things forward.
429 if buffer.info[start].use_category() == category::R && end - start > 1 {
430 // Got a repha. Reorder it towards the end, but before the first post-base glyph.
431 for i in start + 1..end {
432 let is_post_base_glyph =
433 (rb_flag64_unsafe(buffer.info[i].use_category() as u32) & BASE_FLAGS) != 0
434 || buffer.info[i].is_halant_use();
435
436 if is_post_base_glyph || i == end - 1 {
437 // If we hit a post-base glyph, move before it; otherwise move to the
438 // end. Shift things in between backward.
439
440 let mut i = i;
441 if is_post_base_glyph {
442 i -= 1;
443 }
444
445 buffer.merge_clusters(start, i + 1);
446 let t = buffer.info[start];
447 for k in 0..i - start {
448 buffer.info[k + start] = buffer.info[k + start + 1];
449 }
450 buffer.info[i] = t;
451
452 break;
453 }
454 }
455 }
456
457 // Move things back.
458 let mut j = start;
459 for i in start..end {
460 let flag = rb_flag_unsafe(buffer.info[i].use_category() as u32);
461 if buffer.info[i].is_halant_use() {
462 // If we hit a halant, move after it; otherwise move to the beginning, and
463 // shift things in between forward.
464 j = i + 1;
465 } else if (flag & (category_flag(category::VPRE) | category_flag(category::VMPRE))) != 0
466 && buffer.info[i].lig_comp() == 0
467 && j < i
468 {
469 // Only move the first component of a MultipleSubst.
470 buffer.merge_clusters(j, i + 1);
471 let t = buffer.info[i];
472 for k in (0..i - j).rev() {
473 buffer.info[k + j + 1] = buffer.info[k + j];
474 }
475 buffer.info[j] = t;
476 }
477 }
478}
479
480fn record_pref(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
481 let mut start: usize = 0;
482 let mut end: usize = buffer.next_syllable(start:0);
483 while start < buffer.len {
484 // Mark a substituted pref as VPre, as they behave the same way.
485 for i: usize in start..end {
486 if buffer.info[i].is_substituted() {
487 buffer.info[i].set_use_category(category::VPRE);
488 break;
489 }
490 }
491
492 start = end;
493 end = buffer.next_syllable(start);
494 }
495}
496
497fn has_arabic_joining(script: Script) -> bool {
498 // List of scripts that have data in arabic-table.
499 matches!(
500 script,
501 script::ADLAM
502 | script::ARABIC
503 | script::CHORASMIAN
504 | script::HANIFI_ROHINGYA
505 | script::MANDAIC
506 | script::MANICHAEAN
507 | script::MONGOLIAN
508 | script::NKO
509 | script::PHAGS_PA
510 | script::PSALTER_PAHLAVI
511 | script::SOGDIAN
512 | script::SYRIAC
513 )
514}
515
/// Text pre-processing hook of the universal shaper.
///
/// Delegates to `vowel_constraints::preprocess_text_vowel_constraints`; the
/// plan and face arguments are unused.
fn preprocess_text(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
    super::vowel_constraints::preprocess_text_vowel_constraints(buffer);
}
519
520fn compose(_: &ShapeNormalizeContext, a: char, b: char) -> Option<char> {
521 // Avoid recomposing split matras.
522 if a.general_category().is_mark() {
523 return None;
524 }
525
526 crate::unicode::compose(a, b)
527}
528
529fn setup_masks(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) {
530 let universal_plan: &UniversalShapePlan = plan.data::<UniversalShapePlan>();
531
532 // Do this before allocating use_category().
533 if let Some(ref arabic_plan: &ArabicShapePlan) = universal_plan.arabic_plan {
534 super::arabic::setup_masks_inner(arabic_plan, plan.script, buffer);
535 }
536
537 // We cannot setup masks here. We save information about characters
538 // and setup masks later on in a pause-callback.
539 for info: &mut GlyphInfo in buffer.info_slice_mut() {
540 info.set_use_category(super::universal_table::get_category(info.glyph_id));
541 }
542}
543