1use alloc::boxed::Box;
2use core::cmp;
3use core::convert::TryFrom;
4use core::ops::Range;
5
6use ttf_parser::GlyphId;
7
8use super::*;
9use crate::buffer::Buffer;
10use crate::normalize::ShapeNormalizationMode;
11use crate::ot::{
12 feature, FeatureFlags, LayoutTable, Map, TableIndex, WouldApply, WouldApplyContext,
13};
14use crate::plan::{ShapePlan, ShapePlanner};
15use crate::unicode::{hb_gc, CharExt, GeneralCategoryExt};
16use crate::{script, Face, GlyphInfo, Mask, Script, Tag};
17
/// Shaper entry for the Indic scripts: registers this file's callbacks in a `ComplexShaper`.
///
/// Hooks set to `None` (`postprocess_glyphs`, `gpos_tag`, `reorder_marks`,
/// `zero_width_marks`) are not provided by this shaper.
pub const INDIC_SHAPER: ComplexShaper = ComplexShaper {
    collect_features: Some(collect_features),
    override_features: Some(override_features),
    // Per-plan data is an `IndicShapePlan` built from the shape plan.
    create_data: Some(|plan: &ShapePlan| Box::new(IndicShapePlan::new(plan))),
    preprocess_text: Some(preprocess_text),
    postprocess_glyphs: None,
    normalization_mode: Some(ShapeNormalizationMode::ComposedDiacriticsNoShortCircuit),
    decompose: Some(decompose),
    compose: Some(compose),
    // Only records per-glyph category/position; see the comment inside `setup_masks`.
    setup_masks: Some(setup_masks),
    gpos_tag: None,
    reorder_marks: None,
    zero_width_marks: None,
    fallback_position: false,
};
33
/// Per-glyph category code; stored and read through
/// `GlyphInfo::set_indic_category` / `GlyphInfo::indic_category`.
pub type Category = u8;
/// Category codes. `category_flag` turns a code into a bit mask, so the numeric
/// values matter and must not be renumbered casually.
pub mod category {
    pub const X: u8 = 0;
    pub const C: u8 = 1;
    pub const V: u8 = 2;
    pub const N: u8 = 3;
    pub const H: u8 = 4;
    pub const ZWNJ: u8 = 5;
    pub const ZWJ: u8 = 6;
    pub const M: u8 = 7;
    pub const SM: u8 = 8;
    // OT_VD = 9, UNUSED; we use OT_A instead.
    pub const A: u8 = 10;
    pub const PLACEHOLDER: u8 = 11;
    pub const DOTTED_CIRCLE: u8 = 12;
    pub const RS: u8 = 13; // Register Shifter, used in Khmer OT spec.
    pub const COENG: u8 = 14; // Khmer-style Virama.
    pub const REPHA: u8 = 15; // Atomically-encoded logical or visual repha.
    pub const RA: u8 = 16;
    pub const CM: u8 = 17; // Consonant-Medial.
    pub const SYMBOL: u8 = 18; // Avagraha, etc that take marks (SM,A,VD).
    pub const CS: u8 = 19;
    pub const ROBATIC: u8 = 20;
    pub const X_GROUP: u8 = 21;
    pub const Y_GROUP: u8 = 22;
    pub const MW: u8 = 23;
    pub const MY: u8 = 24;
    pub const PT: u8 = 25;
    // The following are used by Khmer & Myanmar shapers. Defined here for them to share.
    pub const V_AVB: u8 = 26;
    pub const V_BLW: u8 = 27;
    pub const V_PRE: u8 = 28;
    pub const V_PST: u8 = 29;
    pub const VS: u8 = 30; // Variation selectors
    pub const P: u8 = 31; // Punctuation
    pub const D: u8 = 32; // Digits except zero
    pub const ML: u8 = 33; // Medial la
}
72
/// Per-glyph position code; stored and read through
/// `GlyphInfo::set_indic_position` / `GlyphInfo::indic_position`.
pub type Position = u8;
/// Position codes, numbered consecutively from `START` (0) to `END` (15).
/// NOTE(review): the numeric order presumably encodes the visual ordering used
/// when reordering a syllable — confirm against the reordering code.
pub mod position {
    pub const START: u8 = 0;
    pub const RA_TO_BECOME_REPH: u8 = 1;
    pub const PRE_M: u8 = 2;
    pub const PRE_C: u8 = 3;
    pub const BASE_C: u8 = 4;
    pub const AFTER_MAIN: u8 = 5;
    pub const ABOVE_C: u8 = 6;
    pub const BEFORE_SUB: u8 = 7;
    pub const BELOW_C: u8 = 8;
    pub const AFTER_SUB: u8 = 9;
    pub const BEFORE_POST: u8 = 10;
    pub const POST_C: u8 = 11;
    pub const AFTER_POST: u8 = 12;
    pub const FINAL_C: u8 = 13;
    pub const SMVD: u8 = 14;
    pub const END: u8 = 15;
}
92
/// Syllabic classification labels.
///
/// NOTE(review): the variant names appear to follow the values of Unicode's
/// `Indic_Syllabic_Category` property — confirm against the table generator.
/// `dead_code` is allowed because not every variant has to be referenced.
#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq)]
pub enum SyllabicCategory {
    Other,
    Avagraha,
    Bindu,
    BrahmiJoiningNumber,
    CantillationMark,
    Consonant,
    ConsonantDead,
    ConsonantFinal,
    ConsonantHeadLetter,
    ConsonantInitialPostfixed,
    ConsonantKiller,
    ConsonantMedial,
    ConsonantPlaceholder,
    ConsonantPrecedingRepha,
    ConsonantPrefixed,
    ConsonantSubjoined,
    ConsonantSucceedingRepha,
    ConsonantWithStacker,
    GeminationMark,
    InvisibleStacker,
    Joiner,
    ModifyingLetter,
    NonJoiner,
    Nukta,
    Number,
    NumberJoiner,
    PureKiller,
    RegisterShifter,
    SyllableModifier,
    ToneLetter,
    ToneMark,
    Virama,
    Visarga,
    Vowel,
    VowelDependent,
    VowelIndependent,
}
133
/// Placement labels for dependent vowels (matras) relative to their consonant.
///
/// NOTE(review): the variant names appear to follow the values of Unicode's
/// `Indic_Positional_Category` property — confirm against the table generator.
/// `dead_code` is allowed because not every variant has to be referenced.
#[allow(dead_code)]
#[derive(Clone, Copy)]
pub enum MatraCategory {
    NotApplicable,
    Left,
    Top,
    Bottom,
    Right,
    BottomAndLeft,
    BottomAndRight,
    LeftAndRight,
    TopAndBottom,
    TopAndBottomAndRight,
    TopAndBottomAndLeft,
    TopAndLeft,
    TopAndLeftAndRight,
    TopAndRight,
    Overstruck,
    VisualOrderLeft,
}
154
/// GSUB features registered by `collect_features`, paired with their flags.
///
/// Order is significant: the constants in `indic_feature` index into this
/// array, and `IndicShapePlan::mask_array` holds one mask per entry.
const INDIC_FEATURES: &[(Tag, FeatureFlags)] = &[
    // Basic features.
    // These features are applied in order, one at a time, after initial_reordering,
    // constrained to the syllable.
    (feature::NUKTA_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
    (feature::AKHANDS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
    (feature::REPH_FORMS, FeatureFlags::MANUAL_JOINERS),
    (feature::RAKAR_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
    (feature::PRE_BASE_FORMS, FeatureFlags::MANUAL_JOINERS),
    (feature::BELOW_BASE_FORMS, FeatureFlags::MANUAL_JOINERS),
    (feature::ABOVE_BASE_FORMS, FeatureFlags::MANUAL_JOINERS),
    (feature::HALF_FORMS, FeatureFlags::MANUAL_JOINERS),
    (feature::POST_BASE_FORMS, FeatureFlags::MANUAL_JOINERS),
    (feature::VATTU_VARIANTS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
    (feature::CONJUNCT_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
    // Other features.
    // These features are applied all at once, after final_reordering, constrained
    // to the syllable.
    // Default Bengali font in Windows for example has intermixed
    // lookups for init,pres,abvs,blws features.
    (feature::INITIAL_FORMS, FeatureFlags::MANUAL_JOINERS),
    (
        feature::PRE_BASE_SUBSTITUTIONS,
        FeatureFlags::GLOBAL_MANUAL_JOINERS,
    ),
    (
        feature::ABOVE_BASE_SUBSTITUTIONS,
        FeatureFlags::GLOBAL_MANUAL_JOINERS,
    ),
    (
        feature::BELOW_BASE_SUBSTITUTIONS,
        FeatureFlags::GLOBAL_MANUAL_JOINERS,
    ),
    (
        feature::POST_BASE_SUBSTITUTIONS,
        FeatureFlags::GLOBAL_MANUAL_JOINERS,
    ),
    (feature::HALANT_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
];
194
// Must be in the same order as the INDIC_FEATURES array.
/// Named indices into `INDIC_FEATURES` (and therefore into
/// `IndicShapePlan::mask_array`). `dead_code` is allowed because only some of
/// the indices are referenced.
#[allow(dead_code)]
mod indic_feature {
    pub const NUKT: usize = 0;
    pub const AKHN: usize = 1;
    pub const RPHF: usize = 2;
    pub const RKRF: usize = 3;
    pub const PREF: usize = 4;
    pub const BLWF: usize = 5;
    pub const ABVF: usize = 6;
    pub const HALF: usize = 7;
    pub const PSTF: usize = 8;
    pub const VATU: usize = 9;
    pub const CJCT: usize = 10;
    pub const INIT: usize = 11;
    pub const PRES: usize = 12;
    pub const ABVS: usize = 13;
    pub const BLWS: usize = 14;
    pub const PSTS: usize = 15;
    pub const HALN: usize = 16;
}
216
217const fn category_flag(c: Category) -> u32 {
218 rb_flag(c as u32)
219}
220
/// Flag for the consonant-medial category (`category::CM`).
const MEDIAL_FLAGS: u32 = category_flag(category::CM);
// Note:
//
// We treat Vowels and placeholders as if they were consonants. This is safe because Vowels
// cannot happen in a consonant syllable. The plus side however is, we can call the
// consonant syllable logic from the vowel syllable function and get it all right!
/// Flags of every category that `GlyphInfo::is_consonant` accepts.
const CONSONANT_FLAGS: u32 = category_flag(category::C)
    | category_flag(category::CS)
    | category_flag(category::RA)
    | MEDIAL_FLAGS
    | category_flag(category::V)
    | category_flag(category::PLACEHOLDER)
    | category_flag(category::DOTTED_CIRCLE);
/// Flags of the two joiner categories (ZWJ and ZWNJ), tested by `GlyphInfo::is_joiner`.
const JOINER_FLAGS: u32 = category_flag(category::ZWJ) | category_flag(category::ZWNJ);
235
// This is a hack for now. We should move this data into the main Indic table.
// Or completely remove it and just check in the tables.
/// Ra code points, one or two per script. `GlyphInfo::set_indic_properties`
/// re-categorises a consonant-class glyph as `category::RA` when its code
/// point is listed here.
const RA_CHARS: &[u32] = &[
    0x0930, // Devanagari
    0x09B0, // Bengali
    0x09F0, // Bengali
    0x0A30, // Gurmukhi. No Reph
    0x0AB0, // Gujarati
    0x0B30, // Oriya
    0x0BB0, // Tamil. No Reph
    0x0C30, // Telugu. Reph formed only with ZWJ
    0x0CB0, // Kannada
    0x0D30, // Malayalam. No Reph, Logical Repha
    0x0DBB, // Sinhala. Reph formed only with ZWJ
];
251
/// Strategy used to locate the base consonant of a syllable.
#[derive(Clone, Copy, PartialEq)]
enum BasePosition {
    /// Sinhala variant: ZWJ is handled differently and consonant positions are
    /// not looked up in the font.
    LastSinhala,
    /// Search backwards from the end of the syllable, using consonant
    /// positions derived from the font (see `update_consonant_positions`).
    Last,
}
257
/// Where a reph is placed for a given script. Each discriminant equals the
/// `position::*` constant of the same name.
#[derive(Clone, Copy, PartialEq)]
enum RephPosition {
    AfterMain = position::AFTER_MAIN as isize,
    BeforeSub = position::BEFORE_SUB as isize,
    AfterSub = position::AFTER_SUB as isize,
    BeforePost = position::BEFORE_POST as isize,
    AfterPost = position::AFTER_POST as isize,
}
266
/// How a script forms its reph; selected per script in `INDIC_CONFIGS`.
#[derive(Clone, Copy, PartialEq)]
enum RephMode {
    /// Reph formed out of initial Ra,H sequence.
    Implicit,
    /// Reph formed out of initial Ra,H,ZWJ sequence.
    Explicit,
    /// Encoded Repha character, needs reordering.
    LogRepha,
}
276
/// Which consonants the below-forms feature is applied to; selected per script
/// in `INDIC_CONFIGS`.
#[derive(Clone, Copy, PartialEq)]
enum BlwfMode {
    /// Below-forms feature applied to pre-base and post-base.
    PreAndPost,
    /// Below-forms feature applied to post-base only.
    PostOnly,
}
284
/// One row of the per-script configuration table `INDIC_CONFIGS`.
#[derive(Clone, Copy)]
struct IndicConfig {
    // Script the row applies to; `None` for the default row.
    script: Option<Script>,
    // Whether the script has an old-spec variant; combined with the chosen
    // GSUB script tag in `IndicShapePlan::new` to derive `is_old_spec`.
    has_old_spec: bool,
    // Virama code point; 0 means "none" (checked in `update_consonant_positions`).
    virama: u32,
    base_pos: BasePosition,
    reph_pos: RephPosition,
    reph_mode: RephMode,
    blwf_mode: BlwfMode,
}
295
296impl IndicConfig {
297 const fn new(
298 script: Option<Script>,
299 has_old_spec: bool,
300 virama: u32,
301 base_pos: BasePosition,
302 reph_pos: RephPosition,
303 reph_mode: RephMode,
304 blwf_mode: BlwfMode,
305 ) -> Self {
306 IndicConfig {
307 script,
308 has_old_spec,
309 virama,
310 base_pos,
311 reph_pos,
312 reph_mode,
313 blwf_mode,
314 }
315 }
316}
317
318const INDIC_CONFIGS: &[IndicConfig] = &[
319 IndicConfig::new(
320 script:None,
321 has_old_spec:false,
322 virama:0,
323 base_pos:BasePosition::Last,
324 reph_pos:RephPosition::BeforePost,
325 RephMode::Implicit,
326 BlwfMode::PreAndPost,
327 ),
328 IndicConfig::new(
329 script:Some(script::DEVANAGARI),
330 has_old_spec:true,
331 virama:0x094D,
332 base_pos:BasePosition::Last,
333 reph_pos:RephPosition::BeforePost,
334 RephMode::Implicit,
335 BlwfMode::PreAndPost,
336 ),
337 IndicConfig::new(
338 script:Some(script::BENGALI),
339 has_old_spec:true,
340 virama:0x09CD,
341 base_pos:BasePosition::Last,
342 reph_pos:RephPosition::AfterSub,
343 RephMode::Implicit,
344 BlwfMode::PreAndPost,
345 ),
346 IndicConfig::new(
347 script:Some(script::GURMUKHI),
348 has_old_spec:true,
349 virama:0x0A4D,
350 base_pos:BasePosition::Last,
351 reph_pos:RephPosition::BeforeSub,
352 RephMode::Implicit,
353 BlwfMode::PreAndPost,
354 ),
355 IndicConfig::new(
356 script:Some(script::GUJARATI),
357 has_old_spec:true,
358 virama:0x0ACD,
359 base_pos:BasePosition::Last,
360 reph_pos:RephPosition::BeforePost,
361 RephMode::Implicit,
362 BlwfMode::PreAndPost,
363 ),
364 IndicConfig::new(
365 script:Some(script::ORIYA),
366 has_old_spec:true,
367 virama:0x0B4D,
368 base_pos:BasePosition::Last,
369 reph_pos:RephPosition::AfterMain,
370 RephMode::Implicit,
371 BlwfMode::PreAndPost,
372 ),
373 IndicConfig::new(
374 script:Some(script::TAMIL),
375 has_old_spec:true,
376 virama:0x0BCD,
377 base_pos:BasePosition::Last,
378 reph_pos:RephPosition::AfterPost,
379 RephMode::Implicit,
380 BlwfMode::PreAndPost,
381 ),
382 IndicConfig::new(
383 script:Some(script::TELUGU),
384 has_old_spec:true,
385 virama:0x0C4D,
386 base_pos:BasePosition::Last,
387 reph_pos:RephPosition::AfterPost,
388 RephMode::Explicit,
389 BlwfMode::PostOnly,
390 ),
391 IndicConfig::new(
392 script:Some(script::KANNADA),
393 has_old_spec:true,
394 virama:0x0CCD,
395 base_pos:BasePosition::Last,
396 reph_pos:RephPosition::AfterPost,
397 RephMode::Implicit,
398 BlwfMode::PostOnly,
399 ),
400 IndicConfig::new(
401 script:Some(script::MALAYALAM),
402 has_old_spec:true,
403 virama:0x0D4D,
404 base_pos:BasePosition::Last,
405 reph_pos:RephPosition::AfterMain,
406 RephMode::LogRepha,
407 BlwfMode::PreAndPost,
408 ),
409 IndicConfig::new(
410 script:Some(script::SINHALA),
411 has_old_spec:false,
412 virama:0x0DCA,
413 base_pos:BasePosition::LastSinhala,
414 reph_pos:RephPosition::AfterPost,
415 RephMode::Explicit,
416 BlwfMode::PreAndPost,
417 ),
418];
419
/// Cached information needed to ask whether one GSUB feature would substitute
/// a given glyph sequence.
struct IndicWouldSubstituteFeature {
    // GSUB lookup indices of the feature's stage; empty (`0..0`) when the
    // feature has no stage in the map.
    lookups: Range<usize>,
    // Forwarded unchanged into `WouldApplyContext::zero_context`.
    zero_context: bool,
}
424
425impl IndicWouldSubstituteFeature {
426 pub fn new(map: &Map, feature_tag: Tag, zero_context: bool) -> Self {
427 IndicWouldSubstituteFeature {
428 lookups: match map.feature_stage(TableIndex::GSUB, feature_tag) {
429 Some(stage) => map.stage_lookup_range(TableIndex::GSUB, stage),
430 None => 0..0,
431 },
432 zero_context,
433 }
434 }
435
436 pub fn would_substitute(&self, map: &Map, face: &Face, glyphs: &[GlyphId]) -> bool {
437 for index in self.lookups.clone() {
438 let lookup = map.lookup(TableIndex::GSUB, index);
439 let ctx = WouldApplyContext {
440 glyphs,
441 zero_context: self.zero_context,
442 };
443 if face
444 .gsub
445 .as_ref()
446 .and_then(|table| table.get_lookup(lookup.index))
447 .map_or(false, |lookup| lookup.would_apply(&ctx))
448 {
449 return true;
450 }
451 }
452
453 false
454 }
455}
456
/// Indic-specific data attached to a shape plan (created through
/// `INDIC_SHAPER.create_data`).
struct IndicShapePlan {
    // Row of `INDIC_CONFIGS` selected for the plan's script.
    config: IndicConfig,
    // True when the script has an old spec and the chosen GSUB script tag
    // does not end in '2'.
    is_old_spec: bool,
    // virama_glyph: Option<u32>,
    // "Would substitute" probes for the features consulted during reordering.
    rphf: IndicWouldSubstituteFeature,
    pref: IndicWouldSubstituteFeature,
    blwf: IndicWouldSubstituteFeature,
    pstf: IndicWouldSubstituteFeature,
    vatu: IndicWouldSubstituteFeature,
    // One mask per `INDIC_FEATURES` entry; 0 for features flagged GLOBAL.
    mask_array: [Mask; INDIC_FEATURES.len()],
}
468
469impl IndicShapePlan {
470 fn new(plan: &ShapePlan) -> Self {
471 let script = plan.script;
472 let config = if let Some(c) = INDIC_CONFIGS.iter().skip(1).find(|c| c.script == script) {
473 *c
474 } else {
475 INDIC_CONFIGS[0]
476 };
477
478 let is_old_spec = config.has_old_spec
479 && plan
480 .ot_map
481 .chosen_script(TableIndex::GSUB)
482 .map_or(true, |tag| tag.to_bytes()[3] != b'2');
483
484 // Use zero-context would_substitute() matching for new-spec of the main
485 // Indic scripts, and scripts with one spec only, but not for old-specs.
486 // The new-spec for all dual-spec scripts says zero-context matching happens.
487 //
488 // However, testing with Malayalam shows that old and new spec both allow
489 // context. Testing with Bengali new-spec however shows that it doesn't.
490 // So, the heuristic here is the way it is. It should *only* be changed,
491 // as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE.
492 let zero_context = is_old_spec && script != Some(script::MALAYALAM);
493
494 let mut mask_array = [0; INDIC_FEATURES.len()];
495 for (i, feature) in INDIC_FEATURES.iter().enumerate() {
496 mask_array[i] = if feature.1.contains(FeatureFlags::GLOBAL) {
497 0
498 } else {
499 plan.ot_map.one_mask(feature.0)
500 }
501 }
502
503 // TODO: what is this?
504 // let mut virama_glyph = None;
505 // if config.virama != 0 {
506 // if let Some(g) = face.glyph_index(char::try_from(config.virama).unwrap()) {
507 // virama_glyph = Some(g.0 as u32);
508 // }
509 // }
510
511 IndicShapePlan {
512 config,
513 is_old_spec,
514 // virama_glyph,
515 rphf: IndicWouldSubstituteFeature::new(&plan.ot_map, feature::REPH_FORMS, zero_context),
516 pref: IndicWouldSubstituteFeature::new(
517 &plan.ot_map,
518 feature::PRE_BASE_FORMS,
519 zero_context,
520 ),
521 blwf: IndicWouldSubstituteFeature::new(
522 &plan.ot_map,
523 feature::BELOW_BASE_FORMS,
524 zero_context,
525 ),
526 pstf: IndicWouldSubstituteFeature::new(
527 &plan.ot_map,
528 feature::POST_BASE_FORMS,
529 zero_context,
530 ),
531 vatu: IndicWouldSubstituteFeature::new(
532 &plan.ot_map,
533 feature::VATTU_VARIANTS,
534 zero_context,
535 ),
536 mask_array,
537 }
538 }
539}
540
541impl GlyphInfo {
542 pub(crate) fn indic_category(&self) -> Category {
543 self.complex_var_u8_category()
544 }
545
546 pub(crate) fn set_indic_category(&mut self, c: Category) {
547 self.set_complex_var_u8_category(c)
548 }
549
550 pub(crate) fn indic_position(&self) -> Position {
551 self.complex_var_u8_auxiliary()
552 }
553
554 pub(crate) fn set_indic_position(&mut self, c: Position) {
555 self.set_complex_var_u8_auxiliary(c)
556 }
557
558 fn is_one_of(&self, flags: u32) -> bool {
559 // If it ligated, all bets are off.
560 if self.is_ligated() {
561 return false;
562 }
563
564 rb_flag_unsafe(self.indic_category() as u32) & flags != 0
565 }
566
567 fn is_joiner(&self) -> bool {
568 self.is_one_of(JOINER_FLAGS)
569 }
570
571 pub(crate) fn is_consonant(&self) -> bool {
572 self.is_one_of(CONSONANT_FLAGS)
573 }
574
575 fn is_halant(&self) -> bool {
576 self.is_one_of(rb_flag(category::H as u32))
577 }
578
579 fn set_indic_properties(&mut self) {
580 let u = self.glyph_id;
581 let (mut cat, mut pos) = get_category_and_position(u);
582
583 // Re-assign category
584
585 // The following act more like the Bindus.
586 match u {
587 0x0953..=0x0954 => cat = category::SM,
588 // The following act like consonants.
589 0x0A72..=0x0A73 | 0x1CF5..=0x1CF6 => cat = category::C,
590 // TODO: The following should only be allowed after a Visarga.
591 // For now, just treat them like regular tone marks.
592 0x1CE2..=0x1CE8 => cat = category::A,
593 // TODO: The following should only be allowed after some of
594 // the nasalization marks, maybe only for U+1CE9..U+1CF1.
595 // For now, just treat them like tone marks.
596 0x1CED => cat = category::A,
597 // The following take marks in standalone clusters, similar to Avagraha.
598 0xA8F2..=0xA8F7 | 0x1CE9..=0x1CEC | 0x1CEE..=0x1CF1 => cat = category::SYMBOL,
599 // https://github.com/harfbuzz/harfbuzz/issues/524
600 0x0A51 => {
601 cat = category::M;
602 pos = position::BELOW_C;
603 }
604 // According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil,
605 // so the Indic shaper needs to know their categories.
606 0x11301 | 0x11303 => cat = category::SM,
607 0x1133B | 0x1133C => cat = category::N,
608 // https://github.com/harfbuzz/harfbuzz/issues/552
609 0x0AFB => cat = category::N,
610 // https://github.com/harfbuzz/harfbuzz/issues/2849
611 0x0B55 => cat = category::N,
612 // https://github.com/harfbuzz/harfbuzz/issues/538
613 0x0980 => cat = category::PLACEHOLDER,
614 // https://github.com/harfbuzz/harfbuzz/issues/1613
615 0x09FC => cat = category::PLACEHOLDER,
616 // https://github.com/harfbuzz/harfbuzz/issues/623
617 0x0C80 => cat = category::PLACEHOLDER,
618 0x2010 | 0x2011 => cat = category::PLACEHOLDER,
619 0x25CC => cat = category::DOTTED_CIRCLE,
620 _ => {}
621 }
622
623 // Re-assign position.
624
625 if (rb_flag_unsafe(cat as u32) & CONSONANT_FLAGS) != 0 {
626 pos = position::BASE_C;
627 if RA_CHARS.contains(&u) {
628 cat = category::RA;
629 }
630 } else if cat == category::M {
631 pos = matra_position_indic(u, pos);
632 } else if (rb_flag_unsafe(cat as u32)
633 & (category_flag(category::SM)
634 | category_flag(category::A)
635 | category_flag(category::SYMBOL)))
636 != 0
637 {
638 pos = position::SMVD;
639 }
640
641 // Oriya Bindu is BeforeSub in the spec.
642 if u == 0x0B01 {
643 pos = position::BEFORE_SUB;
644 }
645
646 self.set_indic_category(cat);
647 self.set_indic_position(pos);
648 }
649}
650
651fn collect_features(planner: &mut ShapePlanner) {
652 // Do this before any lookups have been applied.
653 planner.ot_map.add_gsub_pause(Some(setup_syllables));
654
655 planner
656 .ot_map
657 .enable_feature(feature::LOCALIZED_FORMS, FeatureFlags::empty(), 1);
658 // The Indic specs do not require ccmp, but we apply it here since if
659 // there is a use of it, it's typically at the beginning.
660 planner.ot_map.enable_feature(
661 feature::GLYPH_COMPOSITION_DECOMPOSITION,
662 FeatureFlags::empty(),
663 1,
664 );
665
666 planner.ot_map.add_gsub_pause(Some(initial_reordering));
667
668 for feature in INDIC_FEATURES.iter().take(10) {
669 planner.ot_map.add_feature(feature.0, feature.1, 1);
670 planner.ot_map.add_gsub_pause(None);
671 }
672
673 planner.ot_map.add_gsub_pause(Some(final_reordering));
674 planner
675 .ot_map
676 .add_gsub_pause(Some(crate::ot::clear_syllables));
677
678 for feature in INDIC_FEATURES.iter().skip(10) {
679 planner.ot_map.add_feature(feature.0, feature.1, 1);
680 }
681}
682
683fn override_features(planner: &mut ShapePlanner) {
684 planner.ot_map.disable_feature(tag:feature::STANDARD_LIGATURES);
685}
686
687fn preprocess_text(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
688 super::vowel_constraints::preprocess_text_vowel_constraints(buffer);
689}
690
691fn decompose(ctx: &ShapeNormalizeContext, ab: char) -> Option<(char, char)> {
692 // Don't decompose these.
693 match ab {
694 '\u{0931}' | // DEVANAGARI LETTER RRA
695 // https://github.com/harfbuzz/harfbuzz/issues/779
696 '\u{09DC}' | // BENGALI LETTER RRA
697 '\u{09DD}' | // BENGALI LETTER RHA
698 '\u{0B94}' => return None, // TAMIL LETTER AU
699 _ => {}
700 }
701
702 if ab == '\u{0DDA}' || ('\u{0DDC}'..='\u{0DDE}').contains(&ab) {
703 // Sinhala split matras... Let the fun begin.
704 //
705 // These four characters have Unicode decompositions. However, Uniscribe
706 // decomposes them "Khmer-style", that is, it uses the character itself to
707 // get the second half. The first half of all four decompositions is always
708 // U+0DD9.
709 //
710 // Now, there are buggy fonts, namely, the widely used lklug.ttf, that are
711 // broken with Uniscribe. But we need to support them. As such, we only
712 // do the Uniscribe-style decomposition if the character is transformed into
713 // its "sec.half" form by the 'pstf' feature. Otherwise, we fall back to
714 // Unicode decomposition.
715 //
716 // Note that we can't unconditionally use Unicode decomposition. That would
717 // break some other fonts, that are designed to work with Uniscribe, and
718 // don't have positioning features for the Unicode-style decomposition.
719 //
720 // Argh...
721 //
722 // The Uniscribe behavior is now documented in the newly published Sinhala
723 // spec in 2012:
724 //
725 // https://docs.microsoft.com/en-us/typography/script-development/sinhala#shaping
726
727 let mut ok = false;
728 if let Some(g) = ctx.face.glyph_index(u32::from(ab)) {
729 let indic_plan = ctx.plan.data::<IndicShapePlan>();
730 ok = indic_plan
731 .pstf
732 .would_substitute(&ctx.plan.ot_map, ctx.face, &[g]);
733 }
734
735 if ok {
736 // Ok, safe to use Uniscribe-style decomposition.
737 return Some(('\u{0DD9}', ab));
738 }
739 }
740
741 crate::unicode::decompose(ab)
742}
743
744fn compose(_: &ShapeNormalizeContext, a: char, b: char) -> Option<char> {
745 // Avoid recomposing split matras.
746 if a.general_category().is_mark() {
747 return None;
748 }
749
750 // Composition-exclusion exceptions that we want to recompose.
751 if a == '\u{09AF}' && b == '\u{09BC}' {
752 return Some('\u{09DF}');
753 }
754
755 crate::unicode::compose(a, b)
756}
757
758fn setup_masks(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
759 // We cannot setup masks here. We save information about characters
760 // and setup masks later on in a pause-callback.
761 for info: &mut GlyphInfo in buffer.info_slice_mut() {
762 info.set_indic_properties();
763 }
764}
765
766fn setup_syllables(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
767 super::indic_machine::find_syllables_indic(buffer);
768
769 let mut start: usize = 0;
770 let mut end: usize = buffer.next_syllable(start:0);
771 while start < buffer.len {
772 buffer.unsafe_to_break(start:Some(start), end:Some(end));
773 start = end;
774 end = buffer.next_syllable(start);
775 }
776}
777
778fn initial_reordering(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) {
779 use super::indic_machine::SyllableType;
780
781 let indic_plan: &IndicShapePlan = plan.data::<IndicShapePlan>();
782
783 update_consonant_positions(plan, indic_plan, face, buffer);
784 syllabic::insert_dotted_circles(
785 face,
786 buffer,
787 broken_syllable_type:SyllableType::BrokenCluster as u8,
788 dottedcircle_category:category::DOTTED_CIRCLE,
789 repha_category:Some(category::REPHA),
790 dottedcircle_position:Some(position::END),
791 );
792
793 let mut start: usize = 0;
794 let mut end: usize = buffer.next_syllable(start:0);
795 while start < buffer.len {
796 initial_reordering_syllable(plan, indic_plan, face, start, end, buffer);
797 start = end;
798 end = buffer.next_syllable(start);
799 }
800}
801
802fn update_consonant_positions(
803 plan: &ShapePlan,
804 indic_plan: &IndicShapePlan,
805 face: &Face,
806 buffer: &mut Buffer,
807) {
808 if indic_plan.config.base_pos != BasePosition::Last {
809 return;
810 }
811
812 let mut virama_glyph: Option = None;
813 if indic_plan.config.virama != 0 {
814 virama_glyph = face.glyph_index(indic_plan.config.virama);
815 }
816
817 if let Some(virama: GlyphId) = virama_glyph {
818 for info: &mut GlyphInfo in buffer.info_slice_mut() {
819 if info.indic_position() == position::BASE_C {
820 let consonant: GlyphId = info.as_glyph();
821 info.set_indic_position(consonant_position_from_face(
822 plan, indic_plan, face, consonant, virama,
823 ));
824 }
825 }
826 }
827}
828
829fn consonant_position_from_face(
830 plan: &ShapePlan,
831 indic_plan: &IndicShapePlan,
832 face: &Face,
833 consonant: GlyphId,
834 virama: GlyphId,
835) -> u8 {
836 // For old-spec, the order of glyphs is Consonant,Virama,
837 // whereas for new-spec, it's Virama,Consonant. However,
838 // some broken fonts (like Free Sans) simply copied lookups
839 // from old-spec to new-spec without modification.
840 // And oddly enough, Uniscribe seems to respect those lookups.
841 // Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds
842 // base at 0. The font however, only has lookups matching
843 // 930,94D in 'blwf', not the expected 94D,930 (with new-spec
844 // table). As such, we simply match both sequences. Seems
845 // to work.
846 //
847 // Vatu is done as well, for:
848 // https://github.com/harfbuzz/harfbuzz/issues/1587
849
850 if indic_plan
851 .blwf
852 .would_substitute(&plan.ot_map, face, &[virama, consonant])
853 || indic_plan
854 .blwf
855 .would_substitute(&plan.ot_map, face, &[consonant, virama])
856 || indic_plan
857 .vatu
858 .would_substitute(&plan.ot_map, face, &[virama, consonant])
859 || indic_plan
860 .vatu
861 .would_substitute(&plan.ot_map, face, &[consonant, virama])
862 {
863 return position::BELOW_C;
864 }
865
866 if indic_plan
867 .pstf
868 .would_substitute(&plan.ot_map, face, &[virama, consonant])
869 || indic_plan
870 .pstf
871 .would_substitute(&plan.ot_map, face, &[consonant, virama])
872 {
873 return position::POST_C;
874 }
875
876 if indic_plan
877 .pref
878 .would_substitute(&plan.ot_map, face, &[virama, consonant])
879 || indic_plan
880 .pref
881 .would_substitute(&plan.ot_map, face, &[consonant, virama])
882 {
883 return position::POST_C;
884 }
885
886 position::BASE_C
887}
888
889fn initial_reordering_syllable(
890 plan: &ShapePlan,
891 indic_plan: &IndicShapePlan,
892 face: &Face,
893 start: usize,
894 end: usize,
895 buffer: &mut Buffer,
896) {
897 use super::indic_machine::SyllableType;
898
899 let syllable_type = match buffer.info[start].syllable() & 0x0F {
900 0 => SyllableType::ConsonantSyllable,
901 1 => SyllableType::VowelSyllable,
902 2 => SyllableType::StandaloneCluster,
903 3 => SyllableType::SymbolCluster,
904 4 => SyllableType::BrokenCluster,
905 5 => SyllableType::NonIndicCluster,
906 _ => unreachable!(),
907 };
908
909 match syllable_type {
910 // We made the vowels look like consonants. So let's call the consonant logic!
911 SyllableType::VowelSyllable | SyllableType::ConsonantSyllable => {
912 initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer);
913 }
914 // We already inserted dotted-circles, so just call the standalone_cluster.
915 SyllableType::BrokenCluster | SyllableType::StandaloneCluster => {
916 initial_reordering_standalone_cluster(plan, indic_plan, face, start, end, buffer);
917 }
918 SyllableType::SymbolCluster | SyllableType::NonIndicCluster => {}
919 }
920}
921
922// Rules from:
// https://docs.microsoft.com/en-us/typography/script-development/devanagari
924fn initial_reordering_consonant_syllable(
925 plan: &ShapePlan,
926 indic_plan: &IndicShapePlan,
927 face: &Face,
928 start: usize,
929 end: usize,
930 buffer: &mut Buffer,
931) {
932 // https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167
933 // For compatibility with legacy usage in Kannada,
934 // Ra+h+ZWJ must behave like Ra+ZWJ+h...
935 if buffer.script == Some(script::KANNADA)
936 && start + 3 <= end
937 && buffer.info[start].is_one_of(category_flag(category::RA))
938 && buffer.info[start + 1].is_one_of(category_flag(category::H))
939 && buffer.info[start + 2].is_one_of(category_flag(category::ZWJ))
940 {
941 buffer.merge_clusters(start + 1, start + 3);
942 buffer.info.swap(start + 1, start + 2);
943 }
944
945 // 1. Find base consonant:
946 //
947 // The shaping engine finds the base consonant of the syllable, using the
948 // following algorithm: starting from the end of the syllable, move backwards
949 // until a consonant is found that does not have a below-base or post-base
950 // form (post-base forms have to follow below-base forms), or that is not a
951 // pre-base-reordering Ra, or arrive at the first consonant. The consonant
952 // stopped at will be the base.
953 //
954 // - If the syllable starts with Ra + Halant (in a script that has Reph)
955 // and has more than one consonant, Ra is excluded from candidates for
956 // base consonants.
957
958 let mut base = end;
959 let mut has_reph = false;
960
961 {
962 // -> If the syllable starts with Ra + Halant (in a script that has Reph)
963 // and has more than one consonant, Ra is excluded from candidates for
964 // base consonants.
965 let mut limit = start;
966 if indic_plan.mask_array[indic_feature::RPHF] != 0
967 && start + 3 <= end
968 && ((indic_plan.config.reph_mode == RephMode::Implicit
969 && !buffer.info[start + 2].is_joiner())
970 || (indic_plan.config.reph_mode == RephMode::Explicit
971 && buffer.info[start + 2].indic_category() == category::ZWJ))
972 {
973 // See if it matches the 'rphf' feature.
974 let glyphs = &[
975 buffer.info[start].as_glyph(),
976 buffer.info[start + 1].as_glyph(),
977 if indic_plan.config.reph_mode == RephMode::Explicit {
978 buffer.info[start + 2].as_glyph()
979 } else {
980 GlyphId(0)
981 },
982 ];
983 if indic_plan
984 .rphf
985 .would_substitute(&plan.ot_map, face, &glyphs[0..2])
986 || (indic_plan.config.reph_mode == RephMode::Explicit
987 && indic_plan.rphf.would_substitute(&plan.ot_map, face, glyphs))
988 {
989 limit += 2;
990 while limit < end && buffer.info[limit].is_joiner() {
991 limit += 1;
992 }
993 base = start;
994 has_reph = true;
995 }
996 } else if indic_plan.config.reph_mode == RephMode::LogRepha
997 && buffer.info[start].indic_category() == category::REPHA
998 {
999 limit += 1;
1000 while limit < end && buffer.info[limit].is_joiner() {
1001 limit += 1;
1002 }
1003 base = start;
1004 has_reph = true;
1005 }
1006
1007 match indic_plan.config.base_pos {
1008 BasePosition::Last => {
1009 // -> starting from the end of the syllable, move backwards
1010 let mut i = end;
1011 let mut seen_below = false;
1012 loop {
1013 i -= 1;
1014 // -> until a consonant is found
1015 if buffer.info[i].is_consonant() {
1016 // -> that does not have a below-base or post-base form
1017 // (post-base forms have to follow below-base forms),
1018 if buffer.info[i].indic_position() != position::BELOW_C
1019 && (buffer.info[i].indic_position() != position::POST_C || seen_below)
1020 {
1021 base = i;
1022 break;
1023 }
1024 if buffer.info[i].indic_position() == position::BELOW_C {
1025 seen_below = true;
1026 }
1027
1028 // -> or that is not a pre-base-reordering Ra,
1029 //
1030 // IMPLEMENTATION NOTES:
1031 //
1032 // Our pre-base-reordering Ra's are marked position::PostC, so will be skipped
1033 // by the logic above already.
1034
1035 // -> or arrive at the first consonant. The consonant stopped at will
1036 // be the base.
1037 base = i;
1038 } else {
1039 // A ZWJ after a Halant stops the base search, and requests an explicit
1040 // half form.
1041 // A ZWJ before a Halant, requests a subjoined form instead, and hence
1042 // search continues. This is particularly important for Bengali
1043 // sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya.
1044 if start < i
1045 && buffer.info[i].indic_category() == category::ZWJ
1046 && buffer.info[i - 1].indic_category() == category::H
1047 {
1048 break;
1049 }
1050 }
1051
1052 if i <= limit {
1053 break;
1054 }
1055 }
1056 }
1057 BasePosition::LastSinhala => {
1058 // Sinhala base positioning is slightly different from main Indic, in that:
1059 // 1. Its ZWJ behavior is different,
1060 // 2. We don't need to look into the font for consonant positions.
1061
1062 if !has_reph {
1063 base = limit;
1064 }
1065
1066 // Find the last base consonant that is not blocked by ZWJ. If there is
1067 // a ZWJ right before a base consonant, that would request a subjoined form.
1068 for i in limit..end {
1069 if buffer.info[i].is_consonant() {
1070 if limit < i && buffer.info[i - 1].indic_category() == category::ZWJ {
1071 break;
1072 } else {
1073 base = i;
1074 }
1075 }
1076 }
1077
1078 // Mark all subsequent consonants as below.
1079 for i in base + 1..end {
1080 if buffer.info[i].is_consonant() {
1081 buffer.info[i].set_indic_position(position::BELOW_C);
1082 }
1083 }
1084 }
1085 }
1086
1087 // -> If the syllable starts with Ra + Halant (in a script that has Reph)
1088 // and has more than one consonant, Ra is excluded from candidates for
1089 // base consonants.
1090 //
        // Only do this for unforced Reph (i.e. not for Ra,H,ZWJ).
1092 if has_reph && base == start && limit - base <= 2 {
1093 // Have no other consonant, so Reph is not formed and Ra becomes base.
1094 has_reph = false;
1095 }
1096 }
1097
1098 // 2. Decompose and reorder Matras:
1099 //
1100 // Each matra and any syllable modifier sign in the syllable are moved to the
1101 // appropriate position relative to the consonant(s) in the syllable. The
1102 // shaping engine decomposes two- or three-part matras into their constituent
1103 // parts before any repositioning. Matra characters are classified by which
1104 // consonant in a conjunct they have affinity for and are reordered to the
1105 // following positions:
1106 //
1107 // - Before first half form in the syllable
1108 // - After subjoined consonants
1109 // - After post-form consonant
1110 // - After main consonant (for above marks)
1111 //
1112 // IMPLEMENTATION NOTES:
1113 //
1114 // The normalize() routine has already decomposed matras for us, so we don't
1115 // need to worry about that.
1116
1117 // 3. Reorder marks to canonical order:
1118 //
1119 // Adjacent nukta and halant or nukta and vedic sign are always repositioned
1120 // if necessary, so that the nukta is first.
1121 //
1122 // IMPLEMENTATION NOTES:
1123 //
1124 // We don't need to do this: the normalize() routine already did this for us.
1125
1126 // Reorder characters
1127
1128 for i in start..base {
1129 let pos = buffer.info[i].indic_position();
1130 buffer.info[i].set_indic_position(cmp::min(position::PRE_C, pos));
1131 }
1132
1133 if base < end {
1134 buffer.info[base].set_indic_position(position::BASE_C);
1135 }
1136
1137 // Mark final consonants. A final consonant is one appearing after a matra.
1138 // Happens in Sinhala.
1139 for i in base + 1..end {
1140 if buffer.info[i].indic_category() == category::M {
1141 for j in i + 1..end {
1142 if buffer.info[j].is_consonant() {
1143 buffer.info[j].set_indic_position(position::FINAL_C);
1144 break;
1145 }
1146 }
1147
1148 break;
1149 }
1150 }
1151
1152 // Handle beginning Ra
1153 if has_reph {
1154 buffer.info[start].set_indic_position(position::RA_TO_BECOME_REPH);
1155 }
1156
1157 // For old-style Indic script tags, move the first post-base Halant after
1158 // last consonant.
1159 //
1160 // Reports suggest that in some scripts Uniscribe does this only if there
1161 // is *not* a Halant after last consonant already. We know that is the
1162 // case for Kannada, while it reorders unconditionally in other scripts,
1163 // eg. Malayalam, Bengali, and Devanagari. We don't currently know about
1164 // other scripts, so we block Kannada.
1165 //
1166 // Kannada test case:
1167 // U+0C9A,U+0CCD,U+0C9A,U+0CCD
1168 // With some versions of Lohit Kannada.
1169 // https://bugs.freedesktop.org/show_bug.cgi?id=59118
1170 //
1171 // Malayalam test case:
1172 // U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D
1173 // With lohit-ttf-20121122/Lohit-Malayalam.ttf
1174 //
1175 // Bengali test case:
1176 // U+0998,U+09CD,U+09AF,U+09CD
1177 // With Windows XP vrinda.ttf
1178 // https://github.com/harfbuzz/harfbuzz/issues/1073
1179 //
1180 // Devanagari test case:
1181 // U+091F,U+094D,U+0930,U+094D
1182 // With chandas.ttf
1183 // https://github.com/harfbuzz/harfbuzz/issues/1071
1184 if indic_plan.is_old_spec {
1185 let disallow_double_halants = buffer.script == Some(script::KANNADA);
1186 for i in base + 1..end {
1187 if buffer.info[i].indic_category() == category::H {
1188 let mut j = end - 1;
1189 while j > i {
1190 if buffer.info[j].is_consonant()
1191 || (disallow_double_halants
1192 && buffer.info[j].indic_category() == category::H)
1193 {
1194 break;
1195 }
1196
1197 j -= 1;
1198 }
1199
1200 if buffer.info[j].indic_category() != category::H && j > i {
1201 // Move Halant to after last consonant.
1202 let t = buffer.info[i];
1203 for k in 0..j - i {
1204 buffer.info[k + i] = buffer.info[k + i + 1];
1205 }
1206 buffer.info[j] = t;
1207 }
1208
1209 break;
1210 }
1211 }
1212 }
1213
1214 // Attach misc marks to previous char to move with them.
1215 {
1216 let mut last_pos = position::START;
1217 for i in start..end {
1218 let ok = rb_flag_unsafe(buffer.info[i].indic_category() as u32)
1219 & (category_flag(category::ZWJ)
1220 | category_flag(category::ZWNJ)
1221 | category_flag(category::N)
1222 | category_flag(category::RS)
1223 | category_flag(category::CM)
1224 | category_flag(category::H))
1225 != 0;
1226 if ok {
1227 buffer.info[i].set_indic_position(last_pos);
1228
1229 if buffer.info[i].indic_category() == category::H
1230 && buffer.info[i].indic_position() == position::PRE_M
1231 {
1232 // Uniscribe doesn't move the Halant with Left Matra.
1233 // TEST: U+092B,U+093F,U+094DE
1234 // We follow. This is important for the Sinhala
1235 // U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
1236 // where U+0DD9 is a left matra and U+0DCA is the virama.
1237 // We don't want to move the virama with the left matra.
1238 // TEST: U+0D9A,U+0DDA
1239 for j in (start + 1..=i).rev() {
1240 if buffer.info[j - 1].indic_position() != position::PRE_M {
1241 let pos = buffer.info[j - 1].indic_position();
1242 buffer.info[i].set_indic_position(pos);
1243 break;
1244 }
1245 }
1246 }
1247 } else if buffer.info[i].indic_position() != position::SMVD {
1248 last_pos = buffer.info[i].indic_position();
1249 }
1250 }
1251 }
1252 // For post-base consonants let them own anything before them
1253 // since the last consonant or matra.
1254 {
1255 let mut last = base;
1256 for i in base + 1..end {
1257 if buffer.info[i].is_consonant() {
1258 for j in last + 1..i {
1259 if (buffer.info[j].indic_position() as u8) < (position::SMVD as u8) {
1260 let pos = buffer.info[i].indic_position();
1261 buffer.info[j].set_indic_position(pos);
1262 }
1263 }
1264
1265 last = i;
1266 } else if buffer.info[i].indic_category() == category::M {
1267 last = i;
1268 }
1269 }
1270 }
1271
1272 {
1273 // Use syllable() for sort accounting temporarily.
1274 let syllable = buffer.info[start].syllable();
1275 for i in start..end {
1276 buffer.info[i].set_syllable(u8::try_from(i - start).unwrap());
1277 }
1278
1279 buffer.info[start..end].sort_by(|a, b| a.indic_position().cmp(&b.indic_position()));
1280
1281 // Find base again.
1282 base = end;
1283 for i in start..end {
1284 if buffer.info[i].indic_position() == position::BASE_C {
1285 base = i;
1286 break;
1287 }
1288 }
1289 // Things are out-of-control for post base positions, they may shuffle
1290 // around like crazy. In old-spec mode, we move halants around, so in
1291 // that case merge all clusters after base. Otherwise, check the sort
1292 // order and merge as needed.
1293 // For pre-base stuff, we handle cluster issues in final reordering.
1294 //
1295 // We could use buffer->sort() for this, if there was no special
1296 // reordering of pre-base stuff happening later...
1297 // We don't want to merge_clusters all of that, which buffer->sort()
1298 // would. Here's a concrete example:
1299 //
1300 // Assume there's a pre-base consonant and explicit Halant before base,
1301 // followed by a prebase-reordering (left) Matra:
1302 //
1303 // C,H,ZWNJ,B,M
1304 //
1305 // At this point in reordering we would have:
1306 //
1307 // M,C,H,ZWNJ,B
1308 //
1309 // whereas in final reordering we will bring the Matra closer to Base:
1310 //
1311 // C,H,ZWNJ,M,B
1312 //
1313 // That's why we don't want to merge-clusters anything before the Base
1314 // at this point. But if something moved from after Base to before it,
1315 // we should merge clusters from base to them. In final-reordering, we
1316 // only move things around before base, and merge-clusters up to base.
1317 // These two merge-clusters from the two sides of base will interlock
1318 // to merge things correctly. See:
1319 // https://github.com/harfbuzz/harfbuzz/issues/2272
1320 if indic_plan.is_old_spec || end - start > 127 {
1321 buffer.merge_clusters(base, end);
1322 } else {
1323 // Note! syllable() is a one-byte field.
1324 for i in base..end {
1325 if buffer.info[i].syllable() != 255 {
1326 let mut min = i;
1327 let mut max = i;
1328 let mut j = start + buffer.info[i].syllable() as usize;
1329 while j != i {
1330 min = cmp::min(min, j);
1331 max = cmp::max(max, j);
1332 let next = start + buffer.info[j].syllable() as usize;
1333 buffer.info[j].set_syllable(255); // So we don't process j later again.
1334 j = next;
1335 }
1336
1337 buffer.merge_clusters(cmp::max(base, min), max + 1);
1338 }
1339 }
1340 }
1341
1342 // Put syllable back in.
1343 for info in &mut buffer.info[start..end] {
1344 info.set_syllable(syllable);
1345 }
1346 }
1347
1348 // Setup masks now
1349
1350 {
1351 // Reph
1352 for info in &mut buffer.info[start..end] {
1353 if info.indic_position() != position::RA_TO_BECOME_REPH {
1354 break;
1355 }
1356
1357 info.mask |= indic_plan.mask_array[indic_feature::RPHF];
1358 }
1359
1360 // Pre-base
1361 let mut mask = indic_plan.mask_array[indic_feature::HALF];
1362 if !indic_plan.is_old_spec && indic_plan.config.blwf_mode == BlwfMode::PreAndPost {
1363 mask |= indic_plan.mask_array[indic_feature::BLWF];
1364 }
1365
1366 for info in &mut buffer.info[start..base] {
1367 info.mask |= mask;
1368 }
1369
1370 // Base
1371 mask = 0;
1372 if base < end {
1373 buffer.info[base].mask |= mask;
1374 }
1375
1376 // Post-base
1377 mask = indic_plan.mask_array[indic_feature::BLWF]
1378 | indic_plan.mask_array[indic_feature::ABVF]
1379 | indic_plan.mask_array[indic_feature::PSTF];
1380 for i in base + 1..end {
1381 buffer.info[i].mask |= mask;
1382 }
1383 }
1384
1385 if indic_plan.is_old_spec && buffer.script == Some(script::DEVANAGARI) {
1386 // Old-spec eye-lash Ra needs special handling. From the
1387 // spec:
1388 //
1389 // "The feature 'below-base form' is applied to consonants
1390 // having below-base forms and following the base consonant.
1391 // The exception is vattu, which may appear below half forms
1392 // as well as below the base glyph. The feature 'below-base
1393 // form' will be applied to all such occurrences of Ra as well."
1394 //
1395 // Test case: U+0924,U+094D,U+0930,U+094d,U+0915
1396 // with Sanskrit 2003 font.
1397 //
1398 // However, note that Ra,Halant,ZWJ is the correct way to
        // request eyelash form of Ra, so we wouldn't inhibit it
1400 // in that sequence.
1401 //
1402 // Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
1403 for i in start..base.saturating_sub(1) {
1404 if buffer.info[i].indic_category() == category::RA
1405 && buffer.info[i + 1].indic_category() == category::H
1406 && (i + 2 == base || buffer.info[i + 2].indic_category() != category::ZWJ)
1407 {
1408 buffer.info[i].mask |= indic_plan.mask_array[indic_feature::BLWF];
1409 buffer.info[i + 1].mask |= indic_plan.mask_array[indic_feature::BLWF];
1410 }
1411 }
1412 }
1413
1414 let pref_len = 2;
1415 if indic_plan.mask_array[indic_feature::PREF] != 0 && base + pref_len < end {
1416 // Find a Halant,Ra sequence and mark it for pre-base-reordering processing.
1417 for i in base + 1..end - pref_len + 1 {
1418 let glyphs = &[buffer.info[i + 0].as_glyph(), buffer.info[i + 1].as_glyph()];
1419 if indic_plan.pref.would_substitute(&plan.ot_map, face, glyphs) {
1420 buffer.info[i + 0].mask = indic_plan.mask_array[indic_feature::PREF];
1421 buffer.info[i + 1].mask = indic_plan.mask_array[indic_feature::PREF];
1422 break;
1423 }
1424 }
1425 }
1426
1427 // Apply ZWJ/ZWNJ effects
1428 for i in start + 1..end {
1429 if buffer.info[i].is_joiner() {
1430 let non_joiner = buffer.info[i].indic_category() == category::ZWNJ;
1431 let mut j = i;
1432
1433 loop {
1434 j -= 1;
1435
1436 // ZWJ/ZWNJ should disable CJCT. They do that by simply
1437 // being there, since we don't skip them for the CJCT
1438 // feature (ie. F_MANUAL_ZWJ)
1439
1440 // A ZWNJ disables HALF.
1441 if non_joiner {
1442 buffer.info[j].mask &= !indic_plan.mask_array[indic_feature::HALF];
1443 }
1444
1445 if j <= start || buffer.info[j].is_consonant() {
1446 break;
1447 }
1448 }
1449 }
1450 }
1451}
1452
/// Initial reordering for a standalone cluster spanning `start..end`.
///
/// This is a pure delegation: every argument is forwarded unchanged to
/// `initial_reordering_consonant_syllable`, so a standalone cluster is
/// reordered exactly like a consonant syllable.
fn initial_reordering_standalone_cluster(
    plan: &ShapePlan,
    indic_plan: &IndicShapePlan,
    face: &Face,
    start: usize,
    end: usize,
    buffer: &mut Buffer,
) {
    // We treat placeholder/dotted-circle as if they are consonants, so we
    // should just chain. Only if not in compatibility mode that is...
    initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer);
}
1465
1466fn final_reordering(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) {
1467 if buffer.is_empty() {
1468 return;
1469 }
1470
1471 let indic_plan: &IndicShapePlan = plan.data::<IndicShapePlan>();
1472
1473 let mut virama_glyph: Option = None;
1474 if indic_plan.config.virama != 0 {
1475 if let Some(g: GlyphId) = face.glyph_index(indic_plan.config.virama) {
1476 virama_glyph = Some(g.0 as u32);
1477 }
1478 }
1479
1480 let mut start: usize = 0;
1481 let mut end: usize = buffer.next_syllable(start:0);
1482 while start < buffer.len {
1483 final_reordering_impl(indic_plan, virama_glyph, start, end, buffer);
1484 start = end;
1485 end = buffer.next_syllable(start);
1486 }
1487}
1488
1489fn final_reordering_impl(
1490 plan: &IndicShapePlan,
1491 virama_glyph: Option<u32>,
1492 start: usize,
1493 end: usize,
1494 buffer: &mut Buffer,
1495) {
1496 // This function relies heavily on halant glyphs. Lots of ligation
1497 // and possibly multiple substitutions happened prior to this
1498 // phase, and that might have messed up our properties. Recover
1499 // from a particular case of that where we're fairly sure that a
1500 // class of OT_H is desired but has been lost.
1501 //
1502 // We don't call load_virama_glyph(), since we know it's already loaded.
1503 if let Some(virama_glyph) = virama_glyph {
1504 for info in &mut buffer.info[start..end] {
1505 if info.glyph_id == virama_glyph && info.is_ligated() && info.is_multiplied() {
1506 // This will make sure that this glyph passes is_halant() test.
1507 info.set_indic_category(category::H);
1508 info.clear_ligated_and_multiplied();
1509 }
1510 }
1511 }
1512
1513 // 4. Final reordering:
1514 //
1515 // After the localized forms and basic shaping forms GSUB features have been
1516 // applied (see below), the shaping engine performs some final glyph
1517 // reordering before applying all the remaining font features to the entire
1518 // syllable.
1519
1520 let mut try_pref = plan.mask_array[indic_feature::PREF] != 0;
1521
1522 let mut base = start;
1523 while base < end {
1524 if buffer.info[base].indic_position() as u32 >= position::BASE_C as u32 {
1525 if try_pref && base + 1 < end {
1526 for i in base + 1..end {
1527 if (buffer.info[i].mask & plan.mask_array[indic_feature::PREF]) != 0 {
1528 if !(buffer.info[i].is_substituted()
1529 && buffer.info[i].is_ligated_and_didnt_multiply())
1530 {
1531 // Ok, this was a 'pref' candidate but didn't form any.
1532 // Base is around here...
1533 base = i;
1534 while base < end && buffer.info[base].is_halant() {
1535 base += 1;
1536 }
1537
1538 buffer.info[base].set_indic_position(position::BASE_C);
1539 try_pref = false;
1540 }
1541
1542 break;
1543 }
1544 }
1545 }
1546
1547 // For Malayalam, skip over unformed below- (but NOT post-) forms.
1548 if buffer.script == Some(script::MALAYALAM) {
1549 let mut i = base + 1;
1550 while i < end {
1551 while i < end && buffer.info[i].is_joiner() {
1552 i += 1;
1553 }
1554
1555 if i == end || !buffer.info[i].is_halant() {
1556 break;
1557 }
1558
1559 i += 1; // Skip halant.
1560
1561 while i < end && buffer.info[i].is_joiner() {
1562 i += 1;
1563 }
1564
1565 if i < end
1566 && buffer.info[i].is_consonant()
1567 && buffer.info[i].indic_position() == position::BELOW_C
1568 {
1569 base = i;
1570 buffer.info[base].set_indic_position(position::BASE_C);
1571 }
1572
1573 i += 1;
1574 }
1575 }
1576
1577 if start < base && buffer.info[base].indic_position() as u32 > position::BASE_C as u32 {
1578 base -= 1;
1579 }
1580
1581 break;
1582 }
1583
1584 base += 1;
1585 }
1586
1587 if base == end && start < base && buffer.info[base - 1].is_one_of(rb_flag(category::ZWJ as u32))
1588 {
1589 base -= 1;
1590 }
1591
1592 if base < end {
1593 while start < base
1594 && buffer.info[base]
1595 .is_one_of(rb_flag(category::N as u32) | rb_flag(category::H as u32))
1596 {
1597 base -= 1;
1598 }
1599 }
1600
1601 // - Reorder matras:
1602 //
1603 // If a pre-base matra character had been reordered before applying basic
1604 // features, the glyph can be moved closer to the main consonant based on
1605 // whether half-forms had been formed. Actual position for the matra is
1606 // defined as “after last standalone halant glyph, after initial matra
1607 // position and before the main consonant”. If ZWJ or ZWNJ follow this
1608 // halant, position is moved after it.
1609 //
1610 // IMPLEMENTATION NOTES:
1611 //
1612 // It looks like the last sentence is wrong. Testing, with Windows 7 Uniscribe
1613 // and Devanagari shows that the behavior is best described as:
1614 //
1615 // "If ZWJ follows this halant, matra is NOT repositioned after this halant.
1616 // If ZWNJ follows this halant, position is moved after it."
1617 //
1618 // Test case, with Adobe Devanagari or Nirmala UI:
1619 //
1620 // U+091F,U+094D,U+200C,U+092F,U+093F
1621 // (Matra moves to the middle, after ZWNJ.)
1622 //
1623 // U+091F,U+094D,U+200D,U+092F,U+093F
1624 // (Matra does NOT move, stays to the left.)
1625 //
1626 // https://github.com/harfbuzz/harfbuzz/issues/1070
1627
1628 // Otherwise there can't be any pre-base matra characters.
1629 if start + 1 < end && start < base {
1630 // If we lost track of base, alas, position before last thingy.
1631 let mut new_pos = if base == end { base - 2 } else { base - 1 };
1632
1633 // Malayalam / Tamil do not have "half" forms or explicit virama forms.
1634 // The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1635 // We want to position matra after them.
1636 if buffer.script != Some(script::MALAYALAM) && buffer.script != Some(script::TAMIL) {
1637 loop {
1638 while new_pos > start
1639 && !buffer.info[new_pos]
1640 .is_one_of(rb_flag(category::M as u32) | rb_flag(category::H as u32))
1641 {
1642 new_pos -= 1;
1643 }
1644
1645 // If we found no Halant we are done.
1646 // Otherwise only proceed if the Halant does
1647 // not belong to the Matra itself!
1648 if buffer.info[new_pos].is_halant()
1649 && buffer.info[new_pos].indic_position() != position::PRE_M
1650 {
1651 if new_pos + 1 < end {
1652 // -> If ZWJ follows this halant, matra is NOT repositioned after this halant.
1653 if buffer.info[new_pos + 1].indic_category() == category::ZWJ {
1654 // Keep searching.
1655 if new_pos > start {
1656 new_pos -= 1;
1657 continue;
1658 }
1659 }
1660
1661 // -> If ZWNJ follows this halant, position is moved after it.
1662 //
1663 // IMPLEMENTATION NOTES:
1664 //
1665 // This is taken care of by the state-machine. A Halant,ZWNJ is a terminating
1666 // sequence for a consonant syllable; any pre-base matras occurring after it
1667 // will belong to the subsequent syllable.
1668 }
1669 } else {
1670 new_pos = start; // No move.
1671 }
1672
1673 break;
1674 }
1675 }
1676
1677 if start < new_pos && buffer.info[new_pos].indic_position() != position::PRE_M {
1678 // Now go see if there's actually any matras...
1679 for i in (start + 1..=new_pos).rev() {
1680 if buffer.info[i - 1].indic_position() == position::PRE_M {
1681 let old_pos = i - 1;
1682 // Shouldn't actually happen.
1683 if old_pos < base && base <= new_pos {
1684 base -= 1;
1685 }
1686
1687 let tmp = buffer.info[old_pos];
1688 for i in 0..new_pos - old_pos {
1689 buffer.info[i + old_pos] = buffer.info[i + old_pos + 1];
1690 }
1691 buffer.info[new_pos] = tmp;
1692
1693 // Note: this merge_clusters() is intentionally *after* the reordering.
1694 // Indic matra reordering is special and tricky...
1695 buffer.merge_clusters(new_pos, cmp::min(end, base + 1));
1696
1697 new_pos -= 1;
1698 }
1699 }
1700 } else {
1701 for i in start..base {
1702 if buffer.info[i].indic_position() == position::PRE_M {
1703 buffer.merge_clusters(i, cmp::min(end, base + 1));
1704 break;
1705 }
1706 }
1707 }
1708 }
1709
1710 // - Reorder reph:
1711 //
1712 // Reph’s original position is always at the beginning of the syllable,
1713 // (i.e. it is not reordered at the character reordering stage). However,
1714 // it will be reordered according to the basic-forms shaping results.
1715 // Possible positions for reph, depending on the script, are; after main,
1716 // before post-base consonant forms, and after post-base consonant forms.
1717
1718 // Two cases:
1719 //
1720 // - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then
1721 // we should only move it if the sequence ligated to the repha form.
1722 //
1723 // - If repha is encoded separately and in the logical position, we should only
1724 // move it if it did NOT ligate. If it ligated, it's probably the font trying
1725 // to make it work without the reordering.
1726
1727 if start + 1 < end
1728 && buffer.info[start].indic_position() == position::RA_TO_BECOME_REPH
1729 && (buffer.info[start].indic_category() == category::REPHA)
1730 ^ buffer.info[start].is_ligated_and_didnt_multiply()
1731 {
1732 let mut new_reph_pos;
1733 loop {
1734 let reph_pos = plan.config.reph_pos;
1735
1736 // 1. If reph should be positioned after post-base consonant forms,
1737 // proceed to step 5.
1738 if reph_pos != RephPosition::AfterPost {
1739 // 2. If the reph repositioning class is not after post-base: target
1740 // position is after the first explicit halant glyph between the
1741 // first post-reph consonant and last main consonant. If ZWJ or ZWNJ
1742 // are following this halant, position is moved after it. If such
1743 // position is found, this is the target position. Otherwise,
1744 // proceed to the next step.
1745 //
1746 // Note: in old-implementation fonts, where classifications were
1747 // fixed in shaping engine, there was no case where reph position
1748 // will be found on this step.
1749 {
1750 new_reph_pos = start + 1;
1751 while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() {
1752 new_reph_pos += 1;
1753 }
1754
1755 if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() {
1756 // ->If ZWJ or ZWNJ are following this halant, position is moved after it.
1757 if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() {
1758 new_reph_pos += 1;
1759 }
1760
1761 break;
1762 }
1763 }
1764
1765 // 3. If reph should be repositioned after the main consonant: find the
1766 // first consonant not ligated with main, or find the first
1767 // consonant that is not a potential pre-base-reordering Ra.
1768 if reph_pos == RephPosition::AfterMain {
1769 new_reph_pos = base;
1770 while new_reph_pos + 1 < end
1771 && buffer.info[new_reph_pos + 1].indic_position() as u8
1772 <= position::AFTER_MAIN as u8
1773 {
1774 new_reph_pos += 1;
1775 }
1776
1777 if new_reph_pos < end {
1778 break;
1779 }
1780 }
1781
1782 // 4. If reph should be positioned before post-base consonant, find
1783 // first post-base classified consonant not ligated with main. If no
1784 // consonant is found, the target position should be before the
1785 // first matra, syllable modifier sign or vedic sign.
1786 //
1787 // This is our take on what step 4 is trying to say (and failing, BADLY).
1788 if reph_pos == RephPosition::AfterSub {
1789 new_reph_pos = base;
1790 while new_reph_pos + 1 < end
1791 && (rb_flag_unsafe(buffer.info[new_reph_pos + 1].indic_position() as u32)
1792 & (rb_flag(position::POST_C as u32)
1793 | rb_flag(position::AFTER_POST as u32)
1794 | rb_flag(position::SMVD as u32)))
1795 == 0
1796 {
1797 new_reph_pos += 1;
1798 }
1799
1800 if new_reph_pos < end {
1801 break;
1802 }
1803 }
1804 }
1805
1806 // 5. If no consonant is found in steps 3 or 4, move reph to a position
1807 // immediately before the first post-base matra, syllable modifier
1808 // sign or vedic sign that has a reordering class after the intended
1809 // reph position. For example, if the reordering position for reph
1810 // is post-main, it will skip above-base matras that also have a
1811 // post-main position.
1812 //
1813 // Copied from step 2.
1814 new_reph_pos = start + 1;
1815 while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() {
1816 new_reph_pos += 1;
1817 }
1818
1819 if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() {
1820 /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
1821 if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() {
1822 new_reph_pos += 1;
1823 }
1824
1825 break;
1826 }
1827 // See https://github.com/harfbuzz/harfbuzz/issues/2298#issuecomment-615318654
1828
1829 // 6. Otherwise, reorder reph to the end of the syllable.
1830 {
1831 new_reph_pos = end - 1;
1832 while new_reph_pos > start
1833 && buffer.info[new_reph_pos].indic_position() == position::SMVD
1834 {
1835 new_reph_pos -= 1;
1836 }
1837
1838 // If the Reph is to be ending up after a Matra,Halant sequence,
1839 // position it before that Halant so it can interact with the Matra.
1840 // However, if it's a plain Consonant,Halant we shouldn't do that.
1841 // Uniscribe doesn't do this.
1842 // TEST: U+0930,U+094D,U+0915,U+094B,U+094D
1843 if buffer.info[new_reph_pos].is_halant() {
1844 for info in &buffer.info[base + 1..new_reph_pos] {
1845 if info.indic_category() == category::M {
1846 // Ok, got it.
1847 new_reph_pos -= 1;
1848 }
1849 }
1850 }
1851 }
1852
1853 break;
1854 }
1855
1856 // Move
1857 buffer.merge_clusters(start, new_reph_pos + 1);
1858
1859 let reph = buffer.info[start];
1860 for i in 0..new_reph_pos - start {
1861 buffer.info[i + start] = buffer.info[i + start + 1];
1862 }
1863 buffer.info[new_reph_pos] = reph;
1864
1865 if start < base && base <= new_reph_pos {
1866 base -= 1;
1867 }
1868 }
1869
1870 // - Reorder pre-base-reordering consonants:
1871 //
1872 // If a pre-base-reordering consonant is found, reorder it according to
1873 // the following rules:
1874
1875 // Otherwise there can't be any pre-base-reordering Ra.
1876 if try_pref && base + 1 < end {
1877 for i in base + 1..end {
1878 if (buffer.info[i].mask & plan.mask_array[indic_feature::PREF]) != 0 {
1879 // 1. Only reorder a glyph produced by substitution during application
1880 // of the <pref> feature. (Note that a font may shape a Ra consonant with
1881 // the feature generally but block it in certain contexts.)
1882 //
1883 // Note: We just check that something got substituted. We don't check that
1884 // the <pref> feature actually did it...
1885 //
1886 // Reorder pref only if it ligated.
1887 if buffer.info[i].is_ligated_and_didnt_multiply() {
1888 // 2. Try to find a target position the same way as for pre-base matra.
1889 // If it is found, reorder pre-base consonant glyph.
1890 //
1891 // 3. If position is not found, reorder immediately before main consonant.
1892
1893 let mut new_pos = base;
1894 // Malayalam / Tamil do not have "half" forms or explicit virama forms.
1895 // The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1896 // We want to position matra after them.
1897 if buffer.script != Some(script::MALAYALAM)
1898 && buffer.script != Some(script::TAMIL)
1899 {
1900 while new_pos > start
1901 && !buffer.info[new_pos - 1].is_one_of(
1902 rb_flag(category::M as u32) | rb_flag(category::H as u32),
1903 )
1904 {
1905 new_pos -= 1;
1906 }
1907 }
1908
1909 if new_pos > start && buffer.info[new_pos - 1].is_halant() {
1910 // -> If ZWJ or ZWNJ follow this halant, position is moved after it.
1911 if new_pos < end && buffer.info[new_pos].is_joiner() {
1912 new_pos += 1;
1913 }
1914 }
1915
1916 {
1917 let old_pos = i;
1918
1919 buffer.merge_clusters(new_pos, old_pos + 1);
1920 let tmp = buffer.info[old_pos];
1921 for i in (0..=old_pos - new_pos).rev() {
1922 buffer.info[i + new_pos + 1] = buffer.info[i + new_pos];
1923 }
1924 buffer.info[new_pos] = tmp;
1925
1926 if new_pos <= base && base < old_pos {
1927 // TODO: investigate
1928 #[allow(unused_assignments)]
1929 {
1930 base += 1;
1931 }
1932 }
1933 }
1934 }
1935
1936 break;
1937 }
1938 }
1939 }
1940
1941 // Apply 'init' to the Left Matra if it's a word start.
1942 if buffer.info[start].indic_position() == position::PRE_M {
1943 if start == 0
1944 || (rb_flag_unsafe(buffer.info[start - 1].general_category().to_rb())
1945 & rb_flag_range(
1946 hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT,
1947 hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,
1948 ))
1949 == 0
1950 {
1951 buffer.info[start].mask |= plan.mask_array[indic_feature::INIT];
1952 } else {
1953 buffer.unsafe_to_break(Some(start - 1), Some(start + 1));
1954 }
1955 }
1956}
1957
1958pub fn get_category_and_position(u: u32) -> (Category, Position) {
1959 let (c1, c2) = super::indic_table::get_categories(u);
1960 let c2 = if c1 == SyllabicCategory::ConsonantMedial
1961 || c1 == SyllabicCategory::GeminationMark
1962 || c1 == SyllabicCategory::RegisterShifter
1963 || c1 == SyllabicCategory::ConsonantSucceedingRepha
1964 || c1 == SyllabicCategory::Virama
1965 || c1 == SyllabicCategory::VowelDependent
1966 || false
1967 {
1968 c2
1969 } else {
1970 MatraCategory::NotApplicable
1971 };
1972
1973 let c1 = match c1 {
1974 SyllabicCategory::Other => category::X,
1975 SyllabicCategory::Avagraha => category::SYMBOL,
1976 SyllabicCategory::Bindu => category::SM,
1977 SyllabicCategory::BrahmiJoiningNumber => category::PLACEHOLDER, // Don't care.
1978 SyllabicCategory::CantillationMark => category::A,
1979 SyllabicCategory::Consonant => category::C,
1980 SyllabicCategory::ConsonantDead => category::C,
1981 SyllabicCategory::ConsonantFinal => category::CM,
1982 SyllabicCategory::ConsonantHeadLetter => category::C,
1983 SyllabicCategory::ConsonantInitialPostfixed => category::PLACEHOLDER,
1984 SyllabicCategory::ConsonantKiller => category::M, // U+17CD only.
1985 SyllabicCategory::ConsonantMedial => category::CM,
1986 SyllabicCategory::ConsonantPlaceholder => category::PLACEHOLDER,
1987 SyllabicCategory::ConsonantPrecedingRepha => category::REPHA,
1988 SyllabicCategory::ConsonantPrefixed => category::X,
1989 SyllabicCategory::ConsonantSubjoined => category::CM,
1990 SyllabicCategory::ConsonantSucceedingRepha => category::CM,
1991 SyllabicCategory::ConsonantWithStacker => category::CS,
1992 SyllabicCategory::GeminationMark => category::SM, // https://github.com/harfbuzz/harfbuzz/issues/552
1993 SyllabicCategory::InvisibleStacker => category::COENG,
1994 SyllabicCategory::Joiner => category::ZWJ,
1995 SyllabicCategory::ModifyingLetter => category::X,
1996 SyllabicCategory::NonJoiner => category::ZWNJ,
1997 SyllabicCategory::Nukta => category::N,
1998 SyllabicCategory::Number => category::PLACEHOLDER,
1999 SyllabicCategory::NumberJoiner => category::PLACEHOLDER, // Don't care.
2000 SyllabicCategory::PureKiller => category::M,
2001 SyllabicCategory::RegisterShifter => category::RS,
2002 SyllabicCategory::SyllableModifier => category::SM,
2003 SyllabicCategory::ToneLetter => category::X,
2004 SyllabicCategory::ToneMark => category::N,
2005 SyllabicCategory::Virama => category::H,
2006 SyllabicCategory::Visarga => category::SM,
2007 SyllabicCategory::Vowel => category::V,
2008 SyllabicCategory::VowelDependent => category::M,
2009 SyllabicCategory::VowelIndependent => category::V,
2010 };
2011
2012 let c2 = match c2 {
2013 MatraCategory::NotApplicable => position::END,
2014 MatraCategory::Left => position::PRE_C,
2015 MatraCategory::Top => position::ABOVE_C,
2016 MatraCategory::Bottom => position::BELOW_C,
2017 MatraCategory::Right => position::POST_C,
2018 MatraCategory::BottomAndLeft => position::POST_C,
2019 MatraCategory::BottomAndRight => position::POST_C,
2020 MatraCategory::LeftAndRight => position::POST_C,
2021 MatraCategory::TopAndBottom => position::BELOW_C,
2022 MatraCategory::TopAndBottomAndRight => position::POST_C,
2023 MatraCategory::TopAndBottomAndLeft => position::BELOW_C,
2024 MatraCategory::TopAndLeft => position::ABOVE_C,
2025 MatraCategory::TopAndLeftAndRight => position::POST_C,
2026 MatraCategory::TopAndRight => position::POST_C,
2027 MatraCategory::Overstruck => position::AFTER_MAIN,
2028 MatraCategory::VisualOrderLeft => position::PRE_M,
2029 };
2030
2031 (c1, c2)
2032}
2033
2034#[rustfmt::skip]
2035fn matra_position_indic(u: u32, side: u8) -> u8 {
2036 #[inline] fn in_half_block(u: u32, base: u32) -> bool { u & !0x7F == base }
2037 #[inline] fn is_deva(u: u32) -> bool { in_half_block(u, 0x0900) }
2038 #[inline] fn is_beng(u: u32) -> bool { in_half_block(u, 0x0980) }
2039 #[inline] fn is_guru(u: u32) -> bool { in_half_block(u, 0x0A00) }
2040 #[inline] fn is_gujr(u: u32) -> bool { in_half_block(u, 0x0A80) }
2041 #[inline] fn is_orya(u: u32) -> bool { in_half_block(u, 0x0B00) }
2042 #[inline] fn is_taml(u: u32) -> bool { in_half_block(u, 0x0B80) }
2043 #[inline] fn is_telu(u: u32) -> bool { in_half_block(u, 0x0C00) }
2044 #[inline] fn is_knda(u: u32) -> bool { in_half_block(u, 0x0C80) }
2045 #[inline] fn is_mlym(u: u32) -> bool { in_half_block(u, 0x0D00) }
2046 #[inline] fn is_sinh(u: u32) -> bool { in_half_block(u, 0x0D80) }
2047
2048 #[inline]
2049 fn matra_pos_right(u: u32) -> Position {
2050 if is_deva(u) {
2051 position::AFTER_SUB
2052 } else if is_beng(u) {
2053 position::AFTER_POST
2054 } else if is_guru(u) {
2055 position::AFTER_POST
2056 } else if is_gujr(u) {
2057 position::AFTER_POST
2058 } else if is_orya(u) {
2059 position::AFTER_POST
2060 } else if is_taml(u) {
2061 position::AFTER_POST
2062 } else if is_telu(u) {
2063 if u <= 0x0C42 {
2064 position::BEFORE_SUB
2065 } else {
2066 position::AFTER_SUB
2067 }
2068 } else if is_knda(u) {
2069 if u < 0x0CC3 || u > 0xCD6 {
2070 position::BEFORE_SUB
2071 } else {
2072 position::AFTER_SUB
2073 }
2074 } else if is_mlym(u) {
2075 position::AFTER_POST
2076 } else if is_sinh(u) {
2077 position::AFTER_SUB
2078 } else {
2079 position::AFTER_SUB
2080 }
2081 }
2082
2083 // BENG and MLYM don't have top matras.
2084 #[inline]
2085 fn matra_pos_top(u: u32) -> Position {
2086 if is_deva(u) {
2087 position::AFTER_SUB
2088 } else if is_guru(u) {
2089 // Deviate from spec
2090 position::AFTER_POST
2091 } else if is_gujr(u) {
2092 position::AFTER_SUB
2093 } else if is_orya(u) {
2094 position::AFTER_MAIN
2095 } else if is_taml(u) {
2096 position::AFTER_SUB
2097 } else if is_telu(u) {
2098 position::BEFORE_SUB
2099 } else if is_knda(u) {
2100 position::BEFORE_SUB
2101 } else if is_sinh(u) {
2102 position::AFTER_SUB
2103 } else {
2104 position::AFTER_SUB
2105 }
2106 }
2107
2108 #[inline]
2109 fn matra_pos_bottom(u: u32) -> Position {
2110 if is_deva(u) {
2111 position::AFTER_SUB
2112 } else if is_beng(u) {
2113 position::AFTER_SUB
2114 } else if is_guru(u) {
2115 position::AFTER_POST
2116 } else if is_gujr(u) {
2117 position::AFTER_POST
2118 } else if is_orya(u) {
2119 position::AFTER_SUB
2120 } else if is_taml(u) {
2121 position::AFTER_POST
2122 } else if is_telu(u) {
2123 position::BEFORE_SUB
2124 } else if is_knda(u) {
2125 position::BEFORE_SUB
2126 } else if is_mlym(u) {
2127 position::AFTER_POST
2128 } else if is_sinh(u) {
2129 position::AFTER_SUB
2130 } else {
2131 position::AFTER_SUB
2132 }
2133 }
2134
2135 match side {
2136 position::PRE_C => position::PRE_M,
2137 position::POST_C => matra_pos_right(u),
2138 position::ABOVE_C => matra_pos_top(u),
2139 position::BELOW_C => matra_pos_bottom(u),
2140 _ => side,
2141 }
2142}
2143