1use alloc::boxed::Box;
2use core::cmp;
3use core::convert::TryFrom;
4use core::ops::Range;
5
6use ttf_parser::GlyphId;
7
8use super::*;
9use crate::buffer::Buffer;
10use crate::normalize::ShapeNormalizationMode;
11use crate::ot::{
12 feature, FeatureFlags, LayoutTable, Map, TableIndex, WouldApply, WouldApplyContext,
13};
14use crate::plan::{ShapePlan, ShapePlanner};
15use crate::unicode::{hb_gc, CharExt, GeneralCategoryExt};
16use crate::{script, Face, GlyphInfo, Mask, Script, Tag};
17
/// Complex-shaper descriptor for the Indic scripts.
///
/// Each field plugs one of this module's callbacks into the generic
/// `ComplexShaper` table; hooks set to `None` are not provided by this shaper.
pub const INDIC_SHAPER: ComplexShaper = ComplexShaper {
    collect_features: Some(collect_features),
    override_features: Some(override_features),
    // Per-plan data is an `IndicShapePlan` built from the shape plan.
    create_data: Some(|plan: &ShapePlan| Box::new(IndicShapePlan::new(plan))),
    preprocess_text: Some(preprocess_text),
    postprocess_glyphs: None,
    normalization_mode: Some(ShapeNormalizationMode::ComposedDiacriticsNoShortCircuit),
    decompose: Some(decompose),
    compose: Some(compose),
    // Only records per-glyph Indic properties; see `setup_masks`.
    setup_masks: Some(setup_masks),
    gpos_tag: None,
    reorder_marks: None,
    zero_width_marks: None,
    fallback_position: false,
};
33
/// Indic character category, stored as a raw `u8`.
///
/// The valid values are the constants in the [`category`] module.
pub type Category = u8;

/// Numeric values for [`Category`].
///
/// `category_flag` turns these values into bits of a `u32` flag set.
pub mod category {
    pub const X: u8 = 0;
    pub const C: u8 = 1;
    pub const V: u8 = 2;
    pub const N: u8 = 3;
    pub const H: u8 = 4;
    pub const ZWNJ: u8 = 5;
    pub const ZWJ: u8 = 6;
    pub const M: u8 = 7;
    pub const SM: u8 = 8;
    // OT_VD = 9, UNUSED; we use OT_A instead.
    pub const A: u8 = 10;
    pub const PLACEHOLDER: u8 = 11;
    pub const DOTTED_CIRCLE: u8 = 12;
    pub const RS: u8 = 13; // Register Shifter, used in Khmer OT spec.
    pub const COENG: u8 = 14; // Khmer-style Virama.
    pub const REPHA: u8 = 15; // Atomically-encoded logical or visual repha.
    pub const RA: u8 = 16;
    pub const CM: u8 = 17; // Consonant-Medial.
    pub const SYMBOL: u8 = 18; // Avagraha, etc that take marks (SM,A,VD).
    pub const CS: u8 = 19;
    pub const ROBATIC: u8 = 20;
    pub const X_GROUP: u8 = 21;
    pub const Y_GROUP: u8 = 22;
    pub const MW: u8 = 23;
    pub const MY: u8 = 24;
    pub const PT: u8 = 25;
    // The following are used by Khmer & Myanmar shapers. Defined here for them to share.
    pub const V_AVB: u8 = 26;
    pub const V_BLW: u8 = 27;
    pub const V_PRE: u8 = 28;
    pub const V_PST: u8 = 29;
    pub const VS: u8 = 30; // Variation selectors
    pub const P: u8 = 31; // Punctuation
    // NOTE(review): 32 does not fit as a bit index of a `u32`; presumably only ever
    // passed to `rb_flag_unsafe` — confirm against that helper.
    pub const D: u8 = 32; // Digits except zero
}
71
/// Indic position class of a glyph, stored as a raw `u8`.
///
/// The valid values are the constants in the [`position`] module.
pub type Position = u8;

/// Numeric values for [`Position`].
///
/// NOTE(review): the values presumably ascend in the order glyphs are sorted within a
/// syllable — confirm against the reordering code.
pub mod position {
    pub const START: u8 = 0;
    pub const RA_TO_BECOME_REPH: u8 = 1;
    pub const PRE_M: u8 = 2;
    pub const PRE_C: u8 = 3;
    pub const BASE_C: u8 = 4;
    pub const AFTER_MAIN: u8 = 5;
    pub const ABOVE_C: u8 = 6;
    pub const BEFORE_SUB: u8 = 7;
    pub const BELOW_C: u8 = 8;
    pub const AFTER_SUB: u8 = 9;
    pub const BEFORE_POST: u8 = 10;
    pub const POST_C: u8 = 11;
    pub const AFTER_POST: u8 = 12;
    pub const FINAL_C: u8 = 13;
    pub const SMVD: u8 = 14;
    pub const END: u8 = 15;
}
91
/// Syllabic category of a character.
///
/// NOTE(review): the variant names look like the values of the Unicode
/// `Indic_Syllabic_Category` property — confirm against the table generator.
/// Not every variant is referenced in this file, hence `allow(dead_code)`.
#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq)]
pub enum SyllabicCategory {
    Other,
    Avagraha,
    Bindu,
    BrahmiJoiningNumber,
    CantillationMark,
    Consonant,
    ConsonantDead,
    ConsonantFinal,
    ConsonantHeadLetter,
    ConsonantInitialPostfixed,
    ConsonantKiller,
    ConsonantMedial,
    ConsonantPlaceholder,
    ConsonantPrecedingRepha,
    ConsonantPrefixed,
    ConsonantSubjoined,
    ConsonantSucceedingRepha,
    ConsonantWithStacker,
    GeminationMark,
    InvisibleStacker,
    Joiner,
    ModifyingLetter,
    NonJoiner,
    Nukta,
    Number,
    NumberJoiner,
    PureKiller,
    RegisterShifter,
    SyllableModifier,
    ToneLetter,
    ToneMark,
    Virama,
    Visarga,
    Vowel,
    VowelDependent,
    VowelIndependent,
}
132
/// Placement class of a matra (dependent vowel sign) relative to its base.
///
/// NOTE(review): the variant names look like the values of the Unicode
/// `Indic_Positional_Category` property — confirm against the table generator.
/// Not every variant is referenced in this file, hence `allow(dead_code)`.
#[allow(dead_code)]
#[derive(Clone, Copy)]
pub enum MatraCategory {
    NotApplicable,
    Left,
    Top,
    Bottom,
    Right,
    BottomAndLeft,
    BottomAndRight,
    LeftAndRight,
    TopAndBottom,
    TopAndBottomAndRight,
    TopAndLeft,
    TopAndLeftAndRight,
    TopAndRight,
    Overstruck,
    VisualOrderLeft,
}
152
/// OpenType features used by the Indic shaper, each paired with the flags it is
/// registered with in `collect_features`.
///
/// The order is significant: `indic_feature` holds the index of every entry, and
/// `IndicShapePlan::mask_array` is indexed the same way.
const INDIC_FEATURES: &[(Tag, FeatureFlags)] = &[
    // Basic features.
    // These features are applied in order, one at a time, after initial_reordering,
    // constrained to the syllable.
    (feature::NUKTA_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
    (feature::AKHANDS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
    (feature::REPH_FORMS, FeatureFlags::MANUAL_JOINERS),
    (feature::RAKAR_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
    (feature::PRE_BASE_FORMS, FeatureFlags::MANUAL_JOINERS),
    (feature::BELOW_BASE_FORMS, FeatureFlags::MANUAL_JOINERS),
    (feature::ABOVE_BASE_FORMS, FeatureFlags::MANUAL_JOINERS),
    (feature::HALF_FORMS, FeatureFlags::MANUAL_JOINERS),
    (feature::POST_BASE_FORMS, FeatureFlags::MANUAL_JOINERS),
    (feature::VATTU_VARIANTS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
    (feature::CONJUNCT_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
    // Other features.
    // These features are applied all at once, after final_reordering, constrained
    // to the syllable.
    // Default Bengali font in Windows for example has intermixed
    // lookups for init,pres,abvs,blws features.
    (feature::INITIAL_FORMS, FeatureFlags::MANUAL_JOINERS),
    (
        feature::PRE_BASE_SUBSTITUTIONS,
        FeatureFlags::GLOBAL_MANUAL_JOINERS,
    ),
    (
        feature::ABOVE_BASE_SUBSTITUTIONS,
        FeatureFlags::GLOBAL_MANUAL_JOINERS,
    ),
    (
        feature::BELOW_BASE_SUBSTITUTIONS,
        FeatureFlags::GLOBAL_MANUAL_JOINERS,
    ),
    (
        feature::POST_BASE_SUBSTITUTIONS,
        FeatureFlags::GLOBAL_MANUAL_JOINERS,
    ),
    (feature::HALANT_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
];
192
/// Indices into `INDIC_FEATURES` (and therefore into `IndicShapePlan::mask_array`).
// Must be in the same order as the INDIC_FEATURES array.
// Not every index is referenced in this file, hence `allow(dead_code)`.
#[allow(dead_code)]
mod indic_feature {
    pub const NUKT: usize = 0;
    pub const AKHN: usize = 1;
    pub const RPHF: usize = 2;
    pub const RKRF: usize = 3;
    pub const PREF: usize = 4;
    pub const BLWF: usize = 5;
    pub const ABVF: usize = 6;
    pub const HALF: usize = 7;
    pub const PSTF: usize = 8;
    pub const VATU: usize = 9;
    pub const CJCT: usize = 10;
    pub const INIT: usize = 11;
    pub const PRES: usize = 12;
    pub const ABVS: usize = 13;
    pub const BLWS: usize = 14;
    pub const PSTS: usize = 15;
    pub const HALN: usize = 16;
}
214
215const fn category_flag(c: Category) -> u32 {
216 rb_flag(c as u32)
217}
218
/// Flag set containing only the consonant-medial category.
const MEDIAL_FLAGS: u32 = category_flag(category::CM);
// Note:
//
// We treat Vowels and placeholders as if they were consonants. This is safe because Vowels
// cannot happen in a consonant syllable. The plus side however is, we can call the
// consonant syllable logic from the vowel syllable function and get it all right!
/// Flag set of every category that `GlyphInfo::is_consonant` accepts.
const CONSONANT_FLAGS: u32 = category_flag(category::C)
    | category_flag(category::CS)
    | category_flag(category::RA)
    | MEDIAL_FLAGS
    | category_flag(category::V)
    | category_flag(category::PLACEHOLDER)
    | category_flag(category::DOTTED_CIRCLE);
/// Flag set of the two joiner categories (ZWJ and ZWNJ).
const JOINER_FLAGS: u32 = category_flag(category::ZWJ) | category_flag(category::ZWNJ);
233
/// Code points of the letter Ra in each supported script.
///
/// `GlyphInfo::set_indic_properties` re-categorizes a consonant found in this list
/// as `category::RA`.
// This is a hack for now. We should move this data into the main Indic table.
// Or completely remove it and just check in the tables.
const RA_CHARS: &[u32] = &[
    0x0930, // Devanagari
    0x09B0, // Bengali
    0x09F0, // Bengali
    0x0A30, // Gurmukhi. No Reph
    0x0AB0, // Gujarati
    0x0B30, // Oriya
    0x0BB0, // Tamil. No Reph
    0x0C30, // Telugu. Reph formed only with ZWJ
    0x0CB0, // Kannada
    0x0D30, // Malayalam. No Reph, Logical Repha
    0x0DBB, // Sinhala. Reph formed only with ZWJ
];
249
/// Strategy used to locate the base consonant of a syllable.
#[derive(Clone, Copy, PartialEq)]
enum BasePosition {
    /// Sinhala variant; `update_consonant_positions` does no font lookups for it.
    LastSinhala,
    /// Main Indic behaviour; consonant positions are refined from the font.
    Last,
}
255
/// Where a script places the reph.
///
/// Each variant's discriminant equals the `position` constant of the same name.
#[derive(Clone, Copy, PartialEq)]
enum RephPosition {
    AfterMain = position::AFTER_MAIN as isize,
    BeforeSub = position::BEFORE_SUB as isize,
    AfterSub = position::AFTER_SUB as isize,
    BeforePost = position::BEFORE_POST as isize,
    AfterPost = position::AFTER_POST as isize,
}
264
/// How a script encodes the reph.
#[derive(Clone, Copy, PartialEq)]
enum RephMode {
    /// Reph formed out of initial Ra,H sequence.
    Implicit,
    /// Reph formed out of initial Ra,H,ZWJ sequence.
    Explicit,
    /// Encoded Repha character, needs reordering.
    LogRepha,
}
274
/// Which consonants the below-forms (`blwf`) feature is applied to.
#[derive(Clone, Copy, PartialEq)]
enum BlwfMode {
    /// Below-forms feature applied to pre-base and post-base.
    PreAndPost,
    /// Below-forms feature applied to post-base only.
    PostOnly,
}
282
/// Per-script shaping configuration; one row of `INDIC_CONFIGS`.
#[derive(Clone, Copy)]
struct IndicConfig {
    /// Script this entry applies to; `None` for the default entry.
    script: Option<Script>,
    /// Whether the script has an old-spec; `IndicShapePlan::new` combines this with
    /// the chosen GSUB script tag to decide `is_old_spec`.
    has_old_spec: bool,
    /// Code point of the script's virama; `0` means there is none.
    virama: u32,
    /// How the base consonant is located.
    base_pos: BasePosition,
    /// Where the reph is placed.
    reph_pos: RephPosition,
    /// How the reph is encoded.
    reph_mode: RephMode,
    /// Which consonants the `blwf` feature applies to.
    blwf_mode: BlwfMode,
}
293
294impl IndicConfig {
295 const fn new(
296 script: Option<Script>,
297 has_old_spec: bool,
298 virama: u32,
299 base_pos: BasePosition,
300 reph_pos: RephPosition,
301 reph_mode: RephMode,
302 blwf_mode: BlwfMode,
303 ) -> Self {
304 IndicConfig {
305 script,
306 has_old_spec,
307 virama,
308 base_pos,
309 reph_pos,
310 reph_mode,
311 blwf_mode,
312 }
313 }
314}
315
316const INDIC_CONFIGS: &[IndicConfig] = &[
317 IndicConfig::new(
318 script:None,
319 has_old_spec:false,
320 virama:0,
321 base_pos:BasePosition::Last,
322 reph_pos:RephPosition::BeforePost,
323 RephMode::Implicit,
324 BlwfMode::PreAndPost,
325 ),
326 IndicConfig::new(
327 script:Some(script::DEVANAGARI),
328 has_old_spec:true,
329 virama:0x094D,
330 base_pos:BasePosition::Last,
331 reph_pos:RephPosition::BeforePost,
332 RephMode::Implicit,
333 BlwfMode::PreAndPost,
334 ),
335 IndicConfig::new(
336 script:Some(script::BENGALI),
337 has_old_spec:true,
338 virama:0x09CD,
339 base_pos:BasePosition::Last,
340 reph_pos:RephPosition::AfterSub,
341 RephMode::Implicit,
342 BlwfMode::PreAndPost,
343 ),
344 IndicConfig::new(
345 script:Some(script::GURMUKHI),
346 has_old_spec:true,
347 virama:0x0A4D,
348 base_pos:BasePosition::Last,
349 reph_pos:RephPosition::BeforeSub,
350 RephMode::Implicit,
351 BlwfMode::PreAndPost,
352 ),
353 IndicConfig::new(
354 script:Some(script::GUJARATI),
355 has_old_spec:true,
356 virama:0x0ACD,
357 base_pos:BasePosition::Last,
358 reph_pos:RephPosition::BeforePost,
359 RephMode::Implicit,
360 BlwfMode::PreAndPost,
361 ),
362 IndicConfig::new(
363 script:Some(script::ORIYA),
364 has_old_spec:true,
365 virama:0x0B4D,
366 base_pos:BasePosition::Last,
367 reph_pos:RephPosition::AfterMain,
368 RephMode::Implicit,
369 BlwfMode::PreAndPost,
370 ),
371 IndicConfig::new(
372 script:Some(script::TAMIL),
373 has_old_spec:true,
374 virama:0x0BCD,
375 base_pos:BasePosition::Last,
376 reph_pos:RephPosition::AfterPost,
377 RephMode::Implicit,
378 BlwfMode::PreAndPost,
379 ),
380 IndicConfig::new(
381 script:Some(script::TELUGU),
382 has_old_spec:true,
383 virama:0x0C4D,
384 base_pos:BasePosition::Last,
385 reph_pos:RephPosition::AfterPost,
386 RephMode::Explicit,
387 BlwfMode::PostOnly,
388 ),
389 IndicConfig::new(
390 script:Some(script::KANNADA),
391 has_old_spec:true,
392 virama:0x0CCD,
393 base_pos:BasePosition::Last,
394 reph_pos:RephPosition::AfterPost,
395 RephMode::Implicit,
396 BlwfMode::PostOnly,
397 ),
398 IndicConfig::new(
399 script:Some(script::MALAYALAM),
400 has_old_spec:true,
401 virama:0x0D4D,
402 base_pos:BasePosition::Last,
403 reph_pos:RephPosition::AfterMain,
404 RephMode::LogRepha,
405 BlwfMode::PreAndPost,
406 ),
407 IndicConfig::new(
408 script:Some(script::SINHALA),
409 has_old_spec:false,
410 virama:0x0DCA,
411 base_pos:BasePosition::LastSinhala,
412 reph_pos:RephPosition::AfterPost,
413 RephMode::Explicit,
414 BlwfMode::PreAndPost,
415 ),
416];
417
/// The GSUB lookups of one feature, kept so that "would this glyph sequence be
/// substituted?" can be asked without applying anything.
struct IndicWouldSubstituteFeature {
    /// Range of lookup indices in the map's GSUB table; empty when the feature has
    /// no stage in the map.
    lookups: Range<usize>,
    /// Passed through to `WouldApplyContext::zero_context` for every query.
    zero_context: bool,
}
422
423impl IndicWouldSubstituteFeature {
424 pub fn new(map: &Map, feature_tag: Tag, zero_context: bool) -> Self {
425 IndicWouldSubstituteFeature {
426 lookups: match map.feature_stage(TableIndex::GSUB, feature_tag) {
427 Some(stage) => map.stage_lookup_range(TableIndex::GSUB, stage),
428 None => 0..0,
429 },
430 zero_context,
431 }
432 }
433
434 pub fn would_substitute(&self, map: &Map, face: &Face, glyphs: &[GlyphId]) -> bool {
435 for index in self.lookups.clone() {
436 let lookup = map.lookup(TableIndex::GSUB, index);
437 let ctx = WouldApplyContext {
438 glyphs,
439 zero_context: self.zero_context,
440 };
441 if face
442 .gsub
443 .as_ref()
444 .and_then(|table| table.get_lookup(lookup.index))
445 .map_or(false, |lookup| lookup.would_apply(&ctx))
446 {
447 return true;
448 }
449 }
450
451 false
452 }
453}
454
/// Indic-specific data attached to a `ShapePlan` (created via `INDIC_SHAPER.create_data`).
struct IndicShapePlan {
    /// Configuration row selected for the plan's script.
    config: IndicConfig,
    /// Whether the old-spec behaviour is in effect; see `IndicShapePlan::new`.
    is_old_spec: bool,
    // virama_glyph: Option<u32>,
    /// Would-substitute probes for the `rphf`, `pref`, `blwf`, `pstf` and `vatu` features.
    rphf: IndicWouldSubstituteFeature,
    pref: IndicWouldSubstituteFeature,
    blwf: IndicWouldSubstituteFeature,
    pstf: IndicWouldSubstituteFeature,
    vatu: IndicWouldSubstituteFeature,
    /// One mask per `INDIC_FEATURES` entry (indexed by `indic_feature`); `0` for
    /// features registered with the `GLOBAL` flag.
    mask_array: [Mask; INDIC_FEATURES.len()],
}
466
467impl IndicShapePlan {
468 fn new(plan: &ShapePlan) -> Self {
469 let script = plan.script;
470 let config = if let Some(c) = INDIC_CONFIGS.iter().skip(1).find(|c| c.script == script) {
471 *c
472 } else {
473 INDIC_CONFIGS[0]
474 };
475
476 let is_old_spec = config.has_old_spec
477 && plan
478 .ot_map
479 .chosen_script(TableIndex::GSUB)
480 .map_or(true, |tag| tag.to_bytes()[3] != b'2');
481
482 // Use zero-context would_substitute() matching for new-spec of the main
483 // Indic scripts, and scripts with one spec only, but not for old-specs.
484 // The new-spec for all dual-spec scripts says zero-context matching happens.
485 //
486 // However, testing with Malayalam shows that old and new spec both allow
487 // context. Testing with Bengali new-spec however shows that it doesn't.
488 // So, the heuristic here is the way it is. It should *only* be changed,
489 // as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE.
490 let zero_context = is_old_spec && script != Some(script::MALAYALAM);
491
492 let mut mask_array = [0; INDIC_FEATURES.len()];
493 for (i, feature) in INDIC_FEATURES.iter().enumerate() {
494 mask_array[i] = if feature.1.contains(FeatureFlags::GLOBAL) {
495 0
496 } else {
497 plan.ot_map.one_mask(feature.0)
498 }
499 }
500
501 // TODO: what is this?
502 // let mut virama_glyph = None;
503 // if config.virama != 0 {
504 // if let Some(g) = face.glyph_index(char::try_from(config.virama).unwrap()) {
505 // virama_glyph = Some(g.0 as u32);
506 // }
507 // }
508
509 IndicShapePlan {
510 config,
511 is_old_spec,
512 // virama_glyph,
513 rphf: IndicWouldSubstituteFeature::new(&plan.ot_map, feature::REPH_FORMS, zero_context),
514 pref: IndicWouldSubstituteFeature::new(
515 &plan.ot_map,
516 feature::PRE_BASE_FORMS,
517 zero_context,
518 ),
519 blwf: IndicWouldSubstituteFeature::new(
520 &plan.ot_map,
521 feature::BELOW_BASE_FORMS,
522 zero_context,
523 ),
524 pstf: IndicWouldSubstituteFeature::new(
525 &plan.ot_map,
526 feature::POST_BASE_FORMS,
527 zero_context,
528 ),
529 vatu: IndicWouldSubstituteFeature::new(
530 &plan.ot_map,
531 feature::VATTU_VARIANTS,
532 zero_context,
533 ),
534 mask_array,
535 }
536 }
537}
538
539impl GlyphInfo {
540 pub(crate) fn indic_category(&self) -> Category {
541 self.complex_var_u8_category()
542 }
543
544 pub(crate) fn set_indic_category(&mut self, c: Category) {
545 self.set_complex_var_u8_category(c)
546 }
547
548 pub(crate) fn indic_position(&self) -> Position {
549 self.complex_var_u8_auxiliary()
550 }
551
552 pub(crate) fn set_indic_position(&mut self, c: Position) {
553 self.set_complex_var_u8_auxiliary(c)
554 }
555
556 fn is_one_of(&self, flags: u32) -> bool {
557 // If it ligated, all bets are off.
558 if self.is_ligated() {
559 return false;
560 }
561
562 rb_flag_unsafe(self.indic_category() as u32) & flags != 0
563 }
564
565 fn is_joiner(&self) -> bool {
566 self.is_one_of(JOINER_FLAGS)
567 }
568
569 pub(crate) fn is_consonant(&self) -> bool {
570 self.is_one_of(CONSONANT_FLAGS)
571 }
572
573 fn is_halant(&self) -> bool {
574 self.is_one_of(rb_flag(category::H as u32))
575 }
576
577 fn set_indic_properties(&mut self) {
578 let u = self.glyph_id;
579 let (mut cat, mut pos) = get_category_and_position(u);
580
581 // Re-assign category
582
583 // The following act more like the Bindus.
584 match u {
585 0x0953..=0x0954 => cat = category::SM,
586 // The following act like consonants.
587 0x0A72..=0x0A73 | 0x1CF5..=0x1CF6 => cat = category::C,
588 // TODO: The following should only be allowed after a Visarga.
589 // For now, just treat them like regular tone marks.
590 0x1CE2..=0x1CE8 => cat = category::A,
591 // TODO: The following should only be allowed after some of
592 // the nasalization marks, maybe only for U+1CE9..U+1CF1.
593 // For now, just treat them like tone marks.
594 0x1CED => cat = category::A,
595 // The following take marks in standalone clusters, similar to Avagraha.
596 0xA8F2..=0xA8F7 | 0x1CE9..=0x1CEC | 0x1CEE..=0x1CF1 => cat = category::SYMBOL,
597 // https://github.com/harfbuzz/harfbuzz/issues/524
598 0x0A51 => {
599 cat = category::M;
600 pos = position::BELOW_C;
601 }
602 // According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil,
603 // so the Indic shaper needs to know their categories.
604 0x11301 | 0x11303 => cat = category::SM,
605 0x1133B | 0x1133C => cat = category::N,
606 // https://github.com/harfbuzz/harfbuzz/issues/552
607 0x0AFB => cat = category::N,
608 // https://github.com/harfbuzz/harfbuzz/issues/2849
609 0x0B55 => cat = category::N,
610 // https://github.com/harfbuzz/harfbuzz/issues/538
611 0x0980 => cat = category::PLACEHOLDER,
612 // https://github.com/harfbuzz/harfbuzz/issues/1613
613 0x09FC => cat = category::PLACEHOLDER,
614 // https://github.com/harfbuzz/harfbuzz/issues/623
615 0x0C80 => cat = category::PLACEHOLDER,
616 0x2010 | 0x2011 => cat = category::PLACEHOLDER,
617 0x25CC => cat = category::DOTTED_CIRCLE,
618 _ => {}
619 }
620
621 // Re-assign position.
622
623 if (rb_flag_unsafe(cat as u32) & CONSONANT_FLAGS) != 0 {
624 pos = position::BASE_C;
625 if RA_CHARS.contains(&u) {
626 cat = category::RA;
627 }
628 } else if cat == category::M {
629 pos = matra_position_indic(u, pos);
630 } else if (rb_flag_unsafe(cat as u32)
631 & (category_flag(category::SM)
632 | category_flag(category::A)
633 | category_flag(category::SYMBOL)))
634 != 0
635 {
636 pos = position::SMVD;
637 }
638
639 // Oriya Bindu is BeforeSub in the spec.
640 if u == 0x0B01 {
641 pos = position::BEFORE_SUB;
642 }
643
644 self.set_indic_category(cat);
645 self.set_indic_position(pos);
646 }
647}
648
649fn collect_features(planner: &mut ShapePlanner) {
650 // Do this before any lookups have been applied.
651 planner.ot_map.add_gsub_pause(Some(setup_syllables));
652
653 planner
654 .ot_map
655 .enable_feature(feature::LOCALIZED_FORMS, FeatureFlags::empty(), 1);
656 // The Indic specs do not require ccmp, but we apply it here since if
657 // there is a use of it, it's typically at the beginning.
658 planner.ot_map.enable_feature(
659 feature::GLYPH_COMPOSITION_DECOMPOSITION,
660 FeatureFlags::empty(),
661 1,
662 );
663
664 planner.ot_map.add_gsub_pause(Some(initial_reordering));
665
666 for feature in INDIC_FEATURES.iter().take(10) {
667 planner.ot_map.add_feature(feature.0, feature.1, 1);
668 planner.ot_map.add_gsub_pause(None);
669 }
670
671 planner.ot_map.add_gsub_pause(Some(final_reordering));
672
673 for feature in INDIC_FEATURES.iter().skip(10) {
674 planner.ot_map.add_feature(feature.0, feature.1, 1);
675 }
676
677 planner
678 .ot_map
679 .enable_feature(feature::CONTEXTUAL_ALTERNATES, FeatureFlags::empty(), 1);
680 planner
681 .ot_map
682 .enable_feature(feature::CONTEXTUAL_LIGATURES, FeatureFlags::empty(), 1);
683
684 planner
685 .ot_map
686 .add_gsub_pause(Some(crate::ot::clear_syllables));
687}
688
689fn override_features(planner: &mut ShapePlanner) {
690 planner.ot_map.disable_feature(tag:feature::STANDARD_LIGATURES);
691}
692
693fn preprocess_text(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
694 super::vowel_constraints::preprocess_text_vowel_constraints(buffer);
695}
696
697fn decompose(ctx: &ShapeNormalizeContext, ab: char) -> Option<(char, char)> {
698 // Don't decompose these.
699 match ab {
700 '\u{0931}' | // DEVANAGARI LETTER RRA
701 // https://github.com/harfbuzz/harfbuzz/issues/779
702 '\u{09DC}' | // BENGALI LETTER RRA
703 '\u{09DD}' | // BENGALI LETTER RHA
704 '\u{0B94}' => return None, // TAMIL LETTER AU
705 _ => {}
706 }
707
708 if ab == '\u{0DDA}' || ('\u{0DDC}'..='\u{0DDE}').contains(&ab) {
709 // Sinhala split matras... Let the fun begin.
710 //
711 // These four characters have Unicode decompositions. However, Uniscribe
712 // decomposes them "Khmer-style", that is, it uses the character itself to
713 // get the second half. The first half of all four decompositions is always
714 // U+0DD9.
715 //
716 // Now, there are buggy fonts, namely, the widely used lklug.ttf, that are
717 // broken with Uniscribe. But we need to support them. As such, we only
718 // do the Uniscribe-style decomposition if the character is transformed into
719 // its "sec.half" form by the 'pstf' feature. Otherwise, we fall back to
720 // Unicode decomposition.
721 //
722 // Note that we can't unconditionally use Unicode decomposition. That would
723 // break some other fonts, that are designed to work with Uniscribe, and
724 // don't have positioning features for the Unicode-style decomposition.
725 //
726 // Argh...
727 //
728 // The Uniscribe behavior is now documented in the newly published Sinhala
729 // spec in 2012:
730 //
731 // https://docs.microsoft.com/en-us/typography/script-development/sinhala#shaping
732
733 let mut ok = false;
734 if let Some(g) = ctx.face.glyph_index(u32::from(ab)) {
735 let indic_plan = ctx.plan.data::<IndicShapePlan>();
736 ok = indic_plan
737 .pstf
738 .would_substitute(&ctx.plan.ot_map, ctx.face, &[g]);
739 }
740
741 if ok {
742 // Ok, safe to use Uniscribe-style decomposition.
743 return Some(('\u{0DD9}', ab));
744 }
745 }
746
747 crate::unicode::decompose(ab)
748}
749
750fn compose(_: &ShapeNormalizeContext, a: char, b: char) -> Option<char> {
751 // Avoid recomposing split matras.
752 if a.general_category().is_mark() {
753 return None;
754 }
755
756 // Composition-exclusion exceptions that we want to recompose.
757 if a == '\u{09AF}' && b == '\u{09BC}' {
758 return Some('\u{09DF}');
759 }
760
761 crate::unicode::compose(a, b)
762}
763
764fn setup_masks(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
765 // We cannot setup masks here. We save information about characters
766 // and setup masks later on in a pause-callback.
767 for info: &mut GlyphInfo in buffer.info_slice_mut() {
768 info.set_indic_properties();
769 }
770}
771
772fn setup_syllables(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
773 super::indic_machine::find_syllables_indic(buffer);
774
775 let mut start: usize = 0;
776 let mut end: usize = buffer.next_syllable(start:0);
777 while start < buffer.len {
778 buffer.unsafe_to_break(start, end);
779 start = end;
780 end = buffer.next_syllable(start);
781 }
782}
783
784fn initial_reordering(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) {
785 use super::indic_machine::SyllableType;
786
787 let indic_plan: &IndicShapePlan = plan.data::<IndicShapePlan>();
788
789 update_consonant_positions(plan, indic_plan, face, buffer);
790 syllabic::insert_dotted_circles(
791 face,
792 buffer,
793 broken_syllable_type:SyllableType::BrokenCluster as u8,
794 dottedcircle_category:category::DOTTED_CIRCLE,
795 repha_category:Some(category::REPHA),
796 dottedcircle_position:Some(position::END),
797 );
798
799 let mut start: usize = 0;
800 let mut end: usize = buffer.next_syllable(start:0);
801 while start < buffer.len {
802 initial_reordering_syllable(plan, indic_plan, face, start, end, buffer);
803 start = end;
804 end = buffer.next_syllable(start);
805 }
806}
807
808fn update_consonant_positions(
809 plan: &ShapePlan,
810 indic_plan: &IndicShapePlan,
811 face: &Face,
812 buffer: &mut Buffer,
813) {
814 if indic_plan.config.base_pos != BasePosition::Last {
815 return;
816 }
817
818 let mut virama_glyph: Option = None;
819 if indic_plan.config.virama != 0 {
820 virama_glyph = face.glyph_index(indic_plan.config.virama);
821 }
822
823 if let Some(virama: GlyphId) = virama_glyph {
824 for info: &mut GlyphInfo in buffer.info_slice_mut() {
825 if info.indic_position() == position::BASE_C {
826 let consonant: GlyphId = info.as_glyph();
827 info.set_indic_position(consonant_position_from_face(
828 plan, indic_plan, face, consonant, virama,
829 ));
830 }
831 }
832 }
833}
834
835fn consonant_position_from_face(
836 plan: &ShapePlan,
837 indic_plan: &IndicShapePlan,
838 face: &Face,
839 consonant: GlyphId,
840 virama: GlyphId,
841) -> u8 {
842 // For old-spec, the order of glyphs is Consonant,Virama,
843 // whereas for new-spec, it's Virama,Consonant. However,
844 // some broken fonts (like Free Sans) simply copied lookups
845 // from old-spec to new-spec without modification.
846 // And oddly enough, Uniscribe seems to respect those lookups.
847 // Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds
848 // base at 0. The font however, only has lookups matching
849 // 930,94D in 'blwf', not the expected 94D,930 (with new-spec
850 // table). As such, we simply match both sequences. Seems
851 // to work.
852 //
853 // Vatu is done as well, for:
854 // https://github.com/harfbuzz/harfbuzz/issues/1587
855
856 if indic_plan
857 .blwf
858 .would_substitute(&plan.ot_map, face, &[virama, consonant])
859 || indic_plan
860 .blwf
861 .would_substitute(&plan.ot_map, face, &[consonant, virama])
862 || indic_plan
863 .vatu
864 .would_substitute(&plan.ot_map, face, &[virama, consonant])
865 || indic_plan
866 .vatu
867 .would_substitute(&plan.ot_map, face, &[consonant, virama])
868 {
869 return position::BELOW_C;
870 }
871
872 if indic_plan
873 .pstf
874 .would_substitute(&plan.ot_map, face, &[virama, consonant])
875 || indic_plan
876 .pstf
877 .would_substitute(&plan.ot_map, face, &[consonant, virama])
878 {
879 return position::POST_C;
880 }
881
882 if indic_plan
883 .pref
884 .would_substitute(&plan.ot_map, face, &[virama, consonant])
885 || indic_plan
886 .pref
887 .would_substitute(&plan.ot_map, face, &[consonant, virama])
888 {
889 return position::POST_C;
890 }
891
892 position::BASE_C
893}
894
895fn initial_reordering_syllable(
896 plan: &ShapePlan,
897 indic_plan: &IndicShapePlan,
898 face: &Face,
899 start: usize,
900 end: usize,
901 buffer: &mut Buffer,
902) {
903 use super::indic_machine::SyllableType;
904
905 let syllable_type = match buffer.info[start].syllable() & 0x0F {
906 0 => SyllableType::ConsonantSyllable,
907 1 => SyllableType::VowelSyllable,
908 2 => SyllableType::StandaloneCluster,
909 3 => SyllableType::SymbolCluster,
910 4 => SyllableType::BrokenCluster,
911 5 => SyllableType::NonIndicCluster,
912 _ => unreachable!(),
913 };
914
915 match syllable_type {
916 // We made the vowels look like consonants. So let's call the consonant logic!
917 SyllableType::VowelSyllable | SyllableType::ConsonantSyllable => {
918 initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer);
919 }
920 // We already inserted dotted-circles, so just call the standalone_cluster.
921 SyllableType::BrokenCluster | SyllableType::StandaloneCluster => {
922 initial_reordering_standalone_cluster(plan, indic_plan, face, start, end, buffer);
923 }
924 SyllableType::SymbolCluster | SyllableType::NonIndicCluster => {}
925 }
926}
927
// Rules from:
// https://docs.microsoft.com/en-us/typography/script-development/devanagari
930fn initial_reordering_consonant_syllable(
931 plan: &ShapePlan,
932 indic_plan: &IndicShapePlan,
933 face: &Face,
934 start: usize,
935 end: usize,
936 buffer: &mut Buffer,
937) {
938 // https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167
939 // For compatibility with legacy usage in Kannada,
940 // Ra+h+ZWJ must behave like Ra+ZWJ+h...
941 if buffer.script == Some(script::KANNADA)
942 && start + 3 <= end
943 && buffer.info[start].is_one_of(category_flag(category::RA))
944 && buffer.info[start + 1].is_one_of(category_flag(category::H))
945 && buffer.info[start + 2].is_one_of(category_flag(category::ZWJ))
946 {
947 buffer.merge_clusters(start + 1, start + 3);
948 buffer.info.swap(start + 1, start + 2);
949 }
950
951 // 1. Find base consonant:
952 //
953 // The shaping engine finds the base consonant of the syllable, using the
954 // following algorithm: starting from the end of the syllable, move backwards
955 // until a consonant is found that does not have a below-base or post-base
956 // form (post-base forms have to follow below-base forms), or that is not a
957 // pre-base-reordering Ra, or arrive at the first consonant. The consonant
958 // stopped at will be the base.
959 //
960 // - If the syllable starts with Ra + Halant (in a script that has Reph)
961 // and has more than one consonant, Ra is excluded from candidates for
962 // base consonants.
963
964 let mut base = end;
965 let mut has_reph = false;
966
967 {
968 // -> If the syllable starts with Ra + Halant (in a script that has Reph)
969 // and has more than one consonant, Ra is excluded from candidates for
970 // base consonants.
971 let mut limit = start;
972 if indic_plan.mask_array[indic_feature::RPHF] != 0
973 && start + 3 <= end
974 && ((indic_plan.config.reph_mode == RephMode::Implicit
975 && !buffer.info[start + 2].is_joiner())
976 || (indic_plan.config.reph_mode == RephMode::Explicit
977 && buffer.info[start + 2].indic_category() == category::ZWJ))
978 {
979 // See if it matches the 'rphf' feature.
980 let glyphs = &[
981 buffer.info[start].as_glyph(),
982 buffer.info[start + 1].as_glyph(),
983 if indic_plan.config.reph_mode == RephMode::Explicit {
984 buffer.info[start + 2].as_glyph()
985 } else {
986 GlyphId(0)
987 },
988 ];
989 if indic_plan
990 .rphf
991 .would_substitute(&plan.ot_map, face, &glyphs[0..2])
992 || (indic_plan.config.reph_mode == RephMode::Explicit
993 && indic_plan.rphf.would_substitute(&plan.ot_map, face, glyphs))
994 {
995 limit += 2;
996 while limit < end && buffer.info[limit].is_joiner() {
997 limit += 1;
998 }
999 base = start;
1000 has_reph = true;
1001 }
1002 } else if indic_plan.config.reph_mode == RephMode::LogRepha
1003 && buffer.info[start].indic_category() == category::REPHA
1004 {
1005 limit += 1;
1006 while limit < end && buffer.info[limit].is_joiner() {
1007 limit += 1;
1008 }
1009 base = start;
1010 has_reph = true;
1011 }
1012
1013 match indic_plan.config.base_pos {
1014 BasePosition::Last => {
1015 // -> starting from the end of the syllable, move backwards
1016 let mut i = end;
1017 let mut seen_below = false;
1018 loop {
1019 i -= 1;
1020 // -> until a consonant is found
1021 if buffer.info[i].is_consonant() {
1022 // -> that does not have a below-base or post-base form
1023 // (post-base forms have to follow below-base forms),
1024 if buffer.info[i].indic_position() != position::BELOW_C
1025 && (buffer.info[i].indic_position() != position::POST_C || seen_below)
1026 {
1027 base = i;
1028 break;
1029 }
1030 if buffer.info[i].indic_position() == position::BELOW_C {
1031 seen_below = true;
1032 }
1033
1034 // -> or that is not a pre-base-reordering Ra,
1035 //
1036 // IMPLEMENTATION NOTES:
1037 //
1038 // Our pre-base-reordering Ra's are marked position::PostC, so will be skipped
1039 // by the logic above already.
1040
1041 // -> or arrive at the first consonant. The consonant stopped at will
1042 // be the base.
1043 base = i;
1044 } else {
1045 // A ZWJ after a Halant stops the base search, and requests an explicit
1046 // half form.
1047 // A ZWJ before a Halant, requests a subjoined form instead, and hence
1048 // search continues. This is particularly important for Bengali
1049 // sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya.
1050 if start < i
1051 && buffer.info[i].indic_category() == category::ZWJ
1052 && buffer.info[i - 1].indic_category() == category::H
1053 {
1054 break;
1055 }
1056 }
1057
1058 if i <= limit {
1059 break;
1060 }
1061 }
1062 }
1063 BasePosition::LastSinhala => {
1064 // Sinhala base positioning is slightly different from main Indic, in that:
1065 // 1. Its ZWJ behavior is different,
1066 // 2. We don't need to look into the font for consonant positions.
1067
1068 if !has_reph {
1069 base = limit;
1070 }
1071
1072 // Find the last base consonant that is not blocked by ZWJ. If there is
1073 // a ZWJ right before a base consonant, that would request a subjoined form.
1074 for i in limit..end {
1075 if buffer.info[i].is_consonant() {
1076 if limit < i && buffer.info[i - 1].indic_category() == category::ZWJ {
1077 break;
1078 } else {
1079 base = i;
1080 }
1081 }
1082 }
1083
1084 // Mark all subsequent consonants as below.
1085 for i in base + 1..end {
1086 if buffer.info[i].is_consonant() {
1087 buffer.info[i].set_indic_position(position::BELOW_C);
1088 }
1089 }
1090 }
1091 }
1092
1093 // -> If the syllable starts with Ra + Halant (in a script that has Reph)
1094 // and has more than one consonant, Ra is excluded from candidates for
1095 // base consonants.
1096 //
        // Only do this for unforced Reph (i.e. not for Ra,H,ZWJ).
1098 if has_reph && base == start && limit - base <= 2 {
1099 // Have no other consonant, so Reph is not formed and Ra becomes base.
1100 has_reph = false;
1101 }
1102 }
1103
1104 // 2. Decompose and reorder Matras:
1105 //
1106 // Each matra and any syllable modifier sign in the syllable are moved to the
1107 // appropriate position relative to the consonant(s) in the syllable. The
1108 // shaping engine decomposes two- or three-part matras into their constituent
1109 // parts before any repositioning. Matra characters are classified by which
1110 // consonant in a conjunct they have affinity for and are reordered to the
1111 // following positions:
1112 //
1113 // - Before first half form in the syllable
1114 // - After subjoined consonants
1115 // - After post-form consonant
1116 // - After main consonant (for above marks)
1117 //
1118 // IMPLEMENTATION NOTES:
1119 //
1120 // The normalize() routine has already decomposed matras for us, so we don't
1121 // need to worry about that.
1122
1123 // 3. Reorder marks to canonical order:
1124 //
1125 // Adjacent nukta and halant or nukta and vedic sign are always repositioned
1126 // if necessary, so that the nukta is first.
1127 //
1128 // IMPLEMENTATION NOTES:
1129 //
1130 // We don't need to do this: the normalize() routine already did this for us.
1131
1132 // Reorder characters
1133
1134 for i in start..base {
1135 let pos = buffer.info[i].indic_position();
1136 buffer.info[i].set_indic_position(cmp::min(position::PRE_C, pos));
1137 }
1138
1139 if base < end {
1140 buffer.info[base].set_indic_position(position::BASE_C);
1141 }
1142
1143 // Mark final consonants. A final consonant is one appearing after a matra.
1144 // Happens in Sinhala.
1145 for i in base + 1..end {
1146 if buffer.info[i].indic_category() == category::M {
1147 for j in i + 1..end {
1148 if buffer.info[j].is_consonant() {
1149 buffer.info[j].set_indic_position(position::FINAL_C);
1150 break;
1151 }
1152 }
1153
1154 break;
1155 }
1156 }
1157
1158 // Handle beginning Ra
1159 if has_reph {
1160 buffer.info[start].set_indic_position(position::RA_TO_BECOME_REPH);
1161 }
1162
1163 // For old-style Indic script tags, move the first post-base Halant after
1164 // last consonant.
1165 //
1166 // Reports suggest that in some scripts Uniscribe does this only if there
1167 // is *not* a Halant after last consonant already. We know that is the
1168 // case for Kannada, while it reorders unconditionally in other scripts,
1169 // eg. Malayalam, Bengali, and Devanagari. We don't currently know about
1170 // other scripts, so we block Kannada.
1171 //
1172 // Kannada test case:
1173 // U+0C9A,U+0CCD,U+0C9A,U+0CCD
1174 // With some versions of Lohit Kannada.
1175 // https://bugs.freedesktop.org/show_bug.cgi?id=59118
1176 //
1177 // Malayalam test case:
1178 // U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D
1179 // With lohit-ttf-20121122/Lohit-Malayalam.ttf
1180 //
1181 // Bengali test case:
1182 // U+0998,U+09CD,U+09AF,U+09CD
1183 // With Windows XP vrinda.ttf
1184 // https://github.com/harfbuzz/harfbuzz/issues/1073
1185 //
1186 // Devanagari test case:
1187 // U+091F,U+094D,U+0930,U+094D
1188 // With chandas.ttf
1189 // https://github.com/harfbuzz/harfbuzz/issues/1071
1190 if indic_plan.is_old_spec {
1191 let disallow_double_halants = buffer.script == Some(script::KANNADA);
1192 for i in base + 1..end {
1193 if buffer.info[i].indic_category() == category::H {
1194 let mut j = end - 1;
1195 while j > i {
1196 if buffer.info[j].is_consonant()
1197 || (disallow_double_halants
1198 && buffer.info[j].indic_category() == category::H)
1199 {
1200 break;
1201 }
1202
1203 j -= 1;
1204 }
1205
1206 if buffer.info[j].indic_category() != category::H && j > i {
1207 // Move Halant to after last consonant.
1208 let t = buffer.info[i];
1209 for k in 0..j - i {
1210 buffer.info[k + i] = buffer.info[k + i + 1];
1211 }
1212 buffer.info[j] = t;
1213 }
1214
1215 break;
1216 }
1217 }
1218 }
1219
1220 // Attach misc marks to previous char to move with them.
1221 {
1222 let mut last_pos = position::START;
1223 for i in start..end {
1224 let ok = rb_flag_unsafe(buffer.info[i].indic_category() as u32)
1225 & (category_flag(category::ZWJ)
1226 | category_flag(category::ZWNJ)
1227 | category_flag(category::N)
1228 | category_flag(category::RS)
1229 | category_flag(category::CM)
1230 | category_flag(category::H))
1231 != 0;
1232 if ok {
1233 buffer.info[i].set_indic_position(last_pos);
1234
1235 if buffer.info[i].indic_category() == category::H
1236 && buffer.info[i].indic_position() == position::PRE_M
1237 {
1238 // Uniscribe doesn't move the Halant with Left Matra.
                    // TEST: U+092B,U+093F,U+094D
1240 // We follow. This is important for the Sinhala
1241 // U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
1242 // where U+0DD9 is a left matra and U+0DCA is the virama.
1243 // We don't want to move the virama with the left matra.
1244 // TEST: U+0D9A,U+0DDA
1245 for j in (start + 1..=i).rev() {
1246 if buffer.info[j - 1].indic_position() != position::PRE_M {
1247 let pos = buffer.info[j - 1].indic_position();
1248 buffer.info[i].set_indic_position(pos);
1249 break;
1250 }
1251 }
1252 }
1253 } else if buffer.info[i].indic_position() != position::SMVD {
1254 last_pos = buffer.info[i].indic_position();
1255 }
1256 }
1257 }
1258 // For post-base consonants let them own anything before them
1259 // since the last consonant or matra.
1260 {
1261 let mut last = base;
1262 for i in base + 1..end {
1263 if buffer.info[i].is_consonant() {
1264 for j in last + 1..i {
1265 if (buffer.info[j].indic_position() as u8) < (position::SMVD as u8) {
1266 let pos = buffer.info[i].indic_position();
1267 buffer.info[j].set_indic_position(pos);
1268 }
1269 }
1270
1271 last = i;
1272 } else if buffer.info[i].indic_category() == category::M {
1273 last = i;
1274 }
1275 }
1276 }
1277
1278 {
1279 // Use syllable() for sort accounting temporarily.
1280 let syllable = buffer.info[start].syllable();
1281 for i in start..end {
1282 buffer.info[i].set_syllable(u8::try_from(i - start).unwrap());
1283 }
1284
1285 buffer.info[start..end].sort_by(|a, b| a.indic_position().cmp(&b.indic_position()));
1286
1287 // Find base again.
1288 base = end;
1289 for i in start..end {
1290 if buffer.info[i].indic_position() == position::BASE_C {
1291 base = i;
1292 break;
1293 }
1294 }
1295 // Things are out-of-control for post base positions, they may shuffle
1296 // around like crazy. In old-spec mode, we move halants around, so in
1297 // that case merge all clusters after base. Otherwise, check the sort
1298 // order and merge as needed.
1299 // For pre-base stuff, we handle cluster issues in final reordering.
1300 //
1301 // We could use buffer->sort() for this, if there was no special
1302 // reordering of pre-base stuff happening later...
1303 // We don't want to merge_clusters all of that, which buffer->sort()
1304 // would. Here's a concrete example:
1305 //
1306 // Assume there's a pre-base consonant and explicit Halant before base,
1307 // followed by a prebase-reordering (left) Matra:
1308 //
1309 // C,H,ZWNJ,B,M
1310 //
1311 // At this point in reordering we would have:
1312 //
1313 // M,C,H,ZWNJ,B
1314 //
1315 // whereas in final reordering we will bring the Matra closer to Base:
1316 //
1317 // C,H,ZWNJ,M,B
1318 //
1319 // That's why we don't want to merge-clusters anything before the Base
1320 // at this point. But if something moved from after Base to before it,
1321 // we should merge clusters from base to them. In final-reordering, we
1322 // only move things around before base, and merge-clusters up to base.
1323 // These two merge-clusters from the two sides of base will interlock
1324 // to merge things correctly. See:
1325 // https://github.com/harfbuzz/harfbuzz/issues/2272
1326 if indic_plan.is_old_spec || end - start > 127 {
1327 buffer.merge_clusters(base, end);
1328 } else {
1329 // Note! syllable() is a one-byte field.
1330 for i in base..end {
1331 if buffer.info[i].syllable() != 255 {
1332 let mut min = i;
1333 let mut max = i;
1334 let mut j = start + buffer.info[i].syllable() as usize;
1335 while j != i {
1336 min = cmp::min(min, j);
1337 max = cmp::max(max, j);
1338 let next = start + buffer.info[j].syllable() as usize;
1339 buffer.info[j].set_syllable(255); // So we don't process j later again.
1340 j = next;
1341 }
1342
1343 buffer.merge_clusters(cmp::max(base, min), max + 1);
1344 }
1345 }
1346 }
1347
1348 // Put syllable back in.
1349 for info in &mut buffer.info[start..end] {
1350 info.set_syllable(syllable);
1351 }
1352 }
1353
1354 // Setup masks now
1355
1356 {
1357 // Reph
1358 for info in &mut buffer.info[start..end] {
1359 if info.indic_position() != position::RA_TO_BECOME_REPH {
1360 break;
1361 }
1362
1363 info.mask |= indic_plan.mask_array[indic_feature::RPHF];
1364 }
1365
1366 // Pre-base
1367 let mut mask = indic_plan.mask_array[indic_feature::HALF];
1368 if !indic_plan.is_old_spec && indic_plan.config.blwf_mode == BlwfMode::PreAndPost {
1369 mask |= indic_plan.mask_array[indic_feature::BLWF];
1370 }
1371
1372 for info in &mut buffer.info[start..base] {
1373 info.mask |= mask;
1374 }
1375
1376 // Base
1377 mask = 0;
1378 if base < end {
1379 buffer.info[base].mask |= mask;
1380 }
1381
1382 // Post-base
1383 mask = indic_plan.mask_array[indic_feature::BLWF]
1384 | indic_plan.mask_array[indic_feature::ABVF]
1385 | indic_plan.mask_array[indic_feature::PSTF];
1386 for i in base + 1..end {
1387 buffer.info[i].mask |= mask;
1388 }
1389 }
1390
1391 if indic_plan.is_old_spec && buffer.script == Some(script::DEVANAGARI) {
1392 // Old-spec eye-lash Ra needs special handling. From the
1393 // spec:
1394 //
1395 // "The feature 'below-base form' is applied to consonants
1396 // having below-base forms and following the base consonant.
1397 // The exception is vattu, which may appear below half forms
1398 // as well as below the base glyph. The feature 'below-base
1399 // form' will be applied to all such occurrences of Ra as well."
1400 //
1401 // Test case: U+0924,U+094D,U+0930,U+094d,U+0915
1402 // with Sanskrit 2003 font.
1403 //
1404 // However, note that Ra,Halant,ZWJ is the correct way to
        // request eyelash form of Ra, so we wouldn't inhibit it
1406 // in that sequence.
1407 //
1408 // Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
1409 for i in start..base.saturating_sub(1) {
1410 if buffer.info[i].indic_category() == category::RA
1411 && buffer.info[i + 1].indic_category() == category::H
1412 && (i + 2 == base || buffer.info[i + 2].indic_category() != category::ZWJ)
1413 {
1414 buffer.info[i].mask |= indic_plan.mask_array[indic_feature::BLWF];
1415 buffer.info[i + 1].mask |= indic_plan.mask_array[indic_feature::BLWF];
1416 }
1417 }
1418 }
1419
1420 let pref_len = 2;
1421 if indic_plan.mask_array[indic_feature::PREF] != 0 && base + pref_len < end {
1422 // Find a Halant,Ra sequence and mark it for pre-base-reordering processing.
1423 for i in base + 1..end - pref_len + 1 {
1424 let glyphs = &[buffer.info[i + 0].as_glyph(), buffer.info[i + 1].as_glyph()];
1425 if indic_plan.pref.would_substitute(&plan.ot_map, face, glyphs) {
1426 buffer.info[i + 0].mask = indic_plan.mask_array[indic_feature::PREF];
1427 buffer.info[i + 1].mask = indic_plan.mask_array[indic_feature::PREF];
1428 break;
1429 }
1430 }
1431 }
1432
1433 // Apply ZWJ/ZWNJ effects
1434 for i in start + 1..end {
1435 if buffer.info[i].is_joiner() {
1436 let non_joiner = buffer.info[i].indic_category() == category::ZWNJ;
1437 let mut j = i;
1438
1439 loop {
1440 j -= 1;
1441
1442 // ZWJ/ZWNJ should disable CJCT. They do that by simply
1443 // being there, since we don't skip them for the CJCT
1444 // feature (ie. F_MANUAL_ZWJ)
1445
1446 // A ZWNJ disables HALF.
1447 if non_joiner {
1448 buffer.info[j].mask &= !indic_plan.mask_array[indic_feature::HALF];
1449 }
1450
1451 if j <= start || buffer.info[j].is_consonant() {
1452 break;
1453 }
1454 }
1455 }
1456 }
1457}
1458
1459fn initial_reordering_standalone_cluster(
1460 plan: &ShapePlan,
1461 indic_plan: &IndicShapePlan,
1462 face: &Face,
1463 start: usize,
1464 end: usize,
1465 buffer: &mut Buffer,
1466) {
1467 // We treat placeholder/dotted-circle as if they are consonants, so we
1468 // should just chain. Only if not in compatibility mode that is...
1469 initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer);
1470}
1471
1472fn final_reordering(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) {
1473 if buffer.is_empty() {
1474 return;
1475 }
1476
1477 let indic_plan: &IndicShapePlan = plan.data::<IndicShapePlan>();
1478
1479 let mut virama_glyph: Option = None;
1480 if indic_plan.config.virama != 0 {
1481 if let Some(g: GlyphId) = face.glyph_index(indic_plan.config.virama) {
1482 virama_glyph = Some(g.0 as u32);
1483 }
1484 }
1485
1486 let mut start: usize = 0;
1487 let mut end: usize = buffer.next_syllable(start:0);
1488 while start < buffer.len {
1489 final_reordering_impl(indic_plan, virama_glyph, start, end, buffer);
1490 start = end;
1491 end = buffer.next_syllable(start);
1492 }
1493}
1494
1495fn final_reordering_impl(
1496 plan: &IndicShapePlan,
1497 virama_glyph: Option<u32>,
1498 start: usize,
1499 end: usize,
1500 buffer: &mut Buffer,
1501) {
1502 // This function relies heavily on halant glyphs. Lots of ligation
1503 // and possibly multiple substitutions happened prior to this
1504 // phase, and that might have messed up our properties. Recover
1505 // from a particular case of that where we're fairly sure that a
1506 // class of OT_H is desired but has been lost.
1507 //
1508 // We don't call load_virama_glyph(), since we know it's already loaded.
1509 if let Some(virama_glyph) = virama_glyph {
1510 for info in &mut buffer.info[start..end] {
1511 if info.glyph_id == virama_glyph && info.is_ligated() && info.is_multiplied() {
1512 // This will make sure that this glyph passes is_halant() test.
1513 info.set_indic_category(category::H);
1514 info.clear_ligated_and_multiplied();
1515 }
1516 }
1517 }
1518
1519 // 4. Final reordering:
1520 //
1521 // After the localized forms and basic shaping forms GSUB features have been
1522 // applied (see below), the shaping engine performs some final glyph
1523 // reordering before applying all the remaining font features to the entire
1524 // syllable.
1525
1526 let mut try_pref = plan.mask_array[indic_feature::PREF] != 0;
1527
1528 let mut base = start;
1529 while base < end {
1530 if buffer.info[base].indic_position() as u32 >= position::BASE_C as u32 {
1531 if try_pref && base + 1 < end {
1532 for i in base + 1..end {
1533 if (buffer.info[i].mask & plan.mask_array[indic_feature::PREF]) != 0 {
1534 if !(buffer.info[i].is_substituted()
1535 && buffer.info[i].is_ligated_and_didnt_multiply())
1536 {
1537 // Ok, this was a 'pref' candidate but didn't form any.
1538 // Base is around here...
1539 base = i;
1540 while base < end && buffer.info[base].is_halant() {
1541 base += 1;
1542 }
1543
1544 buffer.info[base].set_indic_position(position::BASE_C);
1545 try_pref = false;
1546 }
1547
1548 break;
1549 }
1550 }
1551 }
1552
1553 // For Malayalam, skip over unformed below- (but NOT post-) forms.
1554 if buffer.script == Some(script::MALAYALAM) {
1555 let mut i = base + 1;
1556 while i < end {
1557 while i < end && buffer.info[i].is_joiner() {
1558 i += 1;
1559 }
1560
1561 if i == end || !buffer.info[i].is_halant() {
1562 break;
1563 }
1564
1565 i += 1; // Skip halant.
1566
1567 while i < end && buffer.info[i].is_joiner() {
1568 i += 1;
1569 }
1570
1571 if i < end
1572 && buffer.info[i].is_consonant()
1573 && buffer.info[i].indic_position() == position::BELOW_C
1574 {
1575 base = i;
1576 buffer.info[base].set_indic_position(position::BASE_C);
1577 }
1578
1579 i += 1;
1580 }
1581 }
1582
1583 if start < base && buffer.info[base].indic_position() as u32 > position::BASE_C as u32 {
1584 base -= 1;
1585 }
1586
1587 break;
1588 }
1589
1590 base += 1;
1591 }
1592
1593 if base == end && start < base && buffer.info[base - 1].is_one_of(rb_flag(category::ZWJ as u32))
1594 {
1595 base -= 1;
1596 }
1597
1598 if base < end {
1599 while start < base
1600 && buffer.info[base]
1601 .is_one_of(rb_flag(category::N as u32) | rb_flag(category::H as u32))
1602 {
1603 base -= 1;
1604 }
1605 }
1606
1607 // - Reorder matras:
1608 //
1609 // If a pre-base matra character had been reordered before applying basic
1610 // features, the glyph can be moved closer to the main consonant based on
1611 // whether half-forms had been formed. Actual position for the matra is
1612 // defined as “after last standalone halant glyph, after initial matra
1613 // position and before the main consonant”. If ZWJ or ZWNJ follow this
1614 // halant, position is moved after it.
1615 //
1616 // IMPLEMENTATION NOTES:
1617 //
1618 // It looks like the last sentence is wrong. Testing, with Windows 7 Uniscribe
1619 // and Devanagari shows that the behavior is best described as:
1620 //
1621 // "If ZWJ follows this halant, matra is NOT repositioned after this halant.
1622 // If ZWNJ follows this halant, position is moved after it."
1623 //
1624 // Test case, with Adobe Devanagari or Nirmala UI:
1625 //
1626 // U+091F,U+094D,U+200C,U+092F,U+093F
1627 // (Matra moves to the middle, after ZWNJ.)
1628 //
1629 // U+091F,U+094D,U+200D,U+092F,U+093F
1630 // (Matra does NOT move, stays to the left.)
1631 //
1632 // https://github.com/harfbuzz/harfbuzz/issues/1070
1633
1634 // Otherwise there can't be any pre-base matra characters.
1635 if start + 1 < end && start < base {
1636 // If we lost track of base, alas, position before last thingy.
1637 let mut new_pos = if base == end { base - 2 } else { base - 1 };
1638
1639 // Malayalam / Tamil do not have "half" forms or explicit virama forms.
1640 // The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1641 // We want to position matra after them.
1642 if buffer.script != Some(script::MALAYALAM) && buffer.script != Some(script::TAMIL) {
1643 loop {
1644 while new_pos > start
1645 && !buffer.info[new_pos]
1646 .is_one_of(rb_flag(category::M as u32) | rb_flag(category::H as u32))
1647 {
1648 new_pos -= 1;
1649 }
1650
1651 // If we found no Halant we are done.
1652 // Otherwise only proceed if the Halant does
1653 // not belong to the Matra itself!
1654 if buffer.info[new_pos].is_halant()
1655 && buffer.info[new_pos].indic_position() != position::PRE_M
1656 {
1657 if new_pos + 1 < end {
1658 // -> If ZWJ follows this halant, matra is NOT repositioned after this halant.
1659 if buffer.info[new_pos + 1].indic_category() == category::ZWJ {
1660 // Keep searching.
1661 if new_pos > start {
1662 new_pos -= 1;
1663 continue;
1664 }
1665 }
1666
1667 // -> If ZWNJ follows this halant, position is moved after it.
1668 //
1669 // IMPLEMENTATION NOTES:
1670 //
1671 // This is taken care of by the state-machine. A Halant,ZWNJ is a terminating
1672 // sequence for a consonant syllable; any pre-base matras occurring after it
1673 // will belong to the subsequent syllable.
1674 }
1675 } else {
1676 new_pos = start; // No move.
1677 }
1678
1679 break;
1680 }
1681 }
1682
1683 if start < new_pos && buffer.info[new_pos].indic_position() != position::PRE_M {
1684 // Now go see if there's actually any matras...
1685 for i in (start + 1..=new_pos).rev() {
1686 if buffer.info[i - 1].indic_position() == position::PRE_M {
1687 let old_pos = i - 1;
1688 // Shouldn't actually happen.
1689 if old_pos < base && base <= new_pos {
1690 base -= 1;
1691 }
1692
1693 let tmp = buffer.info[old_pos];
1694 for i in 0..new_pos - old_pos {
1695 buffer.info[i + old_pos] = buffer.info[i + old_pos + 1];
1696 }
1697 buffer.info[new_pos] = tmp;
1698
1699 // Note: this merge_clusters() is intentionally *after* the reordering.
1700 // Indic matra reordering is special and tricky...
1701 buffer.merge_clusters(new_pos, cmp::min(end, base + 1));
1702
1703 new_pos -= 1;
1704 }
1705 }
1706 } else {
1707 for i in start..base {
1708 if buffer.info[i].indic_position() == position::PRE_M {
1709 buffer.merge_clusters(i, cmp::min(end, base + 1));
1710 break;
1711 }
1712 }
1713 }
1714 }
1715
1716 // - Reorder reph:
1717 //
1718 // Reph’s original position is always at the beginning of the syllable,
1719 // (i.e. it is not reordered at the character reordering stage). However,
1720 // it will be reordered according to the basic-forms shaping results.
1721 // Possible positions for reph, depending on the script, are; after main,
1722 // before post-base consonant forms, and after post-base consonant forms.
1723
1724 // Two cases:
1725 //
1726 // - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then
1727 // we should only move it if the sequence ligated to the repha form.
1728 //
1729 // - If repha is encoded separately and in the logical position, we should only
1730 // move it if it did NOT ligate. If it ligated, it's probably the font trying
1731 // to make it work without the reordering.
1732
1733 if start + 1 < end
1734 && buffer.info[start].indic_position() == position::RA_TO_BECOME_REPH
1735 && (buffer.info[start].indic_category() == category::REPHA)
1736 ^ buffer.info[start].is_ligated_and_didnt_multiply()
1737 {
1738 let mut new_reph_pos;
1739 loop {
1740 let reph_pos = plan.config.reph_pos;
1741
1742 // 1. If reph should be positioned after post-base consonant forms,
1743 // proceed to step 5.
1744 if reph_pos != RephPosition::AfterPost {
1745 // 2. If the reph repositioning class is not after post-base: target
1746 // position is after the first explicit halant glyph between the
1747 // first post-reph consonant and last main consonant. If ZWJ or ZWNJ
1748 // are following this halant, position is moved after it. If such
1749 // position is found, this is the target position. Otherwise,
1750 // proceed to the next step.
1751 //
1752 // Note: in old-implementation fonts, where classifications were
1753 // fixed in shaping engine, there was no case where reph position
1754 // will be found on this step.
1755 {
1756 new_reph_pos = start + 1;
1757 while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() {
1758 new_reph_pos += 1;
1759 }
1760
1761 if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() {
1762 // ->If ZWJ or ZWNJ are following this halant, position is moved after it.
1763 if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() {
1764 new_reph_pos += 1;
1765 }
1766
1767 break;
1768 }
1769 }
1770
1771 // 3. If reph should be repositioned after the main consonant: find the
1772 // first consonant not ligated with main, or find the first
1773 // consonant that is not a potential pre-base-reordering Ra.
1774 if reph_pos == RephPosition::AfterMain {
1775 new_reph_pos = base;
1776 while new_reph_pos + 1 < end
1777 && buffer.info[new_reph_pos + 1].indic_position() as u8
1778 <= position::AFTER_MAIN as u8
1779 {
1780 new_reph_pos += 1;
1781 }
1782
1783 if new_reph_pos < end {
1784 break;
1785 }
1786 }
1787
1788 // 4. If reph should be positioned before post-base consonant, find
1789 // first post-base classified consonant not ligated with main. If no
1790 // consonant is found, the target position should be before the
1791 // first matra, syllable modifier sign or vedic sign.
1792 //
1793 // This is our take on what step 4 is trying to say (and failing, BADLY).
1794 if reph_pos == RephPosition::AfterSub {
1795 new_reph_pos = base;
1796 while new_reph_pos + 1 < end
1797 && (rb_flag_unsafe(buffer.info[new_reph_pos + 1].indic_position() as u32)
1798 & (rb_flag(position::POST_C as u32)
1799 | rb_flag(position::AFTER_POST as u32)
1800 | rb_flag(position::SMVD as u32)))
1801 == 0
1802 {
1803 new_reph_pos += 1;
1804 }
1805
1806 if new_reph_pos < end {
1807 break;
1808 }
1809 }
1810 }
1811
1812 // 5. If no consonant is found in steps 3 or 4, move reph to a position
1813 // immediately before the first post-base matra, syllable modifier
1814 // sign or vedic sign that has a reordering class after the intended
1815 // reph position. For example, if the reordering position for reph
1816 // is post-main, it will skip above-base matras that also have a
1817 // post-main position.
1818 //
1819 // Copied from step 2.
1820 new_reph_pos = start + 1;
1821 while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() {
1822 new_reph_pos += 1;
1823 }
1824
1825 if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() {
1826 /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
1827 if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() {
1828 new_reph_pos += 1;
1829 }
1830
1831 break;
1832 }
1833 // See https://github.com/harfbuzz/harfbuzz/issues/2298#issuecomment-615318654
1834
1835 // 6. Otherwise, reorder reph to the end of the syllable.
1836 {
1837 new_reph_pos = end - 1;
1838 while new_reph_pos > start
1839 && buffer.info[new_reph_pos].indic_position() == position::SMVD
1840 {
1841 new_reph_pos -= 1;
1842 }
1843
1844 // If the Reph is to be ending up after a Matra,Halant sequence,
1845 // position it before that Halant so it can interact with the Matra.
1846 // However, if it's a plain Consonant,Halant we shouldn't do that.
1847 // Uniscribe doesn't do this.
1848 // TEST: U+0930,U+094D,U+0915,U+094B,U+094D
1849 if buffer.info[new_reph_pos].is_halant() {
1850 for info in &buffer.info[base + 1..new_reph_pos] {
1851 if info.indic_category() == category::M {
1852 // Ok, got it.
1853 new_reph_pos -= 1;
1854 }
1855 }
1856 }
1857 }
1858
1859 break;
1860 }
1861
1862 // Move
1863 buffer.merge_clusters(start, new_reph_pos + 1);
1864
1865 let reph = buffer.info[start];
1866 for i in 0..new_reph_pos - start {
1867 buffer.info[i + start] = buffer.info[i + start + 1];
1868 }
1869 buffer.info[new_reph_pos] = reph;
1870
1871 if start < base && base <= new_reph_pos {
1872 base -= 1;
1873 }
1874 }
1875
1876 // - Reorder pre-base-reordering consonants:
1877 //
1878 // If a pre-base-reordering consonant is found, reorder it according to
1879 // the following rules:
1880
1881 // Otherwise there can't be any pre-base-reordering Ra.
1882 if try_pref && base + 1 < end {
1883 for i in base + 1..end {
1884 if (buffer.info[i].mask & plan.mask_array[indic_feature::PREF]) != 0 {
1885 // 1. Only reorder a glyph produced by substitution during application
1886 // of the <pref> feature. (Note that a font may shape a Ra consonant with
1887 // the feature generally but block it in certain contexts.)
1888 //
1889 // Note: We just check that something got substituted. We don't check that
1890 // the <pref> feature actually did it...
1891 //
1892 // Reorder pref only if it ligated.
1893 if buffer.info[i].is_ligated_and_didnt_multiply() {
1894 // 2. Try to find a target position the same way as for pre-base matra.
1895 // If it is found, reorder pre-base consonant glyph.
1896 //
1897 // 3. If position is not found, reorder immediately before main consonant.
1898
1899 let mut new_pos = base;
1900 // Malayalam / Tamil do not have "half" forms or explicit virama forms.
1901 // The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1902 // We want to position matra after them.
1903 if buffer.script != Some(script::MALAYALAM)
1904 && buffer.script != Some(script::TAMIL)
1905 {
1906 while new_pos > start
1907 && !buffer.info[new_pos - 1].is_one_of(
1908 rb_flag(category::M as u32) | rb_flag(category::H as u32),
1909 )
1910 {
1911 new_pos -= 1;
1912 }
1913 }
1914
1915 if new_pos > start && buffer.info[new_pos - 1].is_halant() {
1916 // -> If ZWJ or ZWNJ follow this halant, position is moved after it.
1917 if new_pos < end && buffer.info[new_pos].is_joiner() {
1918 new_pos += 1;
1919 }
1920 }
1921
1922 {
1923 let old_pos = i;
1924
1925 buffer.merge_clusters(new_pos, old_pos + 1);
1926 let tmp = buffer.info[old_pos];
1927 for i in (0..=old_pos - new_pos).rev() {
1928 buffer.info[i + new_pos + 1] = buffer.info[i + new_pos];
1929 }
1930 buffer.info[new_pos] = tmp;
1931
1932 if new_pos <= base && base < old_pos {
1933 // TODO: investigate
1934 #[allow(unused_assignments)]
1935 {
1936 base += 1;
1937 }
1938 }
1939 }
1940 }
1941
1942 break;
1943 }
1944 }
1945 }
1946
1947 // Apply 'init' to the Left Matra if it's a word start.
1948 if buffer.info[start].indic_position() == position::PRE_M {
1949 if start == 0
1950 || (rb_flag_unsafe(buffer.info[start - 1].general_category().to_rb())
1951 & rb_flag_range(
1952 hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT,
1953 hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,
1954 ))
1955 == 0
1956 {
1957 buffer.info[start].mask |= plan.mask_array[indic_feature::INIT];
1958 } else {
1959 buffer.unsafe_to_break(start - 1, start + 1);
1960 }
1961 }
1962}
1963
1964pub fn get_category_and_position(u: u32) -> (Category, Position) {
1965 let (c1, c2) = super::indic_table::get_categories(u);
1966 let c2 = if c1 == SyllabicCategory::ConsonantMedial
1967 || c1 == SyllabicCategory::GeminationMark
1968 || c1 == SyllabicCategory::RegisterShifter
1969 || c1 == SyllabicCategory::ConsonantSucceedingRepha
1970 || c1 == SyllabicCategory::Virama
1971 || c1 == SyllabicCategory::VowelDependent
1972 || false
1973 {
1974 c2
1975 } else {
1976 MatraCategory::NotApplicable
1977 };
1978
1979 let c1 = match c1 {
1980 SyllabicCategory::Other => category::X,
1981 SyllabicCategory::Avagraha => category::SYMBOL,
1982 SyllabicCategory::Bindu => category::SM,
1983 SyllabicCategory::BrahmiJoiningNumber => category::PLACEHOLDER, // Don't care.
1984 SyllabicCategory::CantillationMark => category::A,
1985 SyllabicCategory::Consonant => category::C,
1986 SyllabicCategory::ConsonantDead => category::C,
1987 SyllabicCategory::ConsonantFinal => category::CM,
1988 SyllabicCategory::ConsonantHeadLetter => category::C,
1989 SyllabicCategory::ConsonantInitialPostfixed => category::PLACEHOLDER,
1990 SyllabicCategory::ConsonantKiller => category::M, // U+17CD only.
1991 SyllabicCategory::ConsonantMedial => category::CM,
1992 SyllabicCategory::ConsonantPlaceholder => category::PLACEHOLDER,
1993 SyllabicCategory::ConsonantPrecedingRepha => category::REPHA,
1994 SyllabicCategory::ConsonantPrefixed => category::X,
1995 SyllabicCategory::ConsonantSubjoined => category::CM,
1996 SyllabicCategory::ConsonantSucceedingRepha => category::CM,
1997 SyllabicCategory::ConsonantWithStacker => category::CS,
1998 SyllabicCategory::GeminationMark => category::SM, // https://github.com/harfbuzz/harfbuzz/issues/552
1999 SyllabicCategory::InvisibleStacker => category::COENG,
2000 SyllabicCategory::Joiner => category::ZWJ,
2001 SyllabicCategory::ModifyingLetter => category::X,
2002 SyllabicCategory::NonJoiner => category::ZWNJ,
2003 SyllabicCategory::Nukta => category::N,
2004 SyllabicCategory::Number => category::PLACEHOLDER,
2005 SyllabicCategory::NumberJoiner => category::PLACEHOLDER, // Don't care.
2006 SyllabicCategory::PureKiller => category::M,
2007 SyllabicCategory::RegisterShifter => category::RS,
2008 SyllabicCategory::SyllableModifier => category::SM,
2009 SyllabicCategory::ToneLetter => category::X,
2010 SyllabicCategory::ToneMark => category::N,
2011 SyllabicCategory::Virama => category::H,
2012 SyllabicCategory::Visarga => category::SM,
2013 SyllabicCategory::Vowel => category::V,
2014 SyllabicCategory::VowelDependent => category::M,
2015 SyllabicCategory::VowelIndependent => category::V,
2016 };
2017
2018 let c2 = match c2 {
2019 MatraCategory::NotApplicable => position::END,
2020 MatraCategory::Left => position::PRE_C,
2021 MatraCategory::Top => position::ABOVE_C,
2022 MatraCategory::Bottom => position::BELOW_C,
2023 MatraCategory::Right => position::POST_C,
2024 MatraCategory::BottomAndLeft => position::POST_C,
2025 MatraCategory::BottomAndRight => position::POST_C,
2026 MatraCategory::LeftAndRight => position::POST_C,
2027 MatraCategory::TopAndBottom => position::BELOW_C,
2028 MatraCategory::TopAndBottomAndRight => position::POST_C,
2029 MatraCategory::TopAndLeft => position::ABOVE_C,
2030 MatraCategory::TopAndLeftAndRight => position::POST_C,
2031 MatraCategory::TopAndRight => position::POST_C,
2032 MatraCategory::Overstruck => position::AFTER_MAIN,
2033 MatraCategory::VisualOrderLeft => position::PRE_M,
2034 };
2035
2036 (c1, c2)
2037}
2038
2039#[rustfmt::skip]
2040fn matra_position_indic(u: u32, side: u8) -> u8 {
2041 #[inline] fn in_half_block(u: u32, base: u32) -> bool { u & !0x7F == base }
2042 #[inline] fn is_deva(u: u32) -> bool { in_half_block(u, 0x0900) }
2043 #[inline] fn is_beng(u: u32) -> bool { in_half_block(u, 0x0980) }
2044 #[inline] fn is_guru(u: u32) -> bool { in_half_block(u, 0x0A00) }
2045 #[inline] fn is_gujr(u: u32) -> bool { in_half_block(u, 0x0A80) }
2046 #[inline] fn is_orya(u: u32) -> bool { in_half_block(u, 0x0B00) }
2047 #[inline] fn is_taml(u: u32) -> bool { in_half_block(u, 0x0B80) }
2048 #[inline] fn is_telu(u: u32) -> bool { in_half_block(u, 0x0C00) }
2049 #[inline] fn is_knda(u: u32) -> bool { in_half_block(u, 0x0C80) }
2050 #[inline] fn is_mlym(u: u32) -> bool { in_half_block(u, 0x0D00) }
2051 #[inline] fn is_sinh(u: u32) -> bool { in_half_block(u, 0x0D80) }
2052
2053 #[inline]
2054 fn matra_pos_right(u: u32) -> Position {
2055 if is_deva(u) {
2056 position::AFTER_SUB
2057 } else if is_beng(u) {
2058 position::AFTER_POST
2059 } else if is_guru(u) {
2060 position::AFTER_POST
2061 } else if is_gujr(u) {
2062 position::AFTER_POST
2063 } else if is_orya(u) {
2064 position::AFTER_POST
2065 } else if is_taml(u) {
2066 position::AFTER_POST
2067 } else if is_telu(u) {
2068 if u <= 0x0C42 {
2069 position::BEFORE_SUB
2070 } else {
2071 position::AFTER_SUB
2072 }
2073 } else if is_knda(u) {
2074 if u < 0x0CC3 || u > 0xCD6 {
2075 position::BEFORE_SUB
2076 } else {
2077 position::AFTER_SUB
2078 }
2079 } else if is_mlym(u) {
2080 position::AFTER_POST
2081 } else if is_sinh(u) {
2082 position::AFTER_SUB
2083 } else {
2084 position::AFTER_SUB
2085 }
2086 }
2087
2088 // BENG and MLYM don't have top matras.
2089 #[inline]
2090 fn matra_pos_top(u: u32) -> Position {
2091 if is_deva(u) {
2092 position::AFTER_SUB
2093 } else if is_guru(u) {
2094 // Deviate from spec
2095 position::AFTER_POST
2096 } else if is_gujr(u) {
2097 position::AFTER_SUB
2098 } else if is_orya(u) {
2099 position::AFTER_MAIN
2100 } else if is_taml(u) {
2101 position::AFTER_SUB
2102 } else if is_telu(u) {
2103 position::BEFORE_SUB
2104 } else if is_knda(u) {
2105 position::BEFORE_SUB
2106 } else if is_sinh(u) {
2107 position::AFTER_SUB
2108 } else {
2109 position::AFTER_SUB
2110 }
2111 }
2112
2113 #[inline]
2114 fn matra_pos_bottom(u: u32) -> Position {
2115 if is_deva(u) {
2116 position::AFTER_SUB
2117 } else if is_beng(u) {
2118 position::AFTER_SUB
2119 } else if is_guru(u) {
2120 position::AFTER_POST
2121 } else if is_gujr(u) {
2122 position::AFTER_POST
2123 } else if is_orya(u) {
2124 position::AFTER_SUB
2125 } else if is_taml(u) {
2126 position::AFTER_POST
2127 } else if is_telu(u) {
2128 position::BEFORE_SUB
2129 } else if is_knda(u) {
2130 position::BEFORE_SUB
2131 } else if is_mlym(u) {
2132 position::AFTER_POST
2133 } else if is_sinh(u) {
2134 position::AFTER_SUB
2135 } else {
2136 position::AFTER_SUB
2137 }
2138 }
2139
2140 match side {
2141 position::PRE_C => position::PRE_M,
2142 position::POST_C => matra_pos_right(u),
2143 position::ABOVE_C => matra_pos_top(u),
2144 position::BELOW_C => matra_pos_bottom(u),
2145 _ => side,
2146 }
2147}
2148