1use alloc::boxed::Box;
2use core::cmp;
3use core::convert::TryFrom;
4use core::ops::Range;
5
6use ttf_parser::GlyphId;
7
8use super::algs::*;
9use super::buffer::hb_buffer_t;
10use super::ot_layout::*;
11use super::ot_layout_gsubgpos::{WouldApply, WouldApplyContext};
12use super::ot_map::*;
13use super::ot_shape::*;
14use super::ot_shape_normalize::*;
15use super::ot_shape_plan::hb_ot_shape_plan_t;
16use super::ot_shaper::*;
17use super::unicode::{hb_gc, CharExt, GeneralCategoryExt};
18use super::{hb_font_t, hb_glyph_info_t, hb_mask_t, hb_tag_t, script, Script};
19
20pub const INDIC_SHAPER: hb_ot_shaper_t = hb_ot_shaper_t {
21 collect_features: Some(collect_features),
22 override_features: Some(override_features),
23 create_data: Some(|plan: &hb_ot_shape_plan_t| Box::new(IndicShapePlan::new(plan))),
24 preprocess_text: Some(preprocess_text),
25 postprocess_glyphs: None,
26 normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
27 decompose: Some(decompose),
28 compose: Some(compose),
29 setup_masks: Some(setup_masks),
30 gpos_tag: None,
31 reorder_marks: None,
32 zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
33 fallback_position: false,
34};
35
/// Shaper category of a glyph; values are the constants in [`ot_category_t`].
pub type Category = u8;

// NOTE: harfbuzz no longer has this module. There the values are auto-generated
// by the various state machines and stored in `hb-ot-shaper-indic-table`.
// Whenever the values change in the machines, they must be updated here too.
#[allow(dead_code)]
pub mod ot_category_t {
    pub const OT_X: u8 = 0;
    pub const OT_C: u8 = 1;
    pub const OT_V: u8 = 2;
    pub const OT_N: u8 = 3;
    pub const OT_H: u8 = 4;
    pub const OT_ZWNJ: u8 = 5;
    pub const OT_ZWJ: u8 = 6;
    pub const OT_M: u8 = 7;
    pub const OT_SM: u8 = 8;
    pub const OT_A: u8 = 9;
    /// Alias of [`OT_A`].
    pub const OT_VD: u8 = OT_A;
    pub const OT_PLACEHOLDER: u8 = 10;
    /// Alias of [`OT_PLACEHOLDER`].
    pub const OT_GB: u8 = OT_PLACEHOLDER;
    pub const OT_DOTTEDCIRCLE: u8 = 11;
    /// Register Shifter, used in Khmer OT spec.
    pub const OT_RS: u8 = 12;
    pub const OT_MPst: u8 = 13;
    /// Atomically-encoded logical or visual repha.
    pub const OT_Repha: u8 = 14;
    pub const OT_Ra: u8 = 15;
    /// Consonant-Medial.
    pub const OT_CM: u8 = 16;
    /// Avagraha, etc that take marks (SM,A,VD).
    pub const OT_Symbol: u8 = 17;
    pub const OT_CS: u8 = 18;

    // ---- Khmer & Myanmar shapers ----
    pub const OT_VAbv: u8 = 20;
    pub const OT_VBlw: u8 = 21;
    pub const OT_VPre: u8 = 22;
    pub const OT_VPst: u8 = 23;

    // ---- Khmer ----
    pub const OT_Robatic: u8 = 25;
    pub const OT_Xgroup: u8 = 26;
    pub const OT_Ygroup: u8 = 27;

    // ---- Myanmar ----
    /// Asat.
    pub const OT_As: u8 = 32;
    /// Medial.
    pub const OT_MH: u8 = 35;
    /// Medial.
    pub const OT_MR: u8 = 36;
    /// Medial.
    pub const OT_MW: u8 = 37;
    /// Medial.
    pub const OT_MY: u8 = 38;
    /// Pwo and other tones.
    pub const OT_PT: u8 = 39;
    /// Variation selectors.
    pub const OT_VS: u8 = 40;
    /// Consonant medials.
    pub const OT_ML: u8 = 41;

    /// Not part of `ot_category_t` in harfbuzz; it only exists in the Myanmar
    /// machine. Rust cannot export it from the Ragel file, so it is mirrored
    /// here and must be kept in sync with the value in the machine.
    pub const IV: u8 = 2;
}
92
/// Reordering position of a glyph; values are the constants in [`ot_position_t`].
pub type Position = u8;

/// Position slots, numbered in ascending order from `POS_START` to `POS_END`.
pub mod ot_position_t {
    pub const POS_START: u8 = 0;

    // Slots numbered below the base consonant.
    pub const POS_RA_TO_BECOME_REPH: u8 = 1;
    pub const POS_PRE_M: u8 = 2;
    pub const POS_PRE_C: u8 = 3;

    // The base consonant and the slot right after it.
    pub const POS_BASE_C: u8 = 4;
    pub const POS_AFTER_MAIN: u8 = 5;

    pub const POS_ABOVE_C: u8 = 6;

    // Below-base group.
    pub const POS_BEFORE_SUB: u8 = 7;
    pub const POS_BELOW_C: u8 = 8;
    pub const POS_AFTER_SUB: u8 = 9;

    // Post-base group.
    pub const POS_BEFORE_POST: u8 = 10;
    pub const POS_POST_C: u8 = 11;
    pub const POS_AFTER_POST: u8 = 12;

    pub const POS_SMVD: u8 = 13;

    pub const POS_END: u8 = 14;
}
118
119const INDIC_FEATURES: &[(hb_tag_t, hb_ot_map_feature_flags_t)] = &[
120 // Basic features.
121 // These features are applied in order, one at a time, after initial_reordering,
122 // constrained to the syllable.
123 (
124 hb_tag_t::from_bytes(b"nukt"),
125 F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
126 ),
127 (
128 hb_tag_t::from_bytes(b"akhn"),
129 F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
130 ),
131 (
132 hb_tag_t::from_bytes(b"rphf"),
133 F_MANUAL_JOINERS | F_PER_SYLLABLE,
134 ),
135 (
136 hb_tag_t::from_bytes(b"rkrf"),
137 F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
138 ),
139 (
140 hb_tag_t::from_bytes(b"pref"),
141 F_MANUAL_JOINERS | F_PER_SYLLABLE,
142 ),
143 (
144 hb_tag_t::from_bytes(b"blwf"),
145 F_MANUAL_JOINERS | F_PER_SYLLABLE,
146 ),
147 (
148 hb_tag_t::from_bytes(b"abvf"),
149 F_MANUAL_JOINERS | F_PER_SYLLABLE,
150 ),
151 (
152 hb_tag_t::from_bytes(b"half"),
153 F_MANUAL_JOINERS | F_PER_SYLLABLE,
154 ),
155 (
156 hb_tag_t::from_bytes(b"pstf"),
157 F_MANUAL_JOINERS | F_PER_SYLLABLE,
158 ),
159 (
160 hb_tag_t::from_bytes(b"vatu"),
161 F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
162 ),
163 (
164 hb_tag_t::from_bytes(b"cjct"),
165 F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
166 ),
167 // Other features.
168 // These features are applied all at once, after final_reordering, constrained
169 // to the syllable.
170 // Default Bengali font in Windows for example has intermixed
171 // lookups for init,pres,abvs,blws features.
172 (
173 hb_tag_t::from_bytes(b"init"),
174 F_MANUAL_JOINERS | F_PER_SYLLABLE,
175 ),
176 (
177 hb_tag_t::from_bytes(b"pres"),
178 F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
179 ),
180 (
181 hb_tag_t::from_bytes(b"abvs"),
182 F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
183 ),
184 (
185 hb_tag_t::from_bytes(b"blws"),
186 F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
187 ),
188 (
189 hb_tag_t::from_bytes(b"psts"),
190 F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
191 ),
192 (
193 hb_tag_t::from_bytes(b"haln"),
194 F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE,
195 ),
196];
197
// Indices into the INDIC_FEATURES array; must be in the same order as that array.
// Each index is written relative to its predecessor so the ordering is explicit.
#[allow(dead_code)]
mod indic_feature {
    // Basic features.
    pub const NUKT: usize = 0;
    pub const AKHN: usize = NUKT + 1;
    pub const RPHF: usize = AKHN + 1;
    pub const RKRF: usize = RPHF + 1;
    pub const PREF: usize = RKRF + 1;
    pub const BLWF: usize = PREF + 1;
    pub const ABVF: usize = BLWF + 1;
    pub const HALF: usize = ABVF + 1;
    pub const PSTF: usize = HALF + 1;
    pub const VATU: usize = PSTF + 1;
    pub const CJCT: usize = VATU + 1;

    // Other features.
    pub const INIT: usize = CJCT + 1;
    pub const PRES: usize = INIT + 1;
    pub const ABVS: usize = PRES + 1;
    pub const BLWS: usize = ABVS + 1;
    pub const PSTS: usize = BLWS + 1;
    pub const HALN: usize = PSTS + 1;
}
219
220pub(crate) const fn category_flag(c: Category) -> u32 {
221 rb_flag(c as u32)
222}
223
224// Note:
225//
226// We treat Vowels and placeholders as if they were consonants. This is safe because Vowels
227// cannot happen in a consonant syllable. The plus side however is, we can call the
228// consonant syllable logic from the vowel syllable function and get it all right!
229const CONSONANT_FLAGS_INDIC: u32 = category_flag(ot_category_t::OT_C)
230 | category_flag(ot_category_t::OT_CS)
231 | category_flag(ot_category_t::OT_Ra)
232 | category_flag(ot_category_t::OT_CM)
233 | category_flag(ot_category_t::OT_V)
234 | category_flag(ot_category_t::OT_PLACEHOLDER)
235 | category_flag(ot_category_t::OT_DOTTEDCIRCLE);
236
237const CONSONANT_FLAGS_MYANMAR: u32 = category_flag(ot_category_t::OT_C)
238 | category_flag(ot_category_t::OT_CS)
239 | category_flag(ot_category_t::OT_Ra)
240 // | category_flag(ot_category_t::OT_CM)
241 | category_flag(ot_category_t::IV)
242 | category_flag(ot_category_t::OT_GB)
243 | category_flag(ot_category_t::OT_DOTTEDCIRCLE);
244
245const JOINER_FLAGS: u32 =
246 category_flag(ot_category_t::OT_ZWJ) | category_flag(ot_category_t::OT_ZWNJ);
247
248#[derive(Clone, Copy, PartialEq)]
249enum RephPosition {
250 AfterMain = ot_position_t::POS_AFTER_MAIN as isize,
251 BeforeSub = ot_position_t::POS_BEFORE_SUB as isize,
252 AfterSub = ot_position_t::POS_AFTER_SUB as isize,
253 BeforePost = ot_position_t::POS_BEFORE_POST as isize,
254 AfterPost = ot_position_t::POS_AFTER_POST as isize,
255}
256
/// How a script encodes its reph.
#[derive(PartialEq, Clone, Copy)]
enum RephMode {
    /// The reph comes from an initial Ra,H sequence.
    Implicit,
    /// The reph comes from an initial Ra,H,ZWJ sequence.
    Explicit,
    /// The reph is an encoded Repha character that needs reordering.
    LogRepha,
}
266
/// Which consonants the below-forms (`blwf`) feature is applied to.
#[derive(PartialEq, Clone, Copy)]
enum BlwfMode {
    /// Applied to both pre-base and post-base consonants.
    PreAndPost,
    /// Applied to post-base consonants only.
    PostOnly,
}
274
275#[derive(Clone, Copy)]
276struct IndicConfig {
277 script: Option<Script>,
278 has_old_spec: bool,
279 virama: u32,
280 reph_pos: RephPosition,
281 reph_mode: RephMode,
282 blwf_mode: BlwfMode,
283}
284
285impl IndicConfig {
286 const fn new(
287 script: Option<Script>,
288 has_old_spec: bool,
289 virama: u32,
290 reph_pos: RephPosition,
291 reph_mode: RephMode,
292 blwf_mode: BlwfMode,
293 ) -> Self {
294 IndicConfig {
295 script,
296 has_old_spec,
297 virama,
298 reph_pos,
299 reph_mode,
300 blwf_mode,
301 }
302 }
303}
304
305const INDIC_CONFIGS: &[IndicConfig] = &[
306 IndicConfig::new(
307 script:None,
308 has_old_spec:false,
309 virama:0,
310 reph_pos:RephPosition::BeforePost,
311 RephMode::Implicit,
312 BlwfMode::PreAndPost,
313 ),
314 IndicConfig::new(
315 script:Some(script::DEVANAGARI),
316 has_old_spec:true,
317 virama:0x094D,
318 reph_pos:RephPosition::BeforePost,
319 RephMode::Implicit,
320 BlwfMode::PreAndPost,
321 ),
322 IndicConfig::new(
323 script:Some(script::BENGALI),
324 has_old_spec:true,
325 virama:0x09CD,
326 reph_pos:RephPosition::AfterSub,
327 RephMode::Implicit,
328 BlwfMode::PreAndPost,
329 ),
330 IndicConfig::new(
331 script:Some(script::GURMUKHI),
332 has_old_spec:true,
333 virama:0x0A4D,
334 reph_pos:RephPosition::BeforeSub,
335 RephMode::Implicit,
336 BlwfMode::PreAndPost,
337 ),
338 IndicConfig::new(
339 script:Some(script::GUJARATI),
340 has_old_spec:true,
341 virama:0x0ACD,
342 reph_pos:RephPosition::BeforePost,
343 RephMode::Implicit,
344 BlwfMode::PreAndPost,
345 ),
346 IndicConfig::new(
347 script:Some(script::ORIYA),
348 has_old_spec:true,
349 virama:0x0B4D,
350 reph_pos:RephPosition::AfterMain,
351 RephMode::Implicit,
352 BlwfMode::PreAndPost,
353 ),
354 IndicConfig::new(
355 script:Some(script::TAMIL),
356 has_old_spec:true,
357 virama:0x0BCD,
358 reph_pos:RephPosition::AfterPost,
359 RephMode::Implicit,
360 BlwfMode::PreAndPost,
361 ),
362 IndicConfig::new(
363 script:Some(script::TELUGU),
364 has_old_spec:true,
365 virama:0x0C4D,
366 reph_pos:RephPosition::AfterPost,
367 RephMode::Explicit,
368 BlwfMode::PostOnly,
369 ),
370 IndicConfig::new(
371 script:Some(script::KANNADA),
372 has_old_spec:true,
373 virama:0x0CCD,
374 reph_pos:RephPosition::AfterPost,
375 RephMode::Implicit,
376 BlwfMode::PostOnly,
377 ),
378 IndicConfig::new(
379 script:Some(script::MALAYALAM),
380 has_old_spec:true,
381 virama:0x0D4D,
382 reph_pos:RephPosition::AfterMain,
383 RephMode::LogRepha,
384 BlwfMode::PreAndPost,
385 ),
386 IndicConfig::new(
387 script:Some(script::SINHALA),
388 has_old_spec:false,
389 virama:0x0DCA,
390 reph_pos:RephPosition::AfterPost,
391 RephMode::Explicit,
392 BlwfMode::PreAndPost,
393 ),
394];
395
396struct IndicWouldSubstituteFeature {
397 lookups: Range<usize>,
398 zero_context: bool,
399}
400
401impl IndicWouldSubstituteFeature {
402 pub fn new(map: &hb_ot_map_t, feature_tag: hb_tag_t, zero_context: bool) -> Self {
403 IndicWouldSubstituteFeature {
404 lookups: match map.get_feature_stage(TableIndex::GSUB, feature_tag) {
405 Some(stage) => map.stage_lookup_range(TableIndex::GSUB, stage),
406 None => 0..0,
407 },
408 zero_context,
409 }
410 }
411
412 pub fn would_substitute(
413 &self,
414 map: &hb_ot_map_t,
415 face: &hb_font_t,
416 glyphs: &[GlyphId],
417 ) -> bool {
418 for index in self.lookups.clone() {
419 let lookup = map.lookup(TableIndex::GSUB, index);
420 let ctx = WouldApplyContext {
421 glyphs,
422 zero_context: self.zero_context,
423 };
424 if face
425 .gsub
426 .as_ref()
427 .and_then(|table| table.get_lookup(lookup.index))
428 .map_or(false, |lookup| lookup.would_apply(&ctx))
429 {
430 return true;
431 }
432 }
433
434 false
435 }
436}
437
438struct IndicShapePlan {
439 config: IndicConfig,
440 is_old_spec: bool,
441 // virama_glyph: Option<u32>,
442 rphf: IndicWouldSubstituteFeature,
443 pref: IndicWouldSubstituteFeature,
444 blwf: IndicWouldSubstituteFeature,
445 pstf: IndicWouldSubstituteFeature,
446 vatu: IndicWouldSubstituteFeature,
447 mask_array: [hb_mask_t; INDIC_FEATURES.len()],
448}
449
450impl IndicShapePlan {
451 fn new(plan: &hb_ot_shape_plan_t) -> Self {
452 let script = plan.script;
453 let config = if let Some(c) = INDIC_CONFIGS.iter().skip(1).find(|c| c.script == script) {
454 *c
455 } else {
456 INDIC_CONFIGS[0]
457 };
458
459 let is_old_spec = config.has_old_spec
460 && plan
461 .ot_map
462 .chosen_script(TableIndex::GSUB)
463 .map_or(true, |tag| tag.to_bytes()[3] != b'2');
464
465 // Use zero-context would_substitute() matching for new-spec of the main
466 // Indic scripts, and scripts with one spec only, but not for old-specs.
467 // The new-spec for all dual-spec scripts says zero-context matching happens.
468 //
469 // However, testing with Malayalam shows that old and new spec both allow
470 // context. Testing with Bengali new-spec however shows that it doesn't.
471 // So, the heuristic here is the way it is. It should *only* be changed,
472 // as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE.
473 let zero_context = is_old_spec && script != Some(script::MALAYALAM);
474
475 let mut mask_array = [0; INDIC_FEATURES.len()];
476 for (i, feature) in INDIC_FEATURES.iter().enumerate() {
477 mask_array[i] = if feature.1 & F_GLOBAL != 0 {
478 0
479 } else {
480 plan.ot_map.get_1_mask(feature.0)
481 }
482 }
483
484 // TODO: what is this?
485 // let mut virama_glyph = None;
486 // if config.virama != 0 {
487 // if let Some(g) = face.glyph_index(char::try_from(config.virama).unwrap()) {
488 // virama_glyph = Some(g.0 as u32);
489 // }
490 // }
491
492 IndicShapePlan {
493 config,
494 is_old_spec,
495 // virama_glyph,
496 rphf: IndicWouldSubstituteFeature::new(
497 &plan.ot_map,
498 hb_tag_t::from_bytes(b"rphf"),
499 zero_context,
500 ),
501 pref: IndicWouldSubstituteFeature::new(
502 &plan.ot_map,
503 hb_tag_t::from_bytes(b"pref"),
504 zero_context,
505 ),
506 blwf: IndicWouldSubstituteFeature::new(
507 &plan.ot_map,
508 hb_tag_t::from_bytes(b"blwf"),
509 zero_context,
510 ),
511 pstf: IndicWouldSubstituteFeature::new(
512 &plan.ot_map,
513 hb_tag_t::from_bytes(b"pstf"),
514 zero_context,
515 ),
516 vatu: IndicWouldSubstituteFeature::new(
517 &plan.ot_map,
518 hb_tag_t::from_bytes(b"vatu"),
519 zero_context,
520 ),
521 mask_array,
522 }
523 }
524}
525
526impl hb_glyph_info_t {
527 pub(crate) fn indic_category(&self) -> Category {
528 self.ot_shaper_var_u8_category()
529 }
530
531 pub(crate) fn myanmar_category(&self) -> Category {
532 self.ot_shaper_var_u8_category()
533 }
534
535 pub(crate) fn khmer_category(&self) -> Category {
536 self.ot_shaper_var_u8_category()
537 }
538
539 pub(crate) fn set_indic_category(&mut self, c: Category) {
540 self.set_ot_shaper_var_u8_category(c)
541 }
542
543 pub(crate) fn set_myanmar_category(&mut self, c: Category) {
544 self.set_ot_shaper_var_u8_category(c)
545 }
546
547 pub(crate) fn indic_position(&self) -> Position {
548 self.ot_shaper_var_u8_auxiliary()
549 }
550
551 pub(crate) fn myanmar_position(&self) -> Position {
552 self.ot_shaper_var_u8_auxiliary()
553 }
554
555 pub(crate) fn set_indic_position(&mut self, c: Position) {
556 self.set_ot_shaper_var_u8_auxiliary(c)
557 }
558
559 pub(crate) fn set_myanmar_position(&mut self, c: Position) {
560 self.set_ot_shaper_var_u8_auxiliary(c)
561 }
562
563 fn is_one_of(&self, flags: u32) -> bool {
564 // If it ligated, all bets are off.
565 if _hb_glyph_info_ligated(self) {
566 return false;
567 }
568
569 rb_flag_unsafe(self.indic_category() as u32) & flags != 0
570 }
571
572 fn is_joiner(&self) -> bool {
573 self.is_one_of(JOINER_FLAGS)
574 }
575
576 pub(crate) fn is_consonant(&self) -> bool {
577 self.is_one_of(CONSONANT_FLAGS_INDIC)
578 }
579
580 pub(crate) fn is_consonant_myanmar(&self) -> bool {
581 self.is_one_of(CONSONANT_FLAGS_MYANMAR)
582 }
583
584 fn is_halant(&self) -> bool {
585 self.is_one_of(rb_flag(ot_category_t::OT_H as u32))
586 }
587
588 fn set_indic_properties(&mut self) {
589 let u = self.glyph_id;
590 let (cat, pos) = crate::hb::ot_shaper_indic_table::get_categories(u);
591
592 self.set_indic_category(cat);
593 self.set_indic_position(pos);
594 }
595}
596
597fn collect_features(planner: &mut hb_ot_shape_planner_t) {
598 // Do this before any lookups have been applied.
599 planner.ot_map.add_gsub_pause(Some(setup_syllables));
600
601 planner
602 .ot_map
603 .enable_feature(hb_tag_t::from_bytes(b"locl"), F_PER_SYLLABLE, 1);
604 // The Indic specs do not require ccmp, but we apply it here since if
605 // there is a use of it, it's typically at the beginning.
606 planner
607 .ot_map
608 .enable_feature(hb_tag_t::from_bytes(b"ccmp"), F_PER_SYLLABLE, 1);
609
610 planner.ot_map.add_gsub_pause(Some(initial_reordering));
611
612 for feature in INDIC_FEATURES.iter().take(11) {
613 planner.ot_map.add_feature(feature.0, feature.1, 1);
614 planner.ot_map.add_gsub_pause(None);
615 }
616
617 planner.ot_map.add_gsub_pause(Some(final_reordering));
618
619 for feature in INDIC_FEATURES.iter().skip(11) {
620 planner.ot_map.add_feature(feature.0, feature.1, 1);
621 }
622}
623
624fn override_features(planner: &mut hb_ot_shape_planner_t) {
625 planner
626 .ot_map
627 .disable_feature(tag:hb_tag_t::from_bytes(b"liga"));
628 planner.ot_map.add_gsub_pause(Some(syllabic_clear_var)); // Don't need syllables anymore.
629}
630
631fn preprocess_text(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
632 super::ot_shaper_vowel_constraints::preprocess_text_vowel_constraints(buffer);
633}
634
635fn decompose(_: &hb_ot_shape_normalize_context_t, ab: char) -> Option<(char, char)> {
636 // Don't decompose these.
637 match ab {
638 '\u{0931}' | // DEVANAGARI LETTER RRA
639 // https://github.com/harfbuzz/harfbuzz/issues/779
640 '\u{09DC}' | // BENGALI LETTER RRA
641 '\u{09DD}' | // BENGALI LETTER RHA
642 '\u{0B94}' => return None, // TAMIL LETTER AU
643 _ => {}
644 }
645
646 crate::hb::unicode::decompose(ab)
647}
648
649fn compose(_: &hb_ot_shape_normalize_context_t, a: char, b: char) -> Option<char> {
650 // Avoid recomposing split matras.
651 if a.general_category().is_mark() {
652 return None;
653 }
654
655 // Composition-exclusion exceptions that we want to recompose.
656 if a == '\u{09AF}' && b == '\u{09BC}' {
657 return Some('\u{09DF}');
658 }
659
660 crate::hb::unicode::compose(a, b)
661}
662
663fn setup_masks(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
664 // We cannot setup masks here. We save information about characters
665 // and setup masks later on in a pause-callback.
666 for info: &mut hb_glyph_info_t in buffer.info_slice_mut() {
667 info.set_indic_properties();
668 }
669}
670
671fn setup_syllables(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) -> bool {
672 super::ot_shaper_indic_machine::find_syllables_indic(buffer);
673
674 let mut start: usize = 0;
675 let mut end: usize = buffer.next_syllable(start:0);
676 while start < buffer.len {
677 buffer.unsafe_to_break(start:Some(start), end:Some(end));
678 start = end;
679 end = buffer.next_syllable(start);
680 }
681
682 false
683}
684
685fn initial_reordering(
686 plan: &hb_ot_shape_plan_t,
687 face: &hb_font_t,
688 buffer: &mut hb_buffer_t,
689) -> bool {
690 use super::ot_shaper_indic_machine::SyllableType;
691
692 let mut ret = false;
693
694 let indic_plan = plan.data::<IndicShapePlan>();
695
696 update_consonant_positions(plan, indic_plan, face, buffer);
697 if super::ot_shaper_syllabic::insert_dotted_circles(
698 face,
699 buffer,
700 SyllableType::BrokenCluster as u8,
701 ot_category_t::OT_DOTTEDCIRCLE,
702 Some(ot_category_t::OT_Repha),
703 Some(ot_position_t::POS_END),
704 ) {
705 ret = true;
706 }
707
708 let mut start = 0;
709 let mut end = buffer.next_syllable(0);
710 while start < buffer.len {
711 initial_reordering_syllable(plan, indic_plan, face, start, end, buffer);
712 start = end;
713 end = buffer.next_syllable(start);
714 }
715
716 ret
717}
718
719fn update_consonant_positions(
720 plan: &hb_ot_shape_plan_t,
721 indic_plan: &IndicShapePlan,
722 face: &hb_font_t,
723 buffer: &mut hb_buffer_t,
724) {
725 let mut virama_glyph: Option = None;
726 if indic_plan.config.virama != 0 {
727 virama_glyph = face.get_nominal_glyph(indic_plan.config.virama);
728 }
729
730 if let Some(virama: GlyphId) = virama_glyph {
731 for info: &mut hb_glyph_info_t in buffer.info_slice_mut() {
732 if info.indic_position() == ot_position_t::POS_BASE_C {
733 let consonant: GlyphId = info.as_glyph();
734 info.set_indic_position(consonant_position_from_face(
735 plan, indic_plan, face, consonant, virama,
736 ));
737 }
738 }
739 }
740}
741
742fn consonant_position_from_face(
743 plan: &hb_ot_shape_plan_t,
744 indic_plan: &IndicShapePlan,
745 face: &hb_font_t,
746 consonant: GlyphId,
747 virama: GlyphId,
748) -> u8 {
749 // For old-spec, the order of glyphs is Consonant,Virama,
750 // whereas for new-spec, it's Virama,Consonant. However,
751 // some broken fonts (like Free Sans) simply copied lookups
752 // from old-spec to new-spec without modification.
753 // And oddly enough, Uniscribe seems to respect those lookups.
754 // Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds
755 // base at 0. The font however, only has lookups matching
756 // 930,94D in 'blwf', not the expected 94D,930 (with new-spec
757 // table). As such, we simply match both sequences. Seems
758 // to work.
759 //
760 // Vatu is done as well, for:
761 // https://github.com/harfbuzz/harfbuzz/issues/1587
762
763 if indic_plan
764 .blwf
765 .would_substitute(&plan.ot_map, face, &[virama, consonant])
766 || indic_plan
767 .blwf
768 .would_substitute(&plan.ot_map, face, &[consonant, virama])
769 || indic_plan
770 .vatu
771 .would_substitute(&plan.ot_map, face, &[virama, consonant])
772 || indic_plan
773 .vatu
774 .would_substitute(&plan.ot_map, face, &[consonant, virama])
775 {
776 return ot_position_t::POS_BELOW_C;
777 }
778
779 if indic_plan
780 .pstf
781 .would_substitute(&plan.ot_map, face, &[virama, consonant])
782 || indic_plan
783 .pstf
784 .would_substitute(&plan.ot_map, face, &[consonant, virama])
785 {
786 return ot_position_t::POS_POST_C;
787 }
788
789 if indic_plan
790 .pref
791 .would_substitute(&plan.ot_map, face, &[virama, consonant])
792 || indic_plan
793 .pref
794 .would_substitute(&plan.ot_map, face, &[consonant, virama])
795 {
796 return ot_position_t::POS_POST_C;
797 }
798
799 ot_position_t::POS_BASE_C
800}
801
802fn initial_reordering_syllable(
803 plan: &hb_ot_shape_plan_t,
804 indic_plan: &IndicShapePlan,
805 face: &hb_font_t,
806 start: usize,
807 end: usize,
808 buffer: &mut hb_buffer_t,
809) {
810 use super::ot_shaper_indic_machine::SyllableType;
811
812 let syllable_type = match buffer.info[start].syllable() & 0x0F {
813 0 => SyllableType::ConsonantSyllable,
814 1 => SyllableType::VowelSyllable,
815 2 => SyllableType::StandaloneCluster,
816 3 => SyllableType::SymbolCluster,
817 4 => SyllableType::BrokenCluster,
818 5 => SyllableType::NonIndicCluster,
819 _ => unreachable!(),
820 };
821
822 match syllable_type {
823 // We made the vowels look like consonants. So let's call the consonant logic!
824 SyllableType::VowelSyllable | SyllableType::ConsonantSyllable => {
825 initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer);
826 }
827 // We already inserted dotted-circles, so just call the standalone_cluster.
828 SyllableType::BrokenCluster | SyllableType::StandaloneCluster => {
829 initial_reordering_standalone_cluster(plan, indic_plan, face, start, end, buffer);
830 }
831 SyllableType::SymbolCluster | SyllableType::NonIndicCluster => {}
832 }
833}
834
// Rules from:
// https://docs.microsoft.com/en-us/typography/script-development/devanagari
837fn initial_reordering_consonant_syllable(
838 plan: &hb_ot_shape_plan_t,
839 indic_plan: &IndicShapePlan,
840 face: &hb_font_t,
841 start: usize,
842 end: usize,
843 buffer: &mut hb_buffer_t,
844) {
845 // https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167
846 // For compatibility with legacy usage in Kannada,
847 // Ra+h+ZWJ must behave like Ra+ZWJ+h...
848 if buffer.script == Some(script::KANNADA)
849 && start + 3 <= end
850 && buffer.info[start].is_one_of(category_flag(ot_category_t::OT_Ra))
851 && buffer.info[start + 1].is_one_of(category_flag(ot_category_t::OT_H))
852 && buffer.info[start + 2].is_one_of(category_flag(ot_category_t::OT_ZWJ))
853 {
854 buffer.merge_clusters(start + 1, start + 3);
855 buffer.info.swap(start + 1, start + 2);
856 }
857
858 // 1. Find base consonant:
859 //
860 // The shaping engine finds the base consonant of the syllable, using the
861 // following algorithm: starting from the end of the syllable, move backwards
862 // until a consonant is found that does not have a below-base or post-base
863 // form (post-base forms have to follow below-base forms), or that is not a
864 // pre-base-reordering Ra, or arrive at the first consonant. The consonant
865 // stopped at will be the base.
866 //
867 // - If the syllable starts with Ra + Halant (in a script that has Reph)
868 // and has more than one consonant, Ra is excluded from candidates for
869 // base consonants.
870
871 let mut base = end;
872 let mut has_reph = false;
873
874 {
875 // -> If the syllable starts with Ra + Halant (in a script that has Reph)
876 // and has more than one consonant, Ra is excluded from candidates for
877 // base consonants.
878 let mut limit = start;
879 if indic_plan.mask_array[indic_feature::RPHF] != 0
880 && start + 3 <= end
881 && ((indic_plan.config.reph_mode == RephMode::Implicit
882 && !buffer.info[start + 2].is_joiner())
883 || (indic_plan.config.reph_mode == RephMode::Explicit
884 && buffer.info[start + 2].indic_category() == ot_category_t::OT_ZWJ))
885 {
886 // See if it matches the 'rphf' feature.
887 let glyphs = &[
888 buffer.info[start].as_glyph(),
889 buffer.info[start + 1].as_glyph(),
890 if indic_plan.config.reph_mode == RephMode::Explicit {
891 buffer.info[start + 2].as_glyph()
892 } else {
893 GlyphId(0)
894 },
895 ];
896 if indic_plan
897 .rphf
898 .would_substitute(&plan.ot_map, face, &glyphs[0..2])
899 || (indic_plan.config.reph_mode == RephMode::Explicit
900 && indic_plan.rphf.would_substitute(&plan.ot_map, face, glyphs))
901 {
902 limit += 2;
903 while limit < end && buffer.info[limit].is_joiner() {
904 limit += 1;
905 }
906 base = start;
907 has_reph = true;
908 }
909 } else if indic_plan.config.reph_mode == RephMode::LogRepha
910 && buffer.info[start].indic_category() == ot_category_t::OT_Repha
911 {
912 limit += 1;
913 while limit < end && buffer.info[limit].is_joiner() {
914 limit += 1;
915 }
916 base = start;
917 has_reph = true;
918 }
919
920 {
921 // -> starting from the end of the syllable, move backwards
922 let mut i = end;
923 let mut seen_below = false;
924 loop {
925 i -= 1;
926 // -> until a consonant is found
927 if buffer.info[i].is_consonant_myanmar() {
928 // -> that does not have a below-base or post-base form
929 // (post-base forms have to follow below-base forms),
930 if buffer.info[i].indic_position() != ot_position_t::POS_BELOW_C
931 && (buffer.info[i].indic_position() != ot_position_t::POS_POST_C
932 || seen_below)
933 {
934 base = i;
935 break;
936 }
937 if buffer.info[i].indic_position() == ot_position_t::POS_BELOW_C {
938 seen_below = true;
939 }
940
941 // -> or that is not a pre-base-reordering Ra,
942 //
943 // IMPLEMENTATION NOTES:
944 //
945 // Our pre-base-reordering Ra's are marked position::PostC, so will be skipped
946 // by the logic above already.
947
948 // -> or arrive at the first consonant. The consonant stopped at will
949 // be the base.
950 base = i;
951 } else {
952 // A ZWJ after a Halant stops the base search, and requests an explicit
953 // half form.
954 // A ZWJ before a Halant, requests a subjoined form instead, and hence
955 // search continues. This is particularly important for Bengali
956 // sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya.
957 if start < i
958 && buffer.info[i].indic_category() == ot_category_t::OT_ZWJ
959 && buffer.info[i - 1].indic_category() == ot_category_t::OT_H
960 {
961 break;
962 }
963 }
964
965 if i <= limit {
966 break;
967 }
968 }
969 }
970
971 // -> If the syllable starts with Ra + Halant (in a script that has Reph)
972 // and has more than one consonant, Ra is excluded from candidates for
973 // base consonants.
974 //
975 // Only do this for unforced Reph. (ie. not for Ra,H,ZWJ.
976 if has_reph && base == start && limit - base <= 2 {
977 // Have no other consonant, so Reph is not formed and Ra becomes base.
978 has_reph = false;
979 }
980 }
981
982 // 2. Decompose and reorder Matras:
983 //
984 // Each matra and any syllable modifier sign in the syllable are moved to the
985 // appropriate position relative to the consonant(s) in the syllable. The
986 // shaping engine decomposes two- or three-part matras into their constituent
987 // parts before any repositioning. Matra characters are classified by which
988 // consonant in a conjunct they have affinity for and are reordered to the
989 // following positions:
990 //
991 // - Before first half form in the syllable
992 // - After subjoined consonants
993 // - After post-form consonant
994 // - After main consonant (for above marks)
995 //
996 // IMPLEMENTATION NOTES:
997 //
998 // The normalize() routine has already decomposed matras for us, so we don't
999 // need to worry about that.
1000
1001 // 3. Reorder marks to canonical order:
1002 //
1003 // Adjacent nukta and halant or nukta and vedic sign are always repositioned
1004 // if necessary, so that the nukta is first.
1005 //
1006 // IMPLEMENTATION NOTES:
1007 //
1008 // We don't need to do this: the normalize() routine already did this for us.
1009
1010 // Reorder characters
1011
1012 for i in start..base {
1013 let pos = buffer.info[i].indic_position();
1014 buffer.info[i].set_indic_position(cmp::min(ot_position_t::POS_PRE_C, pos));
1015 }
1016
1017 if base < end {
1018 buffer.info[base].set_indic_position(ot_position_t::POS_BASE_C);
1019 }
1020
1021 // Handle beginning Ra
1022 if has_reph {
1023 buffer.info[start].set_indic_position(ot_position_t::POS_RA_TO_BECOME_REPH);
1024 }
1025
1026 // For old-style Indic script tags, move the first post-base Halant after
1027 // last consonant.
1028 //
1029 // Reports suggest that in some scripts Uniscribe does this only if there
1030 // is *not* a Halant after last consonant already. We know that is the
1031 // case for Kannada, while it reorders unconditionally in other scripts,
1032 // eg. Malayalam, Bengali, and Devanagari. We don't currently know about
1033 // other scripts, so we block Kannada.
1034 //
1035 // Kannada test case:
1036 // U+0C9A,U+0CCD,U+0C9A,U+0CCD
1037 // With some versions of Lohit Kannada.
1038 // https://bugs.freedesktop.org/show_bug.cgi?id=59118
1039 //
1040 // Malayalam test case:
1041 // U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D
1042 // With lohit-ttf-20121122/Lohit-Malayalam.ttf
1043 //
1044 // Bengali test case:
1045 // U+0998,U+09CD,U+09AF,U+09CD
1046 // With Windows XP vrinda.ttf
1047 // https://github.com/harfbuzz/harfbuzz/issues/1073
1048 //
1049 // Devanagari test case:
1050 // U+091F,U+094D,U+0930,U+094D
1051 // With chandas.ttf
1052 // https://github.com/harfbuzz/harfbuzz/issues/1071
1053 if indic_plan.is_old_spec {
1054 let disallow_double_halants = buffer.script == Some(script::KANNADA);
1055 for i in base + 1..end {
1056 if buffer.info[i].indic_category() == ot_category_t::OT_H {
1057 let mut j = end - 1;
1058 while j > i {
1059 if buffer.info[j].is_consonant()
1060 || (disallow_double_halants
1061 && buffer.info[j].indic_category() == ot_category_t::OT_H)
1062 {
1063 break;
1064 }
1065
1066 j -= 1;
1067 }
1068
1069 if buffer.info[j].indic_category() != ot_category_t::OT_H && j > i {
1070 // Move Halant to after last consonant.
1071 let t = buffer.info[i];
1072 for k in 0..j - i {
1073 buffer.info[k + i] = buffer.info[k + i + 1];
1074 }
1075 buffer.info[j] = t;
1076 }
1077
1078 break;
1079 }
1080 }
1081 }
1082
1083 // Attach misc marks to previous char to move with them.
1084 {
1085 let mut last_pos = ot_position_t::POS_START;
1086 for i in start..end {
1087 let ok = rb_flag_unsafe(buffer.info[i].indic_category() as u32)
1088 & (category_flag(ot_category_t::OT_ZWJ)
1089 | category_flag(ot_category_t::OT_ZWNJ)
1090 | category_flag(ot_category_t::OT_N)
1091 | category_flag(ot_category_t::OT_RS)
1092 | category_flag(ot_category_t::OT_CM)
1093 | category_flag(ot_category_t::OT_H))
1094 != 0;
1095 if ok {
1096 buffer.info[i].set_indic_position(last_pos);
1097
1098 if buffer.info[i].indic_category() == ot_category_t::OT_H
1099 && buffer.info[i].indic_position() == ot_position_t::POS_PRE_M
1100 {
1101 // Uniscribe doesn't move the Halant with Left Matra.
                    // TEST: U+092B,U+093F,U+094D
1103 // We follow.
1104 for j in (start + 1..=i).rev() {
1105 if buffer.info[j - 1].indic_position() != ot_position_t::POS_PRE_M {
1106 let pos = buffer.info[j - 1].indic_position();
1107 buffer.info[i].set_indic_position(pos);
1108 break;
1109 }
1110 }
1111 }
1112 } else if buffer.info[i].indic_position() != ot_position_t::POS_SMVD {
1113 if buffer.info[i].indic_category() == ot_category_t::OT_MPst
1114 && i > start
1115 && buffer.info[i - 1].indic_category() == ot_category_t::OT_SM
1116 {
1117 let val = buffer.info[i].indic_position();
1118 buffer.info[i - 1].set_indic_position(val);
1119 }
1120
1121 last_pos = buffer.info[i].indic_position();
1122 }
1123 }
1124 }
1125 // For post-base consonants let them own anything before them
1126 // since the last consonant or matra.
1127 {
1128 let mut last = base;
1129 for i in base + 1..end {
1130 if buffer.info[i].is_consonant() {
1131 for j in last + 1..i {
1132 if buffer.info[j].indic_position() < ot_position_t::POS_SMVD {
1133 let pos = buffer.info[i].indic_position();
1134 buffer.info[j].set_indic_position(pos);
1135 }
1136 }
1137
1138 last = i;
1139 } else if (rb_flag_unsafe(buffer.info[i].indic_category() as u32)
1140 & (rb_flag(ot_category_t::OT_M as u32) | rb_flag(ot_category_t::OT_MPst as u32)))
1141 != 0
1142 {
1143 last = i;
1144 }
1145 }
1146 }
1147
1148 {
1149 // Use syllable() for sort accounting temporarily.
1150 let syllable = buffer.info[start].syllable();
1151 for i in start..end {
1152 buffer.info[i].set_syllable(u8::try_from(i - start).unwrap());
1153 }
1154
1155 buffer.info[start..end].sort_by_key(|a| a.indic_position());
1156
1157 // Find base again; also flip left-matra sequence.
1158 let mut first_left_mantra = end;
1159 let mut last_left_mantra = end;
1160 base = end;
1161
1162 for i in start..end {
1163 if buffer.info[i].indic_position() == ot_position_t::POS_BASE_C {
1164 base = i;
1165 break;
1166 } else if buffer.info[i].indic_position() == ot_position_t::POS_PRE_M {
1167 if first_left_mantra == end {
1168 first_left_mantra = i;
1169 }
1170
1171 last_left_mantra = i;
1172 }
1173 }
1174
1175 // https://github.com/harfbuzz/harfbuzz/issues/3863
1176 if first_left_mantra < last_left_mantra {
1177 // No need to merge clusters, handled later.
1178 buffer.reverse_range(first_left_mantra, last_left_mantra + 1);
1179 // Reverse back nuktas, etc.
1180 let mut i = first_left_mantra;
1181
1182 for j in i..=last_left_mantra {
1183 if (rb_flag_unsafe(buffer.info[j].indic_category() as u32)
1184 & (rb_flag(ot_category_t::OT_M as u32)
1185 | rb_flag(ot_category_t::OT_MPst as u32)))
1186 != 0
1187 {
1188 buffer.reverse_range(i, j + 1);
1189 i = j + 1;
1190 }
1191 }
1192 }
1193
1194 // Things are out-of-control for post base positions, they may shuffle
1195 // around like crazy. In old-spec mode, we move halants around, so in
1196 // that case merge all clusters after base. Otherwise, check the sort
1197 // order and merge as needed.
1198 // For pre-base stuff, we handle cluster issues in final reordering.
1199 //
1200 // We could use buffer->sort() for this, if there was no special
1201 // reordering of pre-base stuff happening later...
1202 // We don't want to merge_clusters all of that, which buffer->sort()
1203 // would. Here's a concrete example:
1204 //
1205 // Assume there's a pre-base consonant and explicit Halant before base,
1206 // followed by a prebase-reordering (left) Matra:
1207 //
1208 // C,H,ZWNJ,B,M
1209 //
1210 // At this point in reordering we would have:
1211 //
1212 // M,C,H,ZWNJ,B
1213 //
1214 // whereas in final reordering we will bring the Matra closer to Base:
1215 //
1216 // C,H,ZWNJ,M,B
1217 //
1218 // That's why we don't want to merge-clusters anything before the Base
1219 // at this point. But if something moved from after Base to before it,
1220 // we should merge clusters from base to them. In final-reordering, we
1221 // only move things around before base, and merge-clusters up to base.
1222 // These two merge-clusters from the two sides of base will interlock
1223 // to merge things correctly. See:
1224 // https://github.com/harfbuzz/harfbuzz/issues/2272
1225 if indic_plan.is_old_spec || end - start > 127 {
1226 buffer.merge_clusters(base, end);
1227 } else {
1228 // Note! syllable() is a one-byte field.
1229 for i in base..end {
1230 if buffer.info[i].syllable() != 255 {
1231 let mut min = i;
1232 let mut max = i;
1233 let mut j = start + buffer.info[i].syllable() as usize;
1234 while j != i {
1235 min = cmp::min(min, j);
1236 max = cmp::max(max, j);
1237 let next = start + buffer.info[j].syllable() as usize;
1238 buffer.info[j].set_syllable(255); // So we don't process j later again.
1239 j = next;
1240 }
1241
1242 buffer.merge_clusters(cmp::max(base, min), max + 1);
1243 }
1244 }
1245 }
1246
1247 // Put syllable back in.
1248 for info in &mut buffer.info[start..end] {
1249 info.set_syllable(syllable);
1250 }
1251 }
1252
1253 // Setup masks now
1254
1255 {
1256 // Reph
1257 for info in &mut buffer.info[start..end] {
1258 if info.indic_position() != ot_position_t::POS_RA_TO_BECOME_REPH {
1259 break;
1260 }
1261
1262 info.mask |= indic_plan.mask_array[indic_feature::RPHF];
1263 }
1264
1265 // Pre-base
1266 let mut mask = indic_plan.mask_array[indic_feature::HALF];
1267 if !indic_plan.is_old_spec && indic_plan.config.blwf_mode == BlwfMode::PreAndPost {
1268 mask |= indic_plan.mask_array[indic_feature::BLWF];
1269 }
1270
1271 for info in &mut buffer.info[start..base] {
1272 info.mask |= mask;
1273 }
1274
1275 // Base
1276 mask = 0;
1277 if base < end {
1278 buffer.info[base].mask |= mask;
1279 }
1280
1281 // Post-base
1282 mask = indic_plan.mask_array[indic_feature::BLWF]
1283 | indic_plan.mask_array[indic_feature::ABVF]
1284 | indic_plan.mask_array[indic_feature::PSTF];
1285 for i in base + 1..end {
1286 buffer.info[i].mask |= mask;
1287 }
1288 }
1289
1290 if indic_plan.is_old_spec && buffer.script == Some(script::DEVANAGARI) {
1291 // Old-spec eye-lash Ra needs special handling. From the
1292 // spec:
1293 //
1294 // "The feature 'below-base form' is applied to consonants
1295 // having below-base forms and following the base consonant.
1296 // The exception is vattu, which may appear below half forms
1297 // as well as below the base glyph. The feature 'below-base
1298 // form' will be applied to all such occurrences of Ra as well."
1299 //
1300 // Test case: U+0924,U+094D,U+0930,U+094d,U+0915
1301 // with Sanskrit 2003 font.
1302 //
1303 // However, note that Ra,Halant,ZWJ is the correct way to
        // request eyelash form of Ra, so we wouldn't inhibit it
1305 // in that sequence.
1306 //
1307 // Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
1308 for i in start..base.saturating_sub(1) {
1309 if buffer.info[i].indic_category() == ot_category_t::OT_Ra
1310 && buffer.info[i + 1].indic_category() == ot_category_t::OT_H
1311 && (i + 2 == base || buffer.info[i + 2].indic_category() != ot_category_t::OT_ZWJ)
1312 {
1313 buffer.info[i].mask |= indic_plan.mask_array[indic_feature::BLWF];
1314 buffer.info[i + 1].mask |= indic_plan.mask_array[indic_feature::BLWF];
1315 }
1316 }
1317 }
1318
1319 let pref_len = 2;
1320 if indic_plan.mask_array[indic_feature::PREF] != 0 && base + pref_len < end {
1321 // Find a Halant,Ra sequence and mark it for pre-base-reordering processing.
1322 for i in base + 1..end - pref_len + 1 {
1323 let glyphs = &[buffer.info[i + 0].as_glyph(), buffer.info[i + 1].as_glyph()];
1324 if indic_plan.pref.would_substitute(&plan.ot_map, face, glyphs) {
1325 buffer.info[i + 0].mask |= indic_plan.mask_array[indic_feature::PREF];
1326 buffer.info[i + 1].mask |= indic_plan.mask_array[indic_feature::PREF];
1327 break;
1328 }
1329 }
1330 }
1331
1332 // Apply ZWJ/ZWNJ effects
1333 for i in start + 1..end {
1334 if buffer.info[i].is_joiner() {
1335 let non_joiner = buffer.info[i].indic_category() == ot_category_t::OT_ZWNJ;
1336 let mut j = i;
1337
1338 loop {
1339 j -= 1;
1340
1341 // ZWJ/ZWNJ should disable CJCT. They do that by simply
1342 // being there, since we don't skip them for the CJCT
1343 // feature (ie. F_MANUAL_ZWJ)
1344
1345 // A ZWNJ disables HALF.
1346 if non_joiner {
1347 buffer.info[j].mask &= !indic_plan.mask_array[indic_feature::HALF];
1348 }
1349
1350 if j <= start || buffer.info[j].is_consonant() {
1351 break;
1352 }
1353 }
1354 }
1355 }
1356}
1357
1358fn initial_reordering_standalone_cluster(
1359 plan: &hb_ot_shape_plan_t,
1360 indic_plan: &IndicShapePlan,
1361 face: &hb_font_t,
1362 start: usize,
1363 end: usize,
1364 buffer: &mut hb_buffer_t,
1365) {
1366 // We treat placeholder/dotted-circle as if they are consonants, so we
1367 // should just chain. Only if not in compatibility mode that is...
1368 initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer);
1369}
1370
1371fn final_reordering(plan: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) -> bool {
1372 if buffer.is_empty() {
1373 return false;
1374 }
1375
1376 foreach_syllable!(buffer, start, end, {
1377 final_reordering_impl(plan, face, start, end, buffer);
1378 });
1379
1380 false
1381}
1382
1383fn final_reordering_impl(
1384 plan: &hb_ot_shape_plan_t,
1385 face: &hb_font_t,
1386 start: usize,
1387 end: usize,
1388 buffer: &mut hb_buffer_t,
1389) {
1390 let indic_plan = plan.data::<IndicShapePlan>();
1391
1392 // This function relies heavily on halant glyphs. Lots of ligation
1393 // and possibly multiple substitutions happened prior to this
1394 // phase, and that might have messed up our properties. Recover
1395 // from a particular case of that where we're fairly sure that a
1396 // class of OT_H is desired but has been lost.
1397 //
1398 // We don't call load_virama_glyph(), since we know it's already loaded.
1399 let mut virama_glyph = None;
1400 if indic_plan.config.virama != 0 {
1401 if let Some(g) = face.get_nominal_glyph(indic_plan.config.virama) {
1402 virama_glyph = Some(g.0 as u32);
1403 }
1404 }
1405
1406 if let Some(virama_glyph) = virama_glyph {
1407 for info in &mut buffer.info[start..end] {
1408 if info.glyph_id == virama_glyph
1409 && _hb_glyph_info_ligated(info)
1410 && _hb_glyph_info_multiplied(info)
1411 {
1412 // This will make sure that this glyph passes is_halant() test.
1413 info.set_indic_category(ot_category_t::OT_H);
1414 _hb_glyph_info_clear_ligated_and_multiplied(info);
1415 }
1416 }
1417 }
1418
1419 // 4. Final reordering:
1420 //
1421 // After the localized forms and basic shaping forms GSUB features have been
1422 // applied (see below), the shaping engine performs some final glyph
1423 // reordering before applying all the remaining font features to the entire
1424 // syllable.
1425
1426 let mut try_pref = indic_plan.mask_array[indic_feature::PREF] != 0;
1427
1428 let mut base = start;
1429 while base < end {
1430 if buffer.info[base].indic_position() as u32 >= ot_position_t::POS_BASE_C as u32 {
1431 if try_pref && base + 1 < end {
1432 for i in base + 1..end {
1433 if (buffer.info[i].mask & indic_plan.mask_array[indic_feature::PREF]) != 0 {
1434 if !(_hb_glyph_info_substituted(&buffer.info[i])
1435 && _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[i]))
1436 {
1437 // Ok, this was a 'pref' candidate but didn't form any.
1438 // Base is around here...
1439 base = i;
1440 while base < end && buffer.info[base].is_halant() {
1441 base += 1;
1442 }
1443
1444 if base < end {
1445 buffer.info[base].set_indic_position(ot_position_t::POS_BASE_C);
1446 }
1447
1448 try_pref = false;
1449 }
1450
1451 break;
1452 }
1453
1454 if base == end {
1455 break;
1456 }
1457 }
1458 }
1459
1460 // For Malayalam, skip over unformed below- (but NOT post-) forms.
1461 if buffer.script == Some(script::MALAYALAM) {
1462 let mut i = base + 1;
1463 while i < end {
1464 while i < end && buffer.info[i].is_joiner() {
1465 i += 1;
1466 }
1467
1468 if i == end || !buffer.info[i].is_halant() {
1469 break;
1470 }
1471
1472 i += 1; // Skip halant.
1473
1474 while i < end && buffer.info[i].is_joiner() {
1475 i += 1;
1476 }
1477
1478 if i < end
1479 && buffer.info[i].is_consonant()
1480 && buffer.info[i].indic_position() == ot_position_t::POS_BELOW_C
1481 {
1482 base = i;
1483 buffer.info[base].set_indic_position(ot_position_t::POS_BASE_C);
1484 }
1485
1486 i += 1;
1487 }
1488 }
1489
1490 if start < base
1491 && buffer.info[base].indic_position() as u32 > ot_position_t::POS_BASE_C as u32
1492 {
1493 base -= 1;
1494 }
1495
1496 break;
1497 }
1498
1499 base += 1;
1500 }
1501
1502 if base == end
1503 && start < base
1504 && buffer.info[base - 1].is_one_of(rb_flag(ot_category_t::OT_ZWJ as u32))
1505 {
1506 base -= 1;
1507 }
1508
1509 if base < end {
1510 while start < base
1511 && buffer.info[base].is_one_of(
1512 rb_flag(ot_category_t::OT_N as u32) | rb_flag(ot_category_t::OT_H as u32),
1513 )
1514 {
1515 base -= 1;
1516 }
1517 }
1518
1519 // - Reorder matras:
1520 //
1521 // If a pre-base matra character had been reordered before applying basic
1522 // features, the glyph can be moved closer to the main consonant based on
1523 // whether half-forms had been formed. Actual position for the matra is
1524 // defined as “after last standalone halant glyph, after initial matra
1525 // position and before the main consonant”. If ZWJ or ZWNJ follow this
1526 // halant, position is moved after it.
1527 //
1528 // IMPLEMENTATION NOTES:
1529 //
1530 // It looks like the last sentence is wrong. Testing, with Windows 7 Uniscribe
1531 // and Devanagari shows that the behavior is best described as:
1532 //
1533 // "If ZWJ follows this halant, matra is NOT repositioned after this halant.
1534 // If ZWNJ follows this halant, position is moved after it."
1535 //
1536 // Test case, with Adobe Devanagari or Nirmala UI:
1537 //
1538 // U+091F,U+094D,U+200C,U+092F,U+093F
1539 // (Matra moves to the middle, after ZWNJ.)
1540 //
1541 // U+091F,U+094D,U+200D,U+092F,U+093F
1542 // (Matra does NOT move, stays to the left.)
1543 //
1544 // https://github.com/harfbuzz/harfbuzz/issues/1070
1545
1546 // Otherwise there can't be any pre-base matra characters.
1547 if start + 1 < end && start < base {
1548 // If we lost track of base, alas, position before last thingy.
1549 let mut new_pos = if base == end { base - 2 } else { base - 1 };
1550
1551 // Malayalam / Tamil do not have "half" forms or explicit virama forms.
1552 // The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1553 // We want to position matra after them.
1554 if buffer.script != Some(script::MALAYALAM) && buffer.script != Some(script::TAMIL) {
1555 loop {
1556 while new_pos > start
1557 && !buffer.info[new_pos].is_one_of(
1558 rb_flag(ot_category_t::OT_M as u32)
1559 | rb_flag(ot_category_t::OT_MPst as u32)
1560 | rb_flag(ot_category_t::OT_H as u32),
1561 )
1562 {
1563 new_pos -= 1;
1564 }
1565
1566 // If we found no Halant we are done.
1567 // Otherwise only proceed if the Halant does
1568 // not belong to the Matra itself!
1569 if buffer.info[new_pos].is_halant()
1570 && buffer.info[new_pos].indic_position() != ot_position_t::POS_PRE_M
1571 {
1572 if new_pos + 1 < end {
1573 // -> If ZWJ follows this halant, matra is NOT repositioned after this halant.
1574 if buffer.info[new_pos + 1].indic_category() == ot_category_t::OT_ZWJ {
1575 // Keep searching.
1576 if new_pos > start {
1577 new_pos -= 1;
1578 continue;
1579 }
1580 }
1581
1582 // -> If ZWNJ follows this halant, position is moved after it.
1583 //
1584 // IMPLEMENTATION NOTES:
1585 //
1586 // This is taken care of by the state-machine. A Halant,ZWNJ is a terminating
1587 // sequence for a consonant syllable; any pre-base matras occurring after it
1588 // will belong to the subsequent syllable.
1589 }
1590 } else {
1591 new_pos = start; // No move.
1592 }
1593
1594 break;
1595 }
1596 }
1597
1598 if start < new_pos && buffer.info[new_pos].indic_position() != ot_position_t::POS_PRE_M {
1599 // Now go see if there's actually any matras...
1600 for i in (start + 1..=new_pos).rev() {
1601 if buffer.info[i - 1].indic_position() == ot_position_t::POS_PRE_M {
1602 let old_pos = i - 1;
1603 // Shouldn't actually happen.
1604 if old_pos < base && base <= new_pos {
1605 base -= 1;
1606 }
1607
1608 let tmp = buffer.info[old_pos];
1609 for i in 0..new_pos - old_pos {
1610 buffer.info[i + old_pos] = buffer.info[i + old_pos + 1];
1611 }
1612 buffer.info[new_pos] = tmp;
1613
1614 // Note: this merge_clusters() is intentionally *after* the reordering.
1615 // Indic matra reordering is special and tricky...
1616 buffer.merge_clusters(new_pos, cmp::min(end, base + 1));
1617
1618 new_pos -= 1;
1619 }
1620 }
1621 } else {
1622 for i in start..base {
1623 if buffer.info[i].indic_position() == ot_position_t::POS_PRE_M {
1624 buffer.merge_clusters(i, cmp::min(end, base + 1));
1625 break;
1626 }
1627 }
1628 }
1629 }
1630
1631 // - Reorder reph:
1632 //
1633 // Reph’s original position is always at the beginning of the syllable,
1634 // (i.e. it is not reordered at the character reordering stage). However,
1635 // it will be reordered according to the basic-forms shaping results.
1636 // Possible positions for reph, depending on the script, are; after main,
1637 // before post-base consonant forms, and after post-base consonant forms.
1638
1639 // Two cases:
1640 //
1641 // - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then
1642 // we should only move it if the sequence ligated to the repha form.
1643 //
1644 // - If repha is encoded separately and in the logical position, we should only
1645 // move it if it did NOT ligate. If it ligated, it's probably the font trying
1646 // to make it work without the reordering.
1647
1648 if start + 1 < end
1649 && buffer.info[start].indic_position() == ot_position_t::POS_RA_TO_BECOME_REPH
1650 && (buffer.info[start].indic_category() == ot_category_t::OT_Repha)
1651 ^ _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[start])
1652 {
1653 let mut new_reph_pos;
1654 'reph: {
1655 let reph_pos = indic_plan.config.reph_pos;
1656
1657 // 1. If reph should be positioned after post-base consonant forms,
1658 // proceed to step 5.
1659 if reph_pos != RephPosition::AfterPost {
1660 // 2. If the reph repositioning class is not after post-base: target
1661 // position is after the first explicit halant glyph between the
1662 // first post-reph consonant and last main consonant. If ZWJ or ZWNJ
1663 // are following this halant, position is moved after it. If such
1664 // position is found, this is the target position. Otherwise,
1665 // proceed to the next step.
1666 //
1667 // Note: in old-implementation fonts, where classifications were
1668 // fixed in shaping engine, there was no case where reph position
1669 // will be found on this step.
1670 {
1671 new_reph_pos = start + 1;
1672 while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() {
1673 new_reph_pos += 1;
1674 }
1675
1676 if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() {
1677 // ->If ZWJ or ZWNJ are following this halant, position is moved after it.
1678 if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() {
1679 new_reph_pos += 1;
1680 }
1681
1682 break 'reph;
1683 }
1684 }
1685
1686 // 3. If reph should be repositioned after the main consonant: find the
1687 // first consonant not ligated with main, or find the first
1688 // consonant that is not a potential pre-base-reordering Ra.
1689 if reph_pos == RephPosition::AfterMain {
1690 new_reph_pos = base;
1691 while new_reph_pos + 1 < end
1692 && buffer.info[new_reph_pos + 1].indic_position()
1693 <= ot_position_t::POS_AFTER_MAIN
1694 {
1695 new_reph_pos += 1;
1696 }
1697
1698 if new_reph_pos < end {
1699 break 'reph;
1700 }
1701 }
1702
1703 // 4. If reph should be positioned before post-base consonant, find
1704 // first post-base classified consonant not ligated with main. If no
1705 // consonant is found, the target position should be before the
1706 // first matra, syllable modifier sign or vedic sign.
1707 //
1708 // This is our take on what step 4 is trying to say (and failing, BADLY).
1709 if reph_pos == RephPosition::AfterSub {
1710 new_reph_pos = base;
1711 while new_reph_pos + 1 < end
1712 && (rb_flag_unsafe(buffer.info[new_reph_pos + 1].indic_position() as u32)
1713 & (rb_flag(ot_position_t::POS_POST_C as u32)
1714 | rb_flag(ot_position_t::POS_AFTER_POST as u32)
1715 | rb_flag(ot_position_t::POS_SMVD as u32)))
1716 == 0
1717 {
1718 new_reph_pos += 1;
1719 }
1720
1721 if new_reph_pos < end {
1722 break 'reph;
1723 }
1724 }
1725 }
1726
1727 // 5. If no consonant is found in steps 3 or 4, move reph to a position
1728 // immediately before the first post-base matra, syllable modifier
1729 // sign or vedic sign that has a reordering class after the intended
1730 // reph position. For example, if the reordering position for reph
1731 // is post-main, it will skip above-base matras that also have a
1732 // post-main position.
1733 //
1734 // Copied from step 2.
1735 new_reph_pos = start + 1;
1736 while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() {
1737 new_reph_pos += 1;
1738 }
1739
1740 if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() {
1741 /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
1742 if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() {
1743 new_reph_pos += 1;
1744 }
1745
1746 break 'reph;
1747 }
1748 // See https://github.com/harfbuzz/harfbuzz/issues/2298#issuecomment-615318654
1749
1750 // 6. Otherwise, reorder reph to the end of the syllable.
1751 {
1752 new_reph_pos = end - 1;
1753 while new_reph_pos > start
1754 && buffer.info[new_reph_pos].indic_position() == ot_position_t::POS_SMVD
1755 {
1756 new_reph_pos -= 1;
1757 }
1758
1759 // If the Reph is to be ending up after a Matra,Halant sequence,
1760 // position it before that Halant so it can interact with the Matra.
1761 // However, if it's a plain Consonant,Halant we shouldn't do that.
1762 // Uniscribe doesn't do this.
1763 // TEST: U+0930,U+094D,U+0915,U+094B,U+094D
1764 if buffer.info[new_reph_pos].is_halant() {
1765 for info in &buffer.info[base + 1..new_reph_pos] {
1766 if (rb_flag_unsafe(info.indic_category() as u32)
1767 & (rb_flag(ot_category_t::OT_M as u32)
1768 | rb_flag(ot_category_t::OT_MPst as u32)))
1769 != 0
1770 {
1771 // Ok, got it.
1772 new_reph_pos -= 1;
1773 }
1774 }
1775 }
1776 }
1777
1778 break 'reph;
1779 }
1780
1781 // Move
1782 buffer.merge_clusters(start, new_reph_pos + 1);
1783
1784 let reph = buffer.info[start];
1785 for i in 0..new_reph_pos - start {
1786 buffer.info[i + start] = buffer.info[i + start + 1];
1787 }
1788 buffer.info[new_reph_pos] = reph;
1789
1790 if start < base && base <= new_reph_pos {
1791 base -= 1;
1792 }
1793 }
1794
1795 // - Reorder pre-base-reordering consonants:
1796 //
1797 // If a pre-base-reordering consonant is found, reorder it according to
1798 // the following rules:
1799
1800 // Otherwise there can't be any pre-base-reordering Ra.
1801 if try_pref && base + 1 < end {
1802 for i in base + 1..end {
1803 if (buffer.info[i].mask & indic_plan.mask_array[indic_feature::PREF]) != 0 {
1804 // 1. Only reorder a glyph produced by substitution during application
1805 // of the <pref> feature. (Note that a font may shape a Ra consonant with
1806 // the feature generally but block it in certain contexts.)
1807 //
1808 // Note: We just check that something got substituted. We don't check that
1809 // the <pref> feature actually did it...
1810 //
1811 // Reorder pref only if it ligated.
1812 if _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[i]) {
1813 // 2. Try to find a target position the same way as for pre-base matra.
1814 // If it is found, reorder pre-base consonant glyph.
1815 //
1816 // 3. If position is not found, reorder immediately before main consonant.
1817
1818 let mut new_pos = base;
1819 // Malayalam / Tamil do not have "half" forms or explicit virama forms.
1820 // The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1821 // We want to position matra after them.
1822 if buffer.script != Some(script::MALAYALAM)
1823 && buffer.script != Some(script::TAMIL)
1824 {
1825 while new_pos > start
1826 && !buffer.info[new_pos - 1].is_one_of(
1827 rb_flag(ot_category_t::OT_M as u32)
1828 | rb_flag(ot_category_t::OT_MPst as u32)
1829 | rb_flag(ot_category_t::OT_H as u32),
1830 )
1831 {
1832 new_pos -= 1;
1833 }
1834 }
1835
1836 if new_pos > start && buffer.info[new_pos - 1].is_halant() {
1837 // -> If ZWJ or ZWNJ follow this halant, position is moved after it.
1838 if new_pos < end && buffer.info[new_pos].is_joiner() {
1839 new_pos += 1;
1840 }
1841 }
1842
1843 {
1844 let old_pos = i;
1845
1846 buffer.merge_clusters(new_pos, old_pos + 1);
1847 let tmp = buffer.info[old_pos];
1848 for i in (0..old_pos - new_pos).rev() {
1849 buffer.info[i + new_pos + 1] = buffer.info[i + new_pos];
1850 }
1851 buffer.info[new_pos] = tmp;
1852
1853 if new_pos <= base && base < old_pos {
1854 // TODO: investigate
1855 #[allow(unused_assignments)]
1856 {
1857 base += 1;
1858 }
1859 }
1860 }
1861 }
1862
1863 break;
1864 }
1865 }
1866 }
1867
1868 // Apply 'init' to the Left Matra if it's a word start.
1869 if buffer.info[start].indic_position() == ot_position_t::POS_PRE_M {
1870 if start == 0
1871 || (rb_flag_unsafe(
1872 _hb_glyph_info_get_general_category(&buffer.info[start - 1]).to_rb(),
1873 ) & rb_flag_range(
1874 hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT,
1875 hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,
1876 )) == 0
1877 {
1878 buffer.info[start].mask |= indic_plan.mask_array[indic_feature::INIT];
1879 } else {
1880 buffer.unsafe_to_break(Some(start - 1), Some(start + 1));
1881 }
1882 }
1883}
1884