ot_shaper_indic.rs source code [crates/rustybuzz/src/hb/ot_shaper_indic.rs]

1	use alloc::boxed::Box;
2	use core::cmp;
3	use core::convert::TryFrom;
4	use core::ops::Range;
5
6	use ttf_parser::GlyphId;
7
8	use super::algs::*;
9	use super::buffer::hb_buffer_t;
10	use super::ot_layout::*;
11	use super::ot_layout_gsubgpos::{WouldApply, WouldApplyContext};
12	use super::ot_map::*;
13	use super::ot_shape::*;
14	use super::ot_shape_normalize::*;
15	use super::ot_shape_plan::hb_ot_shape_plan_t;
16	use super::ot_shaper::*;
17	use super::unicode::{hb_gc, CharExt, GeneralCategoryExt};
18	use super::{hb_font_t, hb_glyph_info_t, hb_mask_t, hb_tag_t, script, Script};
19
20	pub const INDIC_SHAPER: hb_ot_shaper_t = hb_ot_shaper_t {
21	collect_features: Some(collect_features),
22	override_features: Some(override_features),
23	create_data: Some(\|plan: &hb_ot_shape_plan_t\| Box::new(IndicShapePlan::new(plan))),
24	preprocess_text: Some(preprocess_text),
25	postprocess_glyphs: None,
26	normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
27	decompose: Some(decompose),
28	compose: Some(compose),
29	setup_masks: Some(setup_masks),
30	gpos_tag: None,
31	reorder_marks: None,
32	zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
33	fallback_position: `false`,
34	};
35
/// Shaping category of a glyph, stored as one of the `ot_category_t` values.
pub type Category = u8;

// This mod doesn't exist in harfbuzz anymore. Instead, the corresponding values are
// auto-generated by the various machines and stored in `hb-ot-shaper-indic-table`.
// This means that when updating the values in the machines, we also need to update
// them here.
#[allow(dead_code)]
pub mod ot_category_t {
    pub const OT_X: u8 = 0;
    pub const OT_C: u8 = 1;
    pub const OT_V: u8 = 2;
    pub const OT_N: u8 = 3;
    pub const OT_H: u8 = 4;
    pub const OT_ZWNJ: u8 = 5;
    pub const OT_ZWJ: u8 = 6;
    pub const OT_M: u8 = 7;
    pub const OT_SM: u8 = 8;
    pub const OT_A: u8 = 9;
    // Alias of `OT_A`.
    pub const OT_VD: u8 = OT_A;
    pub const OT_PLACEHOLDER: u8 = 10;
    // Alias of `OT_PLACEHOLDER`.
    pub const OT_GB: u8 = OT_PLACEHOLDER;
    pub const OT_DOTTEDCIRCLE: u8 = 11;
    pub const OT_RS: u8 = 12; // Register Shifter, used in Khmer OT spec.
    pub const OT_MPst: u8 = 13;
    pub const OT_Repha: u8 = 14; // Atomically-encoded logical or visual repha.
    pub const OT_Ra: u8 = 15;
    pub const OT_CM: u8 = 16; // Consonant-Medial.
    pub const OT_Symbol: u8 = 17; // Avagraha, etc that take marks (SM,A,VD).
    pub const OT_CS: u8 = 18;

    // Khmer & Myanmar shapers.
    pub const OT_VAbv: u8 = 20;
    pub const OT_VBlw: u8 = 21;
    pub const OT_VPre: u8 = 22;
    pub const OT_VPst: u8 = 23;

    // Khmer.
    pub const OT_Robatic: u8 = 25;
    pub const OT_Xgroup: u8 = 26;
    pub const OT_Ygroup: u8 = 27;

    // Myanmar.
    pub const OT_As: u8 = 32; // Asat
    pub const OT_MH: u8 = 35; // Medial
    pub const OT_MR: u8 = 36; // Medial
    pub const OT_MW: u8 = 37; // Medial
    pub const OT_MY: u8 = 38; // Medial
    pub const OT_PT: u8 = 39; // Pwo and other tones
    pub const OT_VS: u8 = 40; // Variation selectors
    pub const OT_ML: u8 = 41; // Consonant medials

    // This one doesn't exist in ot_category_t in harfbuzz, only in
    // the Myanmar machine. However, in Rust we unfortunately can't export
    // inside the Ragel file, so we have to define it here as well. Needs to
    // be kept in sync with the value in the machine.
    pub const IV: u8 = 2;
}
92
/// Position class of a glyph within its syllable, stored as one of the
/// `ot_position_t` values.
pub type Position = u8;

/// Position classes, numbered in ascending order from `POS_START` to `POS_END`.
pub mod ot_position_t {
    pub const POS_START: u8 = 0;

    pub const POS_RA_TO_BECOME_REPH: u8 = 1;
    pub const POS_PRE_M: u8 = 2;
    pub const POS_PRE_C: u8 = 3;

    pub const POS_BASE_C: u8 = 4;
    pub const POS_AFTER_MAIN: u8 = 5;

    pub const POS_ABOVE_C: u8 = 6;

    pub const POS_BEFORE_SUB: u8 = 7;
    pub const POS_BELOW_C: u8 = 8;
    pub const POS_AFTER_SUB: u8 = 9;

    pub const POS_BEFORE_POST: u8 = 10;
    pub const POS_POST_C: u8 = 11;
    pub const POS_AFTER_POST: u8 = 12;

    pub const POS_SMVD: u8 = 13;

    pub const POS_END: u8 = 14;
}
118
119	const INDIC_FEATURES: &[(hb_tag_t, hb_ot_map_feature_flags_t)] = &[
120	// Basic features.
121	// These features are applied in order, one at a time, after initial_reordering,
122	// constrained to the syllable.
123	(
124	hb_tag_t::from_bytes(b"nukt"),
125	F_GLOBAL_MANUAL_JOINERS \| F_PER_SYLLABLE,
126	),
127	(
128	hb_tag_t::from_bytes(b"akhn"),
129	F_GLOBAL_MANUAL_JOINERS \| F_PER_SYLLABLE,
130	),
131	(
132	hb_tag_t::from_bytes(b"rphf"),
133	F_MANUAL_JOINERS \| F_PER_SYLLABLE,
134	),
135	(
136	hb_tag_t::from_bytes(b"rkrf"),
137	F_GLOBAL_MANUAL_JOINERS \| F_PER_SYLLABLE,
138	),
139	(
140	hb_tag_t::from_bytes(b"pref"),
141	F_MANUAL_JOINERS \| F_PER_SYLLABLE,
142	),
143	(
144	hb_tag_t::from_bytes(b"blwf"),
145	F_MANUAL_JOINERS \| F_PER_SYLLABLE,
146	),
147	(
148	hb_tag_t::from_bytes(b"abvf"),
149	F_MANUAL_JOINERS \| F_PER_SYLLABLE,
150	),
151	(
152	hb_tag_t::from_bytes(b"half"),
153	F_MANUAL_JOINERS \| F_PER_SYLLABLE,
154	),
155	(
156	hb_tag_t::from_bytes(b"pstf"),
157	F_MANUAL_JOINERS \| F_PER_SYLLABLE,
158	),
159	(
160	hb_tag_t::from_bytes(b"vatu"),
161	F_GLOBAL_MANUAL_JOINERS \| F_PER_SYLLABLE,
162	),
163	(
164	hb_tag_t::from_bytes(b"cjct"),
165	F_GLOBAL_MANUAL_JOINERS \| F_PER_SYLLABLE,
166	),
167	// Other features.
168	// These features are applied all at once, after final_reordering, constrained
169	// to the syllable.
170	// Default Bengali font in Windows for example has intermixed
171	// lookups for init,pres,abvs,blws features.
172	(
173	hb_tag_t::from_bytes(b"init"),
174	F_MANUAL_JOINERS \| F_PER_SYLLABLE,
175	),
176	(
177	hb_tag_t::from_bytes(b"pres"),
178	F_GLOBAL_MANUAL_JOINERS \| F_PER_SYLLABLE,
179	),
180	(
181	hb_tag_t::from_bytes(b"abvs"),
182	F_GLOBAL_MANUAL_JOINERS \| F_PER_SYLLABLE,
183	),
184	(
185	hb_tag_t::from_bytes(b"blws"),
186	F_GLOBAL_MANUAL_JOINERS \| F_PER_SYLLABLE,
187	),
188	(
189	hb_tag_t::from_bytes(b"psts"),
190	F_GLOBAL_MANUAL_JOINERS \| F_PER_SYLLABLE,
191	),
192	(
193	hb_tag_t::from_bytes(b"haln"),
194	F_GLOBAL_MANUAL_JOINERS \| F_PER_SYLLABLE,
195	),
196	];
197
// Indices into `INDIC_FEATURES`.
// Must be in the same order as the INDIC_FEATURES array.
#[allow(dead_code)]
mod indic_feature {
    // Basic features.
    pub const NUKT: usize = 0;
    pub const AKHN: usize = 1;
    pub const RPHF: usize = 2;
    pub const RKRF: usize = 3;
    pub const PREF: usize = 4;
    pub const BLWF: usize = 5;
    pub const ABVF: usize = 6;
    pub const HALF: usize = 7;
    pub const PSTF: usize = 8;
    pub const VATU: usize = 9;
    pub const CJCT: usize = 10;
    // Other features.
    pub const INIT: usize = 11;
    pub const PRES: usize = 12;
    pub const ABVS: usize = 13;
    pub const BLWS: usize = 14;
    pub const PSTS: usize = 15;
    pub const HALN: usize = 16;
}
219
220	pub(crate) const fn category_flag(c: Category) -> u32 {
221	rb_flag(c as u32)
222	}
223
224	// Note:
225	//
226	// We treat Vowels and placeholders as if they were consonants. This is safe because Vowels
227	// cannot happen in a consonant syllable. The plus side however is, we can call the
228	// consonant syllable logic from the vowel syllable function and get it all right!
229	const CONSONANT_FLAGS_INDIC: u32 = category_flag(ot_category_t::OT_C)
230	\| category_flag(ot_category_t::OT_CS)
231	\| category_flag(ot_category_t::OT_Ra)
232	\| category_flag(ot_category_t::OT_CM)
233	\| category_flag(ot_category_t::OT_V)
234	\| category_flag(ot_category_t::OT_PLACEHOLDER)
235	\| category_flag(ot_category_t::OT_DOTTEDCIRCLE);
236
237	const CONSONANT_FLAGS_MYANMAR: u32 = category_flag(ot_category_t::OT_C)
238	\| category_flag(ot_category_t::OT_CS)
239	\| category_flag(ot_category_t::OT_Ra)
240	// \| category_flag(ot_category_t::OT_CM)
241	\| category_flag(ot_category_t::IV)
242	\| category_flag(ot_category_t::OT_GB)
243	\| category_flag(ot_category_t::OT_DOTTEDCIRCLE);
244
245	const JOINER_FLAGS: u32 =
246	category_flag(ot_category_t::OT_ZWJ) \| category_flag(ot_category_t::OT_ZWNJ);
247
248	#[derive(Clone, Copy, PartialEq)]
249	enum RephPosition {
250	AfterMain = ot_position_t::POS_AFTER_MAIN as isize,
251	BeforeSub = ot_position_t::POS_BEFORE_SUB as isize,
252	AfterSub = ot_position_t::POS_AFTER_SUB as isize,
253	BeforePost = ot_position_t::POS_BEFORE_POST as isize,
254	AfterPost = ot_position_t::POS_AFTER_POST as isize,
255	}
256
/// How a script encodes its reph.
#[derive(Clone, Copy, PartialEq)]
enum RephMode {
    /// Reph formed out of initial Ra,H sequence.
    Implicit,
    /// Reph formed out of initial Ra,H,ZWJ sequence.
    Explicit,
    /// Encoded Repha character, needs reordering.
    LogRepha,
}
266
/// Which consonants the below-forms (`blwf`) feature is applied to.
#[derive(Clone, Copy, PartialEq)]
enum BlwfMode {
    /// Below-forms feature applied to pre-base and post-base.
    PreAndPost,
    /// Below-forms feature applied to post-base only.
    PostOnly,
}
274
275	#[derive(Clone, Copy)]
276	struct IndicConfig {
277	script: Option<Script>,
278	has_old_spec: bool,
279	virama: u32,
280	reph_pos: RephPosition,
281	reph_mode: RephMode,
282	blwf_mode: BlwfMode,
283	}
284
285	impl IndicConfig {
286	const fn new(
287	script: Option<Script>,
288	has_old_spec: bool,
289	virama: u32,
290	reph_pos: RephPosition,
291	reph_mode: RephMode,
292	blwf_mode: BlwfMode,
293	) -> Self {
294	IndicConfig {
295	script,
296	has_old_spec,
297	virama,
298	reph_pos,
299	reph_mode,
300	blwf_mode,
301	}
302	}
303	}
304
305	const INDIC_CONFIGS: &[IndicConfig] = &[
306	IndicConfig::new(
307	script:None,
308	has_old_spec:`false`,
309	virama:`0`,
310	reph_pos:RephPosition::BeforePost,
311	RephMode::Implicit,
312	BlwfMode::PreAndPost,
313	),
314	IndicConfig::new(
315	script:Some(script::DEVANAGARI),
316	has_old_spec:`true`,
317	virama:`0x094D`,
318	reph_pos:RephPosition::BeforePost,
319	RephMode::Implicit,
320	BlwfMode::PreAndPost,
321	),
322	IndicConfig::new(
323	script:Some(script::BENGALI),
324	has_old_spec:`true`,
325	virama:`0x09CD`,
326	reph_pos:RephPosition::AfterSub,
327	RephMode::Implicit,
328	BlwfMode::PreAndPost,
329	),
330	IndicConfig::new(
331	script:Some(script::GURMUKHI),
332	has_old_spec:`true`,
333	virama:`0x0A4D`,
334	reph_pos:RephPosition::BeforeSub,
335	RephMode::Implicit,
336	BlwfMode::PreAndPost,
337	),
338	IndicConfig::new(
339	script:Some(script::GUJARATI),
340	has_old_spec:`true`,
341	virama:`0x0ACD`,
342	reph_pos:RephPosition::BeforePost,
343	RephMode::Implicit,
344	BlwfMode::PreAndPost,
345	),
346	IndicConfig::new(
347	script:Some(script::ORIYA),
348	has_old_spec:`true`,
349	virama:`0x0B4D`,
350	reph_pos:RephPosition::AfterMain,
351	RephMode::Implicit,
352	BlwfMode::PreAndPost,
353	),
354	IndicConfig::new(
355	script:Some(script::TAMIL),
356	has_old_spec:`true`,
357	virama:`0x0BCD`,
358	reph_pos:RephPosition::AfterPost,
359	RephMode::Implicit,
360	BlwfMode::PreAndPost,
361	),
362	IndicConfig::new(
363	script:Some(script::TELUGU),
364	has_old_spec:`true`,
365	virama:`0x0C4D`,
366	reph_pos:RephPosition::AfterPost,
367	RephMode::Explicit,
368	BlwfMode::PostOnly,
369	),
370	IndicConfig::new(
371	script:Some(script::KANNADA),
372	has_old_spec:`true`,
373	virama:`0x0CCD`,
374	reph_pos:RephPosition::AfterPost,
375	RephMode::Implicit,
376	BlwfMode::PostOnly,
377	),
378	IndicConfig::new(
379	script:Some(script::MALAYALAM),
380	has_old_spec:`true`,
381	virama:`0x0D4D`,
382	reph_pos:RephPosition::AfterMain,
383	RephMode::LogRepha,
384	BlwfMode::PreAndPost,
385	),
386	IndicConfig::new(
387	script:Some(script::SINHALA),
388	has_old_spec:`false`,
389	virama:`0x0DCA`,
390	reph_pos:RephPosition::AfterPost,
391	RephMode::Explicit,
392	BlwfMode::PreAndPost,
393	),
394	];
395
396	struct IndicWouldSubstituteFeature {
397	lookups: Range<usize>,
398	zero_context: bool,
399	}
400
401	impl IndicWouldSubstituteFeature {
402	pub fn new(map: &hb_ot_map_t, feature_tag: hb_tag_t, zero_context: bool) -> Self {
403	IndicWouldSubstituteFeature {
404	lookups: match map.get_feature_stage(TableIndex::GSUB, feature_tag) {
405	Some(stage) => map.stage_lookup_range(TableIndex::GSUB, stage),
406	None => `0`..`0`,
407	},
408	zero_context,
409	}
410	}
411
412	pub fn would_substitute(
413	&self,
414	map: &hb_ot_map_t,
415	face: &hb_font_t,
416	glyphs: &[GlyphId],
417	) -> bool {
418	for index in self.lookups.clone() {
419	let lookup = map.lookup(TableIndex::GSUB, index);
420	let ctx = WouldApplyContext {
421	glyphs,
422	zero_context: self.zero_context,
423	};
424	if face
425	.gsub
426	.as_ref()
427	.and_then(\|table\| table.get_lookup(lookup.index))
428	.map_or(`false`, \|lookup\| lookup.would_apply(&ctx))
429	{
430	return `true`;
431	}
432	}
433
434	`false`
435	}
436	}
437
438	struct IndicShapePlan {
439	config: IndicConfig,
440	is_old_spec: bool,
441	// virama_glyph: Option<u32>,
442	rphf: IndicWouldSubstituteFeature,
443	pref: IndicWouldSubstituteFeature,
444	blwf: IndicWouldSubstituteFeature,
445	pstf: IndicWouldSubstituteFeature,
446	vatu: IndicWouldSubstituteFeature,
447	mask_array: [hb_mask_t; INDIC_FEATURES.len()],
448	}
449
450	impl IndicShapePlan {
451	fn new(plan: &hb_ot_shape_plan_t) -> Self {
452	let script = plan.script;
453	let config = if let Some(c) = INDIC_CONFIGS.iter().skip(`1`).find(\|c\| c.script == script) {
454	*c
455	} else {
456	INDIC_CONFIGS[`0`]
457	};
458
459	let is_old_spec = config.has_old_spec
460	&& plan
461	.ot_map
462	.chosen_script(TableIndex::GSUB)
463	.map_or(`true`, \|tag\| tag.to_bytes()[`3`] != b'2');
464
465	// Use zero-context would_substitute() matching for new-spec of the main
466	// Indic scripts, and scripts with one spec only, but not for old-specs.
467	// The new-spec for all dual-spec scripts says zero-context matching happens.
468	//
469	// However, testing with Malayalam shows that old and new spec both allow
470	// context. Testing with Bengali new-spec however shows that it doesn't.
471	// So, the heuristic here is the way it is. It should only* be changed,*
472	// as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE.
473	let zero_context = is_old_spec && script != Some(script::MALAYALAM);
474
475	let mut mask_array = [`0`; INDIC_FEATURES.len()];
476	for (i, feature) in INDIC_FEATURES.iter().enumerate() {
477	mask_array[i] = if feature.1 & F_GLOBAL != `0` {
478	`0`
479	} else {
480	plan.ot_map.get_1_mask(feature.0)
481	}
482	}
483
484	// TODO: what is this?
485	// let mut virama_glyph = None;
486	// if config.virama != 0 {
487	// if let Some(g) = face.glyph_index(char::try_from(config.virama).unwrap()) {
488	// virama_glyph = Some(g.0 as u32);
489	// }
490	// }
491
492	IndicShapePlan {
493	config,
494	is_old_spec,
495	// virama_glyph,
496	rphf: IndicWouldSubstituteFeature::new(
497	&plan.ot_map,
498	hb_tag_t::from_bytes(b"rphf"),
499	zero_context,
500	),
501	pref: IndicWouldSubstituteFeature::new(
502	&plan.ot_map,
503	hb_tag_t::from_bytes(b"pref"),
504	zero_context,
505	),
506	blwf: IndicWouldSubstituteFeature::new(
507	&plan.ot_map,
508	hb_tag_t::from_bytes(b"blwf"),
509	zero_context,
510	),
511	pstf: IndicWouldSubstituteFeature::new(
512	&plan.ot_map,
513	hb_tag_t::from_bytes(b"pstf"),
514	zero_context,
515	),
516	vatu: IndicWouldSubstituteFeature::new(
517	&plan.ot_map,
518	hb_tag_t::from_bytes(b"vatu"),
519	zero_context,
520	),
521	mask_array,
522	}
523	}
524	}
525
526	impl hb_glyph_info_t {
527	pub(crate) fn indic_category(&self) -> Category {
528	self.ot_shaper_var_u8_category()
529	}
530
531	pub(crate) fn myanmar_category(&self) -> Category {
532	self.ot_shaper_var_u8_category()
533	}
534
535	pub(crate) fn khmer_category(&self) -> Category {
536	self.ot_shaper_var_u8_category()
537	}
538
539	pub(crate) fn set_indic_category(&mut self, c: Category) {
540	self.set_ot_shaper_var_u8_category(c)
541	}
542
543	pub(crate) fn set_myanmar_category(&mut self, c: Category) {
544	self.set_ot_shaper_var_u8_category(c)
545	}
546
547	pub(crate) fn indic_position(&self) -> Position {
548	self.ot_shaper_var_u8_auxiliary()
549	}
550
551	pub(crate) fn myanmar_position(&self) -> Position {
552	self.ot_shaper_var_u8_auxiliary()
553	}
554
555	pub(crate) fn set_indic_position(&mut self, c: Position) {
556	self.set_ot_shaper_var_u8_auxiliary(c)
557	}
558
559	pub(crate) fn set_myanmar_position(&mut self, c: Position) {
560	self.set_ot_shaper_var_u8_auxiliary(c)
561	}
562
563	fn is_one_of(&self, flags: u32) -> bool {
564	// If it ligated, all bets are off.
565	if _hb_glyph_info_ligated(self) {
566	return `false`;
567	}
568
569	rb_flag_unsafe(self.indic_category() as u32) & flags != `0`
570	}
571
572	fn is_joiner(&self) -> bool {
573	self.is_one_of(JOINER_FLAGS)
574	}
575
576	pub(crate) fn is_consonant(&self) -> bool {
577	self.is_one_of(CONSONANT_FLAGS_INDIC)
578	}
579
580	pub(crate) fn is_consonant_myanmar(&self) -> bool {
581	self.is_one_of(CONSONANT_FLAGS_MYANMAR)
582	}
583
584	fn is_halant(&self) -> bool {
585	self.is_one_of(rb_flag(ot_category_t::OT_H as u32))
586	}
587
588	fn set_indic_properties(&mut self) {
589	let u = self.glyph_id;
590	let (cat, pos) = crate::hb::ot_shaper_indic_table::get_categories(u);
591
592	self.set_indic_category(cat);
593	self.set_indic_position(pos);
594	}
595	}
596
597	fn collect_features(planner: &mut hb_ot_shape_planner_t) {
598	// Do this before any lookups have been applied.
599	planner.ot_map.add_gsub_pause(Some(setup_syllables));
600
601	planner
602	.ot_map
603	.enable_feature(hb_tag_t::from_bytes(b"locl"), F_PER_SYLLABLE, `1`);
604	// The Indic specs do not require ccmp, but we apply it here since if
605	// there is a use of it, it's typically at the beginning.
606	planner
607	.ot_map
608	.enable_feature(hb_tag_t::from_bytes(b"ccmp"), F_PER_SYLLABLE, `1`);
609
610	planner.ot_map.add_gsub_pause(Some(initial_reordering));
611
612	for feature in INDIC_FEATURES.iter().take(`11`) {
613	planner.ot_map.add_feature(feature.0, feature.1, `1`);
614	planner.ot_map.add_gsub_pause(None);
615	}
616
617	planner.ot_map.add_gsub_pause(Some(final_reordering));
618
619	for feature in INDIC_FEATURES.iter().skip(`11`) {
620	planner.ot_map.add_feature(feature.0, feature.1, `1`);
621	}
622	}
623
624	fn override_features(planner: &mut hb_ot_shape_planner_t) {
625	planner
626	.ot_map
627	.disable_feature(tag:hb_tag_t::from_bytes(b"liga"));
628	planner.ot_map.add_gsub_pause(Some(syllabic_clear_var)); // Don't need syllables anymore.
629	}
630
631	fn preprocess_text(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
632	super::ot_shaper_vowel_constraints::preprocess_text_vowel_constraints(buffer);
633	}
634
635	fn decompose(_: &hb_ot_shape_normalize_context_t, ab: char) -> Option<(char, char)> {
636	// Don't decompose these.
637	match ab {
638	'`\u{0931}`' \| // DEVANAGARI LETTER RRA
639	// https://github.com/harfbuzz/harfbuzz/issues/779
640	'`\u{09DC}`' \| // BENGALI LETTER RRA
641	'`\u{09DD}`' \| // BENGALI LETTER RHA
642	'`\u{0B94}`' => return None, // TAMIL LETTER AU
643	_ => {}
644	}
645
646	crate::hb::unicode::decompose(ab)
647	}
648
649	fn compose(_: &hb_ot_shape_normalize_context_t, a: char, b: char) -> Option<char> {
650	// Avoid recomposing split matras.
651	if a.general_category().is_mark() {
652	return None;
653	}
654
655	// Composition-exclusion exceptions that we want to recompose.
656	if a == '`\u{09AF}`' && b == '`\u{09BC}`' {
657	return Some('`\u{09DF}`');
658	}
659
660	crate::hb::unicode::compose(a, b)
661	}
662
663	fn setup_masks(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
664	// We cannot setup masks here. We save information about characters
665	// and setup masks later on in a pause-callback.
666	for info: &mut hb_glyph_info_t in buffer.info_slice_mut() {
667	info.set_indic_properties();
668	}
669	}
670
671	fn setup_syllables(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) -> bool {
672	super::ot_shaper_indic_machine::find_syllables_indic(buffer);
673
674	let mut start: usize = `0`;
675	let mut end: usize = buffer.next_syllable(start:`0`);
676	while start < buffer.len {
677	buffer.unsafe_to_break(start:Some(start), end:Some(end));
678	start = end;
679	end = buffer.next_syllable(start);
680	}
681
682	`false`
683	}
684
685	fn initial_reordering(
686	plan: &hb_ot_shape_plan_t,
687	face: &hb_font_t,
688	buffer: &mut hb_buffer_t,
689	) -> bool {
690	use super::ot_shaper_indic_machine::SyllableType;
691
692	let mut ret = `false`;
693
694	let indic_plan = plan.data::<IndicShapePlan>();
695
696	update_consonant_positions(plan, indic_plan, face, buffer);
697	if super::ot_shaper_syllabic::insert_dotted_circles(
698	face,
699	buffer,
700	SyllableType::BrokenCluster as u8,
701	ot_category_t::OT_DOTTEDCIRCLE,
702	Some(ot_category_t::OT_Repha),
703	Some(ot_position_t::POS_END),
704	) {
705	ret = `true`;
706	}
707
708	let mut start = `0`;
709	let mut end = buffer.next_syllable(`0`);
710	while start < buffer.len {
711	initial_reordering_syllable(plan, indic_plan, face, start, end, buffer);
712	start = end;
713	end = buffer.next_syllable(start);
714	}
715
716	ret
717	}
718
719	fn update_consonant_positions(
720	plan: &hb_ot_shape_plan_t,
721	indic_plan: &IndicShapePlan,
722	face: &hb_font_t,
723	buffer: &mut hb_buffer_t,
724	) {
725	let mut virama_glyph: Option = None;
726	if indic_plan.config.virama != `0` {
727	virama_glyph = face.get_nominal_glyph(indic_plan.config.virama);
728	}
729
730	if let Some(virama: GlyphId) = virama_glyph {
731	for info: &mut hb_glyph_info_t in buffer.info_slice_mut() {
732	if info.indic_position() == ot_position_t::POS_BASE_C {
733	let consonant: GlyphId = info.as_glyph();
734	info.set_indic_position(consonant_position_from_face(
735	plan, indic_plan, face, consonant, virama,
736	));
737	}
738	}
739	}
740	}
741
742	fn consonant_position_from_face(
743	plan: &hb_ot_shape_plan_t,
744	indic_plan: &IndicShapePlan,
745	face: &hb_font_t,
746	consonant: GlyphId,
747	virama: GlyphId,
748	) -> u8 {
749	// For old-spec, the order of glyphs is Consonant,Virama,
750	// whereas for new-spec, it's Virama,Consonant. However,
751	// some broken fonts (like Free Sans) simply copied lookups
752	// from old-spec to new-spec without modification.
753	// And oddly enough, Uniscribe seems to respect those lookups.
754	// Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds
755	// base at 0. The font however, only has lookups matching
756	// 930,94D in 'blwf', not the expected 94D,930 (with new-spec
757	// table). As such, we simply match both sequences. Seems
758	// to work.
759	//
760	// Vatu is done as well, for:
761	// https://github.com/harfbuzz/harfbuzz/issues/1587
762
763	if indic_plan
764	.blwf
765	.would_substitute(&plan.ot_map, face, &[virama, consonant])
766	\|\| indic_plan
767	.blwf
768	.would_substitute(&plan.ot_map, face, &[consonant, virama])
769	\|\| indic_plan
770	.vatu
771	.would_substitute(&plan.ot_map, face, &[virama, consonant])
772	\|\| indic_plan
773	.vatu
774	.would_substitute(&plan.ot_map, face, &[consonant, virama])
775	{
776	return ot_position_t::POS_BELOW_C;
777	}
778
779	if indic_plan
780	.pstf
781	.would_substitute(&plan.ot_map, face, &[virama, consonant])
782	\|\| indic_plan
783	.pstf
784	.would_substitute(&plan.ot_map, face, &[consonant, virama])
785	{
786	return ot_position_t::POS_POST_C;
787	}
788
789	if indic_plan
790	.pref
791	.would_substitute(&plan.ot_map, face, &[virama, consonant])
792	\|\| indic_plan
793	.pref
794	.would_substitute(&plan.ot_map, face, &[consonant, virama])
795	{
796	return ot_position_t::POS_POST_C;
797	}
798
799	ot_position_t::POS_BASE_C
800	}
801
802	fn initial_reordering_syllable(
803	plan: &hb_ot_shape_plan_t,
804	indic_plan: &IndicShapePlan,
805	face: &hb_font_t,
806	start: usize,
807	end: usize,
808	buffer: &mut hb_buffer_t,
809	) {
810	use super::ot_shaper_indic_machine::SyllableType;
811
812	let syllable_type = match buffer.info[start].syllable() & `0x0F` {
813	`0` => SyllableType::ConsonantSyllable,
814	`1` => SyllableType::VowelSyllable,
815	`2` => SyllableType::StandaloneCluster,
816	`3` => SyllableType::SymbolCluster,
817	`4` => SyllableType::BrokenCluster,
818	`5` => SyllableType::NonIndicCluster,
819	_ => unreachable!(),
820	};
821
822	match syllable_type {
823	// We made the vowels look like consonants. So let's call the consonant logic!
824	SyllableType::VowelSyllable \| SyllableType::ConsonantSyllable => {
825	initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer);
826	}
827	// We already inserted dotted-circles, so just call the standalone_cluster.
828	SyllableType::BrokenCluster \| SyllableType::StandaloneCluster => {
829	initial_reordering_standalone_cluster(plan, indic_plan, face, start, end, buffer);
830	}
831	SyllableType::SymbolCluster \| SyllableType::NonIndicCluster => {}
832	}
833	}
834
835	// Rules from:
// https://docs.microsoft.com/en-us/typography/script-development/devanagari
837	fn initial_reordering_consonant_syllable(
838	plan: &hb_ot_shape_plan_t,
839	indic_plan: &IndicShapePlan,
840	face: &hb_font_t,
841	start: usize,
842	end: usize,
843	buffer: &mut hb_buffer_t,
844	) {
845	// https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167
846	// For compatibility with legacy usage in Kannada,
847	// Ra+h+ZWJ must behave like Ra+ZWJ+h...
848	if buffer.script == Some(script::KANNADA)
849	&& start + `3` <= end
850	&& buffer.info[start].is_one_of(category_flag(ot_category_t::OT_Ra))
851	&& buffer.info[start + `1`].is_one_of(category_flag(ot_category_t::OT_H))
852	&& buffer.info[start + `2`].is_one_of(category_flag(ot_category_t::OT_ZWJ))
853	{
854	buffer.merge_clusters(start + `1`, start + `3`);
855	buffer.info.swap(start + `1`, start + `2`);
856	}
857
858	// 1. Find base consonant:
859	//
860	// The shaping engine finds the base consonant of the syllable, using the
861	// following algorithm: starting from the end of the syllable, move backwards
862	// until a consonant is found that does not have a below-base or post-base
863	// form (post-base forms have to follow below-base forms), or that is not a
864	// pre-base-reordering Ra, or arrive at the first consonant. The consonant
865	// stopped at will be the base.
866	//
867	// - If the syllable starts with Ra + Halant (in a script that has Reph)
868	// and has more than one consonant, Ra is excluded from candidates for
869	// base consonants.
870
871	let mut base = end;
872	let mut has_reph = `false`;
873
874	{
875	// -> If the syllable starts with Ra + Halant (in a script that has Reph)
876	// and has more than one consonant, Ra is excluded from candidates for
877	// base consonants.
878	let mut limit = start;
879	if indic_plan.mask_array[indic_feature::RPHF] != `0`
880	&& start + `3` <= end
881	&& ((indic_plan.config.reph_mode == RephMode::Implicit
882	&& !buffer.info[start + `2`].is_joiner())
883	\|\| (indic_plan.config.reph_mode == RephMode::Explicit
884	&& buffer.info[start + `2`].indic_category() == ot_category_t::OT_ZWJ))
885	{
886	// See if it matches the 'rphf' feature.
887	let glyphs = &[
888	buffer.info[start].as_glyph(),
889	buffer.info[start + `1`].as_glyph(),
890	if indic_plan.config.reph_mode == RephMode::Explicit {
891	buffer.info[start + `2`].as_glyph()
892	} else {
893	GlyphId(`0`)
894	},
895	];
896	if indic_plan
897	.rphf
898	.would_substitute(&plan.ot_map, face, &glyphs[`0`..`2`])
899	\|\| (indic_plan.config.reph_mode == RephMode::Explicit
900	&& indic_plan.rphf.would_substitute(&plan.ot_map, face, glyphs))
901	{
902	limit += `2`;
903	while limit < end && buffer.info[limit].is_joiner() {
904	limit += `1`;
905	}
906	base = start;
907	has_reph = `true`;
908	}
909	} else if indic_plan.config.reph_mode == RephMode::LogRepha
910	&& buffer.info[start].indic_category() == ot_category_t::OT_Repha
911	{
912	limit += `1`;
913	while limit < end && buffer.info[limit].is_joiner() {
914	limit += `1`;
915	}
916	base = start;
917	has_reph = `true`;
918	}
919
920	{
921	// -> starting from the end of the syllable, move backwards
922	let mut i = end;
923	let mut seen_below = `false`;
924	loop {
925	i -= `1`;
926	// -> until a consonant is found
927	if buffer.info[i].is_consonant_myanmar() {
928	// -> that does not have a below-base or post-base form
929	// (post-base forms have to follow below-base forms),
930	if buffer.info[i].indic_position() != ot_position_t::POS_BELOW_C
931	&& (buffer.info[i].indic_position() != ot_position_t::POS_POST_C
932	\|\| seen_below)
933	{
934	base = i;
935	break;
936	}
937	if buffer.info[i].indic_position() == ot_position_t::POS_BELOW_C {
938	seen_below = `true`;
939	}
940
941	// -> or that is not a pre-base-reordering Ra,
942	//
943	// IMPLEMENTATION NOTES:
944	//
945	// Our pre-base-reordering Ra's are marked position::PostC, so will be skipped
946	// by the logic above already.
947
948	// -> or arrive at the first consonant. The consonant stopped at will
949	// be the base.
950	base = i;
951	} else {
952	// A ZWJ after a Halant stops the base search, and requests an explicit
953	// half form.
954	// A ZWJ before a Halant, requests a subjoined form instead, and hence
955	// search continues. This is particularly important for Bengali
956	// sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya.
957	if start < i
958	&& buffer.info[i].indic_category() == ot_category_t::OT_ZWJ
959	&& buffer.info[i - `1`].indic_category() == ot_category_t::OT_H
960	{
961	break;
962	}
963	}
964
965	if i <= limit {
966	break;
967	}
968	}
969	}
970
971	// -> If the syllable starts with Ra + Halant (in a script that has Reph)
972	// and has more than one consonant, Ra is excluded from candidates for
973	// base consonants.
974	//
975	// Only do this for unforced Reph. (ie. not for Ra,H,ZWJ.
976	if has_reph && base == start && limit - base <= `2` {
977	// Have no other consonant, so Reph is not formed and Ra becomes base.
978	has_reph = `false`;
979	}
980	}
981
982	// 2. Decompose and reorder Matras:
983	//
984	// Each matra and any syllable modifier sign in the syllable are moved to the
985	// appropriate position relative to the consonant(s) in the syllable. The
986	// shaping engine decomposes two- or three-part matras into their constituent
987	// parts before any repositioning. Matra characters are classified by which
988	// consonant in a conjunct they have affinity for and are reordered to the
989	// following positions:
990	//
991	// - Before first half form in the syllable
992	// - After subjoined consonants
993	// - After post-form consonant
994	// - After main consonant (for above marks)
995	//
996	// IMPLEMENTATION NOTES:
997	//
998	// The normalize() routine has already decomposed matras for us, so we don't
999	// need to worry about that.
1000
1001	// 3. Reorder marks to canonical order:
1002	//
1003	// Adjacent nukta and halant or nukta and vedic sign are always repositioned
1004	// if necessary, so that the nukta is first.
1005	//
1006	// IMPLEMENTATION NOTES:
1007	//
1008	// We don't need to do this: the normalize() routine already did this for us.
1009
1010	// Reorder characters
1011
1012	for i in start..base {
1013	let pos = buffer.info[i].indic_position();
1014	buffer.info[i].set_indic_position(cmp::min(ot_position_t::POS_PRE_C, pos));
1015	}
1016
1017	if base < end {
1018	buffer.info[base].set_indic_position(ot_position_t::POS_BASE_C);
1019	}
1020
1021	// Handle beginning Ra
1022	if has_reph {
1023	buffer.info[start].set_indic_position(ot_position_t::POS_RA_TO_BECOME_REPH);
1024	}
1025
1026	// For old-style Indic script tags, move the first post-base Halant after
1027	// last consonant.
1028	//
1029	// Reports suggest that in some scripts Uniscribe does this only if there
1030	// is not* a Halant after last consonant already. We know that is the*
1031	// case for Kannada, while it reorders unconditionally in other scripts,
1032	// eg. Malayalam, Bengali, and Devanagari. We don't currently know about
1033	// other scripts, so we block Kannada.
1034	//
1035	// Kannada test case:
1036	// U+0C9A,U+0CCD,U+0C9A,U+0CCD
1037	// With some versions of Lohit Kannada.
1038	// https://bugs.freedesktop.org/show_bug.cgi?id=59118
1039	//
1040	// Malayalam test case:
1041	// U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D
1042	// With lohit-ttf-20121122/Lohit-Malayalam.ttf
1043	//
1044	// Bengali test case:
1045	// U+0998,U+09CD,U+09AF,U+09CD
1046	// With Windows XP vrinda.ttf
1047	// https://github.com/harfbuzz/harfbuzz/issues/1073
1048	//
1049	// Devanagari test case:
1050	// U+091F,U+094D,U+0930,U+094D
1051	// With chandas.ttf
1052	// https://github.com/harfbuzz/harfbuzz/issues/1071
1053	if indic_plan.is_old_spec {
1054	let disallow_double_halants = buffer.script == Some(script::KANNADA);
1055	for i in base + `1`..end {
1056	if buffer.info[i].indic_category() == ot_category_t::OT_H {
1057	let mut j = end - `1`;
1058	while j > i {
1059	if buffer.info[j].is_consonant()
1060	\|\| (disallow_double_halants
1061	&& buffer.info[j].indic_category() == ot_category_t::OT_H)
1062	{
1063	break;
1064	}
1065
1066	j -= `1`;
1067	}
1068
1069	if buffer.info[j].indic_category() != ot_category_t::OT_H && j > i {
1070	// Move Halant to after last consonant.
1071	let t = buffer.info[i];
1072	for k in `0`..j - i {
1073	buffer.info[k + i] = buffer.info[k + i + `1`];
1074	}
1075	buffer.info[j] = t;
1076	}
1077
1078	break;
1079	}
1080	}
1081	}
1082
1083	// Attach misc marks to previous char to move with them.
1084	{
1085	let mut last_pos = ot_position_t::POS_START;
1086	for i in start..end {
1087	let ok = rb_flag_unsafe(buffer.info[i].indic_category() as u32)
1088	& (category_flag(ot_category_t::OT_ZWJ)
1089	\| category_flag(ot_category_t::OT_ZWNJ)
1090	\| category_flag(ot_category_t::OT_N)
1091	\| category_flag(ot_category_t::OT_RS)
1092	\| category_flag(ot_category_t::OT_CM)
1093	\| category_flag(ot_category_t::OT_H))
1094	!= `0`;
1095	if ok {
1096	buffer.info[i].set_indic_position(last_pos);
1097
1098	if buffer.info[i].indic_category() == ot_category_t::OT_H
1099	&& buffer.info[i].indic_position() == ot_position_t::POS_PRE_M
1100	{
1101	// Uniscribe doesn't move the Halant with Left Matra.
1102	// TEST: U+092B,U+093F,U+094D
1103	// We follow.
1104	for j in (start + `1`..=i).rev() {
1105	if buffer.info[j - `1`].indic_position() != ot_position_t::POS_PRE_M {
1106	let pos = buffer.info[j - `1`].indic_position();
1107	buffer.info[i].set_indic_position(pos);
1108	break;
1109	}
1110	}
1111	}
1112	} else if buffer.info[i].indic_position() != ot_position_t::POS_SMVD {
1113	if buffer.info[i].indic_category() == ot_category_t::OT_MPst
1114	&& i > start
1115	&& buffer.info[i - `1`].indic_category() == ot_category_t::OT_SM
1116	{
1117	let val = buffer.info[i].indic_position();
1118	buffer.info[i - `1`].set_indic_position(val);
1119	}
1120
1121	last_pos = buffer.info[i].indic_position();
1122	}
1123	}
1124	}
1125	// For post-base consonants let them own anything before them
1126	// since the last consonant or matra.
1127	{
1128	let mut last = base;
1129	for i in base + `1`..end {
1130	if buffer.info[i].is_consonant() {
1131	for j in last + `1`..i {
1132	if buffer.info[j].indic_position() < ot_position_t::POS_SMVD {
1133	let pos = buffer.info[i].indic_position();
1134	buffer.info[j].set_indic_position(pos);
1135	}
1136	}
1137
1138	last = i;
1139	} else if (rb_flag_unsafe(buffer.info[i].indic_category() as u32)
1140	& (rb_flag(ot_category_t::OT_M as u32) \| rb_flag(ot_category_t::OT_MPst as u32)))
1141	!= `0`
1142	{
1143	last = i;
1144	}
1145	}
1146	}
1147
1148	{
1149	// Use syllable() for sort accounting temporarily.
1150	let syllable = buffer.info[start].syllable();
1151	for i in start..end {
1152	buffer.info[i].set_syllable(u8::try_from(i - start).unwrap());
1153	}
1154
1155	buffer.info[start..end].sort_by_key(\|a\| a.indic_position());
1156
1157	// Find base again; also flip left-matra sequence.
1158	let mut first_left_mantra = end;
1159	let mut last_left_mantra = end;
1160	base = end;
1161
1162	for i in start..end {
1163	if buffer.info[i].indic_position() == ot_position_t::POS_BASE_C {
1164	base = i;
1165	break;
1166	} else if buffer.info[i].indic_position() == ot_position_t::POS_PRE_M {
1167	if first_left_mantra == end {
1168	first_left_mantra = i;
1169	}
1170
1171	last_left_mantra = i;
1172	}
1173	}
1174
1175	// https://github.com/harfbuzz/harfbuzz/issues/3863
1176	if first_left_mantra < last_left_mantra {
1177	// No need to merge clusters, handled later.
1178	buffer.reverse_range(first_left_mantra, last_left_mantra + `1`);
1179	// Reverse back nuktas, etc.
1180	let mut i = first_left_mantra;
1181
1182	for j in i..=last_left_mantra {
1183	if (rb_flag_unsafe(buffer.info[j].indic_category() as u32)
1184	& (rb_flag(ot_category_t::OT_M as u32)
1185	\| rb_flag(ot_category_t::OT_MPst as u32)))
1186	!= `0`
1187	{
1188	buffer.reverse_range(i, j + `1`);
1189	i = j + `1`;
1190	}
1191	}
1192	}
1193
1194	// Things are out-of-control for post base positions, they may shuffle
1195	// around like crazy. In old-spec mode, we move halants around, so in
1196	// that case merge all clusters after base. Otherwise, check the sort
1197	// order and merge as needed.
1198	// For pre-base stuff, we handle cluster issues in final reordering.
1199	//
1200	// We could use buffer->sort() for this, if there was no special
1201	// reordering of pre-base stuff happening later...
1202	// We don't want to merge_clusters all of that, which buffer->sort()
1203	// would. Here's a concrete example:
1204	//
1205	// Assume there's a pre-base consonant and explicit Halant before base,
1206	// followed by a prebase-reordering (left) Matra:
1207	//
1208	// C,H,ZWNJ,B,M
1209	//
1210	// At this point in reordering we would have:
1211	//
1212	// M,C,H,ZWNJ,B
1213	//
1214	// whereas in final reordering we will bring the Matra closer to Base:
1215	//
1216	// C,H,ZWNJ,M,B
1217	//
1218	// That's why we don't want to merge-clusters anything before the Base
1219	// at this point. But if something moved from after Base to before it,
1220	// we should merge clusters from base to them. In final-reordering, we
1221	// only move things around before base, and merge-clusters up to base.
1222	// These two merge-clusters from the two sides of base will interlock
1223	// to merge things correctly. See:
1224	// https://github.com/harfbuzz/harfbuzz/issues/2272
1225	if indic_plan.is_old_spec \|\| end - start > `127` {
1226	buffer.merge_clusters(base, end);
1227	} else {
1228	// Note! syllable() is a one-byte field.
1229	for i in base..end {
1230	if buffer.info[i].syllable() != `255` {
1231	let mut min = i;
1232	let mut max = i;
1233	let mut j = start + buffer.info[i].syllable() as usize;
1234	while j != i {
1235	min = cmp::min(min, j);
1236	max = cmp::max(max, j);
1237	let next = start + buffer.info[j].syllable() as usize;
1238	buffer.info[j].set_syllable(`255`); // So we don't process j later again.
1239	j = next;
1240	}
1241
1242	buffer.merge_clusters(cmp::max(base, min), max + `1`);
1243	}
1244	}
1245	}
1246
1247	// Put syllable back in.
1248	for info in &mut buffer.info[start..end] {
1249	info.set_syllable(syllable);
1250	}
1251	}
1252
1253	// Setup masks now
1254
1255	{
1256	// Reph
1257	for info in &mut buffer.info[start..end] {
1258	if info.indic_position() != ot_position_t::POS_RA_TO_BECOME_REPH {
1259	break;
1260	}
1261
1262	info.mask \|= indic_plan.mask_array[indic_feature::RPHF];
1263	}
1264
1265	// Pre-base
1266	let mut mask = indic_plan.mask_array[indic_feature::HALF];
1267	if !indic_plan.is_old_spec && indic_plan.config.blwf_mode == BlwfMode::PreAndPost {
1268	mask \|= indic_plan.mask_array[indic_feature::BLWF];
1269	}
1270
1271	for info in &mut buffer.info[start..base] {
1272	info.mask \|= mask;
1273	}
1274
1275	// Base
1276	mask = `0`;
1277	if base < end {
1278	buffer.info[base].mask \|= mask;
1279	}
1280
1281	// Post-base
1282	mask = indic_plan.mask_array[indic_feature::BLWF]
1283	\| indic_plan.mask_array[indic_feature::ABVF]
1284	\| indic_plan.mask_array[indic_feature::PSTF];
1285	for i in base + `1`..end {
1286	buffer.info[i].mask \|= mask;
1287	}
1288	}
1289
1290	if indic_plan.is_old_spec && buffer.script == Some(script::DEVANAGARI) {
1291	// Old-spec eye-lash Ra needs special handling. From the
1292	// spec:
1293	//
1294	// "The feature 'below-base form' is applied to consonants
1295	// having below-base forms and following the base consonant.
1296	// The exception is vattu, which may appear below half forms
1297	// as well as below the base glyph. The feature 'below-base
1298	// form' will be applied to all such occurrences of Ra as well."
1299	//
1300	// Test case: U+0924,U+094D,U+0930,U+094d,U+0915
1301	// with Sanskrit 2003 font.
1302	//
1303	// However, note that Ra,Halant,ZWJ is the correct way to
1304	// request eyelash form of Ra, so we wouldn't inhibit it
1305	// in that sequence.
1306	//
1307	// Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
1308	for i in start..base.saturating_sub(`1`) {
1309	if buffer.info[i].indic_category() == ot_category_t::OT_Ra
1310	&& buffer.info[i + `1`].indic_category() == ot_category_t::OT_H
1311	&& (i + `2` == base \|\| buffer.info[i + `2`].indic_category() != ot_category_t::OT_ZWJ)
1312	{
1313	buffer.info[i].mask \|= indic_plan.mask_array[indic_feature::BLWF];
1314	buffer.info[i + `1`].mask \|= indic_plan.mask_array[indic_feature::BLWF];
1315	}
1316	}
1317	}
1318
1319	let pref_len = `2`;
1320	if indic_plan.mask_array[indic_feature::PREF] != `0` && base + pref_len < end {
1321	// Find a Halant,Ra sequence and mark it for pre-base-reordering processing.
1322	for i in base + `1`..end - pref_len + `1` {
1323	let glyphs = &[buffer.info[i + `0`].as_glyph(), buffer.info[i + `1`].as_glyph()];
1324	if indic_plan.pref.would_substitute(&plan.ot_map, face, glyphs) {
1325	buffer.info[i + `0`].mask \|= indic_plan.mask_array[indic_feature::PREF];
1326	buffer.info[i + `1`].mask \|= indic_plan.mask_array[indic_feature::PREF];
1327	break;
1328	}
1329	}
1330	}
1331
1332	// Apply ZWJ/ZWNJ effects
1333	for i in start + `1`..end {
1334	if buffer.info[i].is_joiner() {
1335	let non_joiner = buffer.info[i].indic_category() == ot_category_t::OT_ZWNJ;
1336	let mut j = i;
1337
1338	loop {
1339	j -= `1`;
1340
1341	// ZWJ/ZWNJ should disable CJCT. They do that by simply
1342	// being there, since we don't skip them for the CJCT
1343	// feature (ie. F_MANUAL_ZWJ)
1344
1345	// A ZWNJ disables HALF.
1346	if non_joiner {
1347	buffer.info[j].mask &= !indic_plan.mask_array[indic_feature::HALF];
1348	}
1349
1350	if j <= start \|\| buffer.info[j].is_consonant() {
1351	break;
1352	}
1353	}
1354	}
1355	}
1356	}
1357
1358	fn initial_reordering_standalone_cluster(
1359	plan: &hb_ot_shape_plan_t,
1360	indic_plan: &IndicShapePlan,
1361	face: &hb_font_t,
1362	start: usize,
1363	end: usize,
1364	buffer: &mut hb_buffer_t,
1365	) {
1366	// We treat placeholder/dotted-circle as if they are consonants, so we
1367	// should just chain. Only if not in compatibility mode that is...
1368	initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer);
1369	}
1370
1371	fn final_reordering(plan: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) -> bool {
1372	if buffer.is_empty() {
1373	return `false`;
1374	}
1375
1376	foreach_syllable!(buffer, start, end, {
1377	final_reordering_impl(plan, face, start, end, buffer);
1378	});
1379
1380	`false`
1381	}
1382
1383	fn final_reordering_impl(
1384	plan: &hb_ot_shape_plan_t,
1385	face: &hb_font_t,
1386	start: usize,
1387	end: usize,
1388	buffer: &mut hb_buffer_t,
1389	) {
1390	let indic_plan = plan.data::<IndicShapePlan>();
1391
1392	// This function relies heavily on halant glyphs. Lots of ligation
1393	// and possibly multiple substitutions happened prior to this
1394	// phase, and that might have messed up our properties. Recover
1395	// from a particular case of that where we're fairly sure that a
1396	// class of OT_H is desired but has been lost.
1397	//
1398	// We don't call load_virama_glyph(), since we know it's already loaded.
1399	let mut virama_glyph = None;
1400	if indic_plan.config.virama != `0` {
1401	if let Some(g) = face.get_nominal_glyph(indic_plan.config.virama) {
1402	virama_glyph = Some(g.0 as u32);
1403	}
1404	}
1405
1406	if let Some(virama_glyph) = virama_glyph {
1407	for info in &mut buffer.info[start..end] {
1408	if info.glyph_id == virama_glyph
1409	&& _hb_glyph_info_ligated(info)
1410	&& _hb_glyph_info_multiplied(info)
1411	{
1412	// This will make sure that this glyph passes is_halant() test.
1413	info.set_indic_category(ot_category_t::OT_H);
1414	_hb_glyph_info_clear_ligated_and_multiplied(info);
1415	}
1416	}
1417	}
1418
1419	// 4. Final reordering:
1420	//
1421	// After the localized forms and basic shaping forms GSUB features have been
1422	// applied (see below), the shaping engine performs some final glyph
1423	// reordering before applying all the remaining font features to the entire
1424	// syllable.
1425
1426	let mut try_pref = indic_plan.mask_array[indic_feature::PREF] != `0`;
1427
1428	let mut base = start;
1429	while base < end {
1430	if buffer.info[base].indic_position() as u32 >= ot_position_t::POS_BASE_C as u32 {
1431	if try_pref && base + `1` < end {
1432	for i in base + `1`..end {
1433	if (buffer.info[i].mask & indic_plan.mask_array[indic_feature::PREF]) != `0` {
1434	if !(_hb_glyph_info_substituted(&buffer.info[i])
1435	&& _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[i]))
1436	{
1437	// Ok, this was a 'pref' candidate but didn't form any.
1438	// Base is around here...
1439	base = i;
1440	while base < end && buffer.info[base].is_halant() {
1441	base += `1`;
1442	}
1443
1444	if base < end {
1445	buffer.info[base].set_indic_position(ot_position_t::POS_BASE_C);
1446	}
1447
1448	try_pref = `false`;
1449	}
1450
1451	break;
1452	}
1453
1454	if base == end {
1455	break;
1456	}
1457	}
1458	}
1459
1460	// For Malayalam, skip over unformed below- (but NOT post-) forms.
1461	if buffer.script == Some(script::MALAYALAM) {
1462	let mut i = base + `1`;
1463	while i < end {
1464	while i < end && buffer.info[i].is_joiner() {
1465	i += `1`;
1466	}
1467
1468	if i == end \|\| !buffer.info[i].is_halant() {
1469	break;
1470	}
1471
1472	i += `1`; // Skip halant.
1473
1474	while i < end && buffer.info[i].is_joiner() {
1475	i += `1`;
1476	}
1477
1478	if i < end
1479	&& buffer.info[i].is_consonant()
1480	&& buffer.info[i].indic_position() == ot_position_t::POS_BELOW_C
1481	{
1482	base = i;
1483	buffer.info[base].set_indic_position(ot_position_t::POS_BASE_C);
1484	}
1485
1486	i += `1`;
1487	}
1488	}
1489
1490	if start < base
1491	&& buffer.info[base].indic_position() as u32 > ot_position_t::POS_BASE_C as u32
1492	{
1493	base -= `1`;
1494	}
1495
1496	break;
1497	}
1498
1499	base += `1`;
1500	}
1501
1502	if base == end
1503	&& start < base
1504	&& buffer.info[base - `1`].is_one_of(rb_flag(ot_category_t::OT_ZWJ as u32))
1505	{
1506	base -= `1`;
1507	}
1508
1509	if base < end {
1510	while start < base
1511	&& buffer.info[base].is_one_of(
1512	rb_flag(ot_category_t::OT_N as u32) \| rb_flag(ot_category_t::OT_H as u32),
1513	)
1514	{
1515	base -= `1`;
1516	}
1517	}
1518
1519	// - Reorder matras:
1520	//
1521	// If a pre-base matra character had been reordered before applying basic
1522	// features, the glyph can be moved closer to the main consonant based on
1523	// whether half-forms had been formed. Actual position for the matra is
1524	// defined as “after last standalone halant glyph, after initial matra
1525	// position and before the main consonant”. If ZWJ or ZWNJ follow this
1526	// halant, position is moved after it.
1527	//
1528	// IMPLEMENTATION NOTES:
1529	//
1530	// It looks like the last sentence is wrong. Testing, with Windows 7 Uniscribe
1531	// and Devanagari shows that the behavior is best described as:
1532	//
1533	// "If ZWJ follows this halant, matra is NOT repositioned after this halant.
1534	// If ZWNJ follows this halant, position is moved after it."
1535	//
1536	// Test case, with Adobe Devanagari or Nirmala UI:
1537	//
1538	// U+091F,U+094D,U+200C,U+092F,U+093F
1539	// (Matra moves to the middle, after ZWNJ.)
1540	//
1541	// U+091F,U+094D,U+200D,U+092F,U+093F
1542	// (Matra does NOT move, stays to the left.)
1543	//
1544	// https://github.com/harfbuzz/harfbuzz/issues/1070
1545
1546	// Otherwise there can't be any pre-base matra characters.
1547	if start + `1` < end && start < base {
1548	// If we lost track of base, alas, position before last thingy.
1549	let mut new_pos = if base == end { base - `2` } else { base - `1` };
1550
1551	// Malayalam / Tamil do not have "half" forms or explicit virama forms.
1552	// The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1553	// We want to position matra after them.
1554	if buffer.script != Some(script::MALAYALAM) && buffer.script != Some(script::TAMIL) {
1555	loop {
1556	while new_pos > start
1557	&& !buffer.info[new_pos].is_one_of(
1558	rb_flag(ot_category_t::OT_M as u32)
1559	\| rb_flag(ot_category_t::OT_MPst as u32)
1560	\| rb_flag(ot_category_t::OT_H as u32),
1561	)
1562	{
1563	new_pos -= `1`;
1564	}
1565
1566	// If we found no Halant we are done.
1567	// Otherwise only proceed if the Halant does
1568	// not belong to the Matra itself!
1569	if buffer.info[new_pos].is_halant()
1570	&& buffer.info[new_pos].indic_position() != ot_position_t::POS_PRE_M
1571	{
1572	if new_pos + `1` < end {
1573	// -> If ZWJ follows this halant, matra is NOT repositioned after this halant.
1574	if buffer.info[new_pos + `1`].indic_category() == ot_category_t::OT_ZWJ {
1575	// Keep searching.
1576	if new_pos > start {
1577	new_pos -= `1`;
1578	continue;
1579	}
1580	}
1581
1582	// -> If ZWNJ follows this halant, position is moved after it.
1583	//
1584	// IMPLEMENTATION NOTES:
1585	//
1586	// This is taken care of by the state-machine. A Halant,ZWNJ is a terminating
1587	// sequence for a consonant syllable; any pre-base matras occurring after it
1588	// will belong to the subsequent syllable.
1589	}
1590	} else {
1591	new_pos = start; // No move.
1592	}
1593
1594	break;
1595	}
1596	}
1597
1598	if start < new_pos && buffer.info[new_pos].indic_position() != ot_position_t::POS_PRE_M {
1599	// Now go see if there's actually any matras...
1600	for i in (start + `1`..=new_pos).rev() {
1601	if buffer.info[i - `1`].indic_position() == ot_position_t::POS_PRE_M {
1602	let old_pos = i - `1`;
1603	// Shouldn't actually happen.
1604	if old_pos < base && base <= new_pos {
1605	base -= `1`;
1606	}
1607
1608	let tmp = buffer.info[old_pos];
1609	for i in `0`..new_pos - old_pos {
1610	buffer.info[i + old_pos] = buffer.info[i + old_pos + `1`];
1611	}
1612	buffer.info[new_pos] = tmp;
1613
1614	// Note: this merge_clusters() is intentionally after* the reordering.*
1615	// Indic matra reordering is special and tricky...
1616	buffer.merge_clusters(new_pos, cmp::min(end, base + `1`));
1617
1618	new_pos -= `1`;
1619	}
1620	}
1621	} else {
1622	for i in start..base {
1623	if buffer.info[i].indic_position() == ot_position_t::POS_PRE_M {
1624	buffer.merge_clusters(i, cmp::min(end, base + `1`));
1625	break;
1626	}
1627	}
1628	}
1629	}
1630
1631	// - Reorder reph:
1632	//
1633	// Reph’s original position is always at the beginning of the syllable,
1634	// (i.e. it is not reordered at the character reordering stage). However,
1635	// it will be reordered according to the basic-forms shaping results.
1636	// Possible positions for reph, depending on the script, are; after main,
1637	// before post-base consonant forms, and after post-base consonant forms.
1638
1639	// Two cases:
1640	//
1641	// - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then
1642	// we should only move it if the sequence ligated to the repha form.
1643	//
1644	// - If repha is encoded separately and in the logical position, we should only
1645	// move it if it did NOT ligate. If it ligated, it's probably the font trying
1646	// to make it work without the reordering.
1647
1648	if start + `1` < end
1649	&& buffer.info[start].indic_position() == ot_position_t::POS_RA_TO_BECOME_REPH
1650	&& (buffer.info[start].indic_category() == ot_category_t::OT_Repha)
1651	^ _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[start])
1652	{
1653	let mut new_reph_pos;
1654	'reph: {
1655	let reph_pos = indic_plan.config.reph_pos;
1656
1657	// 1. If reph should be positioned after post-base consonant forms,
1658	// proceed to step 5.
1659	if reph_pos != RephPosition::AfterPost {
1660	// 2. If the reph repositioning class is not after post-base: target
1661	// position is after the first explicit halant glyph between the
1662	// first post-reph consonant and last main consonant. If ZWJ or ZWNJ
1663	// are following this halant, position is moved after it. If such
1664	// position is found, this is the target position. Otherwise,
1665	// proceed to the next step.
1666	//
1667	// Note: in old-implementation fonts, where classifications were
1668	// fixed in shaping engine, there was no case where reph position
1669	// will be found on this step.
1670	{
1671	new_reph_pos = start + `1`;
1672	while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() {
1673	new_reph_pos += `1`;
1674	}
1675
1676	if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() {
1677	// ->If ZWJ or ZWNJ are following this halant, position is moved after it.
1678	if new_reph_pos + `1` < base && buffer.info[new_reph_pos + `1`].is_joiner() {
1679	new_reph_pos += `1`;
1680	}
1681
1682	break 'reph;
1683	}
1684	}
1685
1686	// 3. If reph should be repositioned after the main consonant: find the
1687	// first consonant not ligated with main, or find the first
1688	// consonant that is not a potential pre-base-reordering Ra.
1689	if reph_pos == RephPosition::AfterMain {
1690	new_reph_pos = base;
1691	while new_reph_pos + `1` < end
1692	&& buffer.info[new_reph_pos + `1`].indic_position()
1693	<= ot_position_t::POS_AFTER_MAIN
1694	{
1695	new_reph_pos += `1`;
1696	}
1697
1698	if new_reph_pos < end {
1699	break 'reph;
1700	}
1701	}
1702
1703	// 4. If reph should be positioned before post-base consonant, find
1704	// first post-base classified consonant not ligated with main. If no
1705	// consonant is found, the target position should be before the
1706	// first matra, syllable modifier sign or vedic sign.
1707	//
1708	// This is our take on what step 4 is trying to say (and failing, BADLY).
1709	if reph_pos == RephPosition::AfterSub {
1710	new_reph_pos = base;
1711	while new_reph_pos + `1` < end
1712	&& (rb_flag_unsafe(buffer.info[new_reph_pos + `1`].indic_position() as u32)
1713	& (rb_flag(ot_position_t::POS_POST_C as u32)
1714	\| rb_flag(ot_position_t::POS_AFTER_POST as u32)
1715	\| rb_flag(ot_position_t::POS_SMVD as u32)))
1716	== `0`
1717	{
1718	new_reph_pos += `1`;
1719	}
1720
1721	if new_reph_pos < end {
1722	break 'reph;
1723	}
1724	}
1725	}
1726
1727	// 5. If no consonant is found in steps 3 or 4, move reph to a position
1728	// immediately before the first post-base matra, syllable modifier
1729	// sign or vedic sign that has a reordering class after the intended
1730	// reph position. For example, if the reordering position for reph
1731	// is post-main, it will skip above-base matras that also have a
1732	// post-main position.
1733	//
1734	// Copied from step 2.
1735	new_reph_pos = start + `1`;
1736	while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() {
1737	new_reph_pos += `1`;
1738	}
1739
1740	if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() {
1741	/ ->If ZWJ or ZWNJ are following this halant, position is moved after it. /
1742	if new_reph_pos + `1` < base && buffer.info[new_reph_pos + `1`].is_joiner() {
1743	new_reph_pos += `1`;
1744	}
1745
1746	break 'reph;
1747	}
1748	// See https://github.com/harfbuzz/harfbuzz/issues/2298#issuecomment-615318654
1749
1750	// 6. Otherwise, reorder reph to the end of the syllable.
1751	{
1752	new_reph_pos = end - `1`;
1753	while new_reph_pos > start
1754	&& buffer.info[new_reph_pos].indic_position() == ot_position_t::POS_SMVD
1755	{
1756	new_reph_pos -= `1`;
1757	}
1758
1759	// If the Reph is to be ending up after a Matra,Halant sequence,
1760	// position it before that Halant so it can interact with the Matra.
1761	// However, if it's a plain Consonant,Halant we shouldn't do that.
1762	// Uniscribe doesn't do this.
1763	// TEST: U+0930,U+094D,U+0915,U+094B,U+094D
1764	if buffer.info[new_reph_pos].is_halant() {
1765	for info in &buffer.info[base + `1`..new_reph_pos] {
1766	if (rb_flag_unsafe(info.indic_category() as u32)
1767	& (rb_flag(ot_category_t::OT_M as u32)
1768	\| rb_flag(ot_category_t::OT_MPst as u32)))
1769	!= `0`
1770	{
1771	// Ok, got it.
1772	new_reph_pos -= `1`;
1773	}
1774	}
1775	}
1776	}
1777
1778	break 'reph;
1779	}
1780
1781	// Move
1782	buffer.merge_clusters(start, new_reph_pos + `1`);
1783
1784	let reph = buffer.info[start];
1785	for i in `0`..new_reph_pos - start {
1786	buffer.info[i + start] = buffer.info[i + start + `1`];
1787	}
1788	buffer.info[new_reph_pos] = reph;
1789
1790	if start < base && base <= new_reph_pos {
1791	base -= `1`;
1792	}
1793	}
1794
1795	// - Reorder pre-base-reordering consonants:
1796	//
1797	// If a pre-base-reordering consonant is found, reorder it according to
1798	// the following rules:
1799
1800	// Otherwise there can't be any pre-base-reordering Ra.
1801	if try_pref && base + `1` < end {
1802	for i in base + `1`..end {
1803	if (buffer.info[i].mask & indic_plan.mask_array[indic_feature::PREF]) != `0` {
1804	// 1. Only reorder a glyph produced by substitution during application
1805	// of the <pref> feature. (Note that a font may shape a Ra consonant with
1806	// the feature generally but block it in certain contexts.)
1807	//
1808	// Note: We just check that something got substituted. We don't check that
1809	// the <pref> feature actually did it...
1810	//
1811	// Reorder pref only if it ligated.
1812	if _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[i]) {
1813	// 2. Try to find a target position the same way as for pre-base matra.
1814	// If it is found, reorder pre-base consonant glyph.
1815	//
1816	// 3. If position is not found, reorder immediately before main consonant.
1817
1818	let mut new_pos = base;
1819	// Malayalam / Tamil do not have "half" forms or explicit virama forms.
1820	// The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1821	// We want to position matra after them.
1822	if buffer.script != Some(script::MALAYALAM)
1823	&& buffer.script != Some(script::TAMIL)
1824	{
1825	while new_pos > start
1826	&& !buffer.info[new_pos - `1`].is_one_of(
1827	rb_flag(ot_category_t::OT_M as u32)
1828	\| rb_flag(ot_category_t::OT_MPst as u32)
1829	\| rb_flag(ot_category_t::OT_H as u32),
1830	)
1831	{
1832	new_pos -= `1`;
1833	}
1834	}
1835
1836	if new_pos > start && buffer.info[new_pos - `1`].is_halant() {
1837	// -> If ZWJ or ZWNJ follow this halant, position is moved after it.
1838	if new_pos < end && buffer.info[new_pos].is_joiner() {
1839	new_pos += `1`;
1840	}
1841	}
1842
1843	{
1844	let old_pos = i;
1845
1846	buffer.merge_clusters(new_pos, old_pos + `1`);
1847	let tmp = buffer.info[old_pos];
1848	for i in (`0`..old_pos - new_pos).rev() {
1849	buffer.info[i + new_pos + `1`] = buffer.info[i + new_pos];
1850	}
1851	buffer.info[new_pos] = tmp;
1852
1853	if new_pos <= base && base < old_pos {
1854	// TODO: investigate
1855	#[allow(unused_assignments)]
1856	{
1857	base += `1`;
1858	}
1859	}
1860	}
1861	}
1862
1863	break;
1864	}
1865	}
1866	}
1867
1868	// Apply 'init' to the Left Matra if it's a word start.
1869	if buffer.info[start].indic_position() == ot_position_t::POS_PRE_M {
1870	if start == `0`
1871	\|\| (rb_flag_unsafe(
1872	_hb_glyph_info_get_general_category(&buffer.info[start - `1`]).to_rb(),
1873	) & rb_flag_range(
1874	hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT,
1875	hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,
1876	)) == `0`
1877	{
1878	buffer.info[start].mask \|= indic_plan.mask_array[indic_feature::INIT];
1879	} else {
1880	buffer.unsafe_to_break(Some(start - `1`), Some(start + `1`));
1881	}
1882	}
1883	}
1884