indic.rs source code [crates/rustybuzz-0.13.0/src/complex/indic.rs]

1	use alloc::boxed::Box;
2	use core::cmp;
3	use core::convert::TryFrom;
4	use core::ops::Range;
5
6	use ttf_parser::GlyphId;
7
8	use super::*;
9	use crate::buffer::Buffer;
10	use crate::normalize::ShapeNormalizationMode;
11	use crate::ot::{
12	feature, FeatureFlags, LayoutTable, Map, TableIndex, WouldApply, WouldApplyContext,
13	};
14	use crate::plan::{ShapePlan, ShapePlanner};
15	use crate::unicode::{hb_gc, CharExt, GeneralCategoryExt};
16	use crate::{script, Face, GlyphInfo, Mask, Script, Tag};
17
18	pub const INDIC_SHAPER: ComplexShaper = ComplexShaper {
19	collect_features: Some(collect_features),
20	override_features: Some(override_features),
21	create_data: Some(\|plan: &ShapePlan\| Box::new(IndicShapePlan::new(plan))),
22	preprocess_text: Some(preprocess_text),
23	postprocess_glyphs: None,
24	normalization_mode: Some(ShapeNormalizationMode::ComposedDiacriticsNoShortCircuit),
25	decompose: Some(decompose),
26	compose: Some(compose),
27	setup_masks: Some(setup_masks),
28	gpos_tag: None,
29	reorder_marks: None,
30	zero_width_marks: None,
31	fallback_position: `false`,
32	};
33
/// Numeric category code attached to a glyph by the Indic-family shapers.
///
/// The valid values are the constants in [`category`].
pub type Category = u8;

/// Category codes.
///
/// The numeric values are used as bit indices when building flag sets
/// (see `category_flag`), so they must stay unique.
pub mod category {
    pub const X: u8 = 0;
    pub const C: u8 = 1;
    pub const V: u8 = 2;
    pub const N: u8 = 3;
    pub const H: u8 = 4;
    pub const ZWNJ: u8 = 5;
    pub const ZWJ: u8 = 6;
    pub const M: u8 = 7;
    pub const SM: u8 = 8;
    // OT_VD = 9, UNUSED; we use OT_A instead.
    pub const A: u8 = 10;
    pub const PLACEHOLDER: u8 = 11;
    pub const DOTTED_CIRCLE: u8 = 12;
    /// Register Shifter, used in Khmer OT spec.
    pub const RS: u8 = 13;
    /// Khmer-style Virama.
    pub const COENG: u8 = 14;
    /// Atomically-encoded logical or visual repha.
    pub const REPHA: u8 = 15;
    pub const RA: u8 = 16;
    /// Consonant-Medial.
    pub const CM: u8 = 17;
    /// Avagraha, etc that take marks (SM,A,VD).
    pub const SYMBOL: u8 = 18;
    pub const CS: u8 = 19;
    pub const ROBATIC: u8 = 20;
    pub const X_GROUP: u8 = 21;
    pub const Y_GROUP: u8 = 22;
    pub const MW: u8 = 23;
    pub const MY: u8 = 24;
    pub const PT: u8 = 25;

    // The following are used by Khmer & Myanmar shapers. Defined here for them to share.
    pub const V_AVB: u8 = 26;
    pub const V_BLW: u8 = 27;
    pub const V_PRE: u8 = 28;
    pub const V_PST: u8 = 29;
    /// Variation selectors.
    pub const VS: u8 = 30;
    /// Punctuation.
    pub const P: u8 = 31;
    /// Digits except zero.
    pub const D: u8 = 32;
    /// Medial la.
    pub const ML: u8 = 33;
}
72
/// Numeric position code attached to a glyph by the Indic shaper.
///
/// The valid values are the constants in [`position`].
pub type Position = u8;

/// Position codes, listed in increasing numeric order from `START` to `END`.
pub mod position {
    pub const START: u8 = 0;

    pub const RA_TO_BECOME_REPH: u8 = 1;
    pub const PRE_M: u8 = 2;
    pub const PRE_C: u8 = 3;

    pub const BASE_C: u8 = 4;
    pub const AFTER_MAIN: u8 = 5;

    pub const ABOVE_C: u8 = 6;

    pub const BEFORE_SUB: u8 = 7;
    pub const BELOW_C: u8 = 8;
    pub const AFTER_SUB: u8 = 9;

    pub const BEFORE_POST: u8 = 10;
    pub const POST_C: u8 = 11;
    pub const AFTER_POST: u8 = 12;

    pub const FINAL_C: u8 = 13;
    pub const SMVD: u8 = 14;

    pub const END: u8 = 15;
}
92
/// Syllabic category of a character.
///
/// The variants are kept in alphabetical order.
// `dead_code` is allowed on the whole enum; presumably not every variant is
// referenced by the shapers — confirm before removing the attribute.
#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq)]
pub enum SyllabicCategory {
    Other,

    Avagraha,
    Bindu,
    BrahmiJoiningNumber,
    CantillationMark,

    Consonant,
    ConsonantDead,
    ConsonantFinal,
    ConsonantHeadLetter,
    ConsonantInitialPostfixed,
    ConsonantKiller,
    ConsonantMedial,
    ConsonantPlaceholder,
    ConsonantPrecedingRepha,
    ConsonantPrefixed,
    ConsonantSubjoined,
    ConsonantSucceedingRepha,
    ConsonantWithStacker,

    GeminationMark,
    InvisibleStacker,
    Joiner,
    ModifyingLetter,
    NonJoiner,
    Nukta,
    Number,
    NumberJoiner,
    PureKiller,
    RegisterShifter,
    SyllableModifier,
    ToneLetter,
    ToneMark,
    Virama,
    Visarga,

    Vowel,
    VowelDependent,
    VowelIndependent,
}
133
/// Placement category of a matra.
///
/// Variant names spell out the side(s) of the base named by the category.
// `dead_code` is allowed on the whole enum; presumably not every variant is
// referenced by the shapers — confirm before removing the attribute.
#[allow(dead_code)]
#[derive(Clone, Copy)]
pub enum MatraCategory {
    NotApplicable,

    // Single-sided.
    Left,
    Top,
    Bottom,
    Right,

    // Multi-part.
    BottomAndLeft,
    BottomAndRight,
    LeftAndRight,
    TopAndBottom,
    TopAndBottomAndRight,
    TopAndBottomAndLeft,
    TopAndLeft,
    TopAndLeftAndRight,
    TopAndRight,

    Overstruck,
    VisualOrderLeft,
}
154
155	const INDIC_FEATURES: &[(Tag, FeatureFlags)] = &[
156	// Basic features.
157	// These features are applied in order, one at a time, after initial_reordering,
158	// constrained to the syllable.
159	(feature::NUKTA_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
160	(feature::AKHANDS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
161	(feature::REPH_FORMS, FeatureFlags::MANUAL_JOINERS),
162	(feature::RAKAR_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
163	(feature::PRE_BASE_FORMS, FeatureFlags::MANUAL_JOINERS),
164	(feature::BELOW_BASE_FORMS, FeatureFlags::MANUAL_JOINERS),
165	(feature::ABOVE_BASE_FORMS, FeatureFlags::MANUAL_JOINERS),
166	(feature::HALF_FORMS, FeatureFlags::MANUAL_JOINERS),
167	(feature::POST_BASE_FORMS, FeatureFlags::MANUAL_JOINERS),
168	(feature::VATTU_VARIANTS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
169	(feature::CONJUNCT_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
170	// Other features.
171	// These features are applied all at once, after final_reordering, constrained
172	// to the syllable.
173	// Default Bengali font in Windows for example has intermixed
174	// lookups for init,pres,abvs,blws features.
175	(feature::INITIAL_FORMS, FeatureFlags::MANUAL_JOINERS),
176	(
177	feature::PRE_BASE_SUBSTITUTIONS,
178	FeatureFlags::GLOBAL_MANUAL_JOINERS,
179	),
180	(
181	feature::ABOVE_BASE_SUBSTITUTIONS,
182	FeatureFlags::GLOBAL_MANUAL_JOINERS,
183	),
184	(
185	feature::BELOW_BASE_SUBSTITUTIONS,
186	FeatureFlags::GLOBAL_MANUAL_JOINERS,
187	),
188	(
189	feature::POST_BASE_SUBSTITUTIONS,
190	FeatureFlags::GLOBAL_MANUAL_JOINERS,
191	),
192	(feature::HALANT_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS),
193	];
194
/// Index of each feature inside `INDIC_FEATURES` (and therefore inside
/// `IndicShapePlan::mask_array`).
///
/// Must be in the same order as the `INDIC_FEATURES` array.
#[allow(dead_code)]
mod indic_feature {
    // Basic features.
    pub const NUKT: usize = 0;
    pub const AKHN: usize = 1;
    pub const RPHF: usize = 2;
    pub const RKRF: usize = 3;
    pub const PREF: usize = 4;
    pub const BLWF: usize = 5;
    pub const ABVF: usize = 6;
    pub const HALF: usize = 7;
    pub const PSTF: usize = 8;
    pub const VATU: usize = 9;
    pub const CJCT: usize = 10;

    // Other features.
    pub const INIT: usize = 11;
    pub const PRES: usize = 12;
    pub const ABVS: usize = 13;
    pub const BLWS: usize = 14;
    pub const PSTS: usize = 15;
    pub const HALN: usize = 16;
}
216
217	const fn category_flag(c: Category) -> u32 {
218	rb_flag(c as u32)
219	}
220
221	const MEDIAL_FLAGS: u32 = category_flag(category::CM);
222	// Note:
223	//
224	// We treat Vowels and placeholders as if they were consonants. This is safe because Vowels
225	// cannot happen in a consonant syllable. The plus side however is, we can call the
226	// consonant syllable logic from the vowel syllable function and get it all right!
227	const CONSONANT_FLAGS: u32 = category_flag(category::C)
228	\| category_flag(category::CS)
229	\| category_flag(category::RA)
230	\| MEDIAL_FLAGS
231	\| category_flag(category::V)
232	\| category_flag(category::PLACEHOLDER)
233	\| category_flag(category::DOTTED_CIRCLE);
234	const JOINER_FLAGS: u32 = category_flag(category::ZWJ) \| category_flag(category::ZWNJ);
235
/// Code points of the letter Ra in each supported script.
///
/// A consonant whose code point is listed here gets the `RA` category.
// This is a hack for now. We should move this data into the main Indic table.
// Or completely remove it and just check in the tables.
const RA_CHARS: &[u32] = &[
    0x0930, // Devanagari
    0x09B0, // Bengali
    0x09F0, // Bengali
    0x0A30, // Gurmukhi. No Reph
    0x0AB0, // Gujarati
    0x0B30, // Oriya
    0x0BB0, // Tamil. No Reph
    0x0C30, // Telugu. Reph formed only with ZWJ
    0x0CB0, // Kannada
    0x0D30, // Malayalam. No Reph, Logical Repha
    0x0DBB, // Sinhala. Reph formed only with ZWJ
];
251
/// Strategy used to locate the base consonant of a syllable.
#[derive(Clone, Copy, PartialEq)]
enum BasePosition {
    /// Sinhala variant of the search; it does not consult the font for
    /// consonant positions.
    LastSinhala,
    /// Search backwards from the end of the syllable.
    Last,
}
257
258	#[derive(Clone, Copy, PartialEq)]
259	enum RephPosition {
260	AfterMain = position::AFTER_MAIN as isize,
261	BeforeSub = position::BEFORE_SUB as isize,
262	AfterSub = position::AFTER_SUB as isize,
263	BeforePost = position::BEFORE_POST as isize,
264	AfterPost = position::AFTER_POST as isize,
265	}
266
/// How a script encodes its reph.
#[derive(Clone, Copy, PartialEq)]
enum RephMode {
    /// Reph formed out of initial Ra,H sequence.
    Implicit,
    /// Reph formed out of initial Ra,H,ZWJ sequence.
    Explicit,
    /// Encoded Repha character, needs reordering.
    LogRepha,
}
276
/// Which consonants the below-forms feature is applied to.
#[derive(Clone, Copy, PartialEq)]
enum BlwfMode {
    /// Below-forms feature applied to pre-base and post-base.
    PreAndPost,
    /// Below-forms feature applied to post-base only.
    PostOnly,
}
284
285	#[derive(Clone, Copy)]
286	struct IndicConfig {
287	script: Option<Script>,
288	has_old_spec: bool,
289	virama: u32,
290	base_pos: BasePosition,
291	reph_pos: RephPosition,
292	reph_mode: RephMode,
293	blwf_mode: BlwfMode,
294	}
295
296	impl IndicConfig {
297	const fn new(
298	script: Option<Script>,
299	has_old_spec: bool,
300	virama: u32,
301	base_pos: BasePosition,
302	reph_pos: RephPosition,
303	reph_mode: RephMode,
304	blwf_mode: BlwfMode,
305	) -> Self {
306	IndicConfig {
307	script,
308	has_old_spec,
309	virama,
310	base_pos,
311	reph_pos,
312	reph_mode,
313	blwf_mode,
314	}
315	}
316	}
317
318	const INDIC_CONFIGS: &[IndicConfig] = &[
319	IndicConfig::new(
320	script:None,
321	has_old_spec:`false`,
322	virama:`0`,
323	base_pos:BasePosition::Last,
324	reph_pos:RephPosition::BeforePost,
325	RephMode::Implicit,
326	BlwfMode::PreAndPost,
327	),
328	IndicConfig::new(
329	script:Some(script::DEVANAGARI),
330	has_old_spec:`true`,
331	virama:`0x094D`,
332	base_pos:BasePosition::Last,
333	reph_pos:RephPosition::BeforePost,
334	RephMode::Implicit,
335	BlwfMode::PreAndPost,
336	),
337	IndicConfig::new(
338	script:Some(script::BENGALI),
339	has_old_spec:`true`,
340	virama:`0x09CD`,
341	base_pos:BasePosition::Last,
342	reph_pos:RephPosition::AfterSub,
343	RephMode::Implicit,
344	BlwfMode::PreAndPost,
345	),
346	IndicConfig::new(
347	script:Some(script::GURMUKHI),
348	has_old_spec:`true`,
349	virama:`0x0A4D`,
350	base_pos:BasePosition::Last,
351	reph_pos:RephPosition::BeforeSub,
352	RephMode::Implicit,
353	BlwfMode::PreAndPost,
354	),
355	IndicConfig::new(
356	script:Some(script::GUJARATI),
357	has_old_spec:`true`,
358	virama:`0x0ACD`,
359	base_pos:BasePosition::Last,
360	reph_pos:RephPosition::BeforePost,
361	RephMode::Implicit,
362	BlwfMode::PreAndPost,
363	),
364	IndicConfig::new(
365	script:Some(script::ORIYA),
366	has_old_spec:`true`,
367	virama:`0x0B4D`,
368	base_pos:BasePosition::Last,
369	reph_pos:RephPosition::AfterMain,
370	RephMode::Implicit,
371	BlwfMode::PreAndPost,
372	),
373	IndicConfig::new(
374	script:Some(script::TAMIL),
375	has_old_spec:`true`,
376	virama:`0x0BCD`,
377	base_pos:BasePosition::Last,
378	reph_pos:RephPosition::AfterPost,
379	RephMode::Implicit,
380	BlwfMode::PreAndPost,
381	),
382	IndicConfig::new(
383	script:Some(script::TELUGU),
384	has_old_spec:`true`,
385	virama:`0x0C4D`,
386	base_pos:BasePosition::Last,
387	reph_pos:RephPosition::AfterPost,
388	RephMode::Explicit,
389	BlwfMode::PostOnly,
390	),
391	IndicConfig::new(
392	script:Some(script::KANNADA),
393	has_old_spec:`true`,
394	virama:`0x0CCD`,
395	base_pos:BasePosition::Last,
396	reph_pos:RephPosition::AfterPost,
397	RephMode::Implicit,
398	BlwfMode::PostOnly,
399	),
400	IndicConfig::new(
401	script:Some(script::MALAYALAM),
402	has_old_spec:`true`,
403	virama:`0x0D4D`,
404	base_pos:BasePosition::Last,
405	reph_pos:RephPosition::AfterMain,
406	RephMode::LogRepha,
407	BlwfMode::PreAndPost,
408	),
409	IndicConfig::new(
410	script:Some(script::SINHALA),
411	has_old_spec:`false`,
412	virama:`0x0DCA`,
413	base_pos:BasePosition::LastSinhala,
414	reph_pos:RephPosition::AfterPost,
415	RephMode::Explicit,
416	BlwfMode::PreAndPost,
417	),
418	];
419
420	struct IndicWouldSubstituteFeature {
421	lookups: Range<usize>,
422	zero_context: bool,
423	}
424
425	impl IndicWouldSubstituteFeature {
426	pub fn new(map: &Map, feature_tag: Tag, zero_context: bool) -> Self {
427	IndicWouldSubstituteFeature {
428	lookups: match map.feature_stage(TableIndex::GSUB, feature_tag) {
429	Some(stage) => map.stage_lookup_range(TableIndex::GSUB, stage),
430	None => `0`..`0`,
431	},
432	zero_context,
433	}
434	}
435
436	pub fn would_substitute(&self, map: &Map, face: &Face, glyphs: &[GlyphId]) -> bool {
437	for index in self.lookups.clone() {
438	let lookup = map.lookup(TableIndex::GSUB, index);
439	let ctx = WouldApplyContext {
440	glyphs,
441	zero_context: self.zero_context,
442	};
443	if face
444	.gsub
445	.as_ref()
446	.and_then(\|table\| table.get_lookup(lookup.index))
447	.map_or(`false`, \|lookup\| lookup.would_apply(&ctx))
448	{
449	return `true`;
450	}
451	}
452
453	`false`
454	}
455	}
456
457	struct IndicShapePlan {
458	config: IndicConfig,
459	is_old_spec: bool,
460	// virama_glyph: Option<u32>,
461	rphf: IndicWouldSubstituteFeature,
462	pref: IndicWouldSubstituteFeature,
463	blwf: IndicWouldSubstituteFeature,
464	pstf: IndicWouldSubstituteFeature,
465	vatu: IndicWouldSubstituteFeature,
466	mask_array: [Mask; INDIC_FEATURES.len()],
467	}
468
469	impl IndicShapePlan {
470	fn new(plan: &ShapePlan) -> Self {
471	let script = plan.script;
472	let config = if let Some(c) = INDIC_CONFIGS.iter().skip(`1`).find(\|c\| c.script == script) {
473	*c
474	} else {
475	INDIC_CONFIGS[`0`]
476	};
477
478	let is_old_spec = config.has_old_spec
479	&& plan
480	.ot_map
481	.chosen_script(TableIndex::GSUB)
482	.map_or(`true`, \|tag\| tag.to_bytes()[`3`] != b'2');
483
484	// Use zero-context would_substitute() matching for new-spec of the main
485	// Indic scripts, and scripts with one spec only, but not for old-specs.
486	// The new-spec for all dual-spec scripts says zero-context matching happens.
487	//
488	// However, testing with Malayalam shows that old and new spec both allow
489	// context. Testing with Bengali new-spec however shows that it doesn't.
490	// So, the heuristic here is the way it is. It should only* be changed,*
491	// as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE.
492	let zero_context = is_old_spec && script != Some(script::MALAYALAM);
493
494	let mut mask_array = [`0`; INDIC_FEATURES.len()];
495	for (i, feature) in INDIC_FEATURES.iter().enumerate() {
496	mask_array[i] = if feature.1.contains(FeatureFlags::GLOBAL) {
497	`0`
498	} else {
499	plan.ot_map.one_mask(feature.0)
500	}
501	}
502
503	// TODO: what is this?
504	// let mut virama_glyph = None;
505	// if config.virama != 0 {
506	// if let Some(g) = face.glyph_index(char::try_from(config.virama).unwrap()) {
507	// virama_glyph = Some(g.0 as u32);
508	// }
509	// }
510
511	IndicShapePlan {
512	config,
513	is_old_spec,
514	// virama_glyph,
515	rphf: IndicWouldSubstituteFeature::new(&plan.ot_map, feature::REPH_FORMS, zero_context),
516	pref: IndicWouldSubstituteFeature::new(
517	&plan.ot_map,
518	feature::PRE_BASE_FORMS,
519	zero_context,
520	),
521	blwf: IndicWouldSubstituteFeature::new(
522	&plan.ot_map,
523	feature::BELOW_BASE_FORMS,
524	zero_context,
525	),
526	pstf: IndicWouldSubstituteFeature::new(
527	&plan.ot_map,
528	feature::POST_BASE_FORMS,
529	zero_context,
530	),
531	vatu: IndicWouldSubstituteFeature::new(
532	&plan.ot_map,
533	feature::VATTU_VARIANTS,
534	zero_context,
535	),
536	mask_array,
537	}
538	}
539	}
540
541	impl GlyphInfo {
542	pub(crate) fn indic_category(&self) -> Category {
543	self.complex_var_u8_category()
544	}
545
546	pub(crate) fn set_indic_category(&mut self, c: Category) {
547	self.set_complex_var_u8_category(c)
548	}
549
550	pub(crate) fn indic_position(&self) -> Position {
551	self.complex_var_u8_auxiliary()
552	}
553
554	pub(crate) fn set_indic_position(&mut self, c: Position) {
555	self.set_complex_var_u8_auxiliary(c)
556	}
557
558	fn is_one_of(&self, flags: u32) -> bool {
559	// If it ligated, all bets are off.
560	if self.is_ligated() {
561	return `false`;
562	}
563
564	rb_flag_unsafe(self.indic_category() as u32) & flags != `0`
565	}
566
567	fn is_joiner(&self) -> bool {
568	self.is_one_of(JOINER_FLAGS)
569	}
570
571	pub(crate) fn is_consonant(&self) -> bool {
572	self.is_one_of(CONSONANT_FLAGS)
573	}
574
575	fn is_halant(&self) -> bool {
576	self.is_one_of(rb_flag(category::H as u32))
577	}
578
579	fn set_indic_properties(&mut self) {
580	let u = self.glyph_id;
581	let (mut cat, mut pos) = get_category_and_position(u);
582
583	// Re-assign category
584
585	// The following act more like the Bindus.
586	match u {
587	`0x0953`..=`0x0954` => cat = category::SM,
588	// The following act like consonants.
589	`0x0A72`..=`0x0A73` \| `0x1CF5`..=`0x1CF6` => cat = category::C,
590	// TODO: The following should only be allowed after a Visarga.
591	// For now, just treat them like regular tone marks.
592	`0x1CE2`..=`0x1CE8` => cat = category::A,
593	// TODO: The following should only be allowed after some of
594	// the nasalization marks, maybe only for U+1CE9..U+1CF1.
595	// For now, just treat them like tone marks.
596	`0x1CED` => cat = category::A,
597	// The following take marks in standalone clusters, similar to Avagraha.
598	`0xA8F2`..=`0xA8F7` \| `0x1CE9`..=`0x1CEC` \| `0x1CEE`..=`0x1CF1` => cat = category::SYMBOL,
599	// https://github.com/harfbuzz/harfbuzz/issues/524
600	`0x0A51` => {
601	cat = category::M;
602	pos = position::BELOW_C;
603	}
604	// According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil,
605	// so the Indic shaper needs to know their categories.
606	`0x11301` \| `0x11303` => cat = category::SM,
607	`0x1133B` \| `0x1133C` => cat = category::N,
608	// https://github.com/harfbuzz/harfbuzz/issues/552
609	`0x0AFB` => cat = category::N,
610	// https://github.com/harfbuzz/harfbuzz/issues/2849
611	`0x0B55` => cat = category::N,
612	// https://github.com/harfbuzz/harfbuzz/issues/538
613	`0x0980` => cat = category::PLACEHOLDER,
614	// https://github.com/harfbuzz/harfbuzz/issues/1613
615	`0x09FC` => cat = category::PLACEHOLDER,
616	// https://github.com/harfbuzz/harfbuzz/issues/623
617	`0x0C80` => cat = category::PLACEHOLDER,
618	`0x2010` \| `0x2011` => cat = category::PLACEHOLDER,
619	`0x25CC` => cat = category::DOTTED_CIRCLE,
620	_ => {}
621	}
622
623	// Re-assign position.
624
625	if (rb_flag_unsafe(cat as u32) & CONSONANT_FLAGS) != `0` {
626	pos = position::BASE_C;
627	if RA_CHARS.contains(&u) {
628	cat = category::RA;
629	}
630	} else if cat == category::M {
631	pos = matra_position_indic(u, pos);
632	} else if (rb_flag_unsafe(cat as u32)
633	& (category_flag(category::SM)
634	\| category_flag(category::A)
635	\| category_flag(category::SYMBOL)))
636	!= `0`
637	{
638	pos = position::SMVD;
639	}
640
641	// Oriya Bindu is BeforeSub in the spec.
642	if u == `0x0B01` {
643	pos = position::BEFORE_SUB;
644	}
645
646	self.set_indic_category(cat);
647	self.set_indic_position(pos);
648	}
649	}
650
651	fn collect_features(planner: &mut ShapePlanner) {
652	// Do this before any lookups have been applied.
653	planner.ot_map.add_gsub_pause(Some(setup_syllables));
654
655	planner
656	.ot_map
657	.enable_feature(feature::LOCALIZED_FORMS, FeatureFlags::empty(), `1`);
658	// The Indic specs do not require ccmp, but we apply it here since if
659	// there is a use of it, it's typically at the beginning.
660	planner.ot_map.enable_feature(
661	feature::GLYPH_COMPOSITION_DECOMPOSITION,
662	FeatureFlags::empty(),
663	`1`,
664	);
665
666	planner.ot_map.add_gsub_pause(Some(initial_reordering));
667
668	for feature in INDIC_FEATURES.iter().take(`10`) {
669	planner.ot_map.add_feature(feature.0, feature.1, `1`);
670	planner.ot_map.add_gsub_pause(None);
671	}
672
673	planner.ot_map.add_gsub_pause(Some(final_reordering));
674	planner
675	.ot_map
676	.add_gsub_pause(Some(crate::ot::clear_syllables));
677
678	for feature in INDIC_FEATURES.iter().skip(`10`) {
679	planner.ot_map.add_feature(feature.0, feature.1, `1`);
680	}
681	}
682
683	fn override_features(planner: &mut ShapePlanner) {
684	planner.ot_map.disable_feature(tag:feature::STANDARD_LIGATURES);
685	}
686
687	fn preprocess_text(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
688	super::vowel_constraints::preprocess_text_vowel_constraints(buffer);
689	}
690
691	fn decompose(ctx: &ShapeNormalizeContext, ab: char) -> Option<(char, char)> {
692	// Don't decompose these.
693	match ab {
694	'`\u{0931}`' \| // DEVANAGARI LETTER RRA
695	// https://github.com/harfbuzz/harfbuzz/issues/779
696	'`\u{09DC}`' \| // BENGALI LETTER RRA
697	'`\u{09DD}`' \| // BENGALI LETTER RHA
698	'`\u{0B94}`' => return None, // TAMIL LETTER AU
699	_ => {}
700	}
701
702	if ab == '`\u{0DDA}`' \|\| ('`\u{0DDC}`'..='`\u{0DDE}`').contains(&ab) {
703	// Sinhala split matras... Let the fun begin.
704	//
705	// These four characters have Unicode decompositions. However, Uniscribe
706	// decomposes them "Khmer-style", that is, it uses the character itself to
707	// get the second half. The first half of all four decompositions is always
708	// U+0DD9.
709	//
710	// Now, there are buggy fonts, namely, the widely used lklug.ttf, that are
711	// broken with Uniscribe. But we need to support them. As such, we only
712	// do the Uniscribe-style decomposition if the character is transformed into
713	// its "sec.half" form by the 'pstf' feature. Otherwise, we fall back to
714	// Unicode decomposition.
715	//
716	// Note that we can't unconditionally use Unicode decomposition. That would
717	// break some other fonts, that are designed to work with Uniscribe, and
718	// don't have positioning features for the Unicode-style decomposition.
719	//
720	// Argh...
721	//
722	// The Uniscribe behavior is now documented in the newly published Sinhala
723	// spec in 2012:
724	//
725	// https://docs.microsoft.com/en-us/typography/script-development/sinhala#shaping
726
727	let mut ok = `false`;
728	if let Some(g) = ctx.face.glyph_index(u32::from(ab)) {
729	let indic_plan = ctx.plan.data::<IndicShapePlan>();
730	ok = indic_plan
731	.pstf
732	.would_substitute(&ctx.plan.ot_map, ctx.face, &[g]);
733	}
734
735	if ok {
736	// Ok, safe to use Uniscribe-style decomposition.
737	return Some(('`\u{0DD9}`', ab));
738	}
739	}
740
741	crate::unicode::decompose(ab)
742	}
743
744	fn compose(_: &ShapeNormalizeContext, a: char, b: char) -> Option<char> {
745	// Avoid recomposing split matras.
746	if a.general_category().is_mark() {
747	return None;
748	}
749
750	// Composition-exclusion exceptions that we want to recompose.
751	if a == '`\u{09AF}`' && b == '`\u{09BC}`' {
752	return Some('`\u{09DF}`');
753	}
754
755	crate::unicode::compose(a, b)
756	}
757
758	fn setup_masks(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
759	// We cannot setup masks here. We save information about characters
760	// and setup masks later on in a pause-callback.
761	for info: &mut GlyphInfo in buffer.info_slice_mut() {
762	info.set_indic_properties();
763	}
764	}
765
766	fn setup_syllables(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
767	super::indic_machine::find_syllables_indic(buffer);
768
769	let mut start: usize = `0`;
770	let mut end: usize = buffer.next_syllable(start:`0`);
771	while start < buffer.len {
772	buffer.unsafe_to_break(start:Some(start), end:Some(end));
773	start = end;
774	end = buffer.next_syllable(start);
775	}
776	}
777
778	fn initial_reordering(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) {
779	use super::indic_machine::SyllableType;
780
781	let indic_plan: &IndicShapePlan = plan.data::<IndicShapePlan>();
782
783	update_consonant_positions(plan, indic_plan, face, buffer);
784	syllabic::insert_dotted_circles(
785	face,
786	buffer,
787	broken_syllable_type:SyllableType::BrokenCluster as u8,
788	dottedcircle_category:category::DOTTED_CIRCLE,
789	repha_category:Some(category::REPHA),
790	dottedcircle_position:Some(position::END),
791	);
792
793	let mut start: usize = `0`;
794	let mut end: usize = buffer.next_syllable(start:`0`);
795	while start < buffer.len {
796	initial_reordering_syllable(plan, indic_plan, face, start, end, buffer);
797	start = end;
798	end = buffer.next_syllable(start);
799	}
800	}
801
802	fn update_consonant_positions(
803	plan: &ShapePlan,
804	indic_plan: &IndicShapePlan,
805	face: &Face,
806	buffer: &mut Buffer,
807	) {
808	if indic_plan.config.base_pos != BasePosition::Last {
809	return;
810	}
811
812	let mut virama_glyph: Option = None;
813	if indic_plan.config.virama != `0` {
814	virama_glyph = face.glyph_index(indic_plan.config.virama);
815	}
816
817	if let Some(virama: GlyphId) = virama_glyph {
818	for info: &mut GlyphInfo in buffer.info_slice_mut() {
819	if info.indic_position() == position::BASE_C {
820	let consonant: GlyphId = info.as_glyph();
821	info.set_indic_position(consonant_position_from_face(
822	plan, indic_plan, face, consonant, virama,
823	));
824	}
825	}
826	}
827	}
828
829	fn consonant_position_from_face(
830	plan: &ShapePlan,
831	indic_plan: &IndicShapePlan,
832	face: &Face,
833	consonant: GlyphId,
834	virama: GlyphId,
835	) -> u8 {
836	// For old-spec, the order of glyphs is Consonant,Virama,
837	// whereas for new-spec, it's Virama,Consonant. However,
838	// some broken fonts (like Free Sans) simply copied lookups
839	// from old-spec to new-spec without modification.
840	// And oddly enough, Uniscribe seems to respect those lookups.
841	// Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds
842	// base at 0. The font however, only has lookups matching
843	// 930,94D in 'blwf', not the expected 94D,930 (with new-spec
844	// table). As such, we simply match both sequences. Seems
845	// to work.
846	//
847	// Vatu is done as well, for:
848	// https://github.com/harfbuzz/harfbuzz/issues/1587
849
850	if indic_plan
851	.blwf
852	.would_substitute(&plan.ot_map, face, &[virama, consonant])
853	\|\| indic_plan
854	.blwf
855	.would_substitute(&plan.ot_map, face, &[consonant, virama])
856	\|\| indic_plan
857	.vatu
858	.would_substitute(&plan.ot_map, face, &[virama, consonant])
859	\|\| indic_plan
860	.vatu
861	.would_substitute(&plan.ot_map, face, &[consonant, virama])
862	{
863	return position::BELOW_C;
864	}
865
866	if indic_plan
867	.pstf
868	.would_substitute(&plan.ot_map, face, &[virama, consonant])
869	\|\| indic_plan
870	.pstf
871	.would_substitute(&plan.ot_map, face, &[consonant, virama])
872	{
873	return position::POST_C;
874	}
875
876	if indic_plan
877	.pref
878	.would_substitute(&plan.ot_map, face, &[virama, consonant])
879	\|\| indic_plan
880	.pref
881	.would_substitute(&plan.ot_map, face, &[consonant, virama])
882	{
883	return position::POST_C;
884	}
885
886	position::BASE_C
887	}
888
889	fn initial_reordering_syllable(
890	plan: &ShapePlan,
891	indic_plan: &IndicShapePlan,
892	face: &Face,
893	start: usize,
894	end: usize,
895	buffer: &mut Buffer,
896	) {
897	use super::indic_machine::SyllableType;
898
899	let syllable_type = match buffer.info[start].syllable() & `0x0F` {
900	`0` => SyllableType::ConsonantSyllable,
901	`1` => SyllableType::VowelSyllable,
902	`2` => SyllableType::StandaloneCluster,
903	`3` => SyllableType::SymbolCluster,
904	`4` => SyllableType::BrokenCluster,
905	`5` => SyllableType::NonIndicCluster,
906	_ => unreachable!(),
907	};
908
909	match syllable_type {
910	// We made the vowels look like consonants. So let's call the consonant logic!
911	SyllableType::VowelSyllable \| SyllableType::ConsonantSyllable => {
912	initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer);
913	}
914	// We already inserted dotted-circles, so just call the standalone_cluster.
915	SyllableType::BrokenCluster \| SyllableType::StandaloneCluster => {
916	initial_reordering_standalone_cluster(plan, indic_plan, face, start, end, buffer);
917	}
918	SyllableType::SymbolCluster \| SyllableType::NonIndicCluster => {}
919	}
920	}
921
922	// Rules from:
923	// https://docs.microsoft.com/en-us/typography/script-development/devanagari
924	fn initial_reordering_consonant_syllable(
925	plan: &ShapePlan,
926	indic_plan: &IndicShapePlan,
927	face: &Face,
928	start: usize,
929	end: usize,
930	buffer: &mut Buffer,
931	) {
932	// https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167
933	// For compatibility with legacy usage in Kannada,
934	// Ra+h+ZWJ must behave like Ra+ZWJ+h...
935	if buffer.script == Some(script::KANNADA)
936	&& start + `3` <= end
937	&& buffer.info[start].is_one_of(category_flag(category::RA))
938	&& buffer.info[start + `1`].is_one_of(category_flag(category::H))
939	&& buffer.info[start + `2`].is_one_of(category_flag(category::ZWJ))
940	{
941	buffer.merge_clusters(start + `1`, start + `3`);
942	buffer.info.swap(start + `1`, start + `2`);
943	}
944
945	// 1. Find base consonant:
946	//
947	// The shaping engine finds the base consonant of the syllable, using the
948	// following algorithm: starting from the end of the syllable, move backwards
949	// until a consonant is found that does not have a below-base or post-base
950	// form (post-base forms have to follow below-base forms), or that is not a
951	// pre-base-reordering Ra, or arrive at the first consonant. The consonant
952	// stopped at will be the base.
953	//
954	// - If the syllable starts with Ra + Halant (in a script that has Reph)
955	// and has more than one consonant, Ra is excluded from candidates for
956	// base consonants.
957
958	let mut base = end;
959	let mut has_reph = `false`;
960
961	{
962	// -> If the syllable starts with Ra + Halant (in a script that has Reph)
963	// and has more than one consonant, Ra is excluded from candidates for
964	// base consonants.
965	let mut limit = start;
966	if indic_plan.mask_array[indic_feature::RPHF] != `0`
967	&& start + `3` <= end
968	&& ((indic_plan.config.reph_mode == RephMode::Implicit
969	&& !buffer.info[start + `2`].is_joiner())
970	\|\| (indic_plan.config.reph_mode == RephMode::Explicit
971	&& buffer.info[start + `2`].indic_category() == category::ZWJ))
972	{
973	// See if it matches the 'rphf' feature.
974	let glyphs = &[
975	buffer.info[start].as_glyph(),
976	buffer.info[start + `1`].as_glyph(),
977	if indic_plan.config.reph_mode == RephMode::Explicit {
978	buffer.info[start + `2`].as_glyph()
979	} else {
980	GlyphId(`0`)
981	},
982	];
983	if indic_plan
984	.rphf
985	.would_substitute(&plan.ot_map, face, &glyphs[`0`..`2`])
986	\|\| (indic_plan.config.reph_mode == RephMode::Explicit
987	&& indic_plan.rphf.would_substitute(&plan.ot_map, face, glyphs))
988	{
989	limit += `2`;
990	while limit < end && buffer.info[limit].is_joiner() {
991	limit += `1`;
992	}
993	base = start;
994	has_reph = `true`;
995	}
996	} else if indic_plan.config.reph_mode == RephMode::LogRepha
997	&& buffer.info[start].indic_category() == category::REPHA
998	{
999	limit += `1`;
1000	while limit < end && buffer.info[limit].is_joiner() {
1001	limit += `1`;
1002	}
1003	base = start;
1004	has_reph = `true`;
1005	}
1006
1007	match indic_plan.config.base_pos {
1008	BasePosition::Last => {
1009	// -> starting from the end of the syllable, move backwards
1010	let mut i = end;
1011	let mut seen_below = `false`;
1012	loop {
1013	i -= `1`;
1014	// -> until a consonant is found
1015	if buffer.info[i].is_consonant() {
1016	// -> that does not have a below-base or post-base form
1017	// (post-base forms have to follow below-base forms),
1018	if buffer.info[i].indic_position() != position::BELOW_C
1019	&& (buffer.info[i].indic_position() != position::POST_C \|\| seen_below)
1020	{
1021	base = i;
1022	break;
1023	}
1024	if buffer.info[i].indic_position() == position::BELOW_C {
1025	seen_below = `true`;
1026	}
1027
1028	// -> or that is not a pre-base-reordering Ra,
1029	//
1030	// IMPLEMENTATION NOTES:
1031	//
1032	// Our pre-base-reordering Ra's are marked position::PostC, so will be skipped
1033	// by the logic above already.
1034
1035	// -> or arrive at the first consonant. The consonant stopped at will
1036	// be the base.
1037	base = i;
1038	} else {
1039	// A ZWJ after a Halant stops the base search, and requests an explicit
1040	// half form.
1041	// A ZWJ before a Halant, requests a subjoined form instead, and hence
1042	// search continues. This is particularly important for Bengali
1043	// sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya.
1044	if start < i
1045	&& buffer.info[i].indic_category() == category::ZWJ
1046	&& buffer.info[i - `1`].indic_category() == category::H
1047	{
1048	break;
1049	}
1050	}
1051
1052	if i <= limit {
1053	break;
1054	}
1055	}
1056	}
1057	BasePosition::LastSinhala => {
1058	// Sinhala base positioning is slightly different from main Indic, in that:
1059	// 1. Its ZWJ behavior is different,
1060	// 2. We don't need to look into the font for consonant positions.
1061
1062	if !has_reph {
1063	base = limit;
1064	}
1065
1066	// Find the last base consonant that is not blocked by ZWJ. If there is
1067	// a ZWJ right before a base consonant, that would request a subjoined form.
1068	for i in limit..end {
1069	if buffer.info[i].is_consonant() {
1070	if limit < i && buffer.info[i - `1`].indic_category() == category::ZWJ {
1071	break;
1072	} else {
1073	base = i;
1074	}
1075	}
1076	}
1077
1078	// Mark all subsequent consonants as below.
1079	for i in base + `1`..end {
1080	if buffer.info[i].is_consonant() {
1081	buffer.info[i].set_indic_position(position::BELOW_C);
1082	}
1083	}
1084	}
1085	}
1086
1087	// -> If the syllable starts with Ra + Halant (in a script that has Reph)
1088	// and has more than one consonant, Ra is excluded from candidates for
1089	// base consonants.
1090	//
1091	// Only do this for unforced Reph. (ie. not for Ra,H,ZWJ.
1092	if has_reph && base == start && limit - base <= `2` {
1093	// Have no other consonant, so Reph is not formed and Ra becomes base.
1094	has_reph = `false`;
1095	}
1096	}
1097
1098	// 2. Decompose and reorder Matras:
1099	//
1100	// Each matra and any syllable modifier sign in the syllable are moved to the
1101	// appropriate position relative to the consonant(s) in the syllable. The
1102	// shaping engine decomposes two- or three-part matras into their constituent
1103	// parts before any repositioning. Matra characters are classified by which
1104	// consonant in a conjunct they have affinity for and are reordered to the
1105	// following positions:
1106	//
1107	// - Before first half form in the syllable
1108	// - After subjoined consonants
1109	// - After post-form consonant
1110	// - After main consonant (for above marks)
1111	//
1112	// IMPLEMENTATION NOTES:
1113	//
1114	// The normalize() routine has already decomposed matras for us, so we don't
1115	// need to worry about that.
1116
1117	// 3. Reorder marks to canonical order:
1118	//
1119	// Adjacent nukta and halant or nukta and vedic sign are always repositioned
1120	// if necessary, so that the nukta is first.
1121	//
1122	// IMPLEMENTATION NOTES:
1123	//
1124	// We don't need to do this: the normalize() routine already did this for us.
1125
1126	// Reorder characters
1127
1128	for i in start..base {
1129	let pos = buffer.info[i].indic_position();
1130	buffer.info[i].set_indic_position(cmp::min(position::PRE_C, pos));
1131	}
1132
1133	if base < end {
1134	buffer.info[base].set_indic_position(position::BASE_C);
1135	}
1136
1137	// Mark final consonants. A final consonant is one appearing after a matra.
1138	// Happens in Sinhala.
1139	for i in base + `1`..end {
1140	if buffer.info[i].indic_category() == category::M {
1141	for j in i + `1`..end {
1142	if buffer.info[j].is_consonant() {
1143	buffer.info[j].set_indic_position(position::FINAL_C);
1144	break;
1145	}
1146	}
1147
1148	break;
1149	}
1150	}
1151
1152	// Handle beginning Ra
1153	if has_reph {
1154	buffer.info[start].set_indic_position(position::RA_TO_BECOME_REPH);
1155	}
1156
1157	// For old-style Indic script tags, move the first post-base Halant after
1158	// last consonant.
1159	//
1160	// Reports suggest that in some scripts Uniscribe does this only if there
1161	// is not* a Halant after last consonant already. We know that is the*
1162	// case for Kannada, while it reorders unconditionally in other scripts,
1163	// eg. Malayalam, Bengali, and Devanagari. We don't currently know about
1164	// other scripts, so we block Kannada.
1165	//
1166	// Kannada test case:
1167	// U+0C9A,U+0CCD,U+0C9A,U+0CCD
1168	// With some versions of Lohit Kannada.
1169	// https://bugs.freedesktop.org/show_bug.cgi?id=59118
1170	//
1171	// Malayalam test case:
1172	// U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D
1173	// With lohit-ttf-20121122/Lohit-Malayalam.ttf
1174	//
1175	// Bengali test case:
1176	// U+0998,U+09CD,U+09AF,U+09CD
1177	// With Windows XP vrinda.ttf
1178	// https://github.com/harfbuzz/harfbuzz/issues/1073
1179	//
1180	// Devanagari test case:
1181	// U+091F,U+094D,U+0930,U+094D
1182	// With chandas.ttf
1183	// https://github.com/harfbuzz/harfbuzz/issues/1071
1184	if indic_plan.is_old_spec {
1185	let disallow_double_halants = buffer.script == Some(script::KANNADA);
1186	for i in base + `1`..end {
1187	if buffer.info[i].indic_category() == category::H {
1188	let mut j = end - `1`;
1189	while j > i {
1190	if buffer.info[j].is_consonant()
1191	\|\| (disallow_double_halants
1192	&& buffer.info[j].indic_category() == category::H)
1193	{
1194	break;
1195	}
1196
1197	j -= `1`;
1198	}
1199
1200	if buffer.info[j].indic_category() != category::H && j > i {
1201	// Move Halant to after last consonant.
1202	let t = buffer.info[i];
1203	for k in `0`..j - i {
1204	buffer.info[k + i] = buffer.info[k + i + `1`];
1205	}
1206	buffer.info[j] = t;
1207	}
1208
1209	break;
1210	}
1211	}
1212	}
1213
1214	// Attach misc marks to previous char to move with them.
1215	{
1216	let mut last_pos = position::START;
1217	for i in start..end {
1218	let ok = rb_flag_unsafe(buffer.info[i].indic_category() as u32)
1219	& (category_flag(category::ZWJ)
1220	\| category_flag(category::ZWNJ)
1221	\| category_flag(category::N)
1222	\| category_flag(category::RS)
1223	\| category_flag(category::CM)
1224	\| category_flag(category::H))
1225	!= `0`;
1226	if ok {
1227	buffer.info[i].set_indic_position(last_pos);
1228
1229	if buffer.info[i].indic_category() == category::H
1230	&& buffer.info[i].indic_position() == position::PRE_M
1231	{
1232	// Uniscribe doesn't move the Halant with Left Matra.
1233	// TEST: U+092B,U+093F,U+094DE
1234	// We follow. This is important for the Sinhala
1235	// U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
1236	// where U+0DD9 is a left matra and U+0DCA is the virama.
1237	// We don't want to move the virama with the left matra.
1238	// TEST: U+0D9A,U+0DDA
1239	for j in (start + `1`..=i).rev() {
1240	if buffer.info[j - `1`].indic_position() != position::PRE_M {
1241	let pos = buffer.info[j - `1`].indic_position();
1242	buffer.info[i].set_indic_position(pos);
1243	break;
1244	}
1245	}
1246	}
1247	} else if buffer.info[i].indic_position() != position::SMVD {
1248	last_pos = buffer.info[i].indic_position();
1249	}
1250	}
1251	}
1252	// For post-base consonants let them own anything before them
1253	// since the last consonant or matra.
1254	{
1255	let mut last = base;
1256	for i in base + `1`..end {
1257	if buffer.info[i].is_consonant() {
1258	for j in last + `1`..i {
1259	if (buffer.info[j].indic_position() as u8) < (position::SMVD as u8) {
1260	let pos = buffer.info[i].indic_position();
1261	buffer.info[j].set_indic_position(pos);
1262	}
1263	}
1264
1265	last = i;
1266	} else if buffer.info[i].indic_category() == category::M {
1267	last = i;
1268	}
1269	}
1270	}
1271
1272	{
1273	// Use syllable() for sort accounting temporarily.
1274	let syllable = buffer.info[start].syllable();
1275	for i in start..end {
1276	buffer.info[i].set_syllable(u8::try_from(i - start).unwrap());
1277	}
1278
1279	buffer.info[start..end].sort_by(\|a, b\| a.indic_position().cmp(&b.indic_position()));
1280
1281	// Find base again.
1282	base = end;
1283	for i in start..end {
1284	if buffer.info[i].indic_position() == position::BASE_C {
1285	base = i;
1286	break;
1287	}
1288	}
1289	// Things are out-of-control for post base positions, they may shuffle
1290	// around like crazy. In old-spec mode, we move halants around, so in
1291	// that case merge all clusters after base. Otherwise, check the sort
1292	// order and merge as needed.
1293	// For pre-base stuff, we handle cluster issues in final reordering.
1294	//
1295	// We could use buffer->sort() for this, if there was no special
1296	// reordering of pre-base stuff happening later...
1297	// We don't want to merge_clusters all of that, which buffer->sort()
1298	// would. Here's a concrete example:
1299	//
1300	// Assume there's a pre-base consonant and explicit Halant before base,
1301	// followed by a prebase-reordering (left) Matra:
1302	//
1303	// C,H,ZWNJ,B,M
1304	//
1305	// At this point in reordering we would have:
1306	//
1307	// M,C,H,ZWNJ,B
1308	//
1309	// whereas in final reordering we will bring the Matra closer to Base:
1310	//
1311	// C,H,ZWNJ,M,B
1312	//
1313	// That's why we don't want to merge-clusters anything before the Base
1314	// at this point. But if something moved from after Base to before it,
1315	// we should merge clusters from base to them. In final-reordering, we
1316	// only move things around before base, and merge-clusters up to base.
1317	// These two merge-clusters from the two sides of base will interlock
1318	// to merge things correctly. See:
1319	// https://github.com/harfbuzz/harfbuzz/issues/2272
1320	if indic_plan.is_old_spec \|\| end - start > `127` {
1321	buffer.merge_clusters(base, end);
1322	} else {
1323	// Note! syllable() is a one-byte field.
1324	for i in base..end {
1325	if buffer.info[i].syllable() != `255` {
1326	let mut min = i;
1327	let mut max = i;
1328	let mut j = start + buffer.info[i].syllable() as usize;
1329	while j != i {
1330	min = cmp::min(min, j);
1331	max = cmp::max(max, j);
1332	let next = start + buffer.info[j].syllable() as usize;
1333	buffer.info[j].set_syllable(`255`); // So we don't process j later again.
1334	j = next;
1335	}
1336
1337	buffer.merge_clusters(cmp::max(base, min), max + `1`);
1338	}
1339	}
1340	}
1341
1342	// Put syllable back in.
1343	for info in &mut buffer.info[start..end] {
1344	info.set_syllable(syllable);
1345	}
1346	}
1347
1348	// Setup masks now
1349
1350	{
1351	// Reph
1352	for info in &mut buffer.info[start..end] {
1353	if info.indic_position() != position::RA_TO_BECOME_REPH {
1354	break;
1355	}
1356
1357	info.mask \|= indic_plan.mask_array[indic_feature::RPHF];
1358	}
1359
1360	// Pre-base
1361	let mut mask = indic_plan.mask_array[indic_feature::HALF];
1362	if !indic_plan.is_old_spec && indic_plan.config.blwf_mode == BlwfMode::PreAndPost {
1363	mask \|= indic_plan.mask_array[indic_feature::BLWF];
1364	}
1365
1366	for info in &mut buffer.info[start..base] {
1367	info.mask \|= mask;
1368	}
1369
1370	// Base
1371	mask = `0`;
1372	if base < end {
1373	buffer.info[base].mask \|= mask;
1374	}
1375
1376	// Post-base
1377	mask = indic_plan.mask_array[indic_feature::BLWF]
1378	\| indic_plan.mask_array[indic_feature::ABVF]
1379	\| indic_plan.mask_array[indic_feature::PSTF];
1380	for i in base + `1`..end {
1381	buffer.info[i].mask \|= mask;
1382	}
1383	}
1384
1385	if indic_plan.is_old_spec && buffer.script == Some(script::DEVANAGARI) {
1386	// Old-spec eye-lash Ra needs special handling. From the
1387	// spec:
1388	//
1389	// "The feature 'below-base form' is applied to consonants
1390	// having below-base forms and following the base consonant.
1391	// The exception is vattu, which may appear below half forms
1392	// as well as below the base glyph. The feature 'below-base
1393	// form' will be applied to all such occurrences of Ra as well."
1394	//
1395	// Test case: U+0924,U+094D,U+0930,U+094d,U+0915
1396	// with Sanskrit 2003 font.
1397	//
1398	// However, note that Ra,Halant,ZWJ is the correct way to
1399	// request eyelash form of Ra, so we wouldbn't inhibit it
1400	// in that sequence.
1401	//
1402	// Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
1403	for i in start..base.saturating_sub(`1`) {
1404	if buffer.info[i].indic_category() == category::RA
1405	&& buffer.info[i + `1`].indic_category() == category::H
1406	&& (i + `2` == base \|\| buffer.info[i + `2`].indic_category() != category::ZWJ)
1407	{
1408	buffer.info[i].mask \|= indic_plan.mask_array[indic_feature::BLWF];
1409	buffer.info[i + `1`].mask \|= indic_plan.mask_array[indic_feature::BLWF];
1410	}
1411	}
1412	}
1413
1414	let pref_len = `2`;
1415	if indic_plan.mask_array[indic_feature::PREF] != `0` && base + pref_len < end {
1416	// Find a Halant,Ra sequence and mark it for pre-base-reordering processing.
1417	for i in base + `1`..end - pref_len + `1` {
1418	let glyphs = &[buffer.info[i + `0`].as_glyph(), buffer.info[i + `1`].as_glyph()];
1419	if indic_plan.pref.would_substitute(&plan.ot_map, face, glyphs) {
1420	buffer.info[i + `0`].mask = indic_plan.mask_array[indic_feature::PREF];
1421	buffer.info[i + `1`].mask = indic_plan.mask_array[indic_feature::PREF];
1422	break;
1423	}
1424	}
1425	}
1426
1427	// Apply ZWJ/ZWNJ effects
1428	for i in start + `1`..end {
1429	if buffer.info[i].is_joiner() {
1430	let non_joiner = buffer.info[i].indic_category() == category::ZWNJ;
1431	let mut j = i;
1432
1433	loop {
1434	j -= `1`;
1435
1436	// ZWJ/ZWNJ should disable CJCT. They do that by simply
1437	// being there, since we don't skip them for the CJCT
1438	// feature (ie. F_MANUAL_ZWJ)
1439
1440	// A ZWNJ disables HALF.
1441	if non_joiner {
1442	buffer.info[j].mask &= !indic_plan.mask_array[indic_feature::HALF];
1443	}
1444
1445	if j <= start \|\| buffer.info[j].is_consonant() {
1446	break;
1447	}
1448	}
1449	}
1450	}
1451	}
1452
1453	fn initial_reordering_standalone_cluster(
1454	plan: &ShapePlan,
1455	indic_plan: &IndicShapePlan,
1456	face: &Face,
1457	start: usize,
1458	end: usize,
1459	buffer: &mut Buffer,
1460	) {
1461	// We treat placeholder/dotted-circle as if they are consonants, so we
1462	// should just chain. Only if not in compatibility mode that is...
1463	initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer);
1464	}
1465
1466	fn final_reordering(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) {
1467	if buffer.is_empty() {
1468	return;
1469	}
1470
1471	let indic_plan: &IndicShapePlan = plan.data::<IndicShapePlan>();
1472
1473	let mut virama_glyph: Option = None;
1474	if indic_plan.config.virama != `0` {
1475	if let Some(g: GlyphId) = face.glyph_index(indic_plan.config.virama) {
1476	virama_glyph = Some(g.0 as u32);
1477	}
1478	}
1479
1480	let mut start: usize = `0`;
1481	let mut end: usize = buffer.next_syllable(start:`0`);
1482	while start < buffer.len {
1483	final_reordering_impl(indic_plan, virama_glyph, start, end, buffer);
1484	start = end;
1485	end = buffer.next_syllable(start);
1486	}
1487	}
1488
1489	fn final_reordering_impl(
1490	plan: &IndicShapePlan,
1491	virama_glyph: Option<u32>,
1492	start: usize,
1493	end: usize,
1494	buffer: &mut Buffer,
1495	) {
1496	// This function relies heavily on halant glyphs. Lots of ligation
1497	// and possibly multiple substitutions happened prior to this
1498	// phase, and that might have messed up our properties. Recover
1499	// from a particular case of that where we're fairly sure that a
1500	// class of OT_H is desired but has been lost.
1501	//
1502	// We don't call load_virama_glyph(), since we know it's already loaded.
1503	if let Some(virama_glyph) = virama_glyph {
1504	for info in &mut buffer.info[start..end] {
1505	if info.glyph_id == virama_glyph && info.is_ligated() && info.is_multiplied() {
1506	// This will make sure that this glyph passes is_halant() test.
1507	info.set_indic_category(category::H);
1508	info.clear_ligated_and_multiplied();
1509	}
1510	}
1511	}
1512
1513	// 4. Final reordering:
1514	//
1515	// After the localized forms and basic shaping forms GSUB features have been
1516	// applied (see below), the shaping engine performs some final glyph
1517	// reordering before applying all the remaining font features to the entire
1518	// syllable.
1519
1520	let mut try_pref = plan.mask_array[indic_feature::PREF] != `0`;
1521
1522	let mut base = start;
1523	while base < end {
1524	if buffer.info[base].indic_position() as u32 >= position::BASE_C as u32 {
1525	if try_pref && base + `1` < end {
1526	for i in base + `1`..end {
1527	if (buffer.info[i].mask & plan.mask_array[indic_feature::PREF]) != `0` {
1528	if !(buffer.info[i].is_substituted()
1529	&& buffer.info[i].is_ligated_and_didnt_multiply())
1530	{
1531	// Ok, this was a 'pref' candidate but didn't form any.
1532	// Base is around here...
1533	base = i;
1534	while base < end && buffer.info[base].is_halant() {
1535	base += `1`;
1536	}
1537
1538	buffer.info[base].set_indic_position(position::BASE_C);
1539	try_pref = `false`;
1540	}
1541
1542	break;
1543	}
1544	}
1545	}
1546
1547	// For Malayalam, skip over unformed below- (but NOT post-) forms.
1548	if buffer.script == Some(script::MALAYALAM) {
1549	let mut i = base + `1`;
1550	while i < end {
1551	while i < end && buffer.info[i].is_joiner() {
1552	i += `1`;
1553	}
1554
1555	if i == end \|\| !buffer.info[i].is_halant() {
1556	break;
1557	}
1558
1559	i += `1`; // Skip halant.
1560
1561	while i < end && buffer.info[i].is_joiner() {
1562	i += `1`;
1563	}
1564
1565	if i < end
1566	&& buffer.info[i].is_consonant()
1567	&& buffer.info[i].indic_position() == position::BELOW_C
1568	{
1569	base = i;
1570	buffer.info[base].set_indic_position(position::BASE_C);
1571	}
1572
1573	i += `1`;
1574	}
1575	}
1576
1577	if start < base && buffer.info[base].indic_position() as u32 > position::BASE_C as u32 {
1578	base -= `1`;
1579	}
1580
1581	break;
1582	}
1583
1584	base += `1`;
1585	}
1586
1587	if base == end && start < base && buffer.info[base - `1`].is_one_of(rb_flag(category::ZWJ as u32))
1588	{
1589	base -= `1`;
1590	}
1591
1592	if base < end {
1593	while start < base
1594	&& buffer.info[base]
1595	.is_one_of(rb_flag(category::N as u32) \| rb_flag(category::H as u32))
1596	{
1597	base -= `1`;
1598	}
1599	}
1600
1601	// - Reorder matras:
1602	//
1603	// If a pre-base matra character had been reordered before applying basic
1604	// features, the glyph can be moved closer to the main consonant based on
1605	// whether half-forms had been formed. Actual position for the matra is
1606	// defined as “after last standalone halant glyph, after initial matra
1607	// position and before the main consonant”. If ZWJ or ZWNJ follow this
1608	// halant, position is moved after it.
1609	//
1610	// IMPLEMENTATION NOTES:
1611	//
1612	// It looks like the last sentence is wrong. Testing, with Windows 7 Uniscribe
1613	// and Devanagari shows that the behavior is best described as:
1614	//
1615	// "If ZWJ follows this halant, matra is NOT repositioned after this halant.
1616	// If ZWNJ follows this halant, position is moved after it."
1617	//
1618	// Test case, with Adobe Devanagari or Nirmala UI:
1619	//
1620	// U+091F,U+094D,U+200C,U+092F,U+093F
1621	// (Matra moves to the middle, after ZWNJ.)
1622	//
1623	// U+091F,U+094D,U+200D,U+092F,U+093F
1624	// (Matra does NOT move, stays to the left.)
1625	//
1626	// https://github.com/harfbuzz/harfbuzz/issues/1070
1627
1628	// Otherwise there can't be any pre-base matra characters.
1629	if start + `1` < end && start < base {
1630	// If we lost track of base, alas, position before last thingy.
1631	let mut new_pos = if base == end { base - `2` } else { base - `1` };
1632
1633	// Malayalam / Tamil do not have "half" forms or explicit virama forms.
1634	// The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1635	// We want to position matra after them.
1636	if buffer.script != Some(script::MALAYALAM) && buffer.script != Some(script::TAMIL) {
1637	loop {
1638	while new_pos > start
1639	&& !buffer.info[new_pos]
1640	.is_one_of(rb_flag(category::M as u32) \| rb_flag(category::H as u32))
1641	{
1642	new_pos -= `1`;
1643	}
1644
1645	// If we found no Halant we are done.
1646	// Otherwise only proceed if the Halant does
1647	// not belong to the Matra itself!
1648	if buffer.info[new_pos].is_halant()
1649	&& buffer.info[new_pos].indic_position() != position::PRE_M
1650	{
1651	if new_pos + `1` < end {
1652	// -> If ZWJ follows this halant, matra is NOT repositioned after this halant.
1653	if buffer.info[new_pos + `1`].indic_category() == category::ZWJ {
1654	// Keep searching.
1655	if new_pos > start {
1656	new_pos -= `1`;
1657	continue;
1658	}
1659	}
1660
1661	// -> If ZWNJ follows this halant, position is moved after it.
1662	//
1663	// IMPLEMENTATION NOTES:
1664	//
1665	// This is taken care of by the state-machine. A Halant,ZWNJ is a terminating
1666	// sequence for a consonant syllable; any pre-base matras occurring after it
1667	// will belong to the subsequent syllable.
1668	}
1669	} else {
1670	new_pos = start; // No move.
1671	}
1672
1673	break;
1674	}
1675	}
1676
1677	if start < new_pos && buffer.info[new_pos].indic_position() != position::PRE_M {
1678	// Now go see if there's actually any matras...
1679	for i in (start + `1`..=new_pos).rev() {
1680	if buffer.info[i - `1`].indic_position() == position::PRE_M {
1681	let old_pos = i - `1`;
1682	// Shouldn't actually happen.
1683	if old_pos < base && base <= new_pos {
1684	base -= `1`;
1685	}
1686
1687	let tmp = buffer.info[old_pos];
1688	for i in `0`..new_pos - old_pos {
1689	buffer.info[i + old_pos] = buffer.info[i + old_pos + `1`];
1690	}
1691	buffer.info[new_pos] = tmp;
1692
1693	// Note: this merge_clusters() is intentionally after* the reordering.*
1694	// Indic matra reordering is special and tricky...
1695	buffer.merge_clusters(new_pos, cmp::min(end, base + `1`));
1696
1697	new_pos -= `1`;
1698	}
1699	}
1700	} else {
1701	for i in start..base {
1702	if buffer.info[i].indic_position() == position::PRE_M {
1703	buffer.merge_clusters(i, cmp::min(end, base + `1`));
1704	break;
1705	}
1706	}
1707	}
1708	}
1709
1710	// - Reorder reph:
1711	//
1712	// Reph’s original position is always at the beginning of the syllable,
1713	// (i.e. it is not reordered at the character reordering stage). However,
1714	// it will be reordered according to the basic-forms shaping results.
1715	// Possible positions for reph, depending on the script, are; after main,
1716	// before post-base consonant forms, and after post-base consonant forms.
1717
1718	// Two cases:
1719	//
1720	// - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then
1721	// we should only move it if the sequence ligated to the repha form.
1722	//
1723	// - If repha is encoded separately and in the logical position, we should only
1724	// move it if it did NOT ligate. If it ligated, it's probably the font trying
1725	// to make it work without the reordering.
1726
1727	if start + `1` < end
1728	&& buffer.info[start].indic_position() == position::RA_TO_BECOME_REPH
1729	&& (buffer.info[start].indic_category() == category::REPHA)
1730	^ buffer.info[start].is_ligated_and_didnt_multiply()
1731	{
1732	let mut new_reph_pos;
1733	loop {
1734	let reph_pos = plan.config.reph_pos;
1735
1736	// 1. If reph should be positioned after post-base consonant forms,
1737	// proceed to step 5.
1738	if reph_pos != RephPosition::AfterPost {
1739	// 2. If the reph repositioning class is not after post-base: target
1740	// position is after the first explicit halant glyph between the
1741	// first post-reph consonant and last main consonant. If ZWJ or ZWNJ
1742	// are following this halant, position is moved after it. If such
1743	// position is found, this is the target position. Otherwise,
1744	// proceed to the next step.
1745	//
1746	// Note: in old-implementation fonts, where classifications were
1747	// fixed in shaping engine, there was no case where reph position
1748	// will be found on this step.
1749	{
1750	new_reph_pos = start + `1`;
1751	while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() {
1752	new_reph_pos += `1`;
1753	}
1754
1755	if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() {
1756	// ->If ZWJ or ZWNJ are following this halant, position is moved after it.
1757	if new_reph_pos + `1` < base && buffer.info[new_reph_pos + `1`].is_joiner() {
1758	new_reph_pos += `1`;
1759	}
1760
1761	break;
1762	}
1763	}
1764
1765	// 3. If reph should be repositioned after the main consonant: find the
1766	// first consonant not ligated with main, or find the first
1767	// consonant that is not a potential pre-base-reordering Ra.
1768	if reph_pos == RephPosition::AfterMain {
1769	new_reph_pos = base;
1770	while new_reph_pos + `1` < end
1771	&& buffer.info[new_reph_pos + `1`].indic_position() as u8
1772	<= position::AFTER_MAIN as u8
1773	{
1774	new_reph_pos += `1`;
1775	}
1776
1777	if new_reph_pos < end {
1778	break;
1779	}
1780	}
1781
1782	// 4. If reph should be positioned before post-base consonant, find
1783	// first post-base classified consonant not ligated with main. If no
1784	// consonant is found, the target position should be before the
1785	// first matra, syllable modifier sign or vedic sign.
1786	//
1787	// This is our take on what step 4 is trying to say (and failing, BADLY).
1788	if reph_pos == RephPosition::AfterSub {
1789	new_reph_pos = base;
1790	while new_reph_pos + `1` < end
1791	&& (rb_flag_unsafe(buffer.info[new_reph_pos + `1`].indic_position() as u32)
1792	& (rb_flag(position::POST_C as u32)
1793	\| rb_flag(position::AFTER_POST as u32)
1794	\| rb_flag(position::SMVD as u32)))
1795	== `0`
1796	{
1797	new_reph_pos += `1`;
1798	}
1799
1800	if new_reph_pos < end {
1801	break;
1802	}
1803	}
1804	}
1805
1806	// 5. If no consonant is found in steps 3 or 4, move reph to a position
1807	// immediately before the first post-base matra, syllable modifier
1808	// sign or vedic sign that has a reordering class after the intended
1809	// reph position. For example, if the reordering position for reph
1810	// is post-main, it will skip above-base matras that also have a
1811	// post-main position.
1812	//
1813	// Copied from step 2.
1814	new_reph_pos = start + `1`;
1815	while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() {
1816	new_reph_pos += `1`;
1817	}
1818
1819	if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() {
1820	/ ->If ZWJ or ZWNJ are following this halant, position is moved after it. /
1821	if new_reph_pos + `1` < base && buffer.info[new_reph_pos + `1`].is_joiner() {
1822	new_reph_pos += `1`;
1823	}
1824
1825	break;
1826	}
1827	// See https://github.com/harfbuzz/harfbuzz/issues/2298#issuecomment-615318654
1828
1829	// 6. Otherwise, reorder reph to the end of the syllable.
1830	{
1831	new_reph_pos = end - `1`;
1832	while new_reph_pos > start
1833	&& buffer.info[new_reph_pos].indic_position() == position::SMVD
1834	{
1835	new_reph_pos -= `1`;
1836	}
1837
1838	// If the Reph is to be ending up after a Matra,Halant sequence,
1839	// position it before that Halant so it can interact with the Matra.
1840	// However, if it's a plain Consonant,Halant we shouldn't do that.
1841	// Uniscribe doesn't do this.
1842	// TEST: U+0930,U+094D,U+0915,U+094B,U+094D
1843	if buffer.info[new_reph_pos].is_halant() {
1844	for info in &buffer.info[base + `1`..new_reph_pos] {
1845	if info.indic_category() == category::M {
1846	// Ok, got it.
1847	new_reph_pos -= `1`;
1848	}
1849	}
1850	}
1851	}
1852
1853	break;
1854	}
1855
1856	// Move
1857	buffer.merge_clusters(start, new_reph_pos + `1`);
1858
1859	let reph = buffer.info[start];
1860	for i in `0`..new_reph_pos - start {
1861	buffer.info[i + start] = buffer.info[i + start + `1`];
1862	}
1863	buffer.info[new_reph_pos] = reph;
1864
1865	if start < base && base <= new_reph_pos {
1866	base -= `1`;
1867	}
1868	}
1869
1870	// - Reorder pre-base-reordering consonants:
1871	//
1872	// If a pre-base-reordering consonant is found, reorder it according to
1873	// the following rules:
1874
1875	// Otherwise there can't be any pre-base-reordering Ra.
1876	if try_pref && base + `1` < end {
1877	for i in base + `1`..end {
1878	if (buffer.info[i].mask & plan.mask_array[indic_feature::PREF]) != `0` {
1879	// 1. Only reorder a glyph produced by substitution during application
1880	// of the <pref> feature. (Note that a font may shape a Ra consonant with
1881	// the feature generally but block it in certain contexts.)
1882	//
1883	// Note: We just check that something got substituted. We don't check that
1884	// the <pref> feature actually did it...
1885	//
1886	// Reorder pref only if it ligated.
1887	if buffer.info[i].is_ligated_and_didnt_multiply() {
1888	// 2. Try to find a target position the same way as for pre-base matra.
1889	// If it is found, reorder pre-base consonant glyph.
1890	//
1891	// 3. If position is not found, reorder immediately before main consonant.
1892
1893	let mut new_pos = base;
1894	// Malayalam / Tamil do not have "half" forms or explicit virama forms.
1895	// The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1896	// We want to position matra after them.
1897	if buffer.script != Some(script::MALAYALAM)
1898	&& buffer.script != Some(script::TAMIL)
1899	{
1900	while new_pos > start
1901	&& !buffer.info[new_pos - `1`].is_one_of(
1902	rb_flag(category::M as u32) \| rb_flag(category::H as u32),
1903	)
1904	{
1905	new_pos -= `1`;
1906	}
1907	}
1908
1909	if new_pos > start && buffer.info[new_pos - `1`].is_halant() {
1910	// -> If ZWJ or ZWNJ follow this halant, position is moved after it.
1911	if new_pos < end && buffer.info[new_pos].is_joiner() {
1912	new_pos += `1`;
1913	}
1914	}
1915
1916	{
1917	let old_pos = i;
1918
1919	buffer.merge_clusters(new_pos, old_pos + `1`);
1920	let tmp = buffer.info[old_pos];
1921	for i in (`0`..=old_pos - new_pos).rev() {
1922	buffer.info[i + new_pos + `1`] = buffer.info[i + new_pos];
1923	}
1924	buffer.info[new_pos] = tmp;
1925
1926	if new_pos <= base && base < old_pos {
1927	// TODO: investigate
1928	#[allow(unused_assignments)]
1929	{
1930	base += `1`;
1931	}
1932	}
1933	}
1934	}
1935
1936	break;
1937	}
1938	}
1939	}
1940
1941	// Apply 'init' to the Left Matra if it's a word start.
1942	if buffer.info[start].indic_position() == position::PRE_M {
1943	if start == `0`
1944	\|\| (rb_flag_unsafe(buffer.info[start - `1`].general_category().to_rb())
1945	& rb_flag_range(
1946	hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT,
1947	hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK,
1948	))
1949	== `0`
1950	{
1951	buffer.info[start].mask \|= plan.mask_array[indic_feature::INIT];
1952	} else {
1953	buffer.unsafe_to_break(Some(start - `1`), Some(start + `1`));
1954	}
1955	}
1956	}
1957
1958	pub fn get_category_and_position(u: u32) -> (Category, Position) {
1959	let (c1, c2) = super::indic_table::get_categories(u);
1960	let c2 = if c1 == SyllabicCategory::ConsonantMedial
1961	\|\| c1 == SyllabicCategory::GeminationMark
1962	\|\| c1 == SyllabicCategory::RegisterShifter
1963	\|\| c1 == SyllabicCategory::ConsonantSucceedingRepha
1964	\|\| c1 == SyllabicCategory::Virama
1965	\|\| c1 == SyllabicCategory::VowelDependent
1966	\|\| `false`
1967	{
1968	c2
1969	} else {
1970	MatraCategory::NotApplicable
1971	};
1972
1973	let c1 = match c1 {
1974	SyllabicCategory::Other => category::X,
1975	SyllabicCategory::Avagraha => category::SYMBOL,
1976	SyllabicCategory::Bindu => category::SM,
1977	SyllabicCategory::BrahmiJoiningNumber => category::PLACEHOLDER, // Don't care.
1978	SyllabicCategory::CantillationMark => category::A,
1979	SyllabicCategory::Consonant => category::C,
1980	SyllabicCategory::ConsonantDead => category::C,
1981	SyllabicCategory::ConsonantFinal => category::CM,
1982	SyllabicCategory::ConsonantHeadLetter => category::C,
1983	SyllabicCategory::ConsonantInitialPostfixed => category::PLACEHOLDER,
1984	SyllabicCategory::ConsonantKiller => category::M, // U+17CD only.
1985	SyllabicCategory::ConsonantMedial => category::CM,
1986	SyllabicCategory::ConsonantPlaceholder => category::PLACEHOLDER,
1987	SyllabicCategory::ConsonantPrecedingRepha => category::REPHA,
1988	SyllabicCategory::ConsonantPrefixed => category::X,
1989	SyllabicCategory::ConsonantSubjoined => category::CM,
1990	SyllabicCategory::ConsonantSucceedingRepha => category::CM,
1991	SyllabicCategory::ConsonantWithStacker => category::CS,
1992	SyllabicCategory::GeminationMark => category::SM, // https://github.com/harfbuzz/harfbuzz/issues/552
1993	SyllabicCategory::InvisibleStacker => category::COENG,
1994	SyllabicCategory::Joiner => category::ZWJ,
1995	SyllabicCategory::ModifyingLetter => category::X,
1996	SyllabicCategory::NonJoiner => category::ZWNJ,
1997	SyllabicCategory::Nukta => category::N,
1998	SyllabicCategory::Number => category::PLACEHOLDER,
1999	SyllabicCategory::NumberJoiner => category::PLACEHOLDER, // Don't care.
2000	SyllabicCategory::PureKiller => category::M,
2001	SyllabicCategory::RegisterShifter => category::RS,
2002	SyllabicCategory::SyllableModifier => category::SM,
2003	SyllabicCategory::ToneLetter => category::X,
2004	SyllabicCategory::ToneMark => category::N,
2005	SyllabicCategory::Virama => category::H,
2006	SyllabicCategory::Visarga => category::SM,
2007	SyllabicCategory::Vowel => category::V,
2008	SyllabicCategory::VowelDependent => category::M,
2009	SyllabicCategory::VowelIndependent => category::V,
2010	};
2011
2012	let c2 = match c2 {
2013	MatraCategory::NotApplicable => position::END,
2014	MatraCategory::Left => position::PRE_C,
2015	MatraCategory::Top => position::ABOVE_C,
2016	MatraCategory::Bottom => position::BELOW_C,
2017	MatraCategory::Right => position::POST_C,
2018	MatraCategory::BottomAndLeft => position::POST_C,
2019	MatraCategory::BottomAndRight => position::POST_C,
2020	MatraCategory::LeftAndRight => position::POST_C,
2021	MatraCategory::TopAndBottom => position::BELOW_C,
2022	MatraCategory::TopAndBottomAndRight => position::POST_C,
2023	MatraCategory::TopAndBottomAndLeft => position::BELOW_C,
2024	MatraCategory::TopAndLeft => position::ABOVE_C,
2025	MatraCategory::TopAndLeftAndRight => position::POST_C,
2026	MatraCategory::TopAndRight => position::POST_C,
2027	MatraCategory::Overstruck => position::AFTER_MAIN,
2028	MatraCategory::VisualOrderLeft => position::PRE_M,
2029	};
2030
2031	(c1, c2)
2032	}
2033
2034	#[rustfmt::skip]
2035	fn matra_position_indic(u: u32, side: u8) -> u8 {
2036	#[inline] fn in_half_block(u: u32, base: u32) -> bool { u & !`0x7F` == base }
2037	#[inline] fn is_deva(u: u32) -> bool { in_half_block(u, `0x0900`) }
2038	#[inline] fn is_beng(u: u32) -> bool { in_half_block(u, `0x0980`) }
2039	#[inline] fn is_guru(u: u32) -> bool { in_half_block(u, `0x0A00`) }
2040	#[inline] fn is_gujr(u: u32) -> bool { in_half_block(u, `0x0A80`) }
2041	#[inline] fn is_orya(u: u32) -> bool { in_half_block(u, `0x0B00`) }
2042	#[inline] fn is_taml(u: u32) -> bool { in_half_block(u, `0x0B80`) }
2043	#[inline] fn is_telu(u: u32) -> bool { in_half_block(u, `0x0C00`) }
2044	#[inline] fn is_knda(u: u32) -> bool { in_half_block(u, `0x0C80`) }
2045	#[inline] fn is_mlym(u: u32) -> bool { in_half_block(u, `0x0D00`) }
2046	#[inline] fn is_sinh(u: u32) -> bool { in_half_block(u, `0x0D80`) }
2047
2048	#[inline]
2049	fn matra_pos_right(u: u32) -> Position {
2050	if is_deva(u) {
2051	position::AFTER_SUB
2052	} else if is_beng(u) {
2053	position::AFTER_POST
2054	} else if is_guru(u) {
2055	position::AFTER_POST
2056	} else if is_gujr(u) {
2057	position::AFTER_POST
2058	} else if is_orya(u) {
2059	position::AFTER_POST
2060	} else if is_taml(u) {
2061	position::AFTER_POST
2062	} else if is_telu(u) {
2063	if u <= `0x0C42` {
2064	position::BEFORE_SUB
2065	} else {
2066	position::AFTER_SUB
2067	}
2068	} else if is_knda(u) {
2069	if u < `0x0CC3` \|\| u > `0xCD6` {
2070	position::BEFORE_SUB
2071	} else {
2072	position::AFTER_SUB
2073	}
2074	} else if is_mlym(u) {
2075	position::AFTER_POST
2076	} else if is_sinh(u) {
2077	position::AFTER_SUB
2078	} else {
2079	position::AFTER_SUB
2080	}
2081	}
2082
2083	// BENG and MLYM don't have top matras.
2084	#[inline]
2085	fn matra_pos_top(u: u32) -> Position {
2086	if is_deva(u) {
2087	position::AFTER_SUB
2088	} else if is_guru(u) {
2089	// Deviate from spec
2090	position::AFTER_POST
2091	} else if is_gujr(u) {
2092	position::AFTER_SUB
2093	} else if is_orya(u) {
2094	position::AFTER_MAIN
2095	} else if is_taml(u) {
2096	position::AFTER_SUB
2097	} else if is_telu(u) {
2098	position::BEFORE_SUB
2099	} else if is_knda(u) {
2100	position::BEFORE_SUB
2101	} else if is_sinh(u) {
2102	position::AFTER_SUB
2103	} else {
2104	position::AFTER_SUB
2105	}
2106	}
2107
2108	#[inline]
2109	fn matra_pos_bottom(u: u32) -> Position {
2110	if is_deva(u) {
2111	position::AFTER_SUB
2112	} else if is_beng(u) {
2113	position::AFTER_SUB
2114	} else if is_guru(u) {
2115	position::AFTER_POST
2116	} else if is_gujr(u) {
2117	position::AFTER_POST
2118	} else if is_orya(u) {
2119	position::AFTER_SUB
2120	} else if is_taml(u) {
2121	position::AFTER_POST
2122	} else if is_telu(u) {
2123	position::BEFORE_SUB
2124	} else if is_knda(u) {
2125	position::BEFORE_SUB
2126	} else if is_mlym(u) {
2127	position::AFTER_POST
2128	} else if is_sinh(u) {
2129	position::AFTER_SUB
2130	} else {
2131	position::AFTER_SUB
2132	}
2133	}
2134
2135	match side {
2136	position::PRE_C => position::PRE_M,
2137	position::POST_C => matra_pos_right(u),
2138	position::ABOVE_C => matra_pos_top(u),
2139	position::BELOW_C => matra_pos_bottom(u),
2140	_ => side,
2141	}
2142	}
2143