1use alloc::boxed::Box;
2
3use super::*;
4use crate::buffer::{Buffer, BufferScratchFlags};
5use crate::ot::{feature, FeatureFlags};
6use crate::plan::{ShapePlan, ShapePlanner};
7use crate::unicode::{
8 hb_gc, modified_combining_class, CharExt, GeneralCategory, GeneralCategoryExt,
9};
10use crate::{script, Face, GlyphInfo, Mask, Script, Tag};
11
12pub const ARABIC_SHAPER: ComplexShaper = ComplexShaper {
13 collect_features: Some(collect_features),
14 override_features: None,
15 create_data: Some(|plan: &ShapePlan| Box::new(ArabicShapePlan::new(plan))),
16 preprocess_text: None,
17 postprocess_glyphs: Some(postprocess_glyphs),
18 normalization_mode: Some(ShapeNormalizationMode::Auto),
19 decompose: None,
20 compose: None,
21 setup_masks: Some(setup_masks),
22 gpos_tag: None,
23 reorder_marks: Some(reorder_marks),
24 zero_width_marks: Some(ZeroWidthMarksMode::ByGdefLate),
25 fallback_position: true,
26};
27
28const ARABIC_HAS_STCH: BufferScratchFlags = BufferScratchFlags::COMPLEX0;
29
30const ARABIC_FEATURES: &[Tag] = &[
31 feature::ISOLATED_FORMS,
32 feature::TERMINAL_FORMS_1,
33 feature::TERMINAL_FORMS_2,
34 feature::TERMINAL_FORMS_3,
35 feature::MEDIAL_FORMS_1,
36 feature::MEDIAL_FORMS_2,
37 feature::INITIAL_FORMS,
38];
39
40fn feature_is_syriac(tag: Tag) -> bool {
41 matches!(tag.to_bytes()[3], b'2' | b'3')
42}
43
/// Values stored in a glyph's shaping-action byte.
///
/// `ISOL` through `INIT` double as indices into the plan's mask array; `NONE`
/// selects the extra, always-zero slot at its end.
mod action {
    pub const ISOL: u8 = 0;
    pub const FINA: u8 = 1;
    pub const FIN2: u8 = 2;
    pub const FIN3: u8 = 3;
    pub const MEDI: u8 = 4;
    pub const MED2: u8 = 5;
    pub const INIT: u8 = 6;
    pub const NONE: u8 = 7;

    // The same byte is reused to tag stretch (`stch`) tiles.
    pub const STRETCHING_FIXED: u8 = 8;
    pub const STRETCHING_REPEATING: u8 = 9;

    /// Returns `true` if `n` is one of the two stretch-tile markers.
    #[inline]
    pub fn is_stch(n: u8) -> bool {
        n == STRETCHING_FIXED || n == STRETCHING_REPEATING
    }
}
63
64const STATE_TABLE: &[[(u8, u8, u16); 6]] = &[
65 // jt_U, jt_L, jt_R,
66 // jt_D, jg_ALAPH, jg_DALATH_RISH
67
68 // State 0: prev was U, not willing to join.
69 [
70 (action::NONE, action::NONE, 0),
71 (action::NONE, action::ISOL, 2),
72 (action::NONE, action::ISOL, 1),
73 (action::NONE, action::ISOL, 2),
74 (action::NONE, action::ISOL, 1),
75 (action::NONE, action::ISOL, 6),
76 ],
77 // State 1: prev was R or action::ISOL/ALAPH, not willing to join.
78 [
79 (action::NONE, action::NONE, 0),
80 (action::NONE, action::ISOL, 2),
81 (action::NONE, action::ISOL, 1),
82 (action::NONE, action::ISOL, 2),
83 (action::NONE, action::FIN2, 5),
84 (action::NONE, action::ISOL, 6),
85 ],
86 // State 2: prev was D/L in action::ISOL form, willing to join.
87 [
88 (action::NONE, action::NONE, 0),
89 (action::NONE, action::ISOL, 2),
90 (action::INIT, action::FINA, 1),
91 (action::INIT, action::FINA, 3),
92 (action::INIT, action::FINA, 4),
93 (action::INIT, action::FINA, 6),
94 ],
95 // State 3: prev was D in action::FINA form, willing to join.
96 [
97 (action::NONE, action::NONE, 0),
98 (action::NONE, action::ISOL, 2),
99 (action::MEDI, action::FINA, 1),
100 (action::MEDI, action::FINA, 3),
101 (action::MEDI, action::FINA, 4),
102 (action::MEDI, action::FINA, 6),
103 ],
104 // State 4: prev was action::FINA ALAPH, not willing to join.
105 [
106 (action::NONE, action::NONE, 0),
107 (action::NONE, action::ISOL, 2),
108 (action::MED2, action::ISOL, 1),
109 (action::MED2, action::ISOL, 2),
110 (action::MED2, action::FIN2, 5),
111 (action::MED2, action::ISOL, 6),
112 ],
113 // State 5: prev was FIN2/FIN3 ALAPH, not willing to join.
114 [
115 (action::NONE, action::NONE, 0),
116 (action::NONE, action::ISOL, 2),
117 (action::ISOL, action::ISOL, 1),
118 (action::ISOL, action::ISOL, 2),
119 (action::ISOL, action::FIN2, 5),
120 (action::ISOL, action::ISOL, 6),
121 ],
122 // State 6: prev was DALATH/RISH, not willing to join.
123 [
124 (action::NONE, action::NONE, 0),
125 (action::NONE, action::ISOL, 2),
126 (action::NONE, action::ISOL, 1),
127 (action::NONE, action::ISOL, 2),
128 (action::NONE, action::FIN3, 5),
129 (action::NONE, action::ISOL, 6),
130 ],
131];
132
/// Joining class of a character as consumed by the joining state machine.
///
/// The discriminants `0..=5` are used directly as column indices into
/// `STATE_TABLE`, and the declaration order is relied upon for `>=`
/// comparisons, so neither may be changed.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub enum JoiningType {
    U = 0,
    L = 1,
    R = 2,
    D = 3,
    // There is no `C` variant, unlike harfbuzz, because Rust does not allow
    // two enum variants to share a discriminant.
    GroupAlaph = 4,
    GroupDalathRish = 5,
    /// Skipped by the state machine; it neither reads nor changes the state.
    T = 7,
    /// Placeholder meaning "decide between `U` and `T` from the general category".
    X = 8,
}
145
146impl GlyphInfo {
147 fn arabic_shaping_action(&self) -> u8 {
148 self.complex_var_u8_auxiliary()
149 }
150
151 fn set_arabic_shaping_action(&mut self, action: u8) {
152 self.set_complex_var_u8_auxiliary(action)
153 }
154}
155
156pub struct ArabicShapePlan {
157 // The "+ 1" in the next array is to accommodate for the "NONE" command,
158 // which is not an OpenType feature, but this simplifies the code by not
159 // having to do a "if (... < NONE) ..." and just rely on the fact that
160 // mask_array[NONE] == 0.
161 mask_array: [Mask; ARABIC_FEATURES.len() + 1],
162 has_stch: bool,
163}
164
165impl ArabicShapePlan {
166 pub fn new(plan: &ShapePlan) -> ArabicShapePlan {
167 let has_stch: bool = plan
168 .ot_map
169 .one_mask(feature_tag:feature::STRETCHING_GLYPH_DECOMPOSITION)
170 != 0;
171
172 let mut mask_array: [u32; 8] = [0; ARABIC_FEATURES.len() + 1];
173 for i: usize in 0..ARABIC_FEATURES.len() {
174 mask_array[i] = plan.ot_map.one_mask(ARABIC_FEATURES[i]);
175 }
176
177 ArabicShapePlan {
178 mask_array,
179 has_stch,
180 }
181 }
182}
183
184fn collect_features(planner: &mut ShapePlanner) {
185 // We apply features according to the Arabic spec, with pauses
186 // in between most.
187 //
188 // The pause between init/medi/... and rlig is required. See eg:
189 // https://bugzilla.mozilla.org/show_bug.cgi?id=644184
190 //
191 // The pauses between init/medi/... themselves are not necessarily
192 // needed as only one of those features is applied to any character.
193 // The only difference it makes is when fonts have contextual
194 // substitutions. We now follow the order of the spec, which makes
195 // for better experience if that's what Uniscribe is doing.
196 //
197 // At least for Arabic, looks like Uniscribe has a pause between
198 // rlig and calt. Otherwise the IranNastaliq's ALLAH ligature won't
199 // work. However, testing shows that rlig and calt are applied
200 // together for Mongolian in Uniscribe. As such, we only add a
201 // pause for Arabic, not other scripts.
202 //
203 // A pause after calt is required to make KFGQPC Uthmanic Script HAFS
204 // work correctly. See https://github.com/harfbuzz/harfbuzz/issues/505
205
206 planner.ot_map.enable_feature(
207 feature::STRETCHING_GLYPH_DECOMPOSITION,
208 FeatureFlags::empty(),
209 1,
210 );
211 planner.ot_map.add_gsub_pause(Some(record_stch));
212
213 planner.ot_map.enable_feature(
214 feature::GLYPH_COMPOSITION_DECOMPOSITION,
215 FeatureFlags::empty(),
216 1,
217 );
218 planner
219 .ot_map
220 .enable_feature(feature::LOCALIZED_FORMS, FeatureFlags::empty(), 1);
221
222 planner.ot_map.add_gsub_pause(None);
223
224 for feature in ARABIC_FEATURES {
225 let has_fallback = planner.script == Some(script::ARABIC) && !feature_is_syriac(*feature);
226 let flags = if has_fallback {
227 FeatureFlags::HAS_FALLBACK
228 } else {
229 FeatureFlags::empty()
230 };
231 planner.ot_map.add_feature(*feature, flags, 1);
232 planner.ot_map.add_gsub_pause(None);
233 }
234
235 // Normally, Unicode says a ZWNJ means "don't ligate". In Arabic script
236 // however, it says a ZWJ should also mean "don't ligate". So we run
237 // the main ligating features as MANUAL_ZWJ.
238
239 planner.ot_map.enable_feature(
240 feature::REQUIRED_LIGATURES,
241 FeatureFlags::MANUAL_ZWJ | FeatureFlags::HAS_FALLBACK,
242 1,
243 );
244
245 if planner.script == Some(script::ARABIC) {
246 planner.ot_map.add_gsub_pause(Some(fallback_shape));
247 }
248
249 // No pause after rclt.
250 // See 98460779bae19e4d64d29461ff154b3527bf8420
251 planner.ot_map.enable_feature(
252 feature::REQUIRED_CONTEXTUAL_ALTERNATES,
253 FeatureFlags::MANUAL_ZWJ,
254 1,
255 );
256 planner
257 .ot_map
258 .enable_feature(feature::CONTEXTUAL_ALTERNATES, FeatureFlags::MANUAL_ZWJ, 1);
259 planner.ot_map.add_gsub_pause(None);
260
261 // The spec includes 'cswh'. Earlier versions of Windows
262 // used to enable this by default, but testing suggests
263 // that Windows 8 and later do not enable it by default,
264 // and spec now says 'Off by default'.
265 // We disabled this in ae23c24c32.
266 // Note that IranNastaliq uses this feature extensively
267 // to fixup broken glyph sequences. Oh well...
268 // Test case: U+0643,U+0640,U+0631.
269
270 // planner.ot_map.enable_feature(feature::CONTEXTUAL_SWASH, FeatureFlags::empty(), 1);
271 planner.ot_map.enable_feature(
272 feature::MARK_POSITIONING_VIA_SUBSTITUTION,
273 FeatureFlags::empty(),
274 1,
275 );
276}
277
278fn fallback_shape(_: &ShapePlan, _: &Face, _: &mut Buffer) {}
279
280// Stretch feature: "stch".
281// See example here:
282// https://docs.microsoft.com/en-us/typography/script-development/syriac
283// We implement this in a generic way, such that the Arabic subtending
284// marks can use it as well.
285fn record_stch(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) {
286 let arabic_plan = plan.data::<ArabicShapePlan>();
287 if !arabic_plan.has_stch {
288 return;
289 }
290
291 // 'stch' feature was just applied. Look for anything that multiplied,
292 // and record it for stch treatment later. Note that rtlm, frac, etc
293 // are applied before stch, but we assume that they didn't result in
294 // anything multiplying into 5 pieces, so it's safe-ish...
295
296 let len = buffer.len;
297 let info = &mut buffer.info;
298 let mut has_stch = false;
299 for glyph_info in &mut info[..len] {
300 if glyph_info.is_multiplied() {
301 let comp = if glyph_info.lig_comp() % 2 != 0 {
302 action::STRETCHING_REPEATING
303 } else {
304 action::STRETCHING_FIXED
305 };
306
307 glyph_info.set_arabic_shaping_action(comp);
308 has_stch = true;
309 }
310 }
311
312 if has_stch {
313 buffer.scratch_flags |= ARABIC_HAS_STCH;
314 }
315}
316
317fn postprocess_glyphs(_: &ShapePlan, face: &Face, buffer: &mut Buffer) {
318 apply_stch(face, buffer)
319}
320
321fn apply_stch(face: &Face, buffer: &mut Buffer) {
322 if !buffer.scratch_flags.contains(ARABIC_HAS_STCH) {
323 return;
324 }
325
326 // The Arabic shaper currently always processes in RTL mode, so we should
327 // stretch / position the stretched pieces to the left / preceding glyphs.
328
329 // We do a two pass implementation:
330 // First pass calculates the exact number of extra glyphs we need,
331 // We then enlarge buffer to have that much room,
332 // Second pass applies the stretch, copying things to the end of buffer.
333
334 let mut extra_glyphs_needed: usize = 0; // Set during MEASURE, used during CUT
335 const MEASURE: usize = 0;
336 const CUT: usize = 1;
337
338 for step in 0..2 {
339 let new_len = buffer.len + extra_glyphs_needed; // write head during CUT
340 let mut i = buffer.len;
341 let mut j = new_len;
342 while i != 0 {
343 if !action::is_stch(buffer.info[i - 1].arabic_shaping_action()) {
344 if step == CUT {
345 j -= 1;
346 buffer.info[j] = buffer.info[i - 1];
347 buffer.pos[j] = buffer.pos[i - 1];
348 }
349
350 i -= 1;
351 continue;
352 }
353
354 // Yay, justification!
355
356 let mut w_total = 0; // Total to be filled
357 let mut w_fixed = 0; // Sum of fixed tiles
358 let mut w_repeating = 0; // Sum of repeating tiles
359 let mut n_repeating: i32 = 0;
360
361 let end = i;
362 while i != 0 && action::is_stch(buffer.info[i - 1].arabic_shaping_action()) {
363 i -= 1;
364 let width = face.glyph_h_advance(buffer.info[i].as_glyph()) as i32;
365
366 if buffer.info[i].arabic_shaping_action() == action::STRETCHING_FIXED {
367 w_fixed += width;
368 } else {
369 w_repeating += width;
370 n_repeating += 1;
371 }
372 }
373
374 let start = i;
375 let mut context = i;
376 while context != 0
377 && !action::is_stch(buffer.info[context - 1].arabic_shaping_action())
378 && (buffer.info[context - 1].is_default_ignorable()
379 || is_word_category(buffer.info[context - 1].general_category()))
380 {
381 context -= 1;
382 w_total += buffer.pos[context].x_advance;
383 }
384
385 i += 1; // Don't touch i again.
386
387 // Number of additional times to repeat each repeating tile.
388 let mut n_copies: i32 = 0;
389
390 let w_remaining = w_total - w_fixed;
391 if w_remaining > w_repeating && w_repeating > 0 {
392 n_copies = w_remaining / (w_repeating) - 1;
393 }
394
395 // See if we can improve the fit by adding an extra repeat and squeezing them together a bit.
396 let mut extra_repeat_overlap = 0;
397 let shortfall = w_remaining - w_repeating * (n_copies + 1);
398 if shortfall > 0 && n_repeating > 0 {
399 n_copies += 1;
400 let excess = (n_copies + 1) * w_repeating - w_remaining;
401 if excess > 0 {
402 extra_repeat_overlap = excess / (n_copies * n_repeating);
403 }
404 }
405
406 if step == MEASURE {
407 extra_glyphs_needed += (n_copies * n_repeating) as usize;
408 } else {
409 buffer.unsafe_to_break(Some(context), Some(end));
410 let mut x_offset = 0;
411 for k in (start + 1..=end).rev() {
412 let width = face.glyph_h_advance(buffer.info[k - 1].as_glyph()) as i32;
413
414 let mut repeat = 1;
415 if buffer.info[k - 1].arabic_shaping_action() == action::STRETCHING_REPEATING {
416 repeat += n_copies;
417 }
418
419 for n in 0..repeat {
420 x_offset -= width;
421 if n > 0 {
422 x_offset += extra_repeat_overlap;
423 }
424
425 buffer.pos[k - 1].x_offset = x_offset;
426
427 // Append copy.
428 j -= 1;
429 buffer.info[j] = buffer.info[k - 1];
430 buffer.pos[j] = buffer.pos[k - 1];
431 }
432 }
433 }
434
435 i -= 1;
436 }
437
438 if step == MEASURE {
439 buffer.ensure(buffer.len + extra_glyphs_needed);
440 } else {
441 debug_assert_eq!(j, 0);
442 buffer.set_len(new_len);
443 }
444 }
445}
446
447// See:
448// https://github.com/harfbuzz/harfbuzz/commit/6e6f82b6f3dde0fc6c3c7d991d9ec6cfff57823d#commitcomment-14248516
449fn is_word_category(gc: GeneralCategory) -> bool {
450 (rb_flag_unsafe(gc.to_rb())
451 & (rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED)
452 | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE)
453 | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER)
454 | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER)
455 | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK)
456 | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK)
457 | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
458 | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER)
459 | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER)
460 | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER)
461 | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL)
462 | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL)
463 | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL)
464 | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL)))
465 != 0
466}
467
468fn setup_masks(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) {
469 let arabic_plan: &ArabicShapePlan = plan.data::<ArabicShapePlan>();
470 setup_masks_inner(arabic_plan, plan.script, buffer)
471}
472
473pub fn setup_masks_inner(
474 arabic_plan: &ArabicShapePlan,
475 script: Option<Script>,
476 buffer: &mut Buffer,
477) {
478 arabic_joining(buffer);
479 if script == Some(script::MONGOLIAN) {
480 mongolian_variation_selectors(buffer);
481 }
482
483 for info: &mut GlyphInfo in buffer.info_slice_mut() {
484 info.mask |= arabic_plan.mask_array[info.arabic_shaping_action() as usize];
485 }
486}
487
488fn arabic_joining(buffer: &mut Buffer) {
489 let mut prev: Option<usize> = None;
490 let mut state = 0;
491
492 // Check pre-context.
493 for i in 0..buffer.context_len[0] {
494 let c = buffer.context[0][i];
495 let this_type = get_joining_type(c, c.general_category());
496 if this_type == JoiningType::T {
497 continue;
498 }
499
500 state = STATE_TABLE[state][this_type as usize].2 as usize;
501 break;
502 }
503
504 for i in 0..buffer.len {
505 let this_type =
506 get_joining_type(buffer.info[i].as_char(), buffer.info[i].general_category());
507 if this_type == JoiningType::T {
508 buffer.info[i].set_arabic_shaping_action(action::NONE);
509 continue;
510 }
511
512 let entry = &STATE_TABLE[state][this_type as usize];
513 if entry.0 != action::NONE && prev.is_some() {
514 if let Some(prev) = prev {
515 buffer.info[prev].set_arabic_shaping_action(entry.0);
516 buffer.unsafe_to_break(Some(prev), Some(i + 1));
517 }
518 }
519 // States that have a possible prev_action.
520 else {
521 if let Some(prev) = prev {
522 if this_type >= JoiningType::R || (2 <= state && state <= 5) {
523 buffer.unsafe_to_concat(Some(prev), Some(i + 1));
524 }
525 } else {
526 if this_type >= JoiningType::R {
527 buffer.unsafe_to_concat_from_outbuffer(Some(0), Some(i + 1));
528 }
529 }
530 }
531
532 buffer.info[i].set_arabic_shaping_action(entry.1);
533
534 prev = Some(i);
535 state = entry.2 as usize;
536 }
537
538 for i in 0..buffer.context_len[1] {
539 let c = buffer.context[1][i];
540 let this_type = get_joining_type(c, c.general_category());
541 if this_type == JoiningType::T {
542 continue;
543 }
544
545 let entry = &STATE_TABLE[state][this_type as usize];
546 if entry.0 != action::NONE && prev.is_some() {
547 if let Some(prev) = prev {
548 buffer.info[prev].set_arabic_shaping_action(entry.0);
549 buffer.unsafe_to_break(Some(prev), Some(buffer.len));
550 }
551 }
552 // States that have a possible prev_action.
553 else if 2 <= state && state <= 5 {
554 if let Some(prev) = prev {
555 buffer.unsafe_to_concat(Some(prev), Some(buffer.len));
556 }
557 }
558
559 break;
560 }
561}
562
563fn mongolian_variation_selectors(buffer: &mut Buffer) {
564 // Copy arabic_shaping_action() from base to Mongolian variation selectors.
565 let len: usize = buffer.len;
566 let info: &mut Vec = &mut buffer.info;
567 for i: usize in 1..len {
568 if (0x180B..=0x180D).contains(&info[i].glyph_id) || info[i].glyph_id == 0x180F {
569 let a: u8 = info[i - 1].arabic_shaping_action();
570 info[i].set_arabic_shaping_action(a);
571 }
572 }
573}
574
575fn get_joining_type(u: char, gc: GeneralCategory) -> JoiningType {
576 let j_type: JoiningType = super::arabic_table::joining_type(u);
577 if j_type != JoiningType::X {
578 return j_type;
579 }
580
581 let ok: u32 = rb_flag_unsafe(gc.to_rb())
582 & (rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
583 | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK)
584 | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT));
585
586 if ok != 0 {
587 JoiningType::T
588 } else {
589 JoiningType::U
590 }
591}
592
/// Modifier combining marks that `reorder_marks` moves to the front of their
/// mark sequence.
///
/// See http://www.unicode.org/reports/tr53/
const MODIFIER_COMBINING_MARKS: &[u32] = &[
    // ARABIC HAMZA ABOVE
    0x0654,
    // ARABIC HAMZA BELOW
    0x0655,
    // ARABIC MARK NOON GHUNNA
    0x0658,
    // ARABIC SMALL HIGH SEEN
    0x06DC,
    // ARABIC SMALL LOW SEEN
    0x06E3,
    // ARABIC SMALL HIGH YEH
    0x06E7,
    // ARABIC SMALL HIGH NOON
    0x06E8,
    // ARABIC SMALL HIGH FARSI YEH
    0x08CA,
    // ARABIC SMALL HIGH YEH BARREE WITH TWO DOTS BELOW
    0x08CB,
    // ARABIC SMALL HIGH ZAH
    0x08CD,
    // ARABIC LARGE ROUND DOT ABOVE
    0x08CE,
    // ARABIC LARGE ROUND DOT BELOW
    0x08CF,
    // ARABIC SMALL LOW WAW
    0x08D3,
    // ARABIC SMALL HIGH WAW
    0x08F3,
];
610
611fn reorder_marks(_: &ShapePlan, buffer: &mut Buffer, mut start: usize, end: usize) {
612 let mut i = start;
613 for cc in [220u8, 230].iter().cloned() {
614 while i < end && buffer.info[i].modified_combining_class() < cc {
615 i += 1;
616 }
617
618 if i == end {
619 break;
620 }
621
622 if buffer.info[i].modified_combining_class() > cc {
623 continue;
624 }
625
626 let mut j = i;
627 while j < end
628 && buffer.info[j].modified_combining_class() == cc
629 && MODIFIER_COMBINING_MARKS.contains(&buffer.info[j].glyph_id)
630 {
631 j += 1;
632 }
633
634 if i == j {
635 continue;
636 }
637
638 // Shift it!
639 let mut temp = [GlyphInfo::default(); MAX_COMBINING_MARKS];
640 debug_assert!(j - i <= MAX_COMBINING_MARKS);
641 buffer.merge_clusters(start, j);
642
643 temp[..j - i].copy_from_slice(&buffer.info[i..j]);
644
645 for k in (0..i - start).rev() {
646 buffer.info[k + start + j - i] = buffer.info[k + start];
647 }
648
649 buffer.info[start..][..j - i].copy_from_slice(&temp[..j - i]);
650
651 // Renumber CC such that the reordered sequence is still sorted.
652 // 22 and 26 are chosen because they are smaller than all Arabic categories,
653 // and are folded back to 220/230 respectively during fallback mark positioning.
654 //
655 // We do this because the CGJ-handling logic in the normalizer relies on
656 // mark sequences having an increasing order even after this reordering.
657 // https://github.com/harfbuzz/harfbuzz/issues/554
658 // This, however, does break some obscure sequences, where the normalizer
659 // might compose a sequence that it should not. For example, in the seequence
660 // ALEF, HAMZAH, MADDAH, we should NOT try to compose ALEF+MADDAH, but with this
661 // renumbering, we will.
662 let new_start = start + j - i;
663 let new_cc = if cc == 220 {
664 modified_combining_class::CCC22
665 } else {
666 modified_combining_class::CCC26
667 };
668
669 while start < new_start {
670 buffer.info[start].set_modified_combining_class(new_cc);
671 start += 1;
672 }
673
674 i = j;
675 }
676}
677