1 | use alloc::boxed::Box; |
2 | |
3 | use super::*; |
4 | use crate::buffer::{Buffer, BufferScratchFlags}; |
5 | use crate::ot::{feature, FeatureFlags}; |
6 | use crate::plan::{ShapePlan, ShapePlanner}; |
7 | use crate::unicode::{ |
8 | hb_gc, modified_combining_class, CharExt, GeneralCategory, GeneralCategoryExt, |
9 | }; |
10 | use crate::{script, Face, GlyphInfo, Mask, Script, Tag}; |
11 | |
12 | pub const ARABIC_SHAPER: ComplexShaper = ComplexShaper { |
13 | collect_features: Some(collect_features), |
14 | override_features: None, |
15 | create_data: Some(|plan: &ShapePlan| Box::new(ArabicShapePlan::new(plan))), |
16 | preprocess_text: None, |
17 | postprocess_glyphs: Some(postprocess_glyphs), |
18 | normalization_mode: Some(ShapeNormalizationMode::Auto), |
19 | decompose: None, |
20 | compose: None, |
21 | setup_masks: Some(setup_masks), |
22 | gpos_tag: None, |
23 | reorder_marks: Some(reorder_marks), |
24 | zero_width_marks: Some(ZeroWidthMarksMode::ByGdefLate), |
25 | fallback_position: true, |
26 | }; |
27 | |
/// Buffer scratch flag meaning "at least one glyph was tagged for 'stch'
/// stretching". It is set by `record_stch` and checked by `apply_stch`.
const ARABIC_HAS_STCH: BufferScratchFlags = BufferScratchFlags::COMPLEX0;

/// Positional-form features handled by this shaper.
///
/// `ArabicShapePlan::new` stores the mask of entry `i` in `mask_array[i]`, and
/// `setup_masks_inner` indexes that array with the glyph's shaping action, so
/// the order here has to line up with `action::ISOL ..= action::INIT`.
const ARABIC_FEATURES: &[Tag] = &[
    feature::ISOLATED_FORMS,
    feature::TERMINAL_FORMS_1,
    feature::TERMINAL_FORMS_2,
    feature::TERMINAL_FORMS_3,
    feature::MEDIAL_FORMS_1,
    feature::MEDIAL_FORMS_2,
    feature::INITIAL_FORMS,
];
39 | |
40 | fn feature_is_syriac(tag: Tag) -> bool { |
41 | matches!(tag.to_bytes()[3], b'2' | b'3' ) |
42 | } |
43 | |
/// Per-glyph shaping actions, stored in one `u8` slot of `GlyphInfo`.
mod action {
    // Positional forms. Values 0..=6 double as indices into
    // `ArabicShapePlan::mask_array`; `NONE` selects its extra trailing slot.
    pub const ISOL: u8 = 0;
    pub const FINA: u8 = 1;
    pub const FIN2: u8 = 2;
    pub const FIN3: u8 = 3;
    pub const MEDI: u8 = 4;
    pub const MED2: u8 = 5;
    pub const INIT: u8 = 6;
    pub const NONE: u8 = 7;

    // The same byte is reused for 'stch' bookkeeping.
    pub const STRETCHING_FIXED: u8 = 8;
    pub const STRETCHING_REPEATING: u8 = 9;

    /// Returns `true` if `n` is one of the two stretching markers.
    #[inline]
    pub fn is_stch(n: u8) -> bool {
        n == STRETCHING_FIXED || n == STRETCHING_REPEATING
    }
}
63 | |
/// Joining state machine driven by `arabic_joining`.
///
/// Rows are states. Columns are indexed by `JoiningType as usize` (0..=5).
/// Each entry is `(action for the previous glyph, action for the current
/// glyph, next state)`; `action::NONE` in the first slot means the previous
/// glyph's action is left as it is.
const STATE_TABLE: &[[(u8, u8, u16); 6]] = &[
    // Column order:
    // jt_U, jt_L, jt_R,
    // jt_D, jg_ALAPH, jg_DALATH_RISH

    // State 0: prev was U, not willing to join.
    [
        (action::NONE, action::NONE, 0),
        (action::NONE, action::ISOL, 2),
        (action::NONE, action::ISOL, 1),
        (action::NONE, action::ISOL, 2),
        (action::NONE, action::ISOL, 1),
        (action::NONE, action::ISOL, 6),
    ],
    // State 1: prev was R or action::ISOL/ALAPH, not willing to join.
    [
        (action::NONE, action::NONE, 0),
        (action::NONE, action::ISOL, 2),
        (action::NONE, action::ISOL, 1),
        (action::NONE, action::ISOL, 2),
        (action::NONE, action::FIN2, 5),
        (action::NONE, action::ISOL, 6),
    ],
    // State 2: prev was D/L in action::ISOL form, willing to join.
    [
        (action::NONE, action::NONE, 0),
        (action::NONE, action::ISOL, 2),
        (action::INIT, action::FINA, 1),
        (action::INIT, action::FINA, 3),
        (action::INIT, action::FINA, 4),
        (action::INIT, action::FINA, 6),
    ],
    // State 3: prev was D in action::FINA form, willing to join.
    [
        (action::NONE, action::NONE, 0),
        (action::NONE, action::ISOL, 2),
        (action::MEDI, action::FINA, 1),
        (action::MEDI, action::FINA, 3),
        (action::MEDI, action::FINA, 4),
        (action::MEDI, action::FINA, 6),
    ],
    // State 4: prev was action::FINA ALAPH, not willing to join.
    [
        (action::NONE, action::NONE, 0),
        (action::NONE, action::ISOL, 2),
        (action::MED2, action::ISOL, 1),
        (action::MED2, action::ISOL, 2),
        (action::MED2, action::FIN2, 5),
        (action::MED2, action::ISOL, 6),
    ],
    // State 5: prev was FIN2/FIN3 ALAPH, not willing to join.
    [
        (action::NONE, action::NONE, 0),
        (action::NONE, action::ISOL, 2),
        (action::ISOL, action::ISOL, 1),
        (action::ISOL, action::ISOL, 2),
        (action::ISOL, action::FIN2, 5),
        (action::ISOL, action::ISOL, 6),
    ],
    // State 6: prev was DALATH/RISH, not willing to join.
    [
        (action::NONE, action::NONE, 0),
        (action::NONE, action::ISOL, 2),
        (action::NONE, action::ISOL, 1),
        (action::NONE, action::ISOL, 2),
        (action::NONE, action::FIN3, 5),
        (action::NONE, action::ISOL, 6),
    ],
];
132 | |
/// Joining class of a character.
///
/// The discriminants of `U` through `GroupDalathRish` are used directly as
/// column indices into `STATE_TABLE`. `T` and `X` never index the table:
/// `arabic_joining` skips `T`, and `get_joining_type` resolves `X` to `T` or
/// `U` before returning.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum JoiningType {
    U = 0,
    L = 1,
    R = 2,
    D = 3,
    // We don't have C, like harfbuzz, because Rust doesn't allow duplicated enum variants.
    GroupAlaph = 4,
    GroupDalathRish = 5,
    T = 7,
    X = 8, // means: use general-category to choose between U or T.
}
145 | |
/// Arabic-shaper accessors: the shaping action lives in the glyph's
/// `complex_var_u8_auxiliary` slot.
impl GlyphInfo {
    /// Reads the shaping action (one of the `action::*` values) for this glyph.
    fn arabic_shaping_action(&self) -> u8 {
        self.complex_var_u8_auxiliary()
    }

    /// Stores `action` as this glyph's shaping action.
    fn set_arabic_shaping_action(&mut self, action: u8) {
        self.set_complex_var_u8_auxiliary(action)
    }
}
155 | |
/// Shaper-specific plan data created by `ArabicShapePlan::new`.
pub struct ArabicShapePlan {
    // The "+ 1" in the next array is to accommodate for the "NONE" command,
    // which is not an OpenType feature, but this simplifies the code by not
    // having to do a "if (... < NONE) ..." and just rely on the fact that
    // mask_array[NONE] == 0.
    mask_array: [Mask; ARABIC_FEATURES.len() + 1],
    // True when the plan's map reports a non-zero mask for the 'stch' feature;
    // `record_stch` does nothing when this is false.
    has_stch: bool,
}
164 | |
165 | impl ArabicShapePlan { |
166 | pub fn new(plan: &ShapePlan) -> ArabicShapePlan { |
167 | let has_stch: bool = plan |
168 | .ot_map |
169 | .one_mask(feature_tag:feature::STRETCHING_GLYPH_DECOMPOSITION) |
170 | != 0; |
171 | |
172 | let mut mask_array: [u32; 8] = [0; ARABIC_FEATURES.len() + 1]; |
173 | for i: usize in 0..ARABIC_FEATURES.len() { |
174 | mask_array[i] = plan.ot_map.one_mask(ARABIC_FEATURES[i]); |
175 | } |
176 | |
177 | ArabicShapePlan { |
178 | mask_array, |
179 | has_stch, |
180 | } |
181 | } |
182 | } |
183 | |
184 | fn collect_features(planner: &mut ShapePlanner) { |
185 | // We apply features according to the Arabic spec, with pauses |
186 | // in between most. |
187 | // |
188 | // The pause between init/medi/... and rlig is required. See eg: |
189 | // https://bugzilla.mozilla.org/show_bug.cgi?id=644184 |
190 | // |
191 | // The pauses between init/medi/... themselves are not necessarily |
192 | // needed as only one of those features is applied to any character. |
193 | // The only difference it makes is when fonts have contextual |
194 | // substitutions. We now follow the order of the spec, which makes |
195 | // for better experience if that's what Uniscribe is doing. |
196 | // |
197 | // At least for Arabic, looks like Uniscribe has a pause between |
198 | // rlig and calt. Otherwise the IranNastaliq's ALLAH ligature won't |
199 | // work. However, testing shows that rlig and calt are applied |
200 | // together for Mongolian in Uniscribe. As such, we only add a |
201 | // pause for Arabic, not other scripts. |
202 | // |
203 | // A pause after calt is required to make KFGQPC Uthmanic Script HAFS |
204 | // work correctly. See https://github.com/harfbuzz/harfbuzz/issues/505 |
205 | |
206 | planner.ot_map.enable_feature( |
207 | feature::STRETCHING_GLYPH_DECOMPOSITION, |
208 | FeatureFlags::empty(), |
209 | 1, |
210 | ); |
211 | planner.ot_map.add_gsub_pause(Some(record_stch)); |
212 | |
213 | planner.ot_map.enable_feature( |
214 | feature::GLYPH_COMPOSITION_DECOMPOSITION, |
215 | FeatureFlags::empty(), |
216 | 1, |
217 | ); |
218 | planner |
219 | .ot_map |
220 | .enable_feature(feature::LOCALIZED_FORMS, FeatureFlags::empty(), 1); |
221 | |
222 | planner.ot_map.add_gsub_pause(None); |
223 | |
224 | for feature in ARABIC_FEATURES { |
225 | let has_fallback = planner.script == Some(script::ARABIC) && !feature_is_syriac(*feature); |
226 | let flags = if has_fallback { |
227 | FeatureFlags::HAS_FALLBACK |
228 | } else { |
229 | FeatureFlags::empty() |
230 | }; |
231 | planner.ot_map.add_feature(*feature, flags, 1); |
232 | planner.ot_map.add_gsub_pause(None); |
233 | } |
234 | |
235 | // Normally, Unicode says a ZWNJ means "don't ligate". In Arabic script |
236 | // however, it says a ZWJ should also mean "don't ligate". So we run |
237 | // the main ligating features as MANUAL_ZWJ. |
238 | |
239 | planner.ot_map.enable_feature( |
240 | feature::REQUIRED_LIGATURES, |
241 | FeatureFlags::MANUAL_ZWJ | FeatureFlags::HAS_FALLBACK, |
242 | 1, |
243 | ); |
244 | |
245 | if planner.script == Some(script::ARABIC) { |
246 | planner.ot_map.add_gsub_pause(Some(fallback_shape)); |
247 | } |
248 | |
249 | // No pause after rclt. |
250 | // See 98460779bae19e4d64d29461ff154b3527bf8420 |
251 | planner.ot_map.enable_feature( |
252 | feature::REQUIRED_CONTEXTUAL_ALTERNATES, |
253 | FeatureFlags::MANUAL_ZWJ, |
254 | 1, |
255 | ); |
256 | planner |
257 | .ot_map |
258 | .enable_feature(feature::CONTEXTUAL_ALTERNATES, FeatureFlags::MANUAL_ZWJ, 1); |
259 | planner.ot_map.add_gsub_pause(None); |
260 | |
261 | // The spec includes 'cswh'. Earlier versions of Windows |
262 | // used to enable this by default, but testing suggests |
263 | // that Windows 8 and later do not enable it by default, |
264 | // and spec now says 'Off by default'. |
265 | // We disabled this in ae23c24c32. |
266 | // Note that IranNastaliq uses this feature extensively |
267 | // to fixup broken glyph sequences. Oh well... |
268 | // Test case: U+0643,U+0640,U+0631. |
269 | |
270 | // planner.ot_map.enable_feature(feature::CONTEXTUAL_SWASH, FeatureFlags::empty(), 1); |
271 | planner.ot_map.enable_feature( |
272 | feature::MARK_POSITIONING_VIA_SUBSTITUTION, |
273 | FeatureFlags::empty(), |
274 | 1, |
275 | ); |
276 | } |
277 | |
// GSUB pause callback registered by `collect_features` for the Arabic script.
// The body is intentionally empty: no fallback shaping is performed here.
fn fallback_shape(_: &ShapePlan, _: &Face, _: &mut Buffer) {}
279 | |
280 | // Stretch feature: "stch". |
281 | // See example here: |
282 | // https://docs.microsoft.com/en-us/typography/script-development/syriac |
283 | // We implement this in a generic way, such that the Arabic subtending |
284 | // marks can use it as well. |
285 | fn record_stch(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
286 | let arabic_plan = plan.data::<ArabicShapePlan>(); |
287 | if !arabic_plan.has_stch { |
288 | return; |
289 | } |
290 | |
291 | // 'stch' feature was just applied. Look for anything that multiplied, |
292 | // and record it for stch treatment later. Note that rtlm, frac, etc |
293 | // are applied before stch, but we assume that they didn't result in |
294 | // anything multiplying into 5 pieces, so it's safe-ish... |
295 | |
296 | let len = buffer.len; |
297 | let info = &mut buffer.info; |
298 | let mut has_stch = false; |
299 | for glyph_info in &mut info[..len] { |
300 | if glyph_info.is_multiplied() { |
301 | let comp = if glyph_info.lig_comp() % 2 != 0 { |
302 | action::STRETCHING_REPEATING |
303 | } else { |
304 | action::STRETCHING_FIXED |
305 | }; |
306 | |
307 | glyph_info.set_arabic_shaping_action(comp); |
308 | has_stch = true; |
309 | } |
310 | } |
311 | |
312 | if has_stch { |
313 | buffer.scratch_flags |= ARABIC_HAS_STCH; |
314 | } |
315 | } |
316 | |
// `postprocess_glyphs` hook of `ARABIC_SHAPER`: the only post-processing done
// is the 'stch' stretching in `apply_stch`.
fn postprocess_glyphs(_: &ShapePlan, face: &Face, buffer: &mut Buffer) {
    apply_stch(face, buffer)
}
320 | |
// Expands the glyph runs tagged by `record_stch` (fixed / repeating tiles) so
// that they cover the advance width of the word-like glyphs preceding them in
// the buffer. Returns immediately when `ARABIC_HAS_STCH` is not set.
//
// The buffer is rewritten in place from the back: `i` is the read position in
// the old contents, `j` the write position in the enlarged buffer.
fn apply_stch(face: &Face, buffer: &mut Buffer) {
    if !buffer.scratch_flags.contains(ARABIC_HAS_STCH) {
        return;
    }

    // The Arabic shaper currently always processes in RTL mode, so we should
    // stretch / position the stretched pieces to the left / preceding glyphs.

    // We do a two pass implementation:
    // First pass calculates the exact number of extra glyphs we need,
    // We then enlarge buffer to have that much room,
    // Second pass applies the stretch, copying things to the end of buffer.

    let mut extra_glyphs_needed: usize = 0; // Set during MEASURE, used during CUT
    const MEASURE: usize = 0;
    const CUT: usize = 1;

    for step in 0..2 {
        let new_len = buffer.len + extra_glyphs_needed; // write head during CUT
        let mut i = buffer.len;
        let mut j = new_len;
        while i != 0 {
            // Ordinary glyph: during CUT just move it to its final position.
            if !action::is_stch(buffer.info[i - 1].arabic_shaping_action()) {
                if step == CUT {
                    j -= 1;
                    buffer.info[j] = buffer.info[i - 1];
                    buffer.pos[j] = buffer.pos[i - 1];
                }

                i -= 1;
                continue;
            }

            // Yay, justification!

            let mut w_total = 0; // Total to be filled
            let mut w_fixed = 0; // Sum of fixed tiles
            let mut w_repeating = 0; // Sum of repeating tiles
            let mut n_repeating: i32 = 0;

            // Walk backwards over the whole run of tiles, [start, end),
            // summing the advances of fixed and repeating tiles separately.
            let end = i;
            while i != 0 && action::is_stch(buffer.info[i - 1].arabic_shaping_action()) {
                i -= 1;
                let width = face.glyph_h_advance(buffer.info[i].as_glyph()) as i32;

                if buffer.info[i].arabic_shaping_action() == action::STRETCHING_FIXED {
                    w_fixed += width;
                } else {
                    w_repeating += width;
                    n_repeating += 1;
                }
            }

            // Extend further back over default-ignorable / word-category
            // glyphs; their total advance is the width the tiles must fill.
            let start = i;
            let mut context = i;
            while context != 0
                && !action::is_stch(buffer.info[context - 1].arabic_shaping_action())
                && (buffer.info[context - 1].is_default_ignorable()
                    || is_word_category(buffer.info[context - 1].general_category()))
            {
                context -= 1;
                w_total += buffer.pos[context].x_advance;
            }

            // Balanced by the `i -= 1` at the bottom of the loop, so the outer
            // loop resumes at `start`.
            i += 1; // Don't touch i again.

            // Number of additional times to repeat each repeating tile.
            let mut n_copies: i32 = 0;

            let w_remaining = w_total - w_fixed;
            if w_remaining > w_repeating && w_repeating > 0 {
                n_copies = w_remaining / (w_repeating) - 1;
            }

            // See if we can improve the fit by adding an extra repeat and squeezing them together a bit.
            let mut extra_repeat_overlap = 0;
            let shortfall = w_remaining - w_repeating * (n_copies + 1);
            if shortfall > 0 && n_repeating > 0 {
                n_copies += 1;
                let excess = (n_copies + 1) * w_repeating - w_remaining;
                if excess > 0 {
                    extra_repeat_overlap = excess / (n_copies * n_repeating);
                }
            }

            if step == MEASURE {
                extra_glyphs_needed += (n_copies * n_repeating) as usize;
            } else {
                buffer.unsafe_to_break(context, end);
                let mut x_offset = 0;
                // Emit the tiles last-to-first; repeating tiles are written
                // `1 + n_copies` times, each copy shifted further by its width
                // minus the computed overlap.
                for k in (start + 1..=end).rev() {
                    let width = face.glyph_h_advance(buffer.info[k - 1].as_glyph()) as i32;

                    let mut repeat = 1;
                    if buffer.info[k - 1].arabic_shaping_action() == action::STRETCHING_REPEATING {
                        repeat += n_copies;
                    }

                    for n in 0..repeat {
                        x_offset -= width;
                        if n > 0 {
                            x_offset += extra_repeat_overlap;
                        }

                        buffer.pos[k - 1].x_offset = x_offset;

                        // Append copy.
                        j -= 1;
                        buffer.info[j] = buffer.info[k - 1];
                        buffer.pos[j] = buffer.pos[k - 1];
                    }
                }
            }

            i -= 1;
        }

        if step == MEASURE {
            // Make room for the copies before the CUT pass writes them.
            buffer.ensure(buffer.len + extra_glyphs_needed);
        } else {
            // Every slot of the enlarged buffer must have been written.
            debug_assert_eq!(j, 0);
            buffer.set_len(new_len);
        }
    }
}
446 | |
447 | // See: |
448 | // https://github.com/harfbuzz/harfbuzz/commit/6e6f82b6f3dde0fc6c3c7d991d9ec6cfff57823d#commitcomment-14248516 |
449 | fn is_word_category(gc: GeneralCategory) -> bool { |
450 | (rb_flag_unsafe(gc.to_rb()) |
451 | & (rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED) |
452 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE) |
453 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER) |
454 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) |
455 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
456 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
457 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) |
458 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) |
459 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER) |
460 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER) |
461 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL) |
462 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL) |
463 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL) |
464 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL))) |
465 | != 0 |
466 | } |
467 | |
468 | fn setup_masks(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
469 | let arabic_plan: &ArabicShapePlan = plan.data::<ArabicShapePlan>(); |
470 | setup_masks_inner(arabic_plan, plan.script, buffer) |
471 | } |
472 | |
473 | pub fn setup_masks_inner( |
474 | arabic_plan: &ArabicShapePlan, |
475 | script: Option<Script>, |
476 | buffer: &mut Buffer, |
477 | ) { |
478 | arabic_joining(buffer); |
479 | if script == Some(script::MONGOLIAN) { |
480 | mongolian_variation_selectors(buffer); |
481 | } |
482 | |
483 | for info: &mut GlyphInfo in buffer.info_slice_mut() { |
484 | info.mask |= arabic_plan.mask_array[info.arabic_shaping_action() as usize]; |
485 | } |
486 | } |
487 | |
488 | fn arabic_joining(buffer: &mut Buffer) { |
489 | let mut prev: Option<usize> = None; |
490 | let mut state = 0; |
491 | |
492 | // Check pre-context. |
493 | for i in 0..buffer.context_len[0] { |
494 | let c = buffer.context[0][i]; |
495 | let this_type = get_joining_type(c, c.general_category()); |
496 | if this_type == JoiningType::T { |
497 | continue; |
498 | } |
499 | |
500 | state = STATE_TABLE[state][this_type as usize].2 as usize; |
501 | break; |
502 | } |
503 | |
504 | for i in 0..buffer.len { |
505 | let this_type = |
506 | get_joining_type(buffer.info[i].as_char(), buffer.info[i].general_category()); |
507 | if this_type == JoiningType::T { |
508 | buffer.info[i].set_arabic_shaping_action(action::NONE); |
509 | continue; |
510 | } |
511 | |
512 | let entry = &STATE_TABLE[state][this_type as usize]; |
513 | if entry.0 != action::NONE && prev.is_some() { |
514 | if let Some(prev) = prev { |
515 | buffer.info[prev].set_arabic_shaping_action(entry.0); |
516 | buffer.unsafe_to_break(prev, i + 1); |
517 | } |
518 | } |
519 | |
520 | buffer.info[i].set_arabic_shaping_action(entry.1); |
521 | |
522 | prev = Some(i); |
523 | state = entry.2 as usize; |
524 | } |
525 | |
526 | for i in 0..buffer.context_len[1] { |
527 | let c = buffer.context[1][i]; |
528 | let this_type = get_joining_type(c, c.general_category()); |
529 | if this_type == JoiningType::T { |
530 | continue; |
531 | } |
532 | |
533 | let entry = &STATE_TABLE[state][this_type as usize]; |
534 | if entry.0 != action::NONE && prev.is_some() { |
535 | if let Some(prev) = prev { |
536 | buffer.info[prev].set_arabic_shaping_action(entry.0); |
537 | } |
538 | } |
539 | |
540 | break; |
541 | } |
542 | } |
543 | |
544 | fn mongolian_variation_selectors(buffer: &mut Buffer) { |
545 | // Copy arabic_shaping_action() from base to Mongolian variation selectors. |
546 | let len: usize = buffer.len; |
547 | let info: &mut Vec = &mut buffer.info; |
548 | for i: usize in 1..len { |
549 | if (0x180B..=0x180D).contains(&info[i].glyph_id) { |
550 | let a: u8 = info[i - 1].arabic_shaping_action(); |
551 | info[i].set_arabic_shaping_action(a); |
552 | } |
553 | } |
554 | } |
555 | |
556 | fn get_joining_type(u: char, gc: GeneralCategory) -> JoiningType { |
557 | let j_type: JoiningType = super::arabic_table::joining_type(u); |
558 | if j_type != JoiningType::X { |
559 | return j_type; |
560 | } |
561 | |
562 | let ok: u32 = rb_flag_unsafe(gc.to_rb()) |
563 | & (rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) |
564 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
565 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT)); |
566 | |
567 | if ok != 0 { |
568 | JoiningType::T |
569 | } else { |
570 | JoiningType::U |
571 | } |
572 | } |
573 | |
// Code points that `reorder_marks` moves to the front of a mark sequence.
// See http://www.unicode.org/reports/tr53/
const MODIFIER_COMBINING_MARKS: &[u32] = &[
    0x0654, // ARABIC HAMZA ABOVE
    0x0655, // ARABIC HAMZA BELOW
    0x0658, // ARABIC MARK NOON GHUNNA
    0x06DC, // ARABIC SMALL HIGH SEEN
    0x06E3, // ARABIC SMALL LOW SEEN
    0x06E7, // ARABIC SMALL HIGH YEH
    0x06E8, // ARABIC SMALL HIGH NOON
    0x08D3, // ARABIC SMALL LOW WAW
    0x08F3, // ARABIC SMALL HIGH WAW
];
586 | |
587 | fn reorder_marks(_: &ShapePlan, buffer: &mut Buffer, mut start: usize, end: usize) { |
588 | let mut i = start; |
589 | for cc in [220u8, 230].iter().cloned() { |
590 | while i < end && buffer.info[i].modified_combining_class() < cc { |
591 | i += 1; |
592 | } |
593 | |
594 | if i == end { |
595 | break; |
596 | } |
597 | |
598 | if buffer.info[i].modified_combining_class() > cc { |
599 | continue; |
600 | } |
601 | |
602 | let mut j = i; |
603 | while j < end |
604 | && buffer.info[j].modified_combining_class() == cc |
605 | && MODIFIER_COMBINING_MARKS.contains(&buffer.info[j].glyph_id) |
606 | { |
607 | j += 1; |
608 | } |
609 | |
610 | if i == j { |
611 | continue; |
612 | } |
613 | |
614 | // Shift it! |
615 | let mut temp = [GlyphInfo::default(); MAX_COMBINING_MARKS]; |
616 | debug_assert!(j - i <= MAX_COMBINING_MARKS); |
617 | buffer.merge_clusters(start, j); |
618 | |
619 | temp[..j - i].copy_from_slice(&buffer.info[i..j]); |
620 | |
621 | for k in (0..i - start).rev() { |
622 | buffer.info[k + start + j - i] = buffer.info[k + start]; |
623 | } |
624 | |
625 | buffer.info[start..][..j - i].copy_from_slice(&temp[..j - i]); |
626 | |
627 | // Renumber CC such that the reordered sequence is still sorted. |
628 | // 22 and 26 are chosen because they are smaller than all Arabic categories, |
629 | // and are folded back to 220/230 respectively during fallback mark positioning. |
630 | // |
631 | // We do this because the CGJ-handling logic in the normalizer relies on |
632 | // mark sequences having an increasing order even after this reordering. |
633 | // https://github.com/harfbuzz/harfbuzz/issues/554 |
634 | // This, however, does break some obscure sequences, where the normalizer |
635 | // might compose a sequence that it should not. For example, in the seequence |
636 | // ALEF, HAMZAH, MADDAH, we should NOT try to compose ALEF+MADDAH, but with this |
637 | // renumbering, we will. |
638 | let new_start = start + j - i; |
639 | let new_cc = if cc == 220 { |
640 | modified_combining_class::CCC22 |
641 | } else { |
642 | modified_combining_class::CCC26 |
643 | }; |
644 | |
645 | while start < new_start { |
646 | buffer.info[start].set_modified_combining_class(new_cc); |
647 | start += 1; |
648 | } |
649 | |
650 | i = j; |
651 | } |
652 | } |
653 | |