1 | use crate::Direction; |
2 | use alloc::boxed::Box; |
3 | |
4 | use super::algs::*; |
5 | use super::buffer::*; |
6 | use super::ot_layout::*; |
7 | use super::ot_map::*; |
8 | use super::ot_shape::*; |
9 | use super::ot_shape_normalize::HB_OT_SHAPE_NORMALIZATION_MODE_AUTO; |
10 | use super::ot_shape_plan::hb_ot_shape_plan_t; |
11 | use super::ot_shaper::*; |
12 | use super::unicode::*; |
13 | use super::{hb_font_t, hb_glyph_info_t, hb_mask_t, hb_tag_t, script, Script}; |
14 | |
/// Buffer scratch flag recording that at least one glyph was tagged for
/// 'stch' stretching. It is set by `record_stch` and checked by `apply_stch`;
/// it aliases `HB_BUFFER_SCRATCH_FLAG_SHAPER0`.
const HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH: hb_buffer_scratch_flags_t =
    HB_BUFFER_SCRATCH_FLAG_SHAPER0;
17 | |
18 | // See: |
19 | // https://github.com/harfbuzz/harfbuzz/commit/6e6f82b6f3dde0fc6c3c7d991d9ec6cfff57823d#commitcomment-14248516 |
20 | fn is_word_category(gc: hb_unicode_general_category_t) -> bool { |
21 | (rb_flag_unsafe(gc.to_rb()) |
22 | & (rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED) |
23 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE) |
24 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER) |
25 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) |
26 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
27 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
28 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) |
29 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) |
30 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER) |
31 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER) |
32 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL) |
33 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL) |
34 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL) |
35 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL))) |
36 | != 0 |
37 | } |
38 | |
/// Joining type of a character, as consumed by the Arabic joining state
/// machine.
///
/// The numeric values matter: `U` through `GroupDalathRish` are used as
/// column indices into `STATE_TABLE`, and the derived ordering is relied on
/// for comparisons such as `this_type >= R`.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub enum hb_arabic_joining_type_t {
    U = 0,
    L = 1,
    R = 2,
    D = 3,
    // Unlike harfbuzz there is no `C` variant: Rust does not allow two enum
    // variants to share a discriminant.
    GroupAlaph = 4,
    GroupDalathRish = 5,
    /// Skipped by the joining state machine (see `arabic_joining`).
    T = 7,
    /// Placeholder meaning "use the general category to choose between U or T".
    X = 8,
}
51 | |
52 | fn get_joining_type(u: char, gc: hb_unicode_general_category_t) -> hb_arabic_joining_type_t { |
53 | let j_type: hb_arabic_joining_type_t = super::ot_shaper_arabic_table::joining_type(u); |
54 | if j_type != hb_arabic_joining_type_t::X { |
55 | return j_type; |
56 | } |
57 | |
58 | let ok: u32 = rb_flag_unsafe(gc.to_rb()) |
59 | & (rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) |
60 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
61 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT)); |
62 | |
63 | if ok != 0 { |
64 | hb_arabic_joining_type_t::T |
65 | } else { |
66 | hb_arabic_joining_type_t::U |
67 | } |
68 | } |
69 | |
70 | fn feature_is_syriac(tag: hb_tag_t) -> bool { |
71 | matches!(tag.to_bytes()[3], b'2' | b'3' ) |
72 | } |
73 | |
/// Positional-form features, listed in the same order as the
/// `arabic_action_t` constants `ISOL..=INIT`, so that an action value can be
/// used directly as an index into the per-feature mask array.
const ARABIC_FEATURES: &[hb_tag_t] = &[
    hb_tag_t::from_bytes(b"isol"),
    hb_tag_t::from_bytes(b"fina"),
    hb_tag_t::from_bytes(b"fin2"),
    hb_tag_t::from_bytes(b"fin3"),
    hb_tag_t::from_bytes(b"medi"),
    hb_tag_t::from_bytes(b"med2"),
    hb_tag_t::from_bytes(b"init"),
];
83 | |
/// Shaping actions stored in each glyph's auxiliary byte.
///
/// Values `ISOL..=INIT` double as indices into `ARABIC_FEATURES`; `NONE`
/// indexes the extra, always-zero slot of the mask array.
mod arabic_action_t {
    pub const ISOL: u8 = 0;
    pub const FINA: u8 = 1;
    pub const FIN2: u8 = 2;
    pub const FIN3: u8 = 3;
    pub const MEDI: u8 = 4;
    pub const MED2: u8 = 5;
    pub const INIT: u8 = 6;
    pub const NONE: u8 = 7;

    // The same byte is reused to tag glyphs for 'stch' stretching.
    pub const STRETCHING_FIXED: u8 = 8;
    pub const STRETCHING_REPEATING: u8 = 9;

    /// Returns `true` when `n` is one of the two stretching tags.
    #[inline]
    pub fn is_stch(n: u8) -> bool {
        n == STRETCHING_FIXED || n == STRETCHING_REPEATING
    }
}
103 | |
/// Joining state machine used by `arabic_joining`.
///
/// Rows are indexed by the current state, columns by the joining type of the
/// current character (cast to `usize`). Each entry is
/// `(prev_action, curr_action, next_state)`: the action to assign to the
/// previous non-transparent glyph (`NONE` leaves it untouched), the action
/// for the current glyph, and the state to move to.
const STATE_TABLE: &[[(u8, u8, u16); 6]] = &[
    // jt_U,          jt_L,          jt_R,
    // jt_D,          jg_ALAPH,      jg_DALATH_RISH

    // State 0: prev was U, not willing to join.
    [
        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 1),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 1),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 6),
    ],
    // State 1: prev was R or action::ISOL/ALAPH, not willing to join.
    [
        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 1),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::NONE, arabic_action_t::FIN2, 5),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 6),
    ],
    // State 2: prev was D/L in action::ISOL form, willing to join.
    [
        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::INIT, arabic_action_t::FINA, 1),
        (arabic_action_t::INIT, arabic_action_t::FINA, 3),
        (arabic_action_t::INIT, arabic_action_t::FINA, 4),
        (arabic_action_t::INIT, arabic_action_t::FINA, 6),
    ],
    // State 3: prev was D in action::FINA form, willing to join.
    [
        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::MEDI, arabic_action_t::FINA, 1),
        (arabic_action_t::MEDI, arabic_action_t::FINA, 3),
        (arabic_action_t::MEDI, arabic_action_t::FINA, 4),
        (arabic_action_t::MEDI, arabic_action_t::FINA, 6),
    ],
    // State 4: prev was action::FINA ALAPH, not willing to join.
    [
        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::MED2, arabic_action_t::ISOL, 1),
        (arabic_action_t::MED2, arabic_action_t::ISOL, 2),
        (arabic_action_t::MED2, arabic_action_t::FIN2, 5),
        (arabic_action_t::MED2, arabic_action_t::ISOL, 6),
    ],
    // State 5: prev was FIN2/FIN3 ALAPH, not willing to join.
    [
        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::ISOL, arabic_action_t::ISOL, 1),
        (arabic_action_t::ISOL, arabic_action_t::ISOL, 2),
        (arabic_action_t::ISOL, arabic_action_t::FIN2, 5),
        (arabic_action_t::ISOL, arabic_action_t::ISOL, 6),
    ],
    // State 6: prev was DALATH/RISH, not willing to join.
    [
        (arabic_action_t::NONE, arabic_action_t::NONE, 0),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 1),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 2),
        (arabic_action_t::NONE, arabic_action_t::FIN3, 5),
        (arabic_action_t::NONE, arabic_action_t::ISOL, 6),
    ],
];
172 | |
// Arabic-shaper view of the glyph's auxiliary shaper byte: it holds one of
// the `arabic_action_t` values.
impl hb_glyph_info_t {
    /// Reads the shaping action stored in the auxiliary shaper byte.
    fn arabic_shaping_action(&self) -> u8 {
        self.ot_shaper_var_u8_auxiliary()
    }

    /// Stores `action` in the auxiliary shaper byte.
    fn set_arabic_shaping_action(&mut self, action: u8) {
        self.set_ot_shaper_var_u8_auxiliary(action)
    }
}
182 | |
183 | fn collect_features(planner: &mut hb_ot_shape_planner_t) { |
184 | // We apply features according to the Arabic spec, with pauses |
185 | // in between most. |
186 | // |
187 | // The pause between init/medi/... and rlig is required. See eg: |
188 | // https://bugzilla.mozilla.org/show_bug.cgi?id=644184 |
189 | // |
190 | // The pauses between init/medi/... themselves are not necessarily |
191 | // needed as only one of those features is applied to any character. |
192 | // The only difference it makes is when fonts have contextual |
193 | // substitutions. We now follow the order of the spec, which makes |
194 | // for better experience if that's what Uniscribe is doing. |
195 | // |
196 | // At least for Arabic, looks like Uniscribe has a pause between |
197 | // rlig and calt. Otherwise the IranNastaliq's ALLAH ligature won't |
198 | // work. However, testing shows that rlig and calt are applied |
199 | // together for Mongolian in Uniscribe. As such, we only add a |
200 | // pause for Arabic, not other scripts. |
201 | // |
202 | // A pause after calt is required to make KFGQPC Uthmanic Script HAFS |
203 | // work correctly. See https://github.com/harfbuzz/harfbuzz/issues/505 |
204 | |
205 | planner |
206 | .ot_map |
207 | .enable_feature(hb_tag_t::from_bytes(b"stch" ), F_NONE, 1); |
208 | planner.ot_map.add_gsub_pause(Some(record_stch)); |
209 | |
210 | planner |
211 | .ot_map |
212 | .enable_feature(hb_tag_t::from_bytes(b"ccmp" ), F_MANUAL_ZWJ, 1); |
213 | planner |
214 | .ot_map |
215 | .enable_feature(hb_tag_t::from_bytes(b"locl" ), F_MANUAL_ZWJ, 1); |
216 | |
217 | planner.ot_map.add_gsub_pause(None); |
218 | |
219 | for feature in ARABIC_FEATURES { |
220 | let has_fallback = planner.script == Some(script::ARABIC) && !feature_is_syriac(*feature); |
221 | let flags = if has_fallback { F_HAS_FALLBACK } else { F_NONE }; |
222 | planner |
223 | .ot_map |
224 | .add_feature(*feature, F_MANUAL_ZWJ | flags, 1); |
225 | planner.ot_map.add_gsub_pause(None); |
226 | } |
227 | |
228 | // Normally, Unicode says a ZWNJ means "don't ligate". In Arabic script |
229 | // however, it says a ZWJ should also mean "don't ligate". So we run |
230 | // the main ligating features as MANUAL_ZWJ. |
231 | |
232 | planner.ot_map.enable_feature( |
233 | hb_tag_t::from_bytes(b"rlig" ), |
234 | F_MANUAL_ZWJ | F_HAS_FALLBACK, |
235 | 1, |
236 | ); |
237 | |
238 | if planner.script == Some(script::ARABIC) { |
239 | planner.ot_map.add_gsub_pause(Some(arabic_fallback_shape)); |
240 | } |
241 | |
242 | // No pause after rclt. |
243 | // See 98460779bae19e4d64d29461ff154b3527bf8420 |
244 | planner |
245 | .ot_map |
246 | .enable_feature(hb_tag_t::from_bytes(b"calt" ), F_MANUAL_ZWJ, 1); |
247 | /* https://github.com/harfbuzz/harfbuzz/issues/1573 */ |
248 | if !planner.ot_map.has_feature(hb_tag_t::from_bytes(b"rclt" )) { |
249 | planner.ot_map.add_gsub_pause(None); |
250 | } |
251 | |
252 | planner |
253 | .ot_map |
254 | .enable_feature(hb_tag_t::from_bytes(b"liga" ), F_MANUAL_ZWJ, 1); |
255 | planner |
256 | .ot_map |
257 | .enable_feature(hb_tag_t::from_bytes(b"clig" ), F_MANUAL_ZWJ, 1); |
258 | |
259 | // The spec includes 'cswh'. Earlier versions of Windows |
260 | // used to enable this by default, but testing suggests |
261 | // that Windows 8 and later do not enable it by default, |
262 | // and spec now says 'Off by default'. |
263 | // We disabled this in ae23c24c32. |
264 | // Note that IranNastaliq uses this feature extensively |
265 | // to fixup broken glyph sequences. Oh well... |
266 | // Test case: U+0643,U+0640,U+0631. |
267 | |
268 | // planner.ot_map.enable_feature(feature::CONTEXTUAL_SWASH, F_MANUAL_ZWJ, 1); |
269 | planner |
270 | .ot_map |
271 | .enable_feature(hb_tag_t::from_bytes(b"mset" ), F_MANUAL_ZWJ, 1); |
272 | } |
273 | |
/// Per-plan data of the Arabic shaper, built by `data_create_arabic`.
pub struct arabic_shape_plan_t {
    // The "+ 1" in the next array is to accommodate for the "NONE" command,
    // which is not an OpenType feature, but this simplifies the code by not
    // having to do a "if (... < NONE) ..." and just rely on the fact that
    // mask_array[NONE] == 0.
    /// Feature mask for each shaping action, indexed by the action value.
    mask_array: [hb_mask_t; ARABIC_FEATURES.len() + 1],
    /// Whether the plan's map reported a non-zero mask for the 'stch' feature.
    has_stch: bool,
}
282 | |
283 | pub fn data_create_arabic(plan: &hb_ot_shape_plan_t) -> arabic_shape_plan_t { |
284 | let has_stch: bool = plan.ot_map.get_1_mask(feature_tag:hb_tag_t::from_bytes(b"stch" )) != 0; |
285 | |
286 | let mut mask_array: [u32; 8] = [0; ARABIC_FEATURES.len() + 1]; |
287 | for i: usize in 0..ARABIC_FEATURES.len() { |
288 | mask_array[i] = plan.ot_map.get_1_mask(ARABIC_FEATURES[i]); |
289 | } |
290 | |
291 | arabic_shape_plan_t { |
292 | mask_array, |
293 | has_stch, |
294 | } |
295 | } |
296 | |
297 | fn arabic_joining(buffer: &mut hb_buffer_t) { |
298 | let mut prev: Option<usize> = None; |
299 | let mut state = 0; |
300 | |
301 | // Check pre-context. |
302 | for i in 0..buffer.context_len[0] { |
303 | let c = buffer.context[0][i]; |
304 | let this_type = get_joining_type(c, c.general_category()); |
305 | if this_type == hb_arabic_joining_type_t::T { |
306 | continue; |
307 | } |
308 | |
309 | state = STATE_TABLE[state][this_type as usize].2 as usize; |
310 | break; |
311 | } |
312 | |
313 | for i in 0..buffer.len { |
314 | let this_type = get_joining_type( |
315 | buffer.info[i].as_char(), |
316 | _hb_glyph_info_get_general_category(&buffer.info[i]), |
317 | ); |
318 | if this_type == hb_arabic_joining_type_t::T { |
319 | buffer.info[i].set_arabic_shaping_action(arabic_action_t::NONE); |
320 | continue; |
321 | } |
322 | |
323 | let entry = &STATE_TABLE[state][this_type as usize]; |
324 | if entry.0 != arabic_action_t::NONE && prev.is_some() { |
325 | if let Some(prev) = prev { |
326 | buffer.info[prev].set_arabic_shaping_action(entry.0); |
327 | buffer.safe_to_insert_tatweel(Some(prev), Some(i + 1)); |
328 | } |
329 | } |
330 | // States that have a possible prev_action. |
331 | else { |
332 | if let Some(prev) = prev { |
333 | if this_type >= hb_arabic_joining_type_t::R || (2 <= state && state <= 5) { |
334 | buffer.unsafe_to_concat(Some(prev), Some(i + 1)); |
335 | } |
336 | } else { |
337 | if this_type >= hb_arabic_joining_type_t::R { |
338 | buffer.unsafe_to_concat_from_outbuffer(Some(0), Some(i + 1)); |
339 | } |
340 | } |
341 | } |
342 | |
343 | buffer.info[i].set_arabic_shaping_action(entry.1); |
344 | |
345 | prev = Some(i); |
346 | state = entry.2 as usize; |
347 | } |
348 | |
349 | for i in 0..buffer.context_len[1] { |
350 | let c = buffer.context[1][i]; |
351 | let this_type = get_joining_type(c, c.general_category()); |
352 | if this_type == hb_arabic_joining_type_t::T { |
353 | continue; |
354 | } |
355 | |
356 | let entry = &STATE_TABLE[state][this_type as usize]; |
357 | if entry.0 != arabic_action_t::NONE && prev.is_some() { |
358 | if let Some(prev) = prev { |
359 | buffer.info[prev].set_arabic_shaping_action(entry.0); |
360 | buffer.safe_to_insert_tatweel(Some(prev), Some(buffer.len)); |
361 | } |
362 | } |
363 | // States that have a possible prev_action. |
364 | else if 2 <= state && state <= 5 { |
365 | if let Some(prev) = prev { |
366 | buffer.unsafe_to_concat(Some(prev), Some(buffer.len)); |
367 | } |
368 | } |
369 | |
370 | break; |
371 | } |
372 | } |
373 | |
374 | fn mongolian_variation_selectors(buffer: &mut hb_buffer_t) { |
375 | // Copy arabic_shaping_action() from base to Mongolian variation selectors. |
376 | let len: usize = buffer.len; |
377 | let info: &mut Vec = &mut buffer.info; |
378 | for i: usize in 1..len { |
379 | if (0x180B..=0x180D).contains(&info[i].glyph_id) || info[i].glyph_id == 0x180F { |
380 | let a: u8 = info[i - 1].arabic_shaping_action(); |
381 | info[i].set_arabic_shaping_action(a); |
382 | } |
383 | } |
384 | } |
385 | |
386 | fn setup_masks_arabic_plan(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) { |
387 | let arabic_plan: &arabic_shape_plan_t = plan.data::<arabic_shape_plan_t>(); |
388 | setup_masks_inner(arabic_plan, plan.script, buffer) |
389 | } |
390 | |
391 | pub fn setup_masks_inner( |
392 | arabic_plan: &arabic_shape_plan_t, |
393 | script: Option<Script>, |
394 | buffer: &mut hb_buffer_t, |
395 | ) { |
396 | arabic_joining(buffer); |
397 | if script == Some(script::MONGOLIAN) { |
398 | mongolian_variation_selectors(buffer); |
399 | } |
400 | |
401 | for info: &mut hb_glyph_info_t in buffer.info_slice_mut() { |
402 | info.mask |= arabic_plan.mask_array[info.arabic_shaping_action() as usize]; |
403 | } |
404 | } |
405 | |
/// GSUB pause callback registered after 'rlig' for the Arabic script.
/// Currently a stub: it ignores all arguments and always returns `false`.
fn arabic_fallback_shape(_: &hb_ot_shape_plan_t, _: &hb_font_t, _: &mut hb_buffer_t) -> bool {
    false
}
409 | |
410 | // Stretch feature: "stch". |
411 | // See example here: |
412 | // https://docs.microsoft.com/en-us/typography/script-development/syriac |
413 | // We implement this in a generic way, such that the Arabic subtending |
414 | // marks can use it as well. |
415 | fn record_stch(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) -> bool { |
416 | let arabic_plan = plan.data::<arabic_shape_plan_t>(); |
417 | if !arabic_plan.has_stch { |
418 | return false; |
419 | } |
420 | |
421 | // 'stch' feature was just applied. Look for anything that multiplied, |
422 | // and record it for stch treatment later. Note that rtlm, frac, etc |
423 | // are applied before stch, but we assume that they didn't result in |
424 | // anything multiplying into 5 pieces, so it's safe-ish... |
425 | |
426 | let len = buffer.len; |
427 | let info = &mut buffer.info; |
428 | let mut has_stch = false; |
429 | for glyph_info in &mut info[..len] { |
430 | if _hb_glyph_info_multiplied(glyph_info) { |
431 | let comp = if _hb_glyph_info_get_lig_comp(glyph_info) % 2 != 0 { |
432 | arabic_action_t::STRETCHING_REPEATING |
433 | } else { |
434 | arabic_action_t::STRETCHING_FIXED |
435 | }; |
436 | |
437 | glyph_info.set_arabic_shaping_action(comp); |
438 | has_stch = true; |
439 | } |
440 | } |
441 | |
442 | if has_stch { |
443 | buffer.scratch_flags |= HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH; |
444 | } |
445 | |
446 | false |
447 | } |
448 | |
/// Applies the 'stch' stretching recorded by `record_stch`.
///
/// Each run of glyphs tagged as stretch tiles is widened by repeating its
/// repeating tiles, so that the run covers the summed advance of the word
/// context found at the lower indices next to it. Does nothing unless the
/// buffer's "has stch" scratch flag is set. Buffers that are not
/// right-to-left are reversed before processing and reversed back afterwards.
fn apply_stch(face: &hb_font_t, buffer: &mut hb_buffer_t) {
    if buffer.scratch_flags & HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH == 0 {
        return;
    }

    let rtl = buffer.direction == Direction::RightToLeft;

    if !rtl {
        buffer.reverse();
    }

    // We do a two pass implementation:
    // First pass calculates the exact number of extra glyphs we need,
    // We then enlarge buffer to have that much room,
    // Second pass applies the stretch, copying things to the end of buffer.

    let mut extra_glyphs_needed: usize = 0; // Set during MEASURE, used during CUT
    const MEASURE: usize = 0;
    const CUT: usize = 1;

    for step in 0..2 {
        let new_len = buffer.len + extra_glyphs_needed; // write head during CUT
        // `i` walks the existing glyphs from the end; `j` is the write
        // position in the enlarged buffer, also moving towards the front.
        let mut i = buffer.len;
        let mut j = new_len;
        while i != 0 {
            if !arabic_action_t::is_stch(buffer.info[i - 1].arabic_shaping_action()) {
                // Ordinary glyph: during CUT just move it to its new slot.
                if step == CUT {
                    j -= 1;
                    buffer.info[j] = buffer.info[i - 1];
                    buffer.pos[j] = buffer.pos[i - 1];
                }

                i -= 1;
                continue;
            }

            // Yay, justification!

            let mut w_total = 0; // Total to be filled
            let mut w_fixed = 0; // Sum of fixed tiles
            let mut w_repeating = 0; // Sum of repeating tiles
            let mut n_repeating: i32 = 0;

            // Collect the whole run of stretch tiles: [start, end).
            let end = i;
            while i != 0 && arabic_action_t::is_stch(buffer.info[i - 1].arabic_shaping_action()) {
                i -= 1;
                let width = face.glyph_h_advance(buffer.info[i].as_glyph());

                if buffer.info[i].arabic_shaping_action() == arabic_action_t::STRETCHING_FIXED {
                    w_fixed += width;
                } else {
                    w_repeating += width;
                    n_repeating += 1;
                }
            }

            // Extend `context` over the adjacent default-ignorable or
            // word-category glyphs, summing their advances into `w_total`.
            let start = i;
            let mut context = i;
            while context != 0
                && !arabic_action_t::is_stch(buffer.info[context - 1].arabic_shaping_action())
                && (_hb_glyph_info_is_default_ignorable(&buffer.info[context - 1])
                    || is_word_category(_hb_glyph_info_get_general_category(
                        &buffer.info[context - 1],
                    )))
            {
                context -= 1;
                w_total += buffer.pos[context].x_advance;
            }

            // Compensates for the unconditional `i -= 1` at the end of the
            // outer loop body.
            i += 1; // Don't touch i again.

            // Number of additional times to repeat each repeating tile.
            let mut n_copies: i32 = 0;

            let mut w_remaining = w_total - w_fixed;
            if w_remaining > w_repeating && w_repeating > 0 {
                n_copies = w_remaining / (w_repeating) - 1;
            }

            // See if we can improve the fit by adding an extra repeat and squeezing them together a bit.
            let mut extra_repeat_overlap = 0;
            let shortfall = w_remaining - w_repeating * (n_copies + 1);
            if shortfall > 0 && n_repeating > 0 {
                n_copies += 1;
                let excess = (n_copies + 1) * w_repeating - w_remaining;
                if excess > 0 {
                    extra_repeat_overlap = excess / (n_copies * n_repeating);
                    w_remaining = 0;
                }
            }

            if step == MEASURE {
                extra_glyphs_needed += (n_copies * n_repeating) as usize;
            } else {
                buffer.unsafe_to_break(Some(context), Some(end));
                let mut x_offset = w_remaining / 2;
                for k in (start + 1..=end).rev() {
                    let width = face.glyph_h_advance(buffer.info[k - 1].as_glyph());

                    let mut repeat = 1;
                    if buffer.info[k - 1].arabic_shaping_action()
                        == arabic_action_t::STRETCHING_REPEATING
                    {
                        repeat += n_copies;
                    }

                    // Tiles take no advance of their own; they are placed
                    // purely through `x_offset`.
                    buffer.pos[k - 1].x_advance = 0;

                    for n in 0..repeat {
                        if rtl {
                            x_offset -= width;
                            if n > 0 {
                                x_offset += extra_repeat_overlap;
                            }
                        }

                        buffer.pos[k - 1].x_offset = x_offset;

                        // Append copy.
                        j -= 1;
                        buffer.info[j] = buffer.info[k - 1];
                        buffer.pos[j] = buffer.pos[k - 1];

                        if !rtl {
                            x_offset += width;

                            if n > 0 {
                                x_offset -= extra_repeat_overlap;
                            }
                        }
                    }
                }
            }

            i -= 1;
        }

        if step == MEASURE {
            // Give up without stretching if the buffer cannot be enlarged.
            if !buffer.ensure(buffer.len + extra_glyphs_needed) {
                break;
            }
        } else {
            debug_assert_eq!(j, 0);
            buffer.len = new_len;
        }
    }

    if !rtl {
        buffer.reverse();
    }
}
600 | |
/// `postprocess_glyphs` hook of the Arabic shaper: applies 'stch' stretching
/// via `apply_stch`. The shape plan is not used.
fn postprocess_glyphs_arabic(_: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) {
    apply_stch(face, buffer)
}
604 | |
// http://www.unicode.org/reports/tr53/
/// Code points that `reorder_marks_arabic` treats as modifier combining
/// marks and moves to the front of their mark sequence.
const MODIFIER_COMBINING_MARKS: &[u32] = &[
    0x0654, // ARABIC HAMZA ABOVE
    0x0655, // ARABIC HAMZA BELOW
    0x0658, // ARABIC MARK NOON GHUNNA
    0x06DC, // ARABIC SMALL HIGH SEEN
    0x06E3, // ARABIC SMALL LOW SEEN
    0x06E7, // ARABIC SMALL HIGH YEH
    0x06E8, // ARABIC SMALL HIGH NOON
    0x08CA, // ARABIC SMALL HIGH FARSI YEH
    0x08CB, // ARABIC SMALL HIGH YEH BARREE WITH TWO DOTS BELOW
    0x08CD, // ARABIC SMALL HIGH ZAH
    0x08CE, // ARABIC LARGE ROUND DOT ABOVE
    0x08CF, // ARABIC LARGE ROUND DOT BELOW
    0x08D3, // ARABIC SMALL LOW WAW
    0x08F3, // ARABIC SMALL HIGH WAW
];
622 | |
/// `reorder_marks` hook of the Arabic shaper.
///
/// Within `buffer.info[start..end]`, for combining class 220 and then 230,
/// finds the leading run of that class made of `MODIFIER_COMBINING_MARKS`,
/// moves the run to the front of the range and renumbers its combining
/// class so the sequence stays sorted.
fn reorder_marks_arabic(
    _: &hb_ot_shape_plan_t,
    buffer: &mut hb_buffer_t,
    mut start: usize,
    end: usize,
) {
    let mut i = start;
    for cc in [220u8, 230].iter().cloned() {
        // Skip marks with a lower combining class than the one handled now.
        while i < end && _hb_glyph_info_get_modified_combining_class(&buffer.info[i]) < cc {
            i += 1;
        }

        if i == end {
            break;
        }

        if _hb_glyph_info_get_modified_combining_class(&buffer.info[i]) > cc {
            continue;
        }

        // [i, j) is the run of modifier combining marks of class `cc`.
        let mut j = i;
        while j < end
            && _hb_glyph_info_get_modified_combining_class(&buffer.info[j]) == cc
            && MODIFIER_COMBINING_MARKS.contains(&buffer.info[j].glyph_id)
        {
            j += 1;
        }

        if i == j {
            continue;
        }

        // Shift it!
        let mut temp = [hb_glyph_info_t::default(); MAX_COMBINING_MARKS];
        debug_assert!(j - i <= MAX_COMBINING_MARKS);
        buffer.merge_clusters(start, j);

        temp[..j - i].copy_from_slice(&buffer.info[i..j]);

        // Move [start, i) up by (j - i) slots, back to front so that the
        // overlapping copy does not clobber unread entries.
        for k in (0..i - start).rev() {
            buffer.info[k + start + j - i] = buffer.info[k + start];
        }

        buffer.info[start..][..j - i].copy_from_slice(&temp[..j - i]);

        // Renumber CC such that the reordered sequence is still sorted.
        // 22 and 26 are chosen because they are smaller than all Arabic categories,
        // and are folded back to 220/230 respectively during fallback mark positioning.
        //
        // We do this because the CGJ-handling logic in the normalizer relies on
        // mark sequences having an increasing order even after this reordering.
        // https://github.com/harfbuzz/harfbuzz/issues/554
        // This, however, does break some obscure sequences, where the normalizer
        // might compose a sequence that it should not.  For example, in the sequence
        // ALEF, HAMZAH, MADDAH, we should NOT try to compose ALEF+MADDAH, but with this
        // renumbering, we will.
        let new_start = start + j - i;
        let new_cc = if cc == 220 {
            modified_combining_class::CCC22
        } else {
            modified_combining_class::CCC26
        };

        while start < new_start {
            _hb_glyph_info_set_modified_combining_class(&mut buffer.info[start], new_cc);
            start += 1;
        }

        i = j;
    }
}
694 | |
/// Shaper descriptor for the Arabic shaper: wires the functions defined in
/// this file into the generic OpenType shaping hooks. Hooks set to `None`
/// are not provided by this shaper.
pub const ARABIC_SHAPER: hb_ot_shaper_t = hb_ot_shaper_t {
    collect_features: Some(collect_features),
    override_features: None,
    // Plan data is the boxed `arabic_shape_plan_t` built from the plan.
    create_data: Some(|plan: &hb_ot_shape_plan_t| Box::new(data_create_arabic(plan))),
    preprocess_text: None,
    postprocess_glyphs: Some(postprocess_glyphs_arabic),
    normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_AUTO,
    decompose: None,
    compose: None,
    setup_masks: Some(setup_masks_arabic_plan),
    gpos_tag: None,
    reorder_marks: Some(reorder_marks_arabic),
    zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
    fallback_position: true,
};
710 | |