| 1 | use crate::Direction; |
| 2 | use alloc::boxed::Box; |
| 3 | |
| 4 | use super::algs::*; |
| 5 | use super::buffer::*; |
| 6 | use super::ot_layout::*; |
| 7 | use super::ot_map::*; |
| 8 | use super::ot_shape::*; |
| 9 | use super::ot_shape_normalize::HB_OT_SHAPE_NORMALIZATION_MODE_AUTO; |
| 10 | use super::ot_shape_plan::hb_ot_shape_plan_t; |
| 11 | use super::ot_shaper::*; |
| 12 | use super::unicode::*; |
| 13 | use super::{hb_font_t, hb_glyph_info_t, hb_mask_t, hb_tag_t, script, Script}; |
| 14 | |
| 15 | const HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH: hb_buffer_scratch_flags_t = |
| 16 | HB_BUFFER_SCRATCH_FLAG_SHAPER0; |
| 17 | |
| 18 | // See: |
| 19 | // https://github.com/harfbuzz/harfbuzz/commit/6e6f82b6f3dde0fc6c3c7d991d9ec6cfff57823d#commitcomment-14248516 |
| 20 | fn is_word_category(gc: hb_unicode_general_category_t) -> bool { |
| 21 | (rb_flag_unsafe(gc.to_rb()) |
| 22 | & (rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_UNASSIGNED) |
| 23 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE) |
| 24 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER) |
| 25 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) |
| 26 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
| 27 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
| 28 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) |
| 29 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) |
| 30 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER) |
| 31 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER) |
| 32 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL) |
| 33 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL) |
| 34 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL) |
| 35 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL))) |
| 36 | != 0 |
| 37 | } |
| 38 | |
/// Joining type of a character as used by the Arabic joining state machine.
///
/// The discriminants of `U` through `GroupDalathRish` (0..=5) are used directly
/// as column indices into `STATE_TABLE`, and the derived ordering is relied on
/// by `arabic_joining` (`>= R`), so the values must not be renumbered.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub enum hb_arabic_joining_type_t {
    /// Non-joining.
    U = 0,
    /// Left-joining.
    L = 1,
    /// Right-joining.
    R = 2,
    /// Dual-joining.
    D = 3,
    // We don't have C, like harfbuzz, because Rust doesn't allow duplicated enum variants.
    /// Member of the Alaph joining group.
    GroupAlaph = 4,
    /// Member of the Dalath/Rish joining group.
    GroupDalathRish = 5,
    /// Transparent: skipped by the joining state machine.
    T = 7,
    /// Placeholder meaning: use general-category to choose between `U` or `T`.
    X = 8,
}
| 51 | |
| 52 | fn get_joining_type(u: char, gc: hb_unicode_general_category_t) -> hb_arabic_joining_type_t { |
| 53 | let j_type: hb_arabic_joining_type_t = super::ot_shaper_arabic_table::joining_type(u); |
| 54 | if j_type != hb_arabic_joining_type_t::X { |
| 55 | return j_type; |
| 56 | } |
| 57 | |
| 58 | let ok: u32 = rb_flag_unsafe(gc.to_rb()) |
| 59 | & (rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) |
| 60 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
| 61 | | rb_flag(hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT)); |
| 62 | |
| 63 | if ok != 0 { |
| 64 | hb_arabic_joining_type_t::T |
| 65 | } else { |
| 66 | hb_arabic_joining_type_t::U |
| 67 | } |
| 68 | } |
| 69 | |
| 70 | fn feature_is_syriac(tag: hb_tag_t) -> bool { |
| 71 | matches!(tag.to_bytes()[3], b'2' | b'3' ) |
| 72 | } |
| 73 | |
| 74 | const ARABIC_FEATURES: &[hb_tag_t] = &[ |
| 75 | hb_tag_t::from_bytes(b"isol" ), |
| 76 | hb_tag_t::from_bytes(b"fina" ), |
| 77 | hb_tag_t::from_bytes(b"fin2" ), |
| 78 | hb_tag_t::from_bytes(b"fin3" ), |
| 79 | hb_tag_t::from_bytes(b"medi" ), |
| 80 | hb_tag_t::from_bytes(b"med2" ), |
| 81 | hb_tag_t::from_bytes(b"init" ), |
| 82 | ]; |
| 83 | |
/// Per-glyph shaping actions, stored as a plain `u8`.
mod arabic_action_t {
    // Positional forms. Values 0..=6 line up with the feature order used by
    // the shaper's feature list; `NONE` is the extra trailing slot.
    pub const ISOL: u8 = 0;
    pub const FINA: u8 = 1;
    pub const FIN2: u8 = 2;
    pub const FIN3: u8 = 3;
    pub const MEDI: u8 = 4;
    pub const MED2: u8 = 5;
    pub const INIT: u8 = 6;
    pub const NONE: u8 = 7;

    // We abuse the same byte for other things...
    pub const STRETCHING_FIXED: u8 = 8;
    pub const STRETCHING_REPEATING: u8 = 9;

    /// Returns `true` when `n` is one of the two stretching markers.
    #[inline]
    pub fn is_stch(n: u8) -> bool {
        n == STRETCHING_FIXED || n == STRETCHING_REPEATING
    }
}
| 103 | |
| 104 | const STATE_TABLE: &[[(u8, u8, u16); 6]] = &[ |
| 105 | // jt_U, jt_L, jt_R, |
| 106 | // jt_D, jg_ALAPH, jg_DALATH_RISH |
| 107 | |
| 108 | // State 0: prev was U, not willing to join. |
| 109 | [ |
| 110 | (arabic_action_t::NONE, arabic_action_t::NONE, 0), |
| 111 | (arabic_action_t::NONE, arabic_action_t::ISOL, 2), |
| 112 | (arabic_action_t::NONE, arabic_action_t::ISOL, 1), |
| 113 | (arabic_action_t::NONE, arabic_action_t::ISOL, 2), |
| 114 | (arabic_action_t::NONE, arabic_action_t::ISOL, 1), |
| 115 | (arabic_action_t::NONE, arabic_action_t::ISOL, 6), |
| 116 | ], |
| 117 | // State 1: prev was R or action::ISOL/ALAPH, not willing to join. |
| 118 | [ |
| 119 | (arabic_action_t::NONE, arabic_action_t::NONE, 0), |
| 120 | (arabic_action_t::NONE, arabic_action_t::ISOL, 2), |
| 121 | (arabic_action_t::NONE, arabic_action_t::ISOL, 1), |
| 122 | (arabic_action_t::NONE, arabic_action_t::ISOL, 2), |
| 123 | (arabic_action_t::NONE, arabic_action_t::FIN2, 5), |
| 124 | (arabic_action_t::NONE, arabic_action_t::ISOL, 6), |
| 125 | ], |
| 126 | // State 2: prev was D/L in action::ISOL form, willing to join. |
| 127 | [ |
| 128 | (arabic_action_t::NONE, arabic_action_t::NONE, 0), |
| 129 | (arabic_action_t::NONE, arabic_action_t::ISOL, 2), |
| 130 | (arabic_action_t::INIT, arabic_action_t::FINA, 1), |
| 131 | (arabic_action_t::INIT, arabic_action_t::FINA, 3), |
| 132 | (arabic_action_t::INIT, arabic_action_t::FINA, 4), |
| 133 | (arabic_action_t::INIT, arabic_action_t::FINA, 6), |
| 134 | ], |
| 135 | // State 3: prev was D in action::FINA form, willing to join. |
| 136 | [ |
| 137 | (arabic_action_t::NONE, arabic_action_t::NONE, 0), |
| 138 | (arabic_action_t::NONE, arabic_action_t::ISOL, 2), |
| 139 | (arabic_action_t::MEDI, arabic_action_t::FINA, 1), |
| 140 | (arabic_action_t::MEDI, arabic_action_t::FINA, 3), |
| 141 | (arabic_action_t::MEDI, arabic_action_t::FINA, 4), |
| 142 | (arabic_action_t::MEDI, arabic_action_t::FINA, 6), |
| 143 | ], |
| 144 | // State 4: prev was action::FINA ALAPH, not willing to join. |
| 145 | [ |
| 146 | (arabic_action_t::NONE, arabic_action_t::NONE, 0), |
| 147 | (arabic_action_t::NONE, arabic_action_t::ISOL, 2), |
| 148 | (arabic_action_t::MED2, arabic_action_t::ISOL, 1), |
| 149 | (arabic_action_t::MED2, arabic_action_t::ISOL, 2), |
| 150 | (arabic_action_t::MED2, arabic_action_t::FIN2, 5), |
| 151 | (arabic_action_t::MED2, arabic_action_t::ISOL, 6), |
| 152 | ], |
| 153 | // State 5: prev was FIN2/FIN3 ALAPH, not willing to join. |
| 154 | [ |
| 155 | (arabic_action_t::NONE, arabic_action_t::NONE, 0), |
| 156 | (arabic_action_t::NONE, arabic_action_t::ISOL, 2), |
| 157 | (arabic_action_t::ISOL, arabic_action_t::ISOL, 1), |
| 158 | (arabic_action_t::ISOL, arabic_action_t::ISOL, 2), |
| 159 | (arabic_action_t::ISOL, arabic_action_t::FIN2, 5), |
| 160 | (arabic_action_t::ISOL, arabic_action_t::ISOL, 6), |
| 161 | ], |
| 162 | // State 6: prev was DALATH/RISH, not willing to join. |
| 163 | [ |
| 164 | (arabic_action_t::NONE, arabic_action_t::NONE, 0), |
| 165 | (arabic_action_t::NONE, arabic_action_t::ISOL, 2), |
| 166 | (arabic_action_t::NONE, arabic_action_t::ISOL, 1), |
| 167 | (arabic_action_t::NONE, arabic_action_t::ISOL, 2), |
| 168 | (arabic_action_t::NONE, arabic_action_t::FIN3, 5), |
| 169 | (arabic_action_t::NONE, arabic_action_t::ISOL, 6), |
| 170 | ], |
| 171 | ]; |
| 172 | |
| 173 | impl hb_glyph_info_t { |
| 174 | fn arabic_shaping_action(&self) -> u8 { |
| 175 | self.ot_shaper_var_u8_auxiliary() |
| 176 | } |
| 177 | |
| 178 | fn set_arabic_shaping_action(&mut self, action: u8) { |
| 179 | self.set_ot_shaper_var_u8_auxiliary(action) |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | fn collect_features(planner: &mut hb_ot_shape_planner_t) { |
| 184 | // We apply features according to the Arabic spec, with pauses |
| 185 | // in between most. |
| 186 | // |
| 187 | // The pause between init/medi/... and rlig is required. See eg: |
| 188 | // https://bugzilla.mozilla.org/show_bug.cgi?id=644184 |
| 189 | // |
| 190 | // The pauses between init/medi/... themselves are not necessarily |
| 191 | // needed as only one of those features is applied to any character. |
| 192 | // The only difference it makes is when fonts have contextual |
| 193 | // substitutions. We now follow the order of the spec, which makes |
| 194 | // for better experience if that's what Uniscribe is doing. |
| 195 | // |
| 196 | // At least for Arabic, looks like Uniscribe has a pause between |
| 197 | // rlig and calt. Otherwise the IranNastaliq's ALLAH ligature won't |
| 198 | // work. However, testing shows that rlig and calt are applied |
| 199 | // together for Mongolian in Uniscribe. As such, we only add a |
| 200 | // pause for Arabic, not other scripts. |
| 201 | // |
| 202 | // A pause after calt is required to make KFGQPC Uthmanic Script HAFS |
| 203 | // work correctly. See https://github.com/harfbuzz/harfbuzz/issues/505 |
| 204 | |
| 205 | planner |
| 206 | .ot_map |
| 207 | .enable_feature(hb_tag_t::from_bytes(b"stch" ), F_NONE, 1); |
| 208 | planner.ot_map.add_gsub_pause(Some(record_stch)); |
| 209 | |
| 210 | planner |
| 211 | .ot_map |
| 212 | .enable_feature(hb_tag_t::from_bytes(b"ccmp" ), F_MANUAL_ZWJ, 1); |
| 213 | planner |
| 214 | .ot_map |
| 215 | .enable_feature(hb_tag_t::from_bytes(b"locl" ), F_MANUAL_ZWJ, 1); |
| 216 | |
| 217 | planner.ot_map.add_gsub_pause(None); |
| 218 | |
| 219 | for feature in ARABIC_FEATURES { |
| 220 | let has_fallback = planner.script == Some(script::ARABIC) && !feature_is_syriac(*feature); |
| 221 | let flags = if has_fallback { F_HAS_FALLBACK } else { F_NONE }; |
| 222 | planner |
| 223 | .ot_map |
| 224 | .add_feature(*feature, F_MANUAL_ZWJ | flags, 1); |
| 225 | planner.ot_map.add_gsub_pause(None); |
| 226 | } |
| 227 | |
| 228 | // Normally, Unicode says a ZWNJ means "don't ligate". In Arabic script |
| 229 | // however, it says a ZWJ should also mean "don't ligate". So we run |
| 230 | // the main ligating features as MANUAL_ZWJ. |
| 231 | |
| 232 | planner.ot_map.enable_feature( |
| 233 | hb_tag_t::from_bytes(b"rlig" ), |
| 234 | F_MANUAL_ZWJ | F_HAS_FALLBACK, |
| 235 | 1, |
| 236 | ); |
| 237 | |
| 238 | if planner.script == Some(script::ARABIC) { |
| 239 | planner.ot_map.add_gsub_pause(Some(arabic_fallback_shape)); |
| 240 | } |
| 241 | |
| 242 | // No pause after rclt. |
| 243 | // See 98460779bae19e4d64d29461ff154b3527bf8420 |
| 244 | planner |
| 245 | .ot_map |
| 246 | .enable_feature(hb_tag_t::from_bytes(b"calt" ), F_MANUAL_ZWJ, 1); |
| 247 | /* https://github.com/harfbuzz/harfbuzz/issues/1573 */ |
| 248 | if !planner.ot_map.has_feature(hb_tag_t::from_bytes(b"rclt" )) { |
| 249 | planner.ot_map.add_gsub_pause(None); |
| 250 | } |
| 251 | |
| 252 | planner |
| 253 | .ot_map |
| 254 | .enable_feature(hb_tag_t::from_bytes(b"liga" ), F_MANUAL_ZWJ, 1); |
| 255 | planner |
| 256 | .ot_map |
| 257 | .enable_feature(hb_tag_t::from_bytes(b"clig" ), F_MANUAL_ZWJ, 1); |
| 258 | |
| 259 | // The spec includes 'cswh'. Earlier versions of Windows |
| 260 | // used to enable this by default, but testing suggests |
| 261 | // that Windows 8 and later do not enable it by default, |
| 262 | // and spec now says 'Off by default'. |
| 263 | // We disabled this in ae23c24c32. |
| 264 | // Note that IranNastaliq uses this feature extensively |
| 265 | // to fixup broken glyph sequences. Oh well... |
| 266 | // Test case: U+0643,U+0640,U+0631. |
| 267 | |
| 268 | // planner.ot_map.enable_feature(feature::CONTEXTUAL_SWASH, F_MANUAL_ZWJ, 1); |
| 269 | planner |
| 270 | .ot_map |
| 271 | .enable_feature(hb_tag_t::from_bytes(b"mset" ), F_MANUAL_ZWJ, 1); |
| 272 | } |
| 273 | |
| 274 | pub struct arabic_shape_plan_t { |
| 275 | // The "+ 1" in the next array is to accommodate for the "NONE" command, |
| 276 | // which is not an OpenType feature, but this simplifies the code by not |
| 277 | // having to do a "if (... < NONE) ..." and just rely on the fact that |
| 278 | // mask_array[NONE] == 0. |
| 279 | mask_array: [hb_mask_t; ARABIC_FEATURES.len() + 1], |
| 280 | has_stch: bool, |
| 281 | } |
| 282 | |
| 283 | pub fn data_create_arabic(plan: &hb_ot_shape_plan_t) -> arabic_shape_plan_t { |
| 284 | let has_stch: bool = plan.ot_map.get_1_mask(feature_tag:hb_tag_t::from_bytes(b"stch" )) != 0; |
| 285 | |
| 286 | let mut mask_array: [u32; 8] = [0; ARABIC_FEATURES.len() + 1]; |
| 287 | for i: usize in 0..ARABIC_FEATURES.len() { |
| 288 | mask_array[i] = plan.ot_map.get_1_mask(ARABIC_FEATURES[i]); |
| 289 | } |
| 290 | |
| 291 | arabic_shape_plan_t { |
| 292 | mask_array, |
| 293 | has_stch, |
| 294 | } |
| 295 | } |
| 296 | |
| 297 | fn arabic_joining(buffer: &mut hb_buffer_t) { |
| 298 | let mut prev: Option<usize> = None; |
| 299 | let mut state = 0; |
| 300 | |
| 301 | // Check pre-context. |
| 302 | for i in 0..buffer.context_len[0] { |
| 303 | let c = buffer.context[0][i]; |
| 304 | let this_type = get_joining_type(c, c.general_category()); |
| 305 | if this_type == hb_arabic_joining_type_t::T { |
| 306 | continue; |
| 307 | } |
| 308 | |
| 309 | state = STATE_TABLE[state][this_type as usize].2 as usize; |
| 310 | break; |
| 311 | } |
| 312 | |
| 313 | for i in 0..buffer.len { |
| 314 | let this_type = get_joining_type( |
| 315 | buffer.info[i].as_char(), |
| 316 | _hb_glyph_info_get_general_category(&buffer.info[i]), |
| 317 | ); |
| 318 | if this_type == hb_arabic_joining_type_t::T { |
| 319 | buffer.info[i].set_arabic_shaping_action(arabic_action_t::NONE); |
| 320 | continue; |
| 321 | } |
| 322 | |
| 323 | let entry = &STATE_TABLE[state][this_type as usize]; |
| 324 | if entry.0 != arabic_action_t::NONE && prev.is_some() { |
| 325 | if let Some(prev) = prev { |
| 326 | buffer.info[prev].set_arabic_shaping_action(entry.0); |
| 327 | buffer.safe_to_insert_tatweel(Some(prev), Some(i + 1)); |
| 328 | } |
| 329 | } |
| 330 | // States that have a possible prev_action. |
| 331 | else { |
| 332 | if let Some(prev) = prev { |
| 333 | if this_type >= hb_arabic_joining_type_t::R || (2 <= state && state <= 5) { |
| 334 | buffer.unsafe_to_concat(Some(prev), Some(i + 1)); |
| 335 | } |
| 336 | } else { |
| 337 | if this_type >= hb_arabic_joining_type_t::R { |
| 338 | buffer.unsafe_to_concat_from_outbuffer(Some(0), Some(i + 1)); |
| 339 | } |
| 340 | } |
| 341 | } |
| 342 | |
| 343 | buffer.info[i].set_arabic_shaping_action(entry.1); |
| 344 | |
| 345 | prev = Some(i); |
| 346 | state = entry.2 as usize; |
| 347 | } |
| 348 | |
| 349 | for i in 0..buffer.context_len[1] { |
| 350 | let c = buffer.context[1][i]; |
| 351 | let this_type = get_joining_type(c, c.general_category()); |
| 352 | if this_type == hb_arabic_joining_type_t::T { |
| 353 | continue; |
| 354 | } |
| 355 | |
| 356 | let entry = &STATE_TABLE[state][this_type as usize]; |
| 357 | if entry.0 != arabic_action_t::NONE && prev.is_some() { |
| 358 | if let Some(prev) = prev { |
| 359 | buffer.info[prev].set_arabic_shaping_action(entry.0); |
| 360 | buffer.safe_to_insert_tatweel(Some(prev), Some(buffer.len)); |
| 361 | } |
| 362 | } |
| 363 | // States that have a possible prev_action. |
| 364 | else if 2 <= state && state <= 5 { |
| 365 | if let Some(prev) = prev { |
| 366 | buffer.unsafe_to_concat(Some(prev), Some(buffer.len)); |
| 367 | } |
| 368 | } |
| 369 | |
| 370 | break; |
| 371 | } |
| 372 | } |
| 373 | |
| 374 | fn mongolian_variation_selectors(buffer: &mut hb_buffer_t) { |
| 375 | // Copy arabic_shaping_action() from base to Mongolian variation selectors. |
| 376 | let len: usize = buffer.len; |
| 377 | let info: &mut Vec = &mut buffer.info; |
| 378 | for i: usize in 1..len { |
| 379 | if (0x180B..=0x180D).contains(&info[i].glyph_id) || info[i].glyph_id == 0x180F { |
| 380 | let a: u8 = info[i - 1].arabic_shaping_action(); |
| 381 | info[i].set_arabic_shaping_action(a); |
| 382 | } |
| 383 | } |
| 384 | } |
| 385 | |
| 386 | fn setup_masks_arabic_plan(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) { |
| 387 | let arabic_plan: &arabic_shape_plan_t = plan.data::<arabic_shape_plan_t>(); |
| 388 | setup_masks_inner(arabic_plan, plan.script, buffer) |
| 389 | } |
| 390 | |
| 391 | pub fn setup_masks_inner( |
| 392 | arabic_plan: &arabic_shape_plan_t, |
| 393 | script: Option<Script>, |
| 394 | buffer: &mut hb_buffer_t, |
| 395 | ) { |
| 396 | arabic_joining(buffer); |
| 397 | if script == Some(script::MONGOLIAN) { |
| 398 | mongolian_variation_selectors(buffer); |
| 399 | } |
| 400 | |
| 401 | for info: &mut hb_glyph_info_t in buffer.info_slice_mut() { |
| 402 | info.mask |= arabic_plan.mask_array[info.arabic_shaping_action() as usize]; |
| 403 | } |
| 404 | } |
| 405 | |
| 406 | fn arabic_fallback_shape(_: &hb_ot_shape_plan_t, _: &hb_font_t, _: &mut hb_buffer_t) -> bool { |
| 407 | false |
| 408 | } |
| 409 | |
| 410 | // Stretch feature: "stch". |
| 411 | // See example here: |
| 412 | // https://docs.microsoft.com/en-us/typography/script-development/syriac |
| 413 | // We implement this in a generic way, such that the Arabic subtending |
| 414 | // marks can use it as well. |
| 415 | fn record_stch(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) -> bool { |
| 416 | let arabic_plan = plan.data::<arabic_shape_plan_t>(); |
| 417 | if !arabic_plan.has_stch { |
| 418 | return false; |
| 419 | } |
| 420 | |
| 421 | // 'stch' feature was just applied. Look for anything that multiplied, |
| 422 | // and record it for stch treatment later. Note that rtlm, frac, etc |
| 423 | // are applied before stch, but we assume that they didn't result in |
| 424 | // anything multiplying into 5 pieces, so it's safe-ish... |
| 425 | |
| 426 | let len = buffer.len; |
| 427 | let info = &mut buffer.info; |
| 428 | let mut has_stch = false; |
| 429 | for glyph_info in &mut info[..len] { |
| 430 | if _hb_glyph_info_multiplied(glyph_info) { |
| 431 | let comp = if _hb_glyph_info_get_lig_comp(glyph_info) % 2 != 0 { |
| 432 | arabic_action_t::STRETCHING_REPEATING |
| 433 | } else { |
| 434 | arabic_action_t::STRETCHING_FIXED |
| 435 | }; |
| 436 | |
| 437 | glyph_info.set_arabic_shaping_action(comp); |
| 438 | has_stch = true; |
| 439 | } |
| 440 | } |
| 441 | |
| 442 | if has_stch { |
| 443 | buffer.scratch_flags |= HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH; |
| 444 | } |
| 445 | |
| 446 | false |
| 447 | } |
| 448 | |
| 449 | fn apply_stch(face: &hb_font_t, buffer: &mut hb_buffer_t) { |
| 450 | if buffer.scratch_flags & HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH == 0 { |
| 451 | return; |
| 452 | } |
| 453 | |
| 454 | let rtl = buffer.direction == Direction::RightToLeft; |
| 455 | |
| 456 | if !rtl { |
| 457 | buffer.reverse(); |
| 458 | } |
| 459 | |
| 460 | // We do a two pass implementation: |
| 461 | // First pass calculates the exact number of extra glyphs we need, |
| 462 | // We then enlarge buffer to have that much room, |
| 463 | // Second pass applies the stretch, copying things to the end of buffer. |
| 464 | |
| 465 | let mut extra_glyphs_needed: usize = 0; // Set during MEASURE, used during CUT |
| 466 | const MEASURE: usize = 0; |
| 467 | const CUT: usize = 1; |
| 468 | |
| 469 | for step in 0..2 { |
| 470 | let new_len = buffer.len + extra_glyphs_needed; // write head during CUT |
| 471 | let mut i = buffer.len; |
| 472 | let mut j = new_len; |
| 473 | while i != 0 { |
| 474 | if !arabic_action_t::is_stch(buffer.info[i - 1].arabic_shaping_action()) { |
| 475 | if step == CUT { |
| 476 | j -= 1; |
| 477 | buffer.info[j] = buffer.info[i - 1]; |
| 478 | buffer.pos[j] = buffer.pos[i - 1]; |
| 479 | } |
| 480 | |
| 481 | i -= 1; |
| 482 | continue; |
| 483 | } |
| 484 | |
| 485 | // Yay, justification! |
| 486 | |
| 487 | let mut w_total = 0; // Total to be filled |
| 488 | let mut w_fixed = 0; // Sum of fixed tiles |
| 489 | let mut w_repeating = 0; // Sum of repeating tiles |
| 490 | let mut n_repeating: i32 = 0; |
| 491 | |
| 492 | let end = i; |
| 493 | while i != 0 && arabic_action_t::is_stch(buffer.info[i - 1].arabic_shaping_action()) { |
| 494 | i -= 1; |
| 495 | let width = face.glyph_h_advance(buffer.info[i].as_glyph()); |
| 496 | |
| 497 | if buffer.info[i].arabic_shaping_action() == arabic_action_t::STRETCHING_FIXED { |
| 498 | w_fixed += width; |
| 499 | } else { |
| 500 | w_repeating += width; |
| 501 | n_repeating += 1; |
| 502 | } |
| 503 | } |
| 504 | |
| 505 | let start = i; |
| 506 | let mut context = i; |
| 507 | while context != 0 |
| 508 | && !arabic_action_t::is_stch(buffer.info[context - 1].arabic_shaping_action()) |
| 509 | && (_hb_glyph_info_is_default_ignorable(&buffer.info[context - 1]) |
| 510 | || is_word_category(_hb_glyph_info_get_general_category( |
| 511 | &buffer.info[context - 1], |
| 512 | ))) |
| 513 | { |
| 514 | context -= 1; |
| 515 | w_total += buffer.pos[context].x_advance; |
| 516 | } |
| 517 | |
| 518 | i += 1; // Don't touch i again. |
| 519 | |
| 520 | // Number of additional times to repeat each repeating tile. |
| 521 | let mut n_copies: i32 = 0; |
| 522 | |
| 523 | let mut w_remaining = w_total - w_fixed; |
| 524 | if w_remaining > w_repeating && w_repeating > 0 { |
| 525 | n_copies = w_remaining / (w_repeating) - 1; |
| 526 | } |
| 527 | |
| 528 | // See if we can improve the fit by adding an extra repeat and squeezing them together a bit. |
| 529 | let mut extra_repeat_overlap = 0; |
| 530 | let shortfall = w_remaining - w_repeating * (n_copies + 1); |
| 531 | if shortfall > 0 && n_repeating > 0 { |
| 532 | n_copies += 1; |
| 533 | let excess = (n_copies + 1) * w_repeating - w_remaining; |
| 534 | if excess > 0 { |
| 535 | extra_repeat_overlap = excess / (n_copies * n_repeating); |
| 536 | w_remaining = 0; |
| 537 | } |
| 538 | } |
| 539 | |
| 540 | if step == MEASURE { |
| 541 | extra_glyphs_needed += (n_copies * n_repeating) as usize; |
| 542 | } else { |
| 543 | buffer.unsafe_to_break(Some(context), Some(end)); |
| 544 | let mut x_offset = w_remaining / 2; |
| 545 | for k in (start + 1..=end).rev() { |
| 546 | let width = face.glyph_h_advance(buffer.info[k - 1].as_glyph()); |
| 547 | |
| 548 | let mut repeat = 1; |
| 549 | if buffer.info[k - 1].arabic_shaping_action() |
| 550 | == arabic_action_t::STRETCHING_REPEATING |
| 551 | { |
| 552 | repeat += n_copies; |
| 553 | } |
| 554 | |
| 555 | buffer.pos[k - 1].x_advance = 0; |
| 556 | |
| 557 | for n in 0..repeat { |
| 558 | if rtl { |
| 559 | x_offset -= width; |
| 560 | if n > 0 { |
| 561 | x_offset += extra_repeat_overlap; |
| 562 | } |
| 563 | } |
| 564 | |
| 565 | buffer.pos[k - 1].x_offset = x_offset; |
| 566 | |
| 567 | // Append copy. |
| 568 | j -= 1; |
| 569 | buffer.info[j] = buffer.info[k - 1]; |
| 570 | buffer.pos[j] = buffer.pos[k - 1]; |
| 571 | |
| 572 | if !rtl { |
| 573 | x_offset += width; |
| 574 | |
| 575 | if n > 0 { |
| 576 | x_offset -= extra_repeat_overlap; |
| 577 | } |
| 578 | } |
| 579 | } |
| 580 | } |
| 581 | } |
| 582 | |
| 583 | i -= 1; |
| 584 | } |
| 585 | |
| 586 | if step == MEASURE { |
| 587 | if !buffer.ensure(buffer.len + extra_glyphs_needed) { |
| 588 | break; |
| 589 | } |
| 590 | } else { |
| 591 | debug_assert_eq!(j, 0); |
| 592 | buffer.len = new_len; |
| 593 | } |
| 594 | } |
| 595 | |
| 596 | if !rtl { |
| 597 | buffer.reverse(); |
| 598 | } |
| 599 | } |
| 600 | |
| 601 | fn postprocess_glyphs_arabic(_: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) { |
| 602 | apply_stch(face, buffer) |
| 603 | } |
| 604 | |
/// Code points that `reorder_marks_arabic` treats as modifier combining marks
/// and moves to the front of their mark sequence.
///
/// See http://www.unicode.org/reports/tr53/
const MODIFIER_COMBINING_MARKS: &[u32] = &[
    0x0654, // ARABIC HAMZA ABOVE
    0x0655, // ARABIC HAMZA BELOW
    0x0658, // ARABIC MARK NOON GHUNNA
    0x06DC, // ARABIC SMALL HIGH SEEN
    0x06E3, // ARABIC SMALL LOW SEEN
    0x06E7, // ARABIC SMALL HIGH YEH
    0x06E8, // ARABIC SMALL HIGH NOON
    0x08CA, // ARABIC SMALL HIGH FARSI YEH
    0x08CB, // ARABIC SMALL HIGH YEH BARREE WITH TWO DOTS BELOW
    0x08CD, // ARABIC SMALL HIGH ZAH
    0x08CE, // ARABIC LARGE ROUND DOT ABOVE
    0x08CF, // ARABIC LARGE ROUND DOT BELOW
    0x08D3, // ARABIC SMALL LOW WAW
    0x08F3, // ARABIC SMALL HIGH WAW
];
| 622 | |
| 623 | fn reorder_marks_arabic( |
| 624 | _: &hb_ot_shape_plan_t, |
| 625 | buffer: &mut hb_buffer_t, |
| 626 | mut start: usize, |
| 627 | end: usize, |
| 628 | ) { |
| 629 | let mut i = start; |
| 630 | for cc in [220u8, 230].iter().cloned() { |
| 631 | while i < end && _hb_glyph_info_get_modified_combining_class(&buffer.info[i]) < cc { |
| 632 | i += 1; |
| 633 | } |
| 634 | |
| 635 | if i == end { |
| 636 | break; |
| 637 | } |
| 638 | |
| 639 | if _hb_glyph_info_get_modified_combining_class(&buffer.info[i]) > cc { |
| 640 | continue; |
| 641 | } |
| 642 | |
| 643 | let mut j = i; |
| 644 | while j < end |
| 645 | && _hb_glyph_info_get_modified_combining_class(&buffer.info[j]) == cc |
| 646 | && MODIFIER_COMBINING_MARKS.contains(&buffer.info[j].glyph_id) |
| 647 | { |
| 648 | j += 1; |
| 649 | } |
| 650 | |
| 651 | if i == j { |
| 652 | continue; |
| 653 | } |
| 654 | |
| 655 | // Shift it! |
| 656 | let mut temp = [hb_glyph_info_t::default(); MAX_COMBINING_MARKS]; |
| 657 | debug_assert!(j - i <= MAX_COMBINING_MARKS); |
| 658 | buffer.merge_clusters(start, j); |
| 659 | |
| 660 | temp[..j - i].copy_from_slice(&buffer.info[i..j]); |
| 661 | |
| 662 | for k in (0..i - start).rev() { |
| 663 | buffer.info[k + start + j - i] = buffer.info[k + start]; |
| 664 | } |
| 665 | |
| 666 | buffer.info[start..][..j - i].copy_from_slice(&temp[..j - i]); |
| 667 | |
| 668 | // Renumber CC such that the reordered sequence is still sorted. |
| 669 | // 22 and 26 are chosen because they are smaller than all Arabic categories, |
| 670 | // and are folded back to 220/230 respectively during fallback mark positioning. |
| 671 | // |
| 672 | // We do this because the CGJ-handling logic in the normalizer relies on |
| 673 | // mark sequences having an increasing order even after this reordering. |
| 674 | // https://github.com/harfbuzz/harfbuzz/issues/554 |
| 675 | // This, however, does break some obscure sequences, where the normalizer |
| 676 | // might compose a sequence that it should not. For example, in the seequence |
| 677 | // ALEF, HAMZAH, MADDAH, we should NOT try to compose ALEF+MADDAH, but with this |
| 678 | // renumbering, we will. |
| 679 | let new_start = start + j - i; |
| 680 | let new_cc = if cc == 220 { |
| 681 | modified_combining_class::CCC22 |
| 682 | } else { |
| 683 | modified_combining_class::CCC26 |
| 684 | }; |
| 685 | |
| 686 | while start < new_start { |
| 687 | _hb_glyph_info_set_modified_combining_class(&mut buffer.info[start], new_cc); |
| 688 | start += 1; |
| 689 | } |
| 690 | |
| 691 | i = j; |
| 692 | } |
| 693 | } |
| 694 | |
| 695 | pub const ARABIC_SHAPER: hb_ot_shaper_t = hb_ot_shaper_t { |
| 696 | collect_features: Some(collect_features), |
| 697 | override_features: None, |
| 698 | create_data: Some(|plan: &hb_ot_shape_plan_t| Box::new(data_create_arabic(plan))), |
| 699 | preprocess_text: None, |
| 700 | postprocess_glyphs: Some(postprocess_glyphs_arabic), |
| 701 | normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_AUTO, |
| 702 | decompose: None, |
| 703 | compose: None, |
| 704 | setup_masks: Some(setup_masks_arabic_plan), |
| 705 | gpos_tag: None, |
| 706 | reorder_marks: Some(reorder_marks_arabic), |
| 707 | zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, |
| 708 | fallback_position: true, |
| 709 | }; |
| 710 | |