| 1 | use alloc::boxed::Box; |
| 2 | use core::cmp; |
| 3 | use core::convert::TryFrom; |
| 4 | use core::ops::Range; |
| 5 | |
| 6 | use ttf_parser::GlyphId; |
| 7 | |
| 8 | use super::algs::*; |
| 9 | use super::buffer::hb_buffer_t; |
| 10 | use super::ot_layout::*; |
| 11 | use super::ot_layout_gsubgpos::{WouldApply, WouldApplyContext}; |
| 12 | use super::ot_map::*; |
| 13 | use super::ot_shape::*; |
| 14 | use super::ot_shape_normalize::*; |
| 15 | use super::ot_shape_plan::hb_ot_shape_plan_t; |
| 16 | use super::ot_shaper::*; |
| 17 | use super::unicode::{hb_gc, CharExt, GeneralCategoryExt}; |
| 18 | use super::{hb_font_t, hb_glyph_info_t, hb_mask_t, hb_tag_t, script, Script}; |
| 19 | |
| 20 | pub const INDIC_SHAPER: hb_ot_shaper_t = hb_ot_shaper_t { |
| 21 | collect_features: Some(collect_features), |
| 22 | override_features: Some(override_features), |
| 23 | create_data: Some(|plan: &hb_ot_shape_plan_t| Box::new(IndicShapePlan::new(plan))), |
| 24 | preprocess_text: Some(preprocess_text), |
| 25 | postprocess_glyphs: None, |
| 26 | normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, |
| 27 | decompose: Some(decompose), |
| 28 | compose: Some(compose), |
| 29 | setup_masks: Some(setup_masks), |
| 30 | gpos_tag: None, |
| 31 | reorder_marks: None, |
| 32 | zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, |
| 33 | fallback_position: false, |
| 34 | }; |
| 35 | |
/// Syllabic category of a glyph, stored as a raw `u8`.
pub type Category = u8;

/// Category values shared by the Indic, Khmer and Myanmar shapers.
///
/// HarfBuzz no longer has this module: the values are generated by the
/// various machines and stored in `hb-ot-shaper-indic-table`. Whenever the
/// values in the machines are updated, this module must be updated as well.
#[allow(dead_code)]
pub mod ot_category_t {
    pub const OT_X: u8 = 0;
    pub const OT_C: u8 = 1;
    pub const OT_V: u8 = 2;
    pub const OT_N: u8 = 3;
    pub const OT_H: u8 = 4;
    pub const OT_ZWNJ: u8 = 5;
    pub const OT_ZWJ: u8 = 6;
    pub const OT_M: u8 = 7;
    pub const OT_SM: u8 = 8;
    pub const OT_A: u8 = 9;
    /// Alias of [`OT_A`].
    pub const OT_VD: u8 = OT_A;
    pub const OT_PLACEHOLDER: u8 = 10;
    /// Alias of [`OT_PLACEHOLDER`].
    pub const OT_GB: u8 = OT_PLACEHOLDER;
    pub const OT_DOTTEDCIRCLE: u8 = 11;
    /// Register Shifter, used in Khmer OT spec.
    pub const OT_RS: u8 = 12;
    pub const OT_MPst: u8 = 13;
    /// Atomically-encoded logical or visual repha.
    pub const OT_Repha: u8 = 14;
    pub const OT_Ra: u8 = 15;
    /// Consonant-Medial.
    pub const OT_CM: u8 = 16;
    /// Avagraha, etc that take marks (SM,A,VD).
    pub const OT_Symbol: u8 = 17;
    pub const OT_CS: u8 = 18;

    // ---- Khmer & Myanmar shapers ----
    pub const OT_VAbv: u8 = 20;
    pub const OT_VBlw: u8 = 21;
    pub const OT_VPre: u8 = 22;
    pub const OT_VPst: u8 = 23;

    // ---- Khmer ----
    pub const OT_Robatic: u8 = 25;
    pub const OT_Xgroup: u8 = 26;
    pub const OT_Ygroup: u8 = 27;

    // ---- Myanmar ----
    /// Asat.
    pub const OT_As: u8 = 32;
    /// Medial.
    pub const OT_MH: u8 = 35;
    /// Medial.
    pub const OT_MR: u8 = 36;
    /// Medial.
    pub const OT_MW: u8 = 37;
    /// Medial.
    pub const OT_MY: u8 = 38;
    /// Pwo and other tones.
    pub const OT_PT: u8 = 39;
    /// Variation selectors.
    pub const OT_VS: u8 = 40;
    /// Consonant medials.
    pub const OT_ML: u8 = 41;

    /// Not part of `ot_category_t` in HarfBuzz; it only exists in the Myanmar
    /// machine. Rust cannot export it from the Ragel file, so it is mirrored
    /// here and must be kept in sync with the value in the machine.
    pub const IV: u8 = 2;
}
| 92 | |
/// Position of a glyph within its syllable, stored as a raw `u8`.
pub type Position = u8;

/// Position values, in increasing order from `POS_START` to `POS_END`.
pub mod ot_position_t {
    pub const POS_START: u8 = 0;

    // Before the base consonant.
    pub const POS_RA_TO_BECOME_REPH: u8 = 1;
    pub const POS_PRE_M: u8 = 2;
    pub const POS_PRE_C: u8 = 3;

    // The base consonant and what directly follows it.
    pub const POS_BASE_C: u8 = 4;
    pub const POS_AFTER_MAIN: u8 = 5;

    pub const POS_ABOVE_C: u8 = 6;

    // Below-base group.
    pub const POS_BEFORE_SUB: u8 = 7;
    pub const POS_BELOW_C: u8 = 8;
    pub const POS_AFTER_SUB: u8 = 9;

    // Post-base group.
    pub const POS_BEFORE_POST: u8 = 10;
    pub const POS_POST_C: u8 = 11;
    pub const POS_AFTER_POST: u8 = 12;

    pub const POS_SMVD: u8 = 13;

    pub const POS_END: u8 = 14;
}
| 118 | |
| 119 | const INDIC_FEATURES: &[(hb_tag_t, hb_ot_map_feature_flags_t)] = &[ |
| 120 | // Basic features. |
| 121 | // These features are applied in order, one at a time, after initial_reordering, |
| 122 | // constrained to the syllable. |
| 123 | ( |
| 124 | hb_tag_t::from_bytes(b"nukt" ), |
| 125 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 126 | ), |
| 127 | ( |
| 128 | hb_tag_t::from_bytes(b"akhn" ), |
| 129 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 130 | ), |
| 131 | ( |
| 132 | hb_tag_t::from_bytes(b"rphf" ), |
| 133 | F_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 134 | ), |
| 135 | ( |
| 136 | hb_tag_t::from_bytes(b"rkrf" ), |
| 137 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 138 | ), |
| 139 | ( |
| 140 | hb_tag_t::from_bytes(b"pref" ), |
| 141 | F_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 142 | ), |
| 143 | ( |
| 144 | hb_tag_t::from_bytes(b"blwf" ), |
| 145 | F_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 146 | ), |
| 147 | ( |
| 148 | hb_tag_t::from_bytes(b"abvf" ), |
| 149 | F_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 150 | ), |
| 151 | ( |
| 152 | hb_tag_t::from_bytes(b"half" ), |
| 153 | F_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 154 | ), |
| 155 | ( |
| 156 | hb_tag_t::from_bytes(b"pstf" ), |
| 157 | F_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 158 | ), |
| 159 | ( |
| 160 | hb_tag_t::from_bytes(b"vatu" ), |
| 161 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 162 | ), |
| 163 | ( |
| 164 | hb_tag_t::from_bytes(b"cjct" ), |
| 165 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 166 | ), |
| 167 | // Other features. |
| 168 | // These features are applied all at once, after final_reordering, constrained |
| 169 | // to the syllable. |
| 170 | // Default Bengali font in Windows for example has intermixed |
| 171 | // lookups for init,pres,abvs,blws features. |
| 172 | ( |
| 173 | hb_tag_t::from_bytes(b"init" ), |
| 174 | F_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 175 | ), |
| 176 | ( |
| 177 | hb_tag_t::from_bytes(b"pres" ), |
| 178 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 179 | ), |
| 180 | ( |
| 181 | hb_tag_t::from_bytes(b"abvs" ), |
| 182 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 183 | ), |
| 184 | ( |
| 185 | hb_tag_t::from_bytes(b"blws" ), |
| 186 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 187 | ), |
| 188 | ( |
| 189 | hb_tag_t::from_bytes(b"psts" ), |
| 190 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 191 | ), |
| 192 | ( |
| 193 | hb_tag_t::from_bytes(b"haln" ), |
| 194 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
| 195 | ), |
| 196 | ]; |
| 197 | |
/// Indices into `INDIC_FEATURES`.
///
/// Each index is the previous one plus one, so the list must stay in the
/// same order as the `INDIC_FEATURES` array.
#[allow(dead_code)]
mod indic_feature {
    // Basic features.
    pub const NUKT: usize = 0;
    pub const AKHN: usize = NUKT + 1;
    pub const RPHF: usize = AKHN + 1;
    pub const RKRF: usize = RPHF + 1;
    pub const PREF: usize = RKRF + 1;
    pub const BLWF: usize = PREF + 1;
    pub const ABVF: usize = BLWF + 1;
    pub const HALF: usize = ABVF + 1;
    pub const PSTF: usize = HALF + 1;
    pub const VATU: usize = PSTF + 1;
    pub const CJCT: usize = VATU + 1;
    // Other features.
    pub const INIT: usize = CJCT + 1;
    pub const PRES: usize = INIT + 1;
    pub const ABVS: usize = PRES + 1;
    pub const BLWS: usize = ABVS + 1;
    pub const PSTS: usize = BLWS + 1;
    pub const HALN: usize = PSTS + 1;
}
| 219 | |
| 220 | pub(crate) const fn category_flag(c: Category) -> u32 { |
| 221 | rb_flag(c as u32) |
| 222 | } |
| 223 | |
| 224 | // Note: |
| 225 | // |
| 226 | // We treat Vowels and placeholders as if they were consonants. This is safe because Vowels |
| 227 | // cannot happen in a consonant syllable. The plus side however is, we can call the |
| 228 | // consonant syllable logic from the vowel syllable function and get it all right! |
| 229 | const CONSONANT_FLAGS_INDIC: u32 = category_flag(ot_category_t::OT_C) |
| 230 | | category_flag(ot_category_t::OT_CS) |
| 231 | | category_flag(ot_category_t::OT_Ra) |
| 232 | | category_flag(ot_category_t::OT_CM) |
| 233 | | category_flag(ot_category_t::OT_V) |
| 234 | | category_flag(ot_category_t::OT_PLACEHOLDER) |
| 235 | | category_flag(ot_category_t::OT_DOTTEDCIRCLE); |
| 236 | |
| 237 | const CONSONANT_FLAGS_MYANMAR: u32 = category_flag(ot_category_t::OT_C) |
| 238 | | category_flag(ot_category_t::OT_CS) |
| 239 | | category_flag(ot_category_t::OT_Ra) |
| 240 | // | category_flag(ot_category_t::OT_CM) |
| 241 | | category_flag(ot_category_t::IV) |
| 242 | | category_flag(ot_category_t::OT_GB) |
| 243 | | category_flag(ot_category_t::OT_DOTTEDCIRCLE); |
| 244 | |
| 245 | const JOINER_FLAGS: u32 = |
| 246 | category_flag(ot_category_t::OT_ZWJ) | category_flag(ot_category_t::OT_ZWNJ); |
| 247 | |
| 248 | #[derive (Clone, Copy, PartialEq)] |
| 249 | enum RephPosition { |
| 250 | AfterMain = ot_position_t::POS_AFTER_MAIN as isize, |
| 251 | BeforeSub = ot_position_t::POS_BEFORE_SUB as isize, |
| 252 | AfterSub = ot_position_t::POS_AFTER_SUB as isize, |
| 253 | BeforePost = ot_position_t::POS_BEFORE_POST as isize, |
| 254 | AfterPost = ot_position_t::POS_AFTER_POST as isize, |
| 255 | } |
| 256 | |
/// How a script encodes its reph.
#[derive(Copy, Clone, PartialEq)]
enum RephMode {
    /// The reph comes from an initial `Ra, H` sequence.
    Implicit,
    /// The reph comes from an initial `Ra, H, ZWJ` sequence.
    Explicit,
    /// The reph is an encoded Repha character that needs reordering.
    LogRepha,
}
| 266 | |
/// Which consonants the below-forms feature is applied to.
#[derive(Copy, Clone, PartialEq)]
enum BlwfMode {
    /// Applied to both pre-base and post-base consonants.
    PreAndPost,
    /// Applied to post-base consonants only.
    PostOnly,
}
| 274 | |
| 275 | #[derive (Clone, Copy)] |
| 276 | struct IndicConfig { |
| 277 | script: Option<Script>, |
| 278 | has_old_spec: bool, |
| 279 | virama: u32, |
| 280 | reph_pos: RephPosition, |
| 281 | reph_mode: RephMode, |
| 282 | blwf_mode: BlwfMode, |
| 283 | } |
| 284 | |
| 285 | impl IndicConfig { |
| 286 | const fn new( |
| 287 | script: Option<Script>, |
| 288 | has_old_spec: bool, |
| 289 | virama: u32, |
| 290 | reph_pos: RephPosition, |
| 291 | reph_mode: RephMode, |
| 292 | blwf_mode: BlwfMode, |
| 293 | ) -> Self { |
| 294 | IndicConfig { |
| 295 | script, |
| 296 | has_old_spec, |
| 297 | virama, |
| 298 | reph_pos, |
| 299 | reph_mode, |
| 300 | blwf_mode, |
| 301 | } |
| 302 | } |
| 303 | } |
| 304 | |
| 305 | const INDIC_CONFIGS: &[IndicConfig] = &[ |
| 306 | IndicConfig::new( |
| 307 | script:None, |
| 308 | has_old_spec:false, |
| 309 | virama:0, |
| 310 | reph_pos:RephPosition::BeforePost, |
| 311 | RephMode::Implicit, |
| 312 | BlwfMode::PreAndPost, |
| 313 | ), |
| 314 | IndicConfig::new( |
| 315 | script:Some(script::DEVANAGARI), |
| 316 | has_old_spec:true, |
| 317 | virama:0x094D, |
| 318 | reph_pos:RephPosition::BeforePost, |
| 319 | RephMode::Implicit, |
| 320 | BlwfMode::PreAndPost, |
| 321 | ), |
| 322 | IndicConfig::new( |
| 323 | script:Some(script::BENGALI), |
| 324 | has_old_spec:true, |
| 325 | virama:0x09CD, |
| 326 | reph_pos:RephPosition::AfterSub, |
| 327 | RephMode::Implicit, |
| 328 | BlwfMode::PreAndPost, |
| 329 | ), |
| 330 | IndicConfig::new( |
| 331 | script:Some(script::GURMUKHI), |
| 332 | has_old_spec:true, |
| 333 | virama:0x0A4D, |
| 334 | reph_pos:RephPosition::BeforeSub, |
| 335 | RephMode::Implicit, |
| 336 | BlwfMode::PreAndPost, |
| 337 | ), |
| 338 | IndicConfig::new( |
| 339 | script:Some(script::GUJARATI), |
| 340 | has_old_spec:true, |
| 341 | virama:0x0ACD, |
| 342 | reph_pos:RephPosition::BeforePost, |
| 343 | RephMode::Implicit, |
| 344 | BlwfMode::PreAndPost, |
| 345 | ), |
| 346 | IndicConfig::new( |
| 347 | script:Some(script::ORIYA), |
| 348 | has_old_spec:true, |
| 349 | virama:0x0B4D, |
| 350 | reph_pos:RephPosition::AfterMain, |
| 351 | RephMode::Implicit, |
| 352 | BlwfMode::PreAndPost, |
| 353 | ), |
| 354 | IndicConfig::new( |
| 355 | script:Some(script::TAMIL), |
| 356 | has_old_spec:true, |
| 357 | virama:0x0BCD, |
| 358 | reph_pos:RephPosition::AfterPost, |
| 359 | RephMode::Implicit, |
| 360 | BlwfMode::PreAndPost, |
| 361 | ), |
| 362 | IndicConfig::new( |
| 363 | script:Some(script::TELUGU), |
| 364 | has_old_spec:true, |
| 365 | virama:0x0C4D, |
| 366 | reph_pos:RephPosition::AfterPost, |
| 367 | RephMode::Explicit, |
| 368 | BlwfMode::PostOnly, |
| 369 | ), |
| 370 | IndicConfig::new( |
| 371 | script:Some(script::KANNADA), |
| 372 | has_old_spec:true, |
| 373 | virama:0x0CCD, |
| 374 | reph_pos:RephPosition::AfterPost, |
| 375 | RephMode::Implicit, |
| 376 | BlwfMode::PostOnly, |
| 377 | ), |
| 378 | IndicConfig::new( |
| 379 | script:Some(script::MALAYALAM), |
| 380 | has_old_spec:true, |
| 381 | virama:0x0D4D, |
| 382 | reph_pos:RephPosition::AfterMain, |
| 383 | RephMode::LogRepha, |
| 384 | BlwfMode::PreAndPost, |
| 385 | ), |
| 386 | IndicConfig::new( |
| 387 | script:Some(script::SINHALA), |
| 388 | has_old_spec:false, |
| 389 | virama:0x0DCA, |
| 390 | reph_pos:RephPosition::AfterPost, |
| 391 | RephMode::Explicit, |
| 392 | BlwfMode::PreAndPost, |
| 393 | ), |
| 394 | ]; |
| 395 | |
| 396 | struct IndicWouldSubstituteFeature { |
| 397 | lookups: Range<usize>, |
| 398 | zero_context: bool, |
| 399 | } |
| 400 | |
| 401 | impl IndicWouldSubstituteFeature { |
| 402 | pub fn new(map: &hb_ot_map_t, feature_tag: hb_tag_t, zero_context: bool) -> Self { |
| 403 | IndicWouldSubstituteFeature { |
| 404 | lookups: match map.get_feature_stage(TableIndex::GSUB, feature_tag) { |
| 405 | Some(stage) => map.stage_lookup_range(TableIndex::GSUB, stage), |
| 406 | None => 0..0, |
| 407 | }, |
| 408 | zero_context, |
| 409 | } |
| 410 | } |
| 411 | |
| 412 | pub fn would_substitute( |
| 413 | &self, |
| 414 | map: &hb_ot_map_t, |
| 415 | face: &hb_font_t, |
| 416 | glyphs: &[GlyphId], |
| 417 | ) -> bool { |
| 418 | for index in self.lookups.clone() { |
| 419 | let lookup = map.lookup(TableIndex::GSUB, index); |
| 420 | let ctx = WouldApplyContext { |
| 421 | glyphs, |
| 422 | zero_context: self.zero_context, |
| 423 | }; |
| 424 | if face |
| 425 | .gsub |
| 426 | .as_ref() |
| 427 | .and_then(|table| table.get_lookup(lookup.index)) |
| 428 | .map_or(false, |lookup| lookup.would_apply(&ctx)) |
| 429 | { |
| 430 | return true; |
| 431 | } |
| 432 | } |
| 433 | |
| 434 | false |
| 435 | } |
| 436 | } |
| 437 | |
| 438 | struct IndicShapePlan { |
| 439 | config: IndicConfig, |
| 440 | is_old_spec: bool, |
| 441 | // virama_glyph: Option<u32>, |
| 442 | rphf: IndicWouldSubstituteFeature, |
| 443 | pref: IndicWouldSubstituteFeature, |
| 444 | blwf: IndicWouldSubstituteFeature, |
| 445 | pstf: IndicWouldSubstituteFeature, |
| 446 | vatu: IndicWouldSubstituteFeature, |
| 447 | mask_array: [hb_mask_t; INDIC_FEATURES.len()], |
| 448 | } |
| 449 | |
| 450 | impl IndicShapePlan { |
| 451 | fn new(plan: &hb_ot_shape_plan_t) -> Self { |
| 452 | let script = plan.script; |
| 453 | let config = if let Some(c) = INDIC_CONFIGS.iter().skip(1).find(|c| c.script == script) { |
| 454 | *c |
| 455 | } else { |
| 456 | INDIC_CONFIGS[0] |
| 457 | }; |
| 458 | |
| 459 | let is_old_spec = config.has_old_spec |
| 460 | && plan |
| 461 | .ot_map |
| 462 | .chosen_script(TableIndex::GSUB) |
| 463 | .map_or(true, |tag| tag.to_bytes()[3] != b'2' ); |
| 464 | |
| 465 | // Use zero-context would_substitute() matching for new-spec of the main |
| 466 | // Indic scripts, and scripts with one spec only, but not for old-specs. |
| 467 | // The new-spec for all dual-spec scripts says zero-context matching happens. |
| 468 | // |
| 469 | // However, testing with Malayalam shows that old and new spec both allow |
| 470 | // context. Testing with Bengali new-spec however shows that it doesn't. |
| 471 | // So, the heuristic here is the way it is. It should *only* be changed, |
| 472 | // as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE. |
| 473 | let zero_context = is_old_spec && script != Some(script::MALAYALAM); |
| 474 | |
| 475 | let mut mask_array = [0; INDIC_FEATURES.len()]; |
| 476 | for (i, feature) in INDIC_FEATURES.iter().enumerate() { |
| 477 | mask_array[i] = if feature.1 & F_GLOBAL != 0 { |
| 478 | 0 |
| 479 | } else { |
| 480 | plan.ot_map.get_1_mask(feature.0) |
| 481 | } |
| 482 | } |
| 483 | |
| 484 | // TODO: what is this? |
| 485 | // let mut virama_glyph = None; |
| 486 | // if config.virama != 0 { |
| 487 | // if let Some(g) = face.glyph_index(char::try_from(config.virama).unwrap()) { |
| 488 | // virama_glyph = Some(g.0 as u32); |
| 489 | // } |
| 490 | // } |
| 491 | |
| 492 | IndicShapePlan { |
| 493 | config, |
| 494 | is_old_spec, |
| 495 | // virama_glyph, |
| 496 | rphf: IndicWouldSubstituteFeature::new( |
| 497 | &plan.ot_map, |
| 498 | hb_tag_t::from_bytes(b"rphf" ), |
| 499 | zero_context, |
| 500 | ), |
| 501 | pref: IndicWouldSubstituteFeature::new( |
| 502 | &plan.ot_map, |
| 503 | hb_tag_t::from_bytes(b"pref" ), |
| 504 | zero_context, |
| 505 | ), |
| 506 | blwf: IndicWouldSubstituteFeature::new( |
| 507 | &plan.ot_map, |
| 508 | hb_tag_t::from_bytes(b"blwf" ), |
| 509 | zero_context, |
| 510 | ), |
| 511 | pstf: IndicWouldSubstituteFeature::new( |
| 512 | &plan.ot_map, |
| 513 | hb_tag_t::from_bytes(b"pstf" ), |
| 514 | zero_context, |
| 515 | ), |
| 516 | vatu: IndicWouldSubstituteFeature::new( |
| 517 | &plan.ot_map, |
| 518 | hb_tag_t::from_bytes(b"vatu" ), |
| 519 | zero_context, |
| 520 | ), |
| 521 | mask_array, |
| 522 | } |
| 523 | } |
| 524 | } |
| 525 | |
| 526 | impl hb_glyph_info_t { |
| 527 | pub(crate) fn indic_category(&self) -> Category { |
| 528 | self.ot_shaper_var_u8_category() |
| 529 | } |
| 530 | |
| 531 | pub(crate) fn myanmar_category(&self) -> Category { |
| 532 | self.ot_shaper_var_u8_category() |
| 533 | } |
| 534 | |
| 535 | pub(crate) fn khmer_category(&self) -> Category { |
| 536 | self.ot_shaper_var_u8_category() |
| 537 | } |
| 538 | |
| 539 | pub(crate) fn set_indic_category(&mut self, c: Category) { |
| 540 | self.set_ot_shaper_var_u8_category(c) |
| 541 | } |
| 542 | |
| 543 | pub(crate) fn set_myanmar_category(&mut self, c: Category) { |
| 544 | self.set_ot_shaper_var_u8_category(c) |
| 545 | } |
| 546 | |
| 547 | pub(crate) fn indic_position(&self) -> Position { |
| 548 | self.ot_shaper_var_u8_auxiliary() |
| 549 | } |
| 550 | |
| 551 | pub(crate) fn myanmar_position(&self) -> Position { |
| 552 | self.ot_shaper_var_u8_auxiliary() |
| 553 | } |
| 554 | |
| 555 | pub(crate) fn set_indic_position(&mut self, c: Position) { |
| 556 | self.set_ot_shaper_var_u8_auxiliary(c) |
| 557 | } |
| 558 | |
| 559 | pub(crate) fn set_myanmar_position(&mut self, c: Position) { |
| 560 | self.set_ot_shaper_var_u8_auxiliary(c) |
| 561 | } |
| 562 | |
| 563 | fn is_one_of(&self, flags: u32) -> bool { |
| 564 | // If it ligated, all bets are off. |
| 565 | if _hb_glyph_info_ligated(self) { |
| 566 | return false; |
| 567 | } |
| 568 | |
| 569 | rb_flag_unsafe(self.indic_category() as u32) & flags != 0 |
| 570 | } |
| 571 | |
| 572 | fn is_joiner(&self) -> bool { |
| 573 | self.is_one_of(JOINER_FLAGS) |
| 574 | } |
| 575 | |
| 576 | pub(crate) fn is_consonant(&self) -> bool { |
| 577 | self.is_one_of(CONSONANT_FLAGS_INDIC) |
| 578 | } |
| 579 | |
| 580 | pub(crate) fn is_consonant_myanmar(&self) -> bool { |
| 581 | self.is_one_of(CONSONANT_FLAGS_MYANMAR) |
| 582 | } |
| 583 | |
| 584 | fn is_halant(&self) -> bool { |
| 585 | self.is_one_of(rb_flag(ot_category_t::OT_H as u32)) |
| 586 | } |
| 587 | |
| 588 | fn set_indic_properties(&mut self) { |
| 589 | let u = self.glyph_id; |
| 590 | let (cat, pos) = crate::hb::ot_shaper_indic_table::get_categories(u); |
| 591 | |
| 592 | self.set_indic_category(cat); |
| 593 | self.set_indic_position(pos); |
| 594 | } |
| 595 | } |
| 596 | |
| 597 | fn collect_features(planner: &mut hb_ot_shape_planner_t) { |
| 598 | // Do this before any lookups have been applied. |
| 599 | planner.ot_map.add_gsub_pause(Some(setup_syllables)); |
| 600 | |
| 601 | planner |
| 602 | .ot_map |
| 603 | .enable_feature(hb_tag_t::from_bytes(b"locl" ), F_PER_SYLLABLE, 1); |
| 604 | // The Indic specs do not require ccmp, but we apply it here since if |
| 605 | // there is a use of it, it's typically at the beginning. |
| 606 | planner |
| 607 | .ot_map |
| 608 | .enable_feature(hb_tag_t::from_bytes(b"ccmp" ), F_PER_SYLLABLE, 1); |
| 609 | |
| 610 | planner.ot_map.add_gsub_pause(Some(initial_reordering)); |
| 611 | |
| 612 | for feature in INDIC_FEATURES.iter().take(11) { |
| 613 | planner.ot_map.add_feature(feature.0, feature.1, 1); |
| 614 | planner.ot_map.add_gsub_pause(None); |
| 615 | } |
| 616 | |
| 617 | planner.ot_map.add_gsub_pause(Some(final_reordering)); |
| 618 | |
| 619 | for feature in INDIC_FEATURES.iter().skip(11) { |
| 620 | planner.ot_map.add_feature(feature.0, feature.1, 1); |
| 621 | } |
| 622 | } |
| 623 | |
| 624 | fn override_features(planner: &mut hb_ot_shape_planner_t) { |
| 625 | planner |
| 626 | .ot_map |
| 627 | .disable_feature(tag:hb_tag_t::from_bytes(b"liga" )); |
| 628 | planner.ot_map.add_gsub_pause(Some(syllabic_clear_var)); // Don't need syllables anymore. |
| 629 | } |
| 630 | |
| 631 | fn preprocess_text(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) { |
| 632 | super::ot_shaper_vowel_constraints::preprocess_text_vowel_constraints(buffer); |
| 633 | } |
| 634 | |
| 635 | fn decompose(_: &hb_ot_shape_normalize_context_t, ab: char) -> Option<(char, char)> { |
| 636 | // Don't decompose these. |
| 637 | match ab { |
| 638 | ' \u{0931}' | // DEVANAGARI LETTER RRA |
| 639 | // https://github.com/harfbuzz/harfbuzz/issues/779 |
| 640 | ' \u{09DC}' | // BENGALI LETTER RRA |
| 641 | ' \u{09DD}' | // BENGALI LETTER RHA |
| 642 | ' \u{0B94}' => return None, // TAMIL LETTER AU |
| 643 | _ => {} |
| 644 | } |
| 645 | |
| 646 | crate::hb::unicode::decompose(ab) |
| 647 | } |
| 648 | |
| 649 | fn compose(_: &hb_ot_shape_normalize_context_t, a: char, b: char) -> Option<char> { |
| 650 | // Avoid recomposing split matras. |
| 651 | if a.general_category().is_mark() { |
| 652 | return None; |
| 653 | } |
| 654 | |
| 655 | // Composition-exclusion exceptions that we want to recompose. |
| 656 | if a == ' \u{09AF}' && b == ' \u{09BC}' { |
| 657 | return Some(' \u{09DF}' ); |
| 658 | } |
| 659 | |
| 660 | crate::hb::unicode::compose(a, b) |
| 661 | } |
| 662 | |
| 663 | fn setup_masks(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) { |
| 664 | // We cannot setup masks here. We save information about characters |
| 665 | // and setup masks later on in a pause-callback. |
| 666 | for info: &mut hb_glyph_info_t in buffer.info_slice_mut() { |
| 667 | info.set_indic_properties(); |
| 668 | } |
| 669 | } |
| 670 | |
| 671 | fn setup_syllables(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) -> bool { |
| 672 | super::ot_shaper_indic_machine::find_syllables_indic(buffer); |
| 673 | |
| 674 | let mut start: usize = 0; |
| 675 | let mut end: usize = buffer.next_syllable(start:0); |
| 676 | while start < buffer.len { |
| 677 | buffer.unsafe_to_break(start:Some(start), end:Some(end)); |
| 678 | start = end; |
| 679 | end = buffer.next_syllable(start); |
| 680 | } |
| 681 | |
| 682 | false |
| 683 | } |
| 684 | |
| 685 | fn initial_reordering( |
| 686 | plan: &hb_ot_shape_plan_t, |
| 687 | face: &hb_font_t, |
| 688 | buffer: &mut hb_buffer_t, |
| 689 | ) -> bool { |
| 690 | use super::ot_shaper_indic_machine::SyllableType; |
| 691 | |
| 692 | let mut ret = false; |
| 693 | |
| 694 | let indic_plan = plan.data::<IndicShapePlan>(); |
| 695 | |
| 696 | update_consonant_positions(plan, indic_plan, face, buffer); |
| 697 | if super::ot_shaper_syllabic::insert_dotted_circles( |
| 698 | face, |
| 699 | buffer, |
| 700 | SyllableType::BrokenCluster as u8, |
| 701 | ot_category_t::OT_DOTTEDCIRCLE, |
| 702 | Some(ot_category_t::OT_Repha), |
| 703 | Some(ot_position_t::POS_END), |
| 704 | ) { |
| 705 | ret = true; |
| 706 | } |
| 707 | |
| 708 | let mut start = 0; |
| 709 | let mut end = buffer.next_syllable(0); |
| 710 | while start < buffer.len { |
| 711 | initial_reordering_syllable(plan, indic_plan, face, start, end, buffer); |
| 712 | start = end; |
| 713 | end = buffer.next_syllable(start); |
| 714 | } |
| 715 | |
| 716 | ret |
| 717 | } |
| 718 | |
| 719 | fn update_consonant_positions( |
| 720 | plan: &hb_ot_shape_plan_t, |
| 721 | indic_plan: &IndicShapePlan, |
| 722 | face: &hb_font_t, |
| 723 | buffer: &mut hb_buffer_t, |
| 724 | ) { |
| 725 | let mut virama_glyph: Option = None; |
| 726 | if indic_plan.config.virama != 0 { |
| 727 | virama_glyph = face.get_nominal_glyph(indic_plan.config.virama); |
| 728 | } |
| 729 | |
| 730 | if let Some(virama: GlyphId) = virama_glyph { |
| 731 | for info: &mut hb_glyph_info_t in buffer.info_slice_mut() { |
| 732 | if info.indic_position() == ot_position_t::POS_BASE_C { |
| 733 | let consonant: GlyphId = info.as_glyph(); |
| 734 | info.set_indic_position(consonant_position_from_face( |
| 735 | plan, indic_plan, face, consonant, virama, |
| 736 | )); |
| 737 | } |
| 738 | } |
| 739 | } |
| 740 | } |
| 741 | |
| 742 | fn consonant_position_from_face( |
| 743 | plan: &hb_ot_shape_plan_t, |
| 744 | indic_plan: &IndicShapePlan, |
| 745 | face: &hb_font_t, |
| 746 | consonant: GlyphId, |
| 747 | virama: GlyphId, |
| 748 | ) -> u8 { |
| 749 | // For old-spec, the order of glyphs is Consonant,Virama, |
| 750 | // whereas for new-spec, it's Virama,Consonant. However, |
| 751 | // some broken fonts (like Free Sans) simply copied lookups |
| 752 | // from old-spec to new-spec without modification. |
| 753 | // And oddly enough, Uniscribe seems to respect those lookups. |
| 754 | // Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds |
| 755 | // base at 0. The font however, only has lookups matching |
| 756 | // 930,94D in 'blwf', not the expected 94D,930 (with new-spec |
| 757 | // table). As such, we simply match both sequences. Seems |
| 758 | // to work. |
| 759 | // |
| 760 | // Vatu is done as well, for: |
| 761 | // https://github.com/harfbuzz/harfbuzz/issues/1587 |
| 762 | |
| 763 | if indic_plan |
| 764 | .blwf |
| 765 | .would_substitute(&plan.ot_map, face, &[virama, consonant]) |
| 766 | || indic_plan |
| 767 | .blwf |
| 768 | .would_substitute(&plan.ot_map, face, &[consonant, virama]) |
| 769 | || indic_plan |
| 770 | .vatu |
| 771 | .would_substitute(&plan.ot_map, face, &[virama, consonant]) |
| 772 | || indic_plan |
| 773 | .vatu |
| 774 | .would_substitute(&plan.ot_map, face, &[consonant, virama]) |
| 775 | { |
| 776 | return ot_position_t::POS_BELOW_C; |
| 777 | } |
| 778 | |
| 779 | if indic_plan |
| 780 | .pstf |
| 781 | .would_substitute(&plan.ot_map, face, &[virama, consonant]) |
| 782 | || indic_plan |
| 783 | .pstf |
| 784 | .would_substitute(&plan.ot_map, face, &[consonant, virama]) |
| 785 | { |
| 786 | return ot_position_t::POS_POST_C; |
| 787 | } |
| 788 | |
| 789 | if indic_plan |
| 790 | .pref |
| 791 | .would_substitute(&plan.ot_map, face, &[virama, consonant]) |
| 792 | || indic_plan |
| 793 | .pref |
| 794 | .would_substitute(&plan.ot_map, face, &[consonant, virama]) |
| 795 | { |
| 796 | return ot_position_t::POS_POST_C; |
| 797 | } |
| 798 | |
| 799 | ot_position_t::POS_BASE_C |
| 800 | } |
| 801 | |
| 802 | fn initial_reordering_syllable( |
| 803 | plan: &hb_ot_shape_plan_t, |
| 804 | indic_plan: &IndicShapePlan, |
| 805 | face: &hb_font_t, |
| 806 | start: usize, |
| 807 | end: usize, |
| 808 | buffer: &mut hb_buffer_t, |
| 809 | ) { |
| 810 | use super::ot_shaper_indic_machine::SyllableType; |
| 811 | |
| 812 | let syllable_type = match buffer.info[start].syllable() & 0x0F { |
| 813 | 0 => SyllableType::ConsonantSyllable, |
| 814 | 1 => SyllableType::VowelSyllable, |
| 815 | 2 => SyllableType::StandaloneCluster, |
| 816 | 3 => SyllableType::SymbolCluster, |
| 817 | 4 => SyllableType::BrokenCluster, |
| 818 | 5 => SyllableType::NonIndicCluster, |
| 819 | _ => unreachable!(), |
| 820 | }; |
| 821 | |
| 822 | match syllable_type { |
| 823 | // We made the vowels look like consonants. So let's call the consonant logic! |
| 824 | SyllableType::VowelSyllable | SyllableType::ConsonantSyllable => { |
| 825 | initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer); |
| 826 | } |
| 827 | // We already inserted dotted-circles, so just call the standalone_cluster. |
| 828 | SyllableType::BrokenCluster | SyllableType::StandaloneCluster => { |
| 829 | initial_reordering_standalone_cluster(plan, indic_plan, face, start, end, buffer); |
| 830 | } |
| 831 | SyllableType::SymbolCluster | SyllableType::NonIndicCluster => {} |
| 832 | } |
| 833 | } |
| 834 | |
// Rules from:
// https://docs.microsoft.com/en-us/typography/script-development/devanagari
| 837 | fn initial_reordering_consonant_syllable( |
| 838 | plan: &hb_ot_shape_plan_t, |
| 839 | indic_plan: &IndicShapePlan, |
| 840 | face: &hb_font_t, |
| 841 | start: usize, |
| 842 | end: usize, |
| 843 | buffer: &mut hb_buffer_t, |
| 844 | ) { |
| 845 | // https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167 |
| 846 | // For compatibility with legacy usage in Kannada, |
| 847 | // Ra+h+ZWJ must behave like Ra+ZWJ+h... |
| 848 | if buffer.script == Some(script::KANNADA) |
| 849 | && start + 3 <= end |
| 850 | && buffer.info[start].is_one_of(category_flag(ot_category_t::OT_Ra)) |
| 851 | && buffer.info[start + 1].is_one_of(category_flag(ot_category_t::OT_H)) |
| 852 | && buffer.info[start + 2].is_one_of(category_flag(ot_category_t::OT_ZWJ)) |
| 853 | { |
| 854 | buffer.merge_clusters(start + 1, start + 3); |
| 855 | buffer.info.swap(start + 1, start + 2); |
| 856 | } |
| 857 | |
| 858 | // 1. Find base consonant: |
| 859 | // |
| 860 | // The shaping engine finds the base consonant of the syllable, using the |
| 861 | // following algorithm: starting from the end of the syllable, move backwards |
| 862 | // until a consonant is found that does not have a below-base or post-base |
| 863 | // form (post-base forms have to follow below-base forms), or that is not a |
| 864 | // pre-base-reordering Ra, or arrive at the first consonant. The consonant |
| 865 | // stopped at will be the base. |
| 866 | // |
| 867 | // - If the syllable starts with Ra + Halant (in a script that has Reph) |
| 868 | // and has more than one consonant, Ra is excluded from candidates for |
| 869 | // base consonants. |
| 870 | |
| 871 | let mut base = end; |
| 872 | let mut has_reph = false; |
| 873 | |
| 874 | { |
| 875 | // -> If the syllable starts with Ra + Halant (in a script that has Reph) |
| 876 | // and has more than one consonant, Ra is excluded from candidates for |
| 877 | // base consonants. |
| 878 | let mut limit = start; |
| 879 | if indic_plan.mask_array[indic_feature::RPHF] != 0 |
| 880 | && start + 3 <= end |
| 881 | && ((indic_plan.config.reph_mode == RephMode::Implicit |
| 882 | && !buffer.info[start + 2].is_joiner()) |
| 883 | || (indic_plan.config.reph_mode == RephMode::Explicit |
| 884 | && buffer.info[start + 2].indic_category() == ot_category_t::OT_ZWJ)) |
| 885 | { |
| 886 | // See if it matches the 'rphf' feature. |
| 887 | let glyphs = &[ |
| 888 | buffer.info[start].as_glyph(), |
| 889 | buffer.info[start + 1].as_glyph(), |
| 890 | if indic_plan.config.reph_mode == RephMode::Explicit { |
| 891 | buffer.info[start + 2].as_glyph() |
| 892 | } else { |
| 893 | GlyphId(0) |
| 894 | }, |
| 895 | ]; |
| 896 | if indic_plan |
| 897 | .rphf |
| 898 | .would_substitute(&plan.ot_map, face, &glyphs[0..2]) |
| 899 | || (indic_plan.config.reph_mode == RephMode::Explicit |
| 900 | && indic_plan.rphf.would_substitute(&plan.ot_map, face, glyphs)) |
| 901 | { |
| 902 | limit += 2; |
| 903 | while limit < end && buffer.info[limit].is_joiner() { |
| 904 | limit += 1; |
| 905 | } |
| 906 | base = start; |
| 907 | has_reph = true; |
| 908 | } |
| 909 | } else if indic_plan.config.reph_mode == RephMode::LogRepha |
| 910 | && buffer.info[start].indic_category() == ot_category_t::OT_Repha |
| 911 | { |
| 912 | limit += 1; |
| 913 | while limit < end && buffer.info[limit].is_joiner() { |
| 914 | limit += 1; |
| 915 | } |
| 916 | base = start; |
| 917 | has_reph = true; |
| 918 | } |
| 919 | |
| 920 | { |
| 921 | // -> starting from the end of the syllable, move backwards |
| 922 | let mut i = end; |
| 923 | let mut seen_below = false; |
| 924 | loop { |
| 925 | i -= 1; |
| 926 | // -> until a consonant is found |
| 927 | if buffer.info[i].is_consonant_myanmar() { |
| 928 | // -> that does not have a below-base or post-base form |
| 929 | // (post-base forms have to follow below-base forms), |
| 930 | if buffer.info[i].indic_position() != ot_position_t::POS_BELOW_C |
| 931 | && (buffer.info[i].indic_position() != ot_position_t::POS_POST_C |
| 932 | || seen_below) |
| 933 | { |
| 934 | base = i; |
| 935 | break; |
| 936 | } |
| 937 | if buffer.info[i].indic_position() == ot_position_t::POS_BELOW_C { |
| 938 | seen_below = true; |
| 939 | } |
| 940 | |
| 941 | // -> or that is not a pre-base-reordering Ra, |
| 942 | // |
| 943 | // IMPLEMENTATION NOTES: |
| 944 | // |
| 945 | // Our pre-base-reordering Ra's are marked position::PostC, so will be skipped |
| 946 | // by the logic above already. |
| 947 | |
| 948 | // -> or arrive at the first consonant. The consonant stopped at will |
| 949 | // be the base. |
| 950 | base = i; |
| 951 | } else { |
| 952 | // A ZWJ after a Halant stops the base search, and requests an explicit |
| 953 | // half form. |
| 954 | // A ZWJ before a Halant, requests a subjoined form instead, and hence |
| 955 | // search continues. This is particularly important for Bengali |
| 956 | // sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. |
| 957 | if start < i |
| 958 | && buffer.info[i].indic_category() == ot_category_t::OT_ZWJ |
| 959 | && buffer.info[i - 1].indic_category() == ot_category_t::OT_H |
| 960 | { |
| 961 | break; |
| 962 | } |
| 963 | } |
| 964 | |
| 965 | if i <= limit { |
| 966 | break; |
| 967 | } |
| 968 | } |
| 969 | } |
| 970 | |
| 971 | // -> If the syllable starts with Ra + Halant (in a script that has Reph) |
| 972 | // and has more than one consonant, Ra is excluded from candidates for |
| 973 | // base consonants. |
| 974 | // |
| 975 | // Only do this for unforced Reph (i.e. not for Ra,H,ZWJ). |
| 976 | if has_reph && base == start && limit - base <= 2 { |
| 977 | // Have no other consonant, so Reph is not formed and Ra becomes base. |
| 978 | has_reph = false; |
| 979 | } |
| 980 | } |
| 981 | |
| 982 | // 2. Decompose and reorder Matras: |
| 983 | // |
| 984 | // Each matra and any syllable modifier sign in the syllable are moved to the |
| 985 | // appropriate position relative to the consonant(s) in the syllable. The |
| 986 | // shaping engine decomposes two- or three-part matras into their constituent |
| 987 | // parts before any repositioning. Matra characters are classified by which |
| 988 | // consonant in a conjunct they have affinity for and are reordered to the |
| 989 | // following positions: |
| 990 | // |
| 991 | // - Before first half form in the syllable |
| 992 | // - After subjoined consonants |
| 993 | // - After post-form consonant |
| 994 | // - After main consonant (for above marks) |
| 995 | // |
| 996 | // IMPLEMENTATION NOTES: |
| 997 | // |
| 998 | // The normalize() routine has already decomposed matras for us, so we don't |
| 999 | // need to worry about that. |
| 1000 | |
| 1001 | // 3. Reorder marks to canonical order: |
| 1002 | // |
| 1003 | // Adjacent nukta and halant or nukta and vedic sign are always repositioned |
| 1004 | // if necessary, so that the nukta is first. |
| 1005 | // |
| 1006 | // IMPLEMENTATION NOTES: |
| 1007 | // |
| 1008 | // We don't need to do this: the normalize() routine already did this for us. |
| 1009 | |
| 1010 | // Reorder characters |
| 1011 | |
| 1012 | for i in start..base { |
| 1013 | let pos = buffer.info[i].indic_position(); |
| 1014 | buffer.info[i].set_indic_position(cmp::min(ot_position_t::POS_PRE_C, pos)); |
| 1015 | } |
| 1016 | |
| 1017 | if base < end { |
| 1018 | buffer.info[base].set_indic_position(ot_position_t::POS_BASE_C); |
| 1019 | } |
| 1020 | |
| 1021 | // Handle beginning Ra |
| 1022 | if has_reph { |
| 1023 | buffer.info[start].set_indic_position(ot_position_t::POS_RA_TO_BECOME_REPH); |
| 1024 | } |
| 1025 | |
| 1026 | // For old-style Indic script tags, move the first post-base Halant after |
| 1027 | // last consonant. |
| 1028 | // |
| 1029 | // Reports suggest that in some scripts Uniscribe does this only if there |
| 1030 | // is *not* a Halant after last consonant already. We know that is the |
| 1031 | // case for Kannada, while it reorders unconditionally in other scripts, |
| 1032 | // eg. Malayalam, Bengali, and Devanagari. We don't currently know about |
| 1033 | // other scripts, so we block Kannada. |
| 1034 | // |
| 1035 | // Kannada test case: |
| 1036 | // U+0C9A,U+0CCD,U+0C9A,U+0CCD |
| 1037 | // With some versions of Lohit Kannada. |
| 1038 | // https://bugs.freedesktop.org/show_bug.cgi?id=59118 |
| 1039 | // |
| 1040 | // Malayalam test case: |
| 1041 | // U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D |
| 1042 | // With lohit-ttf-20121122/Lohit-Malayalam.ttf |
| 1043 | // |
| 1044 | // Bengali test case: |
| 1045 | // U+0998,U+09CD,U+09AF,U+09CD |
| 1046 | // With Windows XP vrinda.ttf |
| 1047 | // https://github.com/harfbuzz/harfbuzz/issues/1073 |
| 1048 | // |
| 1049 | // Devanagari test case: |
| 1050 | // U+091F,U+094D,U+0930,U+094D |
| 1051 | // With chandas.ttf |
| 1052 | // https://github.com/harfbuzz/harfbuzz/issues/1071 |
| 1053 | if indic_plan.is_old_spec { |
| 1054 | let disallow_double_halants = buffer.script == Some(script::KANNADA); |
| 1055 | for i in base + 1..end { |
| 1056 | if buffer.info[i].indic_category() == ot_category_t::OT_H { |
| 1057 | let mut j = end - 1; |
| 1058 | while j > i { |
| 1059 | if buffer.info[j].is_consonant() |
| 1060 | || (disallow_double_halants |
| 1061 | && buffer.info[j].indic_category() == ot_category_t::OT_H) |
| 1062 | { |
| 1063 | break; |
| 1064 | } |
| 1065 | |
| 1066 | j -= 1; |
| 1067 | } |
| 1068 | |
| 1069 | if buffer.info[j].indic_category() != ot_category_t::OT_H && j > i { |
| 1070 | // Move Halant to after last consonant. |
| 1071 | let t = buffer.info[i]; |
| 1072 | for k in 0..j - i { |
| 1073 | buffer.info[k + i] = buffer.info[k + i + 1]; |
| 1074 | } |
| 1075 | buffer.info[j] = t; |
| 1076 | } |
| 1077 | |
| 1078 | break; |
| 1079 | } |
| 1080 | } |
| 1081 | } |
| 1082 | |
| 1083 | // Attach misc marks to previous char to move with them. |
| 1084 | { |
| 1085 | let mut last_pos = ot_position_t::POS_START; |
| 1086 | for i in start..end { |
| 1087 | let ok = rb_flag_unsafe(buffer.info[i].indic_category() as u32) |
| 1088 | & (category_flag(ot_category_t::OT_ZWJ) |
| 1089 | | category_flag(ot_category_t::OT_ZWNJ) |
| 1090 | | category_flag(ot_category_t::OT_N) |
| 1091 | | category_flag(ot_category_t::OT_RS) |
| 1092 | | category_flag(ot_category_t::OT_CM) |
| 1093 | | category_flag(ot_category_t::OT_H)) |
| 1094 | != 0; |
| 1095 | if ok { |
| 1096 | buffer.info[i].set_indic_position(last_pos); |
| 1097 | |
| 1098 | if buffer.info[i].indic_category() == ot_category_t::OT_H |
| 1099 | && buffer.info[i].indic_position() == ot_position_t::POS_PRE_M |
| 1100 | { |
| 1101 | // Uniscribe doesn't move the Halant with Left Matra. |
| 1102 | // TEST: U+092B,U+093F,U+094D |
| 1103 | // We follow. |
| 1104 | for j in (start + 1..=i).rev() { |
| 1105 | if buffer.info[j - 1].indic_position() != ot_position_t::POS_PRE_M { |
| 1106 | let pos = buffer.info[j - 1].indic_position(); |
| 1107 | buffer.info[i].set_indic_position(pos); |
| 1108 | break; |
| 1109 | } |
| 1110 | } |
| 1111 | } |
| 1112 | } else if buffer.info[i].indic_position() != ot_position_t::POS_SMVD { |
| 1113 | if buffer.info[i].indic_category() == ot_category_t::OT_MPst |
| 1114 | && i > start |
| 1115 | && buffer.info[i - 1].indic_category() == ot_category_t::OT_SM |
| 1116 | { |
| 1117 | let val = buffer.info[i].indic_position(); |
| 1118 | buffer.info[i - 1].set_indic_position(val); |
| 1119 | } |
| 1120 | |
| 1121 | last_pos = buffer.info[i].indic_position(); |
| 1122 | } |
| 1123 | } |
| 1124 | } |
| 1125 | // For post-base consonants let them own anything before them |
| 1126 | // since the last consonant or matra. |
| 1127 | { |
| 1128 | let mut last = base; |
| 1129 | for i in base + 1..end { |
| 1130 | if buffer.info[i].is_consonant() { |
| 1131 | for j in last + 1..i { |
| 1132 | if buffer.info[j].indic_position() < ot_position_t::POS_SMVD { |
| 1133 | let pos = buffer.info[i].indic_position(); |
| 1134 | buffer.info[j].set_indic_position(pos); |
| 1135 | } |
| 1136 | } |
| 1137 | |
| 1138 | last = i; |
| 1139 | } else if (rb_flag_unsafe(buffer.info[i].indic_category() as u32) |
| 1140 | & (rb_flag(ot_category_t::OT_M as u32) | rb_flag(ot_category_t::OT_MPst as u32))) |
| 1141 | != 0 |
| 1142 | { |
| 1143 | last = i; |
| 1144 | } |
| 1145 | } |
| 1146 | } |
| 1147 | |
| 1148 | { |
| 1149 | // Use syllable() for sort accounting temporarily. |
| 1150 | let syllable = buffer.info[start].syllable(); |
| 1151 | for i in start..end { |
| 1152 | buffer.info[i].set_syllable(u8::try_from(i - start).unwrap()); |
| 1153 | } |
| 1154 | |
| 1155 | buffer.info[start..end].sort_by_key(|a| a.indic_position()); |
| 1156 | |
| 1157 | // Find base again; also flip left-matra sequence. |
| 1158 | let mut first_left_mantra = end; |
| 1159 | let mut last_left_mantra = end; |
| 1160 | base = end; |
| 1161 | |
| 1162 | for i in start..end { |
| 1163 | if buffer.info[i].indic_position() == ot_position_t::POS_BASE_C { |
| 1164 | base = i; |
| 1165 | break; |
| 1166 | } else if buffer.info[i].indic_position() == ot_position_t::POS_PRE_M { |
| 1167 | if first_left_mantra == end { |
| 1168 | first_left_mantra = i; |
| 1169 | } |
| 1170 | |
| 1171 | last_left_mantra = i; |
| 1172 | } |
| 1173 | } |
| 1174 | |
| 1175 | // https://github.com/harfbuzz/harfbuzz/issues/3863 |
| 1176 | if first_left_mantra < last_left_mantra { |
| 1177 | // No need to merge clusters, handled later. |
| 1178 | buffer.reverse_range(first_left_mantra, last_left_mantra + 1); |
| 1179 | // Reverse back nuktas, etc. |
| 1180 | let mut i = first_left_mantra; |
| 1181 | |
| 1182 | for j in i..=last_left_mantra { |
| 1183 | if (rb_flag_unsafe(buffer.info[j].indic_category() as u32) |
| 1184 | & (rb_flag(ot_category_t::OT_M as u32) |
| 1185 | | rb_flag(ot_category_t::OT_MPst as u32))) |
| 1186 | != 0 |
| 1187 | { |
| 1188 | buffer.reverse_range(i, j + 1); |
| 1189 | i = j + 1; |
| 1190 | } |
| 1191 | } |
| 1192 | } |
| 1193 | |
| 1194 | // Things are out-of-control for post base positions, they may shuffle |
| 1195 | // around like crazy. In old-spec mode, we move halants around, so in |
| 1196 | // that case merge all clusters after base. Otherwise, check the sort |
| 1197 | // order and merge as needed. |
| 1198 | // For pre-base stuff, we handle cluster issues in final reordering. |
| 1199 | // |
| 1200 | // We could use buffer->sort() for this, if there was no special |
| 1201 | // reordering of pre-base stuff happening later... |
| 1202 | // We don't want to merge_clusters all of that, which buffer->sort() |
| 1203 | // would. Here's a concrete example: |
| 1204 | // |
| 1205 | // Assume there's a pre-base consonant and explicit Halant before base, |
| 1206 | // followed by a prebase-reordering (left) Matra: |
| 1207 | // |
| 1208 | // C,H,ZWNJ,B,M |
| 1209 | // |
| 1210 | // At this point in reordering we would have: |
| 1211 | // |
| 1212 | // M,C,H,ZWNJ,B |
| 1213 | // |
| 1214 | // whereas in final reordering we will bring the Matra closer to Base: |
| 1215 | // |
| 1216 | // C,H,ZWNJ,M,B |
| 1217 | // |
| 1218 | // That's why we don't want to merge-clusters anything before the Base |
| 1219 | // at this point. But if something moved from after Base to before it, |
| 1220 | // we should merge clusters from base to them. In final-reordering, we |
| 1221 | // only move things around before base, and merge-clusters up to base. |
| 1222 | // These two merge-clusters from the two sides of base will interlock |
| 1223 | // to merge things correctly. See: |
| 1224 | // https://github.com/harfbuzz/harfbuzz/issues/2272 |
| 1225 | if indic_plan.is_old_spec || end - start > 127 { |
| 1226 | buffer.merge_clusters(base, end); |
| 1227 | } else { |
| 1228 | // Note! syllable() is a one-byte field. |
| 1229 | for i in base..end { |
| 1230 | if buffer.info[i].syllable() != 255 { |
| 1231 | let mut min = i; |
| 1232 | let mut max = i; |
| 1233 | let mut j = start + buffer.info[i].syllable() as usize; |
| 1234 | while j != i { |
| 1235 | min = cmp::min(min, j); |
| 1236 | max = cmp::max(max, j); |
| 1237 | let next = start + buffer.info[j].syllable() as usize; |
| 1238 | buffer.info[j].set_syllable(255); // So we don't process j later again. |
| 1239 | j = next; |
| 1240 | } |
| 1241 | |
| 1242 | buffer.merge_clusters(cmp::max(base, min), max + 1); |
| 1243 | } |
| 1244 | } |
| 1245 | } |
| 1246 | |
| 1247 | // Put syllable back in. |
| 1248 | for info in &mut buffer.info[start..end] { |
| 1249 | info.set_syllable(syllable); |
| 1250 | } |
| 1251 | } |
| 1252 | |
| 1253 | // Setup masks now |
| 1254 | |
| 1255 | { |
| 1256 | // Reph |
| 1257 | for info in &mut buffer.info[start..end] { |
| 1258 | if info.indic_position() != ot_position_t::POS_RA_TO_BECOME_REPH { |
| 1259 | break; |
| 1260 | } |
| 1261 | |
| 1262 | info.mask |= indic_plan.mask_array[indic_feature::RPHF]; |
| 1263 | } |
| 1264 | |
| 1265 | // Pre-base |
| 1266 | let mut mask = indic_plan.mask_array[indic_feature::HALF]; |
| 1267 | if !indic_plan.is_old_spec && indic_plan.config.blwf_mode == BlwfMode::PreAndPost { |
| 1268 | mask |= indic_plan.mask_array[indic_feature::BLWF]; |
| 1269 | } |
| 1270 | |
| 1271 | for info in &mut buffer.info[start..base] { |
| 1272 | info.mask |= mask; |
| 1273 | } |
| 1274 | |
| 1275 | // Base |
| 1276 | mask = 0; |
| 1277 | if base < end { |
| 1278 | buffer.info[base].mask |= mask; |
| 1279 | } |
| 1280 | |
| 1281 | // Post-base |
| 1282 | mask = indic_plan.mask_array[indic_feature::BLWF] |
| 1283 | | indic_plan.mask_array[indic_feature::ABVF] |
| 1284 | | indic_plan.mask_array[indic_feature::PSTF]; |
| 1285 | for i in base + 1..end { |
| 1286 | buffer.info[i].mask |= mask; |
| 1287 | } |
| 1288 | } |
| 1289 | |
| 1290 | if indic_plan.is_old_spec && buffer.script == Some(script::DEVANAGARI) { |
| 1291 | // Old-spec eye-lash Ra needs special handling. From the |
| 1292 | // spec: |
| 1293 | // |
| 1294 | // "The feature 'below-base form' is applied to consonants |
| 1295 | // having below-base forms and following the base consonant. |
| 1296 | // The exception is vattu, which may appear below half forms |
| 1297 | // as well as below the base glyph. The feature 'below-base |
| 1298 | // form' will be applied to all such occurrences of Ra as well." |
| 1299 | // |
| 1300 | // Test case: U+0924,U+094D,U+0930,U+094d,U+0915 |
| 1301 | // with Sanskrit 2003 font. |
| 1302 | // |
| 1303 | // However, note that Ra,Halant,ZWJ is the correct way to |
| 1304 | // request eyelash form of Ra, so we wouldn't inhibit it |
| 1305 | // in that sequence. |
| 1306 | // |
| 1307 | // Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915 |
| 1308 | for i in start..base.saturating_sub(1) { |
| 1309 | if buffer.info[i].indic_category() == ot_category_t::OT_Ra |
| 1310 | && buffer.info[i + 1].indic_category() == ot_category_t::OT_H |
| 1311 | && (i + 2 == base || buffer.info[i + 2].indic_category() != ot_category_t::OT_ZWJ) |
| 1312 | { |
| 1313 | buffer.info[i].mask |= indic_plan.mask_array[indic_feature::BLWF]; |
| 1314 | buffer.info[i + 1].mask |= indic_plan.mask_array[indic_feature::BLWF]; |
| 1315 | } |
| 1316 | } |
| 1317 | } |
| 1318 | |
| 1319 | let pref_len = 2; |
| 1320 | if indic_plan.mask_array[indic_feature::PREF] != 0 && base + pref_len < end { |
| 1321 | // Find a Halant,Ra sequence and mark it for pre-base-reordering processing. |
| 1322 | for i in base + 1..end - pref_len + 1 { |
| 1323 | let glyphs = &[buffer.info[i + 0].as_glyph(), buffer.info[i + 1].as_glyph()]; |
| 1324 | if indic_plan.pref.would_substitute(&plan.ot_map, face, glyphs) { |
| 1325 | buffer.info[i + 0].mask |= indic_plan.mask_array[indic_feature::PREF]; |
| 1326 | buffer.info[i + 1].mask |= indic_plan.mask_array[indic_feature::PREF]; |
| 1327 | break; |
| 1328 | } |
| 1329 | } |
| 1330 | } |
| 1331 | |
| 1332 | // Apply ZWJ/ZWNJ effects |
| 1333 | for i in start + 1..end { |
| 1334 | if buffer.info[i].is_joiner() { |
| 1335 | let non_joiner = buffer.info[i].indic_category() == ot_category_t::OT_ZWNJ; |
| 1336 | let mut j = i; |
| 1337 | |
| 1338 | loop { |
| 1339 | j -= 1; |
| 1340 | |
| 1341 | // ZWJ/ZWNJ should disable CJCT. They do that by simply |
| 1342 | // being there, since we don't skip them for the CJCT |
| 1343 | // feature (ie. F_MANUAL_ZWJ) |
| 1344 | |
| 1345 | // A ZWNJ disables HALF. |
| 1346 | if non_joiner { |
| 1347 | buffer.info[j].mask &= !indic_plan.mask_array[indic_feature::HALF]; |
| 1348 | } |
| 1349 | |
| 1350 | if j <= start || buffer.info[j].is_consonant() { |
| 1351 | break; |
| 1352 | } |
| 1353 | } |
| 1354 | } |
| 1355 | } |
| 1356 | } |
| 1357 | |
| 1358 | fn initial_reordering_standalone_cluster( |
| 1359 | plan: &hb_ot_shape_plan_t, |
| 1360 | indic_plan: &IndicShapePlan, |
| 1361 | face: &hb_font_t, |
| 1362 | start: usize, |
| 1363 | end: usize, |
| 1364 | buffer: &mut hb_buffer_t, |
| 1365 | ) { |
| 1366 | // We treat placeholder/dotted-circle as if they are consonants, so we |
| 1367 | // should just chain. Only if not in compatibility mode that is... |
| 1368 | initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer); |
| 1369 | } |
| 1370 | |
| 1371 | fn final_reordering(plan: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) -> bool { |
| 1372 | if buffer.is_empty() { |
| 1373 | return false; |
| 1374 | } |
| 1375 | |
| 1376 | foreach_syllable!(buffer, start, end, { |
| 1377 | final_reordering_impl(plan, face, start, end, buffer); |
| 1378 | }); |
| 1379 | |
| 1380 | false |
| 1381 | } |
| 1382 | |
| 1383 | fn final_reordering_impl( |
| 1384 | plan: &hb_ot_shape_plan_t, |
| 1385 | face: &hb_font_t, |
| 1386 | start: usize, |
| 1387 | end: usize, |
| 1388 | buffer: &mut hb_buffer_t, |
| 1389 | ) { |
| 1390 | let indic_plan = plan.data::<IndicShapePlan>(); |
| 1391 | |
| 1392 | // This function relies heavily on halant glyphs. Lots of ligation |
| 1393 | // and possibly multiple substitutions happened prior to this |
| 1394 | // phase, and that might have messed up our properties. Recover |
| 1395 | // from a particular case of that where we're fairly sure that a |
| 1396 | // class of OT_H is desired but has been lost. |
| 1397 | // |
| 1398 | // We don't call load_virama_glyph(), since we know it's already loaded. |
| 1399 | let mut virama_glyph = None; |
| 1400 | if indic_plan.config.virama != 0 { |
| 1401 | if let Some(g) = face.get_nominal_glyph(indic_plan.config.virama) { |
| 1402 | virama_glyph = Some(g.0 as u32); |
| 1403 | } |
| 1404 | } |
| 1405 | |
| 1406 | if let Some(virama_glyph) = virama_glyph { |
| 1407 | for info in &mut buffer.info[start..end] { |
| 1408 | if info.glyph_id == virama_glyph |
| 1409 | && _hb_glyph_info_ligated(info) |
| 1410 | && _hb_glyph_info_multiplied(info) |
| 1411 | { |
| 1412 | // This will make sure that this glyph passes is_halant() test. |
| 1413 | info.set_indic_category(ot_category_t::OT_H); |
| 1414 | _hb_glyph_info_clear_ligated_and_multiplied(info); |
| 1415 | } |
| 1416 | } |
| 1417 | } |
| 1418 | |
| 1419 | // 4. Final reordering: |
| 1420 | // |
| 1421 | // After the localized forms and basic shaping forms GSUB features have been |
| 1422 | // applied (see below), the shaping engine performs some final glyph |
| 1423 | // reordering before applying all the remaining font features to the entire |
| 1424 | // syllable. |
| 1425 | |
| 1426 | let mut try_pref = indic_plan.mask_array[indic_feature::PREF] != 0; |
| 1427 | |
| 1428 | let mut base = start; |
| 1429 | while base < end { |
| 1430 | if buffer.info[base].indic_position() as u32 >= ot_position_t::POS_BASE_C as u32 { |
| 1431 | if try_pref && base + 1 < end { |
| 1432 | for i in base + 1..end { |
| 1433 | if (buffer.info[i].mask & indic_plan.mask_array[indic_feature::PREF]) != 0 { |
| 1434 | if !(_hb_glyph_info_substituted(&buffer.info[i]) |
| 1435 | && _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[i])) |
| 1436 | { |
| 1437 | // Ok, this was a 'pref' candidate but didn't form any. |
| 1438 | // Base is around here... |
| 1439 | base = i; |
| 1440 | while base < end && buffer.info[base].is_halant() { |
| 1441 | base += 1; |
| 1442 | } |
| 1443 | |
| 1444 | if base < end { |
| 1445 | buffer.info[base].set_indic_position(ot_position_t::POS_BASE_C); |
| 1446 | } |
| 1447 | |
| 1448 | try_pref = false; |
| 1449 | } |
| 1450 | |
| 1451 | break; |
| 1452 | } |
| 1453 | |
| 1454 | if base == end { |
| 1455 | break; |
| 1456 | } |
| 1457 | } |
| 1458 | } |
| 1459 | |
| 1460 | // For Malayalam, skip over unformed below- (but NOT post-) forms. |
| 1461 | if buffer.script == Some(script::MALAYALAM) { |
| 1462 | let mut i = base + 1; |
| 1463 | while i < end { |
| 1464 | while i < end && buffer.info[i].is_joiner() { |
| 1465 | i += 1; |
| 1466 | } |
| 1467 | |
| 1468 | if i == end || !buffer.info[i].is_halant() { |
| 1469 | break; |
| 1470 | } |
| 1471 | |
| 1472 | i += 1; // Skip halant. |
| 1473 | |
| 1474 | while i < end && buffer.info[i].is_joiner() { |
| 1475 | i += 1; |
| 1476 | } |
| 1477 | |
| 1478 | if i < end |
| 1479 | && buffer.info[i].is_consonant() |
| 1480 | && buffer.info[i].indic_position() == ot_position_t::POS_BELOW_C |
| 1481 | { |
| 1482 | base = i; |
| 1483 | buffer.info[base].set_indic_position(ot_position_t::POS_BASE_C); |
| 1484 | } |
| 1485 | |
| 1486 | i += 1; |
| 1487 | } |
| 1488 | } |
| 1489 | |
| 1490 | if start < base |
| 1491 | && buffer.info[base].indic_position() as u32 > ot_position_t::POS_BASE_C as u32 |
| 1492 | { |
| 1493 | base -= 1; |
| 1494 | } |
| 1495 | |
| 1496 | break; |
| 1497 | } |
| 1498 | |
| 1499 | base += 1; |
| 1500 | } |
| 1501 | |
| 1502 | if base == end |
| 1503 | && start < base |
| 1504 | && buffer.info[base - 1].is_one_of(rb_flag(ot_category_t::OT_ZWJ as u32)) |
| 1505 | { |
| 1506 | base -= 1; |
| 1507 | } |
| 1508 | |
| 1509 | if base < end { |
| 1510 | while start < base |
| 1511 | && buffer.info[base].is_one_of( |
| 1512 | rb_flag(ot_category_t::OT_N as u32) | rb_flag(ot_category_t::OT_H as u32), |
| 1513 | ) |
| 1514 | { |
| 1515 | base -= 1; |
| 1516 | } |
| 1517 | } |
| 1518 | |
| 1519 | // - Reorder matras: |
| 1520 | // |
| 1521 | // If a pre-base matra character had been reordered before applying basic |
| 1522 | // features, the glyph can be moved closer to the main consonant based on |
| 1523 | // whether half-forms had been formed. Actual position for the matra is |
| 1524 | // defined as “after last standalone halant glyph, after initial matra |
| 1525 | // position and before the main consonant”. If ZWJ or ZWNJ follow this |
| 1526 | // halant, position is moved after it. |
| 1527 | // |
| 1528 | // IMPLEMENTATION NOTES: |
| 1529 | // |
| 1530 | // It looks like the last sentence is wrong. Testing, with Windows 7 Uniscribe |
| 1531 | // and Devanagari shows that the behavior is best described as: |
| 1532 | // |
| 1533 | // "If ZWJ follows this halant, matra is NOT repositioned after this halant. |
| 1534 | // If ZWNJ follows this halant, position is moved after it." |
| 1535 | // |
| 1536 | // Test case, with Adobe Devanagari or Nirmala UI: |
| 1537 | // |
| 1538 | // U+091F,U+094D,U+200C,U+092F,U+093F |
| 1539 | // (Matra moves to the middle, after ZWNJ.) |
| 1540 | // |
| 1541 | // U+091F,U+094D,U+200D,U+092F,U+093F |
| 1542 | // (Matra does NOT move, stays to the left.) |
| 1543 | // |
| 1544 | // https://github.com/harfbuzz/harfbuzz/issues/1070 |
| 1545 | |
| 1546 | // Otherwise there can't be any pre-base matra characters. |
| 1547 | if start + 1 < end && start < base { |
| 1548 | // If we lost track of base, alas, position before last thingy. |
| 1549 | let mut new_pos = if base == end { base - 2 } else { base - 1 }; |
| 1550 | |
| 1551 | // Malayalam / Tamil do not have "half" forms or explicit virama forms. |
| 1552 | // The glyphs formed by 'half' are Chillus or ligated explicit viramas. |
| 1553 | // We want to position matra after them. |
| 1554 | if buffer.script != Some(script::MALAYALAM) && buffer.script != Some(script::TAMIL) { |
| 1555 | loop { |
| 1556 | while new_pos > start |
| 1557 | && !buffer.info[new_pos].is_one_of( |
| 1558 | rb_flag(ot_category_t::OT_M as u32) |
| 1559 | | rb_flag(ot_category_t::OT_MPst as u32) |
| 1560 | | rb_flag(ot_category_t::OT_H as u32), |
| 1561 | ) |
| 1562 | { |
| 1563 | new_pos -= 1; |
| 1564 | } |
| 1565 | |
| 1566 | // If we found no Halant we are done. |
| 1567 | // Otherwise only proceed if the Halant does |
| 1568 | // not belong to the Matra itself! |
| 1569 | if buffer.info[new_pos].is_halant() |
| 1570 | && buffer.info[new_pos].indic_position() != ot_position_t::POS_PRE_M |
| 1571 | { |
| 1572 | if new_pos + 1 < end { |
| 1573 | // -> If ZWJ follows this halant, matra is NOT repositioned after this halant. |
| 1574 | if buffer.info[new_pos + 1].indic_category() == ot_category_t::OT_ZWJ { |
| 1575 | // Keep searching. |
| 1576 | if new_pos > start { |
| 1577 | new_pos -= 1; |
| 1578 | continue; |
| 1579 | } |
| 1580 | } |
| 1581 | |
| 1582 | // -> If ZWNJ follows this halant, position is moved after it. |
| 1583 | // |
| 1584 | // IMPLEMENTATION NOTES: |
| 1585 | // |
| 1586 | // This is taken care of by the state-machine. A Halant,ZWNJ is a terminating |
| 1587 | // sequence for a consonant syllable; any pre-base matras occurring after it |
| 1588 | // will belong to the subsequent syllable. |
| 1589 | } |
| 1590 | } else { |
| 1591 | new_pos = start; // No move. |
| 1592 | } |
| 1593 | |
| 1594 | break; |
| 1595 | } |
| 1596 | } |
| 1597 | |
| 1598 | if start < new_pos && buffer.info[new_pos].indic_position() != ot_position_t::POS_PRE_M { |
| 1599 | // Now go see if there's actually any matras... |
| 1600 | for i in (start + 1..=new_pos).rev() { |
| 1601 | if buffer.info[i - 1].indic_position() == ot_position_t::POS_PRE_M { |
| 1602 | let old_pos = i - 1; |
| 1603 | // Shouldn't actually happen. |
| 1604 | if old_pos < base && base <= new_pos { |
| 1605 | base -= 1; |
| 1606 | } |
| 1607 | |
| 1608 | let tmp = buffer.info[old_pos]; |
| 1609 | for i in 0..new_pos - old_pos { |
| 1610 | buffer.info[i + old_pos] = buffer.info[i + old_pos + 1]; |
| 1611 | } |
| 1612 | buffer.info[new_pos] = tmp; |
| 1613 | |
| 1614 | // Note: this merge_clusters() is intentionally *after* the reordering. |
| 1615 | // Indic matra reordering is special and tricky... |
| 1616 | buffer.merge_clusters(new_pos, cmp::min(end, base + 1)); |
| 1617 | |
| 1618 | new_pos -= 1; |
| 1619 | } |
| 1620 | } |
| 1621 | } else { |
| 1622 | for i in start..base { |
| 1623 | if buffer.info[i].indic_position() == ot_position_t::POS_PRE_M { |
| 1624 | buffer.merge_clusters(i, cmp::min(end, base + 1)); |
| 1625 | break; |
| 1626 | } |
| 1627 | } |
| 1628 | } |
| 1629 | } |
| 1630 | |
| 1631 | // - Reorder reph: |
| 1632 | // |
| 1633 | // Reph’s original position is always at the beginning of the syllable, |
| 1634 | // (i.e. it is not reordered at the character reordering stage). However, |
| 1635 | // it will be reordered according to the basic-forms shaping results. |
| 1636 | // Possible positions for reph, depending on the script, are: after main, |
| 1637 | // before post-base consonant forms, and after post-base consonant forms. |
| 1638 | |
| 1639 | // Two cases: |
| 1640 | // |
| 1641 | // - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then |
| 1642 | // we should only move it if the sequence ligated to the repha form. |
| 1643 | // |
| 1644 | // - If repha is encoded separately and in the logical position, we should only |
| 1645 | // move it if it did NOT ligate. If it ligated, it's probably the font trying |
| 1646 | // to make it work without the reordering. |
| 1647 | |
| 1648 | if start + 1 < end |
| 1649 | && buffer.info[start].indic_position() == ot_position_t::POS_RA_TO_BECOME_REPH |
| 1650 | && (buffer.info[start].indic_category() == ot_category_t::OT_Repha) |
| 1651 | ^ _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[start]) |
| 1652 | { |
| 1653 | let mut new_reph_pos; |
| 1654 | 'reph: { |
| 1655 | let reph_pos = indic_plan.config.reph_pos; |
| 1656 | |
| 1657 | // 1. If reph should be positioned after post-base consonant forms, |
| 1658 | // proceed to step 5. |
| 1659 | if reph_pos != RephPosition::AfterPost { |
| 1660 | // 2. If the reph repositioning class is not after post-base: target |
| 1661 | // position is after the first explicit halant glyph between the |
| 1662 | // first post-reph consonant and last main consonant. If ZWJ or ZWNJ |
| 1663 | // are following this halant, position is moved after it. If such |
| 1664 | // position is found, this is the target position. Otherwise, |
| 1665 | // proceed to the next step. |
| 1666 | // |
| 1667 | // Note: in old-implementation fonts, where classifications were |
| 1668 | // fixed in shaping engine, there was no case where reph position |
| 1669 | // will be found on this step. |
| 1670 | { |
| 1671 | new_reph_pos = start + 1; |
| 1672 | while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() { |
| 1673 | new_reph_pos += 1; |
| 1674 | } |
| 1675 | |
| 1676 | if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() { |
| 1677 | // ->If ZWJ or ZWNJ are following this halant, position is moved after it. |
| 1678 | if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() { |
| 1679 | new_reph_pos += 1; |
| 1680 | } |
| 1681 | |
| 1682 | break 'reph; |
| 1683 | } |
| 1684 | } |
| 1685 | |
| 1686 | // 3. If reph should be repositioned after the main consonant: find the |
| 1687 | // first consonant not ligated with main, or find the first |
| 1688 | // consonant that is not a potential pre-base-reordering Ra. |
| 1689 | if reph_pos == RephPosition::AfterMain { |
| 1690 | new_reph_pos = base; |
| 1691 | while new_reph_pos + 1 < end |
| 1692 | && buffer.info[new_reph_pos + 1].indic_position() |
| 1693 | <= ot_position_t::POS_AFTER_MAIN |
| 1694 | { |
| 1695 | new_reph_pos += 1; |
| 1696 | } |
| 1697 | |
| 1698 | if new_reph_pos < end { |
| 1699 | break 'reph; |
| 1700 | } |
| 1701 | } |
| 1702 | |
| 1703 | // 4. If reph should be positioned before post-base consonant, find |
| 1704 | // first post-base classified consonant not ligated with main. If no |
| 1705 | // consonant is found, the target position should be before the |
| 1706 | // first matra, syllable modifier sign or vedic sign. |
| 1707 | // |
| 1708 | // This is our take on what step 4 is trying to say (and failing, BADLY). |
| 1709 | if reph_pos == RephPosition::AfterSub { |
| 1710 | new_reph_pos = base; |
| 1711 | while new_reph_pos + 1 < end |
| 1712 | && (rb_flag_unsafe(buffer.info[new_reph_pos + 1].indic_position() as u32) |
| 1713 | & (rb_flag(ot_position_t::POS_POST_C as u32) |
| 1714 | | rb_flag(ot_position_t::POS_AFTER_POST as u32) |
| 1715 | | rb_flag(ot_position_t::POS_SMVD as u32))) |
| 1716 | == 0 |
| 1717 | { |
| 1718 | new_reph_pos += 1; |
| 1719 | } |
| 1720 | |
| 1721 | if new_reph_pos < end { |
| 1722 | break 'reph; |
| 1723 | } |
| 1724 | } |
| 1725 | } |
| 1726 | |
| 1727 | // 5. If no consonant is found in steps 3 or 4, move reph to a position |
| 1728 | // immediately before the first post-base matra, syllable modifier |
| 1729 | // sign or vedic sign that has a reordering class after the intended |
| 1730 | // reph position. For example, if the reordering position for reph |
| 1731 | // is post-main, it will skip above-base matras that also have a |
| 1732 | // post-main position. |
| 1733 | // |
| 1734 | // Copied from step 2. |
| 1735 | new_reph_pos = start + 1; |
| 1736 | while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() { |
| 1737 | new_reph_pos += 1; |
| 1738 | } |
| 1739 | |
| 1740 | if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() { |
| 1741 | /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */ |
| 1742 | if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() { |
| 1743 | new_reph_pos += 1; |
| 1744 | } |
| 1745 | |
| 1746 | break 'reph; |
| 1747 | } |
| 1748 | // See https://github.com/harfbuzz/harfbuzz/issues/2298#issuecomment-615318654 |
| 1749 | |
| 1750 | // 6. Otherwise, reorder reph to the end of the syllable. |
| 1751 | { |
| 1752 | new_reph_pos = end - 1; |
| 1753 | while new_reph_pos > start |
| 1754 | && buffer.info[new_reph_pos].indic_position() == ot_position_t::POS_SMVD |
| 1755 | { |
| 1756 | new_reph_pos -= 1; |
| 1757 | } |
| 1758 | |
| 1759 | // If the Reph is to be ending up after a Matra,Halant sequence, |
| 1760 | // position it before that Halant so it can interact with the Matra. |
| 1761 | // However, if it's a plain Consonant,Halant we shouldn't do that. |
| 1762 | // Uniscribe doesn't do this. |
| 1763 | // TEST: U+0930,U+094D,U+0915,U+094B,U+094D |
| 1764 | if buffer.info[new_reph_pos].is_halant() { |
| 1765 | for info in &buffer.info[base + 1..new_reph_pos] { |
| 1766 | if (rb_flag_unsafe(info.indic_category() as u32) |
| 1767 | & (rb_flag(ot_category_t::OT_M as u32) |
| 1768 | | rb_flag(ot_category_t::OT_MPst as u32))) |
| 1769 | != 0 |
| 1770 | { |
| 1771 | // Ok, got it. |
| 1772 | new_reph_pos -= 1; |
| 1773 | } |
| 1774 | } |
| 1775 | } |
| 1776 | } |
| 1777 | |
| 1778 | break 'reph; |
| 1779 | } |
| 1780 | |
| 1781 | // Move |
| 1782 | buffer.merge_clusters(start, new_reph_pos + 1); |
| 1783 | |
| 1784 | let reph = buffer.info[start]; |
| 1785 | for i in 0..new_reph_pos - start { |
| 1786 | buffer.info[i + start] = buffer.info[i + start + 1]; |
| 1787 | } |
| 1788 | buffer.info[new_reph_pos] = reph; |
| 1789 | |
| 1790 | if start < base && base <= new_reph_pos { |
| 1791 | base -= 1; |
| 1792 | } |
| 1793 | } |
| 1794 | |
| 1795 | // - Reorder pre-base-reordering consonants: |
| 1796 | // |
| 1797 | // If a pre-base-reordering consonant is found, reorder it according to |
| 1798 | // the following rules: |
| 1799 | |
| 1800 | // Otherwise there can't be any pre-base-reordering Ra. |
| 1801 | if try_pref && base + 1 < end { |
| 1802 | for i in base + 1..end { |
| 1803 | if (buffer.info[i].mask & indic_plan.mask_array[indic_feature::PREF]) != 0 { |
| 1804 | // 1. Only reorder a glyph produced by substitution during application |
| 1805 | // of the <pref> feature. (Note that a font may shape a Ra consonant with |
| 1806 | // the feature generally but block it in certain contexts.) |
| 1807 | // |
| 1808 | // Note: We just check that something got substituted. We don't check that |
| 1809 | // the <pref> feature actually did it... |
| 1810 | // |
| 1811 | // Reorder pref only if it ligated. |
| 1812 | if _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[i]) { |
| 1813 | // 2. Try to find a target position the same way as for pre-base matra. |
| 1814 | // If it is found, reorder pre-base consonant glyph. |
| 1815 | // |
| 1816 | // 3. If position is not found, reorder immediately before main consonant. |
| 1817 | |
| 1818 | let mut new_pos = base; |
| 1819 | // Malayalam / Tamil do not have "half" forms or explicit virama forms. |
| 1820 | // The glyphs formed by 'half' are Chillus or ligated explicit viramas. |
| 1821 | // We want to position matra after them. |
| 1822 | if buffer.script != Some(script::MALAYALAM) |
| 1823 | && buffer.script != Some(script::TAMIL) |
| 1824 | { |
| 1825 | while new_pos > start |
| 1826 | && !buffer.info[new_pos - 1].is_one_of( |
| 1827 | rb_flag(ot_category_t::OT_M as u32) |
| 1828 | | rb_flag(ot_category_t::OT_MPst as u32) |
| 1829 | | rb_flag(ot_category_t::OT_H as u32), |
| 1830 | ) |
| 1831 | { |
| 1832 | new_pos -= 1; |
| 1833 | } |
| 1834 | } |
| 1835 | |
| 1836 | if new_pos > start && buffer.info[new_pos - 1].is_halant() { |
| 1837 | // -> If ZWJ or ZWNJ follow this halant, position is moved after it. |
| 1838 | if new_pos < end && buffer.info[new_pos].is_joiner() { |
| 1839 | new_pos += 1; |
| 1840 | } |
| 1841 | } |
| 1842 | |
| 1843 | { |
| 1844 | let old_pos = i; |
| 1845 | |
| 1846 | buffer.merge_clusters(new_pos, old_pos + 1); |
| 1847 | let tmp = buffer.info[old_pos]; |
| 1848 | for i in (0..old_pos - new_pos).rev() { |
| 1849 | buffer.info[i + new_pos + 1] = buffer.info[i + new_pos]; |
| 1850 | } |
| 1851 | buffer.info[new_pos] = tmp; |
| 1852 | |
| 1853 | if new_pos <= base && base < old_pos { |
| 1854 | // TODO: investigate |
| 1855 | #[allow (unused_assignments)] |
| 1856 | { |
| 1857 | base += 1; |
| 1858 | } |
| 1859 | } |
| 1860 | } |
| 1861 | } |
| 1862 | |
| 1863 | break; |
| 1864 | } |
| 1865 | } |
| 1866 | } |
| 1867 | |
| 1868 | // Apply 'init' to the Left Matra if it's a word start. |
| 1869 | if buffer.info[start].indic_position() == ot_position_t::POS_PRE_M { |
| 1870 | if start == 0 |
| 1871 | || (rb_flag_unsafe( |
| 1872 | _hb_glyph_info_get_general_category(&buffer.info[start - 1]).to_rb(), |
| 1873 | ) & rb_flag_range( |
| 1874 | hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT, |
| 1875 | hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK, |
| 1876 | )) == 0 |
| 1877 | { |
| 1878 | buffer.info[start].mask |= indic_plan.mask_array[indic_feature::INIT]; |
| 1879 | } else { |
| 1880 | buffer.unsafe_to_break(Some(start - 1), Some(start + 1)); |
| 1881 | } |
| 1882 | } |
| 1883 | } |
| 1884 | |