| 1 | use super::buffer::*; |
| 2 | use super::ot_layout::*; |
| 3 | use super::ot_shape_normalize::HB_OT_SHAPE_NORMALIZATION_MODE_AUTO; |
| 4 | use super::ot_shape_plan::hb_ot_shape_plan_t; |
| 5 | use super::ot_shaper::*; |
| 6 | use super::unicode::hb_unicode_general_category_t; |
| 7 | use super::{hb_font_t, script}; |
| 8 | |
| 9 | pub const THAI_SHAPER: hb_ot_shaper_t = hb_ot_shaper_t { |
| 10 | collect_features: None, |
| 11 | override_features: None, |
| 12 | create_data: None, |
| 13 | preprocess_text: Some(preprocess_text), |
| 14 | postprocess_glyphs: None, |
| 15 | normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_AUTO, |
| 16 | decompose: None, |
| 17 | compose: None, |
| 18 | setup_masks: None, |
| 19 | gpos_tag: None, |
| 20 | reorder_marks: None, |
| 21 | zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, |
| 22 | fallback_position: false, |
| 23 | }; |
| 24 | |
/// Class of a non-mark character, as returned by `get_consonant_type`.
///
/// The discriminants are used as indices into `ABOVE_START_STATE` and
/// `BELOW_START_STATE`, so the numbering must stay in sync with the row
/// order of those tables.
#[derive(Clone, Copy, PartialEq)]
enum Consonant {
    /// Starts both state machines in their first state (`T0` / `B0`).
    NC = 0,
    /// Starts the above-machine in `T1`.
    AC = 1,
    /// Starts the below-machine in `B1` (removable descender).
    RC = 2,
    /// Starts the below-machine in `B2` (strict descender).
    DC = 3,
    /// Anything `get_consonant_type` does not classify as a consonant.
    NotConsonant = 4,
}
| 33 | |
| 34 | fn get_consonant_type(u: u32) -> Consonant { |
| 35 | match u { |
| 36 | 0x0E1B | 0x0E1D | 0x0E1F => Consonant::AC, |
| 37 | 0x0E0D | 0x0E10 => Consonant::RC, |
| 38 | 0x0E0E | 0x0E0F => Consonant::DC, |
| 39 | 0x0E01..=0x0E2E => Consonant::NC, |
| 40 | _ => Consonant::NotConsonant, |
| 41 | } |
| 42 | } |
| 43 | |
/// Class of a combining mark, as returned by `get_mark_type`.
///
/// `AV`, `BV` and `T` are used (via `as usize`) as the column index into
/// the rows of `ABOVE_STATE_MACHINE` and `BELOW_STATE_MACHINE`, which have
/// exactly three columns in that order.
#[derive(Clone, Copy, PartialEq)]
enum Mark {
    /// Column 0 of the state machines.
    AV = 0,
    /// Column 1 of the state machines.
    BV = 1,
    /// Column 2 of the state machines.
    T = 2,
    /// Not a mark handled by the state machines.
    NotMark = 3,
}
| 51 | |
| 52 | fn get_mark_type(u: u32) -> Mark { |
| 53 | match u { |
| 54 | 0x0E31 | 0x0E34..=0x0E37 | 0x0E47 | 0x0E4D..=0x0E4E => Mark::AV, |
| 55 | 0x0E38..=0x0E3A => Mark::BV, |
| 56 | 0x0E48..=0x0E4C => Mark::T, |
| 57 | _ => Mark::NotMark, |
| 58 | } |
| 59 | } |
| 60 | |
/// Adjustment requested by a state-machine edge.
///
/// `pua_shape` uses the action to pick the replacement table to search.
#[derive(Clone, Copy, PartialEq)]
enum Action {
    /// Leave the glyph untouched.
    NOP,
    /// Shift the combining mark down.
    SD,
    /// Shift the combining mark left.
    SL,
    /// Shift the combining mark down and left.
    SDL,
    /// Remove the descender from the base.
    RD,
}
| 73 | |
/// One row of a replacement table: a codepoint and its two candidate
/// replacement codepoints.
#[derive(Clone, Copy)]
struct PuaMapping {
    /// Codepoint this row applies to.
    u: u16,
    /// First replacement candidate tried by `pua_shape`.
    // NOTE(review): "win" presumably refers to the Windows PUA convention - confirm.
    win_pua: u16,
    /// Second replacement candidate, tried only if the first has no glyph.
    // NOTE(review): "mac" presumably refers to the Mac PUA convention - confirm.
    mac_pua: u16,
}

impl PuaMapping {
    /// Builds a table row; `const` so it can be used in `const` tables.
    const fn new(u: u16, win_pua: u16, mac_pua: u16) -> Self {
        Self {
            u,
            win_pua,
            mac_pua,
        }
    }
}
| 90 | |
| 91 | const SD_MAPPINGS: &[PuaMapping] = &[ |
| 92 | PuaMapping::new(u:0x0E48, win_pua:0xF70A, mac_pua:0xF88B), // MAI EK |
| 93 | PuaMapping::new(u:0x0E49, win_pua:0xF70B, mac_pua:0xF88E), // MAI THO |
| 94 | PuaMapping::new(u:0x0E4A, win_pua:0xF70C, mac_pua:0xF891), // MAI TRI |
| 95 | PuaMapping::new(u:0x0E4B, win_pua:0xF70D, mac_pua:0xF894), // MAI CHATTAWA |
| 96 | PuaMapping::new(u:0x0E4C, win_pua:0xF70E, mac_pua:0xF897), // THANTHAKHAT |
| 97 | PuaMapping::new(u:0x0E38, win_pua:0xF718, mac_pua:0xF89B), // SARA U |
| 98 | PuaMapping::new(u:0x0E39, win_pua:0xF719, mac_pua:0xF89C), // SARA UU |
| 99 | PuaMapping::new(u:0x0E3A, win_pua:0xF71A, mac_pua:0xF89D), // PHINTHU |
| 100 | PuaMapping::new(u:0x0000, win_pua:0x0000, mac_pua:0x0000), |
| 101 | ]; |
| 102 | |
| 103 | const SDL_MAPPINGS: &[PuaMapping] = &[ |
| 104 | PuaMapping::new(u:0x0E48, win_pua:0xF705, mac_pua:0xF88C), // MAI EK |
| 105 | PuaMapping::new(u:0x0E49, win_pua:0xF706, mac_pua:0xF88F), // MAI THO |
| 106 | PuaMapping::new(u:0x0E4A, win_pua:0xF707, mac_pua:0xF892), // MAI TRI |
| 107 | PuaMapping::new(u:0x0E4B, win_pua:0xF708, mac_pua:0xF895), // MAI CHATTAWA |
| 108 | PuaMapping::new(u:0x0E4C, win_pua:0xF709, mac_pua:0xF898), // THANTHAKHAT |
| 109 | PuaMapping::new(u:0x0000, win_pua:0x0000, mac_pua:0x0000), |
| 110 | ]; |
| 111 | |
| 112 | const SL_MAPPINGS: &[PuaMapping] = &[ |
| 113 | PuaMapping::new(u:0x0E48, win_pua:0xF713, mac_pua:0xF88A), // MAI EK |
| 114 | PuaMapping::new(u:0x0E49, win_pua:0xF714, mac_pua:0xF88D), // MAI THO |
| 115 | PuaMapping::new(u:0x0E4A, win_pua:0xF715, mac_pua:0xF890), // MAI TRI |
| 116 | PuaMapping::new(u:0x0E4B, win_pua:0xF716, mac_pua:0xF893), // MAI CHATTAWA |
| 117 | PuaMapping::new(u:0x0E4C, win_pua:0xF717, mac_pua:0xF896), // THANTHAKHAT |
| 118 | PuaMapping::new(u:0x0E31, win_pua:0xF710, mac_pua:0xF884), // MAI HAN-AKAT |
| 119 | PuaMapping::new(u:0x0E34, win_pua:0xF701, mac_pua:0xF885), // SARA I |
| 120 | PuaMapping::new(u:0x0E35, win_pua:0xF702, mac_pua:0xF886), // SARA II |
| 121 | PuaMapping::new(u:0x0E36, win_pua:0xF703, mac_pua:0xF887), // SARA UE |
| 122 | PuaMapping::new(u:0x0E37, win_pua:0xF704, mac_pua:0xF888), // SARA UEE |
| 123 | PuaMapping::new(u:0x0E47, win_pua:0xF712, mac_pua:0xF889), // MAITAIKHU |
| 124 | PuaMapping::new(u:0x0E4D, win_pua:0xF711, mac_pua:0xF899), // NIKHAHIT |
| 125 | PuaMapping::new(u:0x0000, win_pua:0x0000, mac_pua:0x0000), |
| 126 | ]; |
| 127 | |
| 128 | const RD_MAPPINGS: &[PuaMapping] = &[ |
| 129 | PuaMapping::new(u:0x0E0D, win_pua:0xF70F, mac_pua:0xF89A), // YO YING |
| 130 | PuaMapping::new(u:0x0E10, win_pua:0xF700, mac_pua:0xF89E), // THO THAN |
| 131 | PuaMapping::new(u:0x0000, win_pua:0x0000, mac_pua:0x0000), |
| 132 | ]; |
| 133 | |
| 134 | fn pua_shape(u: u32, action: Action, face: &hb_font_t) -> u32 { |
| 135 | let mappings = match action { |
| 136 | Action::NOP => return u, |
| 137 | Action::SD => SD_MAPPINGS, |
| 138 | Action::SL => SL_MAPPINGS, |
| 139 | Action::SDL => SDL_MAPPINGS, |
| 140 | Action::RD => RD_MAPPINGS, |
| 141 | }; |
| 142 | |
| 143 | for m in mappings { |
| 144 | if m.u as u32 == u { |
| 145 | if face.get_nominal_glyph(m.win_pua as u32).is_some() { |
| 146 | return m.win_pua as u32; |
| 147 | } |
| 148 | |
| 149 | if face.get_nominal_glyph(m.mac_pua as u32).is_some() { |
| 150 | return m.mac_pua as u32; |
| 151 | } |
| 152 | |
| 153 | break; |
| 154 | } |
| 155 | } |
| 156 | |
| 157 | u |
| 158 | } |
| 159 | |
/// State of the above-base state machine.
///
/// The discriminant is the row index into `ABOVE_STATE_MACHINE`.
#[derive(Clone, Copy)]
enum AboveState {
    // Cluster above looks like:
    /// ⣤
    T0 = 0,
    /// ⣼
    T1 = 1,
    /// ⣾
    T2 = 2,
    /// ⣿
    T3 = 3,
}
| 168 | |
| 169 | const ABOVE_START_STATE: &[AboveState] = &[ |
| 170 | AboveState::T0, // NC |
| 171 | AboveState::T1, // AC |
| 172 | AboveState::T0, // RC |
| 173 | AboveState::T0, // DC |
| 174 | AboveState::T3, // NotConsonant |
| 175 | ]; |
| 176 | |
| 177 | #[derive (Clone, Copy)] |
| 178 | struct AboveStateMachineEdge { |
| 179 | action: Action, |
| 180 | next_state: AboveState, |
| 181 | } |
| 182 | |
| 183 | impl AboveStateMachineEdge { |
| 184 | const fn new(action: Action, next_state: AboveState) -> Self { |
| 185 | AboveStateMachineEdge { action, next_state } |
| 186 | } |
| 187 | } |
| 188 | |
| 189 | type ASME = AboveStateMachineEdge; |
| 190 | |
| 191 | const ABOVE_STATE_MACHINE: &[[ASME; 3]] = &[ |
| 192 | // AV BV T |
| 193 | /* T0 */ |
| 194 | [ |
| 195 | ASME::new(Action::NOP, next_state:AboveState::T3), |
| 196 | ASME::new(Action::NOP, next_state:AboveState::T0), |
| 197 | ASME::new(Action::SD, next_state:AboveState::T3), |
| 198 | ], |
| 199 | /* T1 */ |
| 200 | [ |
| 201 | ASME::new(Action::SL, next_state:AboveState::T2), |
| 202 | ASME::new(Action::NOP, next_state:AboveState::T1), |
| 203 | ASME::new(Action::SDL, next_state:AboveState::T2), |
| 204 | ], |
| 205 | /* T2 */ |
| 206 | [ |
| 207 | ASME::new(Action::NOP, next_state:AboveState::T3), |
| 208 | ASME::new(Action::NOP, next_state:AboveState::T2), |
| 209 | ASME::new(Action::SL, next_state:AboveState::T3), |
| 210 | ], |
| 211 | /* T3 */ |
| 212 | [ |
| 213 | ASME::new(Action::NOP, next_state:AboveState::T3), |
| 214 | ASME::new(Action::NOP, next_state:AboveState::T3), |
| 215 | ASME::new(Action::NOP, next_state:AboveState::T3), |
| 216 | ], |
| 217 | ]; |
| 218 | |
/// State of the below-base state machine.
///
/// The discriminant is the row index into `BELOW_STATE_MACHINE`.
#[derive(Clone, Copy)]
enum BelowState {
    /// No descender.
    B0 = 0,
    /// Removable descender.
    B1 = 1,
    /// Strict descender.
    B2 = 2,
}
| 228 | |
| 229 | const BELOW_START_STATE: &[BelowState] = &[ |
| 230 | BelowState::B0, // NC |
| 231 | BelowState::B0, // AC |
| 232 | BelowState::B1, // RC |
| 233 | BelowState::B2, // DC |
| 234 | BelowState::B2, // NotConsonant |
| 235 | ]; |
| 236 | |
| 237 | #[derive (Clone, Copy)] |
| 238 | struct BelowStateMachineEdge { |
| 239 | action: Action, |
| 240 | next_state: BelowState, |
| 241 | } |
| 242 | |
| 243 | impl BelowStateMachineEdge { |
| 244 | const fn new(action: Action, next_state: BelowState) -> Self { |
| 245 | BelowStateMachineEdge { action, next_state } |
| 246 | } |
| 247 | } |
| 248 | |
| 249 | type BSME = BelowStateMachineEdge; |
| 250 | |
| 251 | const BELOW_STATE_MACHINE: &[[BSME; 3]] = &[ |
| 252 | // AV BV T |
| 253 | /* B0 */ |
| 254 | [ |
| 255 | BSME::new(Action::NOP, next_state:BelowState::B0), |
| 256 | BSME::new(Action::NOP, next_state:BelowState::B2), |
| 257 | BSME::new(Action::NOP, next_state:BelowState::B0), |
| 258 | ], |
| 259 | /* B1 */ |
| 260 | [ |
| 261 | BSME::new(Action::NOP, next_state:BelowState::B1), |
| 262 | BSME::new(Action::RD, next_state:BelowState::B2), |
| 263 | BSME::new(Action::NOP, next_state:BelowState::B1), |
| 264 | ], |
| 265 | /* B2 */ |
| 266 | [ |
| 267 | BSME::new(Action::NOP, next_state:BelowState::B2), |
| 268 | BSME::new(Action::SD, next_state:BelowState::B2), |
| 269 | BSME::new(Action::NOP, next_state:BelowState::B2), |
| 270 | ], |
| 271 | ]; |
| 272 | |
| 273 | fn do_pua_shaping(face: &hb_font_t, buffer: &mut hb_buffer_t) { |
| 274 | let mut above_state = ABOVE_START_STATE[Consonant::NotConsonant as usize]; |
| 275 | let mut below_state = BELOW_START_STATE[Consonant::NotConsonant as usize]; |
| 276 | let mut base = 0; |
| 277 | |
| 278 | for i in 0..buffer.len { |
| 279 | let mt = get_mark_type(buffer.info[i].glyph_id); |
| 280 | |
| 281 | if mt == Mark::NotMark { |
| 282 | let ct = get_consonant_type(buffer.info[i].glyph_id); |
| 283 | above_state = ABOVE_START_STATE[ct as usize]; |
| 284 | below_state = BELOW_START_STATE[ct as usize]; |
| 285 | base = i; |
| 286 | continue; |
| 287 | } |
| 288 | |
| 289 | let above_edge = ABOVE_STATE_MACHINE[above_state as usize][mt as usize]; |
| 290 | let below_edge = BELOW_STATE_MACHINE[below_state as usize][mt as usize]; |
| 291 | above_state = above_edge.next_state; |
| 292 | below_state = below_edge.next_state; |
| 293 | |
| 294 | // At least one of the above/below actions is NOP. |
| 295 | let action = if above_edge.action != Action::NOP { |
| 296 | above_edge.action |
| 297 | } else { |
| 298 | below_edge.action |
| 299 | }; |
| 300 | |
| 301 | buffer.unsafe_to_break(Some(base), Some(i)); |
| 302 | if action == Action::RD { |
| 303 | buffer.info[base].glyph_id = pua_shape(buffer.info[base].glyph_id, action, face); |
| 304 | } else { |
| 305 | buffer.info[i].glyph_id = pua_shape(buffer.info[i].glyph_id, action, face); |
| 306 | } |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | // TODO: more tests |
| 311 | fn preprocess_text(plan: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) { |
| 312 | // This function implements the shaping logic documented here: |
| 313 | // |
| 314 | // https://linux.thai.net/~thep/th-otf/shaping.html |
| 315 | // |
| 316 | // The first shaping rule listed there is needed even if the font has Thai |
| 317 | // OpenType tables. The rest do fallback positioning based on PUA codepoints. |
| 318 | // We implement that only if there exist no Thai GSUB in the font. |
| 319 | |
| 320 | // The following is NOT specified in the MS OT Thai spec, however, it seems |
| 321 | // to be what Uniscribe and other engines implement. According to Eric Muller: |
| 322 | // |
| 323 | // When you have a SARA AM, decompose it in NIKHAHIT + SARA AA, *and* move the |
| 324 | // NIKHAHIT backwards over any above-base marks (0E31, 0E34-0E37, 0E47-0E4E). |
| 325 | // |
| 326 | // <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32> |
| 327 | // |
| 328 | // This reordering is legit only when the NIKHAHIT comes from a SARA AM, not |
| 329 | // when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably |
| 330 | // not what a user wanted, but the rendering is nevertheless nikhahit above |
| 331 | // chattawa. |
| 332 | // |
| 333 | // Same for Lao. |
| 334 | // |
| 335 | // Note: |
| 336 | // |
| 337 | // Uniscribe also does some below-marks reordering. Namely, it positions U+0E3A |
| 338 | // after U+0E38 and U+0E39. We do that by modifying the ccc for U+0E3A. |
| 339 | // See unicode->modified_combining_class (). Lao does NOT have a U+0E3A |
| 340 | // equivalent. |
| 341 | |
| 342 | // Here are the characters of significance: |
| 343 | // |
| 344 | // Thai Lao |
| 345 | // SARA AM: U+0E33 U+0EB3 |
| 346 | // SARA AA: U+0E32 U+0EB2 |
| 347 | // Nikhahit: U+0E4D U+0ECD |
| 348 | // |
| 349 | // Testing shows that Uniscribe reorder the following marks: |
| 350 | // Thai: <0E31,0E34..0E37,0E47..0E4E> |
| 351 | // Lao: <0EB1,0EB4..0EB7,0EBB,0EC8..0ECD> |
| 352 | // |
| 353 | // Note how the Lao versions are the same as Thai + 0x80. |
| 354 | |
| 355 | // We only get one script at a time, so a script-agnostic implementation |
| 356 | // is adequate here. |
| 357 | #[inline ] |
| 358 | fn is_sara_am(u: u32) -> bool { |
| 359 | (u & !0x0080) == 0x0E33 |
| 360 | } |
| 361 | #[inline ] |
| 362 | fn nikhahit_from_sara_am(u: u32) -> u32 { |
| 363 | u - 0x0E33 + 0x0E4D |
| 364 | } |
| 365 | #[inline ] |
| 366 | fn sara_aa_from_sara_am(u: u32) -> u32 { |
| 367 | u - 1 |
| 368 | } |
| 369 | #[inline ] |
| 370 | fn is_above_base_mark(u: u32) -> bool { |
| 371 | let u = u & !0x0080; |
| 372 | matches!(u, 0x0E34..=0x0E37 | 0x0E47..=0x0E4E | 0x0E31..=0x0E31 | 0x0E3B..=0x0E3B) |
| 373 | } |
| 374 | |
| 375 | buffer.clear_output(); |
| 376 | buffer.idx = 0; |
| 377 | while buffer.idx < buffer.len { |
| 378 | let u = buffer.cur(0).glyph_id; |
| 379 | if !is_sara_am(u) { |
| 380 | buffer.next_glyph(); |
| 381 | continue; |
| 382 | } |
| 383 | |
| 384 | // Is SARA AM. Decompose and reorder. |
| 385 | buffer.output_glyph(nikhahit_from_sara_am(u)); |
| 386 | { |
| 387 | let out_idx = buffer.out_len - 1; |
| 388 | _hb_glyph_info_set_continuation(&mut buffer.out_info_mut()[out_idx]); |
| 389 | } |
| 390 | buffer.replace_glyph(sara_aa_from_sara_am(u)); |
| 391 | |
| 392 | // Make Nikhahit be recognized as a ccc=0 mark when zeroing widths. |
| 393 | let end = buffer.out_len; |
| 394 | _hb_glyph_info_set_general_category( |
| 395 | &mut buffer.out_info_mut()[end - 2], |
| 396 | hb_unicode_general_category_t::NonspacingMark, |
| 397 | ); |
| 398 | |
| 399 | // Ok, let's see... |
| 400 | let mut start = end - 2; |
| 401 | while start > 0 && is_above_base_mark(buffer.out_info()[start - 1].glyph_id) { |
| 402 | start -= 1; |
| 403 | } |
| 404 | |
| 405 | if start + 2 < end { |
| 406 | // Move Nikhahit (end-2) to the beginning |
| 407 | buffer.merge_out_clusters(start, end); |
| 408 | let t = buffer.out_info()[end - 2]; |
| 409 | for i in 0..(end - start - 2) { |
| 410 | buffer.out_info_mut()[i + start + 1] = buffer.out_info()[i + start]; |
| 411 | } |
| 412 | buffer.out_info_mut()[start] = t; |
| 413 | } else { |
| 414 | // Since we decomposed, and NIKHAHIT is combining, merge clusters with the |
| 415 | // previous cluster. |
| 416 | if start != 0 && buffer.cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES { |
| 417 | buffer.merge_out_clusters(start - 1, end); |
| 418 | } |
| 419 | } |
| 420 | } |
| 421 | |
| 422 | buffer.sync(); |
| 423 | |
| 424 | // If font has Thai GSUB, we are done. |
| 425 | if plan.script == Some(script::THAI) && !plan.ot_map.found_script(TableIndex::GSUB) { |
| 426 | do_pua_shaping(face, buffer); |
| 427 | } |
| 428 | } |
| 429 | |