use alloc::boxed::Box;

use super::buffer::*;
use super::ot_map::*;
use super::ot_shape::*;
use super::ot_shape_normalize::HB_OT_SHAPE_NORMALIZATION_MODE_NONE;
use super::ot_shape_plan::hb_ot_shape_plan_t;
use super::ot_shaper::*;
use super::*;
use crate::BufferFlags;

const LJMO: u8 = 1;
const VJMO: u8 = 2;
const TJMO: u8 = 3;

impl hb_glyph_info_t {
    fn hangul_shaping_feature(&self) -> u8 {
        self.ot_shaper_var_u8_auxiliary()
    }

    fn set_hangul_shaping_feature(&mut self, feature: u8) {
        self.set_ot_shaper_var_u8_auxiliary(feature)
    }
}

fn collect_features_hangul(planner: &mut hb_ot_shape_planner_t) {
    planner
        .ot_map
        .add_feature(hb_tag_t::from_bytes(b"ljmo"), F_NONE, 1);
    planner
        .ot_map
        .add_feature(hb_tag_t::from_bytes(b"vjmo"), F_NONE, 1);
    planner
        .ot_map
        .add_feature(hb_tag_t::from_bytes(b"tjmo"), F_NONE, 1);
}

fn override_features_hangul(planner: &mut hb_ot_shape_planner_t) {
    // Uniscribe does not apply 'calt' for Hangul, and certain fonts
    // (Noto Sans CJK, Source Han Sans, etc.) apply all of the jamo lookups
    // in calt, which is not desirable.
    planner
        .ot_map
        .disable_feature(hb_tag_t::from_bytes(b"calt"));
}

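// Per-plan data: one mask per jamo feature, indexed by the per-glyph
// LJMO/VJMO/TJMO value assigned during preprocessing. Index 0 is a zero mask
// for glyphs that take no jamo feature.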
struct hangul_shape_plan_t {
    mask_array: [hb_mask_t; 4],
}

fn data_create_hangul(map: &hb_ot_map_t) -> hangul_shape_plan_t {
    hangul_shape_plan_t {
        mask_array: [
            0,
            map.get_1_mask(hb_tag_t::from_bytes(b"ljmo")),
            map.get_1_mask(hb_tag_t::from_bytes(b"vjmo")),
            map.get_1_mask(hb_tag_t::from_bytes(b"tjmo")),
        ],
    }
}

const L_BASE: u32 = 0x1100;
const V_BASE: u32 = 0x1161;
const T_BASE: u32 = 0x11A7;
const L_COUNT: u32 = 19;
const V_COUNT: u32 = 21;
const T_COUNT: u32 = 28;
const N_COUNT: u32 = V_COUNT * T_COUNT;
const S_COUNT: u32 = L_COUNT * N_COUNT;
const S_BASE: u32 = 0xAC00;
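
// Standard Hangul syllable arithmetic (The Unicode Standard, "Conjoining Jamo
// Behavior"): a precomposed syllable is
//     S = S_BASE + (l_index * V_COUNT + v_index) * T_COUNT + t_index
// where l_index/v_index/t_index are offsets from L_BASE/V_BASE/T_BASE and
// t_index == 0 means "no trailing consonant". The composition and
// decomposition code below uses this arithmetic in both directions.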

fn is_combining_l(u: u32) -> bool {
    (L_BASE..=L_BASE + L_COUNT - 1).contains(&u)
}

fn is_combining_v(u: u32) -> bool {
    (V_BASE..=V_BASE + V_COUNT - 1).contains(&u)
}

fn is_combining_t(u: u32) -> bool {
    (T_BASE + 1..=T_BASE + T_COUNT - 1).contains(&u)
}

fn is_combined_s(u: u32) -> bool {
    (S_BASE..=S_BASE + S_COUNT - 1).contains(&u)
}

fn is_l(u: u32) -> bool {
    (0x1100..=0x115F).contains(&u) || (0xA960..=0xA97C).contains(&u)
}

fn is_v(u: u32) -> bool {
    (0x1160..=0x11A7).contains(&u) || (0xD7B0..=0xD7C6).contains(&u)
}

fn is_t(u: u32) -> bool {
    (0x11A8..=0x11FF).contains(&u) || (0xD7CB..=0xD7FB).contains(&u)
}

fn is_hangul_tone(u: u32) -> bool {
    (0x302E..=0x302F).contains(&u)
}

fn is_zero_width_char(face: &hb_font_t, c: char) -> bool {
    if let Some(glyph) = face.get_nominal_glyph(c as u32) {
        face.glyph_h_advance(glyph) == 0
    } else {
        false
    }
}

fn preprocess_text_hangul(_: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) {
    // Hangul syllables come in two shapes: LV, and LVT. Of those:
    //
    // - LV can be precomposed, or decomposed. Let's call those
    //   <LV> and <L,V>,
    // - LVT can be fully precomposed, partially precomposed, or
    //   fully decomposed. I.e. <LVT>, <LV,T>, or <L,V,T>.
    //
    // The composition / decomposition is mechanical. However, not
    // all <L,V> sequences compose, and not all <LV,T> sequences
    // compose.
    //
    // Here are the specifics:
    //
    // - <L>: U+1100..115F, U+A960..A97F
    // - <V>: U+1160..11A7, U+D7B0..D7C7
    // - <T>: U+11A8..11FF, U+D7CB..D7FB
    //
    // - Only the <L,V> sequences for some of the U+11xx ranges combine.
    // - Only <LV,T> sequences for some of the Ts in the U+11xx range combine.
    //
    // Here is what we want to accomplish in this shaper:
    //
    // - If the whole syllable can be precomposed, do that,
    // - Otherwise, fully decompose and apply ljmo/vjmo/tjmo features.
    // - If a valid syllable is followed by a Hangul tone mark, reorder the tone
    //   mark to precede the whole syllable - unless it is a zero-width glyph, in
    //   which case we leave it untouched, assuming it's designed to overstrike.
    //
    // That is, of the different possible syllables:
    //
    //   <L>
    //   <L,V>
    //   <L,V,T>
    //   <LV>
    //   <LVT>
    //   <LV, T>
    //
    // - <L> needs no work.
    //
    // - <LV> and <LVT> can stay the way they are if the font supports them; otherwise
    //   we should fully decompose them if the font supports the decomposed sequence.
    //
    // - <L,V> and <L,V,T> we should compose if the whole thing can be composed.
    //
    // - <LV,T> we should compose if the whole thing can be composed, otherwise we should
    //   decompose.

    buffer.clear_output();
    // Extent of most recently seen syllable; valid only if start < end
    let mut start = 0;
    let mut end = 0;
    buffer.idx = 0;
    while buffer.idx < buffer.len {
        let u = buffer.cur(0).glyph_id;
        let c = buffer.cur(0).as_char();

        if is_hangul_tone(u) {
            // We could cache the width of the tone marks and the existence of dotted-circle,
            // but the use of the Hangul tone mark characters seems to be rare enough that
            // I didn't bother for now.
            if start < end && end == buffer.out_len {
                // Tone mark follows a valid syllable; move it in front, unless it's zero width.
                buffer.unsafe_to_break_from_outbuffer(Some(start), Some(buffer.idx));
                buffer.next_glyph();
                if !is_zero_width_char(face, c) {
                    buffer.merge_out_clusters(start, end + 1);
                    let out_info = buffer.out_info_mut();
                    let tone = out_info[end];
                    for i in (0..end - start).rev() {
                        out_info[i + start + 1] = out_info[i + start];
                    }
                    out_info[start] = tone;
                }
            } else {
                // No valid syllable as base for tone mark; try to insert dotted circle.
                if !buffer
                    .flags
                    .contains(BufferFlags::DO_NOT_INSERT_DOTTED_CIRCLE)
                    && face.has_glyph(0x25CC)
                {
                    let mut chars = [0; 2];
                    if !is_zero_width_char(face, c) {
                        chars[0] = u;
                        chars[1] = 0x25CC;
                    } else {
                        chars[0] = 0x25CC;
                        chars[1] = u;
                    }

                    buffer.replace_glyphs(1, 2, &chars);
                } else {
                    // No dotted circle available in the font; just leave tone mark untouched.
                    buffer.next_glyph();
                }
            }

            start = buffer.out_len;
            end = buffer.out_len;
            continue;
        }

        // Remember current position as a potential syllable start;
        // will only be used if we set end to a later position.
        start = buffer.out_len;

        if is_l(u) && buffer.idx + 1 < buffer.len {
            let l = u;
            let v = buffer.cur(1).glyph_id;
            if is_v(v) {
                // Have <L,V> or <L,V,T>.
                let mut t = 0;
                let mut tindex = 0;
                if buffer.idx + 2 < buffer.len {
                    t = buffer.cur(2).glyph_id;
                    if is_t(t) {
                        // Only used if is_combining_t(t); otherwise invalid.
                        tindex = t - T_BASE;
                    } else {
                        // The next character was not a trailing jamo.
                        t = 0;
                    }
                }

                let offset = if t != 0 { 3 } else { 2 };
                buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + offset));

                // We've got a syllable <L,V,T?>; see if it can potentially be composed.
                if is_combining_l(l) && is_combining_v(v) && (t == 0 || is_combining_t(t)) {
                    // Try to compose; if this succeeds, end is set to start+1.
                    let s = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT + tindex;
                    if face.has_glyph(s) {
                        let n = if t != 0 { 3 } else { 2 };
                        buffer.replace_glyphs(n, 1, &[s]);
                        end = start + 1;
                        continue;
                    }
                }

                // We didn't compose, either because it's an Old Hangul syllable without a
                // precomposed character in Unicode, or because the font didn't support the
                // necessary precomposed glyph.
                // Set jamo features on the individual glyphs, and advance past them.
                buffer.cur_mut(0).set_hangul_shaping_feature(LJMO);
                buffer.next_glyph();
                buffer.cur_mut(0).set_hangul_shaping_feature(VJMO);
                buffer.next_glyph();
                if t != 0 {
                    buffer.cur_mut(0).set_hangul_shaping_feature(TJMO);
                    buffer.next_glyph();
                    end = start + 3;
                } else {
                    end = start + 2;
                }

                if buffer.cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES {
                    buffer.merge_out_clusters(start, end);
                }

                continue;
            }
        } else if is_combined_s(u) {
            // Have <LV>, <LVT>, or <LV,T>
            let s = u;
            let has_glyph = face.has_glyph(s);

            let lindex = (s - S_BASE) / N_COUNT;
            let nindex = (s - S_BASE) % N_COUNT;
            let vindex = nindex / T_COUNT;
            let tindex = nindex % T_COUNT;

            if tindex == 0 && buffer.idx + 1 < buffer.len && is_combining_t(buffer.cur(1).glyph_id)
            {
                // <LV,T>, try to combine.
                let new_tindex = buffer.cur(1).glyph_id - T_BASE;
                let new_s = s + new_tindex;

                if face.has_glyph(new_s) {
                    buffer.replace_glyphs(2, 1, &[new_s]);
                    end = start + 1;
                    continue;
                } else {
                    // Mark unsafe between LV and T.
                    buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2));
                }
            }

            // Otherwise, decompose if the font doesn't support <LV> or <LVT>,
            // or if we have a non-combining <LV,T>. Note that we already handled
            // combining <LV,T> above.
            if !has_glyph
                || (tindex == 0 && buffer.idx + 1 < buffer.len && is_t(buffer.cur(1).glyph_id))
            {
                let decomposed = [L_BASE + lindex, V_BASE + vindex, T_BASE + tindex];
                if face.has_glyph(decomposed[0])
                    && face.has_glyph(decomposed[1])
                    && (tindex == 0 || face.has_glyph(decomposed[2]))
                {
                    let mut s_len = if tindex != 0 { 3 } else { 2 };
                    buffer.replace_glyphs(1, s_len, &decomposed);

                    // If we decomposed an LV because of a non-combining T following,
                    // we want to include this T in the syllable.
                    if has_glyph && tindex == 0 {
                        buffer.next_glyph();
                        s_len += 1;
                    }

                    // We decomposed S: apply jamo features to the individual glyphs
                    // that are now in `buffer.out_info`.
                    end = start + s_len;

                    buffer.out_info_mut()[start + 0].set_hangul_shaping_feature(LJMO);
                    buffer.out_info_mut()[start + 1].set_hangul_shaping_feature(VJMO);
                    if start + 2 < end {
                        buffer.out_info_mut()[start + 2].set_hangul_shaping_feature(TJMO);
                    }

                    if buffer.cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES {
                        buffer.merge_out_clusters(start, end);
                    }

                    continue;
                } else if tindex == 0 && buffer.idx + 1 < buffer.len && is_t(buffer.cur(1).glyph_id)
                {
                    // Mark unsafe between LV and T.
                    buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2));
                }
            }

            if has_glyph {
                // We didn't decompose the S, so just advance past it.
                end = start + 1;
                buffer.next_glyph();
                continue;
            }
        }

        // Didn't find a recognizable syllable, so we leave end <= start; this
        // will prevent tone-mark reordering from happening.
        buffer.next_glyph();
    }

    buffer.sync();
}

fn setup_masks_hangul(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) {
    let hangul_plan: &hangul_shape_plan_t = plan.data::<hangul_shape_plan_t>();
    for info in buffer.info_slice_mut() {
        info.mask |= hangul_plan.mask_array[info.hangul_shaping_feature() as usize];
    }
}

pub const HANGUL_SHAPER: hb_ot_shaper_t = hb_ot_shaper_t {
    collect_features: Some(collect_features_hangul),
    override_features: Some(override_features_hangul),
    create_data: Some(|plan: &hb_ot_shape_plan_t| Box::new(data_create_hangul(&plan.ot_map))),
    preprocess_text: Some(preprocess_text_hangul),
    postprocess_glyphs: None,
    normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_NONE,
    decompose: None,
    compose: None,
    setup_masks: Some(setup_masks_hangul),
    gpos_tag: None,
    reorder_marks: None,
    zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
    fallback_position: false,
};
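
// A small, self-contained sanity check of the syllable arithmetic and the
// classification helpers above. This is an illustrative sketch added for
// clarity: it only exercises the constants and free functions defined in this
// module and makes no assumptions about the rest of the shaper.
#[cfg(test)]
mod hangul_arithmetic_tests {
    use super::*;

    #[test]
    fn composes_and_decomposes_gag() {
        // U+AC01 HANGUL SYLLABLE GAG = <U+1100 L, U+1161 V, U+11A8 T>.
        let (l, v, t) = (0x1100u32, 0x1161, 0x11A8);
        let s = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT + (t - T_BASE);
        assert_eq!(s, 0xAC01);

        // Decomposition inverts the composition arithmetic.
        let (lindex, nindex) = ((s - S_BASE) / N_COUNT, (s - S_BASE) % N_COUNT);
        assert_eq!(L_BASE + lindex, l);
        assert_eq!(V_BASE + nindex / T_COUNT, v);
        assert_eq!(T_BASE + nindex % T_COUNT, t);
    }

    #[test]
    fn classifies_jamo_ranges() {
        // LV syllables have t_index == 0; T_BASE itself is not a combining T.
        assert!(is_combining_t(T_BASE + 1));
        assert!(!is_combining_t(T_BASE));

        // The precomposed syllable block is U+AC00..=U+D7A3.
        assert!(is_combined_s(S_BASE));
        assert!(is_combined_s(S_BASE + S_COUNT - 1));
        assert!(!is_combined_s(S_BASE + S_COUNT));

        // The broad L/V/T classes also cover the Old Hangul extension blocks.
        assert!(is_l(0xA960));
        assert!(is_v(0xD7B0));
        assert!(is_t(0xD7CB));
        assert!(is_hangul_tone(0x302E));
    }
}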