1 | use alloc::boxed::Box; |
2 | |
3 | use super::buffer::*; |
4 | use super::ot_map::*; |
5 | use super::ot_shape::*; |
6 | use super::ot_shape_normalize::HB_OT_SHAPE_NORMALIZATION_MODE_NONE; |
7 | use super::ot_shape_plan::hb_ot_shape_plan_t; |
8 | use super::ot_shaper::*; |
9 | use super::*; |
10 | use crate::BufferFlags; |
11 | |
// Per-glyph jamo classes. Each value is stored on a glyph through
// `set_hangul_shaping_feature` and later used as an index into
// `hangul_shape_plan_t::mask_array`; glyphs that were never tagged
// select slot 0, which holds an empty mask.

/// Leading consonant; selects the `ljmo` feature mask.
const LJMO: u8 = 1;
/// Vowel; selects the `vjmo` feature mask.
const VJMO: u8 = 2;
/// Trailing consonant; selects the `tjmo` feature mask.
const TJMO: u8 = 3;
15 | |
16 | impl hb_glyph_info_t { |
17 | fn hangul_shaping_feature(&self) -> u8 { |
18 | self.ot_shaper_var_u8_auxiliary() |
19 | } |
20 | |
21 | fn set_hangul_shaping_feature(&mut self, feature: u8) { |
22 | self.set_ot_shaper_var_u8_auxiliary(feature) |
23 | } |
24 | } |
25 | |
26 | fn collect_features_hangul(planner: &mut hb_ot_shape_planner_t) { |
27 | planner |
28 | .ot_map |
29 | .add_feature(tag:hb_tag_t::from_bytes(b"ljmo" ), F_NONE, value:1); |
30 | planner |
31 | .ot_map |
32 | .add_feature(tag:hb_tag_t::from_bytes(b"vjmo" ), F_NONE, value:1); |
33 | planner |
34 | .ot_map |
35 | .add_feature(tag:hb_tag_t::from_bytes(b"tjmo" ), F_NONE, value:1); |
36 | } |
37 | |
38 | fn override_features_hangul(planner: &mut hb_ot_shape_planner_t) { |
39 | // Uniscribe does not apply 'calt' for Hangul, and certain fonts |
40 | // (Noto Sans CJK, Source Sans Han, etc) apply all of jamo lookups |
41 | // in calt, which is not desirable. |
42 | planner |
43 | .ot_map |
44 | .disable_feature(tag:hb_tag_t::from_bytes(b"calt" )); |
45 | } |
46 | |
47 | struct hangul_shape_plan_t { |
48 | mask_array: [hb_mask_t; 4], |
49 | } |
50 | |
51 | fn data_create_hangul(map: &hb_ot_map_t) -> hangul_shape_plan_t { |
52 | hangul_shape_plan_t { |
53 | mask_array: [ |
54 | 0, |
55 | map.get_1_mask(feature_tag:hb_tag_t::from_bytes(b"ljmo" )), |
56 | map.get_1_mask(feature_tag:hb_tag_t::from_bytes(b"vjmo" )), |
57 | map.get_1_mask(feature_tag:hb_tag_t::from_bytes(b"tjmo" )), |
58 | ], |
59 | } |
60 | } |
61 | |
// Parameters of the arithmetic Hangul syllable (de)composition.

/// First precomposed syllable.
const S_BASE: u32 = 0xAC00;
/// First leading consonant that takes part in composition.
const L_BASE: u32 = 0x1100;
/// First vowel that takes part in composition.
const V_BASE: u32 = 0x1161;
/// One below the first composing trailing consonant, so that a trailing
/// index of 0 means "no trailing consonant".
const T_BASE: u32 = 0x11A7;

/// Number of composing leading consonants.
const L_COUNT: u32 = 19;
/// Number of composing vowels.
const V_COUNT: u32 = 21;
/// Number of trailing slots, including the "no trailing consonant" slot.
const T_COUNT: u32 = 28;
/// Number of precomposed syllables per leading consonant.
const N_COUNT: u32 = V_COUNT * T_COUNT;
/// Total number of precomposed syllables.
const S_COUNT: u32 = L_COUNT * N_COUNT;
71 | |
72 | fn is_combining_l(u: u32) -> bool { |
73 | (L_BASE..=L_BASE + L_COUNT - 1).contains(&u) |
74 | } |
75 | |
76 | fn is_combining_v(u: u32) -> bool { |
77 | (V_BASE..=V_BASE + V_COUNT - 1).contains(&u) |
78 | } |
79 | |
80 | fn is_combining_t(u: u32) -> bool { |
81 | (T_BASE + 1..=T_BASE + T_COUNT - 1).contains(&u) |
82 | } |
83 | |
84 | fn is_combined_s(u: u32) -> bool { |
85 | (S_BASE..=S_BASE + S_COUNT - 1).contains(&u) |
86 | } |
87 | |
/// True if `u` is a leading consonant jamo: U+1100..=U+115F or
/// U+A960..=U+A97C.
fn is_l(u: u32) -> bool {
    matches!(u, 0x1100..=0x115F | 0xA960..=0xA97C)
}
91 | |
/// True if `u` is a vowel jamo: U+1160..=U+11A7 or U+D7B0..=U+D7C6.
fn is_v(u: u32) -> bool {
    matches!(u, 0x1160..=0x11A7 | 0xD7B0..=0xD7C6)
}
95 | |
/// True if `u` is a trailing consonant jamo: U+11A8..=U+11FF or
/// U+D7CB..=U+D7FB.
fn is_t(u: u32) -> bool {
    matches!(u, 0x11A8..=0x11FF | 0xD7CB..=0xD7FB)
}
99 | |
/// True if `u` is one of the two Hangul tone marks, U+302E or U+302F.
fn is_hangul_tone(u: u32) -> bool {
    matches!(u, 0x302E | 0x302F)
}
103 | |
104 | fn is_zero_width_char(face: &hb_font_t, c: char) -> bool { |
105 | if let Some(glyph: GlyphId) = face.get_nominal_glyph(c as u32) { |
106 | face.glyph_h_advance(glyph) == 0 |
107 | } else { |
108 | false |
109 | } |
110 | } |
111 | |
112 | fn preprocess_text_hangul(_: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) { |
113 | // Hangul syllables come in two shapes: LV, and LVT. Of those: |
114 | // |
115 | // - LV can be precomposed, or decomposed. Lets call those |
116 | // <LV> and <L,V>, |
117 | // - LVT can be fully precomposed, partially precomposed, or |
118 | // fully decomposed. Ie. <LVT>, <LV,T>, or <L,V,T>. |
119 | // |
120 | // The composition / decomposition is mechanical. However, not |
121 | // all <L,V> sequences compose, and not all <LV,T> sequences |
122 | // compose. |
123 | // |
124 | // Here are the specifics: |
125 | // |
126 | // - <L>: U+1100..115F, U+A960..A97F |
127 | // - <V>: U+1160..11A7, U+D7B0..D7C7 |
128 | // - <T>: U+11A8..11FF, U+D7CB..D7FB |
129 | // |
130 | // - Only the <L,V> sequences for some of the U+11xx ranges combine. |
131 | // - Only <LV,T> sequences for some of the Ts in U+11xx range combine. |
132 | // |
133 | // Here is what we want to accomplish in this shaper: |
134 | // |
135 | // - If the whole syllable can be precomposed, do that, |
136 | // - Otherwise, fully decompose and apply ljmo/vjmo/tjmo features. |
137 | // - If a valid syllable is followed by a Hangul tone mark, reorder the tone |
138 | // mark to precede the whole syllable - unless it is a zero-width glyph, in |
139 | // which case we leave it untouched, assuming it's designed to overstrike. |
140 | // |
141 | // That is, of the different possible syllables: |
142 | // |
143 | // <L> |
144 | // <L,V> |
145 | // <L,V,T> |
146 | // <LV> |
147 | // <LVT> |
148 | // <LV, T> |
149 | // |
150 | // - <L> needs no work. |
151 | // |
152 | // - <LV> and <LVT> can stay the way they are if the font supports them, otherwise we |
153 | // should fully decompose them if font supports. |
154 | // |
155 | // - <L,V> and <L,V,T> we should compose if the whole thing can be composed. |
156 | // |
157 | // - <LV,T> we should compose if the whole thing can be composed, otherwise we should |
158 | // decompose. |
159 | |
160 | buffer.clear_output(); |
161 | // Extent of most recently seen syllable; valid only if start < end |
162 | let mut start = 0; |
163 | let mut end = 0; |
164 | buffer.idx = 0; |
165 | while buffer.idx < buffer.len { |
166 | let u = buffer.cur(0).glyph_id; |
167 | let c = buffer.cur(0).as_char(); |
168 | |
169 | if is_hangul_tone(u) { |
170 | // We could cache the width of the tone marks and the existence of dotted-circle, |
171 | // but the use of the Hangul tone mark characters seems to be rare enough that |
172 | // I didn't bother for now. |
173 | if start < end && end == buffer.out_len { |
174 | // Tone mark follows a valid syllable; move it in front, unless it's zero width. |
175 | buffer.unsafe_to_break_from_outbuffer(Some(start), Some(buffer.idx)); |
176 | buffer.next_glyph(); |
177 | if !is_zero_width_char(face, c) { |
178 | buffer.merge_out_clusters(start, end + 1); |
179 | let out_info = buffer.out_info_mut(); |
180 | let tone = out_info[end]; |
181 | for i in (0..end - start).rev() { |
182 | out_info[i + start + 1] = out_info[i + start]; |
183 | } |
184 | out_info[start] = tone; |
185 | } |
186 | } else { |
187 | // No valid syllable as base for tone mark; try to insert dotted circle. |
188 | if !buffer |
189 | .flags |
190 | .contains(BufferFlags::DO_NOT_INSERT_DOTTED_CIRCLE) |
191 | && face.has_glyph(0x25CC) |
192 | { |
193 | let mut chars = [0; 2]; |
194 | if !is_zero_width_char(face, c) { |
195 | chars[0] = u; |
196 | chars[1] = 0x25CC; |
197 | } else { |
198 | chars[0] = 0x25CC; |
199 | chars[1] = u; |
200 | } |
201 | |
202 | buffer.replace_glyphs(1, 2, &chars); |
203 | } else { |
204 | // No dotted circle available in the font; just leave tone mark untouched. |
205 | buffer.next_glyph(); |
206 | } |
207 | } |
208 | |
209 | start = buffer.out_len; |
210 | end = buffer.out_len; |
211 | continue; |
212 | } |
213 | |
214 | // Remember current position as a potential syllable start; |
215 | // will only be used if we set end to a later position. |
216 | start = buffer.out_len; |
217 | |
218 | if is_l(u) && buffer.idx + 1 < buffer.len { |
219 | let l = u; |
220 | let v = buffer.cur(1).glyph_id; |
221 | if is_v(v) { |
222 | // Have <L,V> or <L,V,T>. |
223 | let mut t = 0; |
224 | let mut tindex = 0; |
225 | if buffer.idx + 2 < buffer.len { |
226 | t = buffer.cur(2).glyph_id; |
227 | if is_t(t) { |
228 | // Only used if isCombiningT (t); otherwise invalid. |
229 | tindex = t - T_BASE; |
230 | } else { |
231 | // The next character was not a trailing jamo. |
232 | t = 0; |
233 | } |
234 | } |
235 | |
236 | let offset = if t != 0 { 3 } else { 2 }; |
237 | buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + offset)); |
238 | |
239 | // We've got a syllable <L,V,T?>; see if it can potentially be composed. |
240 | if is_combining_l(l) && is_combining_v(v) && (t == 0 || is_combining_t(t)) { |
241 | // Try to compose; if this succeeds, end is set to start+1. |
242 | let s = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT + tindex; |
243 | if face.has_glyph(s) { |
244 | let n = if t != 0 { 3 } else { 2 }; |
245 | buffer.replace_glyphs(n, 1, &[s]); |
246 | end = start + 1; |
247 | continue; |
248 | } |
249 | } |
250 | |
251 | // We didn't compose, either because it's an Old Hangul syllable without a |
252 | // precomposed character in Unicode, or because the font didn't support the |
253 | // necessary precomposed glyph. |
254 | // Set jamo features on the individual glyphs, and advance past them. |
255 | buffer.cur_mut(0).set_hangul_shaping_feature(LJMO); |
256 | buffer.next_glyph(); |
257 | buffer.cur_mut(0).set_hangul_shaping_feature(VJMO); |
258 | buffer.next_glyph(); |
259 | if t != 0 { |
260 | buffer.cur_mut(0).set_hangul_shaping_feature(TJMO); |
261 | buffer.next_glyph(); |
262 | end = start + 3; |
263 | } else { |
264 | end = start + 2; |
265 | } |
266 | |
267 | if buffer.cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES { |
268 | buffer.merge_out_clusters(start, end); |
269 | } |
270 | |
271 | continue; |
272 | } |
273 | } else if is_combined_s(u) { |
274 | // Have <LV>, <LVT>, or <LV,T> |
275 | let s = u; |
276 | let has_glyph = face.has_glyph(s); |
277 | |
278 | let lindex = (s - S_BASE) / N_COUNT; |
279 | let nindex = (s - S_BASE) % N_COUNT; |
280 | let vindex = nindex / T_COUNT; |
281 | let tindex = nindex % T_COUNT; |
282 | |
283 | if tindex == 0 && buffer.idx + 1 < buffer.len && is_combining_t(buffer.cur(1).glyph_id) |
284 | { |
285 | // <LV,T>, try to combine. |
286 | let new_tindex = buffer.cur(1).glyph_id - T_BASE; |
287 | let new_s = s + new_tindex; |
288 | |
289 | if face.has_glyph(new_s) { |
290 | buffer.replace_glyphs(2, 1, &[new_s]); |
291 | end = start + 1; |
292 | continue; |
293 | } else { |
294 | // Mark unsafe between LV and T. |
295 | buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2)); |
296 | } |
297 | } |
298 | |
299 | // Otherwise, decompose if font doesn't support <LV> or <LVT>, |
300 | // or if having non-combining <LV,T>. Note that we already handled |
301 | // combining <LV,T> above. |
302 | if !has_glyph |
303 | || (tindex == 0 && buffer.idx + 1 < buffer.len && is_t(buffer.cur(1).glyph_id)) |
304 | { |
305 | let decomposed = [L_BASE + lindex, V_BASE + vindex, T_BASE + tindex]; |
306 | if face.has_glyph(decomposed[0]) |
307 | && face.has_glyph(decomposed[1]) |
308 | && (tindex == 0 || face.has_glyph(decomposed[2])) |
309 | { |
310 | let mut s_len = if tindex != 0 { 3 } else { 2 }; |
311 | buffer.replace_glyphs(1, s_len, &decomposed); |
312 | |
313 | // If we decomposed an LV because of a non-combining T following, |
314 | // we want to include this T in the syllable. |
315 | if has_glyph && tindex == 0 { |
316 | buffer.next_glyph(); |
317 | s_len += 1; |
318 | } |
319 | |
320 | // We decomposed S: apply jamo features to the individual glyphs |
321 | // that are now in `buffer.out_info`. |
322 | end = start + s_len; |
323 | |
324 | buffer.out_info_mut()[start + 0].set_hangul_shaping_feature(LJMO); |
325 | buffer.out_info_mut()[start + 1].set_hangul_shaping_feature(VJMO); |
326 | if start + 2 < end { |
327 | buffer.out_info_mut()[start + 2].set_hangul_shaping_feature(TJMO); |
328 | } |
329 | |
330 | if buffer.cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES { |
331 | buffer.merge_out_clusters(start, end); |
332 | } |
333 | |
334 | continue; |
335 | } else if tindex == 0 && buffer.idx + 1 > buffer.len && is_t(buffer.cur(1).glyph_id) |
336 | { |
337 | // Mark unsafe between LV and T. |
338 | buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2)); |
339 | } |
340 | } |
341 | |
342 | if has_glyph { |
343 | // We didn't decompose the S, so just advance past it. |
344 | end = start + 1; |
345 | buffer.next_glyph(); |
346 | continue; |
347 | } |
348 | } |
349 | |
350 | // Didn't find a recognizable syllable, so we leave end <= start; |
351 | // this will prevent tone-mark reordering happening. |
352 | buffer.next_glyph(); |
353 | } |
354 | |
355 | buffer.sync(); |
356 | } |
357 | |
358 | fn setup_masks_hangul(plan: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) { |
359 | let hangul_plan: &hangul_shape_plan_t = plan.data::<hangul_shape_plan_t>(); |
360 | for info: &mut hb_glyph_info_t in buffer.info_slice_mut() { |
361 | info.mask |= hangul_plan.mask_array[info.hangul_shaping_feature() as usize]; |
362 | } |
363 | } |
364 | |
365 | pub const HANGUL_SHAPER: hb_ot_shaper_t = hb_ot_shaper_t { |
366 | collect_features: Some(collect_features_hangul), |
367 | override_features: Some(override_features_hangul), |
368 | create_data: Some(|plan: &hb_ot_shape_plan_t| Box::new(data_create_hangul(&plan.ot_map))), |
369 | preprocess_text: Some(preprocess_text_hangul), |
370 | postprocess_glyphs: None, |
371 | normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_NONE, |
372 | decompose: None, |
373 | compose: None, |
374 | setup_masks: Some(setup_masks_hangul), |
375 | gpos_tag: None, |
376 | reorder_marks: None, |
377 | zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, |
378 | fallback_position: false, |
379 | }; |
380 | |