1 | use alloc::boxed::Box; |
2 | |
3 | use super::*; |
4 | use crate::buffer::{Buffer, BufferClusterLevel, BufferFlags}; |
5 | use crate::ot::{feature, FeatureFlags, Map}; |
6 | use crate::plan::{ShapePlan, ShapePlanner}; |
7 | use crate::{Face, GlyphInfo, Mask}; |
8 | |
9 | pub const HANGUL_SHAPER: ComplexShaper = ComplexShaper { |
10 | collect_features: Some(collect_features), |
11 | override_features: Some(override_features), |
12 | create_data: Some(|plan: &ShapePlan| Box::new(HangulShapePlan::new(&plan.ot_map))), |
13 | preprocess_text: Some(preprocess_text), |
14 | postprocess_glyphs: None, |
15 | normalization_mode: None, |
16 | decompose: None, |
17 | compose: None, |
18 | setup_masks: Some(setup_masks), |
19 | gpos_tag: None, |
20 | reorder_marks: None, |
21 | zero_width_marks: None, |
22 | fallback_position: false, |
23 | }; |
24 | |
// Constants of the arithmetic Hangul syllable composition.

/// First precomposed Hangul syllable.
const S_BASE: u32 = 0xAC00;
/// First leading consonant (L) that takes part in composition.
const L_BASE: u32 = 0x1100;
/// First vowel (V) that takes part in composition.
const V_BASE: u32 = 0x1161;
/// One below the first composing trailing consonant (T): a trailing index
/// of 0 stands for "no trailing consonant".
const T_BASE: u32 = 0x11A7;

/// Number of composing leading consonants.
const L_COUNT: u32 = 19;
/// Number of composing vowels.
const V_COUNT: u32 = 21;
/// Number of trailing slots, including the "no trailing consonant" slot.
const T_COUNT: u32 = 28;
/// Number of precomposed syllables per leading consonant.
const N_COUNT: u32 = V_COUNT * T_COUNT;
/// Total number of precomposed syllables.
const S_COUNT: u32 = L_COUNT * N_COUNT;
34 | |
// Per-glyph shaping feature values; each one is also the index of the
// matching feature mask in `HangulShapePlan::mask_array`.

/// Glyph is a leading jamo.
const LJMO: u8 = 1;
/// Glyph is a vowel jamo.
const VJMO: u8 = 2;
/// Glyph is a trailing jamo.
const TJMO: u8 = 3;
38 | |
39 | impl GlyphInfo { |
40 | fn hangul_shaping_feature(&self) -> u8 { |
41 | self.complex_var_u8_auxiliary() |
42 | } |
43 | |
44 | fn set_hangul_shaping_feature(&mut self, feature: u8) { |
45 | self.set_complex_var_u8_auxiliary(feature) |
46 | } |
47 | } |
48 | |
49 | struct HangulShapePlan { |
50 | mask_array: [Mask; 4], |
51 | } |
52 | |
53 | impl HangulShapePlan { |
54 | fn new(map: &Map) -> Self { |
55 | HangulShapePlan { |
56 | mask_array: [ |
57 | 0, |
58 | map.one_mask(feature_tag:feature::LEADING_JAMO_FORMS), |
59 | map.one_mask(feature_tag:feature::VOWEL_JAMO_FORMS), |
60 | map.one_mask(feature_tag:feature::TRAILING_JAMO_FORMS), |
61 | ], |
62 | } |
63 | } |
64 | } |
65 | |
66 | fn collect_features(planner: &mut ShapePlanner) { |
67 | planner |
68 | .ot_map |
69 | .add_feature(tag:feature::LEADING_JAMO_FORMS, flags:FeatureFlags::empty(), value:1); |
70 | planner |
71 | .ot_map |
72 | .add_feature(tag:feature::VOWEL_JAMO_FORMS, flags:FeatureFlags::empty(), value:1); |
73 | planner |
74 | .ot_map |
75 | .add_feature(tag:feature::TRAILING_JAMO_FORMS, flags:FeatureFlags::empty(), value:1); |
76 | } |
77 | |
78 | fn override_features(planner: &mut ShapePlanner) { |
79 | // Uniscribe does not apply 'calt' for Hangul, and certain fonts |
80 | // (Noto Sans CJK, Source Sans Han, etc) apply all of jamo lookups |
81 | // in calt, which is not desirable. |
82 | planner |
83 | .ot_map |
84 | .disable_feature(tag:feature::CONTEXTUAL_ALTERNATES); |
85 | } |
86 | |
87 | fn preprocess_text(_: &ShapePlan, face: &Face, buffer: &mut Buffer) { |
88 | // Hangul syllables come in two shapes: LV, and LVT. Of those: |
89 | // |
90 | // - LV can be precomposed, or decomposed. Lets call those |
91 | // <LV> and <L,V>, |
92 | // - LVT can be fully precomposed, partially precomposed, or |
93 | // fully decomposed. Ie. <LVT>, <LV,T>, or <L,V,T>. |
94 | // |
95 | // The composition / decomposition is mechanical. However, not |
96 | // all <L,V> sequences compose, and not all <LV,T> sequences |
97 | // compose. |
98 | // |
99 | // Here are the specifics: |
100 | // |
101 | // - <L>: U+1100..115F, U+A960..A97F |
102 | // - <V>: U+1160..11A7, U+D7B0..D7C7 |
103 | // - <T>: U+11A8..11FF, U+D7CB..D7FB |
104 | // |
105 | // - Only the <L,V> sequences for some of the U+11xx ranges combine. |
106 | // - Only <LV,T> sequences for some of the Ts in U+11xx range combine. |
107 | // |
108 | // Here is what we want to accomplish in this shaper: |
109 | // |
110 | // - If the whole syllable can be precomposed, do that, |
111 | // - Otherwise, fully decompose and apply ljmo/vjmo/tjmo features. |
112 | // - If a valid syllable is followed by a Hangul tone mark, reorder the tone |
113 | // mark to precede the whole syllable - unless it is a zero-width glyph, in |
114 | // which case we leave it untouched, assuming it's designed to overstrike. |
115 | // |
116 | // That is, of the different possible syllables: |
117 | // |
118 | // <L> |
119 | // <L,V> |
120 | // <L,V,T> |
121 | // <LV> |
122 | // <LVT> |
123 | // <LV, T> |
124 | // |
125 | // - <L> needs no work. |
126 | // |
127 | // - <LV> and <LVT> can stay the way they are if the font supports them, otherwise we |
128 | // should fully decompose them if font supports. |
129 | // |
130 | // - <L,V> and <L,V,T> we should compose if the whole thing can be composed. |
131 | // |
132 | // - <LV,T> we should compose if the whole thing can be composed, otherwise we should |
133 | // decompose. |
134 | |
135 | buffer.clear_output(); |
136 | // Extent of most recently seen syllable; valid only if start < end |
137 | let mut start = 0; |
138 | let mut end = 0; |
139 | buffer.idx = 0; |
140 | while buffer.idx < buffer.len { |
141 | let u = buffer.cur(0).glyph_id; |
142 | let c = buffer.cur(0).as_char(); |
143 | |
144 | if is_hangul_tone(u) { |
145 | // We could cache the width of the tone marks and the existence of dotted-circle, |
146 | // but the use of the Hangul tone mark characters seems to be rare enough that |
147 | // I didn't bother for now. |
148 | if start < end && end == buffer.out_len { |
149 | // Tone mark follows a valid syllable; move it in front, unless it's zero width. |
150 | buffer.unsafe_to_break_from_outbuffer(Some(start), Some(buffer.idx)); |
151 | buffer.next_glyph(); |
152 | if !is_zero_width_char(face, c) { |
153 | buffer.merge_out_clusters(start, end + 1); |
154 | let out_info = buffer.out_info_mut(); |
155 | let tone = out_info[end]; |
156 | for i in (0..end - start).rev() { |
157 | out_info[i + start + 1] = out_info[i + start]; |
158 | } |
159 | out_info[start] = tone; |
160 | } |
161 | } else { |
162 | // No valid syllable as base for tone mark; try to insert dotted circle. |
163 | if !buffer |
164 | .flags |
165 | .contains(BufferFlags::DO_NOT_INSERT_DOTTED_CIRCLE) |
166 | && face.has_glyph(0x25CC) |
167 | { |
168 | let mut chars = [0; 2]; |
169 | if !is_zero_width_char(face, c) { |
170 | chars[0] = u; |
171 | chars[1] = 0x25CC; |
172 | } else { |
173 | chars[0] = 0x25CC; |
174 | chars[1] = u; |
175 | } |
176 | |
177 | buffer.replace_glyphs(1, 2, &chars); |
178 | } else { |
179 | // No dotted circle available in the font; just leave tone mark untouched. |
180 | buffer.next_glyph(); |
181 | } |
182 | } |
183 | |
184 | start = buffer.out_len; |
185 | end = buffer.out_len; |
186 | continue; |
187 | } |
188 | |
189 | // Remember current position as a potential syllable start; |
190 | // will only be used if we set end to a later position. |
191 | start = buffer.out_len; |
192 | |
193 | if is_l(u) && buffer.idx + 1 < buffer.len { |
194 | let l = u; |
195 | let v = buffer.cur(1).glyph_id; |
196 | if is_v(v) { |
197 | // Have <L,V> or <L,V,T>. |
198 | let mut t = 0; |
199 | let mut tindex = 0; |
200 | if buffer.idx + 2 < buffer.len { |
201 | t = buffer.cur(2).glyph_id; |
202 | if is_t(t) { |
203 | // Only used if isCombiningT (t); otherwise invalid. |
204 | tindex = t - T_BASE; |
205 | } else { |
206 | // The next character was not a trailing jamo. |
207 | t = 0; |
208 | } |
209 | } |
210 | |
211 | let offset = if t != 0 { 3 } else { 2 }; |
212 | buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + offset)); |
213 | |
214 | // We've got a syllable <L,V,T?>; see if it can potentially be composed. |
215 | if is_combining_l(l) && is_combining_v(v) && (t == 0 || is_combining_t(t)) { |
216 | // Try to compose; if this succeeds, end is set to start+1. |
217 | let s = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT + tindex; |
218 | if face.has_glyph(s) { |
219 | let n = if t != 0 { 3 } else { 2 }; |
220 | buffer.replace_glyphs(n, 1, &[s]); |
221 | end = start + 1; |
222 | continue; |
223 | } |
224 | } |
225 | |
226 | // We didn't compose, either because it's an Old Hangul syllable without a |
227 | // precomposed character in Unicode, or because the font didn't support the |
228 | // necessary precomposed glyph. |
229 | // Set jamo features on the individual glyphs, and advance past them. |
230 | buffer.cur_mut(0).set_hangul_shaping_feature(LJMO); |
231 | buffer.next_glyph(); |
232 | buffer.cur_mut(0).set_hangul_shaping_feature(VJMO); |
233 | buffer.next_glyph(); |
234 | if t != 0 { |
235 | buffer.cur_mut(0).set_hangul_shaping_feature(TJMO); |
236 | buffer.next_glyph(); |
237 | end = start + 3; |
238 | } else { |
239 | end = start + 2; |
240 | } |
241 | |
242 | if buffer.cluster_level == BufferClusterLevel::MonotoneGraphemes { |
243 | buffer.merge_out_clusters(start, end); |
244 | } |
245 | |
246 | continue; |
247 | } |
248 | } else if is_combined_s(u) { |
249 | // Have <LV>, <LVT>, or <LV,T> |
250 | let s = u; |
251 | let has_glyph = face.has_glyph(s); |
252 | |
253 | let lindex = (s - S_BASE) / N_COUNT; |
254 | let nindex = (s - S_BASE) % N_COUNT; |
255 | let vindex = nindex / T_COUNT; |
256 | let tindex = nindex % T_COUNT; |
257 | |
258 | if tindex == 0 && buffer.idx + 1 < buffer.len && is_combining_t(buffer.cur(1).glyph_id) |
259 | { |
260 | // <LV,T>, try to combine. |
261 | let new_tindex = buffer.cur(1).glyph_id - T_BASE; |
262 | let new_s = s + new_tindex; |
263 | |
264 | if face.has_glyph(new_s) { |
265 | buffer.replace_glyphs(2, 1, &[new_s]); |
266 | end = start + 1; |
267 | continue; |
268 | } else { |
269 | // Mark unsafe between LV and T. |
270 | buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2)); |
271 | } |
272 | } |
273 | |
274 | // Otherwise, decompose if font doesn't support <LV> or <LVT>, |
275 | // or if having non-combining <LV,T>. Note that we already handled |
276 | // combining <LV,T> above. |
277 | if !has_glyph |
278 | || (tindex == 0 && buffer.idx + 1 < buffer.len && is_t(buffer.cur(1).glyph_id)) |
279 | { |
280 | let decomposed = [L_BASE + lindex, V_BASE + vindex, T_BASE + tindex]; |
281 | if face.has_glyph(decomposed[0]) |
282 | && face.has_glyph(decomposed[1]) |
283 | && (tindex == 0 || face.has_glyph(decomposed[2])) |
284 | { |
285 | let mut s_len = if tindex != 0 { 3 } else { 2 }; |
286 | buffer.replace_glyphs(1, s_len, &decomposed); |
287 | |
288 | // If we decomposed an LV because of a non-combining T following, |
289 | // we want to include this T in the syllable. |
290 | if has_glyph && tindex == 0 { |
291 | buffer.next_glyph(); |
292 | s_len += 1; |
293 | } |
294 | |
295 | // We decomposed S: apply jamo features to the individual glyphs |
296 | // that are now in `buffer.out_info`. |
297 | end = start + s_len; |
298 | |
299 | buffer.out_info_mut()[start + 0].set_hangul_shaping_feature(LJMO); |
300 | buffer.out_info_mut()[start + 1].set_hangul_shaping_feature(VJMO); |
301 | if start + 2 < end { |
302 | buffer.out_info_mut()[start + 2].set_hangul_shaping_feature(TJMO); |
303 | } |
304 | |
305 | if buffer.cluster_level == BufferClusterLevel::MonotoneGraphemes { |
306 | buffer.merge_out_clusters(start, end); |
307 | } |
308 | |
309 | continue; |
310 | } else if tindex == 0 && buffer.idx + 1 > buffer.len && is_t(buffer.cur(1).glyph_id) |
311 | { |
312 | // Mark unsafe between LV and T. |
313 | buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2)); |
314 | } |
315 | } |
316 | |
317 | if has_glyph { |
318 | // We didn't decompose the S, so just advance past it. |
319 | end = start + 1; |
320 | buffer.next_glyph(); |
321 | continue; |
322 | } |
323 | } |
324 | |
325 | // Didn't find a recognizable syllable, so we leave end <= start; |
326 | // this will prevent tone-mark reordering happening. |
327 | buffer.next_glyph(); |
328 | } |
329 | |
330 | buffer.sync(); |
331 | } |
332 | |
/// Returns `true` for the two Hangul tone marks, U+302E and U+302F.
fn is_hangul_tone(u: u32) -> bool {
    matches!(u, 0x302E | 0x302F)
}
336 | |
337 | fn is_zero_width_char(face: &Face, c: char) -> bool { |
338 | if let Some(glyph: GlyphId) = face.glyph_index(c as u32) { |
339 | face.glyph_h_advance(glyph) == 0 |
340 | } else { |
341 | false |
342 | } |
343 | } |
344 | |
/// Returns `true` if `u` is a leading consonant jamo: U+1100..=U+115F or
/// U+A960..=U+A97C.
fn is_l(u: u32) -> bool {
    matches!(u, 0x1100..=0x115F | 0xA960..=0xA97C)
}
348 | |
/// Returns `true` if `u` is a vowel jamo: U+1160..=U+11A7 or
/// U+D7B0..=U+D7C6.
fn is_v(u: u32) -> bool {
    matches!(u, 0x1160..=0x11A7 | 0xD7B0..=0xD7C6)
}
352 | |
/// Returns `true` if `u` is a trailing consonant jamo: U+11A8..=U+11FF or
/// U+D7CB..=U+D7FB.
fn is_t(u: u32) -> bool {
    matches!(u, 0x11A8..=0x11FF | 0xD7CB..=0xD7FB)
}
356 | |
357 | fn is_combining_l(u: u32) -> bool { |
358 | (L_BASE..=L_BASE + L_COUNT - 1).contains(&u) |
359 | } |
360 | |
361 | fn is_combining_v(u: u32) -> bool { |
362 | (V_BASE..=V_BASE + V_COUNT - 1).contains(&u) |
363 | } |
364 | |
365 | fn is_combining_t(u: u32) -> bool { |
366 | (T_BASE + 1..=T_BASE + T_COUNT - 1).contains(&u) |
367 | } |
368 | |
369 | fn is_combined_s(u: u32) -> bool { |
370 | (S_BASE..=S_BASE + S_COUNT - 1).contains(&u) |
371 | } |
372 | |
373 | fn setup_masks(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
374 | let hangul_plan: &HangulShapePlan = plan.data::<HangulShapePlan>(); |
375 | for info: &mut GlyphInfo in buffer.info_slice_mut() { |
376 | info.mask |= hangul_plan.mask_array[info.hangul_shaping_feature() as usize]; |
377 | } |
378 | } |
379 | |