1use alloc::boxed::Box;
2
3use super::*;
4use crate::buffer::{Buffer, BufferClusterLevel, BufferFlags};
5use crate::ot::{feature, FeatureFlags, Map};
6use crate::plan::{ShapePlan, ShapePlanner};
7use crate::{Face, GlyphInfo, Mask};
8
9pub const HANGUL_SHAPER: ComplexShaper = ComplexShaper {
10 collect_features: Some(collect_features),
11 override_features: Some(override_features),
12 create_data: Some(|plan: &ShapePlan| Box::new(HangulShapePlan::new(&plan.ot_map))),
13 preprocess_text: Some(preprocess_text),
14 postprocess_glyphs: None,
15 normalization_mode: None,
16 decompose: None,
17 compose: None,
18 setup_masks: Some(setup_masks),
19 gpos_tag: None,
20 reorder_marks: None,
21 zero_width_marks: None,
22 fallback_position: false,
23};
24
// Code points used by the arithmetic Hangul syllable (de)composition below.

/// First leading-consonant jamo that takes part in composition.
const L_BASE: u32 = 0x1100;
/// First vowel jamo that takes part in composition.
const V_BASE: u32 = 0x1161;
/// One before the first composing trailing jamo, so that a trailing index
/// of zero means "no trailing consonant".
const T_BASE: u32 = 0x11A7;
/// First precomposed syllable.
const S_BASE: u32 = 0xAC00;

/// Number of composing leading consonants.
const L_COUNT: u32 = 19;
/// Number of composing vowels.
const V_COUNT: u32 = 21;
/// Number of trailing-consonant slots, including the "none" slot.
const T_COUNT: u32 = 28;
/// Number of precomposed syllables per leading consonant.
const N_COUNT: u32 = V_COUNT * T_COUNT;
/// Total number of precomposed syllables.
const S_COUNT: u32 = L_COUNT * N_COUNT;

// Per-glyph shaping feature values; they double as indices into
// `HangulShapePlan::mask_array` (index 0 means "no jamo feature").

/// Glyph is a leading jamo.
const LJMO: u8 = 1;
/// Glyph is a vowel jamo.
const VJMO: u8 = 2;
/// Glyph is a trailing jamo.
const TJMO: u8 = 3;
38
39impl GlyphInfo {
40 fn hangul_shaping_feature(&self) -> u8 {
41 self.complex_var_u8_auxiliary()
42 }
43
44 fn set_hangul_shaping_feature(&mut self, feature: u8) {
45 self.set_complex_var_u8_auxiliary(feature)
46 }
47}
48
49struct HangulShapePlan {
50 mask_array: [Mask; 4],
51}
52
53impl HangulShapePlan {
54 fn new(map: &Map) -> Self {
55 HangulShapePlan {
56 mask_array: [
57 0,
58 map.one_mask(feature_tag:feature::LEADING_JAMO_FORMS),
59 map.one_mask(feature_tag:feature::VOWEL_JAMO_FORMS),
60 map.one_mask(feature_tag:feature::TRAILING_JAMO_FORMS),
61 ],
62 }
63 }
64}
65
66fn collect_features(planner: &mut ShapePlanner) {
67 planner
68 .ot_map
69 .add_feature(tag:feature::LEADING_JAMO_FORMS, flags:FeatureFlags::empty(), value:1);
70 planner
71 .ot_map
72 .add_feature(tag:feature::VOWEL_JAMO_FORMS, flags:FeatureFlags::empty(), value:1);
73 planner
74 .ot_map
75 .add_feature(tag:feature::TRAILING_JAMO_FORMS, flags:FeatureFlags::empty(), value:1);
76}
77
78fn override_features(planner: &mut ShapePlanner) {
79 // Uniscribe does not apply 'calt' for Hangul, and certain fonts
80 // (Noto Sans CJK, Source Sans Han, etc) apply all of jamo lookups
81 // in calt, which is not desirable.
82 planner
83 .ot_map
84 .disable_feature(tag:feature::CONTEXTUAL_ALTERNATES);
85}
86
87fn preprocess_text(_: &ShapePlan, face: &Face, buffer: &mut Buffer) {
88 // Hangul syllables come in two shapes: LV, and LVT. Of those:
89 //
90 // - LV can be precomposed, or decomposed. Lets call those
91 // <LV> and <L,V>,
92 // - LVT can be fully precomposed, partially precomposed, or
93 // fully decomposed. Ie. <LVT>, <LV,T>, or <L,V,T>.
94 //
95 // The composition / decomposition is mechanical. However, not
96 // all <L,V> sequences compose, and not all <LV,T> sequences
97 // compose.
98 //
99 // Here are the specifics:
100 //
101 // - <L>: U+1100..115F, U+A960..A97F
102 // - <V>: U+1160..11A7, U+D7B0..D7C7
103 // - <T>: U+11A8..11FF, U+D7CB..D7FB
104 //
105 // - Only the <L,V> sequences for some of the U+11xx ranges combine.
106 // - Only <LV,T> sequences for some of the Ts in U+11xx range combine.
107 //
108 // Here is what we want to accomplish in this shaper:
109 //
110 // - If the whole syllable can be precomposed, do that,
111 // - Otherwise, fully decompose and apply ljmo/vjmo/tjmo features.
112 // - If a valid syllable is followed by a Hangul tone mark, reorder the tone
113 // mark to precede the whole syllable - unless it is a zero-width glyph, in
114 // which case we leave it untouched, assuming it's designed to overstrike.
115 //
116 // That is, of the different possible syllables:
117 //
118 // <L>
119 // <L,V>
120 // <L,V,T>
121 // <LV>
122 // <LVT>
123 // <LV, T>
124 //
125 // - <L> needs no work.
126 //
127 // - <LV> and <LVT> can stay the way they are if the font supports them, otherwise we
128 // should fully decompose them if font supports.
129 //
130 // - <L,V> and <L,V,T> we should compose if the whole thing can be composed.
131 //
132 // - <LV,T> we should compose if the whole thing can be composed, otherwise we should
133 // decompose.
134
135 buffer.clear_output();
136 // Extent of most recently seen syllable; valid only if start < end
137 let mut start = 0;
138 let mut end = 0;
139 buffer.idx = 0;
140 while buffer.idx < buffer.len {
141 let u = buffer.cur(0).glyph_id;
142 let c = buffer.cur(0).as_char();
143
144 if is_hangul_tone(u) {
145 // We could cache the width of the tone marks and the existence of dotted-circle,
146 // but the use of the Hangul tone mark characters seems to be rare enough that
147 // I didn't bother for now.
148 if start < end && end == buffer.out_len {
149 // Tone mark follows a valid syllable; move it in front, unless it's zero width.
150 buffer.unsafe_to_break_from_outbuffer(Some(start), Some(buffer.idx));
151 buffer.next_glyph();
152 if !is_zero_width_char(face, c) {
153 buffer.merge_out_clusters(start, end + 1);
154 let out_info = buffer.out_info_mut();
155 let tone = out_info[end];
156 for i in (0..end - start).rev() {
157 out_info[i + start + 1] = out_info[i + start];
158 }
159 out_info[start] = tone;
160 }
161 } else {
162 // No valid syllable as base for tone mark; try to insert dotted circle.
163 if !buffer
164 .flags
165 .contains(BufferFlags::DO_NOT_INSERT_DOTTED_CIRCLE)
166 && face.has_glyph(0x25CC)
167 {
168 let mut chars = [0; 2];
169 if !is_zero_width_char(face, c) {
170 chars[0] = u;
171 chars[1] = 0x25CC;
172 } else {
173 chars[0] = 0x25CC;
174 chars[1] = u;
175 }
176
177 buffer.replace_glyphs(1, 2, &chars);
178 } else {
179 // No dotted circle available in the font; just leave tone mark untouched.
180 buffer.next_glyph();
181 }
182 }
183
184 start = buffer.out_len;
185 end = buffer.out_len;
186 continue;
187 }
188
189 // Remember current position as a potential syllable start;
190 // will only be used if we set end to a later position.
191 start = buffer.out_len;
192
193 if is_l(u) && buffer.idx + 1 < buffer.len {
194 let l = u;
195 let v = buffer.cur(1).glyph_id;
196 if is_v(v) {
197 // Have <L,V> or <L,V,T>.
198 let mut t = 0;
199 let mut tindex = 0;
200 if buffer.idx + 2 < buffer.len {
201 t = buffer.cur(2).glyph_id;
202 if is_t(t) {
203 // Only used if isCombiningT (t); otherwise invalid.
204 tindex = t - T_BASE;
205 } else {
206 // The next character was not a trailing jamo.
207 t = 0;
208 }
209 }
210
211 let offset = if t != 0 { 3 } else { 2 };
212 buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + offset));
213
214 // We've got a syllable <L,V,T?>; see if it can potentially be composed.
215 if is_combining_l(l) && is_combining_v(v) && (t == 0 || is_combining_t(t)) {
216 // Try to compose; if this succeeds, end is set to start+1.
217 let s = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT + tindex;
218 if face.has_glyph(s) {
219 let n = if t != 0 { 3 } else { 2 };
220 buffer.replace_glyphs(n, 1, &[s]);
221 end = start + 1;
222 continue;
223 }
224 }
225
226 // We didn't compose, either because it's an Old Hangul syllable without a
227 // precomposed character in Unicode, or because the font didn't support the
228 // necessary precomposed glyph.
229 // Set jamo features on the individual glyphs, and advance past them.
230 buffer.cur_mut(0).set_hangul_shaping_feature(LJMO);
231 buffer.next_glyph();
232 buffer.cur_mut(0).set_hangul_shaping_feature(VJMO);
233 buffer.next_glyph();
234 if t != 0 {
235 buffer.cur_mut(0).set_hangul_shaping_feature(TJMO);
236 buffer.next_glyph();
237 end = start + 3;
238 } else {
239 end = start + 2;
240 }
241
242 if buffer.cluster_level == BufferClusterLevel::MonotoneGraphemes {
243 buffer.merge_out_clusters(start, end);
244 }
245
246 continue;
247 }
248 } else if is_combined_s(u) {
249 // Have <LV>, <LVT>, or <LV,T>
250 let s = u;
251 let has_glyph = face.has_glyph(s);
252
253 let lindex = (s - S_BASE) / N_COUNT;
254 let nindex = (s - S_BASE) % N_COUNT;
255 let vindex = nindex / T_COUNT;
256 let tindex = nindex % T_COUNT;
257
258 if tindex == 0 && buffer.idx + 1 < buffer.len && is_combining_t(buffer.cur(1).glyph_id)
259 {
260 // <LV,T>, try to combine.
261 let new_tindex = buffer.cur(1).glyph_id - T_BASE;
262 let new_s = s + new_tindex;
263
264 if face.has_glyph(new_s) {
265 buffer.replace_glyphs(2, 1, &[new_s]);
266 end = start + 1;
267 continue;
268 } else {
269 // Mark unsafe between LV and T.
270 buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2));
271 }
272 }
273
274 // Otherwise, decompose if font doesn't support <LV> or <LVT>,
275 // or if having non-combining <LV,T>. Note that we already handled
276 // combining <LV,T> above.
277 if !has_glyph
278 || (tindex == 0 && buffer.idx + 1 < buffer.len && is_t(buffer.cur(1).glyph_id))
279 {
280 let decomposed = [L_BASE + lindex, V_BASE + vindex, T_BASE + tindex];
281 if face.has_glyph(decomposed[0])
282 && face.has_glyph(decomposed[1])
283 && (tindex == 0 || face.has_glyph(decomposed[2]))
284 {
285 let mut s_len = if tindex != 0 { 3 } else { 2 };
286 buffer.replace_glyphs(1, s_len, &decomposed);
287
288 // If we decomposed an LV because of a non-combining T following,
289 // we want to include this T in the syllable.
290 if has_glyph && tindex == 0 {
291 buffer.next_glyph();
292 s_len += 1;
293 }
294
295 // We decomposed S: apply jamo features to the individual glyphs
296 // that are now in `buffer.out_info`.
297 end = start + s_len;
298
299 buffer.out_info_mut()[start + 0].set_hangul_shaping_feature(LJMO);
300 buffer.out_info_mut()[start + 1].set_hangul_shaping_feature(VJMO);
301 if start + 2 < end {
302 buffer.out_info_mut()[start + 2].set_hangul_shaping_feature(TJMO);
303 }
304
305 if buffer.cluster_level == BufferClusterLevel::MonotoneGraphemes {
306 buffer.merge_out_clusters(start, end);
307 }
308
309 continue;
310 } else if tindex == 0 && buffer.idx + 1 > buffer.len && is_t(buffer.cur(1).glyph_id)
311 {
312 // Mark unsafe between LV and T.
313 buffer.unsafe_to_break(Some(buffer.idx), Some(buffer.idx + 2));
314 }
315 }
316
317 if has_glyph {
318 // We didn't decompose the S, so just advance past it.
319 end = start + 1;
320 buffer.next_glyph();
321 continue;
322 }
323 }
324
325 // Didn't find a recognizable syllable, so we leave end <= start;
326 // this will prevent tone-mark reordering happening.
327 buffer.next_glyph();
328 }
329
330 buffer.sync();
331}
332
/// Returns `true` if `u` is one of the two Hangul tone marks
/// (U+302E or U+302F).
fn is_hangul_tone(u: u32) -> bool {
    matches!(u, 0x302E | 0x302F)
}
336
337fn is_zero_width_char(face: &Face, c: char) -> bool {
338 if let Some(glyph: GlyphId) = face.glyph_index(c as u32) {
339 face.glyph_h_advance(glyph) == 0
340 } else {
341 false
342 }
343}
344
/// Returns `true` if `u` is a leading jamo (`<L>`): U+1100..=U+115F or
/// U+A960..=U+A97C.
fn is_l(u: u32) -> bool {
    matches!(u, 0x1100..=0x115F | 0xA960..=0xA97C)
}
348
/// Returns `true` if `u` is a vowel jamo (`<V>`): U+1160..=U+11A7 or
/// U+D7B0..=U+D7C6.
fn is_v(u: u32) -> bool {
    matches!(u, 0x1160..=0x11A7 | 0xD7B0..=0xD7C6)
}
352
/// Returns `true` if `u` is a trailing jamo (`<T>`): U+11A8..=U+11FF or
/// U+D7CB..=U+D7FB.
fn is_t(u: u32) -> bool {
    matches!(u, 0x11A8..=0x11FF | 0xD7CB..=0xD7FB)
}
356
357fn is_combining_l(u: u32) -> bool {
358 (L_BASE..=L_BASE + L_COUNT - 1).contains(&u)
359}
360
361fn is_combining_v(u: u32) -> bool {
362 (V_BASE..=V_BASE + V_COUNT - 1).contains(&u)
363}
364
365fn is_combining_t(u: u32) -> bool {
366 (T_BASE + 1..=T_BASE + T_COUNT - 1).contains(&u)
367}
368
369fn is_combined_s(u: u32) -> bool {
370 (S_BASE..=S_BASE + S_COUNT - 1).contains(&u)
371}
372
373fn setup_masks(plan: &ShapePlan, _: &Face, buffer: &mut Buffer) {
374 let hangul_plan: &HangulShapePlan = plan.data::<HangulShapePlan>();
375 for info: &mut GlyphInfo in buffer.info_slice_mut() {
376 info.mask |= hangul_plan.mask_array[info.hangul_shaping_feature() as usize];
377 }
378}
379