1 | use super::buffer::*; |
2 | use super::ot_layout::*; |
3 | use super::ot_shape_normalize::HB_OT_SHAPE_NORMALIZATION_MODE_AUTO; |
4 | use super::ot_shape_plan::hb_ot_shape_plan_t; |
5 | use super::ot_shaper::*; |
6 | use super::unicode::hb_unicode_general_category_t; |
7 | use super::{hb_font_t, script}; |
8 | |
9 | pub const THAI_SHAPER: hb_ot_shaper_t = hb_ot_shaper_t { |
10 | collect_features: None, |
11 | override_features: None, |
12 | create_data: None, |
13 | preprocess_text: Some(preprocess_text), |
14 | postprocess_glyphs: None, |
15 | normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_AUTO, |
16 | decompose: None, |
17 | compose: None, |
18 | setup_masks: None, |
19 | gpos_tag: None, |
20 | reorder_marks: None, |
21 | zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, |
22 | fallback_position: false, |
23 | }; |
24 | |
/// Class of a base character, as produced by `get_consonant_type`.
///
/// The discriminant is used as the index into `ABOVE_START_STATE` and
/// `BELOW_START_STATE`, so the numeric values must stay as they are.
#[derive(Clone, Copy, PartialEq)]
enum Consonant {
    /// Any code point in U+0E01..=U+0E2E that has no more specific class.
    NC = 0,
    /// U+0E1B, U+0E1D, U+0E1F.
    AC = 1,
    /// U+0E0D, U+0E10 (start in the "removable descender" below-state).
    RC = 2,
    /// U+0E0E, U+0E0F (start in the "strict descender" below-state).
    DC = 3,
    /// Everything else.
    NotConsonant = 4,
}
33 | |
34 | fn get_consonant_type(u: u32) -> Consonant { |
35 | match u { |
36 | 0x0E1B | 0x0E1D | 0x0E1F => Consonant::AC, |
37 | 0x0E0D | 0x0E10 => Consonant::RC, |
38 | 0x0E0E | 0x0E0F => Consonant::DC, |
39 | 0x0E01..=0x0E2E => Consonant::NC, |
40 | _ => Consonant::NotConsonant, |
41 | } |
42 | } |
43 | |
/// Class of a combining mark, as produced by `get_mark_type`.
///
/// The first three discriminants are used as the column index into
/// `ABOVE_STATE_MACHINE` and `BELOW_STATE_MACHINE` (columns "AV BV T"), so
/// their numeric values must stay as they are.
#[derive(Clone, Copy, PartialEq)]
enum Mark {
    /// U+0E31, U+0E34..=U+0E37, U+0E47, U+0E4D, U+0E4E.
    AV = 0,
    /// U+0E38..=U+0E3A.
    BV = 1,
    /// U+0E48..=U+0E4C.
    T = 2,
    /// Not one of the marks above; never used as a table index.
    NotMark = 3,
}
51 | |
52 | fn get_mark_type(u: u32) -> Mark { |
53 | match u { |
54 | 0x0E31 | 0x0E34..=0x0E37 | 0x0E47 | 0x0E4D..=0x0E4E => Mark::AV, |
55 | 0x0E38..=0x0E3A => Mark::BV, |
56 | 0x0E48..=0x0E4C => Mark::T, |
57 | _ => Mark::NotMark, |
58 | } |
59 | } |
60 | |
/// What the PUA fallback does to a glyph; selects the mapping table used by
/// `pua_shape`.
#[derive(Clone, Copy, PartialEq)]
enum Action {
    /// Leave the glyph untouched.
    NOP,
    /// Shift the combining mark down.
    SD,
    /// Shift the combining mark left.
    SL,
    /// Shift the combining mark down and left.
    SDL,
    /// Remove the descender from the base.
    RD,
}
73 | |
/// One row of a PUA fallback table: a code point and the two Private Use Area
/// code points that `pua_shape` may substitute for it.
#[derive(Clone, Copy)]
struct PuaMapping {
    /// Code point to be replaced.
    u: u16,
    /// Replacement that `pua_shape` probes first.
    win_pua: u16,
    /// Replacement that `pua_shape` probes second.
    mac_pua: u16,
}

impl PuaMapping {
    /// Builds a row; `const` so that the mapping tables can be constants.
    const fn new(u: u16, win_pua: u16, mac_pua: u16) -> Self {
        Self {
            u,
            win_pua,
            mac_pua,
        }
    }
}
90 | |
91 | const SD_MAPPINGS: &[PuaMapping] = &[ |
92 | PuaMapping::new(u:0x0E48, win_pua:0xF70A, mac_pua:0xF88B), // MAI EK |
93 | PuaMapping::new(u:0x0E49, win_pua:0xF70B, mac_pua:0xF88E), // MAI THO |
94 | PuaMapping::new(u:0x0E4A, win_pua:0xF70C, mac_pua:0xF891), // MAI TRI |
95 | PuaMapping::new(u:0x0E4B, win_pua:0xF70D, mac_pua:0xF894), // MAI CHATTAWA |
96 | PuaMapping::new(u:0x0E4C, win_pua:0xF70E, mac_pua:0xF897), // THANTHAKHAT |
97 | PuaMapping::new(u:0x0E38, win_pua:0xF718, mac_pua:0xF89B), // SARA U |
98 | PuaMapping::new(u:0x0E39, win_pua:0xF719, mac_pua:0xF89C), // SARA UU |
99 | PuaMapping::new(u:0x0E3A, win_pua:0xF71A, mac_pua:0xF89D), // PHINTHU |
100 | PuaMapping::new(u:0x0000, win_pua:0x0000, mac_pua:0x0000), |
101 | ]; |
102 | |
103 | const SDL_MAPPINGS: &[PuaMapping] = &[ |
104 | PuaMapping::new(u:0x0E48, win_pua:0xF705, mac_pua:0xF88C), // MAI EK |
105 | PuaMapping::new(u:0x0E49, win_pua:0xF706, mac_pua:0xF88F), // MAI THO |
106 | PuaMapping::new(u:0x0E4A, win_pua:0xF707, mac_pua:0xF892), // MAI TRI |
107 | PuaMapping::new(u:0x0E4B, win_pua:0xF708, mac_pua:0xF895), // MAI CHATTAWA |
108 | PuaMapping::new(u:0x0E4C, win_pua:0xF709, mac_pua:0xF898), // THANTHAKHAT |
109 | PuaMapping::new(u:0x0000, win_pua:0x0000, mac_pua:0x0000), |
110 | ]; |
111 | |
112 | const SL_MAPPINGS: &[PuaMapping] = &[ |
113 | PuaMapping::new(u:0x0E48, win_pua:0xF713, mac_pua:0xF88A), // MAI EK |
114 | PuaMapping::new(u:0x0E49, win_pua:0xF714, mac_pua:0xF88D), // MAI THO |
115 | PuaMapping::new(u:0x0E4A, win_pua:0xF715, mac_pua:0xF890), // MAI TRI |
116 | PuaMapping::new(u:0x0E4B, win_pua:0xF716, mac_pua:0xF893), // MAI CHATTAWA |
117 | PuaMapping::new(u:0x0E4C, win_pua:0xF717, mac_pua:0xF896), // THANTHAKHAT |
118 | PuaMapping::new(u:0x0E31, win_pua:0xF710, mac_pua:0xF884), // MAI HAN-AKAT |
119 | PuaMapping::new(u:0x0E34, win_pua:0xF701, mac_pua:0xF885), // SARA I |
120 | PuaMapping::new(u:0x0E35, win_pua:0xF702, mac_pua:0xF886), // SARA II |
121 | PuaMapping::new(u:0x0E36, win_pua:0xF703, mac_pua:0xF887), // SARA UE |
122 | PuaMapping::new(u:0x0E37, win_pua:0xF704, mac_pua:0xF888), // SARA UEE |
123 | PuaMapping::new(u:0x0E47, win_pua:0xF712, mac_pua:0xF889), // MAITAIKHU |
124 | PuaMapping::new(u:0x0E4D, win_pua:0xF711, mac_pua:0xF899), // NIKHAHIT |
125 | PuaMapping::new(u:0x0000, win_pua:0x0000, mac_pua:0x0000), |
126 | ]; |
127 | |
128 | const RD_MAPPINGS: &[PuaMapping] = &[ |
129 | PuaMapping::new(u:0x0E0D, win_pua:0xF70F, mac_pua:0xF89A), // YO YING |
130 | PuaMapping::new(u:0x0E10, win_pua:0xF700, mac_pua:0xF89E), // THO THAN |
131 | PuaMapping::new(u:0x0000, win_pua:0x0000, mac_pua:0x0000), |
132 | ]; |
133 | |
134 | fn pua_shape(u: u32, action: Action, face: &hb_font_t) -> u32 { |
135 | let mappings = match action { |
136 | Action::NOP => return u, |
137 | Action::SD => SD_MAPPINGS, |
138 | Action::SL => SL_MAPPINGS, |
139 | Action::SDL => SDL_MAPPINGS, |
140 | Action::RD => RD_MAPPINGS, |
141 | }; |
142 | |
143 | for m in mappings { |
144 | if m.u as u32 == u { |
145 | if face.get_nominal_glyph(m.win_pua as u32).is_some() { |
146 | return m.win_pua as u32; |
147 | } |
148 | |
149 | if face.get_nominal_glyph(m.mac_pua as u32).is_some() { |
150 | return m.mac_pua as u32; |
151 | } |
152 | |
153 | break; |
154 | } |
155 | } |
156 | |
157 | u |
158 | } |
159 | |
/// State of the above-base state machine.
///
/// The discriminant is the row index into `ABOVE_STATE_MACHINE`, so the
/// numeric values must stay as they are. The glyph next to each state sketches
/// what the cluster looks like above the base.
#[derive(Clone, Copy)]
enum AboveState {
    T0 = 0, // ⣤
    T1 = 1, // ⣼
    T2 = 2, // ⣾
    T3 = 3, // ⣿
}
168 | |
169 | const ABOVE_START_STATE: &[AboveState] = &[ |
170 | AboveState::T0, // NC |
171 | AboveState::T1, // AC |
172 | AboveState::T0, // RC |
173 | AboveState::T0, // DC |
174 | AboveState::T3, // NotConsonant |
175 | ]; |
176 | |
177 | #[derive (Clone, Copy)] |
178 | struct AboveStateMachineEdge { |
179 | action: Action, |
180 | next_state: AboveState, |
181 | } |
182 | |
183 | impl AboveStateMachineEdge { |
184 | const fn new(action: Action, next_state: AboveState) -> Self { |
185 | AboveStateMachineEdge { action, next_state } |
186 | } |
187 | } |
188 | |
189 | type ASME = AboveStateMachineEdge; |
190 | |
191 | const ABOVE_STATE_MACHINE: &[[ASME; 3]] = &[ |
192 | // AV BV T |
193 | /* T0 */ |
194 | [ |
195 | ASME::new(Action::NOP, next_state:AboveState::T3), |
196 | ASME::new(Action::NOP, next_state:AboveState::T0), |
197 | ASME::new(Action::SD, next_state:AboveState::T3), |
198 | ], |
199 | /* T1 */ |
200 | [ |
201 | ASME::new(Action::SL, next_state:AboveState::T2), |
202 | ASME::new(Action::NOP, next_state:AboveState::T1), |
203 | ASME::new(Action::SDL, next_state:AboveState::T2), |
204 | ], |
205 | /* T2 */ |
206 | [ |
207 | ASME::new(Action::NOP, next_state:AboveState::T3), |
208 | ASME::new(Action::NOP, next_state:AboveState::T2), |
209 | ASME::new(Action::SL, next_state:AboveState::T3), |
210 | ], |
211 | /* T3 */ |
212 | [ |
213 | ASME::new(Action::NOP, next_state:AboveState::T3), |
214 | ASME::new(Action::NOP, next_state:AboveState::T3), |
215 | ASME::new(Action::NOP, next_state:AboveState::T3), |
216 | ], |
217 | ]; |
218 | |
/// State of the below-base state machine.
///
/// The discriminant is the row index into `BELOW_STATE_MACHINE`, so the
/// numeric values must stay as they are.
#[derive(Clone, Copy)]
enum BelowState {
    /// The base has no descender.
    B0 = 0,
    /// The base has a descender that can be removed.
    B1 = 1,
    /// The base has a strict descender.
    B2 = 2,
}
228 | |
229 | const BELOW_START_STATE: &[BelowState] = &[ |
230 | BelowState::B0, // NC |
231 | BelowState::B0, // AC |
232 | BelowState::B1, // RC |
233 | BelowState::B2, // DC |
234 | BelowState::B2, // NotConsonant |
235 | ]; |
236 | |
237 | #[derive (Clone, Copy)] |
238 | struct BelowStateMachineEdge { |
239 | action: Action, |
240 | next_state: BelowState, |
241 | } |
242 | |
243 | impl BelowStateMachineEdge { |
244 | const fn new(action: Action, next_state: BelowState) -> Self { |
245 | BelowStateMachineEdge { action, next_state } |
246 | } |
247 | } |
248 | |
249 | type BSME = BelowStateMachineEdge; |
250 | |
251 | const BELOW_STATE_MACHINE: &[[BSME; 3]] = &[ |
252 | // AV BV T |
253 | /* B0 */ |
254 | [ |
255 | BSME::new(Action::NOP, next_state:BelowState::B0), |
256 | BSME::new(Action::NOP, next_state:BelowState::B2), |
257 | BSME::new(Action::NOP, next_state:BelowState::B0), |
258 | ], |
259 | /* B1 */ |
260 | [ |
261 | BSME::new(Action::NOP, next_state:BelowState::B1), |
262 | BSME::new(Action::RD, next_state:BelowState::B2), |
263 | BSME::new(Action::NOP, next_state:BelowState::B1), |
264 | ], |
265 | /* B2 */ |
266 | [ |
267 | BSME::new(Action::NOP, next_state:BelowState::B2), |
268 | BSME::new(Action::SD, next_state:BelowState::B2), |
269 | BSME::new(Action::NOP, next_state:BelowState::B2), |
270 | ], |
271 | ]; |
272 | |
273 | fn do_pua_shaping(face: &hb_font_t, buffer: &mut hb_buffer_t) { |
274 | let mut above_state = ABOVE_START_STATE[Consonant::NotConsonant as usize]; |
275 | let mut below_state = BELOW_START_STATE[Consonant::NotConsonant as usize]; |
276 | let mut base = 0; |
277 | |
278 | for i in 0..buffer.len { |
279 | let mt = get_mark_type(buffer.info[i].glyph_id); |
280 | |
281 | if mt == Mark::NotMark { |
282 | let ct = get_consonant_type(buffer.info[i].glyph_id); |
283 | above_state = ABOVE_START_STATE[ct as usize]; |
284 | below_state = BELOW_START_STATE[ct as usize]; |
285 | base = i; |
286 | continue; |
287 | } |
288 | |
289 | let above_edge = ABOVE_STATE_MACHINE[above_state as usize][mt as usize]; |
290 | let below_edge = BELOW_STATE_MACHINE[below_state as usize][mt as usize]; |
291 | above_state = above_edge.next_state; |
292 | below_state = below_edge.next_state; |
293 | |
294 | // At least one of the above/below actions is NOP. |
295 | let action = if above_edge.action != Action::NOP { |
296 | above_edge.action |
297 | } else { |
298 | below_edge.action |
299 | }; |
300 | |
301 | buffer.unsafe_to_break(Some(base), Some(i)); |
302 | if action == Action::RD { |
303 | buffer.info[base].glyph_id = pua_shape(buffer.info[base].glyph_id, action, face); |
304 | } else { |
305 | buffer.info[i].glyph_id = pua_shape(buffer.info[i].glyph_id, action, face); |
306 | } |
307 | } |
308 | } |
309 | |
310 | // TODO: more tests |
311 | fn preprocess_text(plan: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) { |
312 | // This function implements the shaping logic documented here: |
313 | // |
314 | // https://linux.thai.net/~thep/th-otf/shaping.html |
315 | // |
316 | // The first shaping rule listed there is needed even if the font has Thai |
317 | // OpenType tables. The rest do fallback positioning based on PUA codepoints. |
318 | // We implement that only if there exist no Thai GSUB in the font. |
319 | |
320 | // The following is NOT specified in the MS OT Thai spec, however, it seems |
321 | // to be what Uniscribe and other engines implement. According to Eric Muller: |
322 | // |
323 | // When you have a SARA AM, decompose it in NIKHAHIT + SARA AA, *and* move the |
324 | // NIKHAHIT backwards over any above-base marks (0E31, 0E34-0E37, 0E47-0E4E). |
325 | // |
326 | // <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32> |
327 | // |
328 | // This reordering is legit only when the NIKHAHIT comes from a SARA AM, not |
329 | // when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably |
330 | // not what a user wanted, but the rendering is nevertheless nikhahit above |
331 | // chattawa. |
332 | // |
333 | // Same for Lao. |
334 | // |
335 | // Note: |
336 | // |
337 | // Uniscribe also does some below-marks reordering. Namely, it positions U+0E3A |
338 | // after U+0E38 and U+0E39. We do that by modifying the ccc for U+0E3A. |
339 | // See unicode->modified_combining_class (). Lao does NOT have a U+0E3A |
340 | // equivalent. |
341 | |
342 | // Here are the characters of significance: |
343 | // |
344 | // Thai Lao |
345 | // SARA AM: U+0E33 U+0EB3 |
346 | // SARA AA: U+0E32 U+0EB2 |
347 | // Nikhahit: U+0E4D U+0ECD |
348 | // |
349 | // Testing shows that Uniscribe reorder the following marks: |
350 | // Thai: <0E31,0E34..0E37,0E47..0E4E> |
351 | // Lao: <0EB1,0EB4..0EB7,0EBB,0EC8..0ECD> |
352 | // |
353 | // Note how the Lao versions are the same as Thai + 0x80. |
354 | |
355 | // We only get one script at a time, so a script-agnostic implementation |
356 | // is adequate here. |
357 | #[inline ] |
358 | fn is_sara_am(u: u32) -> bool { |
359 | (u & !0x0080) == 0x0E33 |
360 | } |
361 | #[inline ] |
362 | fn nikhahit_from_sara_am(u: u32) -> u32 { |
363 | u - 0x0E33 + 0x0E4D |
364 | } |
365 | #[inline ] |
366 | fn sara_aa_from_sara_am(u: u32) -> u32 { |
367 | u - 1 |
368 | } |
369 | #[inline ] |
370 | fn is_above_base_mark(u: u32) -> bool { |
371 | let u = u & !0x0080; |
372 | matches!(u, 0x0E34..=0x0E37 | 0x0E47..=0x0E4E | 0x0E31..=0x0E31 | 0x0E3B..=0x0E3B) |
373 | } |
374 | |
375 | buffer.clear_output(); |
376 | buffer.idx = 0; |
377 | while buffer.idx < buffer.len { |
378 | let u = buffer.cur(0).glyph_id; |
379 | if !is_sara_am(u) { |
380 | buffer.next_glyph(); |
381 | continue; |
382 | } |
383 | |
384 | // Is SARA AM. Decompose and reorder. |
385 | buffer.output_glyph(nikhahit_from_sara_am(u)); |
386 | { |
387 | let out_idx = buffer.out_len - 1; |
388 | _hb_glyph_info_set_continuation(&mut buffer.out_info_mut()[out_idx]); |
389 | } |
390 | buffer.replace_glyph(sara_aa_from_sara_am(u)); |
391 | |
392 | // Make Nikhahit be recognized as a ccc=0 mark when zeroing widths. |
393 | let end = buffer.out_len; |
394 | _hb_glyph_info_set_general_category( |
395 | &mut buffer.out_info_mut()[end - 2], |
396 | hb_unicode_general_category_t::NonspacingMark, |
397 | ); |
398 | |
399 | // Ok, let's see... |
400 | let mut start = end - 2; |
401 | while start > 0 && is_above_base_mark(buffer.out_info()[start - 1].glyph_id) { |
402 | start -= 1; |
403 | } |
404 | |
405 | if start + 2 < end { |
406 | // Move Nikhahit (end-2) to the beginning |
407 | buffer.merge_out_clusters(start, end); |
408 | let t = buffer.out_info()[end - 2]; |
409 | for i in 0..(end - start - 2) { |
410 | buffer.out_info_mut()[i + start + 1] = buffer.out_info()[i + start]; |
411 | } |
412 | buffer.out_info_mut()[start] = t; |
413 | } else { |
414 | // Since we decomposed, and NIKHAHIT is combining, merge clusters with the |
415 | // previous cluster. |
416 | if start != 0 && buffer.cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES { |
417 | buffer.merge_out_clusters(start - 1, end); |
418 | } |
419 | } |
420 | } |
421 | |
422 | buffer.sync(); |
423 | |
424 | // If font has Thai GSUB, we are done. |
425 | if plan.script == Some(script::THAI) && !plan.ot_map.found_script(TableIndex::GSUB) { |
426 | do_pua_shaping(face, buffer); |
427 | } |
428 | } |
429 | |