1use super::*;
2use crate::buffer::{Buffer, BufferClusterLevel};
3use crate::ot::TableIndex;
4use crate::plan::ShapePlan;
5use crate::unicode::GeneralCategory;
6use crate::{script, Face};
7
8pub const THAI_SHAPER: ComplexShaper = ComplexShaper {
9 collect_features: None,
10 override_features: None,
11 create_data: None,
12 preprocess_text: Some(preprocess_text),
13 postprocess_glyphs: None,
14 normalization_mode: Some(ShapeNormalizationMode::Auto),
15 decompose: None,
16 compose: None,
17 setup_masks: None,
18 gpos_tag: None,
19 reorder_marks: None,
20 zero_width_marks: Some(ZeroWidthMarksMode::ByGdefLate),
21 fallback_position: false,
22};
23
/// Consonant class of a base character.
///
/// The discriminant is used as an index into the start-state tables, so the
/// variant order must not change.
#[derive(Clone, Copy, PartialEq)]
enum Consonant {
    /// Every consonant in U+0E01..=U+0E2E that is not listed below.
    NC = 0,
    /// U+0E1B, U+0E1D, U+0E1F.
    AC,
    /// U+0E0D, U+0E10.
    RC,
    /// U+0E0E, U+0E0F.
    DC,
    /// Anything outside U+0E01..=U+0E2E.
    NotConsonant,
}

/// Classifies the code point `u` into its [`Consonant`] class.
fn get_consonant_type(u: u32) -> Consonant {
    // Everything outside the consonant range is rejected up front; all the
    // special cases below live inside that range.
    if !(0x0E01..=0x0E2E).contains(&u) {
        return Consonant::NotConsonant;
    }

    match u {
        0x0E1B | 0x0E1D | 0x0E1F => Consonant::AC,
        0x0E0D | 0x0E10 => Consonant::RC,
        0x0E0E | 0x0E0F => Consonant::DC,
        _ => Consonant::NC,
    }
}
42
/// Class of a combining mark.
///
/// The discriminant of the three real mark classes is used as the column
/// index into the state-machine tables, so the variant order must not change.
#[derive(Clone, Copy, PartialEq)]
enum Mark {
    /// U+0E31, U+0E34..=U+0E37, U+0E47, U+0E4D, U+0E4E.
    AV,
    /// U+0E38..=U+0E3A.
    BV,
    /// U+0E48..=U+0E4C.
    T,
    /// Not one of the marks above.
    NotMark,
}

/// Classifies the code point `u` into its [`Mark`] class.
fn get_mark_type(u: u32) -> Mark {
    // The three ranges are disjoint, so the order of the checks is irrelevant.
    if matches!(u, 0x0E38..=0x0E3A) {
        Mark::BV
    } else if matches!(u, 0x0E48..=0x0E4C) {
        Mark::T
    } else if matches!(u, 0x0E31 | 0x0E34..=0x0E37 | 0x0E47 | 0x0E4D | 0x0E4E) {
        Mark::AV
    } else {
        Mark::NotMark
    }
}
59
/// Adjustment selected by the above/below state machines for one mark.
///
/// Each non-`NOP` action picks the substitution table that `pua_shape`
/// searches.
#[derive(Clone, Copy, PartialEq)]
enum Action {
    /// Leave the code point untouched.
    NOP,
    /// Shift the combining mark down.
    SD,
    /// Shift the combining mark left.
    SL,
    /// Shift the combining mark down and left.
    SDL,
    /// Remove the descender from the base.
    RD,
}
72
/// One row of a PUA substitution table.
///
/// `pua_shape` looks a row up by `u` and then tries `win_pua` first and
/// `mac_pua` second, using whichever the face has a glyph for.
#[derive(Clone, Copy)]
struct PuaMapping {
    /// Code point to be replaced.
    u: u32,
    /// First-choice replacement (presumably the Windows PUA assignment).
    win_pua: u32,
    /// Second-choice replacement (presumably the Mac PUA assignment).
    mac_pua: u32,
}

impl PuaMapping {
    /// Builds a table row; `const` so it can be used in the static tables.
    const fn new(u: u32, win_pua: u32, mac_pua: u32) -> Self {
        PuaMapping {
            u,
            win_pua,
            mac_pua,
        }
    }
}

// Every table ends with an all-zero row. Lookups match on `u`, so that row is
// only ever hit for U+0000, where the "replacement" equals the input.

/// Replacements for marks that must be shifted down.
const SD_MAPPINGS: &[PuaMapping] = &[
    PuaMapping::new(0x0E48, 0xF70A, 0xF88B), // MAI EK
    PuaMapping::new(0x0E49, 0xF70B, 0xF88E), // MAI THO
    PuaMapping::new(0x0E4A, 0xF70C, 0xF891), // MAI TRI
    PuaMapping::new(0x0E4B, 0xF70D, 0xF894), // MAI CHATTAWA
    PuaMapping::new(0x0E4C, 0xF70E, 0xF897), // THANTHAKHAT
    PuaMapping::new(0x0E38, 0xF718, 0xF89B), // SARA U
    PuaMapping::new(0x0E39, 0xF719, 0xF89C), // SARA UU
    PuaMapping::new(0x0E3A, 0xF71A, 0xF89D), // PHINTHU
    PuaMapping::new(0x0000, 0x0000, 0x0000),
];

/// Replacements for marks that must be shifted down and left.
const SDL_MAPPINGS: &[PuaMapping] = &[
    PuaMapping::new(0x0E48, 0xF705, 0xF88C), // MAI EK
    PuaMapping::new(0x0E49, 0xF706, 0xF88F), // MAI THO
    PuaMapping::new(0x0E4A, 0xF707, 0xF892), // MAI TRI
    PuaMapping::new(0x0E4B, 0xF708, 0xF895), // MAI CHATTAWA
    PuaMapping::new(0x0E4C, 0xF709, 0xF898), // THANTHAKHAT
    PuaMapping::new(0x0000, 0x0000, 0x0000),
];

/// Replacements for marks that must be shifted left.
const SL_MAPPINGS: &[PuaMapping] = &[
    PuaMapping::new(0x0E48, 0xF713, 0xF88A), // MAI EK
    PuaMapping::new(0x0E49, 0xF714, 0xF88D), // MAI THO
    PuaMapping::new(0x0E4A, 0xF715, 0xF890), // MAI TRI
    PuaMapping::new(0x0E4B, 0xF716, 0xF893), // MAI CHATTAWA
    PuaMapping::new(0x0E4C, 0xF717, 0xF896), // THANTHAKHAT
    PuaMapping::new(0x0E31, 0xF710, 0xF884), // MAI HAN-AKAT
    PuaMapping::new(0x0E34, 0xF701, 0xF885), // SARA I
    PuaMapping::new(0x0E35, 0xF702, 0xF886), // SARA II
    PuaMapping::new(0x0E36, 0xF703, 0xF887), // SARA UE
    PuaMapping::new(0x0E37, 0xF704, 0xF888), // SARA UEE
    PuaMapping::new(0x0E47, 0xF712, 0xF889), // MAITAIKHU
    PuaMapping::new(0x0E4D, 0xF711, 0xF899), // NIKHAHIT
    PuaMapping::new(0x0000, 0x0000, 0x0000),
];

/// Replacements for base consonants whose descender must be removed.
const RD_MAPPINGS: &[PuaMapping] = &[
    PuaMapping::new(0x0E0D, 0xF70F, 0xF89A), // YO YING
    PuaMapping::new(0x0E10, 0xF700, 0xF89E), // THO THAN
    PuaMapping::new(0x0000, 0x0000, 0x0000),
];
132
133fn pua_shape(u: u32, action: Action, face: &Face) -> u32 {
134 let mappings = match action {
135 Action::NOP => return u,
136 Action::SD => SD_MAPPINGS,
137 Action::SL => SL_MAPPINGS,
138 Action::SDL => SDL_MAPPINGS,
139 Action::RD => RD_MAPPINGS,
140 };
141
142 for m in mappings {
143 if m.u == u {
144 if face.glyph_index(m.win_pua).is_some() {
145 return m.win_pua;
146 }
147
148 if face.glyph_index(m.mac_pua).is_some() {
149 return m.mac_pua;
150 }
151
152 break;
153 }
154 }
155
156 u
157}
158
/// State of the "above" state machine.
///
/// The discriminant is used as the row index into `ABOVE_STATE_MACHINE`, so
/// the variant order must not change. The pictures show what the cluster
/// above looks like in each state.
#[derive(Clone, Copy)]
enum AboveState {
    /// ⣤
    T0,
    /// ⣼
    T1,
    /// ⣾
    T2,
    /// ⣿
    T3,
}

/// Initial above-state for a base character, indexed by `Consonant as usize`.
const ABOVE_START_STATE: &[AboveState] = &[
    /* NC           */ AboveState::T0,
    /* AC           */ AboveState::T1,
    /* RC           */ AboveState::T0,
    /* DC           */ AboveState::T0,
    /* NotConsonant */ AboveState::T3,
];
175
176#[derive(Clone, Copy)]
177struct AboveStateMachineEdge {
178 action: Action,
179 next_state: AboveState,
180}
181
182impl AboveStateMachineEdge {
183 const fn new(action: Action, next_state: AboveState) -> Self {
184 AboveStateMachineEdge { action, next_state }
185 }
186}
187
188type ASME = AboveStateMachineEdge;
189
190const ABOVE_STATE_MACHINE: &[[ASME; 3]] = &[
191 // AV BV T
192 /* T0 */
193 [
194 ASME::new(Action::NOP, next_state:AboveState::T3),
195 ASME::new(Action::NOP, next_state:AboveState::T0),
196 ASME::new(Action::SD, next_state:AboveState::T3),
197 ],
198 /* T1 */
199 [
200 ASME::new(Action::SL, next_state:AboveState::T2),
201 ASME::new(Action::NOP, next_state:AboveState::T1),
202 ASME::new(Action::SDL, next_state:AboveState::T2),
203 ],
204 /* T2 */
205 [
206 ASME::new(Action::NOP, next_state:AboveState::T3),
207 ASME::new(Action::NOP, next_state:AboveState::T2),
208 ASME::new(Action::SL, next_state:AboveState::T3),
209 ],
210 /* T3 */
211 [
212 ASME::new(Action::NOP, next_state:AboveState::T3),
213 ASME::new(Action::NOP, next_state:AboveState::T3),
214 ASME::new(Action::NOP, next_state:AboveState::T3),
215 ],
216];
217
/// State of the "below" state machine.
///
/// The discriminant is used as the row index into `BELOW_STATE_MACHINE`, so
/// the variant order must not change.
#[derive(Clone, Copy)]
enum BelowState {
    /// The base has no descender.
    B0,
    /// The base has a descender that can be removed.
    B1,
    /// The base has a strict descender.
    B2,
}

/// Initial below-state for a base character, indexed by `Consonant as usize`.
const BELOW_START_STATE: &[BelowState] = &[
    /* NC           */ BelowState::B0,
    /* AC           */ BelowState::B0,
    /* RC           */ BelowState::B1,
    /* DC           */ BelowState::B2,
    /* NotConsonant */ BelowState::B2,
];
235
236#[derive(Clone, Copy)]
237struct BelowStateMachineEdge {
238 action: Action,
239 next_state: BelowState,
240}
241
242impl BelowStateMachineEdge {
243 const fn new(action: Action, next_state: BelowState) -> Self {
244 BelowStateMachineEdge { action, next_state }
245 }
246}
247
248type BSME = BelowStateMachineEdge;
249
250const BELOW_STATE_MACHINE: &[[BSME; 3]] = &[
251 // AV BV T
252 /* B0 */
253 [
254 BSME::new(Action::NOP, next_state:BelowState::B0),
255 BSME::new(Action::NOP, next_state:BelowState::B2),
256 BSME::new(Action::NOP, next_state:BelowState::B0),
257 ],
258 /* B1 */
259 [
260 BSME::new(Action::NOP, next_state:BelowState::B1),
261 BSME::new(Action::RD, next_state:BelowState::B2),
262 BSME::new(Action::NOP, next_state:BelowState::B1),
263 ],
264 /* B2 */
265 [
266 BSME::new(Action::NOP, next_state:BelowState::B2),
267 BSME::new(Action::SD, next_state:BelowState::B2),
268 BSME::new(Action::NOP, next_state:BelowState::B2),
269 ],
270];
271
272fn do_pua_shaping(face: &Face, buffer: &mut Buffer) {
273 let mut above_state = ABOVE_START_STATE[Consonant::NotConsonant as usize];
274 let mut below_state = BELOW_START_STATE[Consonant::NotConsonant as usize];
275 let mut base = 0;
276
277 for i in 0..buffer.len {
278 let mt = get_mark_type(buffer.info[i].glyph_id);
279
280 if mt == Mark::NotMark {
281 let ct = get_consonant_type(buffer.info[i].glyph_id);
282 above_state = ABOVE_START_STATE[ct as usize];
283 below_state = BELOW_START_STATE[ct as usize];
284 base = i;
285 continue;
286 }
287
288 let above_edge = ABOVE_STATE_MACHINE[above_state as usize][mt as usize];
289 let below_edge = BELOW_STATE_MACHINE[below_state as usize][mt as usize];
290 above_state = above_edge.next_state;
291 below_state = below_edge.next_state;
292
293 // At least one of the above/below actions is NOP.
294 let action = if above_edge.action != Action::NOP {
295 above_edge.action
296 } else {
297 below_edge.action
298 };
299
300 buffer.unsafe_to_break(base, i);
301 if action == Action::RD {
302 buffer.info[base].glyph_id = pua_shape(buffer.info[base].glyph_id, action, face);
303 } else {
304 buffer.info[i].glyph_id = pua_shape(buffer.info[i].glyph_id, action, face);
305 }
306 }
307}
308
309// TODO: more tests
310fn preprocess_text(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) {
311 // This function implements the shaping logic documented here:
312 //
313 // https://linux.thai.net/~thep/th-otf/shaping.html
314 //
315 // The first shaping rule listed there is needed even if the font has Thai
316 // OpenType tables. The rest do fallback positioning based on PUA codepoints.
317 // We implement that only if there exist no Thai GSUB in the font.
318
319 // The following is NOT specified in the MS OT Thai spec, however, it seems
320 // to be what Uniscribe and other engines implement. According to Eric Muller:
321 //
322 // When you have a SARA AM, decompose it in NIKHAHIT + SARA AA, *and* move the
323 // NIKHAHIT backwards over any tone mark (0E48-0E4B).
324 //
325 // <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32>
326 //
327 // This reordering is legit only when the NIKHAHIT comes from a SARA AM, not
328 // when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably
329 // not what a user wanted, but the rendering is nevertheless nikhahit above
330 // chattawa.
331 //
332 // Same for Lao.
333 //
334 // Note:
335 //
336 // Uniscribe also does some below-marks reordering. Namely, it positions U+0E3A
337 // after U+0E38 and U+0E39. We do that by modifying the ccc for U+0E3A.
338 // See unicode->modified_combining_class (). Lao does NOT have a U+0E3A
339 // equivalent.
340
341 // Here are the characters of significance:
342 //
343 // Thai Lao
344 // SARA AM: U+0E33 U+0EB3
345 // SARA AA: U+0E32 U+0EB2
346 // Nikhahit: U+0E4D U+0ECD
347 //
348 // Testing shows that Uniscribe reorder the following marks:
349 // Thai: <0E31,0E34..0E37,0E47..0E4E>
350 // Lao: <0EB1,0EB4..0EB7,0EC7..0ECE>
351 //
352 // Note how the Lao versions are the same as Thai + 0x80.
353
354 // We only get one script at a time, so a script-agnostic implementation
355 // is adequate here.
356 #[inline]
357 fn is_sara_am(u: u32) -> bool {
358 (u & !0x0080) == 0x0E33
359 }
360 #[inline]
361 fn nikhahit_from_sara_am(u: u32) -> u32 {
362 u - 0x0E33 + 0x0E4D
363 }
364 #[inline]
365 fn sara_aa_from_sara_am(u: u32) -> u32 {
366 u - 1
367 }
368 #[inline]
369 fn is_tone_mark(u: u32) -> bool {
370 let u = u & !0x0080;
371 matches!(u, 0x0E34..=0x0E37 | 0x0E47..=0x0E4E | 0x0E31..=0x0E31)
372 }
373
374 buffer.clear_output();
375 buffer.idx = 0;
376 while buffer.idx < buffer.len {
377 let u = buffer.cur(0).glyph_id;
378 if !is_sara_am(u) {
379 buffer.next_glyph();
380 continue;
381 }
382
383 // Is SARA AM. Decompose and reorder.
384 buffer.output_glyph(nikhahit_from_sara_am(u));
385 {
386 let out_idx = buffer.out_len - 1;
387 buffer.out_info_mut()[out_idx].set_continuation();
388 }
389 buffer.replace_glyph(sara_aa_from_sara_am(u));
390
391 // Make Nikhahit be recognized as a ccc=0 mark when zeroing widths.
392 let end = buffer.out_len;
393 buffer.out_info_mut()[end - 2].set_general_category(GeneralCategory::NonspacingMark);
394
395 // Ok, let's see...
396 let mut start = end - 2;
397 while start > 0 && is_tone_mark(buffer.out_info()[start - 1].glyph_id) {
398 start -= 1;
399 }
400
401 if start + 2 < end {
402 // Move Nikhahit (end-2) to the beginning
403 buffer.merge_out_clusters(start, end);
404 let t = buffer.out_info()[end - 2];
405 for i in 0..(end - start - 2) {
406 buffer.out_info_mut()[i + start + 1] = buffer.out_info()[i + start];
407 }
408 buffer.out_info_mut()[start] = t;
409 } else {
410 // Since we decomposed, and NIKHAHIT is combining, merge clusters with the
411 // previous cluster.
412 if start != 0 && buffer.cluster_level == BufferClusterLevel::MonotoneGraphemes {
413 buffer.merge_out_clusters(start - 1, end);
414 }
415 }
416 }
417
418 buffer.swap_buffers();
419
420 // If font has Thai GSUB, we are done.
421 if plan.script == Some(script::THAI) && !plan.ot_map.found_script(TableIndex::GSUB) {
422 do_pua_shaping(face, buffer);
423 }
424}
425