1use super::buffer::*;
2use super::ot_layout::*;
3use super::ot_shape_normalize::HB_OT_SHAPE_NORMALIZATION_MODE_AUTO;
4use super::ot_shape_plan::hb_ot_shape_plan_t;
5use super::ot_shaper::*;
6use super::unicode::hb_unicode_general_category_t;
7use super::{hb_font_t, script};
8
9pub const THAI_SHAPER: hb_ot_shaper_t = hb_ot_shaper_t {
10 collect_features: None,
11 override_features: None,
12 create_data: None,
13 preprocess_text: Some(preprocess_text),
14 postprocess_glyphs: None,
15 normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_AUTO,
16 decompose: None,
17 compose: None,
18 setup_masks: None,
19 gpos_tag: None,
20 reorder_marks: None,
21 zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
22 fallback_position: false,
23};
24
/// Class of a base character for the PUA fallback state machines.
///
/// The discriminant is used directly as an index into `ABOVE_START_STATE`
/// and `BELOW_START_STATE`, so the variant order must not change.
#[derive(Clone, Copy, PartialEq)]
enum Consonant {
    /// Any consonant in U+0E01..=U+0E2E without a more specific class.
    NC = 0,
    /// U+0E1B, U+0E1D, U+0E1F.
    AC,
    /// U+0E0D, U+0E10 (the characters listed in `RD_MAPPINGS`).
    RC,
    /// U+0E0E, U+0E0F.
    DC,
    /// Everything outside U+0E01..=U+0E2E.
    NotConsonant,
}

/// Classifies the code point `u` into a [`Consonant`] class.
fn get_consonant_type(u: u32) -> Consonant {
    // Everything outside the consonant range is rejected up front; all the
    // special-cased code points below lie inside that range.
    if !(0x0E01..=0x0E2E).contains(&u) {
        return Consonant::NotConsonant;
    }

    match u {
        0x0E0D | 0x0E10 => Consonant::RC,
        0x0E0E | 0x0E0F => Consonant::DC,
        0x0E1B | 0x0E1D | 0x0E1F => Consonant::AC,
        _ => Consonant::NC,
    }
}
43
/// Class of a combining mark for the PUA fallback state machines.
///
/// The first three variants are used (via `as usize`) as the column index
/// into `ABOVE_STATE_MACHINE` and `BELOW_STATE_MACHINE`, so their order must
/// stay `AV`, `BV`, `T`.
#[derive(Clone, Copy, PartialEq)]
enum Mark {
    /// U+0E31, U+0E34..=U+0E37, U+0E47, U+0E4D, U+0E4E.
    AV,
    /// U+0E38..=U+0E3A.
    BV,
    /// U+0E48..=U+0E4C.
    T,
    /// Any other code point.
    NotMark,
}

/// Classifies the code point `u` into a [`Mark`] class.
fn get_mark_type(u: u32) -> Mark {
    match u {
        0x0E38..=0x0E3A => Mark::BV,
        0x0E48..=0x0E4C => Mark::T,
        0x0E31 | 0x0E47 => Mark::AV,
        0x0E34..=0x0E37 => Mark::AV,
        0x0E4D | 0x0E4E => Mark::AV,
        _ => Mark::NotMark,
    }
}
60
/// Substitution the PUA fallback applies for one mark in a cluster.
///
/// Each variant other than `NOP` selects one of the `*_MAPPINGS` tables in
/// `pua_shape`.
#[derive(Copy, Clone, PartialEq)]
enum Action {
    /// Leave the glyph untouched.
    NOP,
    /// Shift the combining mark down.
    SD,
    /// Shift the combining mark left.
    SL,
    /// Shift the combining mark down and left.
    SDL,
    /// Remove the descender from the base character.
    RD,
}
73
/// One fallback substitution: a code point and its replacement in two
/// alternative Private Use Area layouts.
#[derive(Clone, Copy)]
struct PuaMapping {
    /// Code point to be replaced.
    u: u16,
    /// Replacement tried first by `pua_shape` (presumably the Windows PUA layout).
    win_pua: u16,
    /// Replacement tried second by `pua_shape` (presumably the Mac PUA layout).
    mac_pua: u16,
}

impl PuaMapping {
    /// Builds a table entry; `const` so it can be used in the static tables.
    const fn new(u: u16, win_pua: u16, mac_pua: u16) -> Self {
        PuaMapping {
            u,
            win_pua,
            mac_pua,
        }
    }
}

// Each table below ends with an all-zero entry.
// NOTE(review): this looks like a C-style terminator; `pua_shape` walks the
// whole slice, so the entry only ever matches the code point 0.

/// Replacements used for `Action::SD` (shift mark down).
const SD_MAPPINGS: &[PuaMapping] = &[
    PuaMapping::new(0x0E48, 0xF70A, 0xF88B), // MAI EK
    PuaMapping::new(0x0E49, 0xF70B, 0xF88E), // MAI THO
    PuaMapping::new(0x0E4A, 0xF70C, 0xF891), // MAI TRI
    PuaMapping::new(0x0E4B, 0xF70D, 0xF894), // MAI CHATTAWA
    PuaMapping::new(0x0E4C, 0xF70E, 0xF897), // THANTHAKHAT
    PuaMapping::new(0x0E38, 0xF718, 0xF89B), // SARA U
    PuaMapping::new(0x0E39, 0xF719, 0xF89C), // SARA UU
    PuaMapping::new(0x0E3A, 0xF71A, 0xF89D), // PHINTHU
    PuaMapping::new(0x0000, 0x0000, 0x0000),
];

/// Replacements used for `Action::SDL` (shift mark down-left).
const SDL_MAPPINGS: &[PuaMapping] = &[
    PuaMapping::new(0x0E48, 0xF705, 0xF88C), // MAI EK
    PuaMapping::new(0x0E49, 0xF706, 0xF88F), // MAI THO
    PuaMapping::new(0x0E4A, 0xF707, 0xF892), // MAI TRI
    PuaMapping::new(0x0E4B, 0xF708, 0xF895), // MAI CHATTAWA
    PuaMapping::new(0x0E4C, 0xF709, 0xF898), // THANTHAKHAT
    PuaMapping::new(0x0000, 0x0000, 0x0000),
];

/// Replacements used for `Action::SL` (shift mark left).
const SL_MAPPINGS: &[PuaMapping] = &[
    PuaMapping::new(0x0E48, 0xF713, 0xF88A), // MAI EK
    PuaMapping::new(0x0E49, 0xF714, 0xF88D), // MAI THO
    PuaMapping::new(0x0E4A, 0xF715, 0xF890), // MAI TRI
    PuaMapping::new(0x0E4B, 0xF716, 0xF893), // MAI CHATTAWA
    PuaMapping::new(0x0E4C, 0xF717, 0xF896), // THANTHAKHAT
    PuaMapping::new(0x0E31, 0xF710, 0xF884), // MAI HAN-AKAT
    PuaMapping::new(0x0E34, 0xF701, 0xF885), // SARA I
    PuaMapping::new(0x0E35, 0xF702, 0xF886), // SARA II
    PuaMapping::new(0x0E36, 0xF703, 0xF887), // SARA UE
    PuaMapping::new(0x0E37, 0xF704, 0xF888), // SARA UEE
    PuaMapping::new(0x0E47, 0xF712, 0xF889), // MAITAIKHU
    PuaMapping::new(0x0E4D, 0xF711, 0xF899), // NIKHAHIT
    PuaMapping::new(0x0000, 0x0000, 0x0000),
];

/// Replacements used for `Action::RD` (remove descender from the base).
const RD_MAPPINGS: &[PuaMapping] = &[
    PuaMapping::new(0x0E0D, 0xF70F, 0xF89A), // YO YING
    PuaMapping::new(0x0E10, 0xF700, 0xF89E), // THO THAN
    PuaMapping::new(0x0000, 0x0000, 0x0000),
];
133
134fn pua_shape(u: u32, action: Action, face: &hb_font_t) -> u32 {
135 let mappings = match action {
136 Action::NOP => return u,
137 Action::SD => SD_MAPPINGS,
138 Action::SL => SL_MAPPINGS,
139 Action::SDL => SDL_MAPPINGS,
140 Action::RD => RD_MAPPINGS,
141 };
142
143 for m in mappings {
144 if m.u as u32 == u {
145 if face.get_nominal_glyph(m.win_pua as u32).is_some() {
146 return m.win_pua as u32;
147 }
148
149 if face.get_nominal_glyph(m.mac_pua as u32).is_some() {
150 return m.mac_pua as u32;
151 }
152
153 break;
154 }
155 }
156
157 u
158}
159
/// State of the above-base machine; the discriminant is the row index into
/// `ABOVE_STATE_MACHINE`.
///
/// The glyph after each variant sketches what the cluster above looks like.
#[derive(Copy, Clone)]
enum AboveState {
    /// ⣤
    T0,
    /// ⣼
    T1,
    /// ⣾
    T2,
    /// ⣿
    T3,
}

/// Initial above-base state for each `Consonant` class, indexed by the
/// class discriminant.
const ABOVE_START_STATE: &[AboveState] = &[
    /* NC */ AboveState::T0,
    /* AC */ AboveState::T1,
    /* RC */ AboveState::T0,
    /* DC */ AboveState::T0,
    /* NotConsonant */ AboveState::T3,
];
176
177#[derive(Clone, Copy)]
178struct AboveStateMachineEdge {
179 action: Action,
180 next_state: AboveState,
181}
182
183impl AboveStateMachineEdge {
184 const fn new(action: Action, next_state: AboveState) -> Self {
185 AboveStateMachineEdge { action, next_state }
186 }
187}
188
189type ASME = AboveStateMachineEdge;
190
191const ABOVE_STATE_MACHINE: &[[ASME; 3]] = &[
192 // AV BV T
193 /* T0 */
194 [
195 ASME::new(Action::NOP, next_state:AboveState::T3),
196 ASME::new(Action::NOP, next_state:AboveState::T0),
197 ASME::new(Action::SD, next_state:AboveState::T3),
198 ],
199 /* T1 */
200 [
201 ASME::new(Action::SL, next_state:AboveState::T2),
202 ASME::new(Action::NOP, next_state:AboveState::T1),
203 ASME::new(Action::SDL, next_state:AboveState::T2),
204 ],
205 /* T2 */
206 [
207 ASME::new(Action::NOP, next_state:AboveState::T3),
208 ASME::new(Action::NOP, next_state:AboveState::T2),
209 ASME::new(Action::SL, next_state:AboveState::T3),
210 ],
211 /* T3 */
212 [
213 ASME::new(Action::NOP, next_state:AboveState::T3),
214 ASME::new(Action::NOP, next_state:AboveState::T3),
215 ASME::new(Action::NOP, next_state:AboveState::T3),
216 ],
217];
218
/// State of the below-base machine; the discriminant is the row index into
/// `BELOW_STATE_MACHINE`.
#[derive(Copy, Clone)]
enum BelowState {
    /// No descender.
    B0,
    /// Removable descender.
    B1,
    /// Strict descender.
    B2,
}

/// Initial below-base state for each `Consonant` class, indexed by the
/// class discriminant.
const BELOW_START_STATE: &[BelowState] = &[
    /* NC */ BelowState::B0,
    /* AC */ BelowState::B0,
    /* RC */ BelowState::B1,
    /* DC */ BelowState::B2,
    /* NotConsonant */ BelowState::B2,
];
236
237#[derive(Clone, Copy)]
238struct BelowStateMachineEdge {
239 action: Action,
240 next_state: BelowState,
241}
242
243impl BelowStateMachineEdge {
244 const fn new(action: Action, next_state: BelowState) -> Self {
245 BelowStateMachineEdge { action, next_state }
246 }
247}
248
249type BSME = BelowStateMachineEdge;
250
251const BELOW_STATE_MACHINE: &[[BSME; 3]] = &[
252 // AV BV T
253 /* B0 */
254 [
255 BSME::new(Action::NOP, next_state:BelowState::B0),
256 BSME::new(Action::NOP, next_state:BelowState::B2),
257 BSME::new(Action::NOP, next_state:BelowState::B0),
258 ],
259 /* B1 */
260 [
261 BSME::new(Action::NOP, next_state:BelowState::B1),
262 BSME::new(Action::RD, next_state:BelowState::B2),
263 BSME::new(Action::NOP, next_state:BelowState::B1),
264 ],
265 /* B2 */
266 [
267 BSME::new(Action::NOP, next_state:BelowState::B2),
268 BSME::new(Action::SD, next_state:BelowState::B2),
269 BSME::new(Action::NOP, next_state:BelowState::B2),
270 ],
271];
272
273fn do_pua_shaping(face: &hb_font_t, buffer: &mut hb_buffer_t) {
274 let mut above_state = ABOVE_START_STATE[Consonant::NotConsonant as usize];
275 let mut below_state = BELOW_START_STATE[Consonant::NotConsonant as usize];
276 let mut base = 0;
277
278 for i in 0..buffer.len {
279 let mt = get_mark_type(buffer.info[i].glyph_id);
280
281 if mt == Mark::NotMark {
282 let ct = get_consonant_type(buffer.info[i].glyph_id);
283 above_state = ABOVE_START_STATE[ct as usize];
284 below_state = BELOW_START_STATE[ct as usize];
285 base = i;
286 continue;
287 }
288
289 let above_edge = ABOVE_STATE_MACHINE[above_state as usize][mt as usize];
290 let below_edge = BELOW_STATE_MACHINE[below_state as usize][mt as usize];
291 above_state = above_edge.next_state;
292 below_state = below_edge.next_state;
293
294 // At least one of the above/below actions is NOP.
295 let action = if above_edge.action != Action::NOP {
296 above_edge.action
297 } else {
298 below_edge.action
299 };
300
301 buffer.unsafe_to_break(Some(base), Some(i));
302 if action == Action::RD {
303 buffer.info[base].glyph_id = pua_shape(buffer.info[base].glyph_id, action, face);
304 } else {
305 buffer.info[i].glyph_id = pua_shape(buffer.info[i].glyph_id, action, face);
306 }
307 }
308}
309
310// TODO: more tests
311fn preprocess_text(plan: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) {
312 // This function implements the shaping logic documented here:
313 //
314 // https://linux.thai.net/~thep/th-otf/shaping.html
315 //
316 // The first shaping rule listed there is needed even if the font has Thai
317 // OpenType tables. The rest do fallback positioning based on PUA codepoints.
318 // We implement that only if there exist no Thai GSUB in the font.
319
320 // The following is NOT specified in the MS OT Thai spec, however, it seems
321 // to be what Uniscribe and other engines implement. According to Eric Muller:
322 //
323 // When you have a SARA AM, decompose it in NIKHAHIT + SARA AA, *and* move the
324 // NIKHAHIT backwards over any above-base marks (0E31, 0E34-0E37, 0E47-0E4E).
325 //
326 // <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32>
327 //
328 // This reordering is legit only when the NIKHAHIT comes from a SARA AM, not
329 // when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably
330 // not what a user wanted, but the rendering is nevertheless nikhahit above
331 // chattawa.
332 //
333 // Same for Lao.
334 //
335 // Note:
336 //
337 // Uniscribe also does some below-marks reordering. Namely, it positions U+0E3A
338 // after U+0E38 and U+0E39. We do that by modifying the ccc for U+0E3A.
339 // See unicode->modified_combining_class (). Lao does NOT have a U+0E3A
340 // equivalent.
341
342 // Here are the characters of significance:
343 //
344 // Thai Lao
345 // SARA AM: U+0E33 U+0EB3
346 // SARA AA: U+0E32 U+0EB2
347 // Nikhahit: U+0E4D U+0ECD
348 //
349 // Testing shows that Uniscribe reorder the following marks:
350 // Thai: <0E31,0E34..0E37,0E47..0E4E>
351 // Lao: <0EB1,0EB4..0EB7,0EBB,0EC8..0ECD>
352 //
353 // Note how the Lao versions are the same as Thai + 0x80.
354
355 // We only get one script at a time, so a script-agnostic implementation
356 // is adequate here.
357 #[inline]
358 fn is_sara_am(u: u32) -> bool {
359 (u & !0x0080) == 0x0E33
360 }
361 #[inline]
362 fn nikhahit_from_sara_am(u: u32) -> u32 {
363 u - 0x0E33 + 0x0E4D
364 }
365 #[inline]
366 fn sara_aa_from_sara_am(u: u32) -> u32 {
367 u - 1
368 }
369 #[inline]
370 fn is_above_base_mark(u: u32) -> bool {
371 let u = u & !0x0080;
372 matches!(u, 0x0E34..=0x0E37 | 0x0E47..=0x0E4E | 0x0E31..=0x0E31 | 0x0E3B..=0x0E3B)
373 }
374
375 buffer.clear_output();
376 buffer.idx = 0;
377 while buffer.idx < buffer.len {
378 let u = buffer.cur(0).glyph_id;
379 if !is_sara_am(u) {
380 buffer.next_glyph();
381 continue;
382 }
383
384 // Is SARA AM. Decompose and reorder.
385 buffer.output_glyph(nikhahit_from_sara_am(u));
386 {
387 let out_idx = buffer.out_len - 1;
388 _hb_glyph_info_set_continuation(&mut buffer.out_info_mut()[out_idx]);
389 }
390 buffer.replace_glyph(sara_aa_from_sara_am(u));
391
392 // Make Nikhahit be recognized as a ccc=0 mark when zeroing widths.
393 let end = buffer.out_len;
394 _hb_glyph_info_set_general_category(
395 &mut buffer.out_info_mut()[end - 2],
396 hb_unicode_general_category_t::NonspacingMark,
397 );
398
399 // Ok, let's see...
400 let mut start = end - 2;
401 while start > 0 && is_above_base_mark(buffer.out_info()[start - 1].glyph_id) {
402 start -= 1;
403 }
404
405 if start + 2 < end {
406 // Move Nikhahit (end-2) to the beginning
407 buffer.merge_out_clusters(start, end);
408 let t = buffer.out_info()[end - 2];
409 for i in 0..(end - start - 2) {
410 buffer.out_info_mut()[i + start + 1] = buffer.out_info()[i + start];
411 }
412 buffer.out_info_mut()[start] = t;
413 } else {
414 // Since we decomposed, and NIKHAHIT is combining, merge clusters with the
415 // previous cluster.
416 if start != 0 && buffer.cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES {
417 buffer.merge_out_clusters(start - 1, end);
418 }
419 }
420 }
421
422 buffer.sync();
423
424 // If font has Thai GSUB, we are done.
425 if plan.script == Some(script::THAI) && !plan.ot_map.found_script(TableIndex::GSUB) {
426 do_pua_shaping(face, buffer);
427 }
428}
429