1use alloc::boxed::Box;
2
3use super::indic::{category, position};
4use super::*;
5use crate::buffer::Buffer;
6use crate::ot::{feature, FeatureFlags};
7use crate::plan::{ShapePlan, ShapePlanner};
8use crate::unicode::{CharExt, GeneralCategoryExt};
9use crate::{Face, GlyphInfo, Mask, Tag};
10
11pub const KHMER_SHAPER: ComplexShaper = ComplexShaper {
12 collect_features: Some(collect_features),
13 override_features: Some(override_features),
14 create_data: Some(|plan: &ShapePlan| Box::new(KhmerShapePlan::new(plan))),
15 preprocess_text: None,
16 postprocess_glyphs: None,
17 normalization_mode: Some(ShapeNormalizationMode::ComposedDiacriticsNoShortCircuit),
18 decompose: Some(decompose),
19 compose: Some(compose),
20 setup_masks: Some(setup_masks),
21 gpos_tag: None,
22 reorder_marks: None,
23 zero_width_marks: None,
24 fallback_position: false,
25};
26
27const KHMER_FEATURES: &[(Tag, FeatureFlags)] = &[
28 // Basic features.
29 // These features are applied all at once, before reordering, constrained
30 // to the syllable.
31 (feature::PRE_BASE_FORMS, FeatureFlags::MANUAL_JOINERS),
32 (feature::BELOW_BASE_FORMS, FeatureFlags::MANUAL_JOINERS),
33 (feature::ABOVE_BASE_FORMS, FeatureFlags::MANUAL_JOINERS),
34 (feature::POST_BASE_FORMS, FeatureFlags::MANUAL_JOINERS),
35 (
36 feature::CONJUNCT_FORM_AFTER_RO,
37 FeatureFlags::MANUAL_JOINERS,
38 ),
39 // Other features.
40 // These features are applied all at once after clearing syllables.
41 (
42 feature::PRE_BASE_SUBSTITUTIONS,
43 FeatureFlags::GLOBAL_MANUAL_JOINERS,
44 ),
45 (
46 feature::ABOVE_BASE_SUBSTITUTIONS,
47 FeatureFlags::GLOBAL_MANUAL_JOINERS,
48 ),
49 (
50 feature::BELOW_BASE_SUBSTITUTIONS,
51 FeatureFlags::GLOBAL_MANUAL_JOINERS,
52 ),
53 (
54 feature::POST_BASE_SUBSTITUTIONS,
55 FeatureFlags::GLOBAL_MANUAL_JOINERS,
56 ),
57];
58
/// Indices of the basic features inside `KHMER_FEATURES`.
///
/// The values must stay in the same order as the entries of the
/// `KHMER_FEATURES` array, because they are used to index the per-feature
/// mask array built from it.
mod khmer_feature {
    /// Index of `feature::PRE_BASE_FORMS`.
    pub const PREF: usize = 0;
    /// Index of `feature::BELOW_BASE_FORMS`.
    pub const BLWF: usize = 1;
    /// Index of `feature::ABOVE_BASE_FORMS`.
    pub const ABVF: usize = 2;
    /// Index of `feature::POST_BASE_FORMS`.
    pub const PSTF: usize = 3;
    /// Index of `feature::CONJUNCT_FORM_AFTER_RO`.
    pub const CFAR: usize = 4;
}
67
68impl GlyphInfo {
69 fn set_khmer_properties(&mut self) {
70 let u = self.glyph_id;
71 let (mut cat, pos) = super::indic::get_category_and_position(u);
72
73 // Re-assign category
74
75 // These categories are experimentally extracted from what Uniscribe allows.
76
77 match u {
78 0x179A => cat = category::RA,
79 0x17CC | 0x17C9 | 0x17CA => cat = category::ROBATIC,
80 0x17C6 | 0x17CB | 0x17CD | 0x17CE | 0x17CF | 0x17D0 | 0x17D1 => cat = category::X_GROUP,
81 // Just guessing. Uniscribe doesn't categorize it.
82 0x17C7 | 0x17C8 | 0x17DD | 0x17D3 => cat = category::Y_GROUP,
83 _ => {}
84 }
85
86 // Re-assign position.
87
88 if cat == category::M {
89 match pos {
90 position::PRE_C => cat = category::V_PRE,
91 position::BELOW_C => cat = category::V_BLW,
92 position::ABOVE_C => cat = category::V_AVB,
93 position::POST_C => cat = category::V_PST,
94 _ => {}
95 }
96 }
97
98 self.set_indic_category(cat);
99 }
100}
101
102struct KhmerShapePlan {
103 mask_array: [Mask; KHMER_FEATURES.len()],
104}
105
106impl KhmerShapePlan {
107 fn new(plan: &ShapePlan) -> Self {
108 let mut mask_array: [u32; 9] = [0; KHMER_FEATURES.len()];
109 for (i: usize, feature: &(Tag, FeatureFlags)) in KHMER_FEATURES.iter().enumerate() {
110 mask_array[i] = if feature.1.contains(FeatureFlags::GLOBAL) {
111 0
112 } else {
113 plan.ot_map.one_mask(feature_tag:feature.0)
114 }
115 }
116
117 KhmerShapePlan { mask_array }
118 }
119}
120
121fn collect_features(planner: &mut ShapePlanner) {
122 // Do this before any lookups have been applied.
123 planner.ot_map.add_gsub_pause(Some(setup_syllables));
124 planner.ot_map.add_gsub_pause(Some(reorder));
125
126 // Testing suggests that Uniscribe does NOT pause between basic
127 // features. Test with KhmerUI.ttf and the following three
128 // sequences:
129 //
130 // U+1789,U+17BC
131 // U+1789,U+17D2,U+1789
132 // U+1789,U+17D2,U+1789,U+17BC
133 //
134 // https://github.com/harfbuzz/harfbuzz/issues/974
135 planner
136 .ot_map
137 .enable_feature(feature::LOCALIZED_FORMS, FeatureFlags::empty(), 1);
138 planner.ot_map.enable_feature(
139 feature::GLYPH_COMPOSITION_DECOMPOSITION,
140 FeatureFlags::empty(),
141 1,
142 );
143
144 for feature in KHMER_FEATURES.iter().take(5) {
145 planner.ot_map.add_feature(feature.0, feature.1, 1);
146 }
147
148 planner
149 .ot_map
150 .add_gsub_pause(Some(crate::ot::clear_syllables));
151
152 for feature in KHMER_FEATURES.iter().skip(5) {
153 planner.ot_map.add_feature(feature.0, feature.1, 1);
154 }
155}
156
157fn setup_syllables(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
158 super::khmer_machine::find_syllables_khmer(buffer);
159
160 let mut start: usize = 0;
161 let mut end: usize = buffer.next_syllable(start:0);
162 while start < buffer.len {
163 buffer.unsafe_to_break(start:Some(start), end:Some(end));
164 start = end;
165 end = buffer.next_syllable(start);
166 }
167}
168
169fn reorder(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) {
170 use super::khmer_machine::SyllableType;
171
172 syllabic::insert_dotted_circles(
173 face,
174 buffer,
175 broken_syllable_type:SyllableType::BrokenCluster as u8,
176 dottedcircle_category:category::DOTTED_CIRCLE,
177 repha_category:Some(category::REPHA),
178 dottedcircle_position:None,
179 );
180
181 let khmer_plan: &KhmerShapePlan = plan.data::<KhmerShapePlan>();
182
183 let mut start: usize = 0;
184 let mut end: usize = buffer.next_syllable(start:0);
185 while start < buffer.len {
186 reorder_syllable(khmer_plan, start, end, buffer);
187 start = end;
188 end = buffer.next_syllable(start);
189 }
190}
191
192fn reorder_syllable(khmer_plan: &KhmerShapePlan, start: usize, end: usize, buffer: &mut Buffer) {
193 use super::khmer_machine::SyllableType;
194
195 let syllable_type: SyllableType = match buffer.info[start].syllable() & 0x0F {
196 0 => SyllableType::ConsonantSyllable,
197 1 => SyllableType::BrokenCluster,
198 2 => SyllableType::NonKhmerCluster,
199 _ => unreachable!(),
200 };
201
202 match syllable_type {
203 SyllableType::ConsonantSyllable | SyllableType::BrokenCluster => {
204 reorder_consonant_syllable(khmer_plan, start, end, buffer);
205 }
206 SyllableType::NonKhmerCluster => {}
207 }
208}
209
210// Rules from:
211// https://docs.microsoft.com/en-us/typography/script-development/devanagari
212fn reorder_consonant_syllable(
213 plan: &KhmerShapePlan,
214 start: usize,
215 end: usize,
216 buffer: &mut Buffer,
217) {
218 // Setup masks.
219 {
220 // Post-base
221 let mask = plan.mask_array[khmer_feature::BLWF]
222 | plan.mask_array[khmer_feature::ABVF]
223 | plan.mask_array[khmer_feature::PSTF];
224 for info in &mut buffer.info[start + 1..end] {
225 info.mask |= mask;
226 }
227 }
228
229 let mut num_coengs = 0;
230 for i in start + 1..end {
231 // When a COENG + (Cons | IndV) combination are found (and subscript count
232 // is less than two) the character combination is handled according to the
233 // subscript type of the character following the COENG.
234 //
235 // ...
236 //
237 // Subscript Type 2 - The COENG + RO characters are reordered to immediately
238 // before the base glyph. Then the COENG + RO characters are assigned to have
239 // the 'pref' OpenType feature applied to them.
240 if buffer.info[i].indic_category() == category::COENG && num_coengs <= 2 && i + 1 < end {
241 num_coengs += 1;
242
243 if buffer.info[i + 1].indic_category() == category::RA {
244 for j in 0..2 {
245 buffer.info[i + j].mask |= plan.mask_array[khmer_feature::PREF];
246 }
247
248 // Move the Coeng,Ro sequence to the start.
249 buffer.merge_clusters(start, i + 2);
250 let t0 = buffer.info[i];
251 let t1 = buffer.info[i + 1];
252 for k in (0..i - start).rev() {
253 buffer.info[k + start + 2] = buffer.info[k + start];
254 }
255
256 buffer.info[start] = t0;
257 buffer.info[start + 1] = t1;
258
259 // Mark the subsequent stuff with 'cfar'. Used in Khmer.
260 // Read the feature spec.
261 // This allows distinguishing the following cases with MS Khmer fonts:
262 // U+1784,U+17D2,U+179A,U+17D2,U+1782
263 // U+1784,U+17D2,U+1782,U+17D2,U+179A
264 if plan.mask_array[khmer_feature::CFAR] != 0 {
265 for j in i + 2..end {
266 buffer.info[j].mask |= plan.mask_array[khmer_feature::CFAR];
267 }
268 }
269
270 num_coengs = 2; // Done.
271 }
272 } else if buffer.info[i].indic_category() == category::V_PRE {
273 // Reorder left matra piece.
274
275 // Move to the start.
276 buffer.merge_clusters(start, i + 1);
277 let t = buffer.info[i];
278 for k in (0..i - start).rev() {
279 buffer.info[k + start + 1] = buffer.info[k + start];
280 }
281 buffer.info[start] = t;
282 }
283 }
284}
285
286fn override_features(planner: &mut ShapePlanner) {
287 // Khmer spec has 'clig' as part of required shaping features:
288 // "Apply feature 'clig' to form ligatures that are desired for
289 // typographical correctness.", hence in overrides...
290 planner
291 .ot_map
292 .enable_feature(tag:feature::CONTEXTUAL_LIGATURES, flags:FeatureFlags::empty(), value:1);
293
294 planner.ot_map.disable_feature(tag:feature::STANDARD_LIGATURES);
295}
296
297fn decompose(_: &ShapeNormalizeContext, ab: char) -> Option<(char, char)> {
298 // Decompose split matras that don't have Unicode decompositions.
299 match ab {
300 '\u{17BE}' | '\u{17BF}' | '\u{17C0}' | '\u{17C4}' | '\u{17C5}' => Some(('\u{17C1}', ab)),
301 _ => crate::unicode::decompose(ab),
302 }
303}
304
305fn compose(_: &ShapeNormalizeContext, a: char, b: char) -> Option<char> {
306 // Avoid recomposing split matras.
307 if a.general_category().is_mark() {
308 return None;
309 }
310
311 crate::unicode::compose(a, b)
312}
313
314fn setup_masks(_: &ShapePlan, _: &Face, buffer: &mut Buffer) {
315 // We cannot setup masks here. We save information about characters
316 // and setup masks later on in a pause-callback.
317 for info: &mut GlyphInfo in buffer.info_slice_mut() {
318 info.set_khmer_properties();
319 }
320}
321