1 | use alloc::boxed::Box; |
2 | |
3 | use super::indic::{category, position}; |
4 | use super::*; |
5 | use crate::buffer::Buffer; |
6 | use crate::ot::{feature, FeatureFlags}; |
7 | use crate::plan::{ShapePlan, ShapePlanner}; |
8 | use crate::unicode::{CharExt, GeneralCategoryExt}; |
9 | use crate::{Face, GlyphInfo, Mask, Tag}; |
10 | |
11 | pub const KHMER_SHAPER: ComplexShaper = ComplexShaper { |
12 | collect_features: Some(collect_features), |
13 | override_features: Some(override_features), |
14 | create_data: Some(|plan: &ShapePlan| Box::new(KhmerShapePlan::new(plan))), |
15 | preprocess_text: None, |
16 | postprocess_glyphs: None, |
17 | normalization_mode: Some(ShapeNormalizationMode::ComposedDiacriticsNoShortCircuit), |
18 | decompose: Some(decompose), |
19 | compose: Some(compose), |
20 | setup_masks: Some(setup_masks), |
21 | gpos_tag: None, |
22 | reorder_marks: None, |
23 | zero_width_marks: None, |
24 | fallback_position: false, |
25 | }; |
26 | |
27 | const KHMER_FEATURES: &[(Tag, FeatureFlags)] = &[ |
28 | // Basic features. |
29 | // These features are applied all at once, before reordering, constrained |
30 | // to the syllable. |
31 | (feature::PRE_BASE_FORMS, FeatureFlags::MANUAL_JOINERS), |
32 | (feature::BELOW_BASE_FORMS, FeatureFlags::MANUAL_JOINERS), |
33 | (feature::ABOVE_BASE_FORMS, FeatureFlags::MANUAL_JOINERS), |
34 | (feature::POST_BASE_FORMS, FeatureFlags::MANUAL_JOINERS), |
35 | ( |
36 | feature::CONJUNCT_FORM_AFTER_RO, |
37 | FeatureFlags::MANUAL_JOINERS, |
38 | ), |
39 | // Other features. |
40 | // These features are applied all at once after clearing syllables. |
41 | ( |
42 | feature::PRE_BASE_SUBSTITUTIONS, |
43 | FeatureFlags::GLOBAL_MANUAL_JOINERS, |
44 | ), |
45 | ( |
46 | feature::ABOVE_BASE_SUBSTITUTIONS, |
47 | FeatureFlags::GLOBAL_MANUAL_JOINERS, |
48 | ), |
49 | ( |
50 | feature::BELOW_BASE_SUBSTITUTIONS, |
51 | FeatureFlags::GLOBAL_MANUAL_JOINERS, |
52 | ), |
53 | ( |
54 | feature::POST_BASE_SUBSTITUTIONS, |
55 | FeatureFlags::GLOBAL_MANUAL_JOINERS, |
56 | ), |
57 | ]; |
58 | |
// Indices of the basic (non-global) features inside `KHMER_FEATURES`,
// used to look up entries of `KhmerShapePlan::mask_array`.
//
// Must be in the same order as the KHMER_FEATURES array.
mod khmer_feature {
    /// Index of `feature::PRE_BASE_FORMS`.
    pub const PREF: usize = 0;
    /// Index of `feature::BELOW_BASE_FORMS`.
    pub const BLWF: usize = 1;
    /// Index of `feature::ABOVE_BASE_FORMS`.
    pub const ABVF: usize = 2;
    /// Index of `feature::POST_BASE_FORMS`.
    pub const PSTF: usize = 3;
    /// Index of `feature::CONJUNCT_FORM_AFTER_RO`.
    pub const CFAR: usize = 4;
}
67 | |
68 | impl GlyphInfo { |
69 | fn set_khmer_properties(&mut self) { |
70 | let u = self.glyph_id; |
71 | let (mut cat, pos) = super::indic::get_category_and_position(u); |
72 | |
73 | // Re-assign category |
74 | |
75 | // These categories are experimentally extracted from what Uniscribe allows. |
76 | |
77 | match u { |
78 | 0x179A => cat = category::RA, |
79 | 0x17CC | 0x17C9 | 0x17CA => cat = category::ROBATIC, |
80 | 0x17C6 | 0x17CB | 0x17CD | 0x17CE | 0x17CF | 0x17D0 | 0x17D1 => cat = category::X_GROUP, |
81 | // Just guessing. Uniscribe doesn't categorize it. |
82 | 0x17C7 | 0x17C8 | 0x17DD | 0x17D3 => cat = category::Y_GROUP, |
83 | _ => {} |
84 | } |
85 | |
86 | // Re-assign position. |
87 | |
88 | if cat == category::M { |
89 | match pos { |
90 | position::PRE_C => cat = category::V_PRE, |
91 | position::BELOW_C => cat = category::V_BLW, |
92 | position::ABOVE_C => cat = category::V_AVB, |
93 | position::POST_C => cat = category::V_PST, |
94 | _ => {} |
95 | } |
96 | } |
97 | |
98 | self.set_indic_category(cat); |
99 | } |
100 | } |
101 | |
102 | struct KhmerShapePlan { |
103 | mask_array: [Mask; KHMER_FEATURES.len()], |
104 | } |
105 | |
106 | impl KhmerShapePlan { |
107 | fn new(plan: &ShapePlan) -> Self { |
108 | let mut mask_array: [u32; 9] = [0; KHMER_FEATURES.len()]; |
109 | for (i: usize, feature: &(Tag, FeatureFlags)) in KHMER_FEATURES.iter().enumerate() { |
110 | mask_array[i] = if feature.1.contains(FeatureFlags::GLOBAL) { |
111 | 0 |
112 | } else { |
113 | plan.ot_map.one_mask(feature_tag:feature.0) |
114 | } |
115 | } |
116 | |
117 | KhmerShapePlan { mask_array } |
118 | } |
119 | } |
120 | |
121 | fn collect_features(planner: &mut ShapePlanner) { |
122 | // Do this before any lookups have been applied. |
123 | planner.ot_map.add_gsub_pause(Some(setup_syllables)); |
124 | planner.ot_map.add_gsub_pause(Some(reorder)); |
125 | |
126 | // Testing suggests that Uniscribe does NOT pause between basic |
127 | // features. Test with KhmerUI.ttf and the following three |
128 | // sequences: |
129 | // |
130 | // U+1789,U+17BC |
131 | // U+1789,U+17D2,U+1789 |
132 | // U+1789,U+17D2,U+1789,U+17BC |
133 | // |
134 | // https://github.com/harfbuzz/harfbuzz/issues/974 |
135 | planner |
136 | .ot_map |
137 | .enable_feature(feature::LOCALIZED_FORMS, FeatureFlags::empty(), 1); |
138 | planner.ot_map.enable_feature( |
139 | feature::GLYPH_COMPOSITION_DECOMPOSITION, |
140 | FeatureFlags::empty(), |
141 | 1, |
142 | ); |
143 | |
144 | for feature in KHMER_FEATURES.iter().take(5) { |
145 | planner.ot_map.add_feature(feature.0, feature.1, 1); |
146 | } |
147 | |
148 | planner |
149 | .ot_map |
150 | .add_gsub_pause(Some(crate::ot::clear_syllables)); |
151 | |
152 | for feature in KHMER_FEATURES.iter().skip(5) { |
153 | planner.ot_map.add_feature(feature.0, feature.1, 1); |
154 | } |
155 | } |
156 | |
157 | fn setup_syllables(_: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
158 | super::khmer_machine::find_syllables_khmer(buffer); |
159 | |
160 | let mut start: usize = 0; |
161 | let mut end: usize = buffer.next_syllable(start:0); |
162 | while start < buffer.len { |
163 | buffer.unsafe_to_break(start:Some(start), end:Some(end)); |
164 | start = end; |
165 | end = buffer.next_syllable(start); |
166 | } |
167 | } |
168 | |
169 | fn reorder(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) { |
170 | use super::khmer_machine::SyllableType; |
171 | |
172 | syllabic::insert_dotted_circles( |
173 | face, |
174 | buffer, |
175 | broken_syllable_type:SyllableType::BrokenCluster as u8, |
176 | dottedcircle_category:category::DOTTED_CIRCLE, |
177 | repha_category:Some(category::REPHA), |
178 | dottedcircle_position:None, |
179 | ); |
180 | |
181 | let khmer_plan: &KhmerShapePlan = plan.data::<KhmerShapePlan>(); |
182 | |
183 | let mut start: usize = 0; |
184 | let mut end: usize = buffer.next_syllable(start:0); |
185 | while start < buffer.len { |
186 | reorder_syllable(khmer_plan, start, end, buffer); |
187 | start = end; |
188 | end = buffer.next_syllable(start); |
189 | } |
190 | } |
191 | |
192 | fn reorder_syllable(khmer_plan: &KhmerShapePlan, start: usize, end: usize, buffer: &mut Buffer) { |
193 | use super::khmer_machine::SyllableType; |
194 | |
195 | let syllable_type: SyllableType = match buffer.info[start].syllable() & 0x0F { |
196 | 0 => SyllableType::ConsonantSyllable, |
197 | 1 => SyllableType::BrokenCluster, |
198 | 2 => SyllableType::NonKhmerCluster, |
199 | _ => unreachable!(), |
200 | }; |
201 | |
202 | match syllable_type { |
203 | SyllableType::ConsonantSyllable | SyllableType::BrokenCluster => { |
204 | reorder_consonant_syllable(khmer_plan, start, end, buffer); |
205 | } |
206 | SyllableType::NonKhmerCluster => {} |
207 | } |
208 | } |
209 | |
210 | // Rules from: |
211 | // https://docs.microsoft.com/en-us/typography/script-development/devanagari |
212 | fn reorder_consonant_syllable( |
213 | plan: &KhmerShapePlan, |
214 | start: usize, |
215 | end: usize, |
216 | buffer: &mut Buffer, |
217 | ) { |
218 | // Setup masks. |
219 | { |
220 | // Post-base |
221 | let mask = plan.mask_array[khmer_feature::BLWF] |
222 | | plan.mask_array[khmer_feature::ABVF] |
223 | | plan.mask_array[khmer_feature::PSTF]; |
224 | for info in &mut buffer.info[start + 1..end] { |
225 | info.mask |= mask; |
226 | } |
227 | } |
228 | |
229 | let mut num_coengs = 0; |
230 | for i in start + 1..end { |
231 | // When a COENG + (Cons | IndV) combination are found (and subscript count |
232 | // is less than two) the character combination is handled according to the |
233 | // subscript type of the character following the COENG. |
234 | // |
235 | // ... |
236 | // |
237 | // Subscript Type 2 - The COENG + RO characters are reordered to immediately |
238 | // before the base glyph. Then the COENG + RO characters are assigned to have |
239 | // the 'pref' OpenType feature applied to them. |
240 | if buffer.info[i].indic_category() == category::COENG && num_coengs <= 2 && i + 1 < end { |
241 | num_coengs += 1; |
242 | |
243 | if buffer.info[i + 1].indic_category() == category::RA { |
244 | for j in 0..2 { |
245 | buffer.info[i + j].mask |= plan.mask_array[khmer_feature::PREF]; |
246 | } |
247 | |
248 | // Move the Coeng,Ro sequence to the start. |
249 | buffer.merge_clusters(start, i + 2); |
250 | let t0 = buffer.info[i]; |
251 | let t1 = buffer.info[i + 1]; |
252 | for k in (0..i - start).rev() { |
253 | buffer.info[k + start + 2] = buffer.info[k + start]; |
254 | } |
255 | |
256 | buffer.info[start] = t0; |
257 | buffer.info[start + 1] = t1; |
258 | |
259 | // Mark the subsequent stuff with 'cfar'. Used in Khmer. |
260 | // Read the feature spec. |
261 | // This allows distinguishing the following cases with MS Khmer fonts: |
262 | // U+1784,U+17D2,U+179A,U+17D2,U+1782 |
263 | // U+1784,U+17D2,U+1782,U+17D2,U+179A |
264 | if plan.mask_array[khmer_feature::CFAR] != 0 { |
265 | for j in i + 2..end { |
266 | buffer.info[j].mask |= plan.mask_array[khmer_feature::CFAR]; |
267 | } |
268 | } |
269 | |
270 | num_coengs = 2; // Done. |
271 | } |
272 | } else if buffer.info[i].indic_category() == category::V_PRE { |
273 | // Reorder left matra piece. |
274 | |
275 | // Move to the start. |
276 | buffer.merge_clusters(start, i + 1); |
277 | let t = buffer.info[i]; |
278 | for k in (0..i - start).rev() { |
279 | buffer.info[k + start + 1] = buffer.info[k + start]; |
280 | } |
281 | buffer.info[start] = t; |
282 | } |
283 | } |
284 | } |
285 | |
286 | fn override_features(planner: &mut ShapePlanner) { |
287 | // Khmer spec has 'clig' as part of required shaping features: |
288 | // "Apply feature 'clig' to form ligatures that are desired for |
289 | // typographical correctness.", hence in overrides... |
290 | planner |
291 | .ot_map |
292 | .enable_feature(tag:feature::CONTEXTUAL_LIGATURES, flags:FeatureFlags::empty(), value:1); |
293 | |
294 | planner.ot_map.disable_feature(tag:feature::STANDARD_LIGATURES); |
295 | } |
296 | |
297 | fn decompose(_: &ShapeNormalizeContext, ab: char) -> Option<(char, char)> { |
298 | // Decompose split matras that don't have Unicode decompositions. |
299 | match ab { |
300 | ' \u{17BE}' | ' \u{17BF}' | ' \u{17C0}' | ' \u{17C4}' | ' \u{17C5}' => Some((' \u{17C1}' , ab)), |
301 | _ => crate::unicode::decompose(ab), |
302 | } |
303 | } |
304 | |
305 | fn compose(_: &ShapeNormalizeContext, a: char, b: char) -> Option<char> { |
306 | // Avoid recomposing split matras. |
307 | if a.general_category().is_mark() { |
308 | return None; |
309 | } |
310 | |
311 | crate::unicode::compose(a, b) |
312 | } |
313 | |
314 | fn setup_masks(_: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
315 | // We cannot setup masks here. We save information about characters |
316 | // and setup masks later on in a pause-callback. |
317 | for info: &mut GlyphInfo in buffer.info_slice_mut() { |
318 | info.set_khmer_properties(); |
319 | } |
320 | } |
321 | |