1 | use alloc::boxed::Box; |
2 | use core::cmp; |
3 | use core::convert::TryFrom; |
4 | use core::ops::Range; |
5 | |
6 | use ttf_parser::GlyphId; |
7 | |
8 | use super::algs::*; |
9 | use super::buffer::hb_buffer_t; |
10 | use super::ot_layout::*; |
11 | use super::ot_layout_gsubgpos::{WouldApply, WouldApplyContext}; |
12 | use super::ot_map::*; |
13 | use super::ot_shape::*; |
14 | use super::ot_shape_normalize::*; |
15 | use super::ot_shape_plan::hb_ot_shape_plan_t; |
16 | use super::ot_shaper::*; |
17 | use super::unicode::{hb_gc, CharExt, GeneralCategoryExt}; |
18 | use super::{hb_font_t, hb_glyph_info_t, hb_mask_t, hb_tag_t, script, Script}; |
19 | |
20 | pub const INDIC_SHAPER: hb_ot_shaper_t = hb_ot_shaper_t { |
21 | collect_features: Some(collect_features), |
22 | override_features: Some(override_features), |
23 | create_data: Some(|plan: &hb_ot_shape_plan_t| Box::new(IndicShapePlan::new(plan))), |
24 | preprocess_text: Some(preprocess_text), |
25 | postprocess_glyphs: None, |
26 | normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, |
27 | decompose: Some(decompose), |
28 | compose: Some(compose), |
29 | setup_masks: Some(setup_masks), |
30 | gpos_tag: None, |
31 | reorder_marks: None, |
32 | zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, |
33 | fallback_position: false, |
34 | }; |
35 | |
/// Shaper category of a glyph, stored as a single byte.
pub type Category = u8;

/// Category values shared by the Indic, Khmer and Myanmar shapers.
///
/// This mod doesn't exist in harfbuzz anymore. Instead, the corresponding values
/// are auto-generated by the various machines and stored in
/// `hb-ot-shaper-indic-table`. Whenever the values in the machines are updated,
/// they have to be updated here as well.
#[allow(dead_code)]
pub mod ot_category_t {
    pub const OT_X: u8 = 0;
    pub const OT_C: u8 = 1;
    pub const OT_V: u8 = 2;
    pub const OT_N: u8 = 3;
    pub const OT_H: u8 = 4;
    pub const OT_ZWNJ: u8 = 5;
    pub const OT_ZWJ: u8 = 6;
    pub const OT_M: u8 = 7;
    pub const OT_SM: u8 = 8;
    pub const OT_A: u8 = 9;
    /// Alias of [`OT_A`].
    pub const OT_VD: u8 = OT_A;
    pub const OT_PLACEHOLDER: u8 = 10;
    /// Alias of [`OT_PLACEHOLDER`].
    pub const OT_GB: u8 = OT_PLACEHOLDER;
    pub const OT_DOTTEDCIRCLE: u8 = 11;
    /// Register Shifter, used in Khmer OT spec.
    pub const OT_RS: u8 = 12;
    pub const OT_MPst: u8 = 13;
    /// Atomically-encoded logical or visual repha.
    pub const OT_Repha: u8 = 14;
    pub const OT_Ra: u8 = 15;
    /// Consonant-Medial.
    pub const OT_CM: u8 = 16;
    /// Avagraha, etc that take marks (SM,A,VD).
    pub const OT_Symbol: u8 = 17;
    pub const OT_CS: u8 = 18;

    // ---- Khmer & Myanmar shapers ----
    pub const OT_VAbv: u8 = 20;
    pub const OT_VBlw: u8 = 21;
    pub const OT_VPre: u8 = 22;
    pub const OT_VPst: u8 = 23;

    // ---- Khmer ----
    pub const OT_Robatic: u8 = 25;
    pub const OT_Xgroup: u8 = 26;
    pub const OT_Ygroup: u8 = 27;

    // ---- Myanmar ----
    /// Asat
    pub const OT_As: u8 = 32;
    /// Medial
    pub const OT_MH: u8 = 35;
    /// Medial
    pub const OT_MR: u8 = 36;
    /// Medial
    pub const OT_MW: u8 = 37;
    /// Medial
    pub const OT_MY: u8 = 38;
    /// Pwo and other tones
    pub const OT_PT: u8 = 39;
    /// Variation selectors
    pub const OT_VS: u8 = 40;
    /// Consonant medials
    pub const OT_ML: u8 = 41;

    /// This one doesn't exist in `ot_category_t` in harfbuzz, only in the
    /// Myanmar machine. However, in Rust we unfortunately can't export inside
    /// the Ragel file, so it is defined here as well. Needs to be kept in sync
    /// with the value in the machine.
    pub const IV: u8 = 2;
}
92 | |
/// Shaper position of a glyph, stored as a single byte.
pub type Position = u8;

/// Position values, numbered consecutively from `POS_START` (0) to `POS_END` (14).
pub mod ot_position_t {
    pub const POS_START: u8 = 0;

    // Before the base consonant.
    pub const POS_RA_TO_BECOME_REPH: u8 = 1;
    pub const POS_PRE_M: u8 = 2;
    pub const POS_PRE_C: u8 = 3;

    // The base consonant and what directly follows it.
    pub const POS_BASE_C: u8 = 4;
    pub const POS_AFTER_MAIN: u8 = 5;

    pub const POS_ABOVE_C: u8 = 6;

    // Below-base group.
    pub const POS_BEFORE_SUB: u8 = 7;
    pub const POS_BELOW_C: u8 = 8;
    pub const POS_AFTER_SUB: u8 = 9;

    // Post-base group.
    pub const POS_BEFORE_POST: u8 = 10;
    pub const POS_POST_C: u8 = 11;
    pub const POS_AFTER_POST: u8 = 12;

    pub const POS_SMVD: u8 = 13;

    pub const POS_END: u8 = 14;
}
118 | |
119 | const INDIC_FEATURES: &[(hb_tag_t, hb_ot_map_feature_flags_t)] = &[ |
120 | // Basic features. |
121 | // These features are applied in order, one at a time, after initial_reordering, |
122 | // constrained to the syllable. |
123 | ( |
124 | hb_tag_t::from_bytes(b"nukt" ), |
125 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
126 | ), |
127 | ( |
128 | hb_tag_t::from_bytes(b"akhn" ), |
129 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
130 | ), |
131 | ( |
132 | hb_tag_t::from_bytes(b"rphf" ), |
133 | F_MANUAL_JOINERS | F_PER_SYLLABLE, |
134 | ), |
135 | ( |
136 | hb_tag_t::from_bytes(b"rkrf" ), |
137 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
138 | ), |
139 | ( |
140 | hb_tag_t::from_bytes(b"pref" ), |
141 | F_MANUAL_JOINERS | F_PER_SYLLABLE, |
142 | ), |
143 | ( |
144 | hb_tag_t::from_bytes(b"blwf" ), |
145 | F_MANUAL_JOINERS | F_PER_SYLLABLE, |
146 | ), |
147 | ( |
148 | hb_tag_t::from_bytes(b"abvf" ), |
149 | F_MANUAL_JOINERS | F_PER_SYLLABLE, |
150 | ), |
151 | ( |
152 | hb_tag_t::from_bytes(b"half" ), |
153 | F_MANUAL_JOINERS | F_PER_SYLLABLE, |
154 | ), |
155 | ( |
156 | hb_tag_t::from_bytes(b"pstf" ), |
157 | F_MANUAL_JOINERS | F_PER_SYLLABLE, |
158 | ), |
159 | ( |
160 | hb_tag_t::from_bytes(b"vatu" ), |
161 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
162 | ), |
163 | ( |
164 | hb_tag_t::from_bytes(b"cjct" ), |
165 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
166 | ), |
167 | // Other features. |
168 | // These features are applied all at once, after final_reordering, constrained |
169 | // to the syllable. |
170 | // Default Bengali font in Windows for example has intermixed |
171 | // lookups for init,pres,abvs,blws features. |
172 | ( |
173 | hb_tag_t::from_bytes(b"init" ), |
174 | F_MANUAL_JOINERS | F_PER_SYLLABLE, |
175 | ), |
176 | ( |
177 | hb_tag_t::from_bytes(b"pres" ), |
178 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
179 | ), |
180 | ( |
181 | hb_tag_t::from_bytes(b"abvs" ), |
182 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
183 | ), |
184 | ( |
185 | hb_tag_t::from_bytes(b"blws" ), |
186 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
187 | ), |
188 | ( |
189 | hb_tag_t::from_bytes(b"psts" ), |
190 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
191 | ), |
192 | ( |
193 | hb_tag_t::from_bytes(b"haln" ), |
194 | F_GLOBAL_MANUAL_JOINERS | F_PER_SYLLABLE, |
195 | ), |
196 | ]; |
197 | |
/// Indices into `INDIC_FEATURES` (and into `IndicShapePlan::mask_array`).
///
/// Must be in the same order as the `INDIC_FEATURES` array. Each index is
/// defined relative to the previous one, so the sequence stays gap-free.
#[allow(dead_code)]
mod indic_feature {
    pub const NUKT: usize = 0;
    pub const AKHN: usize = NUKT + 1;
    pub const RPHF: usize = AKHN + 1;
    pub const RKRF: usize = RPHF + 1;
    pub const PREF: usize = RKRF + 1;
    pub const BLWF: usize = PREF + 1;
    pub const ABVF: usize = BLWF + 1;
    pub const HALF: usize = ABVF + 1;
    pub const PSTF: usize = HALF + 1;
    pub const VATU: usize = PSTF + 1;
    pub const CJCT: usize = VATU + 1;
    pub const INIT: usize = CJCT + 1;
    pub const PRES: usize = INIT + 1;
    pub const ABVS: usize = PRES + 1;
    pub const BLWS: usize = ABVS + 1;
    pub const PSTS: usize = BLWS + 1;
    pub const HALN: usize = PSTS + 1;
}
219 | |
220 | pub(crate) const fn category_flag(c: Category) -> u32 { |
221 | rb_flag(c as u32) |
222 | } |
223 | |
224 | // Note: |
225 | // |
226 | // We treat Vowels and placeholders as if they were consonants. This is safe because Vowels |
227 | // cannot happen in a consonant syllable. The plus side however is, we can call the |
228 | // consonant syllable logic from the vowel syllable function and get it all right! |
229 | const CONSONANT_FLAGS_INDIC: u32 = category_flag(ot_category_t::OT_C) |
230 | | category_flag(ot_category_t::OT_CS) |
231 | | category_flag(ot_category_t::OT_Ra) |
232 | | category_flag(ot_category_t::OT_CM) |
233 | | category_flag(ot_category_t::OT_V) |
234 | | category_flag(ot_category_t::OT_PLACEHOLDER) |
235 | | category_flag(ot_category_t::OT_DOTTEDCIRCLE); |
236 | |
237 | const CONSONANT_FLAGS_MYANMAR: u32 = category_flag(ot_category_t::OT_C) |
238 | | category_flag(ot_category_t::OT_CS) |
239 | | category_flag(ot_category_t::OT_Ra) |
240 | // | category_flag(ot_category_t::OT_CM) |
241 | | category_flag(ot_category_t::IV) |
242 | | category_flag(ot_category_t::OT_GB) |
243 | | category_flag(ot_category_t::OT_DOTTEDCIRCLE); |
244 | |
245 | const JOINER_FLAGS: u32 = |
246 | category_flag(ot_category_t::OT_ZWJ) | category_flag(ot_category_t::OT_ZWNJ); |
247 | |
248 | #[derive (Clone, Copy, PartialEq)] |
249 | enum RephPosition { |
250 | AfterMain = ot_position_t::POS_AFTER_MAIN as isize, |
251 | BeforeSub = ot_position_t::POS_BEFORE_SUB as isize, |
252 | AfterSub = ot_position_t::POS_AFTER_SUB as isize, |
253 | BeforePost = ot_position_t::POS_BEFORE_POST as isize, |
254 | AfterPost = ot_position_t::POS_AFTER_POST as isize, |
255 | } |
256 | |
/// How a script encodes its reph, selected per script in `INDIC_CONFIGS`.
#[derive(Copy, Clone, PartialEq)]
enum RephMode {
    /// Reph formed out of initial Ra,H sequence.
    Implicit,
    /// Reph formed out of initial Ra,H,ZWJ sequence.
    Explicit,
    /// Encoded Repha character, needs reordering.
    LogRepha,
}
266 | |
/// Where the below-forms feature applies, selected per script in `INDIC_CONFIGS`.
#[derive(Copy, Clone, PartialEq)]
enum BlwfMode {
    /// Below-forms feature applied to pre-base and post-base.
    PreAndPost,
    /// Below-forms feature applied to post-base only.
    PostOnly,
}
274 | |
275 | #[derive (Clone, Copy)] |
276 | struct IndicConfig { |
277 | script: Option<Script>, |
278 | has_old_spec: bool, |
279 | virama: u32, |
280 | reph_pos: RephPosition, |
281 | reph_mode: RephMode, |
282 | blwf_mode: BlwfMode, |
283 | } |
284 | |
285 | impl IndicConfig { |
286 | const fn new( |
287 | script: Option<Script>, |
288 | has_old_spec: bool, |
289 | virama: u32, |
290 | reph_pos: RephPosition, |
291 | reph_mode: RephMode, |
292 | blwf_mode: BlwfMode, |
293 | ) -> Self { |
294 | IndicConfig { |
295 | script, |
296 | has_old_spec, |
297 | virama, |
298 | reph_pos, |
299 | reph_mode, |
300 | blwf_mode, |
301 | } |
302 | } |
303 | } |
304 | |
305 | const INDIC_CONFIGS: &[IndicConfig] = &[ |
306 | IndicConfig::new( |
307 | script:None, |
308 | has_old_spec:false, |
309 | virama:0, |
310 | reph_pos:RephPosition::BeforePost, |
311 | RephMode::Implicit, |
312 | BlwfMode::PreAndPost, |
313 | ), |
314 | IndicConfig::new( |
315 | script:Some(script::DEVANAGARI), |
316 | has_old_spec:true, |
317 | virama:0x094D, |
318 | reph_pos:RephPosition::BeforePost, |
319 | RephMode::Implicit, |
320 | BlwfMode::PreAndPost, |
321 | ), |
322 | IndicConfig::new( |
323 | script:Some(script::BENGALI), |
324 | has_old_spec:true, |
325 | virama:0x09CD, |
326 | reph_pos:RephPosition::AfterSub, |
327 | RephMode::Implicit, |
328 | BlwfMode::PreAndPost, |
329 | ), |
330 | IndicConfig::new( |
331 | script:Some(script::GURMUKHI), |
332 | has_old_spec:true, |
333 | virama:0x0A4D, |
334 | reph_pos:RephPosition::BeforeSub, |
335 | RephMode::Implicit, |
336 | BlwfMode::PreAndPost, |
337 | ), |
338 | IndicConfig::new( |
339 | script:Some(script::GUJARATI), |
340 | has_old_spec:true, |
341 | virama:0x0ACD, |
342 | reph_pos:RephPosition::BeforePost, |
343 | RephMode::Implicit, |
344 | BlwfMode::PreAndPost, |
345 | ), |
346 | IndicConfig::new( |
347 | script:Some(script::ORIYA), |
348 | has_old_spec:true, |
349 | virama:0x0B4D, |
350 | reph_pos:RephPosition::AfterMain, |
351 | RephMode::Implicit, |
352 | BlwfMode::PreAndPost, |
353 | ), |
354 | IndicConfig::new( |
355 | script:Some(script::TAMIL), |
356 | has_old_spec:true, |
357 | virama:0x0BCD, |
358 | reph_pos:RephPosition::AfterPost, |
359 | RephMode::Implicit, |
360 | BlwfMode::PreAndPost, |
361 | ), |
362 | IndicConfig::new( |
363 | script:Some(script::TELUGU), |
364 | has_old_spec:true, |
365 | virama:0x0C4D, |
366 | reph_pos:RephPosition::AfterPost, |
367 | RephMode::Explicit, |
368 | BlwfMode::PostOnly, |
369 | ), |
370 | IndicConfig::new( |
371 | script:Some(script::KANNADA), |
372 | has_old_spec:true, |
373 | virama:0x0CCD, |
374 | reph_pos:RephPosition::AfterPost, |
375 | RephMode::Implicit, |
376 | BlwfMode::PostOnly, |
377 | ), |
378 | IndicConfig::new( |
379 | script:Some(script::MALAYALAM), |
380 | has_old_spec:true, |
381 | virama:0x0D4D, |
382 | reph_pos:RephPosition::AfterMain, |
383 | RephMode::LogRepha, |
384 | BlwfMode::PreAndPost, |
385 | ), |
386 | IndicConfig::new( |
387 | script:Some(script::SINHALA), |
388 | has_old_spec:false, |
389 | virama:0x0DCA, |
390 | reph_pos:RephPosition::AfterPost, |
391 | RephMode::Explicit, |
392 | BlwfMode::PreAndPost, |
393 | ), |
394 | ]; |
395 | |
/// The GSUB lookups of one feature, kept so that glyph sequences can be tested
/// with `would_substitute` without actually applying the lookups.
struct IndicWouldSubstituteFeature {
    /// Range of GSUB lookup indices in the map; empty when the feature has no stage.
    lookups: Range<usize>,
    /// Passed through as `WouldApplyContext::zero_context` on every test.
    zero_context: bool,
}
400 | |
401 | impl IndicWouldSubstituteFeature { |
402 | pub fn new(map: &hb_ot_map_t, feature_tag: hb_tag_t, zero_context: bool) -> Self { |
403 | IndicWouldSubstituteFeature { |
404 | lookups: match map.get_feature_stage(TableIndex::GSUB, feature_tag) { |
405 | Some(stage) => map.stage_lookup_range(TableIndex::GSUB, stage), |
406 | None => 0..0, |
407 | }, |
408 | zero_context, |
409 | } |
410 | } |
411 | |
412 | pub fn would_substitute( |
413 | &self, |
414 | map: &hb_ot_map_t, |
415 | face: &hb_font_t, |
416 | glyphs: &[GlyphId], |
417 | ) -> bool { |
418 | for index in self.lookups.clone() { |
419 | let lookup = map.lookup(TableIndex::GSUB, index); |
420 | let ctx = WouldApplyContext { |
421 | glyphs, |
422 | zero_context: self.zero_context, |
423 | }; |
424 | if face |
425 | .gsub |
426 | .as_ref() |
427 | .and_then(|table| table.get_lookup(lookup.index)) |
428 | .map_or(false, |lookup| lookup.would_apply(&ctx)) |
429 | { |
430 | return true; |
431 | } |
432 | } |
433 | |
434 | false |
435 | } |
436 | } |
437 | |
438 | struct IndicShapePlan { |
439 | config: IndicConfig, |
440 | is_old_spec: bool, |
441 | // virama_glyph: Option<u32>, |
442 | rphf: IndicWouldSubstituteFeature, |
443 | pref: IndicWouldSubstituteFeature, |
444 | blwf: IndicWouldSubstituteFeature, |
445 | pstf: IndicWouldSubstituteFeature, |
446 | vatu: IndicWouldSubstituteFeature, |
447 | mask_array: [hb_mask_t; INDIC_FEATURES.len()], |
448 | } |
449 | |
450 | impl IndicShapePlan { |
451 | fn new(plan: &hb_ot_shape_plan_t) -> Self { |
452 | let script = plan.script; |
453 | let config = if let Some(c) = INDIC_CONFIGS.iter().skip(1).find(|c| c.script == script) { |
454 | *c |
455 | } else { |
456 | INDIC_CONFIGS[0] |
457 | }; |
458 | |
459 | let is_old_spec = config.has_old_spec |
460 | && plan |
461 | .ot_map |
462 | .chosen_script(TableIndex::GSUB) |
463 | .map_or(true, |tag| tag.to_bytes()[3] != b'2' ); |
464 | |
465 | // Use zero-context would_substitute() matching for new-spec of the main |
466 | // Indic scripts, and scripts with one spec only, but not for old-specs. |
467 | // The new-spec for all dual-spec scripts says zero-context matching happens. |
468 | // |
469 | // However, testing with Malayalam shows that old and new spec both allow |
470 | // context. Testing with Bengali new-spec however shows that it doesn't. |
471 | // So, the heuristic here is the way it is. It should *only* be changed, |
472 | // as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE. |
473 | let zero_context = is_old_spec && script != Some(script::MALAYALAM); |
474 | |
475 | let mut mask_array = [0; INDIC_FEATURES.len()]; |
476 | for (i, feature) in INDIC_FEATURES.iter().enumerate() { |
477 | mask_array[i] = if feature.1 & F_GLOBAL != 0 { |
478 | 0 |
479 | } else { |
480 | plan.ot_map.get_1_mask(feature.0) |
481 | } |
482 | } |
483 | |
484 | // TODO: what is this? |
485 | // let mut virama_glyph = None; |
486 | // if config.virama != 0 { |
487 | // if let Some(g) = face.glyph_index(char::try_from(config.virama).unwrap()) { |
488 | // virama_glyph = Some(g.0 as u32); |
489 | // } |
490 | // } |
491 | |
492 | IndicShapePlan { |
493 | config, |
494 | is_old_spec, |
495 | // virama_glyph, |
496 | rphf: IndicWouldSubstituteFeature::new( |
497 | &plan.ot_map, |
498 | hb_tag_t::from_bytes(b"rphf" ), |
499 | zero_context, |
500 | ), |
501 | pref: IndicWouldSubstituteFeature::new( |
502 | &plan.ot_map, |
503 | hb_tag_t::from_bytes(b"pref" ), |
504 | zero_context, |
505 | ), |
506 | blwf: IndicWouldSubstituteFeature::new( |
507 | &plan.ot_map, |
508 | hb_tag_t::from_bytes(b"blwf" ), |
509 | zero_context, |
510 | ), |
511 | pstf: IndicWouldSubstituteFeature::new( |
512 | &plan.ot_map, |
513 | hb_tag_t::from_bytes(b"pstf" ), |
514 | zero_context, |
515 | ), |
516 | vatu: IndicWouldSubstituteFeature::new( |
517 | &plan.ot_map, |
518 | hb_tag_t::from_bytes(b"vatu" ), |
519 | zero_context, |
520 | ), |
521 | mask_array, |
522 | } |
523 | } |
524 | } |
525 | |
526 | impl hb_glyph_info_t { |
527 | pub(crate) fn indic_category(&self) -> Category { |
528 | self.ot_shaper_var_u8_category() |
529 | } |
530 | |
531 | pub(crate) fn myanmar_category(&self) -> Category { |
532 | self.ot_shaper_var_u8_category() |
533 | } |
534 | |
535 | pub(crate) fn khmer_category(&self) -> Category { |
536 | self.ot_shaper_var_u8_category() |
537 | } |
538 | |
539 | pub(crate) fn set_indic_category(&mut self, c: Category) { |
540 | self.set_ot_shaper_var_u8_category(c) |
541 | } |
542 | |
543 | pub(crate) fn set_myanmar_category(&mut self, c: Category) { |
544 | self.set_ot_shaper_var_u8_category(c) |
545 | } |
546 | |
547 | pub(crate) fn indic_position(&self) -> Position { |
548 | self.ot_shaper_var_u8_auxiliary() |
549 | } |
550 | |
551 | pub(crate) fn myanmar_position(&self) -> Position { |
552 | self.ot_shaper_var_u8_auxiliary() |
553 | } |
554 | |
555 | pub(crate) fn set_indic_position(&mut self, c: Position) { |
556 | self.set_ot_shaper_var_u8_auxiliary(c) |
557 | } |
558 | |
559 | pub(crate) fn set_myanmar_position(&mut self, c: Position) { |
560 | self.set_ot_shaper_var_u8_auxiliary(c) |
561 | } |
562 | |
563 | fn is_one_of(&self, flags: u32) -> bool { |
564 | // If it ligated, all bets are off. |
565 | if _hb_glyph_info_ligated(self) { |
566 | return false; |
567 | } |
568 | |
569 | rb_flag_unsafe(self.indic_category() as u32) & flags != 0 |
570 | } |
571 | |
572 | fn is_joiner(&self) -> bool { |
573 | self.is_one_of(JOINER_FLAGS) |
574 | } |
575 | |
576 | pub(crate) fn is_consonant(&self) -> bool { |
577 | self.is_one_of(CONSONANT_FLAGS_INDIC) |
578 | } |
579 | |
580 | pub(crate) fn is_consonant_myanmar(&self) -> bool { |
581 | self.is_one_of(CONSONANT_FLAGS_MYANMAR) |
582 | } |
583 | |
584 | fn is_halant(&self) -> bool { |
585 | self.is_one_of(rb_flag(ot_category_t::OT_H as u32)) |
586 | } |
587 | |
588 | fn set_indic_properties(&mut self) { |
589 | let u = self.glyph_id; |
590 | let (cat, pos) = crate::hb::ot_shaper_indic_table::get_categories(u); |
591 | |
592 | self.set_indic_category(cat); |
593 | self.set_indic_position(pos); |
594 | } |
595 | } |
596 | |
597 | fn collect_features(planner: &mut hb_ot_shape_planner_t) { |
598 | // Do this before any lookups have been applied. |
599 | planner.ot_map.add_gsub_pause(Some(setup_syllables)); |
600 | |
601 | planner |
602 | .ot_map |
603 | .enable_feature(hb_tag_t::from_bytes(b"locl" ), F_PER_SYLLABLE, 1); |
604 | // The Indic specs do not require ccmp, but we apply it here since if |
605 | // there is a use of it, it's typically at the beginning. |
606 | planner |
607 | .ot_map |
608 | .enable_feature(hb_tag_t::from_bytes(b"ccmp" ), F_PER_SYLLABLE, 1); |
609 | |
610 | planner.ot_map.add_gsub_pause(Some(initial_reordering)); |
611 | |
612 | for feature in INDIC_FEATURES.iter().take(11) { |
613 | planner.ot_map.add_feature(feature.0, feature.1, 1); |
614 | planner.ot_map.add_gsub_pause(None); |
615 | } |
616 | |
617 | planner.ot_map.add_gsub_pause(Some(final_reordering)); |
618 | |
619 | for feature in INDIC_FEATURES.iter().skip(11) { |
620 | planner.ot_map.add_feature(feature.0, feature.1, 1); |
621 | } |
622 | } |
623 | |
624 | fn override_features(planner: &mut hb_ot_shape_planner_t) { |
625 | planner |
626 | .ot_map |
627 | .disable_feature(tag:hb_tag_t::from_bytes(b"liga" )); |
628 | planner.ot_map.add_gsub_pause(Some(syllabic_clear_var)); // Don't need syllables anymore. |
629 | } |
630 | |
631 | fn preprocess_text(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) { |
632 | super::ot_shaper_vowel_constraints::preprocess_text_vowel_constraints(buffer); |
633 | } |
634 | |
635 | fn decompose(_: &hb_ot_shape_normalize_context_t, ab: char) -> Option<(char, char)> { |
636 | // Don't decompose these. |
637 | match ab { |
638 | ' \u{0931}' | // DEVANAGARI LETTER RRA |
639 | // https://github.com/harfbuzz/harfbuzz/issues/779 |
640 | ' \u{09DC}' | // BENGALI LETTER RRA |
641 | ' \u{09DD}' | // BENGALI LETTER RHA |
642 | ' \u{0B94}' => return None, // TAMIL LETTER AU |
643 | _ => {} |
644 | } |
645 | |
646 | crate::hb::unicode::decompose(ab) |
647 | } |
648 | |
649 | fn compose(_: &hb_ot_shape_normalize_context_t, a: char, b: char) -> Option<char> { |
650 | // Avoid recomposing split matras. |
651 | if a.general_category().is_mark() { |
652 | return None; |
653 | } |
654 | |
655 | // Composition-exclusion exceptions that we want to recompose. |
656 | if a == ' \u{09AF}' && b == ' \u{09BC}' { |
657 | return Some(' \u{09DF}' ); |
658 | } |
659 | |
660 | crate::hb::unicode::compose(a, b) |
661 | } |
662 | |
663 | fn setup_masks(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) { |
664 | // We cannot setup masks here. We save information about characters |
665 | // and setup masks later on in a pause-callback. |
666 | for info: &mut hb_glyph_info_t in buffer.info_slice_mut() { |
667 | info.set_indic_properties(); |
668 | } |
669 | } |
670 | |
671 | fn setup_syllables(_: &hb_ot_shape_plan_t, _: &hb_font_t, buffer: &mut hb_buffer_t) -> bool { |
672 | super::ot_shaper_indic_machine::find_syllables_indic(buffer); |
673 | |
674 | let mut start: usize = 0; |
675 | let mut end: usize = buffer.next_syllable(start:0); |
676 | while start < buffer.len { |
677 | buffer.unsafe_to_break(start:Some(start), end:Some(end)); |
678 | start = end; |
679 | end = buffer.next_syllable(start); |
680 | } |
681 | |
682 | false |
683 | } |
684 | |
685 | fn initial_reordering( |
686 | plan: &hb_ot_shape_plan_t, |
687 | face: &hb_font_t, |
688 | buffer: &mut hb_buffer_t, |
689 | ) -> bool { |
690 | use super::ot_shaper_indic_machine::SyllableType; |
691 | |
692 | let mut ret = false; |
693 | |
694 | let indic_plan = plan.data::<IndicShapePlan>(); |
695 | |
696 | update_consonant_positions(plan, indic_plan, face, buffer); |
697 | if super::ot_shaper_syllabic::insert_dotted_circles( |
698 | face, |
699 | buffer, |
700 | SyllableType::BrokenCluster as u8, |
701 | ot_category_t::OT_DOTTEDCIRCLE, |
702 | Some(ot_category_t::OT_Repha), |
703 | Some(ot_position_t::POS_END), |
704 | ) { |
705 | ret = true; |
706 | } |
707 | |
708 | let mut start = 0; |
709 | let mut end = buffer.next_syllable(0); |
710 | while start < buffer.len { |
711 | initial_reordering_syllable(plan, indic_plan, face, start, end, buffer); |
712 | start = end; |
713 | end = buffer.next_syllable(start); |
714 | } |
715 | |
716 | ret |
717 | } |
718 | |
719 | fn update_consonant_positions( |
720 | plan: &hb_ot_shape_plan_t, |
721 | indic_plan: &IndicShapePlan, |
722 | face: &hb_font_t, |
723 | buffer: &mut hb_buffer_t, |
724 | ) { |
725 | let mut virama_glyph: Option = None; |
726 | if indic_plan.config.virama != 0 { |
727 | virama_glyph = face.get_nominal_glyph(indic_plan.config.virama); |
728 | } |
729 | |
730 | if let Some(virama: GlyphId) = virama_glyph { |
731 | for info: &mut hb_glyph_info_t in buffer.info_slice_mut() { |
732 | if info.indic_position() == ot_position_t::POS_BASE_C { |
733 | let consonant: GlyphId = info.as_glyph(); |
734 | info.set_indic_position(consonant_position_from_face( |
735 | plan, indic_plan, face, consonant, virama, |
736 | )); |
737 | } |
738 | } |
739 | } |
740 | } |
741 | |
742 | fn consonant_position_from_face( |
743 | plan: &hb_ot_shape_plan_t, |
744 | indic_plan: &IndicShapePlan, |
745 | face: &hb_font_t, |
746 | consonant: GlyphId, |
747 | virama: GlyphId, |
748 | ) -> u8 { |
749 | // For old-spec, the order of glyphs is Consonant,Virama, |
750 | // whereas for new-spec, it's Virama,Consonant. However, |
751 | // some broken fonts (like Free Sans) simply copied lookups |
752 | // from old-spec to new-spec without modification. |
753 | // And oddly enough, Uniscribe seems to respect those lookups. |
754 | // Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds |
755 | // base at 0. The font however, only has lookups matching |
756 | // 930,94D in 'blwf', not the expected 94D,930 (with new-spec |
757 | // table). As such, we simply match both sequences. Seems |
758 | // to work. |
759 | // |
760 | // Vatu is done as well, for: |
761 | // https://github.com/harfbuzz/harfbuzz/issues/1587 |
762 | |
763 | if indic_plan |
764 | .blwf |
765 | .would_substitute(&plan.ot_map, face, &[virama, consonant]) |
766 | || indic_plan |
767 | .blwf |
768 | .would_substitute(&plan.ot_map, face, &[consonant, virama]) |
769 | || indic_plan |
770 | .vatu |
771 | .would_substitute(&plan.ot_map, face, &[virama, consonant]) |
772 | || indic_plan |
773 | .vatu |
774 | .would_substitute(&plan.ot_map, face, &[consonant, virama]) |
775 | { |
776 | return ot_position_t::POS_BELOW_C; |
777 | } |
778 | |
779 | if indic_plan |
780 | .pstf |
781 | .would_substitute(&plan.ot_map, face, &[virama, consonant]) |
782 | || indic_plan |
783 | .pstf |
784 | .would_substitute(&plan.ot_map, face, &[consonant, virama]) |
785 | { |
786 | return ot_position_t::POS_POST_C; |
787 | } |
788 | |
789 | if indic_plan |
790 | .pref |
791 | .would_substitute(&plan.ot_map, face, &[virama, consonant]) |
792 | || indic_plan |
793 | .pref |
794 | .would_substitute(&plan.ot_map, face, &[consonant, virama]) |
795 | { |
796 | return ot_position_t::POS_POST_C; |
797 | } |
798 | |
799 | ot_position_t::POS_BASE_C |
800 | } |
801 | |
802 | fn initial_reordering_syllable( |
803 | plan: &hb_ot_shape_plan_t, |
804 | indic_plan: &IndicShapePlan, |
805 | face: &hb_font_t, |
806 | start: usize, |
807 | end: usize, |
808 | buffer: &mut hb_buffer_t, |
809 | ) { |
810 | use super::ot_shaper_indic_machine::SyllableType; |
811 | |
812 | let syllable_type = match buffer.info[start].syllable() & 0x0F { |
813 | 0 => SyllableType::ConsonantSyllable, |
814 | 1 => SyllableType::VowelSyllable, |
815 | 2 => SyllableType::StandaloneCluster, |
816 | 3 => SyllableType::SymbolCluster, |
817 | 4 => SyllableType::BrokenCluster, |
818 | 5 => SyllableType::NonIndicCluster, |
819 | _ => unreachable!(), |
820 | }; |
821 | |
822 | match syllable_type { |
823 | // We made the vowels look like consonants. So let's call the consonant logic! |
824 | SyllableType::VowelSyllable | SyllableType::ConsonantSyllable => { |
825 | initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer); |
826 | } |
827 | // We already inserted dotted-circles, so just call the standalone_cluster. |
828 | SyllableType::BrokenCluster | SyllableType::StandaloneCluster => { |
829 | initial_reordering_standalone_cluster(plan, indic_plan, face, start, end, buffer); |
830 | } |
831 | SyllableType::SymbolCluster | SyllableType::NonIndicCluster => {} |
832 | } |
833 | } |
834 | |
835 | // Rules from: |
// https://docs.microsoft.com/en-us/typography/script-development/devanagari
837 | fn initial_reordering_consonant_syllable( |
838 | plan: &hb_ot_shape_plan_t, |
839 | indic_plan: &IndicShapePlan, |
840 | face: &hb_font_t, |
841 | start: usize, |
842 | end: usize, |
843 | buffer: &mut hb_buffer_t, |
844 | ) { |
845 | // https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167 |
846 | // For compatibility with legacy usage in Kannada, |
847 | // Ra+h+ZWJ must behave like Ra+ZWJ+h... |
848 | if buffer.script == Some(script::KANNADA) |
849 | && start + 3 <= end |
850 | && buffer.info[start].is_one_of(category_flag(ot_category_t::OT_Ra)) |
851 | && buffer.info[start + 1].is_one_of(category_flag(ot_category_t::OT_H)) |
852 | && buffer.info[start + 2].is_one_of(category_flag(ot_category_t::OT_ZWJ)) |
853 | { |
854 | buffer.merge_clusters(start + 1, start + 3); |
855 | buffer.info.swap(start + 1, start + 2); |
856 | } |
857 | |
858 | // 1. Find base consonant: |
859 | // |
860 | // The shaping engine finds the base consonant of the syllable, using the |
861 | // following algorithm: starting from the end of the syllable, move backwards |
862 | // until a consonant is found that does not have a below-base or post-base |
863 | // form (post-base forms have to follow below-base forms), or that is not a |
864 | // pre-base-reordering Ra, or arrive at the first consonant. The consonant |
865 | // stopped at will be the base. |
866 | // |
867 | // - If the syllable starts with Ra + Halant (in a script that has Reph) |
868 | // and has more than one consonant, Ra is excluded from candidates for |
869 | // base consonants. |
870 | |
871 | let mut base = end; |
872 | let mut has_reph = false; |
873 | |
874 | { |
875 | // -> If the syllable starts with Ra + Halant (in a script that has Reph) |
876 | // and has more than one consonant, Ra is excluded from candidates for |
877 | // base consonants. |
878 | let mut limit = start; |
879 | if indic_plan.mask_array[indic_feature::RPHF] != 0 |
880 | && start + 3 <= end |
881 | && ((indic_plan.config.reph_mode == RephMode::Implicit |
882 | && !buffer.info[start + 2].is_joiner()) |
883 | || (indic_plan.config.reph_mode == RephMode::Explicit |
884 | && buffer.info[start + 2].indic_category() == ot_category_t::OT_ZWJ)) |
885 | { |
886 | // See if it matches the 'rphf' feature. |
887 | let glyphs = &[ |
888 | buffer.info[start].as_glyph(), |
889 | buffer.info[start + 1].as_glyph(), |
890 | if indic_plan.config.reph_mode == RephMode::Explicit { |
891 | buffer.info[start + 2].as_glyph() |
892 | } else { |
893 | GlyphId(0) |
894 | }, |
895 | ]; |
896 | if indic_plan |
897 | .rphf |
898 | .would_substitute(&plan.ot_map, face, &glyphs[0..2]) |
899 | || (indic_plan.config.reph_mode == RephMode::Explicit |
900 | && indic_plan.rphf.would_substitute(&plan.ot_map, face, glyphs)) |
901 | { |
902 | limit += 2; |
903 | while limit < end && buffer.info[limit].is_joiner() { |
904 | limit += 1; |
905 | } |
906 | base = start; |
907 | has_reph = true; |
908 | } |
909 | } else if indic_plan.config.reph_mode == RephMode::LogRepha |
910 | && buffer.info[start].indic_category() == ot_category_t::OT_Repha |
911 | { |
912 | limit += 1; |
913 | while limit < end && buffer.info[limit].is_joiner() { |
914 | limit += 1; |
915 | } |
916 | base = start; |
917 | has_reph = true; |
918 | } |
919 | |
920 | { |
921 | // -> starting from the end of the syllable, move backwards |
922 | let mut i = end; |
923 | let mut seen_below = false; |
924 | loop { |
925 | i -= 1; |
926 | // -> until a consonant is found |
927 | if buffer.info[i].is_consonant_myanmar() { |
928 | // -> that does not have a below-base or post-base form |
929 | // (post-base forms have to follow below-base forms), |
930 | if buffer.info[i].indic_position() != ot_position_t::POS_BELOW_C |
931 | && (buffer.info[i].indic_position() != ot_position_t::POS_POST_C |
932 | || seen_below) |
933 | { |
934 | base = i; |
935 | break; |
936 | } |
937 | if buffer.info[i].indic_position() == ot_position_t::POS_BELOW_C { |
938 | seen_below = true; |
939 | } |
940 | |
941 | // -> or that is not a pre-base-reordering Ra, |
942 | // |
943 | // IMPLEMENTATION NOTES: |
944 | // |
945 | // Our pre-base-reordering Ra's are marked position::PostC, so will be skipped |
946 | // by the logic above already. |
947 | |
948 | // -> or arrive at the first consonant. The consonant stopped at will |
949 | // be the base. |
950 | base = i; |
951 | } else { |
952 | // A ZWJ after a Halant stops the base search, and requests an explicit |
953 | // half form. |
954 | // A ZWJ before a Halant, requests a subjoined form instead, and hence |
955 | // search continues. This is particularly important for Bengali |
956 | // sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. |
957 | if start < i |
958 | && buffer.info[i].indic_category() == ot_category_t::OT_ZWJ |
959 | && buffer.info[i - 1].indic_category() == ot_category_t::OT_H |
960 | { |
961 | break; |
962 | } |
963 | } |
964 | |
965 | if i <= limit { |
966 | break; |
967 | } |
968 | } |
969 | } |
970 | |
971 | // -> If the syllable starts with Ra + Halant (in a script that has Reph) |
972 | // and has more than one consonant, Ra is excluded from candidates for |
973 | // base consonants. |
974 | // |
        // Only do this for unforced Reph (i.e. not for Ra,H,ZWJ).
976 | if has_reph && base == start && limit - base <= 2 { |
977 | // Have no other consonant, so Reph is not formed and Ra becomes base. |
978 | has_reph = false; |
979 | } |
980 | } |
981 | |
982 | // 2. Decompose and reorder Matras: |
983 | // |
984 | // Each matra and any syllable modifier sign in the syllable are moved to the |
985 | // appropriate position relative to the consonant(s) in the syllable. The |
986 | // shaping engine decomposes two- or three-part matras into their constituent |
987 | // parts before any repositioning. Matra characters are classified by which |
988 | // consonant in a conjunct they have affinity for and are reordered to the |
989 | // following positions: |
990 | // |
991 | // - Before first half form in the syllable |
992 | // - After subjoined consonants |
993 | // - After post-form consonant |
994 | // - After main consonant (for above marks) |
995 | // |
996 | // IMPLEMENTATION NOTES: |
997 | // |
998 | // The normalize() routine has already decomposed matras for us, so we don't |
999 | // need to worry about that. |
1000 | |
1001 | // 3. Reorder marks to canonical order: |
1002 | // |
1003 | // Adjacent nukta and halant or nukta and vedic sign are always repositioned |
1004 | // if necessary, so that the nukta is first. |
1005 | // |
1006 | // IMPLEMENTATION NOTES: |
1007 | // |
1008 | // We don't need to do this: the normalize() routine already did this for us. |
1009 | |
1010 | // Reorder characters |
1011 | |
1012 | for i in start..base { |
1013 | let pos = buffer.info[i].indic_position(); |
1014 | buffer.info[i].set_indic_position(cmp::min(ot_position_t::POS_PRE_C, pos)); |
1015 | } |
1016 | |
1017 | if base < end { |
1018 | buffer.info[base].set_indic_position(ot_position_t::POS_BASE_C); |
1019 | } |
1020 | |
1021 | // Handle beginning Ra |
1022 | if has_reph { |
1023 | buffer.info[start].set_indic_position(ot_position_t::POS_RA_TO_BECOME_REPH); |
1024 | } |
1025 | |
1026 | // For old-style Indic script tags, move the first post-base Halant after |
1027 | // last consonant. |
1028 | // |
1029 | // Reports suggest that in some scripts Uniscribe does this only if there |
1030 | // is *not* a Halant after last consonant already. We know that is the |
1031 | // case for Kannada, while it reorders unconditionally in other scripts, |
1032 | // eg. Malayalam, Bengali, and Devanagari. We don't currently know about |
1033 | // other scripts, so we block Kannada. |
1034 | // |
1035 | // Kannada test case: |
1036 | // U+0C9A,U+0CCD,U+0C9A,U+0CCD |
1037 | // With some versions of Lohit Kannada. |
1038 | // https://bugs.freedesktop.org/show_bug.cgi?id=59118 |
1039 | // |
1040 | // Malayalam test case: |
1041 | // U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D |
1042 | // With lohit-ttf-20121122/Lohit-Malayalam.ttf |
1043 | // |
1044 | // Bengali test case: |
1045 | // U+0998,U+09CD,U+09AF,U+09CD |
1046 | // With Windows XP vrinda.ttf |
1047 | // https://github.com/harfbuzz/harfbuzz/issues/1073 |
1048 | // |
1049 | // Devanagari test case: |
1050 | // U+091F,U+094D,U+0930,U+094D |
1051 | // With chandas.ttf |
1052 | // https://github.com/harfbuzz/harfbuzz/issues/1071 |
1053 | if indic_plan.is_old_spec { |
1054 | let disallow_double_halants = buffer.script == Some(script::KANNADA); |
1055 | for i in base + 1..end { |
1056 | if buffer.info[i].indic_category() == ot_category_t::OT_H { |
1057 | let mut j = end - 1; |
1058 | while j > i { |
1059 | if buffer.info[j].is_consonant() |
1060 | || (disallow_double_halants |
1061 | && buffer.info[j].indic_category() == ot_category_t::OT_H) |
1062 | { |
1063 | break; |
1064 | } |
1065 | |
1066 | j -= 1; |
1067 | } |
1068 | |
1069 | if buffer.info[j].indic_category() != ot_category_t::OT_H && j > i { |
1070 | // Move Halant to after last consonant. |
1071 | let t = buffer.info[i]; |
1072 | for k in 0..j - i { |
1073 | buffer.info[k + i] = buffer.info[k + i + 1]; |
1074 | } |
1075 | buffer.info[j] = t; |
1076 | } |
1077 | |
1078 | break; |
1079 | } |
1080 | } |
1081 | } |
1082 | |
1083 | // Attach misc marks to previous char to move with them. |
1084 | { |
1085 | let mut last_pos = ot_position_t::POS_START; |
1086 | for i in start..end { |
1087 | let ok = rb_flag_unsafe(buffer.info[i].indic_category() as u32) |
1088 | & (category_flag(ot_category_t::OT_ZWJ) |
1089 | | category_flag(ot_category_t::OT_ZWNJ) |
1090 | | category_flag(ot_category_t::OT_N) |
1091 | | category_flag(ot_category_t::OT_RS) |
1092 | | category_flag(ot_category_t::OT_CM) |
1093 | | category_flag(ot_category_t::OT_H)) |
1094 | != 0; |
1095 | if ok { |
1096 | buffer.info[i].set_indic_position(last_pos); |
1097 | |
1098 | if buffer.info[i].indic_category() == ot_category_t::OT_H |
1099 | && buffer.info[i].indic_position() == ot_position_t::POS_PRE_M |
1100 | { |
1101 | // Uniscribe doesn't move the Halant with Left Matra. |
                    // TEST: U+092B,U+093F,U+094D
1103 | // We follow. |
1104 | for j in (start + 1..=i).rev() { |
1105 | if buffer.info[j - 1].indic_position() != ot_position_t::POS_PRE_M { |
1106 | let pos = buffer.info[j - 1].indic_position(); |
1107 | buffer.info[i].set_indic_position(pos); |
1108 | break; |
1109 | } |
1110 | } |
1111 | } |
1112 | } else if buffer.info[i].indic_position() != ot_position_t::POS_SMVD { |
1113 | if buffer.info[i].indic_category() == ot_category_t::OT_MPst |
1114 | && i > start |
1115 | && buffer.info[i - 1].indic_category() == ot_category_t::OT_SM |
1116 | { |
1117 | let val = buffer.info[i].indic_position(); |
1118 | buffer.info[i - 1].set_indic_position(val); |
1119 | } |
1120 | |
1121 | last_pos = buffer.info[i].indic_position(); |
1122 | } |
1123 | } |
1124 | } |
1125 | // For post-base consonants let them own anything before them |
1126 | // since the last consonant or matra. |
1127 | { |
1128 | let mut last = base; |
1129 | for i in base + 1..end { |
1130 | if buffer.info[i].is_consonant() { |
1131 | for j in last + 1..i { |
1132 | if buffer.info[j].indic_position() < ot_position_t::POS_SMVD { |
1133 | let pos = buffer.info[i].indic_position(); |
1134 | buffer.info[j].set_indic_position(pos); |
1135 | } |
1136 | } |
1137 | |
1138 | last = i; |
1139 | } else if (rb_flag_unsafe(buffer.info[i].indic_category() as u32) |
1140 | & (rb_flag(ot_category_t::OT_M as u32) | rb_flag(ot_category_t::OT_MPst as u32))) |
1141 | != 0 |
1142 | { |
1143 | last = i; |
1144 | } |
1145 | } |
1146 | } |
1147 | |
1148 | { |
1149 | // Use syllable() for sort accounting temporarily. |
1150 | let syllable = buffer.info[start].syllable(); |
1151 | for i in start..end { |
1152 | buffer.info[i].set_syllable(u8::try_from(i - start).unwrap()); |
1153 | } |
1154 | |
1155 | buffer.info[start..end].sort_by_key(|a| a.indic_position()); |
1156 | |
1157 | // Find base again; also flip left-matra sequence. |
1158 | let mut first_left_mantra = end; |
1159 | let mut last_left_mantra = end; |
1160 | base = end; |
1161 | |
1162 | for i in start..end { |
1163 | if buffer.info[i].indic_position() == ot_position_t::POS_BASE_C { |
1164 | base = i; |
1165 | break; |
1166 | } else if buffer.info[i].indic_position() == ot_position_t::POS_PRE_M { |
1167 | if first_left_mantra == end { |
1168 | first_left_mantra = i; |
1169 | } |
1170 | |
1171 | last_left_mantra = i; |
1172 | } |
1173 | } |
1174 | |
1175 | // https://github.com/harfbuzz/harfbuzz/issues/3863 |
1176 | if first_left_mantra < last_left_mantra { |
1177 | // No need to merge clusters, handled later. |
1178 | buffer.reverse_range(first_left_mantra, last_left_mantra + 1); |
1179 | // Reverse back nuktas, etc. |
1180 | let mut i = first_left_mantra; |
1181 | |
1182 | for j in i..=last_left_mantra { |
1183 | if (rb_flag_unsafe(buffer.info[j].indic_category() as u32) |
1184 | & (rb_flag(ot_category_t::OT_M as u32) |
1185 | | rb_flag(ot_category_t::OT_MPst as u32))) |
1186 | != 0 |
1187 | { |
1188 | buffer.reverse_range(i, j + 1); |
1189 | i = j + 1; |
1190 | } |
1191 | } |
1192 | } |
1193 | |
1194 | // Things are out-of-control for post base positions, they may shuffle |
1195 | // around like crazy. In old-spec mode, we move halants around, so in |
1196 | // that case merge all clusters after base. Otherwise, check the sort |
1197 | // order and merge as needed. |
1198 | // For pre-base stuff, we handle cluster issues in final reordering. |
1199 | // |
1200 | // We could use buffer->sort() for this, if there was no special |
1201 | // reordering of pre-base stuff happening later... |
1202 | // We don't want to merge_clusters all of that, which buffer->sort() |
1203 | // would. Here's a concrete example: |
1204 | // |
1205 | // Assume there's a pre-base consonant and explicit Halant before base, |
1206 | // followed by a prebase-reordering (left) Matra: |
1207 | // |
1208 | // C,H,ZWNJ,B,M |
1209 | // |
1210 | // At this point in reordering we would have: |
1211 | // |
1212 | // M,C,H,ZWNJ,B |
1213 | // |
1214 | // whereas in final reordering we will bring the Matra closer to Base: |
1215 | // |
1216 | // C,H,ZWNJ,M,B |
1217 | // |
1218 | // That's why we don't want to merge-clusters anything before the Base |
1219 | // at this point. But if something moved from after Base to before it, |
1220 | // we should merge clusters from base to them. In final-reordering, we |
1221 | // only move things around before base, and merge-clusters up to base. |
1222 | // These two merge-clusters from the two sides of base will interlock |
1223 | // to merge things correctly. See: |
1224 | // https://github.com/harfbuzz/harfbuzz/issues/2272 |
1225 | if indic_plan.is_old_spec || end - start > 127 { |
1226 | buffer.merge_clusters(base, end); |
1227 | } else { |
1228 | // Note! syllable() is a one-byte field. |
1229 | for i in base..end { |
1230 | if buffer.info[i].syllable() != 255 { |
1231 | let mut min = i; |
1232 | let mut max = i; |
1233 | let mut j = start + buffer.info[i].syllable() as usize; |
1234 | while j != i { |
1235 | min = cmp::min(min, j); |
1236 | max = cmp::max(max, j); |
1237 | let next = start + buffer.info[j].syllable() as usize; |
1238 | buffer.info[j].set_syllable(255); // So we don't process j later again. |
1239 | j = next; |
1240 | } |
1241 | |
1242 | buffer.merge_clusters(cmp::max(base, min), max + 1); |
1243 | } |
1244 | } |
1245 | } |
1246 | |
1247 | // Put syllable back in. |
1248 | for info in &mut buffer.info[start..end] { |
1249 | info.set_syllable(syllable); |
1250 | } |
1251 | } |
1252 | |
1253 | // Setup masks now |
1254 | |
1255 | { |
1256 | // Reph |
1257 | for info in &mut buffer.info[start..end] { |
1258 | if info.indic_position() != ot_position_t::POS_RA_TO_BECOME_REPH { |
1259 | break; |
1260 | } |
1261 | |
1262 | info.mask |= indic_plan.mask_array[indic_feature::RPHF]; |
1263 | } |
1264 | |
1265 | // Pre-base |
1266 | let mut mask = indic_plan.mask_array[indic_feature::HALF]; |
1267 | if !indic_plan.is_old_spec && indic_plan.config.blwf_mode == BlwfMode::PreAndPost { |
1268 | mask |= indic_plan.mask_array[indic_feature::BLWF]; |
1269 | } |
1270 | |
1271 | for info in &mut buffer.info[start..base] { |
1272 | info.mask |= mask; |
1273 | } |
1274 | |
1275 | // Base |
1276 | mask = 0; |
1277 | if base < end { |
1278 | buffer.info[base].mask |= mask; |
1279 | } |
1280 | |
1281 | // Post-base |
1282 | mask = indic_plan.mask_array[indic_feature::BLWF] |
1283 | | indic_plan.mask_array[indic_feature::ABVF] |
1284 | | indic_plan.mask_array[indic_feature::PSTF]; |
1285 | for i in base + 1..end { |
1286 | buffer.info[i].mask |= mask; |
1287 | } |
1288 | } |
1289 | |
1290 | if indic_plan.is_old_spec && buffer.script == Some(script::DEVANAGARI) { |
1291 | // Old-spec eye-lash Ra needs special handling. From the |
1292 | // spec: |
1293 | // |
1294 | // "The feature 'below-base form' is applied to consonants |
1295 | // having below-base forms and following the base consonant. |
1296 | // The exception is vattu, which may appear below half forms |
1297 | // as well as below the base glyph. The feature 'below-base |
1298 | // form' will be applied to all such occurrences of Ra as well." |
1299 | // |
1300 | // Test case: U+0924,U+094D,U+0930,U+094d,U+0915 |
1301 | // with Sanskrit 2003 font. |
1302 | // |
1303 | // However, note that Ra,Halant,ZWJ is the correct way to |
        // request eyelash form of Ra, so we wouldn't inhibit it
1305 | // in that sequence. |
1306 | // |
1307 | // Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915 |
1308 | for i in start..base.saturating_sub(1) { |
1309 | if buffer.info[i].indic_category() == ot_category_t::OT_Ra |
1310 | && buffer.info[i + 1].indic_category() == ot_category_t::OT_H |
1311 | && (i + 2 == base || buffer.info[i + 2].indic_category() != ot_category_t::OT_ZWJ) |
1312 | { |
1313 | buffer.info[i].mask |= indic_plan.mask_array[indic_feature::BLWF]; |
1314 | buffer.info[i + 1].mask |= indic_plan.mask_array[indic_feature::BLWF]; |
1315 | } |
1316 | } |
1317 | } |
1318 | |
1319 | let pref_len = 2; |
1320 | if indic_plan.mask_array[indic_feature::PREF] != 0 && base + pref_len < end { |
1321 | // Find a Halant,Ra sequence and mark it for pre-base-reordering processing. |
1322 | for i in base + 1..end - pref_len + 1 { |
1323 | let glyphs = &[buffer.info[i + 0].as_glyph(), buffer.info[i + 1].as_glyph()]; |
1324 | if indic_plan.pref.would_substitute(&plan.ot_map, face, glyphs) { |
1325 | buffer.info[i + 0].mask |= indic_plan.mask_array[indic_feature::PREF]; |
1326 | buffer.info[i + 1].mask |= indic_plan.mask_array[indic_feature::PREF]; |
1327 | break; |
1328 | } |
1329 | } |
1330 | } |
1331 | |
1332 | // Apply ZWJ/ZWNJ effects |
1333 | for i in start + 1..end { |
1334 | if buffer.info[i].is_joiner() { |
1335 | let non_joiner = buffer.info[i].indic_category() == ot_category_t::OT_ZWNJ; |
1336 | let mut j = i; |
1337 | |
1338 | loop { |
1339 | j -= 1; |
1340 | |
1341 | // ZWJ/ZWNJ should disable CJCT. They do that by simply |
1342 | // being there, since we don't skip them for the CJCT |
1343 | // feature (ie. F_MANUAL_ZWJ) |
1344 | |
1345 | // A ZWNJ disables HALF. |
1346 | if non_joiner { |
1347 | buffer.info[j].mask &= !indic_plan.mask_array[indic_feature::HALF]; |
1348 | } |
1349 | |
1350 | if j <= start || buffer.info[j].is_consonant() { |
1351 | break; |
1352 | } |
1353 | } |
1354 | } |
1355 | } |
1356 | } |
1357 | |
/// Performs the initial reordering of a standalone cluster.
///
/// This is a thin wrapper: every argument is forwarded unchanged to
/// `initial_reordering_consonant_syllable`, so a standalone cluster is
/// reordered exactly like a consonant syllable.
///
/// `start` and `end` delimit the cluster inside `buffer` and are passed
/// through as-is.
fn initial_reordering_standalone_cluster(
    plan: &hb_ot_shape_plan_t,
    indic_plan: &IndicShapePlan,
    face: &hb_font_t,
    start: usize,
    end: usize,
    buffer: &mut hb_buffer_t,
) {
    // We treat placeholder/dotted-circle as if they are consonants, so we
    // should just chain. Only if not in compatibility mode that is...
    initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer);
}
1370 | |
1371 | fn final_reordering(plan: &hb_ot_shape_plan_t, face: &hb_font_t, buffer: &mut hb_buffer_t) -> bool { |
1372 | if buffer.is_empty() { |
1373 | return false; |
1374 | } |
1375 | |
1376 | foreach_syllable!(buffer, start, end, { |
1377 | final_reordering_impl(plan, face, start, end, buffer); |
1378 | }); |
1379 | |
1380 | false |
1381 | } |
1382 | |
1383 | fn final_reordering_impl( |
1384 | plan: &hb_ot_shape_plan_t, |
1385 | face: &hb_font_t, |
1386 | start: usize, |
1387 | end: usize, |
1388 | buffer: &mut hb_buffer_t, |
1389 | ) { |
1390 | let indic_plan = plan.data::<IndicShapePlan>(); |
1391 | |
1392 | // This function relies heavily on halant glyphs. Lots of ligation |
1393 | // and possibly multiple substitutions happened prior to this |
1394 | // phase, and that might have messed up our properties. Recover |
1395 | // from a particular case of that where we're fairly sure that a |
1396 | // class of OT_H is desired but has been lost. |
1397 | // |
1398 | // We don't call load_virama_glyph(), since we know it's already loaded. |
1399 | let mut virama_glyph = None; |
1400 | if indic_plan.config.virama != 0 { |
1401 | if let Some(g) = face.get_nominal_glyph(indic_plan.config.virama) { |
1402 | virama_glyph = Some(g.0 as u32); |
1403 | } |
1404 | } |
1405 | |
1406 | if let Some(virama_glyph) = virama_glyph { |
1407 | for info in &mut buffer.info[start..end] { |
1408 | if info.glyph_id == virama_glyph |
1409 | && _hb_glyph_info_ligated(info) |
1410 | && _hb_glyph_info_multiplied(info) |
1411 | { |
1412 | // This will make sure that this glyph passes is_halant() test. |
1413 | info.set_indic_category(ot_category_t::OT_H); |
1414 | _hb_glyph_info_clear_ligated_and_multiplied(info); |
1415 | } |
1416 | } |
1417 | } |
1418 | |
1419 | // 4. Final reordering: |
1420 | // |
1421 | // After the localized forms and basic shaping forms GSUB features have been |
1422 | // applied (see below), the shaping engine performs some final glyph |
1423 | // reordering before applying all the remaining font features to the entire |
1424 | // syllable. |
1425 | |
1426 | let mut try_pref = indic_plan.mask_array[indic_feature::PREF] != 0; |
1427 | |
1428 | let mut base = start; |
1429 | while base < end { |
1430 | if buffer.info[base].indic_position() as u32 >= ot_position_t::POS_BASE_C as u32 { |
1431 | if try_pref && base + 1 < end { |
1432 | for i in base + 1..end { |
1433 | if (buffer.info[i].mask & indic_plan.mask_array[indic_feature::PREF]) != 0 { |
1434 | if !(_hb_glyph_info_substituted(&buffer.info[i]) |
1435 | && _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[i])) |
1436 | { |
1437 | // Ok, this was a 'pref' candidate but didn't form any. |
1438 | // Base is around here... |
1439 | base = i; |
1440 | while base < end && buffer.info[base].is_halant() { |
1441 | base += 1; |
1442 | } |
1443 | |
1444 | if base < end { |
1445 | buffer.info[base].set_indic_position(ot_position_t::POS_BASE_C); |
1446 | } |
1447 | |
1448 | try_pref = false; |
1449 | } |
1450 | |
1451 | break; |
1452 | } |
1453 | |
1454 | if base == end { |
1455 | break; |
1456 | } |
1457 | } |
1458 | } |
1459 | |
1460 | // For Malayalam, skip over unformed below- (but NOT post-) forms. |
1461 | if buffer.script == Some(script::MALAYALAM) { |
1462 | let mut i = base + 1; |
1463 | while i < end { |
1464 | while i < end && buffer.info[i].is_joiner() { |
1465 | i += 1; |
1466 | } |
1467 | |
1468 | if i == end || !buffer.info[i].is_halant() { |
1469 | break; |
1470 | } |
1471 | |
1472 | i += 1; // Skip halant. |
1473 | |
1474 | while i < end && buffer.info[i].is_joiner() { |
1475 | i += 1; |
1476 | } |
1477 | |
1478 | if i < end |
1479 | && buffer.info[i].is_consonant() |
1480 | && buffer.info[i].indic_position() == ot_position_t::POS_BELOW_C |
1481 | { |
1482 | base = i; |
1483 | buffer.info[base].set_indic_position(ot_position_t::POS_BASE_C); |
1484 | } |
1485 | |
1486 | i += 1; |
1487 | } |
1488 | } |
1489 | |
1490 | if start < base |
1491 | && buffer.info[base].indic_position() as u32 > ot_position_t::POS_BASE_C as u32 |
1492 | { |
1493 | base -= 1; |
1494 | } |
1495 | |
1496 | break; |
1497 | } |
1498 | |
1499 | base += 1; |
1500 | } |
1501 | |
1502 | if base == end |
1503 | && start < base |
1504 | && buffer.info[base - 1].is_one_of(rb_flag(ot_category_t::OT_ZWJ as u32)) |
1505 | { |
1506 | base -= 1; |
1507 | } |
1508 | |
1509 | if base < end { |
1510 | while start < base |
1511 | && buffer.info[base].is_one_of( |
1512 | rb_flag(ot_category_t::OT_N as u32) | rb_flag(ot_category_t::OT_H as u32), |
1513 | ) |
1514 | { |
1515 | base -= 1; |
1516 | } |
1517 | } |
1518 | |
1519 | // - Reorder matras: |
1520 | // |
1521 | // If a pre-base matra character had been reordered before applying basic |
1522 | // features, the glyph can be moved closer to the main consonant based on |
1523 | // whether half-forms had been formed. Actual position for the matra is |
1524 | // defined as “after last standalone halant glyph, after initial matra |
1525 | // position and before the main consonant”. If ZWJ or ZWNJ follow this |
1526 | // halant, position is moved after it. |
1527 | // |
1528 | // IMPLEMENTATION NOTES: |
1529 | // |
1530 | // It looks like the last sentence is wrong. Testing, with Windows 7 Uniscribe |
1531 | // and Devanagari shows that the behavior is best described as: |
1532 | // |
1533 | // "If ZWJ follows this halant, matra is NOT repositioned after this halant. |
1534 | // If ZWNJ follows this halant, position is moved after it." |
1535 | // |
1536 | // Test case, with Adobe Devanagari or Nirmala UI: |
1537 | // |
1538 | // U+091F,U+094D,U+200C,U+092F,U+093F |
1539 | // (Matra moves to the middle, after ZWNJ.) |
1540 | // |
1541 | // U+091F,U+094D,U+200D,U+092F,U+093F |
1542 | // (Matra does NOT move, stays to the left.) |
1543 | // |
1544 | // https://github.com/harfbuzz/harfbuzz/issues/1070 |
1545 | |
1546 | // Otherwise there can't be any pre-base matra characters. |
1547 | if start + 1 < end && start < base { |
1548 | // If we lost track of base, alas, position before last thingy. |
1549 | let mut new_pos = if base == end { base - 2 } else { base - 1 }; |
1550 | |
1551 | // Malayalam / Tamil do not have "half" forms or explicit virama forms. |
1552 | // The glyphs formed by 'half' are Chillus or ligated explicit viramas. |
1553 | // We want to position matra after them. |
1554 | if buffer.script != Some(script::MALAYALAM) && buffer.script != Some(script::TAMIL) { |
1555 | loop { |
1556 | while new_pos > start |
1557 | && !buffer.info[new_pos].is_one_of( |
1558 | rb_flag(ot_category_t::OT_M as u32) |
1559 | | rb_flag(ot_category_t::OT_MPst as u32) |
1560 | | rb_flag(ot_category_t::OT_H as u32), |
1561 | ) |
1562 | { |
1563 | new_pos -= 1; |
1564 | } |
1565 | |
1566 | // If we found no Halant we are done. |
1567 | // Otherwise only proceed if the Halant does |
1568 | // not belong to the Matra itself! |
1569 | if buffer.info[new_pos].is_halant() |
1570 | && buffer.info[new_pos].indic_position() != ot_position_t::POS_PRE_M |
1571 | { |
1572 | if new_pos + 1 < end { |
1573 | // -> If ZWJ follows this halant, matra is NOT repositioned after this halant. |
1574 | if buffer.info[new_pos + 1].indic_category() == ot_category_t::OT_ZWJ { |
1575 | // Keep searching. |
1576 | if new_pos > start { |
1577 | new_pos -= 1; |
1578 | continue; |
1579 | } |
1580 | } |
1581 | |
1582 | // -> If ZWNJ follows this halant, position is moved after it. |
1583 | // |
1584 | // IMPLEMENTATION NOTES: |
1585 | // |
1586 | // This is taken care of by the state-machine. A Halant,ZWNJ is a terminating |
1587 | // sequence for a consonant syllable; any pre-base matras occurring after it |
1588 | // will belong to the subsequent syllable. |
1589 | } |
1590 | } else { |
1591 | new_pos = start; // No move. |
1592 | } |
1593 | |
1594 | break; |
1595 | } |
1596 | } |
1597 | |
1598 | if start < new_pos && buffer.info[new_pos].indic_position() != ot_position_t::POS_PRE_M { |
1599 | // Now go see if there's actually any matras... |
1600 | for i in (start + 1..=new_pos).rev() { |
1601 | if buffer.info[i - 1].indic_position() == ot_position_t::POS_PRE_M { |
1602 | let old_pos = i - 1; |
1603 | // Shouldn't actually happen. |
1604 | if old_pos < base && base <= new_pos { |
1605 | base -= 1; |
1606 | } |
1607 | |
1608 | let tmp = buffer.info[old_pos]; |
1609 | for i in 0..new_pos - old_pos { |
1610 | buffer.info[i + old_pos] = buffer.info[i + old_pos + 1]; |
1611 | } |
1612 | buffer.info[new_pos] = tmp; |
1613 | |
1614 | // Note: this merge_clusters() is intentionally *after* the reordering. |
1615 | // Indic matra reordering is special and tricky... |
1616 | buffer.merge_clusters(new_pos, cmp::min(end, base + 1)); |
1617 | |
1618 | new_pos -= 1; |
1619 | } |
1620 | } |
1621 | } else { |
1622 | for i in start..base { |
1623 | if buffer.info[i].indic_position() == ot_position_t::POS_PRE_M { |
1624 | buffer.merge_clusters(i, cmp::min(end, base + 1)); |
1625 | break; |
1626 | } |
1627 | } |
1628 | } |
1629 | } |
1630 | |
1631 | // - Reorder reph: |
1632 | // |
1633 | // Reph’s original position is always at the beginning of the syllable, |
1634 | // (i.e. it is not reordered at the character reordering stage). However, |
1635 | // it will be reordered according to the basic-forms shaping results. |
1636 | // Possible positions for reph, depending on the script, are; after main, |
1637 | // before post-base consonant forms, and after post-base consonant forms. |
1638 | |
1639 | // Two cases: |
1640 | // |
1641 | // - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then |
1642 | // we should only move it if the sequence ligated to the repha form. |
1643 | // |
1644 | // - If repha is encoded separately and in the logical position, we should only |
1645 | // move it if it did NOT ligate. If it ligated, it's probably the font trying |
1646 | // to make it work without the reordering. |
1647 | |
1648 | if start + 1 < end |
1649 | && buffer.info[start].indic_position() == ot_position_t::POS_RA_TO_BECOME_REPH |
1650 | && (buffer.info[start].indic_category() == ot_category_t::OT_Repha) |
1651 | ^ _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[start]) |
1652 | { |
1653 | let mut new_reph_pos; |
1654 | 'reph: { |
1655 | let reph_pos = indic_plan.config.reph_pos; |
1656 | |
1657 | // 1. If reph should be positioned after post-base consonant forms, |
1658 | // proceed to step 5. |
1659 | if reph_pos != RephPosition::AfterPost { |
1660 | // 2. If the reph repositioning class is not after post-base: target |
1661 | // position is after the first explicit halant glyph between the |
1662 | // first post-reph consonant and last main consonant. If ZWJ or ZWNJ |
1663 | // are following this halant, position is moved after it. If such |
1664 | // position is found, this is the target position. Otherwise, |
1665 | // proceed to the next step. |
1666 | // |
1667 | // Note: in old-implementation fonts, where classifications were |
1668 | // fixed in shaping engine, there was no case where reph position |
1669 | // will be found on this step. |
1670 | { |
1671 | new_reph_pos = start + 1; |
1672 | while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() { |
1673 | new_reph_pos += 1; |
1674 | } |
1675 | |
1676 | if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() { |
1677 | // ->If ZWJ or ZWNJ are following this halant, position is moved after it. |
1678 | if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() { |
1679 | new_reph_pos += 1; |
1680 | } |
1681 | |
1682 | break 'reph; |
1683 | } |
1684 | } |
1685 | |
1686 | // 3. If reph should be repositioned after the main consonant: find the |
1687 | // first consonant not ligated with main, or find the first |
1688 | // consonant that is not a potential pre-base-reordering Ra. |
1689 | if reph_pos == RephPosition::AfterMain { |
1690 | new_reph_pos = base; |
1691 | while new_reph_pos + 1 < end |
1692 | && buffer.info[new_reph_pos + 1].indic_position() |
1693 | <= ot_position_t::POS_AFTER_MAIN |
1694 | { |
1695 | new_reph_pos += 1; |
1696 | } |
1697 | |
1698 | if new_reph_pos < end { |
1699 | break 'reph; |
1700 | } |
1701 | } |
1702 | |
1703 | // 4. If reph should be positioned before post-base consonant, find |
1704 | // first post-base classified consonant not ligated with main. If no |
1705 | // consonant is found, the target position should be before the |
1706 | // first matra, syllable modifier sign or vedic sign. |
1707 | // |
1708 | // This is our take on what step 4 is trying to say (and failing, BADLY). |
1709 | if reph_pos == RephPosition::AfterSub { |
1710 | new_reph_pos = base; |
1711 | while new_reph_pos + 1 < end |
1712 | && (rb_flag_unsafe(buffer.info[new_reph_pos + 1].indic_position() as u32) |
1713 | & (rb_flag(ot_position_t::POS_POST_C as u32) |
1714 | | rb_flag(ot_position_t::POS_AFTER_POST as u32) |
1715 | | rb_flag(ot_position_t::POS_SMVD as u32))) |
1716 | == 0 |
1717 | { |
1718 | new_reph_pos += 1; |
1719 | } |
1720 | |
1721 | if new_reph_pos < end { |
1722 | break 'reph; |
1723 | } |
1724 | } |
1725 | } |
1726 | |
1727 | // 5. If no consonant is found in steps 3 or 4, move reph to a position |
1728 | // immediately before the first post-base matra, syllable modifier |
1729 | // sign or vedic sign that has a reordering class after the intended |
1730 | // reph position. For example, if the reordering position for reph |
1731 | // is post-main, it will skip above-base matras that also have a |
1732 | // post-main position. |
1733 | // |
1734 | // Copied from step 2. |
1735 | new_reph_pos = start + 1; |
1736 | while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() { |
1737 | new_reph_pos += 1; |
1738 | } |
1739 | |
1740 | if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() { |
1741 | /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */ |
1742 | if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() { |
1743 | new_reph_pos += 1; |
1744 | } |
1745 | |
1746 | break 'reph; |
1747 | } |
1748 | // See https://github.com/harfbuzz/harfbuzz/issues/2298#issuecomment-615318654 |
1749 | |
1750 | // 6. Otherwise, reorder reph to the end of the syllable. |
1751 | { |
1752 | new_reph_pos = end - 1; |
1753 | while new_reph_pos > start |
1754 | && buffer.info[new_reph_pos].indic_position() == ot_position_t::POS_SMVD |
1755 | { |
1756 | new_reph_pos -= 1; |
1757 | } |
1758 | |
1759 | // If the Reph is to be ending up after a Matra,Halant sequence, |
1760 | // position it before that Halant so it can interact with the Matra. |
1761 | // However, if it's a plain Consonant,Halant we shouldn't do that. |
1762 | // Uniscribe doesn't do this. |
1763 | // TEST: U+0930,U+094D,U+0915,U+094B,U+094D |
1764 | if buffer.info[new_reph_pos].is_halant() { |
1765 | for info in &buffer.info[base + 1..new_reph_pos] { |
1766 | if (rb_flag_unsafe(info.indic_category() as u32) |
1767 | & (rb_flag(ot_category_t::OT_M as u32) |
1768 | | rb_flag(ot_category_t::OT_MPst as u32))) |
1769 | != 0 |
1770 | { |
1771 | // Ok, got it. |
1772 | new_reph_pos -= 1; |
1773 | } |
1774 | } |
1775 | } |
1776 | } |
1777 | |
1778 | break 'reph; |
1779 | } |
1780 | |
1781 | // Move |
1782 | buffer.merge_clusters(start, new_reph_pos + 1); |
1783 | |
1784 | let reph = buffer.info[start]; |
1785 | for i in 0..new_reph_pos - start { |
1786 | buffer.info[i + start] = buffer.info[i + start + 1]; |
1787 | } |
1788 | buffer.info[new_reph_pos] = reph; |
1789 | |
1790 | if start < base && base <= new_reph_pos { |
1791 | base -= 1; |
1792 | } |
1793 | } |
1794 | |
1795 | // - Reorder pre-base-reordering consonants: |
1796 | // |
1797 | // If a pre-base-reordering consonant is found, reorder it according to |
1798 | // the following rules: |
1799 | |
1800 | // Otherwise there can't be any pre-base-reordering Ra. |
1801 | if try_pref && base + 1 < end { |
1802 | for i in base + 1..end { |
1803 | if (buffer.info[i].mask & indic_plan.mask_array[indic_feature::PREF]) != 0 { |
1804 | // 1. Only reorder a glyph produced by substitution during application |
1805 | // of the <pref> feature. (Note that a font may shape a Ra consonant with |
1806 | // the feature generally but block it in certain contexts.) |
1807 | // |
1808 | // Note: We just check that something got substituted. We don't check that |
1809 | // the <pref> feature actually did it... |
1810 | // |
1811 | // Reorder pref only if it ligated. |
1812 | if _hb_glyph_info_ligated_and_didnt_multiply(&buffer.info[i]) { |
1813 | // 2. Try to find a target position the same way as for pre-base matra. |
1814 | // If it is found, reorder pre-base consonant glyph. |
1815 | // |
1816 | // 3. If position is not found, reorder immediately before main consonant. |
1817 | |
1818 | let mut new_pos = base; |
1819 | // Malayalam / Tamil do not have "half" forms or explicit virama forms. |
1820 | // The glyphs formed by 'half' are Chillus or ligated explicit viramas. |
1821 | // We want to position matra after them. |
1822 | if buffer.script != Some(script::MALAYALAM) |
1823 | && buffer.script != Some(script::TAMIL) |
1824 | { |
1825 | while new_pos > start |
1826 | && !buffer.info[new_pos - 1].is_one_of( |
1827 | rb_flag(ot_category_t::OT_M as u32) |
1828 | | rb_flag(ot_category_t::OT_MPst as u32) |
1829 | | rb_flag(ot_category_t::OT_H as u32), |
1830 | ) |
1831 | { |
1832 | new_pos -= 1; |
1833 | } |
1834 | } |
1835 | |
1836 | if new_pos > start && buffer.info[new_pos - 1].is_halant() { |
1837 | // -> If ZWJ or ZWNJ follow this halant, position is moved after it. |
1838 | if new_pos < end && buffer.info[new_pos].is_joiner() { |
1839 | new_pos += 1; |
1840 | } |
1841 | } |
1842 | |
1843 | { |
1844 | let old_pos = i; |
1845 | |
1846 | buffer.merge_clusters(new_pos, old_pos + 1); |
1847 | let tmp = buffer.info[old_pos]; |
1848 | for i in (0..old_pos - new_pos).rev() { |
1849 | buffer.info[i + new_pos + 1] = buffer.info[i + new_pos]; |
1850 | } |
1851 | buffer.info[new_pos] = tmp; |
1852 | |
1853 | if new_pos <= base && base < old_pos { |
1854 | // TODO: investigate |
1855 | #[allow (unused_assignments)] |
1856 | { |
1857 | base += 1; |
1858 | } |
1859 | } |
1860 | } |
1861 | } |
1862 | |
1863 | break; |
1864 | } |
1865 | } |
1866 | } |
1867 | |
1868 | // Apply 'init' to the Left Matra if it's a word start. |
1869 | if buffer.info[start].indic_position() == ot_position_t::POS_PRE_M { |
1870 | if start == 0 |
1871 | || (rb_flag_unsafe( |
1872 | _hb_glyph_info_get_general_category(&buffer.info[start - 1]).to_rb(), |
1873 | ) & rb_flag_range( |
1874 | hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT, |
1875 | hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK, |
1876 | )) == 0 |
1877 | { |
1878 | buffer.info[start].mask |= indic_plan.mask_array[indic_feature::INIT]; |
1879 | } else { |
1880 | buffer.unsafe_to_break(Some(start - 1), Some(start + 1)); |
1881 | } |
1882 | } |
1883 | } |
1884 | |