1 | use alloc::boxed::Box; |
2 | use core::cmp; |
3 | use core::convert::TryFrom; |
4 | use core::ops::Range; |
5 | |
6 | use ttf_parser::GlyphId; |
7 | |
8 | use super::*; |
9 | use crate::buffer::Buffer; |
10 | use crate::normalize::ShapeNormalizationMode; |
11 | use crate::ot::{ |
12 | feature, FeatureFlags, LayoutTable, Map, TableIndex, WouldApply, WouldApplyContext, |
13 | }; |
14 | use crate::plan::{ShapePlan, ShapePlanner}; |
15 | use crate::unicode::{hb_gc, CharExt, GeneralCategoryExt}; |
16 | use crate::{script, Face, GlyphInfo, Mask, Script, Tag}; |
17 | |
18 | pub const INDIC_SHAPER: ComplexShaper = ComplexShaper { |
19 | collect_features: Some(collect_features), |
20 | override_features: Some(override_features), |
21 | create_data: Some(|plan: &ShapePlan| Box::new(IndicShapePlan::new(plan))), |
22 | preprocess_text: Some(preprocess_text), |
23 | postprocess_glyphs: None, |
24 | normalization_mode: Some(ShapeNormalizationMode::ComposedDiacriticsNoShortCircuit), |
25 | decompose: Some(decompose), |
26 | compose: Some(compose), |
27 | setup_masks: Some(setup_masks), |
28 | gpos_tag: None, |
29 | reorder_marks: None, |
30 | zero_width_marks: None, |
31 | fallback_position: false, |
32 | }; |
33 | |
/// Numeric id of a shaping category; the ids live in [`category`].
pub type Category = u8;

/// Shaping category ids.
///
/// Id 9 is deliberately left unassigned (see the note next to `A`).
pub mod category {
    use super::Category;

    pub const X: Category = 0;
    pub const C: Category = 1;
    pub const V: Category = 2;
    pub const N: Category = 3;
    pub const H: Category = 4;
    pub const ZWNJ: Category = 5;
    pub const ZWJ: Category = 6;
    pub const M: Category = 7;
    pub const SM: Category = 8;
    // OT_VD = 9 is unused; OT_A is used instead.
    pub const A: Category = 10;
    pub const PLACEHOLDER: Category = 11;
    pub const DOTTED_CIRCLE: Category = 12;
    /// Register Shifter, used in Khmer OT spec.
    pub const RS: Category = 13;
    /// Khmer-style Virama.
    pub const COENG: Category = 14;
    /// Atomically-encoded logical or visual repha.
    pub const REPHA: Category = 15;
    pub const RA: Category = 16;
    /// Consonant-Medial.
    pub const CM: Category = 17;
    /// Avagraha, etc that take marks (SM,A,VD).
    pub const SYMBOL: Category = 18;
    pub const CS: Category = 19;
    pub const ROBATIC: Category = 20;
    pub const X_GROUP: Category = 21;
    pub const Y_GROUP: Category = 22;
    pub const MW: Category = 23;
    pub const MY: Category = 24;
    pub const PT: Category = 25;

    // The ids below are used by the Khmer & Myanmar shapers and are defined
    // here so that both can share them.
    pub const V_AVB: Category = 26;
    pub const V_BLW: Category = 27;
    pub const V_PRE: Category = 28;
    pub const V_PST: Category = 29;
    /// Variation selectors.
    pub const VS: Category = 30;
    /// Punctuation.
    pub const P: Category = 31;
    /// Digits except zero.
    pub const D: Category = 32;
}
71 | |
/// Numeric id of a glyph position class; the ids live in [`position`].
pub type Position = u8;

/// Position ids, numbered consecutively from `START` (0) to `END` (15).
pub mod position {
    use super::Position;

    pub const START: Position = 0;
    pub const RA_TO_BECOME_REPH: Position = 1;
    pub const PRE_M: Position = 2;
    pub const PRE_C: Position = 3;
    pub const BASE_C: Position = 4;
    pub const AFTER_MAIN: Position = 5;
    pub const ABOVE_C: Position = 6;
    pub const BEFORE_SUB: Position = 7;
    pub const BELOW_C: Position = 8;
    pub const AFTER_SUB: Position = 9;
    pub const BEFORE_POST: Position = 10;
    pub const POST_C: Position = 11;
    pub const AFTER_POST: Position = 12;
    pub const FINAL_C: Position = 13;
    pub const SMVD: Position = 14;
    pub const END: Position = 15;
}
91 | |
/// Syllabic category of a character.
///
/// Variants are declared in alphabetical order after `Other`, so their
/// implicit discriminants follow that order.
// Not every variant is referenced in this file, hence the lint override.
#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq)]
pub enum SyllabicCategory {
    Other,
    Avagraha,
    Bindu,
    BrahmiJoiningNumber,
    CantillationMark,
    Consonant,
    ConsonantDead,
    ConsonantFinal,
    ConsonantHeadLetter,
    ConsonantInitialPostfixed,
    ConsonantKiller,
    ConsonantMedial,
    ConsonantPlaceholder,
    ConsonantPrecedingRepha,
    ConsonantPrefixed,
    ConsonantSubjoined,
    ConsonantSucceedingRepha,
    ConsonantWithStacker,
    GeminationMark,
    InvisibleStacker,
    Joiner,
    ModifyingLetter,
    NonJoiner,
    Nukta,
    Number,
    NumberJoiner,
    PureKiller,
    RegisterShifter,
    SyllableModifier,
    ToneLetter,
    ToneMark,
    Virama,
    Visarga,
    Vowel,
    VowelDependent,
    VowelIndependent,
}
132 | |
/// Placement class of a matra relative to its base.
///
/// Compound variants name every side the matra occupies.
// Not every variant is referenced in this file, hence the lint override.
#[allow(dead_code)]
#[derive(Clone, Copy)]
pub enum MatraCategory {
    NotApplicable,
    Left,
    Top,
    Bottom,
    Right,
    BottomAndLeft,
    BottomAndRight,
    LeftAndRight,
    TopAndBottom,
    TopAndBottomAndRight,
    TopAndLeft,
    TopAndLeftAndRight,
    TopAndRight,
    Overstruck,
    VisualOrderLeft,
}
152 | |
153 | const INDIC_FEATURES: &[(Tag, FeatureFlags)] = &[ |
154 | // Basic features. |
155 | // These features are applied in order, one at a time, after initial_reordering, |
156 | // constrained to the syllable. |
157 | (feature::NUKTA_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS), |
158 | (feature::AKHANDS, FeatureFlags::GLOBAL_MANUAL_JOINERS), |
159 | (feature::REPH_FORMS, FeatureFlags::MANUAL_JOINERS), |
160 | (feature::RAKAR_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS), |
161 | (feature::PRE_BASE_FORMS, FeatureFlags::MANUAL_JOINERS), |
162 | (feature::BELOW_BASE_FORMS, FeatureFlags::MANUAL_JOINERS), |
163 | (feature::ABOVE_BASE_FORMS, FeatureFlags::MANUAL_JOINERS), |
164 | (feature::HALF_FORMS, FeatureFlags::MANUAL_JOINERS), |
165 | (feature::POST_BASE_FORMS, FeatureFlags::MANUAL_JOINERS), |
166 | (feature::VATTU_VARIANTS, FeatureFlags::GLOBAL_MANUAL_JOINERS), |
167 | (feature::CONJUNCT_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS), |
168 | // Other features. |
169 | // These features are applied all at once, after final_reordering, constrained |
170 | // to the syllable. |
171 | // Default Bengali font in Windows for example has intermixed |
172 | // lookups for init,pres,abvs,blws features. |
173 | (feature::INITIAL_FORMS, FeatureFlags::MANUAL_JOINERS), |
174 | ( |
175 | feature::PRE_BASE_SUBSTITUTIONS, |
176 | FeatureFlags::GLOBAL_MANUAL_JOINERS, |
177 | ), |
178 | ( |
179 | feature::ABOVE_BASE_SUBSTITUTIONS, |
180 | FeatureFlags::GLOBAL_MANUAL_JOINERS, |
181 | ), |
182 | ( |
183 | feature::BELOW_BASE_SUBSTITUTIONS, |
184 | FeatureFlags::GLOBAL_MANUAL_JOINERS, |
185 | ), |
186 | ( |
187 | feature::POST_BASE_SUBSTITUTIONS, |
188 | FeatureFlags::GLOBAL_MANUAL_JOINERS, |
189 | ), |
190 | (feature::HALANT_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS), |
191 | ]; |
192 | |
// Indices into `INDIC_FEATURES`. Each index is derived from the previous one,
// so the declaration order here must stay the same as the order of that array.
#[allow(dead_code)] // not every index is referenced by name
mod indic_feature {
    pub const NUKT: usize = 0;
    pub const AKHN: usize = NUKT + 1;
    pub const RPHF: usize = AKHN + 1;
    pub const RKRF: usize = RPHF + 1;
    pub const PREF: usize = RKRF + 1;
    pub const BLWF: usize = PREF + 1;
    pub const ABVF: usize = BLWF + 1;
    pub const HALF: usize = ABVF + 1;
    pub const PSTF: usize = HALF + 1;
    pub const VATU: usize = PSTF + 1;
    pub const CJCT: usize = VATU + 1;
    pub const INIT: usize = CJCT + 1;
    pub const PRES: usize = INIT + 1;
    pub const ABVS: usize = PRES + 1;
    pub const BLWS: usize = ABVS + 1;
    pub const PSTS: usize = BLWS + 1;
    pub const HALN: usize = PSTS + 1;
}
214 | |
215 | const fn category_flag(c: Category) -> u32 { |
216 | rb_flag(c as u32) |
217 | } |
218 | |
219 | const MEDIAL_FLAGS: u32 = category_flag(category::CM); |
220 | // Note: |
221 | // |
222 | // We treat Vowels and placeholders as if they were consonants. This is safe because Vowels |
223 | // cannot happen in a consonant syllable. The plus side however is, we can call the |
224 | // consonant syllable logic from the vowel syllable function and get it all right! |
225 | const CONSONANT_FLAGS: u32 = category_flag(category::C) |
226 | | category_flag(category::CS) |
227 | | category_flag(category::RA) |
228 | | MEDIAL_FLAGS |
229 | | category_flag(category::V) |
230 | | category_flag(category::PLACEHOLDER) |
231 | | category_flag(category::DOTTED_CIRCLE); |
232 | const JOINER_FLAGS: u32 = category_flag(category::ZWJ) | category_flag(category::ZWNJ); |
233 | |
/// Code points that are re-categorised as `RA`, one or two per script.
// This is a hack for now: the data should either move into the main Indic
// table or be dropped in favour of a lookup in the tables.
const RA_CHARS: &[u32] = &[
    0x0930, // Devanagari
    0x09B0, // Bengali
    0x09F0, // Bengali
    0x0A30, // Gurmukhi (no Reph)
    0x0AB0, // Gujarati
    0x0B30, // Oriya
    0x0BB0, // Tamil (no Reph)
    0x0C30, // Telugu (Reph formed only with ZWJ)
    0x0CB0, // Kannada
    0x0D30, // Malayalam (no Reph, logical Repha)
    0x0DBB, // Sinhala (Reph formed only with ZWJ)
];
249 | |
/// Base-consonant mode of a script configuration.
#[derive(Clone, Copy, PartialEq)]
enum BasePosition {
    /// Mode used by the Sinhala configuration.
    LastSinhala,
    /// Mode used by every other configuration.
    Last,
}
255 | |
256 | #[derive (Clone, Copy, PartialEq)] |
257 | enum RephPosition { |
258 | AfterMain = position::AFTER_MAIN as isize, |
259 | BeforeSub = position::BEFORE_SUB as isize, |
260 | AfterSub = position::AFTER_SUB as isize, |
261 | BeforePost = position::BEFORE_POST as isize, |
262 | AfterPost = position::AFTER_POST as isize, |
263 | } |
264 | |
/// How a script forms its reph.
#[derive(Clone, Copy, PartialEq)]
enum RephMode {
    /// Reph is formed from an initial Ra,H sequence.
    Implicit,
    /// Reph is formed from an initial Ra,H,ZWJ sequence.
    Explicit,
    /// Repha is an encoded character that needs reordering.
    LogRepha,
}
274 | |
/// Which consonants the below-forms feature is applied to.
#[derive(Clone, Copy, PartialEq)]
enum BlwfMode {
    /// Applied to both pre-base and post-base consonants.
    PreAndPost,
    /// Applied to post-base consonants only.
    PostOnly,
}
282 | |
283 | #[derive (Clone, Copy)] |
284 | struct IndicConfig { |
285 | script: Option<Script>, |
286 | has_old_spec: bool, |
287 | virama: u32, |
288 | base_pos: BasePosition, |
289 | reph_pos: RephPosition, |
290 | reph_mode: RephMode, |
291 | blwf_mode: BlwfMode, |
292 | } |
293 | |
294 | impl IndicConfig { |
295 | const fn new( |
296 | script: Option<Script>, |
297 | has_old_spec: bool, |
298 | virama: u32, |
299 | base_pos: BasePosition, |
300 | reph_pos: RephPosition, |
301 | reph_mode: RephMode, |
302 | blwf_mode: BlwfMode, |
303 | ) -> Self { |
304 | IndicConfig { |
305 | script, |
306 | has_old_spec, |
307 | virama, |
308 | base_pos, |
309 | reph_pos, |
310 | reph_mode, |
311 | blwf_mode, |
312 | } |
313 | } |
314 | } |
315 | |
316 | const INDIC_CONFIGS: &[IndicConfig] = &[ |
317 | IndicConfig::new( |
318 | script:None, |
319 | has_old_spec:false, |
320 | virama:0, |
321 | base_pos:BasePosition::Last, |
322 | reph_pos:RephPosition::BeforePost, |
323 | RephMode::Implicit, |
324 | BlwfMode::PreAndPost, |
325 | ), |
326 | IndicConfig::new( |
327 | script:Some(script::DEVANAGARI), |
328 | has_old_spec:true, |
329 | virama:0x094D, |
330 | base_pos:BasePosition::Last, |
331 | reph_pos:RephPosition::BeforePost, |
332 | RephMode::Implicit, |
333 | BlwfMode::PreAndPost, |
334 | ), |
335 | IndicConfig::new( |
336 | script:Some(script::BENGALI), |
337 | has_old_spec:true, |
338 | virama:0x09CD, |
339 | base_pos:BasePosition::Last, |
340 | reph_pos:RephPosition::AfterSub, |
341 | RephMode::Implicit, |
342 | BlwfMode::PreAndPost, |
343 | ), |
344 | IndicConfig::new( |
345 | script:Some(script::GURMUKHI), |
346 | has_old_spec:true, |
347 | virama:0x0A4D, |
348 | base_pos:BasePosition::Last, |
349 | reph_pos:RephPosition::BeforeSub, |
350 | RephMode::Implicit, |
351 | BlwfMode::PreAndPost, |
352 | ), |
353 | IndicConfig::new( |
354 | script:Some(script::GUJARATI), |
355 | has_old_spec:true, |
356 | virama:0x0ACD, |
357 | base_pos:BasePosition::Last, |
358 | reph_pos:RephPosition::BeforePost, |
359 | RephMode::Implicit, |
360 | BlwfMode::PreAndPost, |
361 | ), |
362 | IndicConfig::new( |
363 | script:Some(script::ORIYA), |
364 | has_old_spec:true, |
365 | virama:0x0B4D, |
366 | base_pos:BasePosition::Last, |
367 | reph_pos:RephPosition::AfterMain, |
368 | RephMode::Implicit, |
369 | BlwfMode::PreAndPost, |
370 | ), |
371 | IndicConfig::new( |
372 | script:Some(script::TAMIL), |
373 | has_old_spec:true, |
374 | virama:0x0BCD, |
375 | base_pos:BasePosition::Last, |
376 | reph_pos:RephPosition::AfterPost, |
377 | RephMode::Implicit, |
378 | BlwfMode::PreAndPost, |
379 | ), |
380 | IndicConfig::new( |
381 | script:Some(script::TELUGU), |
382 | has_old_spec:true, |
383 | virama:0x0C4D, |
384 | base_pos:BasePosition::Last, |
385 | reph_pos:RephPosition::AfterPost, |
386 | RephMode::Explicit, |
387 | BlwfMode::PostOnly, |
388 | ), |
389 | IndicConfig::new( |
390 | script:Some(script::KANNADA), |
391 | has_old_spec:true, |
392 | virama:0x0CCD, |
393 | base_pos:BasePosition::Last, |
394 | reph_pos:RephPosition::AfterPost, |
395 | RephMode::Implicit, |
396 | BlwfMode::PostOnly, |
397 | ), |
398 | IndicConfig::new( |
399 | script:Some(script::MALAYALAM), |
400 | has_old_spec:true, |
401 | virama:0x0D4D, |
402 | base_pos:BasePosition::Last, |
403 | reph_pos:RephPosition::AfterMain, |
404 | RephMode::LogRepha, |
405 | BlwfMode::PreAndPost, |
406 | ), |
407 | IndicConfig::new( |
408 | script:Some(script::SINHALA), |
409 | has_old_spec:false, |
410 | virama:0x0DCA, |
411 | base_pos:BasePosition::LastSinhala, |
412 | reph_pos:RephPosition::AfterPost, |
413 | RephMode::Explicit, |
414 | BlwfMode::PreAndPost, |
415 | ), |
416 | ]; |
417 | |
418 | struct IndicWouldSubstituteFeature { |
419 | lookups: Range<usize>, |
420 | zero_context: bool, |
421 | } |
422 | |
423 | impl IndicWouldSubstituteFeature { |
424 | pub fn new(map: &Map, feature_tag: Tag, zero_context: bool) -> Self { |
425 | IndicWouldSubstituteFeature { |
426 | lookups: match map.feature_stage(TableIndex::GSUB, feature_tag) { |
427 | Some(stage) => map.stage_lookup_range(TableIndex::GSUB, stage), |
428 | None => 0..0, |
429 | }, |
430 | zero_context, |
431 | } |
432 | } |
433 | |
434 | pub fn would_substitute(&self, map: &Map, face: &Face, glyphs: &[GlyphId]) -> bool { |
435 | for index in self.lookups.clone() { |
436 | let lookup = map.lookup(TableIndex::GSUB, index); |
437 | let ctx = WouldApplyContext { |
438 | glyphs, |
439 | zero_context: self.zero_context, |
440 | }; |
441 | if face |
442 | .gsub |
443 | .as_ref() |
444 | .and_then(|table| table.get_lookup(lookup.index)) |
445 | .map_or(false, |lookup| lookup.would_apply(&ctx)) |
446 | { |
447 | return true; |
448 | } |
449 | } |
450 | |
451 | false |
452 | } |
453 | } |
454 | |
455 | struct IndicShapePlan { |
456 | config: IndicConfig, |
457 | is_old_spec: bool, |
458 | // virama_glyph: Option<u32>, |
459 | rphf: IndicWouldSubstituteFeature, |
460 | pref: IndicWouldSubstituteFeature, |
461 | blwf: IndicWouldSubstituteFeature, |
462 | pstf: IndicWouldSubstituteFeature, |
463 | vatu: IndicWouldSubstituteFeature, |
464 | mask_array: [Mask; INDIC_FEATURES.len()], |
465 | } |
466 | |
467 | impl IndicShapePlan { |
468 | fn new(plan: &ShapePlan) -> Self { |
469 | let script = plan.script; |
470 | let config = if let Some(c) = INDIC_CONFIGS.iter().skip(1).find(|c| c.script == script) { |
471 | *c |
472 | } else { |
473 | INDIC_CONFIGS[0] |
474 | }; |
475 | |
476 | let is_old_spec = config.has_old_spec |
477 | && plan |
478 | .ot_map |
479 | .chosen_script(TableIndex::GSUB) |
480 | .map_or(true, |tag| tag.to_bytes()[3] != b'2' ); |
481 | |
482 | // Use zero-context would_substitute() matching for new-spec of the main |
483 | // Indic scripts, and scripts with one spec only, but not for old-specs. |
484 | // The new-spec for all dual-spec scripts says zero-context matching happens. |
485 | // |
486 | // However, testing with Malayalam shows that old and new spec both allow |
487 | // context. Testing with Bengali new-spec however shows that it doesn't. |
488 | // So, the heuristic here is the way it is. It should *only* be changed, |
489 | // as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE. |
490 | let zero_context = is_old_spec && script != Some(script::MALAYALAM); |
491 | |
492 | let mut mask_array = [0; INDIC_FEATURES.len()]; |
493 | for (i, feature) in INDIC_FEATURES.iter().enumerate() { |
494 | mask_array[i] = if feature.1.contains(FeatureFlags::GLOBAL) { |
495 | 0 |
496 | } else { |
497 | plan.ot_map.one_mask(feature.0) |
498 | } |
499 | } |
500 | |
501 | // TODO: what is this? |
502 | // let mut virama_glyph = None; |
503 | // if config.virama != 0 { |
504 | // if let Some(g) = face.glyph_index(char::try_from(config.virama).unwrap()) { |
505 | // virama_glyph = Some(g.0 as u32); |
506 | // } |
507 | // } |
508 | |
509 | IndicShapePlan { |
510 | config, |
511 | is_old_spec, |
512 | // virama_glyph, |
513 | rphf: IndicWouldSubstituteFeature::new(&plan.ot_map, feature::REPH_FORMS, zero_context), |
514 | pref: IndicWouldSubstituteFeature::new( |
515 | &plan.ot_map, |
516 | feature::PRE_BASE_FORMS, |
517 | zero_context, |
518 | ), |
519 | blwf: IndicWouldSubstituteFeature::new( |
520 | &plan.ot_map, |
521 | feature::BELOW_BASE_FORMS, |
522 | zero_context, |
523 | ), |
524 | pstf: IndicWouldSubstituteFeature::new( |
525 | &plan.ot_map, |
526 | feature::POST_BASE_FORMS, |
527 | zero_context, |
528 | ), |
529 | vatu: IndicWouldSubstituteFeature::new( |
530 | &plan.ot_map, |
531 | feature::VATTU_VARIANTS, |
532 | zero_context, |
533 | ), |
534 | mask_array, |
535 | } |
536 | } |
537 | } |
538 | |
539 | impl GlyphInfo { |
540 | pub(crate) fn indic_category(&self) -> Category { |
541 | self.complex_var_u8_category() |
542 | } |
543 | |
544 | pub(crate) fn set_indic_category(&mut self, c: Category) { |
545 | self.set_complex_var_u8_category(c) |
546 | } |
547 | |
548 | pub(crate) fn indic_position(&self) -> Position { |
549 | self.complex_var_u8_auxiliary() |
550 | } |
551 | |
552 | pub(crate) fn set_indic_position(&mut self, c: Position) { |
553 | self.set_complex_var_u8_auxiliary(c) |
554 | } |
555 | |
556 | fn is_one_of(&self, flags: u32) -> bool { |
557 | // If it ligated, all bets are off. |
558 | if self.is_ligated() { |
559 | return false; |
560 | } |
561 | |
562 | rb_flag_unsafe(self.indic_category() as u32) & flags != 0 |
563 | } |
564 | |
565 | fn is_joiner(&self) -> bool { |
566 | self.is_one_of(JOINER_FLAGS) |
567 | } |
568 | |
569 | pub(crate) fn is_consonant(&self) -> bool { |
570 | self.is_one_of(CONSONANT_FLAGS) |
571 | } |
572 | |
573 | fn is_halant(&self) -> bool { |
574 | self.is_one_of(rb_flag(category::H as u32)) |
575 | } |
576 | |
577 | fn set_indic_properties(&mut self) { |
578 | let u = self.glyph_id; |
579 | let (mut cat, mut pos) = get_category_and_position(u); |
580 | |
581 | // Re-assign category |
582 | |
583 | // The following act more like the Bindus. |
584 | match u { |
585 | 0x0953..=0x0954 => cat = category::SM, |
586 | // The following act like consonants. |
587 | 0x0A72..=0x0A73 | 0x1CF5..=0x1CF6 => cat = category::C, |
588 | // TODO: The following should only be allowed after a Visarga. |
589 | // For now, just treat them like regular tone marks. |
590 | 0x1CE2..=0x1CE8 => cat = category::A, |
591 | // TODO: The following should only be allowed after some of |
592 | // the nasalization marks, maybe only for U+1CE9..U+1CF1. |
593 | // For now, just treat them like tone marks. |
594 | 0x1CED => cat = category::A, |
595 | // The following take marks in standalone clusters, similar to Avagraha. |
596 | 0xA8F2..=0xA8F7 | 0x1CE9..=0x1CEC | 0x1CEE..=0x1CF1 => cat = category::SYMBOL, |
597 | // https://github.com/harfbuzz/harfbuzz/issues/524 |
598 | 0x0A51 => { |
599 | cat = category::M; |
600 | pos = position::BELOW_C; |
601 | } |
602 | // According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil, |
603 | // so the Indic shaper needs to know their categories. |
604 | 0x11301 | 0x11303 => cat = category::SM, |
605 | 0x1133B | 0x1133C => cat = category::N, |
606 | // https://github.com/harfbuzz/harfbuzz/issues/552 |
607 | 0x0AFB => cat = category::N, |
608 | // https://github.com/harfbuzz/harfbuzz/issues/2849 |
609 | 0x0B55 => cat = category::N, |
610 | // https://github.com/harfbuzz/harfbuzz/issues/538 |
611 | 0x0980 => cat = category::PLACEHOLDER, |
612 | // https://github.com/harfbuzz/harfbuzz/issues/1613 |
613 | 0x09FC => cat = category::PLACEHOLDER, |
614 | // https://github.com/harfbuzz/harfbuzz/issues/623 |
615 | 0x0C80 => cat = category::PLACEHOLDER, |
616 | 0x2010 | 0x2011 => cat = category::PLACEHOLDER, |
617 | 0x25CC => cat = category::DOTTED_CIRCLE, |
618 | _ => {} |
619 | } |
620 | |
621 | // Re-assign position. |
622 | |
623 | if (rb_flag_unsafe(cat as u32) & CONSONANT_FLAGS) != 0 { |
624 | pos = position::BASE_C; |
625 | if RA_CHARS.contains(&u) { |
626 | cat = category::RA; |
627 | } |
628 | } else if cat == category::M { |
629 | pos = matra_position_indic(u, pos); |
630 | } else if (rb_flag_unsafe(cat as u32) |
631 | & (category_flag(category::SM) |
632 | | category_flag(category::A) |
633 | | category_flag(category::SYMBOL))) |
634 | != 0 |
635 | { |
636 | pos = position::SMVD; |
637 | } |
638 | |
639 | // Oriya Bindu is BeforeSub in the spec. |
640 | if u == 0x0B01 { |
641 | pos = position::BEFORE_SUB; |
642 | } |
643 | |
644 | self.set_indic_category(cat); |
645 | self.set_indic_position(pos); |
646 | } |
647 | } |
648 | |
649 | fn collect_features(planner: &mut ShapePlanner) { |
650 | // Do this before any lookups have been applied. |
651 | planner.ot_map.add_gsub_pause(Some(setup_syllables)); |
652 | |
653 | planner |
654 | .ot_map |
655 | .enable_feature(feature::LOCALIZED_FORMS, FeatureFlags::empty(), 1); |
656 | // The Indic specs do not require ccmp, but we apply it here since if |
657 | // there is a use of it, it's typically at the beginning. |
658 | planner.ot_map.enable_feature( |
659 | feature::GLYPH_COMPOSITION_DECOMPOSITION, |
660 | FeatureFlags::empty(), |
661 | 1, |
662 | ); |
663 | |
664 | planner.ot_map.add_gsub_pause(Some(initial_reordering)); |
665 | |
666 | for feature in INDIC_FEATURES.iter().take(10) { |
667 | planner.ot_map.add_feature(feature.0, feature.1, 1); |
668 | planner.ot_map.add_gsub_pause(None); |
669 | } |
670 | |
671 | planner.ot_map.add_gsub_pause(Some(final_reordering)); |
672 | |
673 | for feature in INDIC_FEATURES.iter().skip(10) { |
674 | planner.ot_map.add_feature(feature.0, feature.1, 1); |
675 | } |
676 | |
677 | planner |
678 | .ot_map |
679 | .enable_feature(feature::CONTEXTUAL_ALTERNATES, FeatureFlags::empty(), 1); |
680 | planner |
681 | .ot_map |
682 | .enable_feature(feature::CONTEXTUAL_LIGATURES, FeatureFlags::empty(), 1); |
683 | |
684 | planner |
685 | .ot_map |
686 | .add_gsub_pause(Some(crate::ot::clear_syllables)); |
687 | } |
688 | |
689 | fn override_features(planner: &mut ShapePlanner) { |
690 | planner.ot_map.disable_feature(tag:feature::STANDARD_LIGATURES); |
691 | } |
692 | |
693 | fn preprocess_text(_: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
694 | super::vowel_constraints::preprocess_text_vowel_constraints(buffer); |
695 | } |
696 | |
697 | fn decompose(ctx: &ShapeNormalizeContext, ab: char) -> Option<(char, char)> { |
698 | // Don't decompose these. |
699 | match ab { |
700 | ' \u{0931}' | // DEVANAGARI LETTER RRA |
701 | // https://github.com/harfbuzz/harfbuzz/issues/779 |
702 | ' \u{09DC}' | // BENGALI LETTER RRA |
703 | ' \u{09DD}' | // BENGALI LETTER RHA |
704 | ' \u{0B94}' => return None, // TAMIL LETTER AU |
705 | _ => {} |
706 | } |
707 | |
708 | if ab == ' \u{0DDA}' || (' \u{0DDC}' ..=' \u{0DDE}' ).contains(&ab) { |
709 | // Sinhala split matras... Let the fun begin. |
710 | // |
711 | // These four characters have Unicode decompositions. However, Uniscribe |
712 | // decomposes them "Khmer-style", that is, it uses the character itself to |
713 | // get the second half. The first half of all four decompositions is always |
714 | // U+0DD9. |
715 | // |
716 | // Now, there are buggy fonts, namely, the widely used lklug.ttf, that are |
717 | // broken with Uniscribe. But we need to support them. As such, we only |
718 | // do the Uniscribe-style decomposition if the character is transformed into |
719 | // its "sec.half" form by the 'pstf' feature. Otherwise, we fall back to |
720 | // Unicode decomposition. |
721 | // |
722 | // Note that we can't unconditionally use Unicode decomposition. That would |
723 | // break some other fonts, that are designed to work with Uniscribe, and |
724 | // don't have positioning features for the Unicode-style decomposition. |
725 | // |
726 | // Argh... |
727 | // |
728 | // The Uniscribe behavior is now documented in the newly published Sinhala |
729 | // spec in 2012: |
730 | // |
731 | // https://docs.microsoft.com/en-us/typography/script-development/sinhala#shaping |
732 | |
733 | let mut ok = false; |
734 | if let Some(g) = ctx.face.glyph_index(u32::from(ab)) { |
735 | let indic_plan = ctx.plan.data::<IndicShapePlan>(); |
736 | ok = indic_plan |
737 | .pstf |
738 | .would_substitute(&ctx.plan.ot_map, ctx.face, &[g]); |
739 | } |
740 | |
741 | if ok { |
742 | // Ok, safe to use Uniscribe-style decomposition. |
743 | return Some((' \u{0DD9}' , ab)); |
744 | } |
745 | } |
746 | |
747 | crate::unicode::decompose(ab) |
748 | } |
749 | |
750 | fn compose(_: &ShapeNormalizeContext, a: char, b: char) -> Option<char> { |
751 | // Avoid recomposing split matras. |
752 | if a.general_category().is_mark() { |
753 | return None; |
754 | } |
755 | |
756 | // Composition-exclusion exceptions that we want to recompose. |
757 | if a == ' \u{09AF}' && b == ' \u{09BC}' { |
758 | return Some(' \u{09DF}' ); |
759 | } |
760 | |
761 | crate::unicode::compose(a, b) |
762 | } |
763 | |
764 | fn setup_masks(_: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
765 | // We cannot setup masks here. We save information about characters |
766 | // and setup masks later on in a pause-callback. |
767 | for info: &mut GlyphInfo in buffer.info_slice_mut() { |
768 | info.set_indic_properties(); |
769 | } |
770 | } |
771 | |
772 | fn setup_syllables(_: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
773 | super::indic_machine::find_syllables_indic(buffer); |
774 | |
775 | let mut start: usize = 0; |
776 | let mut end: usize = buffer.next_syllable(start:0); |
777 | while start < buffer.len { |
778 | buffer.unsafe_to_break(start, end); |
779 | start = end; |
780 | end = buffer.next_syllable(start); |
781 | } |
782 | } |
783 | |
784 | fn initial_reordering(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) { |
785 | use super::indic_machine::SyllableType; |
786 | |
787 | let indic_plan: &IndicShapePlan = plan.data::<IndicShapePlan>(); |
788 | |
789 | update_consonant_positions(plan, indic_plan, face, buffer); |
790 | syllabic::insert_dotted_circles( |
791 | face, |
792 | buffer, |
793 | broken_syllable_type:SyllableType::BrokenCluster as u8, |
794 | dottedcircle_category:category::DOTTED_CIRCLE, |
795 | repha_category:Some(category::REPHA), |
796 | dottedcircle_position:Some(position::END), |
797 | ); |
798 | |
799 | let mut start: usize = 0; |
800 | let mut end: usize = buffer.next_syllable(start:0); |
801 | while start < buffer.len { |
802 | initial_reordering_syllable(plan, indic_plan, face, start, end, buffer); |
803 | start = end; |
804 | end = buffer.next_syllable(start); |
805 | } |
806 | } |
807 | |
808 | fn update_consonant_positions( |
809 | plan: &ShapePlan, |
810 | indic_plan: &IndicShapePlan, |
811 | face: &Face, |
812 | buffer: &mut Buffer, |
813 | ) { |
814 | if indic_plan.config.base_pos != BasePosition::Last { |
815 | return; |
816 | } |
817 | |
818 | let mut virama_glyph: Option = None; |
819 | if indic_plan.config.virama != 0 { |
820 | virama_glyph = face.glyph_index(indic_plan.config.virama); |
821 | } |
822 | |
823 | if let Some(virama: GlyphId) = virama_glyph { |
824 | for info: &mut GlyphInfo in buffer.info_slice_mut() { |
825 | if info.indic_position() == position::BASE_C { |
826 | let consonant: GlyphId = info.as_glyph(); |
827 | info.set_indic_position(consonant_position_from_face( |
828 | plan, indic_plan, face, consonant, virama, |
829 | )); |
830 | } |
831 | } |
832 | } |
833 | } |
834 | |
835 | fn consonant_position_from_face( |
836 | plan: &ShapePlan, |
837 | indic_plan: &IndicShapePlan, |
838 | face: &Face, |
839 | consonant: GlyphId, |
840 | virama: GlyphId, |
841 | ) -> u8 { |
842 | // For old-spec, the order of glyphs is Consonant,Virama, |
843 | // whereas for new-spec, it's Virama,Consonant. However, |
844 | // some broken fonts (like Free Sans) simply copied lookups |
845 | // from old-spec to new-spec without modification. |
846 | // And oddly enough, Uniscribe seems to respect those lookups. |
847 | // Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds |
848 | // base at 0. The font however, only has lookups matching |
849 | // 930,94D in 'blwf', not the expected 94D,930 (with new-spec |
850 | // table). As such, we simply match both sequences. Seems |
851 | // to work. |
852 | // |
853 | // Vatu is done as well, for: |
854 | // https://github.com/harfbuzz/harfbuzz/issues/1587 |
855 | |
856 | if indic_plan |
857 | .blwf |
858 | .would_substitute(&plan.ot_map, face, &[virama, consonant]) |
859 | || indic_plan |
860 | .blwf |
861 | .would_substitute(&plan.ot_map, face, &[consonant, virama]) |
862 | || indic_plan |
863 | .vatu |
864 | .would_substitute(&plan.ot_map, face, &[virama, consonant]) |
865 | || indic_plan |
866 | .vatu |
867 | .would_substitute(&plan.ot_map, face, &[consonant, virama]) |
868 | { |
869 | return position::BELOW_C; |
870 | } |
871 | |
872 | if indic_plan |
873 | .pstf |
874 | .would_substitute(&plan.ot_map, face, &[virama, consonant]) |
875 | || indic_plan |
876 | .pstf |
877 | .would_substitute(&plan.ot_map, face, &[consonant, virama]) |
878 | { |
879 | return position::POST_C; |
880 | } |
881 | |
882 | if indic_plan |
883 | .pref |
884 | .would_substitute(&plan.ot_map, face, &[virama, consonant]) |
885 | || indic_plan |
886 | .pref |
887 | .would_substitute(&plan.ot_map, face, &[consonant, virama]) |
888 | { |
889 | return position::POST_C; |
890 | } |
891 | |
892 | position::BASE_C |
893 | } |
894 | |
895 | fn initial_reordering_syllable( |
896 | plan: &ShapePlan, |
897 | indic_plan: &IndicShapePlan, |
898 | face: &Face, |
899 | start: usize, |
900 | end: usize, |
901 | buffer: &mut Buffer, |
902 | ) { |
903 | use super::indic_machine::SyllableType; |
904 | |
905 | let syllable_type = match buffer.info[start].syllable() & 0x0F { |
906 | 0 => SyllableType::ConsonantSyllable, |
907 | 1 => SyllableType::VowelSyllable, |
908 | 2 => SyllableType::StandaloneCluster, |
909 | 3 => SyllableType::SymbolCluster, |
910 | 4 => SyllableType::BrokenCluster, |
911 | 5 => SyllableType::NonIndicCluster, |
912 | _ => unreachable!(), |
913 | }; |
914 | |
915 | match syllable_type { |
916 | // We made the vowels look like consonants. So let's call the consonant logic! |
917 | SyllableType::VowelSyllable | SyllableType::ConsonantSyllable => { |
918 | initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer); |
919 | } |
920 | // We already inserted dotted-circles, so just call the standalone_cluster. |
921 | SyllableType::BrokenCluster | SyllableType::StandaloneCluster => { |
922 | initial_reordering_standalone_cluster(plan, indic_plan, face, start, end, buffer); |
923 | } |
924 | SyllableType::SymbolCluster | SyllableType::NonIndicCluster => {} |
925 | } |
926 | } |
927 | |
928 | // Rules from: |
929 | // https://docs.microsoft.com/en-us/typography/script-development/devanagari |
930 | fn initial_reordering_consonant_syllable( |
931 | plan: &ShapePlan, |
932 | indic_plan: &IndicShapePlan, |
933 | face: &Face, |
934 | start: usize, |
935 | end: usize, |
936 | buffer: &mut Buffer, |
937 | ) { |
938 | // https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167 |
939 | // For compatibility with legacy usage in Kannada, |
940 | // Ra+h+ZWJ must behave like Ra+ZWJ+h... |
941 | if buffer.script == Some(script::KANNADA) |
942 | && start + 3 <= end |
943 | && buffer.info[start].is_one_of(category_flag(category::RA)) |
944 | && buffer.info[start + 1].is_one_of(category_flag(category::H)) |
945 | && buffer.info[start + 2].is_one_of(category_flag(category::ZWJ)) |
946 | { |
947 | buffer.merge_clusters(start + 1, start + 3); |
948 | buffer.info.swap(start + 1, start + 2); |
949 | } |
950 | |
951 | // 1. Find base consonant: |
952 | // |
953 | // The shaping engine finds the base consonant of the syllable, using the |
954 | // following algorithm: starting from the end of the syllable, move backwards |
955 | // until a consonant is found that does not have a below-base or post-base |
956 | // form (post-base forms have to follow below-base forms), or that is not a |
957 | // pre-base-reordering Ra, or arrive at the first consonant. The consonant |
958 | // stopped at will be the base. |
959 | // |
960 | // - If the syllable starts with Ra + Halant (in a script that has Reph) |
961 | // and has more than one consonant, Ra is excluded from candidates for |
962 | // base consonants. |
963 | |
964 | let mut base = end; |
965 | let mut has_reph = false; |
966 | |
967 | { |
968 | // -> If the syllable starts with Ra + Halant (in a script that has Reph) |
969 | // and has more than one consonant, Ra is excluded from candidates for |
970 | // base consonants. |
971 | let mut limit = start; |
972 | if indic_plan.mask_array[indic_feature::RPHF] != 0 |
973 | && start + 3 <= end |
974 | && ((indic_plan.config.reph_mode == RephMode::Implicit |
975 | && !buffer.info[start + 2].is_joiner()) |
976 | || (indic_plan.config.reph_mode == RephMode::Explicit |
977 | && buffer.info[start + 2].indic_category() == category::ZWJ)) |
978 | { |
979 | // See if it matches the 'rphf' feature. |
980 | let glyphs = &[ |
981 | buffer.info[start].as_glyph(), |
982 | buffer.info[start + 1].as_glyph(), |
983 | if indic_plan.config.reph_mode == RephMode::Explicit { |
984 | buffer.info[start + 2].as_glyph() |
985 | } else { |
986 | GlyphId(0) |
987 | }, |
988 | ]; |
989 | if indic_plan |
990 | .rphf |
991 | .would_substitute(&plan.ot_map, face, &glyphs[0..2]) |
992 | || (indic_plan.config.reph_mode == RephMode::Explicit |
993 | && indic_plan.rphf.would_substitute(&plan.ot_map, face, glyphs)) |
994 | { |
995 | limit += 2; |
996 | while limit < end && buffer.info[limit].is_joiner() { |
997 | limit += 1; |
998 | } |
999 | base = start; |
1000 | has_reph = true; |
1001 | } |
1002 | } else if indic_plan.config.reph_mode == RephMode::LogRepha |
1003 | && buffer.info[start].indic_category() == category::REPHA |
1004 | { |
1005 | limit += 1; |
1006 | while limit < end && buffer.info[limit].is_joiner() { |
1007 | limit += 1; |
1008 | } |
1009 | base = start; |
1010 | has_reph = true; |
1011 | } |
1012 | |
1013 | match indic_plan.config.base_pos { |
1014 | BasePosition::Last => { |
1015 | // -> starting from the end of the syllable, move backwards |
1016 | let mut i = end; |
1017 | let mut seen_below = false; |
1018 | loop { |
1019 | i -= 1; |
1020 | // -> until a consonant is found |
1021 | if buffer.info[i].is_consonant() { |
1022 | // -> that does not have a below-base or post-base form |
1023 | // (post-base forms have to follow below-base forms), |
1024 | if buffer.info[i].indic_position() != position::BELOW_C |
1025 | && (buffer.info[i].indic_position() != position::POST_C || seen_below) |
1026 | { |
1027 | base = i; |
1028 | break; |
1029 | } |
1030 | if buffer.info[i].indic_position() == position::BELOW_C { |
1031 | seen_below = true; |
1032 | } |
1033 | |
1034 | // -> or that is not a pre-base-reordering Ra, |
1035 | // |
1036 | // IMPLEMENTATION NOTES: |
1037 | // |
1038 | // Our pre-base-reordering Ra's are marked position::PostC, so will be skipped |
1039 | // by the logic above already. |
1040 | |
1041 | // -> or arrive at the first consonant. The consonant stopped at will |
1042 | // be the base. |
1043 | base = i; |
1044 | } else { |
1045 | // A ZWJ after a Halant stops the base search, and requests an explicit |
1046 | // half form. |
1047 | // A ZWJ before a Halant, requests a subjoined form instead, and hence |
1048 | // search continues. This is particularly important for Bengali |
1049 | // sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. |
1050 | if start < i |
1051 | && buffer.info[i].indic_category() == category::ZWJ |
1052 | && buffer.info[i - 1].indic_category() == category::H |
1053 | { |
1054 | break; |
1055 | } |
1056 | } |
1057 | |
1058 | if i <= limit { |
1059 | break; |
1060 | } |
1061 | } |
1062 | } |
1063 | BasePosition::LastSinhala => { |
1064 | // Sinhala base positioning is slightly different from main Indic, in that: |
1065 | // 1. Its ZWJ behavior is different, |
1066 | // 2. We don't need to look into the font for consonant positions. |
1067 | |
1068 | if !has_reph { |
1069 | base = limit; |
1070 | } |
1071 | |
1072 | // Find the last base consonant that is not blocked by ZWJ. If there is |
1073 | // a ZWJ right before a base consonant, that would request a subjoined form. |
1074 | for i in limit..end { |
1075 | if buffer.info[i].is_consonant() { |
1076 | if limit < i && buffer.info[i - 1].indic_category() == category::ZWJ { |
1077 | break; |
1078 | } else { |
1079 | base = i; |
1080 | } |
1081 | } |
1082 | } |
1083 | |
1084 | // Mark all subsequent consonants as below. |
1085 | for i in base + 1..end { |
1086 | if buffer.info[i].is_consonant() { |
1087 | buffer.info[i].set_indic_position(position::BELOW_C); |
1088 | } |
1089 | } |
1090 | } |
1091 | } |
1092 | |
1093 | // -> If the syllable starts with Ra + Halant (in a script that has Reph) |
1094 | // and has more than one consonant, Ra is excluded from candidates for |
1095 | // base consonants. |
1096 | // |
1097 | // Only do this for unforced Reph. (ie. not for Ra,H,ZWJ. |
1098 | if has_reph && base == start && limit - base <= 2 { |
1099 | // Have no other consonant, so Reph is not formed and Ra becomes base. |
1100 | has_reph = false; |
1101 | } |
1102 | } |
1103 | |
1104 | // 2. Decompose and reorder Matras: |
1105 | // |
1106 | // Each matra and any syllable modifier sign in the syllable are moved to the |
1107 | // appropriate position relative to the consonant(s) in the syllable. The |
1108 | // shaping engine decomposes two- or three-part matras into their constituent |
1109 | // parts before any repositioning. Matra characters are classified by which |
1110 | // consonant in a conjunct they have affinity for and are reordered to the |
1111 | // following positions: |
1112 | // |
1113 | // - Before first half form in the syllable |
1114 | // - After subjoined consonants |
1115 | // - After post-form consonant |
1116 | // - After main consonant (for above marks) |
1117 | // |
1118 | // IMPLEMENTATION NOTES: |
1119 | // |
1120 | // The normalize() routine has already decomposed matras for us, so we don't |
1121 | // need to worry about that. |
1122 | |
1123 | // 3. Reorder marks to canonical order: |
1124 | // |
1125 | // Adjacent nukta and halant or nukta and vedic sign are always repositioned |
1126 | // if necessary, so that the nukta is first. |
1127 | // |
1128 | // IMPLEMENTATION NOTES: |
1129 | // |
1130 | // We don't need to do this: the normalize() routine already did this for us. |
1131 | |
1132 | // Reorder characters |
1133 | |
1134 | for i in start..base { |
1135 | let pos = buffer.info[i].indic_position(); |
1136 | buffer.info[i].set_indic_position(cmp::min(position::PRE_C, pos)); |
1137 | } |
1138 | |
1139 | if base < end { |
1140 | buffer.info[base].set_indic_position(position::BASE_C); |
1141 | } |
1142 | |
1143 | // Mark final consonants. A final consonant is one appearing after a matra. |
1144 | // Happens in Sinhala. |
1145 | for i in base + 1..end { |
1146 | if buffer.info[i].indic_category() == category::M { |
1147 | for j in i + 1..end { |
1148 | if buffer.info[j].is_consonant() { |
1149 | buffer.info[j].set_indic_position(position::FINAL_C); |
1150 | break; |
1151 | } |
1152 | } |
1153 | |
1154 | break; |
1155 | } |
1156 | } |
1157 | |
1158 | // Handle beginning Ra |
1159 | if has_reph { |
1160 | buffer.info[start].set_indic_position(position::RA_TO_BECOME_REPH); |
1161 | } |
1162 | |
1163 | // For old-style Indic script tags, move the first post-base Halant after |
1164 | // last consonant. |
1165 | // |
1166 | // Reports suggest that in some scripts Uniscribe does this only if there |
1167 | // is *not* a Halant after last consonant already. We know that is the |
1168 | // case for Kannada, while it reorders unconditionally in other scripts, |
1169 | // eg. Malayalam, Bengali, and Devanagari. We don't currently know about |
1170 | // other scripts, so we block Kannada. |
1171 | // |
1172 | // Kannada test case: |
1173 | // U+0C9A,U+0CCD,U+0C9A,U+0CCD |
1174 | // With some versions of Lohit Kannada. |
1175 | // https://bugs.freedesktop.org/show_bug.cgi?id=59118 |
1176 | // |
1177 | // Malayalam test case: |
1178 | // U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D |
1179 | // With lohit-ttf-20121122/Lohit-Malayalam.ttf |
1180 | // |
1181 | // Bengali test case: |
1182 | // U+0998,U+09CD,U+09AF,U+09CD |
1183 | // With Windows XP vrinda.ttf |
1184 | // https://github.com/harfbuzz/harfbuzz/issues/1073 |
1185 | // |
1186 | // Devanagari test case: |
1187 | // U+091F,U+094D,U+0930,U+094D |
1188 | // With chandas.ttf |
1189 | // https://github.com/harfbuzz/harfbuzz/issues/1071 |
1190 | if indic_plan.is_old_spec { |
1191 | let disallow_double_halants = buffer.script == Some(script::KANNADA); |
1192 | for i in base + 1..end { |
1193 | if buffer.info[i].indic_category() == category::H { |
1194 | let mut j = end - 1; |
1195 | while j > i { |
1196 | if buffer.info[j].is_consonant() |
1197 | || (disallow_double_halants |
1198 | && buffer.info[j].indic_category() == category::H) |
1199 | { |
1200 | break; |
1201 | } |
1202 | |
1203 | j -= 1; |
1204 | } |
1205 | |
1206 | if buffer.info[j].indic_category() != category::H && j > i { |
1207 | // Move Halant to after last consonant. |
1208 | let t = buffer.info[i]; |
1209 | for k in 0..j - i { |
1210 | buffer.info[k + i] = buffer.info[k + i + 1]; |
1211 | } |
1212 | buffer.info[j] = t; |
1213 | } |
1214 | |
1215 | break; |
1216 | } |
1217 | } |
1218 | } |
1219 | |
1220 | // Attach misc marks to previous char to move with them. |
1221 | { |
1222 | let mut last_pos = position::START; |
1223 | for i in start..end { |
1224 | let ok = rb_flag_unsafe(buffer.info[i].indic_category() as u32) |
1225 | & (category_flag(category::ZWJ) |
1226 | | category_flag(category::ZWNJ) |
1227 | | category_flag(category::N) |
1228 | | category_flag(category::RS) |
1229 | | category_flag(category::CM) |
1230 | | category_flag(category::H)) |
1231 | != 0; |
1232 | if ok { |
1233 | buffer.info[i].set_indic_position(last_pos); |
1234 | |
1235 | if buffer.info[i].indic_category() == category::H |
1236 | && buffer.info[i].indic_position() == position::PRE_M |
1237 | { |
1238 | // Uniscribe doesn't move the Halant with Left Matra. |
1239 | // TEST: U+092B,U+093F,U+094DE |
1240 | // We follow. This is important for the Sinhala |
1241 | // U+0DDA split matra since it decomposes to U+0DD9,U+0DCA |
1242 | // where U+0DD9 is a left matra and U+0DCA is the virama. |
1243 | // We don't want to move the virama with the left matra. |
1244 | // TEST: U+0D9A,U+0DDA |
1245 | for j in (start + 1..=i).rev() { |
1246 | if buffer.info[j - 1].indic_position() != position::PRE_M { |
1247 | let pos = buffer.info[j - 1].indic_position(); |
1248 | buffer.info[i].set_indic_position(pos); |
1249 | break; |
1250 | } |
1251 | } |
1252 | } |
1253 | } else if buffer.info[i].indic_position() != position::SMVD { |
1254 | last_pos = buffer.info[i].indic_position(); |
1255 | } |
1256 | } |
1257 | } |
1258 | // For post-base consonants let them own anything before them |
1259 | // since the last consonant or matra. |
1260 | { |
1261 | let mut last = base; |
1262 | for i in base + 1..end { |
1263 | if buffer.info[i].is_consonant() { |
1264 | for j in last + 1..i { |
1265 | if (buffer.info[j].indic_position() as u8) < (position::SMVD as u8) { |
1266 | let pos = buffer.info[i].indic_position(); |
1267 | buffer.info[j].set_indic_position(pos); |
1268 | } |
1269 | } |
1270 | |
1271 | last = i; |
1272 | } else if buffer.info[i].indic_category() == category::M { |
1273 | last = i; |
1274 | } |
1275 | } |
1276 | } |
1277 | |
1278 | { |
1279 | // Use syllable() for sort accounting temporarily. |
1280 | let syllable = buffer.info[start].syllable(); |
1281 | for i in start..end { |
1282 | buffer.info[i].set_syllable(u8::try_from(i - start).unwrap()); |
1283 | } |
1284 | |
1285 | buffer.info[start..end].sort_by(|a, b| a.indic_position().cmp(&b.indic_position())); |
1286 | |
1287 | // Find base again. |
1288 | base = end; |
1289 | for i in start..end { |
1290 | if buffer.info[i].indic_position() == position::BASE_C { |
1291 | base = i; |
1292 | break; |
1293 | } |
1294 | } |
1295 | // Things are out-of-control for post base positions, they may shuffle |
1296 | // around like crazy. In old-spec mode, we move halants around, so in |
1297 | // that case merge all clusters after base. Otherwise, check the sort |
1298 | // order and merge as needed. |
1299 | // For pre-base stuff, we handle cluster issues in final reordering. |
1300 | // |
1301 | // We could use buffer->sort() for this, if there was no special |
1302 | // reordering of pre-base stuff happening later... |
1303 | // We don't want to merge_clusters all of that, which buffer->sort() |
1304 | // would. Here's a concrete example: |
1305 | // |
1306 | // Assume there's a pre-base consonant and explicit Halant before base, |
1307 | // followed by a prebase-reordering (left) Matra: |
1308 | // |
1309 | // C,H,ZWNJ,B,M |
1310 | // |
1311 | // At this point in reordering we would have: |
1312 | // |
1313 | // M,C,H,ZWNJ,B |
1314 | // |
1315 | // whereas in final reordering we will bring the Matra closer to Base: |
1316 | // |
1317 | // C,H,ZWNJ,M,B |
1318 | // |
1319 | // That's why we don't want to merge-clusters anything before the Base |
1320 | // at this point. But if something moved from after Base to before it, |
1321 | // we should merge clusters from base to them. In final-reordering, we |
1322 | // only move things around before base, and merge-clusters up to base. |
1323 | // These two merge-clusters from the two sides of base will interlock |
1324 | // to merge things correctly. See: |
1325 | // https://github.com/harfbuzz/harfbuzz/issues/2272 |
1326 | if indic_plan.is_old_spec || end - start > 127 { |
1327 | buffer.merge_clusters(base, end); |
1328 | } else { |
1329 | // Note! syllable() is a one-byte field. |
1330 | for i in base..end { |
1331 | if buffer.info[i].syllable() != 255 { |
1332 | let mut min = i; |
1333 | let mut max = i; |
1334 | let mut j = start + buffer.info[i].syllable() as usize; |
1335 | while j != i { |
1336 | min = cmp::min(min, j); |
1337 | max = cmp::max(max, j); |
1338 | let next = start + buffer.info[j].syllable() as usize; |
1339 | buffer.info[j].set_syllable(255); // So we don't process j later again. |
1340 | j = next; |
1341 | } |
1342 | |
1343 | buffer.merge_clusters(cmp::max(base, min), max + 1); |
1344 | } |
1345 | } |
1346 | } |
1347 | |
1348 | // Put syllable back in. |
1349 | for info in &mut buffer.info[start..end] { |
1350 | info.set_syllable(syllable); |
1351 | } |
1352 | } |
1353 | |
1354 | // Setup masks now |
1355 | |
1356 | { |
1357 | // Reph |
1358 | for info in &mut buffer.info[start..end] { |
1359 | if info.indic_position() != position::RA_TO_BECOME_REPH { |
1360 | break; |
1361 | } |
1362 | |
1363 | info.mask |= indic_plan.mask_array[indic_feature::RPHF]; |
1364 | } |
1365 | |
1366 | // Pre-base |
1367 | let mut mask = indic_plan.mask_array[indic_feature::HALF]; |
1368 | if !indic_plan.is_old_spec && indic_plan.config.blwf_mode == BlwfMode::PreAndPost { |
1369 | mask |= indic_plan.mask_array[indic_feature::BLWF]; |
1370 | } |
1371 | |
1372 | for info in &mut buffer.info[start..base] { |
1373 | info.mask |= mask; |
1374 | } |
1375 | |
1376 | // Base |
1377 | mask = 0; |
1378 | if base < end { |
1379 | buffer.info[base].mask |= mask; |
1380 | } |
1381 | |
1382 | // Post-base |
1383 | mask = indic_plan.mask_array[indic_feature::BLWF] |
1384 | | indic_plan.mask_array[indic_feature::ABVF] |
1385 | | indic_plan.mask_array[indic_feature::PSTF]; |
1386 | for i in base + 1..end { |
1387 | buffer.info[i].mask |= mask; |
1388 | } |
1389 | } |
1390 | |
1391 | if indic_plan.is_old_spec && buffer.script == Some(script::DEVANAGARI) { |
1392 | // Old-spec eye-lash Ra needs special handling. From the |
1393 | // spec: |
1394 | // |
1395 | // "The feature 'below-base form' is applied to consonants |
1396 | // having below-base forms and following the base consonant. |
1397 | // The exception is vattu, which may appear below half forms |
1398 | // as well as below the base glyph. The feature 'below-base |
1399 | // form' will be applied to all such occurrences of Ra as well." |
1400 | // |
1401 | // Test case: U+0924,U+094D,U+0930,U+094d,U+0915 |
1402 | // with Sanskrit 2003 font. |
1403 | // |
1404 | // However, note that Ra,Halant,ZWJ is the correct way to |
1405 | // request eyelash form of Ra, so we wouldbn't inhibit it |
1406 | // in that sequence. |
1407 | // |
1408 | // Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915 |
1409 | for i in start..base.saturating_sub(1) { |
1410 | if buffer.info[i].indic_category() == category::RA |
1411 | && buffer.info[i + 1].indic_category() == category::H |
1412 | && (i + 2 == base || buffer.info[i + 2].indic_category() != category::ZWJ) |
1413 | { |
1414 | buffer.info[i].mask |= indic_plan.mask_array[indic_feature::BLWF]; |
1415 | buffer.info[i + 1].mask |= indic_plan.mask_array[indic_feature::BLWF]; |
1416 | } |
1417 | } |
1418 | } |
1419 | |
1420 | let pref_len = 2; |
1421 | if indic_plan.mask_array[indic_feature::PREF] != 0 && base + pref_len < end { |
1422 | // Find a Halant,Ra sequence and mark it for pre-base-reordering processing. |
1423 | for i in base + 1..end - pref_len + 1 { |
1424 | let glyphs = &[buffer.info[i + 0].as_glyph(), buffer.info[i + 1].as_glyph()]; |
1425 | if indic_plan.pref.would_substitute(&plan.ot_map, face, glyphs) { |
1426 | buffer.info[i + 0].mask = indic_plan.mask_array[indic_feature::PREF]; |
1427 | buffer.info[i + 1].mask = indic_plan.mask_array[indic_feature::PREF]; |
1428 | break; |
1429 | } |
1430 | } |
1431 | } |
1432 | |
1433 | // Apply ZWJ/ZWNJ effects |
1434 | for i in start + 1..end { |
1435 | if buffer.info[i].is_joiner() { |
1436 | let non_joiner = buffer.info[i].indic_category() == category::ZWNJ; |
1437 | let mut j = i; |
1438 | |
1439 | loop { |
1440 | j -= 1; |
1441 | |
1442 | // ZWJ/ZWNJ should disable CJCT. They do that by simply |
1443 | // being there, since we don't skip them for the CJCT |
1444 | // feature (ie. F_MANUAL_ZWJ) |
1445 | |
1446 | // A ZWNJ disables HALF. |
1447 | if non_joiner { |
1448 | buffer.info[j].mask &= !indic_plan.mask_array[indic_feature::HALF]; |
1449 | } |
1450 | |
1451 | if j <= start || buffer.info[j].is_consonant() { |
1452 | break; |
1453 | } |
1454 | } |
1455 | } |
1456 | } |
1457 | } |
1458 | |
1459 | fn initial_reordering_standalone_cluster( |
1460 | plan: &ShapePlan, |
1461 | indic_plan: &IndicShapePlan, |
1462 | face: &Face, |
1463 | start: usize, |
1464 | end: usize, |
1465 | buffer: &mut Buffer, |
1466 | ) { |
1467 | // We treat placeholder/dotted-circle as if they are consonants, so we |
1468 | // should just chain. Only if not in compatibility mode that is... |
1469 | initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer); |
1470 | } |
1471 | |
1472 | fn final_reordering(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) { |
1473 | if buffer.is_empty() { |
1474 | return; |
1475 | } |
1476 | |
1477 | let indic_plan: &IndicShapePlan = plan.data::<IndicShapePlan>(); |
1478 | |
1479 | let mut virama_glyph: Option = None; |
1480 | if indic_plan.config.virama != 0 { |
1481 | if let Some(g: GlyphId) = face.glyph_index(indic_plan.config.virama) { |
1482 | virama_glyph = Some(g.0 as u32); |
1483 | } |
1484 | } |
1485 | |
1486 | let mut start: usize = 0; |
1487 | let mut end: usize = buffer.next_syllable(start:0); |
1488 | while start < buffer.len { |
1489 | final_reordering_impl(indic_plan, virama_glyph, start, end, buffer); |
1490 | start = end; |
1491 | end = buffer.next_syllable(start); |
1492 | } |
1493 | } |
1494 | |
1495 | fn final_reordering_impl( |
1496 | plan: &IndicShapePlan, |
1497 | virama_glyph: Option<u32>, |
1498 | start: usize, |
1499 | end: usize, |
1500 | buffer: &mut Buffer, |
1501 | ) { |
1502 | // This function relies heavily on halant glyphs. Lots of ligation |
1503 | // and possibly multiple substitutions happened prior to this |
1504 | // phase, and that might have messed up our properties. Recover |
1505 | // from a particular case of that where we're fairly sure that a |
1506 | // class of OT_H is desired but has been lost. |
1507 | // |
1508 | // We don't call load_virama_glyph(), since we know it's already loaded. |
1509 | if let Some(virama_glyph) = virama_glyph { |
1510 | for info in &mut buffer.info[start..end] { |
1511 | if info.glyph_id == virama_glyph && info.is_ligated() && info.is_multiplied() { |
1512 | // This will make sure that this glyph passes is_halant() test. |
1513 | info.set_indic_category(category::H); |
1514 | info.clear_ligated_and_multiplied(); |
1515 | } |
1516 | } |
1517 | } |
1518 | |
1519 | // 4. Final reordering: |
1520 | // |
1521 | // After the localized forms and basic shaping forms GSUB features have been |
1522 | // applied (see below), the shaping engine performs some final glyph |
1523 | // reordering before applying all the remaining font features to the entire |
1524 | // syllable. |
1525 | |
1526 | let mut try_pref = plan.mask_array[indic_feature::PREF] != 0; |
1527 | |
1528 | let mut base = start; |
1529 | while base < end { |
1530 | if buffer.info[base].indic_position() as u32 >= position::BASE_C as u32 { |
1531 | if try_pref && base + 1 < end { |
1532 | for i in base + 1..end { |
1533 | if (buffer.info[i].mask & plan.mask_array[indic_feature::PREF]) != 0 { |
1534 | if !(buffer.info[i].is_substituted() |
1535 | && buffer.info[i].is_ligated_and_didnt_multiply()) |
1536 | { |
1537 | // Ok, this was a 'pref' candidate but didn't form any. |
1538 | // Base is around here... |
1539 | base = i; |
1540 | while base < end && buffer.info[base].is_halant() { |
1541 | base += 1; |
1542 | } |
1543 | |
1544 | buffer.info[base].set_indic_position(position::BASE_C); |
1545 | try_pref = false; |
1546 | } |
1547 | |
1548 | break; |
1549 | } |
1550 | } |
1551 | } |
1552 | |
1553 | // For Malayalam, skip over unformed below- (but NOT post-) forms. |
1554 | if buffer.script == Some(script::MALAYALAM) { |
1555 | let mut i = base + 1; |
1556 | while i < end { |
1557 | while i < end && buffer.info[i].is_joiner() { |
1558 | i += 1; |
1559 | } |
1560 | |
1561 | if i == end || !buffer.info[i].is_halant() { |
1562 | break; |
1563 | } |
1564 | |
1565 | i += 1; // Skip halant. |
1566 | |
1567 | while i < end && buffer.info[i].is_joiner() { |
1568 | i += 1; |
1569 | } |
1570 | |
1571 | if i < end |
1572 | && buffer.info[i].is_consonant() |
1573 | && buffer.info[i].indic_position() == position::BELOW_C |
1574 | { |
1575 | base = i; |
1576 | buffer.info[base].set_indic_position(position::BASE_C); |
1577 | } |
1578 | |
1579 | i += 1; |
1580 | } |
1581 | } |
1582 | |
1583 | if start < base && buffer.info[base].indic_position() as u32 > position::BASE_C as u32 { |
1584 | base -= 1; |
1585 | } |
1586 | |
1587 | break; |
1588 | } |
1589 | |
1590 | base += 1; |
1591 | } |
1592 | |
1593 | if base == end && start < base && buffer.info[base - 1].is_one_of(rb_flag(category::ZWJ as u32)) |
1594 | { |
1595 | base -= 1; |
1596 | } |
1597 | |
1598 | if base < end { |
1599 | while start < base |
1600 | && buffer.info[base] |
1601 | .is_one_of(rb_flag(category::N as u32) | rb_flag(category::H as u32)) |
1602 | { |
1603 | base -= 1; |
1604 | } |
1605 | } |
1606 | |
1607 | // - Reorder matras: |
1608 | // |
1609 | // If a pre-base matra character had been reordered before applying basic |
1610 | // features, the glyph can be moved closer to the main consonant based on |
1611 | // whether half-forms had been formed. Actual position for the matra is |
1612 | // defined as “after last standalone halant glyph, after initial matra |
1613 | // position and before the main consonant”. If ZWJ or ZWNJ follow this |
1614 | // halant, position is moved after it. |
1615 | // |
1616 | // IMPLEMENTATION NOTES: |
1617 | // |
1618 | // It looks like the last sentence is wrong. Testing, with Windows 7 Uniscribe |
1619 | // and Devanagari shows that the behavior is best described as: |
1620 | // |
1621 | // "If ZWJ follows this halant, matra is NOT repositioned after this halant. |
1622 | // If ZWNJ follows this halant, position is moved after it." |
1623 | // |
1624 | // Test case, with Adobe Devanagari or Nirmala UI: |
1625 | // |
1626 | // U+091F,U+094D,U+200C,U+092F,U+093F |
1627 | // (Matra moves to the middle, after ZWNJ.) |
1628 | // |
1629 | // U+091F,U+094D,U+200D,U+092F,U+093F |
1630 | // (Matra does NOT move, stays to the left.) |
1631 | // |
1632 | // https://github.com/harfbuzz/harfbuzz/issues/1070 |
1633 | |
1634 | // Otherwise there can't be any pre-base matra characters. |
1635 | if start + 1 < end && start < base { |
1636 | // If we lost track of base, alas, position before last thingy. |
1637 | let mut new_pos = if base == end { base - 2 } else { base - 1 }; |
1638 | |
1639 | // Malayalam / Tamil do not have "half" forms or explicit virama forms. |
1640 | // The glyphs formed by 'half' are Chillus or ligated explicit viramas. |
1641 | // We want to position matra after them. |
1642 | if buffer.script != Some(script::MALAYALAM) && buffer.script != Some(script::TAMIL) { |
1643 | loop { |
1644 | while new_pos > start |
1645 | && !buffer.info[new_pos] |
1646 | .is_one_of(rb_flag(category::M as u32) | rb_flag(category::H as u32)) |
1647 | { |
1648 | new_pos -= 1; |
1649 | } |
1650 | |
1651 | // If we found no Halant we are done. |
1652 | // Otherwise only proceed if the Halant does |
1653 | // not belong to the Matra itself! |
1654 | if buffer.info[new_pos].is_halant() |
1655 | && buffer.info[new_pos].indic_position() != position::PRE_M |
1656 | { |
1657 | if new_pos + 1 < end { |
1658 | // -> If ZWJ follows this halant, matra is NOT repositioned after this halant. |
1659 | if buffer.info[new_pos + 1].indic_category() == category::ZWJ { |
1660 | // Keep searching. |
1661 | if new_pos > start { |
1662 | new_pos -= 1; |
1663 | continue; |
1664 | } |
1665 | } |
1666 | |
1667 | // -> If ZWNJ follows this halant, position is moved after it. |
1668 | // |
1669 | // IMPLEMENTATION NOTES: |
1670 | // |
1671 | // This is taken care of by the state-machine. A Halant,ZWNJ is a terminating |
1672 | // sequence for a consonant syllable; any pre-base matras occurring after it |
1673 | // will belong to the subsequent syllable. |
1674 | } |
1675 | } else { |
1676 | new_pos = start; // No move. |
1677 | } |
1678 | |
1679 | break; |
1680 | } |
1681 | } |
1682 | |
1683 | if start < new_pos && buffer.info[new_pos].indic_position() != position::PRE_M { |
1684 | // Now go see if there's actually any matras... |
1685 | for i in (start + 1..=new_pos).rev() { |
1686 | if buffer.info[i - 1].indic_position() == position::PRE_M { |
1687 | let old_pos = i - 1; |
1688 | // Shouldn't actually happen. |
1689 | if old_pos < base && base <= new_pos { |
1690 | base -= 1; |
1691 | } |
1692 | |
1693 | let tmp = buffer.info[old_pos]; |
1694 | for i in 0..new_pos - old_pos { |
1695 | buffer.info[i + old_pos] = buffer.info[i + old_pos + 1]; |
1696 | } |
1697 | buffer.info[new_pos] = tmp; |
1698 | |
1699 | // Note: this merge_clusters() is intentionally *after* the reordering. |
1700 | // Indic matra reordering is special and tricky... |
1701 | buffer.merge_clusters(new_pos, cmp::min(end, base + 1)); |
1702 | |
1703 | new_pos -= 1; |
1704 | } |
1705 | } |
1706 | } else { |
1707 | for i in start..base { |
1708 | if buffer.info[i].indic_position() == position::PRE_M { |
1709 | buffer.merge_clusters(i, cmp::min(end, base + 1)); |
1710 | break; |
1711 | } |
1712 | } |
1713 | } |
1714 | } |
1715 | |
1716 | // - Reorder reph: |
1717 | // |
1718 | // Reph’s original position is always at the beginning of the syllable, |
1719 | // (i.e. it is not reordered at the character reordering stage). However, |
1720 | // it will be reordered according to the basic-forms shaping results. |
1721 | // Possible positions for reph, depending on the script, are; after main, |
1722 | // before post-base consonant forms, and after post-base consonant forms. |
1723 | |
1724 | // Two cases: |
1725 | // |
1726 | // - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then |
1727 | // we should only move it if the sequence ligated to the repha form. |
1728 | // |
1729 | // - If repha is encoded separately and in the logical position, we should only |
1730 | // move it if it did NOT ligate. If it ligated, it's probably the font trying |
1731 | // to make it work without the reordering. |
1732 | |
1733 | if start + 1 < end |
1734 | && buffer.info[start].indic_position() == position::RA_TO_BECOME_REPH |
1735 | && (buffer.info[start].indic_category() == category::REPHA) |
1736 | ^ buffer.info[start].is_ligated_and_didnt_multiply() |
1737 | { |
1738 | let mut new_reph_pos; |
1739 | loop { |
1740 | let reph_pos = plan.config.reph_pos; |
1741 | |
1742 | // 1. If reph should be positioned after post-base consonant forms, |
1743 | // proceed to step 5. |
1744 | if reph_pos != RephPosition::AfterPost { |
1745 | // 2. If the reph repositioning class is not after post-base: target |
1746 | // position is after the first explicit halant glyph between the |
1747 | // first post-reph consonant and last main consonant. If ZWJ or ZWNJ |
1748 | // are following this halant, position is moved after it. If such |
1749 | // position is found, this is the target position. Otherwise, |
1750 | // proceed to the next step. |
1751 | // |
1752 | // Note: in old-implementation fonts, where classifications were |
1753 | // fixed in shaping engine, there was no case where reph position |
1754 | // will be found on this step. |
1755 | { |
1756 | new_reph_pos = start + 1; |
1757 | while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() { |
1758 | new_reph_pos += 1; |
1759 | } |
1760 | |
1761 | if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() { |
1762 | // ->If ZWJ or ZWNJ are following this halant, position is moved after it. |
1763 | if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() { |
1764 | new_reph_pos += 1; |
1765 | } |
1766 | |
1767 | break; |
1768 | } |
1769 | } |
1770 | |
1771 | // 3. If reph should be repositioned after the main consonant: find the |
1772 | // first consonant not ligated with main, or find the first |
1773 | // consonant that is not a potential pre-base-reordering Ra. |
1774 | if reph_pos == RephPosition::AfterMain { |
1775 | new_reph_pos = base; |
1776 | while new_reph_pos + 1 < end |
1777 | && buffer.info[new_reph_pos + 1].indic_position() as u8 |
1778 | <= position::AFTER_MAIN as u8 |
1779 | { |
1780 | new_reph_pos += 1; |
1781 | } |
1782 | |
1783 | if new_reph_pos < end { |
1784 | break; |
1785 | } |
1786 | } |
1787 | |
1788 | // 4. If reph should be positioned before post-base consonant, find |
1789 | // first post-base classified consonant not ligated with main. If no |
1790 | // consonant is found, the target position should be before the |
1791 | // first matra, syllable modifier sign or vedic sign. |
1792 | // |
1793 | // This is our take on what step 4 is trying to say (and failing, BADLY). |
1794 | if reph_pos == RephPosition::AfterSub { |
1795 | new_reph_pos = base; |
1796 | while new_reph_pos + 1 < end |
1797 | && (rb_flag_unsafe(buffer.info[new_reph_pos + 1].indic_position() as u32) |
1798 | & (rb_flag(position::POST_C as u32) |
1799 | | rb_flag(position::AFTER_POST as u32) |
1800 | | rb_flag(position::SMVD as u32))) |
1801 | == 0 |
1802 | { |
1803 | new_reph_pos += 1; |
1804 | } |
1805 | |
1806 | if new_reph_pos < end { |
1807 | break; |
1808 | } |
1809 | } |
1810 | } |
1811 | |
1812 | // 5. If no consonant is found in steps 3 or 4, move reph to a position |
1813 | // immediately before the first post-base matra, syllable modifier |
1814 | // sign or vedic sign that has a reordering class after the intended |
1815 | // reph position. For example, if the reordering position for reph |
1816 | // is post-main, it will skip above-base matras that also have a |
1817 | // post-main position. |
1818 | // |
1819 | // Copied from step 2. |
1820 | new_reph_pos = start + 1; |
1821 | while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() { |
1822 | new_reph_pos += 1; |
1823 | } |
1824 | |
1825 | if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() { |
1826 | /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */ |
1827 | if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() { |
1828 | new_reph_pos += 1; |
1829 | } |
1830 | |
1831 | break; |
1832 | } |
1833 | // See https://github.com/harfbuzz/harfbuzz/issues/2298#issuecomment-615318654 |
1834 | |
1835 | // 6. Otherwise, reorder reph to the end of the syllable. |
1836 | { |
1837 | new_reph_pos = end - 1; |
1838 | while new_reph_pos > start |
1839 | && buffer.info[new_reph_pos].indic_position() == position::SMVD |
1840 | { |
1841 | new_reph_pos -= 1; |
1842 | } |
1843 | |
1844 | // If the Reph is to be ending up after a Matra,Halant sequence, |
1845 | // position it before that Halant so it can interact with the Matra. |
1846 | // However, if it's a plain Consonant,Halant we shouldn't do that. |
1847 | // Uniscribe doesn't do this. |
1848 | // TEST: U+0930,U+094D,U+0915,U+094B,U+094D |
1849 | if buffer.info[new_reph_pos].is_halant() { |
1850 | for info in &buffer.info[base + 1..new_reph_pos] { |
1851 | if info.indic_category() == category::M { |
1852 | // Ok, got it. |
1853 | new_reph_pos -= 1; |
1854 | } |
1855 | } |
1856 | } |
1857 | } |
1858 | |
1859 | break; |
1860 | } |
1861 | |
1862 | // Move |
1863 | buffer.merge_clusters(start, new_reph_pos + 1); |
1864 | |
1865 | let reph = buffer.info[start]; |
1866 | for i in 0..new_reph_pos - start { |
1867 | buffer.info[i + start] = buffer.info[i + start + 1]; |
1868 | } |
1869 | buffer.info[new_reph_pos] = reph; |
1870 | |
1871 | if start < base && base <= new_reph_pos { |
1872 | base -= 1; |
1873 | } |
1874 | } |
1875 | |
1876 | // - Reorder pre-base-reordering consonants: |
1877 | // |
1878 | // If a pre-base-reordering consonant is found, reorder it according to |
1879 | // the following rules: |
1880 | |
1881 | // Otherwise there can't be any pre-base-reordering Ra. |
1882 | if try_pref && base + 1 < end { |
1883 | for i in base + 1..end { |
1884 | if (buffer.info[i].mask & plan.mask_array[indic_feature::PREF]) != 0 { |
1885 | // 1. Only reorder a glyph produced by substitution during application |
1886 | // of the <pref> feature. (Note that a font may shape a Ra consonant with |
1887 | // the feature generally but block it in certain contexts.) |
1888 | // |
1889 | // Note: We just check that something got substituted. We don't check that |
1890 | // the <pref> feature actually did it... |
1891 | // |
1892 | // Reorder pref only if it ligated. |
1893 | if buffer.info[i].is_ligated_and_didnt_multiply() { |
1894 | // 2. Try to find a target position the same way as for pre-base matra. |
1895 | // If it is found, reorder pre-base consonant glyph. |
1896 | // |
1897 | // 3. If position is not found, reorder immediately before main consonant. |
1898 | |
1899 | let mut new_pos = base; |
1900 | // Malayalam / Tamil do not have "half" forms or explicit virama forms. |
1901 | // The glyphs formed by 'half' are Chillus or ligated explicit viramas. |
1902 | // We want to position matra after them. |
1903 | if buffer.script != Some(script::MALAYALAM) |
1904 | && buffer.script != Some(script::TAMIL) |
1905 | { |
1906 | while new_pos > start |
1907 | && !buffer.info[new_pos - 1].is_one_of( |
1908 | rb_flag(category::M as u32) | rb_flag(category::H as u32), |
1909 | ) |
1910 | { |
1911 | new_pos -= 1; |
1912 | } |
1913 | } |
1914 | |
1915 | if new_pos > start && buffer.info[new_pos - 1].is_halant() { |
1916 | // -> If ZWJ or ZWNJ follow this halant, position is moved after it. |
1917 | if new_pos < end && buffer.info[new_pos].is_joiner() { |
1918 | new_pos += 1; |
1919 | } |
1920 | } |
1921 | |
1922 | { |
1923 | let old_pos = i; |
1924 | |
1925 | buffer.merge_clusters(new_pos, old_pos + 1); |
1926 | let tmp = buffer.info[old_pos]; |
1927 | for i in (0..=old_pos - new_pos).rev() { |
1928 | buffer.info[i + new_pos + 1] = buffer.info[i + new_pos]; |
1929 | } |
1930 | buffer.info[new_pos] = tmp; |
1931 | |
1932 | if new_pos <= base && base < old_pos { |
1933 | // TODO: investigate |
1934 | #[allow (unused_assignments)] |
1935 | { |
1936 | base += 1; |
1937 | } |
1938 | } |
1939 | } |
1940 | } |
1941 | |
1942 | break; |
1943 | } |
1944 | } |
1945 | } |
1946 | |
1947 | // Apply 'init' to the Left Matra if it's a word start. |
1948 | if buffer.info[start].indic_position() == position::PRE_M { |
1949 | if start == 0 |
1950 | || (rb_flag_unsafe(buffer.info[start - 1].general_category().to_rb()) |
1951 | & rb_flag_range( |
1952 | hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT, |
1953 | hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK, |
1954 | )) |
1955 | == 0 |
1956 | { |
1957 | buffer.info[start].mask |= plan.mask_array[indic_feature::INIT]; |
1958 | } else { |
1959 | buffer.unsafe_to_break(start - 1, start + 1); |
1960 | } |
1961 | } |
1962 | } |
1963 | |
1964 | pub fn get_category_and_position(u: u32) -> (Category, Position) { |
1965 | let (c1, c2) = super::indic_table::get_categories(u); |
1966 | let c2 = if c1 == SyllabicCategory::ConsonantMedial |
1967 | || c1 == SyllabicCategory::GeminationMark |
1968 | || c1 == SyllabicCategory::RegisterShifter |
1969 | || c1 == SyllabicCategory::ConsonantSucceedingRepha |
1970 | || c1 == SyllabicCategory::Virama |
1971 | || c1 == SyllabicCategory::VowelDependent |
1972 | || false |
1973 | { |
1974 | c2 |
1975 | } else { |
1976 | MatraCategory::NotApplicable |
1977 | }; |
1978 | |
1979 | let c1 = match c1 { |
1980 | SyllabicCategory::Other => category::X, |
1981 | SyllabicCategory::Avagraha => category::SYMBOL, |
1982 | SyllabicCategory::Bindu => category::SM, |
1983 | SyllabicCategory::BrahmiJoiningNumber => category::PLACEHOLDER, // Don't care. |
1984 | SyllabicCategory::CantillationMark => category::A, |
1985 | SyllabicCategory::Consonant => category::C, |
1986 | SyllabicCategory::ConsonantDead => category::C, |
1987 | SyllabicCategory::ConsonantFinal => category::CM, |
1988 | SyllabicCategory::ConsonantHeadLetter => category::C, |
1989 | SyllabicCategory::ConsonantInitialPostfixed => category::PLACEHOLDER, |
1990 | SyllabicCategory::ConsonantKiller => category::M, // U+17CD only. |
1991 | SyllabicCategory::ConsonantMedial => category::CM, |
1992 | SyllabicCategory::ConsonantPlaceholder => category::PLACEHOLDER, |
1993 | SyllabicCategory::ConsonantPrecedingRepha => category::REPHA, |
1994 | SyllabicCategory::ConsonantPrefixed => category::X, |
1995 | SyllabicCategory::ConsonantSubjoined => category::CM, |
1996 | SyllabicCategory::ConsonantSucceedingRepha => category::CM, |
1997 | SyllabicCategory::ConsonantWithStacker => category::CS, |
1998 | SyllabicCategory::GeminationMark => category::SM, // https://github.com/harfbuzz/harfbuzz/issues/552 |
1999 | SyllabicCategory::InvisibleStacker => category::COENG, |
2000 | SyllabicCategory::Joiner => category::ZWJ, |
2001 | SyllabicCategory::ModifyingLetter => category::X, |
2002 | SyllabicCategory::NonJoiner => category::ZWNJ, |
2003 | SyllabicCategory::Nukta => category::N, |
2004 | SyllabicCategory::Number => category::PLACEHOLDER, |
2005 | SyllabicCategory::NumberJoiner => category::PLACEHOLDER, // Don't care. |
2006 | SyllabicCategory::PureKiller => category::M, |
2007 | SyllabicCategory::RegisterShifter => category::RS, |
2008 | SyllabicCategory::SyllableModifier => category::SM, |
2009 | SyllabicCategory::ToneLetter => category::X, |
2010 | SyllabicCategory::ToneMark => category::N, |
2011 | SyllabicCategory::Virama => category::H, |
2012 | SyllabicCategory::Visarga => category::SM, |
2013 | SyllabicCategory::Vowel => category::V, |
2014 | SyllabicCategory::VowelDependent => category::M, |
2015 | SyllabicCategory::VowelIndependent => category::V, |
2016 | }; |
2017 | |
2018 | let c2 = match c2 { |
2019 | MatraCategory::NotApplicable => position::END, |
2020 | MatraCategory::Left => position::PRE_C, |
2021 | MatraCategory::Top => position::ABOVE_C, |
2022 | MatraCategory::Bottom => position::BELOW_C, |
2023 | MatraCategory::Right => position::POST_C, |
2024 | MatraCategory::BottomAndLeft => position::POST_C, |
2025 | MatraCategory::BottomAndRight => position::POST_C, |
2026 | MatraCategory::LeftAndRight => position::POST_C, |
2027 | MatraCategory::TopAndBottom => position::BELOW_C, |
2028 | MatraCategory::TopAndBottomAndRight => position::POST_C, |
2029 | MatraCategory::TopAndLeft => position::ABOVE_C, |
2030 | MatraCategory::TopAndLeftAndRight => position::POST_C, |
2031 | MatraCategory::TopAndRight => position::POST_C, |
2032 | MatraCategory::Overstruck => position::AFTER_MAIN, |
2033 | MatraCategory::VisualOrderLeft => position::PRE_M, |
2034 | }; |
2035 | |
2036 | (c1, c2) |
2037 | } |
2038 | |
2039 | #[rustfmt::skip] |
2040 | fn matra_position_indic(u: u32, side: u8) -> u8 { |
2041 | #[inline ] fn in_half_block(u: u32, base: u32) -> bool { u & !0x7F == base } |
2042 | #[inline ] fn is_deva(u: u32) -> bool { in_half_block(u, 0x0900) } |
2043 | #[inline ] fn is_beng(u: u32) -> bool { in_half_block(u, 0x0980) } |
2044 | #[inline ] fn is_guru(u: u32) -> bool { in_half_block(u, 0x0A00) } |
2045 | #[inline ] fn is_gujr(u: u32) -> bool { in_half_block(u, 0x0A80) } |
2046 | #[inline ] fn is_orya(u: u32) -> bool { in_half_block(u, 0x0B00) } |
2047 | #[inline ] fn is_taml(u: u32) -> bool { in_half_block(u, 0x0B80) } |
2048 | #[inline ] fn is_telu(u: u32) -> bool { in_half_block(u, 0x0C00) } |
2049 | #[inline ] fn is_knda(u: u32) -> bool { in_half_block(u, 0x0C80) } |
2050 | #[inline ] fn is_mlym(u: u32) -> bool { in_half_block(u, 0x0D00) } |
2051 | #[inline ] fn is_sinh(u: u32) -> bool { in_half_block(u, 0x0D80) } |
2052 | |
2053 | #[inline ] |
2054 | fn matra_pos_right(u: u32) -> Position { |
2055 | if is_deva(u) { |
2056 | position::AFTER_SUB |
2057 | } else if is_beng(u) { |
2058 | position::AFTER_POST |
2059 | } else if is_guru(u) { |
2060 | position::AFTER_POST |
2061 | } else if is_gujr(u) { |
2062 | position::AFTER_POST |
2063 | } else if is_orya(u) { |
2064 | position::AFTER_POST |
2065 | } else if is_taml(u) { |
2066 | position::AFTER_POST |
2067 | } else if is_telu(u) { |
2068 | if u <= 0x0C42 { |
2069 | position::BEFORE_SUB |
2070 | } else { |
2071 | position::AFTER_SUB |
2072 | } |
2073 | } else if is_knda(u) { |
2074 | if u < 0x0CC3 || u > 0xCD6 { |
2075 | position::BEFORE_SUB |
2076 | } else { |
2077 | position::AFTER_SUB |
2078 | } |
2079 | } else if is_mlym(u) { |
2080 | position::AFTER_POST |
2081 | } else if is_sinh(u) { |
2082 | position::AFTER_SUB |
2083 | } else { |
2084 | position::AFTER_SUB |
2085 | } |
2086 | } |
2087 | |
2088 | // BENG and MLYM don't have top matras. |
2089 | #[inline ] |
2090 | fn matra_pos_top(u: u32) -> Position { |
2091 | if is_deva(u) { |
2092 | position::AFTER_SUB |
2093 | } else if is_guru(u) { |
2094 | // Deviate from spec |
2095 | position::AFTER_POST |
2096 | } else if is_gujr(u) { |
2097 | position::AFTER_SUB |
2098 | } else if is_orya(u) { |
2099 | position::AFTER_MAIN |
2100 | } else if is_taml(u) { |
2101 | position::AFTER_SUB |
2102 | } else if is_telu(u) { |
2103 | position::BEFORE_SUB |
2104 | } else if is_knda(u) { |
2105 | position::BEFORE_SUB |
2106 | } else if is_sinh(u) { |
2107 | position::AFTER_SUB |
2108 | } else { |
2109 | position::AFTER_SUB |
2110 | } |
2111 | } |
2112 | |
2113 | #[inline ] |
2114 | fn matra_pos_bottom(u: u32) -> Position { |
2115 | if is_deva(u) { |
2116 | position::AFTER_SUB |
2117 | } else if is_beng(u) { |
2118 | position::AFTER_SUB |
2119 | } else if is_guru(u) { |
2120 | position::AFTER_POST |
2121 | } else if is_gujr(u) { |
2122 | position::AFTER_POST |
2123 | } else if is_orya(u) { |
2124 | position::AFTER_SUB |
2125 | } else if is_taml(u) { |
2126 | position::AFTER_POST |
2127 | } else if is_telu(u) { |
2128 | position::BEFORE_SUB |
2129 | } else if is_knda(u) { |
2130 | position::BEFORE_SUB |
2131 | } else if is_mlym(u) { |
2132 | position::AFTER_POST |
2133 | } else if is_sinh(u) { |
2134 | position::AFTER_SUB |
2135 | } else { |
2136 | position::AFTER_SUB |
2137 | } |
2138 | } |
2139 | |
2140 | match side { |
2141 | position::PRE_C => position::PRE_M, |
2142 | position::POST_C => matra_pos_right(u), |
2143 | position::ABOVE_C => matra_pos_top(u), |
2144 | position::BELOW_C => matra_pos_bottom(u), |
2145 | _ => side, |
2146 | } |
2147 | } |
2148 | |