1 | use alloc::boxed::Box; |
2 | use core::cmp; |
3 | use core::convert::TryFrom; |
4 | use core::ops::Range; |
5 | |
6 | use ttf_parser::GlyphId; |
7 | |
8 | use super::*; |
9 | use crate::buffer::Buffer; |
10 | use crate::normalize::ShapeNormalizationMode; |
11 | use crate::ot::{ |
12 | feature, FeatureFlags, LayoutTable, Map, TableIndex, WouldApply, WouldApplyContext, |
13 | }; |
14 | use crate::plan::{ShapePlan, ShapePlanner}; |
15 | use crate::unicode::{hb_gc, CharExt, GeneralCategoryExt}; |
16 | use crate::{script, Face, GlyphInfo, Mask, Script, Tag}; |
17 | |
18 | pub const INDIC_SHAPER: ComplexShaper = ComplexShaper { |
19 | collect_features: Some(collect_features), |
20 | override_features: Some(override_features), |
21 | create_data: Some(|plan: &ShapePlan| Box::new(IndicShapePlan::new(plan))), |
22 | preprocess_text: Some(preprocess_text), |
23 | postprocess_glyphs: None, |
24 | normalization_mode: Some(ShapeNormalizationMode::ComposedDiacriticsNoShortCircuit), |
25 | decompose: Some(decompose), |
26 | compose: Some(compose), |
27 | setup_masks: Some(setup_masks), |
28 | gpos_tag: None, |
29 | reorder_marks: None, |
30 | zero_width_marks: None, |
31 | fallback_position: false, |
32 | }; |
33 | |
/// Per-glyph Indic category, stored through `GlyphInfo::set_indic_category`.
pub type Category = u8;

/// Numeric values of the Indic categories.
///
/// The values are used as bit indices by `category_flag`, so they must stay
/// stable. Value 9 (OT_VD) is intentionally unused; OT_A is used instead.
pub mod category {
    pub const X: u8 = 0;
    pub const C: u8 = 1;
    pub const V: u8 = 2;
    pub const N: u8 = 3;
    pub const H: u8 = 4;
    pub const ZWNJ: u8 = 5;
    pub const ZWJ: u8 = 6;
    pub const M: u8 = 7;
    pub const SM: u8 = 8;
    // OT_VD = 9, UNUSED; we use OT_A instead.
    pub const A: u8 = 10;
    pub const PLACEHOLDER: u8 = 11;
    pub const DOTTED_CIRCLE: u8 = 12;
    /// Register Shifter, used in Khmer OT spec.
    pub const RS: u8 = 13;
    /// Khmer-style Virama.
    pub const COENG: u8 = 14;
    /// Atomically-encoded logical or visual repha.
    pub const REPHA: u8 = 15;
    pub const RA: u8 = 16;
    /// Consonant-Medial.
    pub const CM: u8 = 17;
    /// Avagraha, etc that take marks (SM,A,VD).
    pub const SYMBOL: u8 = 18;
    pub const CS: u8 = 19;
    pub const ROBATIC: u8 = 20;
    pub const X_GROUP: u8 = 21;
    pub const Y_GROUP: u8 = 22;
    pub const MW: u8 = 23;
    pub const MY: u8 = 24;
    pub const PT: u8 = 25;

    // The following are used by Khmer & Myanmar shapers. Defined here for them to share.
    pub const V_AVB: u8 = 26;
    pub const V_BLW: u8 = 27;
    pub const V_PRE: u8 = 28;
    pub const V_PST: u8 = 29;
    /// Variation selectors.
    pub const VS: u8 = 30;
    /// Punctuation.
    pub const P: u8 = 31;
    /// Digits except zero.
    pub const D: u8 = 32;
    /// Medial la.
    pub const ML: u8 = 33;
}
72 | |
/// Per-glyph Indic position, stored through `GlyphInfo::set_indic_position`.
pub type Position = u8;

/// Numeric values of the Indic positions, in ascending order from `START`
/// to `END`.
pub mod position {
    pub const START: u8 = 0;

    pub const RA_TO_BECOME_REPH: u8 = 1;
    pub const PRE_M: u8 = 2;
    pub const PRE_C: u8 = 3;

    pub const BASE_C: u8 = 4;
    pub const AFTER_MAIN: u8 = 5;

    pub const ABOVE_C: u8 = 6;

    pub const BEFORE_SUB: u8 = 7;
    pub const BELOW_C: u8 = 8;
    pub const AFTER_SUB: u8 = 9;

    pub const BEFORE_POST: u8 = 10;
    pub const POST_C: u8 = 11;
    pub const AFTER_POST: u8 = 12;

    pub const FINAL_C: u8 = 13;
    pub const SMVD: u8 = 14;

    pub const END: u8 = 15;
}
92 | |
/// Syllabic categories a character can be classified as.
///
/// Variant order is significant: discriminants are assigned implicitly,
/// starting at 0 for `Other`.
// Not every variant is referenced from this crate.
#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq)]
pub enum SyllabicCategory {
    Other,
    Avagraha,
    Bindu,
    BrahmiJoiningNumber,
    CantillationMark,
    Consonant,
    ConsonantDead,
    ConsonantFinal,
    ConsonantHeadLetter,
    ConsonantInitialPostfixed,
    ConsonantKiller,
    ConsonantMedial,
    ConsonantPlaceholder,
    ConsonantPrecedingRepha,
    ConsonantPrefixed,
    ConsonantSubjoined,
    ConsonantSucceedingRepha,
    ConsonantWithStacker,
    GeminationMark,
    InvisibleStacker,
    Joiner,
    ModifyingLetter,
    NonJoiner,
    Nukta,
    Number,
    NumberJoiner,
    PureKiller,
    RegisterShifter,
    SyllableModifier,
    ToneLetter,
    ToneMark,
    Virama,
    Visarga,
    Vowel,
    VowelDependent,
    VowelIndependent,
}
133 | |
/// Placement categories for matras.
///
/// Variant order is significant: discriminants are assigned implicitly,
/// starting at 0 for `NotApplicable`.
// Not every variant is referenced from this crate.
#[allow(dead_code)]
#[derive(Clone, Copy)]
pub enum MatraCategory {
    NotApplicable,

    // Single-sided placements.
    Left,
    Top,
    Bottom,
    Right,

    // Multi-part placements.
    BottomAndLeft,
    BottomAndRight,
    LeftAndRight,
    TopAndBottom,
    TopAndBottomAndRight,
    TopAndBottomAndLeft,
    TopAndLeft,
    TopAndLeftAndRight,
    TopAndRight,

    Overstruck,
    VisualOrderLeft,
}
154 | |
155 | const INDIC_FEATURES: &[(Tag, FeatureFlags)] = &[ |
156 | // Basic features. |
157 | // These features are applied in order, one at a time, after initial_reordering, |
158 | // constrained to the syllable. |
159 | (feature::NUKTA_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS), |
160 | (feature::AKHANDS, FeatureFlags::GLOBAL_MANUAL_JOINERS), |
161 | (feature::REPH_FORMS, FeatureFlags::MANUAL_JOINERS), |
162 | (feature::RAKAR_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS), |
163 | (feature::PRE_BASE_FORMS, FeatureFlags::MANUAL_JOINERS), |
164 | (feature::BELOW_BASE_FORMS, FeatureFlags::MANUAL_JOINERS), |
165 | (feature::ABOVE_BASE_FORMS, FeatureFlags::MANUAL_JOINERS), |
166 | (feature::HALF_FORMS, FeatureFlags::MANUAL_JOINERS), |
167 | (feature::POST_BASE_FORMS, FeatureFlags::MANUAL_JOINERS), |
168 | (feature::VATTU_VARIANTS, FeatureFlags::GLOBAL_MANUAL_JOINERS), |
169 | (feature::CONJUNCT_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS), |
170 | // Other features. |
171 | // These features are applied all at once, after final_reordering, constrained |
172 | // to the syllable. |
173 | // Default Bengali font in Windows for example has intermixed |
174 | // lookups for init,pres,abvs,blws features. |
175 | (feature::INITIAL_FORMS, FeatureFlags::MANUAL_JOINERS), |
176 | ( |
177 | feature::PRE_BASE_SUBSTITUTIONS, |
178 | FeatureFlags::GLOBAL_MANUAL_JOINERS, |
179 | ), |
180 | ( |
181 | feature::ABOVE_BASE_SUBSTITUTIONS, |
182 | FeatureFlags::GLOBAL_MANUAL_JOINERS, |
183 | ), |
184 | ( |
185 | feature::BELOW_BASE_SUBSTITUTIONS, |
186 | FeatureFlags::GLOBAL_MANUAL_JOINERS, |
187 | ), |
188 | ( |
189 | feature::POST_BASE_SUBSTITUTIONS, |
190 | FeatureFlags::GLOBAL_MANUAL_JOINERS, |
191 | ), |
192 | (feature::HALANT_FORMS, FeatureFlags::GLOBAL_MANUAL_JOINERS), |
193 | ]; |
194 | |
/// Indices into `INDIC_FEATURES` (and into `IndicShapePlan::mask_array`).
///
/// Must be in the same order as the INDIC_FEATURES array. Each index is
/// defined relative to the previous one so the sequence cannot develop gaps.
// Only some of the indices are referenced directly.
#[allow(dead_code)]
mod indic_feature {
    // Basic features.
    pub const NUKT: usize = 0;
    pub const AKHN: usize = NUKT + 1;
    pub const RPHF: usize = AKHN + 1;
    pub const RKRF: usize = RPHF + 1;
    pub const PREF: usize = RKRF + 1;
    pub const BLWF: usize = PREF + 1;
    pub const ABVF: usize = BLWF + 1;
    pub const HALF: usize = ABVF + 1;
    pub const PSTF: usize = HALF + 1;
    pub const VATU: usize = PSTF + 1;
    pub const CJCT: usize = VATU + 1;

    // Other features.
    pub const INIT: usize = CJCT + 1;
    pub const PRES: usize = INIT + 1;
    pub const ABVS: usize = PRES + 1;
    pub const BLWS: usize = ABVS + 1;
    pub const PSTS: usize = BLWS + 1;
    pub const HALN: usize = PSTS + 1;
}
216 | |
217 | const fn category_flag(c: Category) -> u32 { |
218 | rb_flag(c as u32) |
219 | } |
220 | |
221 | const MEDIAL_FLAGS: u32 = category_flag(category::CM); |
222 | // Note: |
223 | // |
224 | // We treat Vowels and placeholders as if they were consonants. This is safe because Vowels |
225 | // cannot happen in a consonant syllable. The plus side however is, we can call the |
226 | // consonant syllable logic from the vowel syllable function and get it all right! |
227 | const CONSONANT_FLAGS: u32 = category_flag(category::C) |
228 | | category_flag(category::CS) |
229 | | category_flag(category::RA) |
230 | | MEDIAL_FLAGS |
231 | | category_flag(category::V) |
232 | | category_flag(category::PLACEHOLDER) |
233 | | category_flag(category::DOTTED_CIRCLE); |
234 | const JOINER_FLAGS: u32 = category_flag(category::ZWJ) | category_flag(category::ZWNJ); |
235 | |
/// Code points whose category is overridden to `category::RA` when they are
/// classified as consonants (see `GlyphInfo::set_indic_properties`).
///
/// This is a hack for now. We should move this data into the main Indic table.
/// Or completely remove it and just check in the tables.
const RA_CHARS: &[u32] = &[
    // Devanagari
    0x0930,
    // Bengali (two code points)
    0x09B0,
    0x09F0,
    // Gurmukhi. No Reph
    0x0A30,
    // Gujarati
    0x0AB0,
    // Oriya
    0x0B30,
    // Tamil. No Reph
    0x0BB0,
    // Telugu. Reph formed only with ZWJ
    0x0C30,
    // Kannada
    0x0CB0,
    // Malayalam. No Reph, Logical Repha
    0x0D30,
    // Sinhala. Reph formed only with ZWJ
    0x0DBB,
];
251 | |
/// Base-position setting of a script configuration.
///
/// `update_consonant_positions` only consults the font when this is `Last`.
#[derive(Clone, Copy, PartialEq)]
enum BasePosition {
    /// Variant used by the Sinhala configuration.
    LastSinhala,
    /// Variant used by every other configuration.
    Last,
}
257 | |
258 | #[derive (Clone, Copy, PartialEq)] |
259 | enum RephPosition { |
260 | AfterMain = position::AFTER_MAIN as isize, |
261 | BeforeSub = position::BEFORE_SUB as isize, |
262 | AfterSub = position::AFTER_SUB as isize, |
263 | BeforePost = position::BEFORE_POST as isize, |
264 | AfterPost = position::AFTER_POST as isize, |
265 | } |
266 | |
/// How a script forms its reph.
#[derive(Clone, Copy, PartialEq)]
enum RephMode {
    /// Reph formed out of initial Ra,H sequence.
    Implicit,

    /// Reph formed out of initial Ra,H,ZWJ sequence.
    Explicit,

    /// Encoded Repha character, needs reordering.
    LogRepha,
}
276 | |
/// Which parts of a syllable the below-forms feature is applied to.
#[derive(Clone, Copy, PartialEq)]
enum BlwfMode {
    /// Below-forms feature applied to pre-base and post-base.
    PreAndPost,

    /// Below-forms feature applied to post-base only.
    PostOnly,
}
284 | |
285 | #[derive (Clone, Copy)] |
286 | struct IndicConfig { |
287 | script: Option<Script>, |
288 | has_old_spec: bool, |
289 | virama: u32, |
290 | base_pos: BasePosition, |
291 | reph_pos: RephPosition, |
292 | reph_mode: RephMode, |
293 | blwf_mode: BlwfMode, |
294 | } |
295 | |
296 | impl IndicConfig { |
297 | const fn new( |
298 | script: Option<Script>, |
299 | has_old_spec: bool, |
300 | virama: u32, |
301 | base_pos: BasePosition, |
302 | reph_pos: RephPosition, |
303 | reph_mode: RephMode, |
304 | blwf_mode: BlwfMode, |
305 | ) -> Self { |
306 | IndicConfig { |
307 | script, |
308 | has_old_spec, |
309 | virama, |
310 | base_pos, |
311 | reph_pos, |
312 | reph_mode, |
313 | blwf_mode, |
314 | } |
315 | } |
316 | } |
317 | |
318 | const INDIC_CONFIGS: &[IndicConfig] = &[ |
319 | IndicConfig::new( |
320 | script:None, |
321 | has_old_spec:false, |
322 | virama:0, |
323 | base_pos:BasePosition::Last, |
324 | reph_pos:RephPosition::BeforePost, |
325 | RephMode::Implicit, |
326 | BlwfMode::PreAndPost, |
327 | ), |
328 | IndicConfig::new( |
329 | script:Some(script::DEVANAGARI), |
330 | has_old_spec:true, |
331 | virama:0x094D, |
332 | base_pos:BasePosition::Last, |
333 | reph_pos:RephPosition::BeforePost, |
334 | RephMode::Implicit, |
335 | BlwfMode::PreAndPost, |
336 | ), |
337 | IndicConfig::new( |
338 | script:Some(script::BENGALI), |
339 | has_old_spec:true, |
340 | virama:0x09CD, |
341 | base_pos:BasePosition::Last, |
342 | reph_pos:RephPosition::AfterSub, |
343 | RephMode::Implicit, |
344 | BlwfMode::PreAndPost, |
345 | ), |
346 | IndicConfig::new( |
347 | script:Some(script::GURMUKHI), |
348 | has_old_spec:true, |
349 | virama:0x0A4D, |
350 | base_pos:BasePosition::Last, |
351 | reph_pos:RephPosition::BeforeSub, |
352 | RephMode::Implicit, |
353 | BlwfMode::PreAndPost, |
354 | ), |
355 | IndicConfig::new( |
356 | script:Some(script::GUJARATI), |
357 | has_old_spec:true, |
358 | virama:0x0ACD, |
359 | base_pos:BasePosition::Last, |
360 | reph_pos:RephPosition::BeforePost, |
361 | RephMode::Implicit, |
362 | BlwfMode::PreAndPost, |
363 | ), |
364 | IndicConfig::new( |
365 | script:Some(script::ORIYA), |
366 | has_old_spec:true, |
367 | virama:0x0B4D, |
368 | base_pos:BasePosition::Last, |
369 | reph_pos:RephPosition::AfterMain, |
370 | RephMode::Implicit, |
371 | BlwfMode::PreAndPost, |
372 | ), |
373 | IndicConfig::new( |
374 | script:Some(script::TAMIL), |
375 | has_old_spec:true, |
376 | virama:0x0BCD, |
377 | base_pos:BasePosition::Last, |
378 | reph_pos:RephPosition::AfterPost, |
379 | RephMode::Implicit, |
380 | BlwfMode::PreAndPost, |
381 | ), |
382 | IndicConfig::new( |
383 | script:Some(script::TELUGU), |
384 | has_old_spec:true, |
385 | virama:0x0C4D, |
386 | base_pos:BasePosition::Last, |
387 | reph_pos:RephPosition::AfterPost, |
388 | RephMode::Explicit, |
389 | BlwfMode::PostOnly, |
390 | ), |
391 | IndicConfig::new( |
392 | script:Some(script::KANNADA), |
393 | has_old_spec:true, |
394 | virama:0x0CCD, |
395 | base_pos:BasePosition::Last, |
396 | reph_pos:RephPosition::AfterPost, |
397 | RephMode::Implicit, |
398 | BlwfMode::PostOnly, |
399 | ), |
400 | IndicConfig::new( |
401 | script:Some(script::MALAYALAM), |
402 | has_old_spec:true, |
403 | virama:0x0D4D, |
404 | base_pos:BasePosition::Last, |
405 | reph_pos:RephPosition::AfterMain, |
406 | RephMode::LogRepha, |
407 | BlwfMode::PreAndPost, |
408 | ), |
409 | IndicConfig::new( |
410 | script:Some(script::SINHALA), |
411 | has_old_spec:false, |
412 | virama:0x0DCA, |
413 | base_pos:BasePosition::LastSinhala, |
414 | reph_pos:RephPosition::AfterPost, |
415 | RephMode::Explicit, |
416 | BlwfMode::PreAndPost, |
417 | ), |
418 | ]; |
419 | |
/// Pre-resolved GSUB lookups of one feature, used to ask whether that feature
/// would substitute a given glyph sequence.
struct IndicWouldSubstituteFeature {
    /// Range of GSUB lookup indices in the map belonging to the feature's
    /// stage; empty when the feature has no stage.
    lookups: Range<usize>,
    /// Passed through as `WouldApplyContext::zero_context` on every query.
    zero_context: bool,
}
424 | |
425 | impl IndicWouldSubstituteFeature { |
426 | pub fn new(map: &Map, feature_tag: Tag, zero_context: bool) -> Self { |
427 | IndicWouldSubstituteFeature { |
428 | lookups: match map.feature_stage(TableIndex::GSUB, feature_tag) { |
429 | Some(stage) => map.stage_lookup_range(TableIndex::GSUB, stage), |
430 | None => 0..0, |
431 | }, |
432 | zero_context, |
433 | } |
434 | } |
435 | |
436 | pub fn would_substitute(&self, map: &Map, face: &Face, glyphs: &[GlyphId]) -> bool { |
437 | for index in self.lookups.clone() { |
438 | let lookup = map.lookup(TableIndex::GSUB, index); |
439 | let ctx = WouldApplyContext { |
440 | glyphs, |
441 | zero_context: self.zero_context, |
442 | }; |
443 | if face |
444 | .gsub |
445 | .as_ref() |
446 | .and_then(|table| table.get_lookup(lookup.index)) |
447 | .map_or(false, |lookup| lookup.would_apply(&ctx)) |
448 | { |
449 | return true; |
450 | } |
451 | } |
452 | |
453 | false |
454 | } |
455 | } |
456 | |
457 | struct IndicShapePlan { |
458 | config: IndicConfig, |
459 | is_old_spec: bool, |
460 | // virama_glyph: Option<u32>, |
461 | rphf: IndicWouldSubstituteFeature, |
462 | pref: IndicWouldSubstituteFeature, |
463 | blwf: IndicWouldSubstituteFeature, |
464 | pstf: IndicWouldSubstituteFeature, |
465 | vatu: IndicWouldSubstituteFeature, |
466 | mask_array: [Mask; INDIC_FEATURES.len()], |
467 | } |
468 | |
469 | impl IndicShapePlan { |
470 | fn new(plan: &ShapePlan) -> Self { |
471 | let script = plan.script; |
472 | let config = if let Some(c) = INDIC_CONFIGS.iter().skip(1).find(|c| c.script == script) { |
473 | *c |
474 | } else { |
475 | INDIC_CONFIGS[0] |
476 | }; |
477 | |
478 | let is_old_spec = config.has_old_spec |
479 | && plan |
480 | .ot_map |
481 | .chosen_script(TableIndex::GSUB) |
482 | .map_or(true, |tag| tag.to_bytes()[3] != b'2' ); |
483 | |
484 | // Use zero-context would_substitute() matching for new-spec of the main |
485 | // Indic scripts, and scripts with one spec only, but not for old-specs. |
486 | // The new-spec for all dual-spec scripts says zero-context matching happens. |
487 | // |
488 | // However, testing with Malayalam shows that old and new spec both allow |
489 | // context. Testing with Bengali new-spec however shows that it doesn't. |
490 | // So, the heuristic here is the way it is. It should *only* be changed, |
491 | // as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE. |
492 | let zero_context = is_old_spec && script != Some(script::MALAYALAM); |
493 | |
494 | let mut mask_array = [0; INDIC_FEATURES.len()]; |
495 | for (i, feature) in INDIC_FEATURES.iter().enumerate() { |
496 | mask_array[i] = if feature.1.contains(FeatureFlags::GLOBAL) { |
497 | 0 |
498 | } else { |
499 | plan.ot_map.one_mask(feature.0) |
500 | } |
501 | } |
502 | |
503 | // TODO: what is this? |
504 | // let mut virama_glyph = None; |
505 | // if config.virama != 0 { |
506 | // if let Some(g) = face.glyph_index(char::try_from(config.virama).unwrap()) { |
507 | // virama_glyph = Some(g.0 as u32); |
508 | // } |
509 | // } |
510 | |
511 | IndicShapePlan { |
512 | config, |
513 | is_old_spec, |
514 | // virama_glyph, |
515 | rphf: IndicWouldSubstituteFeature::new(&plan.ot_map, feature::REPH_FORMS, zero_context), |
516 | pref: IndicWouldSubstituteFeature::new( |
517 | &plan.ot_map, |
518 | feature::PRE_BASE_FORMS, |
519 | zero_context, |
520 | ), |
521 | blwf: IndicWouldSubstituteFeature::new( |
522 | &plan.ot_map, |
523 | feature::BELOW_BASE_FORMS, |
524 | zero_context, |
525 | ), |
526 | pstf: IndicWouldSubstituteFeature::new( |
527 | &plan.ot_map, |
528 | feature::POST_BASE_FORMS, |
529 | zero_context, |
530 | ), |
531 | vatu: IndicWouldSubstituteFeature::new( |
532 | &plan.ot_map, |
533 | feature::VATTU_VARIANTS, |
534 | zero_context, |
535 | ), |
536 | mask_array, |
537 | } |
538 | } |
539 | } |
540 | |
541 | impl GlyphInfo { |
542 | pub(crate) fn indic_category(&self) -> Category { |
543 | self.complex_var_u8_category() |
544 | } |
545 | |
546 | pub(crate) fn set_indic_category(&mut self, c: Category) { |
547 | self.set_complex_var_u8_category(c) |
548 | } |
549 | |
550 | pub(crate) fn indic_position(&self) -> Position { |
551 | self.complex_var_u8_auxiliary() |
552 | } |
553 | |
554 | pub(crate) fn set_indic_position(&mut self, c: Position) { |
555 | self.set_complex_var_u8_auxiliary(c) |
556 | } |
557 | |
558 | fn is_one_of(&self, flags: u32) -> bool { |
559 | // If it ligated, all bets are off. |
560 | if self.is_ligated() { |
561 | return false; |
562 | } |
563 | |
564 | rb_flag_unsafe(self.indic_category() as u32) & flags != 0 |
565 | } |
566 | |
567 | fn is_joiner(&self) -> bool { |
568 | self.is_one_of(JOINER_FLAGS) |
569 | } |
570 | |
571 | pub(crate) fn is_consonant(&self) -> bool { |
572 | self.is_one_of(CONSONANT_FLAGS) |
573 | } |
574 | |
575 | fn is_halant(&self) -> bool { |
576 | self.is_one_of(rb_flag(category::H as u32)) |
577 | } |
578 | |
579 | fn set_indic_properties(&mut self) { |
580 | let u = self.glyph_id; |
581 | let (mut cat, mut pos) = get_category_and_position(u); |
582 | |
583 | // Re-assign category |
584 | |
585 | // The following act more like the Bindus. |
586 | match u { |
587 | 0x0953..=0x0954 => cat = category::SM, |
588 | // The following act like consonants. |
589 | 0x0A72..=0x0A73 | 0x1CF5..=0x1CF6 => cat = category::C, |
590 | // TODO: The following should only be allowed after a Visarga. |
591 | // For now, just treat them like regular tone marks. |
592 | 0x1CE2..=0x1CE8 => cat = category::A, |
593 | // TODO: The following should only be allowed after some of |
594 | // the nasalization marks, maybe only for U+1CE9..U+1CF1. |
595 | // For now, just treat them like tone marks. |
596 | 0x1CED => cat = category::A, |
597 | // The following take marks in standalone clusters, similar to Avagraha. |
598 | 0xA8F2..=0xA8F7 | 0x1CE9..=0x1CEC | 0x1CEE..=0x1CF1 => cat = category::SYMBOL, |
599 | // https://github.com/harfbuzz/harfbuzz/issues/524 |
600 | 0x0A51 => { |
601 | cat = category::M; |
602 | pos = position::BELOW_C; |
603 | } |
604 | // According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil, |
605 | // so the Indic shaper needs to know their categories. |
606 | 0x11301 | 0x11303 => cat = category::SM, |
607 | 0x1133B | 0x1133C => cat = category::N, |
608 | // https://github.com/harfbuzz/harfbuzz/issues/552 |
609 | 0x0AFB => cat = category::N, |
610 | // https://github.com/harfbuzz/harfbuzz/issues/2849 |
611 | 0x0B55 => cat = category::N, |
612 | // https://github.com/harfbuzz/harfbuzz/issues/538 |
613 | 0x0980 => cat = category::PLACEHOLDER, |
614 | // https://github.com/harfbuzz/harfbuzz/issues/1613 |
615 | 0x09FC => cat = category::PLACEHOLDER, |
616 | // https://github.com/harfbuzz/harfbuzz/issues/623 |
617 | 0x0C80 => cat = category::PLACEHOLDER, |
618 | 0x2010 | 0x2011 => cat = category::PLACEHOLDER, |
619 | 0x25CC => cat = category::DOTTED_CIRCLE, |
620 | _ => {} |
621 | } |
622 | |
623 | // Re-assign position. |
624 | |
625 | if (rb_flag_unsafe(cat as u32) & CONSONANT_FLAGS) != 0 { |
626 | pos = position::BASE_C; |
627 | if RA_CHARS.contains(&u) { |
628 | cat = category::RA; |
629 | } |
630 | } else if cat == category::M { |
631 | pos = matra_position_indic(u, pos); |
632 | } else if (rb_flag_unsafe(cat as u32) |
633 | & (category_flag(category::SM) |
634 | | category_flag(category::A) |
635 | | category_flag(category::SYMBOL))) |
636 | != 0 |
637 | { |
638 | pos = position::SMVD; |
639 | } |
640 | |
641 | // Oriya Bindu is BeforeSub in the spec. |
642 | if u == 0x0B01 { |
643 | pos = position::BEFORE_SUB; |
644 | } |
645 | |
646 | self.set_indic_category(cat); |
647 | self.set_indic_position(pos); |
648 | } |
649 | } |
650 | |
651 | fn collect_features(planner: &mut ShapePlanner) { |
652 | // Do this before any lookups have been applied. |
653 | planner.ot_map.add_gsub_pause(Some(setup_syllables)); |
654 | |
655 | planner |
656 | .ot_map |
657 | .enable_feature(feature::LOCALIZED_FORMS, FeatureFlags::empty(), 1); |
658 | // The Indic specs do not require ccmp, but we apply it here since if |
659 | // there is a use of it, it's typically at the beginning. |
660 | planner.ot_map.enable_feature( |
661 | feature::GLYPH_COMPOSITION_DECOMPOSITION, |
662 | FeatureFlags::empty(), |
663 | 1, |
664 | ); |
665 | |
666 | planner.ot_map.add_gsub_pause(Some(initial_reordering)); |
667 | |
668 | for feature in INDIC_FEATURES.iter().take(10) { |
669 | planner.ot_map.add_feature(feature.0, feature.1, 1); |
670 | planner.ot_map.add_gsub_pause(None); |
671 | } |
672 | |
673 | planner.ot_map.add_gsub_pause(Some(final_reordering)); |
674 | planner |
675 | .ot_map |
676 | .add_gsub_pause(Some(crate::ot::clear_syllables)); |
677 | |
678 | for feature in INDIC_FEATURES.iter().skip(10) { |
679 | planner.ot_map.add_feature(feature.0, feature.1, 1); |
680 | } |
681 | } |
682 | |
683 | fn override_features(planner: &mut ShapePlanner) { |
684 | planner.ot_map.disable_feature(tag:feature::STANDARD_LIGATURES); |
685 | } |
686 | |
687 | fn preprocess_text(_: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
688 | super::vowel_constraints::preprocess_text_vowel_constraints(buffer); |
689 | } |
690 | |
691 | fn decompose(ctx: &ShapeNormalizeContext, ab: char) -> Option<(char, char)> { |
692 | // Don't decompose these. |
693 | match ab { |
694 | ' \u{0931}' | // DEVANAGARI LETTER RRA |
695 | // https://github.com/harfbuzz/harfbuzz/issues/779 |
696 | ' \u{09DC}' | // BENGALI LETTER RRA |
697 | ' \u{09DD}' | // BENGALI LETTER RHA |
698 | ' \u{0B94}' => return None, // TAMIL LETTER AU |
699 | _ => {} |
700 | } |
701 | |
702 | if ab == ' \u{0DDA}' || (' \u{0DDC}' ..=' \u{0DDE}' ).contains(&ab) { |
703 | // Sinhala split matras... Let the fun begin. |
704 | // |
705 | // These four characters have Unicode decompositions. However, Uniscribe |
706 | // decomposes them "Khmer-style", that is, it uses the character itself to |
707 | // get the second half. The first half of all four decompositions is always |
708 | // U+0DD9. |
709 | // |
710 | // Now, there are buggy fonts, namely, the widely used lklug.ttf, that are |
711 | // broken with Uniscribe. But we need to support them. As such, we only |
712 | // do the Uniscribe-style decomposition if the character is transformed into |
713 | // its "sec.half" form by the 'pstf' feature. Otherwise, we fall back to |
714 | // Unicode decomposition. |
715 | // |
716 | // Note that we can't unconditionally use Unicode decomposition. That would |
717 | // break some other fonts, that are designed to work with Uniscribe, and |
718 | // don't have positioning features for the Unicode-style decomposition. |
719 | // |
720 | // Argh... |
721 | // |
722 | // The Uniscribe behavior is now documented in the newly published Sinhala |
723 | // spec in 2012: |
724 | // |
725 | // https://docs.microsoft.com/en-us/typography/script-development/sinhala#shaping |
726 | |
727 | let mut ok = false; |
728 | if let Some(g) = ctx.face.glyph_index(u32::from(ab)) { |
729 | let indic_plan = ctx.plan.data::<IndicShapePlan>(); |
730 | ok = indic_plan |
731 | .pstf |
732 | .would_substitute(&ctx.plan.ot_map, ctx.face, &[g]); |
733 | } |
734 | |
735 | if ok { |
736 | // Ok, safe to use Uniscribe-style decomposition. |
737 | return Some((' \u{0DD9}' , ab)); |
738 | } |
739 | } |
740 | |
741 | crate::unicode::decompose(ab) |
742 | } |
743 | |
744 | fn compose(_: &ShapeNormalizeContext, a: char, b: char) -> Option<char> { |
745 | // Avoid recomposing split matras. |
746 | if a.general_category().is_mark() { |
747 | return None; |
748 | } |
749 | |
750 | // Composition-exclusion exceptions that we want to recompose. |
751 | if a == ' \u{09AF}' && b == ' \u{09BC}' { |
752 | return Some(' \u{09DF}' ); |
753 | } |
754 | |
755 | crate::unicode::compose(a, b) |
756 | } |
757 | |
758 | fn setup_masks(_: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
759 | // We cannot setup masks here. We save information about characters |
760 | // and setup masks later on in a pause-callback. |
761 | for info: &mut GlyphInfo in buffer.info_slice_mut() { |
762 | info.set_indic_properties(); |
763 | } |
764 | } |
765 | |
766 | fn setup_syllables(_: &ShapePlan, _: &Face, buffer: &mut Buffer) { |
767 | super::indic_machine::find_syllables_indic(buffer); |
768 | |
769 | let mut start: usize = 0; |
770 | let mut end: usize = buffer.next_syllable(start:0); |
771 | while start < buffer.len { |
772 | buffer.unsafe_to_break(start:Some(start), end:Some(end)); |
773 | start = end; |
774 | end = buffer.next_syllable(start); |
775 | } |
776 | } |
777 | |
778 | fn initial_reordering(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) { |
779 | use super::indic_machine::SyllableType; |
780 | |
781 | let indic_plan: &IndicShapePlan = plan.data::<IndicShapePlan>(); |
782 | |
783 | update_consonant_positions(plan, indic_plan, face, buffer); |
784 | syllabic::insert_dotted_circles( |
785 | face, |
786 | buffer, |
787 | broken_syllable_type:SyllableType::BrokenCluster as u8, |
788 | dottedcircle_category:category::DOTTED_CIRCLE, |
789 | repha_category:Some(category::REPHA), |
790 | dottedcircle_position:Some(position::END), |
791 | ); |
792 | |
793 | let mut start: usize = 0; |
794 | let mut end: usize = buffer.next_syllable(start:0); |
795 | while start < buffer.len { |
796 | initial_reordering_syllable(plan, indic_plan, face, start, end, buffer); |
797 | start = end; |
798 | end = buffer.next_syllable(start); |
799 | } |
800 | } |
801 | |
802 | fn update_consonant_positions( |
803 | plan: &ShapePlan, |
804 | indic_plan: &IndicShapePlan, |
805 | face: &Face, |
806 | buffer: &mut Buffer, |
807 | ) { |
808 | if indic_plan.config.base_pos != BasePosition::Last { |
809 | return; |
810 | } |
811 | |
812 | let mut virama_glyph: Option = None; |
813 | if indic_plan.config.virama != 0 { |
814 | virama_glyph = face.glyph_index(indic_plan.config.virama); |
815 | } |
816 | |
817 | if let Some(virama: GlyphId) = virama_glyph { |
818 | for info: &mut GlyphInfo in buffer.info_slice_mut() { |
819 | if info.indic_position() == position::BASE_C { |
820 | let consonant: GlyphId = info.as_glyph(); |
821 | info.set_indic_position(consonant_position_from_face( |
822 | plan, indic_plan, face, consonant, virama, |
823 | )); |
824 | } |
825 | } |
826 | } |
827 | } |
828 | |
829 | fn consonant_position_from_face( |
830 | plan: &ShapePlan, |
831 | indic_plan: &IndicShapePlan, |
832 | face: &Face, |
833 | consonant: GlyphId, |
834 | virama: GlyphId, |
835 | ) -> u8 { |
836 | // For old-spec, the order of glyphs is Consonant,Virama, |
837 | // whereas for new-spec, it's Virama,Consonant. However, |
838 | // some broken fonts (like Free Sans) simply copied lookups |
839 | // from old-spec to new-spec without modification. |
840 | // And oddly enough, Uniscribe seems to respect those lookups. |
841 | // Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds |
842 | // base at 0. The font however, only has lookups matching |
843 | // 930,94D in 'blwf', not the expected 94D,930 (with new-spec |
844 | // table). As such, we simply match both sequences. Seems |
845 | // to work. |
846 | // |
847 | // Vatu is done as well, for: |
848 | // https://github.com/harfbuzz/harfbuzz/issues/1587 |
849 | |
850 | if indic_plan |
851 | .blwf |
852 | .would_substitute(&plan.ot_map, face, &[virama, consonant]) |
853 | || indic_plan |
854 | .blwf |
855 | .would_substitute(&plan.ot_map, face, &[consonant, virama]) |
856 | || indic_plan |
857 | .vatu |
858 | .would_substitute(&plan.ot_map, face, &[virama, consonant]) |
859 | || indic_plan |
860 | .vatu |
861 | .would_substitute(&plan.ot_map, face, &[consonant, virama]) |
862 | { |
863 | return position::BELOW_C; |
864 | } |
865 | |
866 | if indic_plan |
867 | .pstf |
868 | .would_substitute(&plan.ot_map, face, &[virama, consonant]) |
869 | || indic_plan |
870 | .pstf |
871 | .would_substitute(&plan.ot_map, face, &[consonant, virama]) |
872 | { |
873 | return position::POST_C; |
874 | } |
875 | |
876 | if indic_plan |
877 | .pref |
878 | .would_substitute(&plan.ot_map, face, &[virama, consonant]) |
879 | || indic_plan |
880 | .pref |
881 | .would_substitute(&plan.ot_map, face, &[consonant, virama]) |
882 | { |
883 | return position::POST_C; |
884 | } |
885 | |
886 | position::BASE_C |
887 | } |
888 | |
889 | fn initial_reordering_syllable( |
890 | plan: &ShapePlan, |
891 | indic_plan: &IndicShapePlan, |
892 | face: &Face, |
893 | start: usize, |
894 | end: usize, |
895 | buffer: &mut Buffer, |
896 | ) { |
897 | use super::indic_machine::SyllableType; |
898 | |
899 | let syllable_type = match buffer.info[start].syllable() & 0x0F { |
900 | 0 => SyllableType::ConsonantSyllable, |
901 | 1 => SyllableType::VowelSyllable, |
902 | 2 => SyllableType::StandaloneCluster, |
903 | 3 => SyllableType::SymbolCluster, |
904 | 4 => SyllableType::BrokenCluster, |
905 | 5 => SyllableType::NonIndicCluster, |
906 | _ => unreachable!(), |
907 | }; |
908 | |
909 | match syllable_type { |
910 | // We made the vowels look like consonants. So let's call the consonant logic! |
911 | SyllableType::VowelSyllable | SyllableType::ConsonantSyllable => { |
912 | initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer); |
913 | } |
914 | // We already inserted dotted-circles, so just call the standalone_cluster. |
915 | SyllableType::BrokenCluster | SyllableType::StandaloneCluster => { |
916 | initial_reordering_standalone_cluster(plan, indic_plan, face, start, end, buffer); |
917 | } |
918 | SyllableType::SymbolCluster | SyllableType::NonIndicCluster => {} |
919 | } |
920 | } |
921 | |
922 | // Rules from: |
// https://docs.microsoft.com/en-us/typography/script-development/devanagari
924 | fn initial_reordering_consonant_syllable( |
925 | plan: &ShapePlan, |
926 | indic_plan: &IndicShapePlan, |
927 | face: &Face, |
928 | start: usize, |
929 | end: usize, |
930 | buffer: &mut Buffer, |
931 | ) { |
932 | // https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167 |
933 | // For compatibility with legacy usage in Kannada, |
934 | // Ra+h+ZWJ must behave like Ra+ZWJ+h... |
935 | if buffer.script == Some(script::KANNADA) |
936 | && start + 3 <= end |
937 | && buffer.info[start].is_one_of(category_flag(category::RA)) |
938 | && buffer.info[start + 1].is_one_of(category_flag(category::H)) |
939 | && buffer.info[start + 2].is_one_of(category_flag(category::ZWJ)) |
940 | { |
941 | buffer.merge_clusters(start + 1, start + 3); |
942 | buffer.info.swap(start + 1, start + 2); |
943 | } |
944 | |
945 | // 1. Find base consonant: |
946 | // |
947 | // The shaping engine finds the base consonant of the syllable, using the |
948 | // following algorithm: starting from the end of the syllable, move backwards |
949 | // until a consonant is found that does not have a below-base or post-base |
950 | // form (post-base forms have to follow below-base forms), or that is not a |
951 | // pre-base-reordering Ra, or arrive at the first consonant. The consonant |
952 | // stopped at will be the base. |
953 | // |
954 | // - If the syllable starts with Ra + Halant (in a script that has Reph) |
955 | // and has more than one consonant, Ra is excluded from candidates for |
956 | // base consonants. |
957 | |
958 | let mut base = end; |
959 | let mut has_reph = false; |
960 | |
961 | { |
962 | // -> If the syllable starts with Ra + Halant (in a script that has Reph) |
963 | // and has more than one consonant, Ra is excluded from candidates for |
964 | // base consonants. |
965 | let mut limit = start; |
966 | if indic_plan.mask_array[indic_feature::RPHF] != 0 |
967 | && start + 3 <= end |
968 | && ((indic_plan.config.reph_mode == RephMode::Implicit |
969 | && !buffer.info[start + 2].is_joiner()) |
970 | || (indic_plan.config.reph_mode == RephMode::Explicit |
971 | && buffer.info[start + 2].indic_category() == category::ZWJ)) |
972 | { |
973 | // See if it matches the 'rphf' feature. |
974 | let glyphs = &[ |
975 | buffer.info[start].as_glyph(), |
976 | buffer.info[start + 1].as_glyph(), |
977 | if indic_plan.config.reph_mode == RephMode::Explicit { |
978 | buffer.info[start + 2].as_glyph() |
979 | } else { |
980 | GlyphId(0) |
981 | }, |
982 | ]; |
983 | if indic_plan |
984 | .rphf |
985 | .would_substitute(&plan.ot_map, face, &glyphs[0..2]) |
986 | || (indic_plan.config.reph_mode == RephMode::Explicit |
987 | && indic_plan.rphf.would_substitute(&plan.ot_map, face, glyphs)) |
988 | { |
989 | limit += 2; |
990 | while limit < end && buffer.info[limit].is_joiner() { |
991 | limit += 1; |
992 | } |
993 | base = start; |
994 | has_reph = true; |
995 | } |
996 | } else if indic_plan.config.reph_mode == RephMode::LogRepha |
997 | && buffer.info[start].indic_category() == category::REPHA |
998 | { |
999 | limit += 1; |
1000 | while limit < end && buffer.info[limit].is_joiner() { |
1001 | limit += 1; |
1002 | } |
1003 | base = start; |
1004 | has_reph = true; |
1005 | } |
1006 | |
1007 | match indic_plan.config.base_pos { |
1008 | BasePosition::Last => { |
1009 | // -> starting from the end of the syllable, move backwards |
1010 | let mut i = end; |
1011 | let mut seen_below = false; |
1012 | loop { |
1013 | i -= 1; |
1014 | // -> until a consonant is found |
1015 | if buffer.info[i].is_consonant() { |
1016 | // -> that does not have a below-base or post-base form |
1017 | // (post-base forms have to follow below-base forms), |
1018 | if buffer.info[i].indic_position() != position::BELOW_C |
1019 | && (buffer.info[i].indic_position() != position::POST_C || seen_below) |
1020 | { |
1021 | base = i; |
1022 | break; |
1023 | } |
1024 | if buffer.info[i].indic_position() == position::BELOW_C { |
1025 | seen_below = true; |
1026 | } |
1027 | |
1028 | // -> or that is not a pre-base-reordering Ra, |
1029 | // |
1030 | // IMPLEMENTATION NOTES: |
1031 | // |
1032 | // Our pre-base-reordering Ra's are marked position::PostC, so will be skipped |
1033 | // by the logic above already. |
1034 | |
1035 | // -> or arrive at the first consonant. The consonant stopped at will |
1036 | // be the base. |
1037 | base = i; |
1038 | } else { |
1039 | // A ZWJ after a Halant stops the base search, and requests an explicit |
1040 | // half form. |
1041 | // A ZWJ before a Halant, requests a subjoined form instead, and hence |
1042 | // search continues. This is particularly important for Bengali |
1043 | // sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. |
1044 | if start < i |
1045 | && buffer.info[i].indic_category() == category::ZWJ |
1046 | && buffer.info[i - 1].indic_category() == category::H |
1047 | { |
1048 | break; |
1049 | } |
1050 | } |
1051 | |
1052 | if i <= limit { |
1053 | break; |
1054 | } |
1055 | } |
1056 | } |
1057 | BasePosition::LastSinhala => { |
1058 | // Sinhala base positioning is slightly different from main Indic, in that: |
1059 | // 1. Its ZWJ behavior is different, |
1060 | // 2. We don't need to look into the font for consonant positions. |
1061 | |
1062 | if !has_reph { |
1063 | base = limit; |
1064 | } |
1065 | |
1066 | // Find the last base consonant that is not blocked by ZWJ. If there is |
1067 | // a ZWJ right before a base consonant, that would request a subjoined form. |
1068 | for i in limit..end { |
1069 | if buffer.info[i].is_consonant() { |
1070 | if limit < i && buffer.info[i - 1].indic_category() == category::ZWJ { |
1071 | break; |
1072 | } else { |
1073 | base = i; |
1074 | } |
1075 | } |
1076 | } |
1077 | |
1078 | // Mark all subsequent consonants as below. |
1079 | for i in base + 1..end { |
1080 | if buffer.info[i].is_consonant() { |
1081 | buffer.info[i].set_indic_position(position::BELOW_C); |
1082 | } |
1083 | } |
1084 | } |
1085 | } |
1086 | |
1087 | // -> If the syllable starts with Ra + Halant (in a script that has Reph) |
1088 | // and has more than one consonant, Ra is excluded from candidates for |
1089 | // base consonants. |
1090 | // |
1091 | // Only do this for unforced Reph. (ie. not for Ra,H,ZWJ. |
1092 | if has_reph && base == start && limit - base <= 2 { |
1093 | // Have no other consonant, so Reph is not formed and Ra becomes base. |
1094 | has_reph = false; |
1095 | } |
1096 | } |
1097 | |
1098 | // 2. Decompose and reorder Matras: |
1099 | // |
1100 | // Each matra and any syllable modifier sign in the syllable are moved to the |
1101 | // appropriate position relative to the consonant(s) in the syllable. The |
1102 | // shaping engine decomposes two- or three-part matras into their constituent |
1103 | // parts before any repositioning. Matra characters are classified by which |
1104 | // consonant in a conjunct they have affinity for and are reordered to the |
1105 | // following positions: |
1106 | // |
1107 | // - Before first half form in the syllable |
1108 | // - After subjoined consonants |
1109 | // - After post-form consonant |
1110 | // - After main consonant (for above marks) |
1111 | // |
1112 | // IMPLEMENTATION NOTES: |
1113 | // |
1114 | // The normalize() routine has already decomposed matras for us, so we don't |
1115 | // need to worry about that. |
1116 | |
1117 | // 3. Reorder marks to canonical order: |
1118 | // |
1119 | // Adjacent nukta and halant or nukta and vedic sign are always repositioned |
1120 | // if necessary, so that the nukta is first. |
1121 | // |
1122 | // IMPLEMENTATION NOTES: |
1123 | // |
1124 | // We don't need to do this: the normalize() routine already did this for us. |
1125 | |
1126 | // Reorder characters |
1127 | |
1128 | for i in start..base { |
1129 | let pos = buffer.info[i].indic_position(); |
1130 | buffer.info[i].set_indic_position(cmp::min(position::PRE_C, pos)); |
1131 | } |
1132 | |
1133 | if base < end { |
1134 | buffer.info[base].set_indic_position(position::BASE_C); |
1135 | } |
1136 | |
1137 | // Mark final consonants. A final consonant is one appearing after a matra. |
1138 | // Happens in Sinhala. |
1139 | for i in base + 1..end { |
1140 | if buffer.info[i].indic_category() == category::M { |
1141 | for j in i + 1..end { |
1142 | if buffer.info[j].is_consonant() { |
1143 | buffer.info[j].set_indic_position(position::FINAL_C); |
1144 | break; |
1145 | } |
1146 | } |
1147 | |
1148 | break; |
1149 | } |
1150 | } |
1151 | |
1152 | // Handle beginning Ra |
1153 | if has_reph { |
1154 | buffer.info[start].set_indic_position(position::RA_TO_BECOME_REPH); |
1155 | } |
1156 | |
1157 | // For old-style Indic script tags, move the first post-base Halant after |
1158 | // last consonant. |
1159 | // |
1160 | // Reports suggest that in some scripts Uniscribe does this only if there |
1161 | // is *not* a Halant after last consonant already. We know that is the |
1162 | // case for Kannada, while it reorders unconditionally in other scripts, |
1163 | // eg. Malayalam, Bengali, and Devanagari. We don't currently know about |
1164 | // other scripts, so we block Kannada. |
1165 | // |
1166 | // Kannada test case: |
1167 | // U+0C9A,U+0CCD,U+0C9A,U+0CCD |
1168 | // With some versions of Lohit Kannada. |
1169 | // https://bugs.freedesktop.org/show_bug.cgi?id=59118 |
1170 | // |
1171 | // Malayalam test case: |
1172 | // U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D |
1173 | // With lohit-ttf-20121122/Lohit-Malayalam.ttf |
1174 | // |
1175 | // Bengali test case: |
1176 | // U+0998,U+09CD,U+09AF,U+09CD |
1177 | // With Windows XP vrinda.ttf |
1178 | // https://github.com/harfbuzz/harfbuzz/issues/1073 |
1179 | // |
1180 | // Devanagari test case: |
1181 | // U+091F,U+094D,U+0930,U+094D |
1182 | // With chandas.ttf |
1183 | // https://github.com/harfbuzz/harfbuzz/issues/1071 |
1184 | if indic_plan.is_old_spec { |
1185 | let disallow_double_halants = buffer.script == Some(script::KANNADA); |
1186 | for i in base + 1..end { |
1187 | if buffer.info[i].indic_category() == category::H { |
1188 | let mut j = end - 1; |
1189 | while j > i { |
1190 | if buffer.info[j].is_consonant() |
1191 | || (disallow_double_halants |
1192 | && buffer.info[j].indic_category() == category::H) |
1193 | { |
1194 | break; |
1195 | } |
1196 | |
1197 | j -= 1; |
1198 | } |
1199 | |
1200 | if buffer.info[j].indic_category() != category::H && j > i { |
1201 | // Move Halant to after last consonant. |
1202 | let t = buffer.info[i]; |
1203 | for k in 0..j - i { |
1204 | buffer.info[k + i] = buffer.info[k + i + 1]; |
1205 | } |
1206 | buffer.info[j] = t; |
1207 | } |
1208 | |
1209 | break; |
1210 | } |
1211 | } |
1212 | } |
1213 | |
1214 | // Attach misc marks to previous char to move with them. |
1215 | { |
1216 | let mut last_pos = position::START; |
1217 | for i in start..end { |
1218 | let ok = rb_flag_unsafe(buffer.info[i].indic_category() as u32) |
1219 | & (category_flag(category::ZWJ) |
1220 | | category_flag(category::ZWNJ) |
1221 | | category_flag(category::N) |
1222 | | category_flag(category::RS) |
1223 | | category_flag(category::CM) |
1224 | | category_flag(category::H)) |
1225 | != 0; |
1226 | if ok { |
1227 | buffer.info[i].set_indic_position(last_pos); |
1228 | |
1229 | if buffer.info[i].indic_category() == category::H |
1230 | && buffer.info[i].indic_position() == position::PRE_M |
1231 | { |
1232 | // Uniscribe doesn't move the Halant with Left Matra. |
1233 | // TEST: U+092B,U+093F,U+094DE |
1234 | // We follow. This is important for the Sinhala |
1235 | // U+0DDA split matra since it decomposes to U+0DD9,U+0DCA |
1236 | // where U+0DD9 is a left matra and U+0DCA is the virama. |
1237 | // We don't want to move the virama with the left matra. |
1238 | // TEST: U+0D9A,U+0DDA |
1239 | for j in (start + 1..=i).rev() { |
1240 | if buffer.info[j - 1].indic_position() != position::PRE_M { |
1241 | let pos = buffer.info[j - 1].indic_position(); |
1242 | buffer.info[i].set_indic_position(pos); |
1243 | break; |
1244 | } |
1245 | } |
1246 | } |
1247 | } else if buffer.info[i].indic_position() != position::SMVD { |
1248 | last_pos = buffer.info[i].indic_position(); |
1249 | } |
1250 | } |
1251 | } |
1252 | // For post-base consonants let them own anything before them |
1253 | // since the last consonant or matra. |
1254 | { |
1255 | let mut last = base; |
1256 | for i in base + 1..end { |
1257 | if buffer.info[i].is_consonant() { |
1258 | for j in last + 1..i { |
1259 | if (buffer.info[j].indic_position() as u8) < (position::SMVD as u8) { |
1260 | let pos = buffer.info[i].indic_position(); |
1261 | buffer.info[j].set_indic_position(pos); |
1262 | } |
1263 | } |
1264 | |
1265 | last = i; |
1266 | } else if buffer.info[i].indic_category() == category::M { |
1267 | last = i; |
1268 | } |
1269 | } |
1270 | } |
1271 | |
1272 | { |
1273 | // Use syllable() for sort accounting temporarily. |
1274 | let syllable = buffer.info[start].syllable(); |
1275 | for i in start..end { |
1276 | buffer.info[i].set_syllable(u8::try_from(i - start).unwrap()); |
1277 | } |
1278 | |
1279 | buffer.info[start..end].sort_by(|a, b| a.indic_position().cmp(&b.indic_position())); |
1280 | |
1281 | // Find base again. |
1282 | base = end; |
1283 | for i in start..end { |
1284 | if buffer.info[i].indic_position() == position::BASE_C { |
1285 | base = i; |
1286 | break; |
1287 | } |
1288 | } |
1289 | // Things are out-of-control for post base positions, they may shuffle |
1290 | // around like crazy. In old-spec mode, we move halants around, so in |
1291 | // that case merge all clusters after base. Otherwise, check the sort |
1292 | // order and merge as needed. |
1293 | // For pre-base stuff, we handle cluster issues in final reordering. |
1294 | // |
1295 | // We could use buffer->sort() for this, if there was no special |
1296 | // reordering of pre-base stuff happening later... |
1297 | // We don't want to merge_clusters all of that, which buffer->sort() |
1298 | // would. Here's a concrete example: |
1299 | // |
1300 | // Assume there's a pre-base consonant and explicit Halant before base, |
1301 | // followed by a prebase-reordering (left) Matra: |
1302 | // |
1303 | // C,H,ZWNJ,B,M |
1304 | // |
1305 | // At this point in reordering we would have: |
1306 | // |
1307 | // M,C,H,ZWNJ,B |
1308 | // |
1309 | // whereas in final reordering we will bring the Matra closer to Base: |
1310 | // |
1311 | // C,H,ZWNJ,M,B |
1312 | // |
1313 | // That's why we don't want to merge-clusters anything before the Base |
1314 | // at this point. But if something moved from after Base to before it, |
1315 | // we should merge clusters from base to them. In final-reordering, we |
1316 | // only move things around before base, and merge-clusters up to base. |
1317 | // These two merge-clusters from the two sides of base will interlock |
1318 | // to merge things correctly. See: |
1319 | // https://github.com/harfbuzz/harfbuzz/issues/2272 |
1320 | if indic_plan.is_old_spec || end - start > 127 { |
1321 | buffer.merge_clusters(base, end); |
1322 | } else { |
1323 | // Note! syllable() is a one-byte field. |
1324 | for i in base..end { |
1325 | if buffer.info[i].syllable() != 255 { |
1326 | let mut min = i; |
1327 | let mut max = i; |
1328 | let mut j = start + buffer.info[i].syllable() as usize; |
1329 | while j != i { |
1330 | min = cmp::min(min, j); |
1331 | max = cmp::max(max, j); |
1332 | let next = start + buffer.info[j].syllable() as usize; |
1333 | buffer.info[j].set_syllable(255); // So we don't process j later again. |
1334 | j = next; |
1335 | } |
1336 | |
1337 | buffer.merge_clusters(cmp::max(base, min), max + 1); |
1338 | } |
1339 | } |
1340 | } |
1341 | |
1342 | // Put syllable back in. |
1343 | for info in &mut buffer.info[start..end] { |
1344 | info.set_syllable(syllable); |
1345 | } |
1346 | } |
1347 | |
1348 | // Setup masks now |
1349 | |
1350 | { |
1351 | // Reph |
1352 | for info in &mut buffer.info[start..end] { |
1353 | if info.indic_position() != position::RA_TO_BECOME_REPH { |
1354 | break; |
1355 | } |
1356 | |
1357 | info.mask |= indic_plan.mask_array[indic_feature::RPHF]; |
1358 | } |
1359 | |
1360 | // Pre-base |
1361 | let mut mask = indic_plan.mask_array[indic_feature::HALF]; |
1362 | if !indic_plan.is_old_spec && indic_plan.config.blwf_mode == BlwfMode::PreAndPost { |
1363 | mask |= indic_plan.mask_array[indic_feature::BLWF]; |
1364 | } |
1365 | |
1366 | for info in &mut buffer.info[start..base] { |
1367 | info.mask |= mask; |
1368 | } |
1369 | |
1370 | // Base |
1371 | mask = 0; |
1372 | if base < end { |
1373 | buffer.info[base].mask |= mask; |
1374 | } |
1375 | |
1376 | // Post-base |
1377 | mask = indic_plan.mask_array[indic_feature::BLWF] |
1378 | | indic_plan.mask_array[indic_feature::ABVF] |
1379 | | indic_plan.mask_array[indic_feature::PSTF]; |
1380 | for i in base + 1..end { |
1381 | buffer.info[i].mask |= mask; |
1382 | } |
1383 | } |
1384 | |
1385 | if indic_plan.is_old_spec && buffer.script == Some(script::DEVANAGARI) { |
1386 | // Old-spec eye-lash Ra needs special handling. From the |
1387 | // spec: |
1388 | // |
1389 | // "The feature 'below-base form' is applied to consonants |
1390 | // having below-base forms and following the base consonant. |
1391 | // The exception is vattu, which may appear below half forms |
1392 | // as well as below the base glyph. The feature 'below-base |
1393 | // form' will be applied to all such occurrences of Ra as well." |
1394 | // |
1395 | // Test case: U+0924,U+094D,U+0930,U+094d,U+0915 |
1396 | // with Sanskrit 2003 font. |
1397 | // |
1398 | // However, note that Ra,Halant,ZWJ is the correct way to |
1399 | // request eyelash form of Ra, so we wouldbn't inhibit it |
1400 | // in that sequence. |
1401 | // |
1402 | // Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915 |
1403 | for i in start..base.saturating_sub(1) { |
1404 | if buffer.info[i].indic_category() == category::RA |
1405 | && buffer.info[i + 1].indic_category() == category::H |
1406 | && (i + 2 == base || buffer.info[i + 2].indic_category() != category::ZWJ) |
1407 | { |
1408 | buffer.info[i].mask |= indic_plan.mask_array[indic_feature::BLWF]; |
1409 | buffer.info[i + 1].mask |= indic_plan.mask_array[indic_feature::BLWF]; |
1410 | } |
1411 | } |
1412 | } |
1413 | |
1414 | let pref_len = 2; |
1415 | if indic_plan.mask_array[indic_feature::PREF] != 0 && base + pref_len < end { |
1416 | // Find a Halant,Ra sequence and mark it for pre-base-reordering processing. |
1417 | for i in base + 1..end - pref_len + 1 { |
1418 | let glyphs = &[buffer.info[i + 0].as_glyph(), buffer.info[i + 1].as_glyph()]; |
1419 | if indic_plan.pref.would_substitute(&plan.ot_map, face, glyphs) { |
1420 | buffer.info[i + 0].mask = indic_plan.mask_array[indic_feature::PREF]; |
1421 | buffer.info[i + 1].mask = indic_plan.mask_array[indic_feature::PREF]; |
1422 | break; |
1423 | } |
1424 | } |
1425 | } |
1426 | |
1427 | // Apply ZWJ/ZWNJ effects |
1428 | for i in start + 1..end { |
1429 | if buffer.info[i].is_joiner() { |
1430 | let non_joiner = buffer.info[i].indic_category() == category::ZWNJ; |
1431 | let mut j = i; |
1432 | |
1433 | loop { |
1434 | j -= 1; |
1435 | |
1436 | // ZWJ/ZWNJ should disable CJCT. They do that by simply |
1437 | // being there, since we don't skip them for the CJCT |
1438 | // feature (ie. F_MANUAL_ZWJ) |
1439 | |
1440 | // A ZWNJ disables HALF. |
1441 | if non_joiner { |
1442 | buffer.info[j].mask &= !indic_plan.mask_array[indic_feature::HALF]; |
1443 | } |
1444 | |
1445 | if j <= start || buffer.info[j].is_consonant() { |
1446 | break; |
1447 | } |
1448 | } |
1449 | } |
1450 | } |
1451 | } |
1452 | |
/// Initial reordering for a standalone or broken cluster in
/// `buffer.info[start..end]`.
///
/// This is a pure delegation: every argument is forwarded unchanged to
/// `initial_reordering_consonant_syllable`.
fn initial_reordering_standalone_cluster(
    plan: &ShapePlan,
    indic_plan: &IndicShapePlan,
    face: &Face,
    start: usize,
    end: usize,
    buffer: &mut Buffer,
) {
    // We treat placeholder/dotted-circle as if they are consonants, so we
    // should just chain. Only if not in compatibility mode that is...
    initial_reordering_consonant_syllable(plan, indic_plan, face, start, end, buffer);
}
1465 | |
1466 | fn final_reordering(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) { |
1467 | if buffer.is_empty() { |
1468 | return; |
1469 | } |
1470 | |
1471 | let indic_plan: &IndicShapePlan = plan.data::<IndicShapePlan>(); |
1472 | |
1473 | let mut virama_glyph: Option = None; |
1474 | if indic_plan.config.virama != 0 { |
1475 | if let Some(g: GlyphId) = face.glyph_index(indic_plan.config.virama) { |
1476 | virama_glyph = Some(g.0 as u32); |
1477 | } |
1478 | } |
1479 | |
1480 | let mut start: usize = 0; |
1481 | let mut end: usize = buffer.next_syllable(start:0); |
1482 | while start < buffer.len { |
1483 | final_reordering_impl(indic_plan, virama_glyph, start, end, buffer); |
1484 | start = end; |
1485 | end = buffer.next_syllable(start); |
1486 | } |
1487 | } |
1488 | |
1489 | fn final_reordering_impl( |
1490 | plan: &IndicShapePlan, |
1491 | virama_glyph: Option<u32>, |
1492 | start: usize, |
1493 | end: usize, |
1494 | buffer: &mut Buffer, |
1495 | ) { |
1496 | // This function relies heavily on halant glyphs. Lots of ligation |
1497 | // and possibly multiple substitutions happened prior to this |
1498 | // phase, and that might have messed up our properties. Recover |
1499 | // from a particular case of that where we're fairly sure that a |
1500 | // class of OT_H is desired but has been lost. |
1501 | // |
1502 | // We don't call load_virama_glyph(), since we know it's already loaded. |
1503 | if let Some(virama_glyph) = virama_glyph { |
1504 | for info in &mut buffer.info[start..end] { |
1505 | if info.glyph_id == virama_glyph && info.is_ligated() && info.is_multiplied() { |
1506 | // This will make sure that this glyph passes is_halant() test. |
1507 | info.set_indic_category(category::H); |
1508 | info.clear_ligated_and_multiplied(); |
1509 | } |
1510 | } |
1511 | } |
1512 | |
1513 | // 4. Final reordering: |
1514 | // |
1515 | // After the localized forms and basic shaping forms GSUB features have been |
1516 | // applied (see below), the shaping engine performs some final glyph |
1517 | // reordering before applying all the remaining font features to the entire |
1518 | // syllable. |
1519 | |
1520 | let mut try_pref = plan.mask_array[indic_feature::PREF] != 0; |
1521 | |
1522 | let mut base = start; |
1523 | while base < end { |
1524 | if buffer.info[base].indic_position() as u32 >= position::BASE_C as u32 { |
1525 | if try_pref && base + 1 < end { |
1526 | for i in base + 1..end { |
1527 | if (buffer.info[i].mask & plan.mask_array[indic_feature::PREF]) != 0 { |
1528 | if !(buffer.info[i].is_substituted() |
1529 | && buffer.info[i].is_ligated_and_didnt_multiply()) |
1530 | { |
1531 | // Ok, this was a 'pref' candidate but didn't form any. |
1532 | // Base is around here... |
1533 | base = i; |
1534 | while base < end && buffer.info[base].is_halant() { |
1535 | base += 1; |
1536 | } |
1537 | |
1538 | buffer.info[base].set_indic_position(position::BASE_C); |
1539 | try_pref = false; |
1540 | } |
1541 | |
1542 | break; |
1543 | } |
1544 | } |
1545 | } |
1546 | |
1547 | // For Malayalam, skip over unformed below- (but NOT post-) forms. |
1548 | if buffer.script == Some(script::MALAYALAM) { |
1549 | let mut i = base + 1; |
1550 | while i < end { |
1551 | while i < end && buffer.info[i].is_joiner() { |
1552 | i += 1; |
1553 | } |
1554 | |
1555 | if i == end || !buffer.info[i].is_halant() { |
1556 | break; |
1557 | } |
1558 | |
1559 | i += 1; // Skip halant. |
1560 | |
1561 | while i < end && buffer.info[i].is_joiner() { |
1562 | i += 1; |
1563 | } |
1564 | |
1565 | if i < end |
1566 | && buffer.info[i].is_consonant() |
1567 | && buffer.info[i].indic_position() == position::BELOW_C |
1568 | { |
1569 | base = i; |
1570 | buffer.info[base].set_indic_position(position::BASE_C); |
1571 | } |
1572 | |
1573 | i += 1; |
1574 | } |
1575 | } |
1576 | |
1577 | if start < base && buffer.info[base].indic_position() as u32 > position::BASE_C as u32 { |
1578 | base -= 1; |
1579 | } |
1580 | |
1581 | break; |
1582 | } |
1583 | |
1584 | base += 1; |
1585 | } |
1586 | |
1587 | if base == end && start < base && buffer.info[base - 1].is_one_of(rb_flag(category::ZWJ as u32)) |
1588 | { |
1589 | base -= 1; |
1590 | } |
1591 | |
1592 | if base < end { |
1593 | while start < base |
1594 | && buffer.info[base] |
1595 | .is_one_of(rb_flag(category::N as u32) | rb_flag(category::H as u32)) |
1596 | { |
1597 | base -= 1; |
1598 | } |
1599 | } |
1600 | |
1601 | // - Reorder matras: |
1602 | // |
1603 | // If a pre-base matra character had been reordered before applying basic |
1604 | // features, the glyph can be moved closer to the main consonant based on |
1605 | // whether half-forms had been formed. Actual position for the matra is |
1606 | // defined as “after last standalone halant glyph, after initial matra |
1607 | // position and before the main consonant”. If ZWJ or ZWNJ follow this |
1608 | // halant, position is moved after it. |
1609 | // |
1610 | // IMPLEMENTATION NOTES: |
1611 | // |
1612 | // It looks like the last sentence is wrong. Testing, with Windows 7 Uniscribe |
1613 | // and Devanagari shows that the behavior is best described as: |
1614 | // |
1615 | // "If ZWJ follows this halant, matra is NOT repositioned after this halant. |
1616 | // If ZWNJ follows this halant, position is moved after it." |
1617 | // |
1618 | // Test case, with Adobe Devanagari or Nirmala UI: |
1619 | // |
1620 | // U+091F,U+094D,U+200C,U+092F,U+093F |
1621 | // (Matra moves to the middle, after ZWNJ.) |
1622 | // |
1623 | // U+091F,U+094D,U+200D,U+092F,U+093F |
1624 | // (Matra does NOT move, stays to the left.) |
1625 | // |
1626 | // https://github.com/harfbuzz/harfbuzz/issues/1070 |
1627 | |
1628 | // Otherwise there can't be any pre-base matra characters. |
1629 | if start + 1 < end && start < base { |
1630 | // If we lost track of base, alas, position before last thingy. |
1631 | let mut new_pos = if base == end { base - 2 } else { base - 1 }; |
1632 | |
1633 | // Malayalam / Tamil do not have "half" forms or explicit virama forms. |
1634 | // The glyphs formed by 'half' are Chillus or ligated explicit viramas. |
1635 | // We want to position matra after them. |
1636 | if buffer.script != Some(script::MALAYALAM) && buffer.script != Some(script::TAMIL) { |
1637 | loop { |
1638 | while new_pos > start |
1639 | && !buffer.info[new_pos] |
1640 | .is_one_of(rb_flag(category::M as u32) | rb_flag(category::H as u32)) |
1641 | { |
1642 | new_pos -= 1; |
1643 | } |
1644 | |
1645 | // If we found no Halant we are done. |
1646 | // Otherwise only proceed if the Halant does |
1647 | // not belong to the Matra itself! |
1648 | if buffer.info[new_pos].is_halant() |
1649 | && buffer.info[new_pos].indic_position() != position::PRE_M |
1650 | { |
1651 | if new_pos + 1 < end { |
1652 | // -> If ZWJ follows this halant, matra is NOT repositioned after this halant. |
1653 | if buffer.info[new_pos + 1].indic_category() == category::ZWJ { |
1654 | // Keep searching. |
1655 | if new_pos > start { |
1656 | new_pos -= 1; |
1657 | continue; |
1658 | } |
1659 | } |
1660 | |
1661 | // -> If ZWNJ follows this halant, position is moved after it. |
1662 | // |
1663 | // IMPLEMENTATION NOTES: |
1664 | // |
1665 | // This is taken care of by the state-machine. A Halant,ZWNJ is a terminating |
1666 | // sequence for a consonant syllable; any pre-base matras occurring after it |
1667 | // will belong to the subsequent syllable. |
1668 | } |
1669 | } else { |
1670 | new_pos = start; // No move. |
1671 | } |
1672 | |
1673 | break; |
1674 | } |
1675 | } |
1676 | |
1677 | if start < new_pos && buffer.info[new_pos].indic_position() != position::PRE_M { |
1678 | // Now go see if there's actually any matras... |
1679 | for i in (start + 1..=new_pos).rev() { |
1680 | if buffer.info[i - 1].indic_position() == position::PRE_M { |
1681 | let old_pos = i - 1; |
1682 | // Shouldn't actually happen. |
1683 | if old_pos < base && base <= new_pos { |
1684 | base -= 1; |
1685 | } |
1686 | |
1687 | let tmp = buffer.info[old_pos]; |
1688 | for i in 0..new_pos - old_pos { |
1689 | buffer.info[i + old_pos] = buffer.info[i + old_pos + 1]; |
1690 | } |
1691 | buffer.info[new_pos] = tmp; |
1692 | |
1693 | // Note: this merge_clusters() is intentionally *after* the reordering. |
1694 | // Indic matra reordering is special and tricky... |
1695 | buffer.merge_clusters(new_pos, cmp::min(end, base + 1)); |
1696 | |
1697 | new_pos -= 1; |
1698 | } |
1699 | } |
1700 | } else { |
1701 | for i in start..base { |
1702 | if buffer.info[i].indic_position() == position::PRE_M { |
1703 | buffer.merge_clusters(i, cmp::min(end, base + 1)); |
1704 | break; |
1705 | } |
1706 | } |
1707 | } |
1708 | } |
1709 | |
1710 | // - Reorder reph: |
1711 | // |
1712 | // Reph’s original position is always at the beginning of the syllable, |
1713 | // (i.e. it is not reordered at the character reordering stage). However, |
1714 | // it will be reordered according to the basic-forms shaping results. |
1715 | // Possible positions for reph, depending on the script, are; after main, |
1716 | // before post-base consonant forms, and after post-base consonant forms. |
1717 | |
1718 | // Two cases: |
1719 | // |
1720 | // - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then |
1721 | // we should only move it if the sequence ligated to the repha form. |
1722 | // |
1723 | // - If repha is encoded separately and in the logical position, we should only |
1724 | // move it if it did NOT ligate. If it ligated, it's probably the font trying |
1725 | // to make it work without the reordering. |
1726 | |
1727 | if start + 1 < end |
1728 | && buffer.info[start].indic_position() == position::RA_TO_BECOME_REPH |
1729 | && (buffer.info[start].indic_category() == category::REPHA) |
1730 | ^ buffer.info[start].is_ligated_and_didnt_multiply() |
1731 | { |
1732 | let mut new_reph_pos; |
1733 | loop { |
1734 | let reph_pos = plan.config.reph_pos; |
1735 | |
1736 | // 1. If reph should be positioned after post-base consonant forms, |
1737 | // proceed to step 5. |
1738 | if reph_pos != RephPosition::AfterPost { |
1739 | // 2. If the reph repositioning class is not after post-base: target |
1740 | // position is after the first explicit halant glyph between the |
1741 | // first post-reph consonant and last main consonant. If ZWJ or ZWNJ |
1742 | // are following this halant, position is moved after it. If such |
1743 | // position is found, this is the target position. Otherwise, |
1744 | // proceed to the next step. |
1745 | // |
1746 | // Note: in old-implementation fonts, where classifications were |
1747 | // fixed in shaping engine, there was no case where reph position |
1748 | // will be found on this step. |
1749 | { |
1750 | new_reph_pos = start + 1; |
1751 | while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() { |
1752 | new_reph_pos += 1; |
1753 | } |
1754 | |
1755 | if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() { |
1756 | // ->If ZWJ or ZWNJ are following this halant, position is moved after it. |
1757 | if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() { |
1758 | new_reph_pos += 1; |
1759 | } |
1760 | |
1761 | break; |
1762 | } |
1763 | } |
1764 | |
1765 | // 3. If reph should be repositioned after the main consonant: find the |
1766 | // first consonant not ligated with main, or find the first |
1767 | // consonant that is not a potential pre-base-reordering Ra. |
1768 | if reph_pos == RephPosition::AfterMain { |
1769 | new_reph_pos = base; |
1770 | while new_reph_pos + 1 < end |
1771 | && buffer.info[new_reph_pos + 1].indic_position() as u8 |
1772 | <= position::AFTER_MAIN as u8 |
1773 | { |
1774 | new_reph_pos += 1; |
1775 | } |
1776 | |
1777 | if new_reph_pos < end { |
1778 | break; |
1779 | } |
1780 | } |
1781 | |
1782 | // 4. If reph should be positioned before post-base consonant, find |
1783 | // first post-base classified consonant not ligated with main. If no |
1784 | // consonant is found, the target position should be before the |
1785 | // first matra, syllable modifier sign or vedic sign. |
1786 | // |
1787 | // This is our take on what step 4 is trying to say (and failing, BADLY). |
1788 | if reph_pos == RephPosition::AfterSub { |
1789 | new_reph_pos = base; |
1790 | while new_reph_pos + 1 < end |
1791 | && (rb_flag_unsafe(buffer.info[new_reph_pos + 1].indic_position() as u32) |
1792 | & (rb_flag(position::POST_C as u32) |
1793 | | rb_flag(position::AFTER_POST as u32) |
1794 | | rb_flag(position::SMVD as u32))) |
1795 | == 0 |
1796 | { |
1797 | new_reph_pos += 1; |
1798 | } |
1799 | |
1800 | if new_reph_pos < end { |
1801 | break; |
1802 | } |
1803 | } |
1804 | } |
1805 | |
1806 | // 5. If no consonant is found in steps 3 or 4, move reph to a position |
1807 | // immediately before the first post-base matra, syllable modifier |
1808 | // sign or vedic sign that has a reordering class after the intended |
1809 | // reph position. For example, if the reordering position for reph |
1810 | // is post-main, it will skip above-base matras that also have a |
1811 | // post-main position. |
1812 | // |
1813 | // Copied from step 2. |
1814 | new_reph_pos = start + 1; |
1815 | while new_reph_pos < base && !buffer.info[new_reph_pos].is_halant() { |
1816 | new_reph_pos += 1; |
1817 | } |
1818 | |
1819 | if new_reph_pos < base && buffer.info[new_reph_pos].is_halant() { |
1820 | /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */ |
1821 | if new_reph_pos + 1 < base && buffer.info[new_reph_pos + 1].is_joiner() { |
1822 | new_reph_pos += 1; |
1823 | } |
1824 | |
1825 | break; |
1826 | } |
1827 | // See https://github.com/harfbuzz/harfbuzz/issues/2298#issuecomment-615318654 |
1828 | |
1829 | // 6. Otherwise, reorder reph to the end of the syllable. |
1830 | { |
1831 | new_reph_pos = end - 1; |
1832 | while new_reph_pos > start |
1833 | && buffer.info[new_reph_pos].indic_position() == position::SMVD |
1834 | { |
1835 | new_reph_pos -= 1; |
1836 | } |
1837 | |
1838 | // If the Reph is to be ending up after a Matra,Halant sequence, |
1839 | // position it before that Halant so it can interact with the Matra. |
1840 | // However, if it's a plain Consonant,Halant we shouldn't do that. |
1841 | // Uniscribe doesn't do this. |
1842 | // TEST: U+0930,U+094D,U+0915,U+094B,U+094D |
1843 | if buffer.info[new_reph_pos].is_halant() { |
1844 | for info in &buffer.info[base + 1..new_reph_pos] { |
1845 | if info.indic_category() == category::M { |
1846 | // Ok, got it. |
1847 | new_reph_pos -= 1; |
1848 | } |
1849 | } |
1850 | } |
1851 | } |
1852 | |
1853 | break; |
1854 | } |
1855 | |
1856 | // Move |
1857 | buffer.merge_clusters(start, new_reph_pos + 1); |
1858 | |
1859 | let reph = buffer.info[start]; |
1860 | for i in 0..new_reph_pos - start { |
1861 | buffer.info[i + start] = buffer.info[i + start + 1]; |
1862 | } |
1863 | buffer.info[new_reph_pos] = reph; |
1864 | |
1865 | if start < base && base <= new_reph_pos { |
1866 | base -= 1; |
1867 | } |
1868 | } |
1869 | |
1870 | // - Reorder pre-base-reordering consonants: |
1871 | // |
1872 | // If a pre-base-reordering consonant is found, reorder it according to |
1873 | // the following rules: |
1874 | |
1875 | // Otherwise there can't be any pre-base-reordering Ra. |
1876 | if try_pref && base + 1 < end { |
1877 | for i in base + 1..end { |
1878 | if (buffer.info[i].mask & plan.mask_array[indic_feature::PREF]) != 0 { |
1879 | // 1. Only reorder a glyph produced by substitution during application |
1880 | // of the <pref> feature. (Note that a font may shape a Ra consonant with |
1881 | // the feature generally but block it in certain contexts.) |
1882 | // |
1883 | // Note: We just check that something got substituted. We don't check that |
1884 | // the <pref> feature actually did it... |
1885 | // |
1886 | // Reorder pref only if it ligated. |
1887 | if buffer.info[i].is_ligated_and_didnt_multiply() { |
1888 | // 2. Try to find a target position the same way as for pre-base matra. |
1889 | // If it is found, reorder pre-base consonant glyph. |
1890 | // |
1891 | // 3. If position is not found, reorder immediately before main consonant. |
1892 | |
1893 | let mut new_pos = base; |
1894 | // Malayalam / Tamil do not have "half" forms or explicit virama forms. |
1895 | // The glyphs formed by 'half' are Chillus or ligated explicit viramas. |
1896 | // We want to position matra after them. |
1897 | if buffer.script != Some(script::MALAYALAM) |
1898 | && buffer.script != Some(script::TAMIL) |
1899 | { |
1900 | while new_pos > start |
1901 | && !buffer.info[new_pos - 1].is_one_of( |
1902 | rb_flag(category::M as u32) | rb_flag(category::H as u32), |
1903 | ) |
1904 | { |
1905 | new_pos -= 1; |
1906 | } |
1907 | } |
1908 | |
1909 | if new_pos > start && buffer.info[new_pos - 1].is_halant() { |
1910 | // -> If ZWJ or ZWNJ follow this halant, position is moved after it. |
1911 | if new_pos < end && buffer.info[new_pos].is_joiner() { |
1912 | new_pos += 1; |
1913 | } |
1914 | } |
1915 | |
1916 | { |
1917 | let old_pos = i; |
1918 | |
1919 | buffer.merge_clusters(new_pos, old_pos + 1); |
1920 | let tmp = buffer.info[old_pos]; |
1921 | for i in (0..=old_pos - new_pos).rev() { |
1922 | buffer.info[i + new_pos + 1] = buffer.info[i + new_pos]; |
1923 | } |
1924 | buffer.info[new_pos] = tmp; |
1925 | |
1926 | if new_pos <= base && base < old_pos { |
1927 | // TODO: investigate |
1928 | #[allow (unused_assignments)] |
1929 | { |
1930 | base += 1; |
1931 | } |
1932 | } |
1933 | } |
1934 | } |
1935 | |
1936 | break; |
1937 | } |
1938 | } |
1939 | } |
1940 | |
1941 | // Apply 'init' to the Left Matra if it's a word start. |
1942 | if buffer.info[start].indic_position() == position::PRE_M { |
1943 | if start == 0 |
1944 | || (rb_flag_unsafe(buffer.info[start - 1].general_category().to_rb()) |
1945 | & rb_flag_range( |
1946 | hb_gc::RB_UNICODE_GENERAL_CATEGORY_FORMAT, |
1947 | hb_gc::RB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK, |
1948 | )) |
1949 | == 0 |
1950 | { |
1951 | buffer.info[start].mask |= plan.mask_array[indic_feature::INIT]; |
1952 | } else { |
1953 | buffer.unsafe_to_break(Some(start - 1), Some(start + 1)); |
1954 | } |
1955 | } |
1956 | } |
1957 | |
1958 | pub fn get_category_and_position(u: u32) -> (Category, Position) { |
1959 | let (c1, c2) = super::indic_table::get_categories(u); |
1960 | let c2 = if c1 == SyllabicCategory::ConsonantMedial |
1961 | || c1 == SyllabicCategory::GeminationMark |
1962 | || c1 == SyllabicCategory::RegisterShifter |
1963 | || c1 == SyllabicCategory::ConsonantSucceedingRepha |
1964 | || c1 == SyllabicCategory::Virama |
1965 | || c1 == SyllabicCategory::VowelDependent |
1966 | || false |
1967 | { |
1968 | c2 |
1969 | } else { |
1970 | MatraCategory::NotApplicable |
1971 | }; |
1972 | |
1973 | let c1 = match c1 { |
1974 | SyllabicCategory::Other => category::X, |
1975 | SyllabicCategory::Avagraha => category::SYMBOL, |
1976 | SyllabicCategory::Bindu => category::SM, |
1977 | SyllabicCategory::BrahmiJoiningNumber => category::PLACEHOLDER, // Don't care. |
1978 | SyllabicCategory::CantillationMark => category::A, |
1979 | SyllabicCategory::Consonant => category::C, |
1980 | SyllabicCategory::ConsonantDead => category::C, |
1981 | SyllabicCategory::ConsonantFinal => category::CM, |
1982 | SyllabicCategory::ConsonantHeadLetter => category::C, |
1983 | SyllabicCategory::ConsonantInitialPostfixed => category::PLACEHOLDER, |
1984 | SyllabicCategory::ConsonantKiller => category::M, // U+17CD only. |
1985 | SyllabicCategory::ConsonantMedial => category::CM, |
1986 | SyllabicCategory::ConsonantPlaceholder => category::PLACEHOLDER, |
1987 | SyllabicCategory::ConsonantPrecedingRepha => category::REPHA, |
1988 | SyllabicCategory::ConsonantPrefixed => category::X, |
1989 | SyllabicCategory::ConsonantSubjoined => category::CM, |
1990 | SyllabicCategory::ConsonantSucceedingRepha => category::CM, |
1991 | SyllabicCategory::ConsonantWithStacker => category::CS, |
1992 | SyllabicCategory::GeminationMark => category::SM, // https://github.com/harfbuzz/harfbuzz/issues/552 |
1993 | SyllabicCategory::InvisibleStacker => category::COENG, |
1994 | SyllabicCategory::Joiner => category::ZWJ, |
1995 | SyllabicCategory::ModifyingLetter => category::X, |
1996 | SyllabicCategory::NonJoiner => category::ZWNJ, |
1997 | SyllabicCategory::Nukta => category::N, |
1998 | SyllabicCategory::Number => category::PLACEHOLDER, |
1999 | SyllabicCategory::NumberJoiner => category::PLACEHOLDER, // Don't care. |
2000 | SyllabicCategory::PureKiller => category::M, |
2001 | SyllabicCategory::RegisterShifter => category::RS, |
2002 | SyllabicCategory::SyllableModifier => category::SM, |
2003 | SyllabicCategory::ToneLetter => category::X, |
2004 | SyllabicCategory::ToneMark => category::N, |
2005 | SyllabicCategory::Virama => category::H, |
2006 | SyllabicCategory::Visarga => category::SM, |
2007 | SyllabicCategory::Vowel => category::V, |
2008 | SyllabicCategory::VowelDependent => category::M, |
2009 | SyllabicCategory::VowelIndependent => category::V, |
2010 | }; |
2011 | |
2012 | let c2 = match c2 { |
2013 | MatraCategory::NotApplicable => position::END, |
2014 | MatraCategory::Left => position::PRE_C, |
2015 | MatraCategory::Top => position::ABOVE_C, |
2016 | MatraCategory::Bottom => position::BELOW_C, |
2017 | MatraCategory::Right => position::POST_C, |
2018 | MatraCategory::BottomAndLeft => position::POST_C, |
2019 | MatraCategory::BottomAndRight => position::POST_C, |
2020 | MatraCategory::LeftAndRight => position::POST_C, |
2021 | MatraCategory::TopAndBottom => position::BELOW_C, |
2022 | MatraCategory::TopAndBottomAndRight => position::POST_C, |
2023 | MatraCategory::TopAndBottomAndLeft => position::BELOW_C, |
2024 | MatraCategory::TopAndLeft => position::ABOVE_C, |
2025 | MatraCategory::TopAndLeftAndRight => position::POST_C, |
2026 | MatraCategory::TopAndRight => position::POST_C, |
2027 | MatraCategory::Overstruck => position::AFTER_MAIN, |
2028 | MatraCategory::VisualOrderLeft => position::PRE_M, |
2029 | }; |
2030 | |
2031 | (c1, c2) |
2032 | } |
2033 | |
2034 | #[rustfmt::skip] |
2035 | fn matra_position_indic(u: u32, side: u8) -> u8 { |
2036 | #[inline ] fn in_half_block(u: u32, base: u32) -> bool { u & !0x7F == base } |
2037 | #[inline ] fn is_deva(u: u32) -> bool { in_half_block(u, 0x0900) } |
2038 | #[inline ] fn is_beng(u: u32) -> bool { in_half_block(u, 0x0980) } |
2039 | #[inline ] fn is_guru(u: u32) -> bool { in_half_block(u, 0x0A00) } |
2040 | #[inline ] fn is_gujr(u: u32) -> bool { in_half_block(u, 0x0A80) } |
2041 | #[inline ] fn is_orya(u: u32) -> bool { in_half_block(u, 0x0B00) } |
2042 | #[inline ] fn is_taml(u: u32) -> bool { in_half_block(u, 0x0B80) } |
2043 | #[inline ] fn is_telu(u: u32) -> bool { in_half_block(u, 0x0C00) } |
2044 | #[inline ] fn is_knda(u: u32) -> bool { in_half_block(u, 0x0C80) } |
2045 | #[inline ] fn is_mlym(u: u32) -> bool { in_half_block(u, 0x0D00) } |
2046 | #[inline ] fn is_sinh(u: u32) -> bool { in_half_block(u, 0x0D80) } |
2047 | |
2048 | #[inline ] |
2049 | fn matra_pos_right(u: u32) -> Position { |
2050 | if is_deva(u) { |
2051 | position::AFTER_SUB |
2052 | } else if is_beng(u) { |
2053 | position::AFTER_POST |
2054 | } else if is_guru(u) { |
2055 | position::AFTER_POST |
2056 | } else if is_gujr(u) { |
2057 | position::AFTER_POST |
2058 | } else if is_orya(u) { |
2059 | position::AFTER_POST |
2060 | } else if is_taml(u) { |
2061 | position::AFTER_POST |
2062 | } else if is_telu(u) { |
2063 | if u <= 0x0C42 { |
2064 | position::BEFORE_SUB |
2065 | } else { |
2066 | position::AFTER_SUB |
2067 | } |
2068 | } else if is_knda(u) { |
2069 | if u < 0x0CC3 || u > 0xCD6 { |
2070 | position::BEFORE_SUB |
2071 | } else { |
2072 | position::AFTER_SUB |
2073 | } |
2074 | } else if is_mlym(u) { |
2075 | position::AFTER_POST |
2076 | } else if is_sinh(u) { |
2077 | position::AFTER_SUB |
2078 | } else { |
2079 | position::AFTER_SUB |
2080 | } |
2081 | } |
2082 | |
2083 | // BENG and MLYM don't have top matras. |
2084 | #[inline ] |
2085 | fn matra_pos_top(u: u32) -> Position { |
2086 | if is_deva(u) { |
2087 | position::AFTER_SUB |
2088 | } else if is_guru(u) { |
2089 | // Deviate from spec |
2090 | position::AFTER_POST |
2091 | } else if is_gujr(u) { |
2092 | position::AFTER_SUB |
2093 | } else if is_orya(u) { |
2094 | position::AFTER_MAIN |
2095 | } else if is_taml(u) { |
2096 | position::AFTER_SUB |
2097 | } else if is_telu(u) { |
2098 | position::BEFORE_SUB |
2099 | } else if is_knda(u) { |
2100 | position::BEFORE_SUB |
2101 | } else if is_sinh(u) { |
2102 | position::AFTER_SUB |
2103 | } else { |
2104 | position::AFTER_SUB |
2105 | } |
2106 | } |
2107 | |
2108 | #[inline ] |
2109 | fn matra_pos_bottom(u: u32) -> Position { |
2110 | if is_deva(u) { |
2111 | position::AFTER_SUB |
2112 | } else if is_beng(u) { |
2113 | position::AFTER_SUB |
2114 | } else if is_guru(u) { |
2115 | position::AFTER_POST |
2116 | } else if is_gujr(u) { |
2117 | position::AFTER_POST |
2118 | } else if is_orya(u) { |
2119 | position::AFTER_SUB |
2120 | } else if is_taml(u) { |
2121 | position::AFTER_POST |
2122 | } else if is_telu(u) { |
2123 | position::BEFORE_SUB |
2124 | } else if is_knda(u) { |
2125 | position::BEFORE_SUB |
2126 | } else if is_mlym(u) { |
2127 | position::AFTER_POST |
2128 | } else if is_sinh(u) { |
2129 | position::AFTER_SUB |
2130 | } else { |
2131 | position::AFTER_SUB |
2132 | } |
2133 | } |
2134 | |
2135 | match side { |
2136 | position::PRE_C => position::PRE_M, |
2137 | position::POST_C => matra_pos_right(u), |
2138 | position::ABOVE_C => matra_pos_top(u), |
2139 | position::BELOW_C => matra_pos_bottom(u), |
2140 | _ => side, |
2141 | } |
2142 | } |
2143 | |