1 | pub mod arabic; |
2 | mod arabic_table; |
3 | pub mod hangul; |
4 | pub mod hebrew; |
5 | pub mod indic; |
6 | mod indic_machine; |
7 | mod indic_table; |
8 | pub mod khmer; |
9 | mod khmer_machine; |
10 | mod machine_cursor; |
11 | pub mod myanmar; |
12 | mod myanmar_machine; |
13 | mod syllabic; |
14 | pub mod thai; |
15 | pub mod universal; |
16 | mod universal_machine; |
17 | mod universal_table; |
18 | mod vowel_constraints; |
19 | |
20 | use alloc::boxed::Box; |
21 | use core::any::Any; |
22 | |
23 | use crate::buffer::Buffer; |
24 | use crate::common::TagExt; |
25 | use crate::normalize::{ShapeNormalizationMode, ShapeNormalizeContext}; |
26 | use crate::plan::{ShapePlan, ShapePlanner}; |
27 | use crate::{script, Direction, Face, Script, Tag}; |
28 | |
/// Upper bound on the number of combining marks handled as one run.
///
/// NOTE(review): presumably the cap used by normalization when reordering
/// marks (mirrors HarfBuzz's limit of 32) — confirm against the callers.
pub const MAX_COMBINING_MARKS: usize = 32;
30 | |
31 | pub const DEFAULT_SHAPER: ComplexShaper = ComplexShaper { |
32 | collect_features: None, |
33 | override_features: None, |
34 | create_data: None, |
35 | preprocess_text: None, |
36 | postprocess_glyphs: None, |
37 | normalization_mode: Some(ShapeNormalizationMode::Auto), |
38 | decompose: None, |
39 | compose: None, |
40 | setup_masks: None, |
41 | gpos_tag: None, |
42 | reorder_marks: None, |
43 | zero_width_marks: Some(ZeroWidthMarksMode::ByGdefLate), |
44 | fallback_position: true, |
45 | }; |
46 | |
47 | // Same as default but no mark advance zeroing / fallback positioning. |
48 | // Dumbest shaper ever, basically. |
49 | pub const DUMBER_SHAPER: ComplexShaper = ComplexShaper { |
50 | collect_features: None, |
51 | override_features: None, |
52 | create_data: None, |
53 | preprocess_text: None, |
54 | postprocess_glyphs: None, |
55 | normalization_mode: Some(ShapeNormalizationMode::Auto), |
56 | decompose: None, |
57 | compose: None, |
58 | setup_masks: None, |
59 | gpos_tag: None, |
60 | reorder_marks: None, |
61 | zero_width_marks: None, |
62 | fallback_position: false, |
63 | }; |
64 | |
/// A set of optional hooks and settings that describe how a script-specific
/// shaper customizes planning and shaping.
///
/// Every hook is an `Option`; a shaper that does not need a hook leaves it as
/// `None` (see `DEFAULT_SHAPER`, which sets none of them).
pub struct ComplexShaper {
    /// Called during `shape_plan()`.
    /// Shapers should use plan.map to add their features and callbacks.
    pub collect_features: Option<fn(&mut ShapePlanner)>,

    /// Called during `shape_plan()`.
    /// Shapers should use plan.map to override features and add callbacks after
    /// common features are added.
    pub override_features: Option<fn(&mut ShapePlanner)>,

    /// Called at the end of `shape_plan()`.
    /// Whatever shapers return will be accessible through `plan.data()` later.
    pub create_data: Option<fn(&ShapePlan) -> Box<dyn Any + Send + Sync>>,

    /// Called during `shape()`.
    /// Shapers can use this to modify text before shaping starts.
    pub preprocess_text: Option<fn(&ShapePlan, &Face, &mut Buffer)>,

    /// Called during `shape()`.
    /// Shapers can use this to modify glyphs after shaping ends.
    pub postprocess_glyphs: Option<fn(&ShapePlan, &Face, &mut Buffer)>,

    /// How to normalize.
    pub normalization_mode: Option<ShapeNormalizationMode>,

    /// Called during `shape()`'s normalization.
    /// Takes a character and may return a pair of characters to replace it.
    pub decompose: Option<fn(&ShapeNormalizeContext, char) -> Option<(char, char)>>,

    /// Called during `shape()`'s normalization.
    /// Takes two characters and may return a single character to replace them.
    pub compose: Option<fn(&ShapeNormalizeContext, char, char) -> Option<char>>,

    /// Called during `shape()`.
    /// Shapers should use map to get feature masks and set on buffer.
    /// Shapers may NOT modify characters.
    pub setup_masks: Option<fn(&ShapePlan, &Face, &mut Buffer)>,

    /// If not `None`, then must match found GPOS script tag for
    /// GPOS to be applied. Otherwise, fallback positioning will be used.
    pub gpos_tag: Option<Tag>,

    /// Called during `shape()`.
    /// Shapers can use this to modify the ordering of combining marks.
    /// NOTE(review): the two `usize` arguments are presumably the start and
    /// end of the mark run inside the buffer — confirm against the caller.
    pub reorder_marks: Option<fn(&ShapePlan, &mut Buffer, usize, usize)>,

    /// If and when to zero the advance width of marks.
    pub zero_width_marks: Option<ZeroWidthMarksMode>,

    /// Whether to use fallback mark positioning.
    pub fallback_position: bool,
}
115 | |
/// When mark advances are zeroed, for shapers that request it through
/// `ComplexShaper::zero_width_marks`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ZeroWidthMarksMode {
    /// Zero marks (as classified by GDEF) early.
    /// NOTE(review): presumably "before positioning is applied" — confirm.
    ByGdefEarly,
    /// Zero marks (as classified by GDEF) late.
    /// NOTE(review): presumably "after positioning is applied" — confirm.
    ByGdefLate,
}
121 | |
122 | pub fn complex_categorize( |
123 | script: Script, |
124 | direction: Direction, |
125 | chosen_gsub_script: Option<Tag>, |
126 | ) -> &'static ComplexShaper { |
127 | match script { |
128 | // Unicode-1.1 additions |
129 | script::ARABIC |
130 | |
131 | // Unicode-3.0 additions |
132 | | script::SYRIAC => { |
133 | // For Arabic script, use the Arabic shaper even if no OT script tag was found. |
134 | // This is because we do fallback shaping for Arabic script (and not others). |
135 | // But note that Arabic shaping is applicable only to horizontal layout; for |
136 | // vertical text, just use the generic shaper instead. |
137 | // |
138 | // TODO: Does this still apply? Arabic fallback shaping was removed. |
139 | if (chosen_gsub_script != Some(Tag::default_script()) || script == script::ARABIC) |
140 | && direction.is_horizontal() |
141 | { |
142 | &arabic::ARABIC_SHAPER |
143 | } else { |
144 | &DEFAULT_SHAPER |
145 | } |
146 | } |
147 | |
148 | // Unicode-1.1 additions |
149 | script::THAI |
150 | | script::LAO => &thai::THAI_SHAPER, |
151 | |
152 | // Unicode-1.1 additions |
153 | script::HANGUL => &hangul::HANGUL_SHAPER, |
154 | |
155 | // Unicode-1.1 additions |
156 | script::HEBREW => &hebrew::HEBREW_SHAPER, |
157 | |
158 | // Unicode-1.1 additions |
159 | script::BENGALI |
160 | | script::DEVANAGARI |
161 | | script::GUJARATI |
162 | | script::GURMUKHI |
163 | | script::KANNADA |
164 | | script::MALAYALAM |
165 | | script::ORIYA |
166 | | script::TAMIL |
167 | | script::TELUGU |
168 | |
169 | // Unicode-3.0 additions |
170 | | script::SINHALA => { |
171 | // If the designer designed the font for the 'DFLT' script, |
172 | // (or we ended up arbitrarily pick 'latn'), use the default shaper. |
173 | // Otherwise, use the specific shaper. |
174 | // |
175 | // If it's indy3 tag, send to USE. |
176 | if chosen_gsub_script == Some(Tag::default_script()) || |
177 | chosen_gsub_script == Some(Tag::from_bytes(b"latn" )) { |
178 | &DEFAULT_SHAPER |
179 | } else if chosen_gsub_script.map_or(false, |tag| tag.to_bytes()[3] == b'3' ) { |
180 | &universal::UNIVERSAL_SHAPER |
181 | } else { |
182 | &indic::INDIC_SHAPER |
183 | } |
184 | } |
185 | |
186 | script::KHMER => &khmer::KHMER_SHAPER, |
187 | |
188 | script::MYANMAR => { |
189 | // If the designer designed the font for the 'DFLT' script, |
190 | // (or we ended up arbitrarily pick 'latn'), use the default shaper. |
191 | // Otherwise, use the specific shaper. |
192 | // |
193 | // If designer designed for 'mymr' tag, also send to default |
194 | // shaper. That's tag used from before Myanmar shaping spec |
195 | // was developed. The shaping spec uses 'mym2' tag. |
196 | if chosen_gsub_script == Some(Tag::default_script()) || |
197 | chosen_gsub_script == Some(Tag::from_bytes(b"latn" )) || |
198 | chosen_gsub_script == Some(Tag::from_bytes(b"mymr" )) |
199 | { |
200 | &DEFAULT_SHAPER |
201 | } else { |
202 | &myanmar::MYANMAR_SHAPER |
203 | } |
204 | } |
205 | |
206 | // https://github.com/harfbuzz/harfbuzz/issues/1162 |
207 | script::MYANMAR_ZAWGYI => &myanmar::MYANMAR_ZAWGYI_SHAPER, |
208 | |
209 | // Unicode-2.0 additions |
210 | script::TIBETAN |
211 | |
212 | // Unicode-3.0 additions |
213 | | script::MONGOLIAN |
214 | // | script::SINHALA |
215 | |
216 | // Unicode-3.2 additions |
217 | | script::BUHID |
218 | | script::HANUNOO |
219 | | script::TAGALOG |
220 | | script::TAGBANWA |
221 | |
222 | // Unicode-4.0 additions |
223 | | script::LIMBU |
224 | | script::TAI_LE |
225 | |
226 | // Unicode-4.1 additions |
227 | | script::BUGINESE |
228 | | script::KHAROSHTHI |
229 | | script::SYLOTI_NAGRI |
230 | | script::TIFINAGH |
231 | |
232 | // Unicode-5.0 additions |
233 | | script::BALINESE |
234 | | script::NKO |
235 | | script::PHAGS_PA |
236 | |
237 | // Unicode-5.1 additions |
238 | | script::CHAM |
239 | | script::KAYAH_LI |
240 | | script::LEPCHA |
241 | | script::REJANG |
242 | | script::SAURASHTRA |
243 | | script::SUNDANESE |
244 | |
245 | // Unicode-5.2 additions |
246 | | script::EGYPTIAN_HIEROGLYPHS |
247 | | script::JAVANESE |
248 | | script::KAITHI |
249 | | script::MEETEI_MAYEK |
250 | | script::TAI_THAM |
251 | | script::TAI_VIET |
252 | |
253 | // Unicode-6.0 additions |
254 | | script::BATAK |
255 | | script::BRAHMI |
256 | | script::MANDAIC |
257 | |
258 | // Unicode-6.1 additions |
259 | | script::CHAKMA |
260 | | script::MIAO |
261 | | script::SHARADA |
262 | | script::TAKRI |
263 | |
264 | // Unicode-7.0 additions |
265 | | script::DUPLOYAN |
266 | | script::GRANTHA |
267 | | script::KHOJKI |
268 | | script::KHUDAWADI |
269 | | script::MAHAJANI |
270 | | script::MANICHAEAN |
271 | | script::MODI |
272 | | script::PAHAWH_HMONG |
273 | | script::PSALTER_PAHLAVI |
274 | | script::SIDDHAM |
275 | | script::TIRHUTA |
276 | |
277 | // Unicode-8.0 additions |
278 | | script::AHOM |
279 | | script::MULTANI |
280 | |
281 | // Unicode-9.0 additions |
282 | | script::ADLAM |
283 | | script::BHAIKSUKI |
284 | | script::MARCHEN |
285 | | script::NEWA |
286 | |
287 | // Unicode-10.0 additions |
288 | | script::MASARAM_GONDI |
289 | | script::SOYOMBO |
290 | | script::ZANABAZAR_SQUARE |
291 | |
292 | // Unicode-11.0 additions |
293 | | script::DOGRA |
294 | | script::GUNJALA_GONDI |
295 | | script::HANIFI_ROHINGYA |
296 | | script::MAKASAR |
297 | | script::MEDEFAIDRIN |
298 | | script::OLD_SOGDIAN |
299 | | script::SOGDIAN |
300 | |
301 | // Unicode-12.0 additions |
302 | | script::ELYMAIC |
303 | | script::NANDINAGARI |
304 | | script::NYIAKENG_PUACHUE_HMONG |
305 | | script::WANCHO |
306 | |
307 | // Unicode-13.0 additions |
308 | | script::CHORASMIAN |
309 | | script::DIVES_AKURU |
310 | | script::KHITAN_SMALL_SCRIPT |
311 | | script::YEZIDI |
312 | |
313 | // Unicode-14.0 additions |
314 | | script::CYPRO_MINOAN |
315 | | script::OLD_UYGHUR |
316 | | script::TANGSA |
317 | | script::TOTO |
318 | | script::VITHKUQI => { |
319 | // If the designer designed the font for the 'DFLT' script, |
320 | // (or we ended up arbitrarily pick 'latn'), use the default shaper. |
321 | // Otherwise, use the specific shaper. |
322 | // Note that for some simple scripts, there may not be *any* |
323 | // GSUB/GPOS needed, so there may be no scripts found! |
324 | if chosen_gsub_script == Some(Tag::default_script()) || |
325 | chosen_gsub_script == Some(Tag::from_bytes(b"latn" )) { |
326 | &DEFAULT_SHAPER |
327 | } else { |
328 | &universal::UNIVERSAL_SHAPER |
329 | } |
330 | } |
331 | |
332 | _ => &DEFAULT_SHAPER |
333 | } |
334 | } |
335 | |
// TODO: find a better name
/// Returns a `u32` in which only bit number `x` is set.
///
/// `x` must be below 32; a larger value overflows the shift. Use
/// `rb_flag_unsafe` when `x` may be out of range.
#[inline]
pub const fn rb_flag(x: u32) -> u32 {
    let lowest_bit: u32 = 1;
    lowest_bit << x
}
341 | |
/// Returns a `u32` in which only bit number `x` is set, or `0` when `x` does
/// not fit in 32 bits (`x >= 32`).
#[inline]
pub fn rb_flag_unsafe(x: u32) -> u32 {
    // `checked_shl` yields `None` exactly when the shift amount is >= 32.
    1u32.checked_shl(x).unwrap_or(0)
}
350 | |
/// Returns a `u32` mask with every bit from `x` through `y` (inclusive) set.
///
/// * `x` - index of the lowest bit in the range.
/// * `y` - index of the highest bit in the range; values of 31 and above
///   extend the range to the top bit.
///
/// Callers are expected to pass `x < y`. `x == y` yields the single bit `x`,
/// and an empty range (`x > y`) or an out-of-range `x` (`x >= 32`) yields `0`.
#[inline]
pub fn rb_flag_range(x: u32, y: u32) -> u32 {
    if x > y || x >= 32 {
        return 0;
    }

    // Bits 0..=y. Built without shifting by 32, which would overflow when
    // `y` is the top bit.
    let up_to_y = if y >= 31 {
        !0u32
    } else {
        (1u32 << (y + 1)) - 1
    };

    // Bits strictly below x; these are removed from the mask.
    // The `x < y` comparison must not contribute to the value: adding it
    // would spuriously set bit 0.
    let below_x = (1u32 << x) - 1;

    up_to_y & !below_x
}
355 | |
/// Returns a `u64` in which only bit number `x` is set.
///
/// `x` must be below 64; a larger value overflows the shift. Use
/// `rb_flag64_unsafe` when `x` may be out of range.
#[inline]
pub const fn rb_flag64(x: u32) -> u64 {
    let lowest_bit: u64 = 1;
    lowest_bit << x
}
360 | |
/// Returns a `u64` in which only bit number `x` is set, or `0` when `x` does
/// not fit in 64 bits (`x >= 64`).
#[inline]
pub fn rb_flag64_unsafe(x: u32) -> u64 {
    // `checked_shl` yields `None` exactly when the shift amount is >= 64.
    1u64.checked_shl(x).unwrap_or(0)
}
369 | |