1pub mod arabic;
2mod arabic_table;
3pub mod hangul;
4pub mod hebrew;
5pub mod indic;
6mod indic_machine;
7mod indic_table;
8pub mod khmer;
9mod khmer_machine;
10mod machine_cursor;
11pub mod myanmar;
12mod myanmar_machine;
13mod syllabic;
14pub mod thai;
15pub mod universal;
16mod universal_machine;
17mod universal_table;
18mod vowel_constraints;
19
20use alloc::boxed::Box;
21use core::any::Any;
22
23use crate::buffer::Buffer;
24use crate::common::TagExt;
25use crate::normalize::{ShapeNormalizationMode, ShapeNormalizeContext};
26use crate::plan::{ShapePlan, ShapePlanner};
27use crate::{script, Direction, Face, Script, Tag};
28
/// Numeric limit (32) associated with combining marks; it is consumed outside
/// this file.
///
/// NOTE(review): presumably the longest run of combining marks that is still
/// considered for reordering -- confirm at the use sites.
pub const MAX_COMBINING_MARKS: usize = 32;
30
31pub const DEFAULT_SHAPER: ComplexShaper = ComplexShaper {
32 collect_features: None,
33 override_features: None,
34 create_data: None,
35 preprocess_text: None,
36 postprocess_glyphs: None,
37 normalization_mode: Some(ShapeNormalizationMode::Auto),
38 decompose: None,
39 compose: None,
40 setup_masks: None,
41 gpos_tag: None,
42 reorder_marks: None,
43 zero_width_marks: Some(ZeroWidthMarksMode::ByGdefLate),
44 fallback_position: true,
45};
46
47// Same as default but no mark advance zeroing / fallback positioning.
48// Dumbest shaper ever, basically.
49pub const DUMBER_SHAPER: ComplexShaper = ComplexShaper {
50 collect_features: None,
51 override_features: None,
52 create_data: None,
53 preprocess_text: None,
54 postprocess_glyphs: None,
55 normalization_mode: Some(ShapeNormalizationMode::Auto),
56 decompose: None,
57 compose: None,
58 setup_masks: None,
59 gpos_tag: None,
60 reorder_marks: None,
61 zero_width_marks: None,
62 fallback_position: false,
63};
64
65pub struct ComplexShaper {
66 /// Called during `shape_plan()`.
67 /// Shapers should use plan.map to add their features and callbacks.
68 pub collect_features: Option<fn(&mut ShapePlanner)>,
69
70 /// Called during `shape_plan()`.
71 /// Shapers should use plan.map to override features and add callbacks after
72 /// common features are added.
73 pub override_features: Option<fn(&mut ShapePlanner)>,
74
75 /// Called at the end of `shape_plan()`.
76 /// Whatever shapers return will be accessible through `plan.data()` later.
77 pub create_data: Option<fn(&ShapePlan) -> Box<dyn Any + Send + Sync>>,
78
79 /// Called during `shape()`.
80 /// Shapers can use to modify text before shaping starts.
81 pub preprocess_text: Option<fn(&ShapePlan, &Face, &mut Buffer)>,
82
83 /// Called during `shape()`.
84 /// Shapers can use to modify text before shaping starts.
85 pub postprocess_glyphs: Option<fn(&ShapePlan, &Face, &mut Buffer)>,
86
87 /// How to normalize.
88 pub normalization_mode: Option<ShapeNormalizationMode>,
89
90 /// Called during `shape()`'s normalization.
91 pub decompose: Option<fn(&ShapeNormalizeContext, char) -> Option<(char, char)>>,
92
93 /// Called during `shape()`'s normalization.
94 pub compose: Option<fn(&ShapeNormalizeContext, char, char) -> Option<char>>,
95
96 /// Called during `shape()`.
97 /// Shapers should use map to get feature masks and set on buffer.
98 /// Shapers may NOT modify characters.
99 pub setup_masks: Option<fn(&ShapePlan, &Face, &mut Buffer)>,
100
101 /// If not `None`, then must match found GPOS script tag for
102 /// GPOS to be applied. Otherwise, fallback positioning will be used.
103 pub gpos_tag: Option<Tag>,
104
105 /// Called during `shape()`.
106 /// Shapers can use to modify ordering of combining marks.
107 pub reorder_marks: Option<fn(&ShapePlan, &mut Buffer, usize, usize)>,
108
109 /// If and when to zero-width marks.
110 pub zero_width_marks: Option<ZeroWidthMarksMode>,
111
112 /// Whether to use fallback mark positioning.
113 pub fallback_position: bool,
114}
115
/// When a shaper wants the width of marks to be zeroed.
///
/// Used through `ComplexShaper::zero_width_marks`, where `None` means the
/// shaper requests no zeroing at all.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ZeroWidthMarksMode {
    /// NOTE(review): presumably zeroes GDEF-classified marks *before*
    /// positioning is applied -- confirm against the shaping pipeline.
    ByGdefEarly,
    /// NOTE(review): presumably zeroes GDEF-classified marks *after*
    /// positioning is applied -- confirm against the shaping pipeline.
    ByGdefLate,
}
121
122pub fn complex_categorize(
123 script: Script,
124 direction: Direction,
125 chosen_gsub_script: Option<Tag>,
126) -> &'static ComplexShaper {
127 match script {
128 // Unicode-1.1 additions
129 script::ARABIC
130
131 // Unicode-3.0 additions
132 | script::SYRIAC => {
133 // For Arabic script, use the Arabic shaper even if no OT script tag was found.
134 // This is because we do fallback shaping for Arabic script (and not others).
135 // But note that Arabic shaping is applicable only to horizontal layout; for
136 // vertical text, just use the generic shaper instead.
137 //
138 // TODO: Does this still apply? Arabic fallback shaping was removed.
139 if (chosen_gsub_script != Some(Tag::default_script()) || script == script::ARABIC)
140 && direction.is_horizontal()
141 {
142 &arabic::ARABIC_SHAPER
143 } else {
144 &DEFAULT_SHAPER
145 }
146 }
147
148 // Unicode-1.1 additions
149 script::THAI
150 | script::LAO => &thai::THAI_SHAPER,
151
152 // Unicode-1.1 additions
153 script::HANGUL => &hangul::HANGUL_SHAPER,
154
155 // Unicode-1.1 additions
156 script::HEBREW => &hebrew::HEBREW_SHAPER,
157
158 // Unicode-1.1 additions
159 script::BENGALI
160 | script::DEVANAGARI
161 | script::GUJARATI
162 | script::GURMUKHI
163 | script::KANNADA
164 | script::MALAYALAM
165 | script::ORIYA
166 | script::TAMIL
167 | script::TELUGU
168
169 // Unicode-3.0 additions
170 | script::SINHALA => {
171 // If the designer designed the font for the 'DFLT' script,
172 // (or we ended up arbitrarily pick 'latn'), use the default shaper.
173 // Otherwise, use the specific shaper.
174 //
175 // If it's indy3 tag, send to USE.
176 if chosen_gsub_script == Some(Tag::default_script()) ||
177 chosen_gsub_script == Some(Tag::from_bytes(b"latn")) {
178 &DEFAULT_SHAPER
179 } else if chosen_gsub_script.map_or(false, |tag| tag.to_bytes()[3] == b'3') {
180 &universal::UNIVERSAL_SHAPER
181 } else {
182 &indic::INDIC_SHAPER
183 }
184 }
185
186 script::KHMER => &khmer::KHMER_SHAPER,
187
188 script::MYANMAR => {
189 // If the designer designed the font for the 'DFLT' script,
190 // (or we ended up arbitrarily pick 'latn'), use the default shaper.
191 // Otherwise, use the specific shaper.
192 //
193 // If designer designed for 'mymr' tag, also send to default
194 // shaper. That's tag used from before Myanmar shaping spec
195 // was developed. The shaping spec uses 'mym2' tag.
196 if chosen_gsub_script == Some(Tag::default_script()) ||
197 chosen_gsub_script == Some(Tag::from_bytes(b"latn")) ||
198 chosen_gsub_script == Some(Tag::from_bytes(b"mymr"))
199 {
200 &DEFAULT_SHAPER
201 } else {
202 &myanmar::MYANMAR_SHAPER
203 }
204 }
205
206 // https://github.com/harfbuzz/harfbuzz/issues/1162
207 script::MYANMAR_ZAWGYI => &myanmar::MYANMAR_ZAWGYI_SHAPER,
208
209 // Unicode-2.0 additions
210 script::TIBETAN
211
212 // Unicode-3.0 additions
213 | script::MONGOLIAN
214 // | script::SINHALA
215
216 // Unicode-3.2 additions
217 | script::BUHID
218 | script::HANUNOO
219 | script::TAGALOG
220 | script::TAGBANWA
221
222 // Unicode-4.0 additions
223 | script::LIMBU
224 | script::TAI_LE
225
226 // Unicode-4.1 additions
227 | script::BUGINESE
228 | script::KHAROSHTHI
229 | script::SYLOTI_NAGRI
230 | script::TIFINAGH
231
232 // Unicode-5.0 additions
233 | script::BALINESE
234 | script::NKO
235 | script::PHAGS_PA
236
237 // Unicode-5.1 additions
238 | script::CHAM
239 | script::KAYAH_LI
240 | script::LEPCHA
241 | script::REJANG
242 | script::SAURASHTRA
243 | script::SUNDANESE
244
245 // Unicode-5.2 additions
246 | script::EGYPTIAN_HIEROGLYPHS
247 | script::JAVANESE
248 | script::KAITHI
249 | script::MEETEI_MAYEK
250 | script::TAI_THAM
251 | script::TAI_VIET
252
253 // Unicode-6.0 additions
254 | script::BATAK
255 | script::BRAHMI
256 | script::MANDAIC
257
258 // Unicode-6.1 additions
259 | script::CHAKMA
260 | script::MIAO
261 | script::SHARADA
262 | script::TAKRI
263
264 // Unicode-7.0 additions
265 | script::DUPLOYAN
266 | script::GRANTHA
267 | script::KHOJKI
268 | script::KHUDAWADI
269 | script::MAHAJANI
270 | script::MANICHAEAN
271 | script::MODI
272 | script::PAHAWH_HMONG
273 | script::PSALTER_PAHLAVI
274 | script::SIDDHAM
275 | script::TIRHUTA
276
277 // Unicode-8.0 additions
278 | script::AHOM
279 | script::MULTANI
280
281 // Unicode-9.0 additions
282 | script::ADLAM
283 | script::BHAIKSUKI
284 | script::MARCHEN
285 | script::NEWA
286
287 // Unicode-10.0 additions
288 | script::MASARAM_GONDI
289 | script::SOYOMBO
290 | script::ZANABAZAR_SQUARE
291
292 // Unicode-11.0 additions
293 | script::DOGRA
294 | script::GUNJALA_GONDI
295 | script::HANIFI_ROHINGYA
296 | script::MAKASAR
297 | script::MEDEFAIDRIN
298 | script::OLD_SOGDIAN
299 | script::SOGDIAN
300
301 // Unicode-12.0 additions
302 | script::ELYMAIC
303 | script::NANDINAGARI
304 | script::NYIAKENG_PUACHUE_HMONG
305 | script::WANCHO
306
307 // Unicode-13.0 additions
308 | script::CHORASMIAN
309 | script::DIVES_AKURU
310 | script::KHITAN_SMALL_SCRIPT
311 | script::YEZIDI
312
313 // Unicode-14.0 additions
314 | script::CYPRO_MINOAN
315 | script::OLD_UYGHUR
316 | script::TANGSA
317 | script::TOTO
318 | script::VITHKUQI => {
319 // If the designer designed the font for the 'DFLT' script,
320 // (or we ended up arbitrarily pick 'latn'), use the default shaper.
321 // Otherwise, use the specific shaper.
322 // Note that for some simple scripts, there may not be *any*
323 // GSUB/GPOS needed, so there may be no scripts found!
324 if chosen_gsub_script == Some(Tag::default_script()) ||
325 chosen_gsub_script == Some(Tag::from_bytes(b"latn")) {
326 &DEFAULT_SHAPER
327 } else {
328 &universal::UNIVERSAL_SHAPER
329 }
330 }
331
332 _ => &DEFAULT_SHAPER
333 }
334}
335
// TODO: find a better name
/// Returns a 32-bit mask with only bit `x` set.
///
/// `x` must be below 32; a larger shift amount is an arithmetic overflow.
/// Use [`rb_flag_unsafe`] when `x` is not known to be in range.
#[inline]
pub const fn rb_flag(x: u32) -> u32 {
    1_u32 << x
}
341
/// Returns a 32-bit mask with only bit `x` set, or `0` when `x` does not fit
/// in 32 bits.
///
/// Unlike `rb_flag`, this accepts any `x`: an out-of-range index simply
/// yields an empty mask instead of overflowing the shift.
#[inline]
pub fn rb_flag_unsafe(x: u32) -> u32 {
    // `checked_shl` yields `None` exactly when `x >= 32`.
    1_u32.checked_shl(x).unwrap_or(0)
}
350
/// Returns a 32-bit mask with every bit from `x` through `y` (inclusive) set.
///
/// Callers are expected to pass `x <= y`. That precondition is a contract
/// only and contributes nothing to the result: the mask contains exactly the
/// bits `x..=y`, with no stray low bit.
///
/// The function is total:
/// * `x > y` describes an empty range and yields `0`;
/// * `y >= 31` saturates at the top bit, so `rb_flag_range(0, 31)` is
///   `u32::MAX` rather than an overflowing shift.
#[inline]
pub fn rb_flag_range(x: u32, y: u32) -> u32 {
    // All bits 0..=y.
    let through_y = if y >= 31 {
        u32::MAX
    } else {
        (1_u32 << (y + 1)) - 1
    };

    // All bits strictly below x.
    let below_x = if x >= 32 { u32::MAX } else { (1_u32 << x) - 1 };

    through_y & !below_x
}
355
/// Returns a 64-bit mask with only bit `x` set.
///
/// `x` must be below 64; a larger shift amount is an arithmetic overflow.
/// Use [`rb_flag64_unsafe`] when `x` is not known to be in range.
#[inline]
pub const fn rb_flag64(x: u32) -> u64 {
    1_u64 << x
}
360
/// Returns a 64-bit mask with only bit `x` set, or `0` when `x` does not fit
/// in 64 bits.
///
/// Unlike `rb_flag64`, this accepts any `x`: an out-of-range index simply
/// yields an empty mask instead of overflowing the shift.
#[inline]
pub fn rb_flag64_unsafe(x: u32) -> u64 {
    // `checked_shl` yields `None` exactly when `x >= 64`.
    1_u64.checked_shl(x).unwrap_or(0)
}
369