1 | use alloc::{string::String, vec::Vec}; |
2 | use core::convert::TryFrom; |
3 | |
4 | use ttf_parser::GlyphId; |
5 | |
6 | use crate::unicode::{CharExt, GeneralCategory, GeneralCategoryExt, Space}; |
7 | use crate::{script, Direction, Face, Language, Mask, Script}; |
8 | |
/// Number of characters stored per side in `Buffer::context`
/// (one row for the pre-context, one row for the post-context).
const CONTEXT_LENGTH: usize = 5;
10 | |
pub mod glyph_flag {
    /// Set on a glyph when breaking the input text at the beginning of the
    /// cluster this glyph belongs to requires both sides to be re-shaped,
    /// because the result might be different.
    ///
    /// Conversely, when the flag is absent it is safe to break the glyph-run
    /// at the beginning of this cluster: the two sides are exactly what one
    /// would get by breaking the input text there and shaping each side
    /// separately. This can be used to optimize paragraph layout, by not
    /// re-shaping every line after line-breaking, or by limiting re-shaping
    /// to a small piece around the breaking point only.
    pub const UNSAFE_TO_BREAK: u32 = 1 << 0;

    /// Bitwise OR of all the currently defined flags.
    pub const DEFINED: u32 = UNSAFE_TO_BREAK;
}
32 | |
/// Holds the positions of the glyph in both horizontal and vertical directions.
///
/// All positions are relative to the current point.
///
/// The struct is `#[repr(C)]` and consists of four `i32` fields followed by
/// one private `u32` scratch word.
#[repr (C)]
#[derive (Clone, Copy, Default, Debug)]
pub struct GlyphPosition {
    /// How much the line advances after drawing this glyph when setting text in
    /// horizontal direction.
    pub x_advance: i32,
    /// How much the line advances after drawing this glyph when setting text in
    /// vertical direction.
    pub y_advance: i32,
    /// How much the glyph moves on the X-axis before drawing it, this should
    /// not affect how much the line advances.
    pub x_offset: i32,
    /// How much the glyph moves on the Y-axis before drawing it, this should
    /// not affect how much the line advances.
    pub y_offset: i32,
    // Private scratch word reinterpreted by the `attach_chain` / `attach_type`
    // accessors: its first `i16` holds the attachment chain offset and its
    // byte 2 holds the attachment type.
    var: u32,
}
53 | |
// SAFETY: `GlyphPosition` is `#[repr(C)]` and `Copy`, and is made only of four
// `i32` fields and one `u32` field. All fields are 4 bytes wide, so there is
// no padding, and every bit pattern (including all zeroes) is a valid value.
unsafe impl bytemuck::Zeroable for GlyphPosition {}
unsafe impl bytemuck::Pod for GlyphPosition {}
56 | |
57 | impl GlyphPosition { |
58 | #[inline ] |
59 | pub(crate) fn attach_chain(&self) -> i16 { |
60 | // glyph to which this attaches to, relative to current glyphs; |
61 | // negative for going back, positive for forward. |
62 | let v: &[i16; 2] = bytemuck::cast_ref(&self.var); |
63 | v[0] |
64 | } |
65 | |
66 | #[inline ] |
67 | pub(crate) fn set_attach_chain(&mut self, n: i16) { |
68 | let v: &mut [i16; 2] = bytemuck::cast_mut(&mut self.var); |
69 | v[0] = n; |
70 | } |
71 | |
72 | #[inline ] |
73 | pub(crate) fn attach_type(&self) -> u8 { |
74 | // attachment type |
75 | // Note! if attach_chain() is zero, the value of attach_type() is irrelevant. |
76 | let v: &[u8; 4] = bytemuck::cast_ref(&self.var); |
77 | v[2] |
78 | } |
79 | |
80 | #[inline ] |
81 | pub(crate) fn set_attach_type(&mut self, n: u8) { |
82 | let v: &mut [u8; 4] = bytemuck::cast_mut(&mut self.var); |
83 | v[2] = n; |
84 | } |
85 | } |
86 | |
/// A glyph info.
///
/// The struct is `#[repr(C)]`; the two private `var*` words are scratch space
/// that the accessor methods on this type reinterpret as smaller fields.
#[repr (C)]
#[derive (Clone, Copy, Default, Debug)]
pub struct GlyphInfo {
    // NOTE: Stores a Unicode codepoint before shaping and a glyph ID after.
    //       Just like harfbuzz, we are using the same variable for two purposes.
    //       Occupies u32 as a codepoint and u16 as a glyph id.
    /// A selected glyph.
    ///
    /// Guarantee to be <= `u16::MAX`.
    pub glyph_id: u32,
    // Per-glyph bit mask; `unsafe_to_break()` tests it against
    // `glyph_flag::UNSAFE_TO_BREAK`.
    pub(crate) mask: Mask,
    /// An index to the start of the grapheme cluster in the original string.
    ///
    /// [Read more on clusters](https://harfbuzz.github.io/clusters.html).
    pub cluster: u32,
    // Scratch word shared by `glyph_props` (first `u16`), `lig_props` (byte 2),
    // `syllable` (byte 3) and, as a whole `u32`, `glyph_index`.
    pub(crate) var1: u32,
    // Scratch word shared by `unicode_props` (first `u16`) and the
    // `complex_var_u8_category` (byte 2) / `complex_var_u8_auxiliary` (byte 3)
    // values.
    pub(crate) var2: u32,
}
106 | |
// SAFETY: `GlyphInfo` is `#[repr(C)]` and `Copy`; `glyph_id`, `cluster`, `var1`
// and `var2` are plain `u32`s.
// NOTE(review): soundness also requires `Mask` to be a padding-free plain
// integer for which every bit pattern is valid (it is initialised with the
// literal `0` and AND-ed with `u32` flags in this file, so presumably an alias
// of `u32`) — confirm against its definition.
unsafe impl bytemuck::Zeroable for GlyphInfo {}
unsafe impl bytemuck::Pod for GlyphInfo {}
109 | |
/// Bit of the `lig_props` byte that marks a glyph as a ligature base: it is set
/// by `GlyphInfo::set_lig_props_for_ligature` and tested by
/// `GlyphInfo::is_ligated_internal`.
const IS_LIG_BASE: u8 = 0x10;
111 | |
112 | impl GlyphInfo { |
113 | /// Indicates that if input text is broken at the beginning of the cluster this glyph |
114 | /// is part of, then both sides need to be re-shaped, as the result might be different. |
115 | /// |
116 | /// On the flip side, it means that when this flag is not present, |
117 | /// then it's safe to break the glyph-run at the beginning of this cluster, |
118 | /// and the two sides represent the exact same result one would get if breaking input text |
119 | /// at the beginning of this cluster and shaping the two sides separately. |
120 | /// This can be used to optimize paragraph layout, by avoiding re-shaping of each line |
121 | /// after line-breaking, or limiting the reshaping to a small piece around |
122 | /// the breaking point only. |
123 | pub fn unsafe_to_break(&self) -> bool { |
124 | self.mask & glyph_flag::UNSAFE_TO_BREAK != 0 |
125 | } |
126 | |
127 | #[inline ] |
128 | pub(crate) fn as_char(&self) -> char { |
129 | char::try_from(self.glyph_id).unwrap() |
130 | } |
131 | |
132 | #[inline ] |
133 | pub(crate) fn as_glyph(&self) -> GlyphId { |
134 | debug_assert!(self.glyph_id <= u32::from(u16::MAX)); |
135 | GlyphId(self.glyph_id as u16) |
136 | } |
137 | |
138 | // Var allocation: unicode_props |
139 | // Used during the entire shaping process to store unicode properties |
140 | |
141 | pub(crate) fn complex_var_u8_category(&self) -> u8 { |
142 | let v: &[u8; 4] = bytemuck::cast_ref(&self.var2); |
143 | v[2] |
144 | } |
145 | |
146 | pub(crate) fn set_complex_var_u8_category(&mut self, c: u8) { |
147 | let v: &mut [u8; 4] = bytemuck::cast_mut(&mut self.var2); |
148 | v[2] = c; |
149 | } |
150 | |
151 | pub(crate) fn complex_var_u8_auxiliary(&self) -> u8 { |
152 | let v: &[u8; 4] = bytemuck::cast_ref(&self.var2); |
153 | v[3] |
154 | } |
155 | |
156 | pub(crate) fn set_complex_var_u8_auxiliary(&mut self, c: u8) { |
157 | let v: &mut [u8; 4] = bytemuck::cast_mut(&mut self.var2); |
158 | v[3] = c; |
159 | } |
160 | |
161 | #[inline ] |
162 | fn unicode_props(&self) -> u16 { |
163 | let v: &[u16; 2] = bytemuck::cast_ref(&self.var2); |
164 | v[0] |
165 | } |
166 | |
167 | #[inline ] |
168 | fn set_unicode_props(&mut self, n: u16) { |
169 | let v: &mut [u16; 2] = bytemuck::cast_mut(&mut self.var2); |
170 | v[0] = n; |
171 | } |
172 | |
173 | pub(crate) fn init_unicode_props(&mut self, scratch_flags: &mut BufferScratchFlags) { |
174 | let u = self.as_char(); |
175 | let gc = u.general_category(); |
176 | let mut props = gc.to_rb() as u16; |
177 | |
178 | if u as u32 >= 0x80 { |
179 | *scratch_flags |= BufferScratchFlags::HAS_NON_ASCII; |
180 | |
181 | if u.is_default_ignorable() { |
182 | props |= UnicodeProps::IGNORABLE.bits(); |
183 | *scratch_flags |= BufferScratchFlags::HAS_DEFAULT_IGNORABLES; |
184 | |
185 | match u as u32 { |
186 | 0x200C => props |= UnicodeProps::CF_ZWNJ.bits(), |
187 | 0x200D => props |= UnicodeProps::CF_ZWJ.bits(), |
188 | |
189 | // Mongolian Free Variation Selectors need to be remembered |
190 | // because although we need to hide them like default-ignorables, |
191 | // they need to non-ignorable during shaping. This is similar to |
192 | // what we do for joiners in Indic-like shapers, but since the |
193 | // FVSes are GC=Mn, we have use a separate bit to remember them. |
194 | // Fixes: |
195 | // https://github.com/harfbuzz/harfbuzz/issues/234 |
196 | 0x180B..=0x180D => props |= UnicodeProps::HIDDEN.bits(), |
197 | |
198 | // TAG characters need similar treatment. Fixes: |
199 | // https://github.com/harfbuzz/harfbuzz/issues/463 |
200 | 0xE0020..=0xE007F => props |= UnicodeProps::HIDDEN.bits(), |
201 | |
202 | // COMBINING GRAPHEME JOINER should not be skipped; at least some times. |
203 | // https://github.com/harfbuzz/harfbuzz/issues/554 |
204 | 0x034F => { |
205 | props |= UnicodeProps::HIDDEN.bits(); |
206 | *scratch_flags |= BufferScratchFlags::HAS_CGJ; |
207 | } |
208 | |
209 | _ => {} |
210 | } |
211 | } |
212 | |
213 | if gc.is_mark() { |
214 | props |= UnicodeProps::CONTINUATION.bits(); |
215 | props |= (u.modified_combining_class() as u16) << 8; |
216 | } |
217 | } |
218 | |
219 | self.set_unicode_props(props); |
220 | } |
221 | |
222 | #[inline ] |
223 | pub(crate) fn general_category(&self) -> GeneralCategory { |
224 | let n = self.unicode_props() & UnicodeProps::GENERAL_CATEGORY.bits(); |
225 | GeneralCategory::from_rb(n as u32) |
226 | } |
227 | |
228 | #[inline ] |
229 | pub(crate) fn set_general_category(&mut self, gc: GeneralCategory) { |
230 | let gc = gc.to_rb(); |
231 | let n = |
232 | (gc as u16) | (self.unicode_props() & (0xFF & !UnicodeProps::GENERAL_CATEGORY.bits())); |
233 | self.set_unicode_props(n); |
234 | } |
235 | |
236 | #[inline ] |
237 | pub(crate) fn space_fallback(&self) -> Option<Space> { |
238 | if self.general_category() == GeneralCategory::SpaceSeparator { |
239 | let n = (self.unicode_props() >> 8) as u8; |
240 | Some(n) |
241 | } else { |
242 | None |
243 | } |
244 | } |
245 | |
246 | #[inline ] |
247 | pub(crate) fn set_space_fallback(&mut self, space: Space) { |
248 | if self.general_category() == GeneralCategory::SpaceSeparator { |
249 | let n = ((space as u16) << 8) | (self.unicode_props() & 0xFF); |
250 | self.set_unicode_props(n); |
251 | } |
252 | } |
253 | |
254 | #[inline ] |
255 | pub(crate) fn is_unicode_mark(&self) -> bool { |
256 | self.general_category().is_mark() |
257 | } |
258 | |
259 | #[inline ] |
260 | pub(crate) fn is_zwnj(&self) -> bool { |
261 | self.general_category() == GeneralCategory::Format |
262 | && (self.unicode_props() & UnicodeProps::CF_ZWNJ.bits() != 0) |
263 | } |
264 | |
265 | #[inline ] |
266 | pub(crate) fn is_zwj(&self) -> bool { |
267 | self.general_category() == GeneralCategory::Format |
268 | && (self.unicode_props() & UnicodeProps::CF_ZWJ.bits() != 0) |
269 | } |
270 | |
271 | #[inline ] |
272 | pub(crate) fn modified_combining_class(&self) -> u8 { |
273 | if self.is_unicode_mark() { |
274 | (self.unicode_props() >> 8) as u8 |
275 | } else { |
276 | 0 |
277 | } |
278 | } |
279 | |
280 | #[inline ] |
281 | pub(crate) fn set_modified_combining_class(&mut self, mcc: u8) { |
282 | if self.is_unicode_mark() { |
283 | let n = ((mcc as u16) << 8) | (self.unicode_props() & 0xFF); |
284 | self.set_unicode_props(n); |
285 | } |
286 | } |
287 | |
288 | #[inline ] |
289 | pub(crate) fn is_hidden(&self) -> bool { |
290 | self.unicode_props() & UnicodeProps::HIDDEN.bits() != 0 |
291 | } |
292 | |
293 | #[inline ] |
294 | pub(crate) fn unhide(&mut self) { |
295 | let mut n = self.unicode_props(); |
296 | n &= !UnicodeProps::HIDDEN.bits(); |
297 | self.set_unicode_props(n); |
298 | } |
299 | |
300 | #[inline ] |
301 | pub(crate) fn set_continuation(&mut self) { |
302 | let mut n = self.unicode_props(); |
303 | n |= UnicodeProps::CONTINUATION.bits(); |
304 | self.set_unicode_props(n); |
305 | } |
306 | |
307 | #[inline ] |
308 | pub(crate) fn reset_continuation(&mut self) { |
309 | let mut n = self.unicode_props(); |
310 | n &= !UnicodeProps::CONTINUATION.bits(); |
311 | self.set_unicode_props(n); |
312 | } |
313 | |
314 | #[inline ] |
315 | pub(crate) fn is_continuation(&self) -> bool { |
316 | self.unicode_props() & UnicodeProps::CONTINUATION.bits() != 0 |
317 | } |
318 | |
319 | #[inline ] |
320 | pub(crate) fn is_default_ignorable(&self) -> bool { |
321 | let n = self.unicode_props() & UnicodeProps::IGNORABLE.bits(); |
322 | n != 0 && !self.is_substituted() |
323 | } |
324 | |
325 | // Var allocation: lig_props (aka lig_id / lig_comp) |
326 | // Used during the GSUB/GPOS processing to track ligatures |
327 | // |
328 | // When a ligature is formed: |
329 | // |
330 | // - The ligature glyph and any marks in between all the same newly allocated |
331 | // lig_id, |
332 | // - The ligature glyph will get lig_num_comps set to the number of components |
333 | // - The marks get lig_comp > 0, reflecting which component of the ligature |
334 | // they were applied to. |
335 | // - This is used in GPOS to attach marks to the right component of a ligature |
336 | // in MarkLigPos, |
337 | // - Note that when marks are ligated together, much of the above is skipped |
338 | // and the current lig_id reused. |
339 | // |
340 | // When a multiple-substitution is done: |
341 | // |
342 | // - All resulting glyphs will have lig_id = 0, |
343 | // - The resulting glyphs will have lig_comp = 0, 1, 2, ... respectively. |
344 | // - This is used in GPOS to attach marks to the first component of a |
345 | // multiple substitution in MarkBasePos. |
346 | // |
347 | // The numbers are also used in GPOS to do mark-to-mark positioning only |
348 | // to marks that belong to the same component of the same ligature. |
349 | |
350 | #[inline ] |
351 | pub(crate) fn lig_props(&self) -> u8 { |
352 | let v: &[u8; 4] = bytemuck::cast_ref(&self.var1); |
353 | v[2] |
354 | } |
355 | |
356 | #[inline ] |
357 | pub(crate) fn set_lig_props(&mut self, n: u8) { |
358 | let v: &mut [u8; 4] = bytemuck::cast_mut(&mut self.var1); |
359 | v[2] = n; |
360 | } |
361 | |
362 | pub(crate) fn set_lig_props_for_ligature(&mut self, lig_id: u8, lig_num_comps: u8) { |
363 | self.set_lig_props((lig_id << 5) | IS_LIG_BASE | (lig_num_comps & 0x0F)); |
364 | } |
365 | |
366 | pub(crate) fn set_lig_props_for_mark(&mut self, lig_id: u8, lig_comp: u8) { |
367 | self.set_lig_props((lig_id << 5) | (lig_comp & 0x0F)); |
368 | } |
369 | |
370 | pub(crate) fn set_lig_props_for_component(&mut self, lig_comp: u8) { |
371 | self.set_lig_props_for_mark(0, lig_comp) |
372 | } |
373 | |
374 | #[inline ] |
375 | pub(crate) fn lig_id(&self) -> u8 { |
376 | self.lig_props() >> 5 |
377 | } |
378 | |
379 | #[inline ] |
380 | pub(crate) fn is_ligated_internal(&self) -> bool { |
381 | self.lig_props() & IS_LIG_BASE != 0 |
382 | } |
383 | |
384 | #[inline ] |
385 | pub(crate) fn lig_comp(&self) -> u8 { |
386 | if self.is_ligated_internal() { |
387 | 0 |
388 | } else { |
389 | self.lig_props() & 0x0F |
390 | } |
391 | } |
392 | |
393 | #[inline ] |
394 | pub(crate) fn lig_num_comps(&self) -> u8 { |
395 | if self.glyph_props() & GlyphPropsFlags::LIGATURE.bits() != 0 && self.is_ligated_internal() |
396 | { |
397 | self.lig_props() & 0x0F |
398 | } else { |
399 | 1 |
400 | } |
401 | } |
402 | |
403 | // Var allocation: glyph_props |
404 | // Used during the GSUB/GPOS processing to store GDEF glyph properties |
405 | |
406 | #[inline ] |
407 | pub(crate) fn glyph_props(&self) -> u16 { |
408 | let v: &[u16; 2] = bytemuck::cast_ref(&self.var1); |
409 | v[0] |
410 | } |
411 | |
412 | #[inline ] |
413 | pub(crate) fn set_glyph_props(&mut self, n: u16) { |
414 | let v: &mut [u16; 2] = bytemuck::cast_mut(&mut self.var1); |
415 | v[0] = n; |
416 | } |
417 | |
418 | #[inline ] |
419 | pub(crate) fn is_base_glyph(&self) -> bool { |
420 | self.glyph_props() & GlyphPropsFlags::BASE_GLYPH.bits() != 0 |
421 | } |
422 | |
423 | #[inline ] |
424 | pub(crate) fn is_ligature(&self) -> bool { |
425 | self.glyph_props() & GlyphPropsFlags::LIGATURE.bits() != 0 |
426 | } |
427 | |
428 | #[inline ] |
429 | pub(crate) fn is_mark(&self) -> bool { |
430 | self.glyph_props() & GlyphPropsFlags::MARK.bits() != 0 |
431 | } |
432 | |
433 | #[inline ] |
434 | pub(crate) fn is_substituted(&self) -> bool { |
435 | self.glyph_props() & GlyphPropsFlags::SUBSTITUTED.bits() != 0 |
436 | } |
437 | |
438 | #[inline ] |
439 | pub(crate) fn is_ligated(&self) -> bool { |
440 | self.glyph_props() & GlyphPropsFlags::LIGATED.bits() != 0 |
441 | } |
442 | |
443 | #[inline ] |
444 | pub(crate) fn is_multiplied(&self) -> bool { |
445 | self.glyph_props() & GlyphPropsFlags::MULTIPLIED.bits() != 0 |
446 | } |
447 | |
448 | #[inline ] |
449 | pub(crate) fn is_ligated_and_didnt_multiply(&self) -> bool { |
450 | self.is_ligated() && !self.is_multiplied() |
451 | } |
452 | |
453 | #[inline ] |
454 | pub(crate) fn clear_ligated_and_multiplied(&mut self) { |
455 | let mut n = self.glyph_props(); |
456 | n &= !(GlyphPropsFlags::LIGATED | GlyphPropsFlags::MULTIPLIED).bits(); |
457 | self.set_glyph_props(n); |
458 | } |
459 | |
460 | #[inline ] |
461 | pub(crate) fn clear_substituted(&mut self) { |
462 | let mut n = self.glyph_props(); |
463 | n &= !GlyphPropsFlags::SUBSTITUTED.bits(); |
464 | self.set_glyph_props(n); |
465 | } |
466 | |
467 | // Var allocation: syllable |
468 | // Used during the GSUB/GPOS processing to store shaping boundaries |
469 | |
470 | #[inline ] |
471 | pub(crate) fn syllable(&self) -> u8 { |
472 | let v: &[u8; 4] = bytemuck::cast_ref(&self.var1); |
473 | v[3] |
474 | } |
475 | |
476 | #[inline ] |
477 | pub(crate) fn set_syllable(&mut self, n: u8) { |
478 | let v: &mut [u8; 4] = bytemuck::cast_mut(&mut self.var1); |
479 | v[3] = n; |
480 | } |
481 | |
482 | // Var allocation: glyph_index |
483 | // Used during the normalization process to store glyph indices |
484 | |
485 | #[inline ] |
486 | pub(crate) fn glyph_index(&mut self) -> u32 { |
487 | self.var1 |
488 | } |
489 | |
490 | #[inline ] |
491 | pub(crate) fn set_glyph_index(&mut self, n: u32) { |
492 | self.var1 = n; |
493 | } |
494 | } |
495 | |
/// A cluster level.
///
/// Selects how cluster values are treated when glyphs are merged.
#[allow(missing_docs)]
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum BufferClusterLevel {
    // The level produced by `Default`.
    MonotoneGraphemes,
    // NOTE(review): presumably mirrors HarfBuzz's "monotone characters"
    // level — confirm against the HarfBuzz documentation.
    MonotoneCharacters,
    // At this level the buffer's cluster-merging routines leave cluster
    // values as they are.
    Characters,
}

impl Default for BufferClusterLevel {
    /// Returns [`BufferClusterLevel::MonotoneGraphemes`].
    #[inline]
    fn default() -> Self {
        Self::MonotoneGraphemes
    }
}
511 | |
/// The shaping buffer: glyph infos and positions plus the segment properties
/// and the bookkeeping state (cursor, lengths, output mode) used while the
/// contents are being processed.
pub struct Buffer {
    // Information about how the text in the buffer should be treated.
    pub flags: BufferFlags,
    pub cluster_level: BufferClusterLevel,
    pub invisible: Option<GlyphId>,
    pub scratch_flags: BufferScratchFlags,
    // Maximum allowed len.
    pub max_len: usize,
    /// Maximum allowed operations.
    pub max_ops: i32,

    // Buffer contents.
    pub direction: Direction,
    pub script: Option<Script>,
    pub language: Option<Language>,

    /// Allocations successful.
    pub successful: bool,
    /// Whether we have an output buffer going on.
    pub(crate) have_output: bool,
    // When set, output glyphs live in `pos` (reinterpreted as glyph infos)
    // instead of in `info`; see `out_info()`.
    pub have_separate_output: bool,
    /// Whether we have positions
    pub have_positions: bool,

    // Cursor into `info`.
    pub idx: usize,
    // Number of valid entries in `info`.
    pub len: usize,
    // Number of glyphs written to the output so far.
    pub out_len: usize,

    pub info: Vec<GlyphInfo>,
    pub pos: Vec<GlyphPosition>,

    // Counter handed out (after incrementing) by `next_serial()`.
    serial: u32,

    // Text before / after the main buffer contents.
    // Always in Unicode, and ordered outward.
    // Index 0 is for "pre-context", 1 for "post-context".
    pub context: [[char; CONTEXT_LENGTH]; 2],
    pub context_len: [usize; 2],
}
551 | |
552 | impl Buffer { |
// NOTE(review): the `*_FACTOR` / `*_MIN` constants are not used in the visible
// part of this file; presumably they derive per-run limits from the input
// size — confirm at their use sites.
pub const MAX_LEN_FACTOR: usize = 64;
pub const MAX_LEN_MIN: usize = 16384;
// Initial value of `max_len` in `Buffer::new`.
// Shaping more than a billion chars? Let us know!
pub const MAX_LEN_DEFAULT: usize = 0x3FFFFFFF;

pub const MAX_OPS_FACTOR: i32 = 1024;
pub const MAX_OPS_MIN: i32 = 16384;
// Initial value of `max_ops` in `Buffer::new`.
// Shaping more than a billion operations? Let us know!
pub const MAX_OPS_DEFAULT: i32 = 0x1FFFFFFF;
562 | |
563 | /// Creates a new `Buffer`. |
564 | pub fn new() -> Self { |
565 | Buffer { |
566 | flags: BufferFlags::empty(), |
567 | cluster_level: BufferClusterLevel::default(), |
568 | invisible: None, |
569 | scratch_flags: BufferScratchFlags::default(), |
570 | max_len: Self::MAX_LEN_DEFAULT, |
571 | max_ops: Self::MAX_OPS_DEFAULT, |
572 | direction: Direction::Invalid, |
573 | script: None, |
574 | language: None, |
575 | successful: true, |
576 | have_output: false, |
577 | have_positions: false, |
578 | idx: 0, |
579 | len: 0, |
580 | out_len: 0, |
581 | info: Vec::new(), |
582 | pos: Vec::new(), |
583 | have_separate_output: false, |
584 | serial: 0, |
585 | context: [ |
586 | [' \0' , ' \0' , ' \0' , ' \0' , ' \0' ], |
587 | [' \0' , ' \0' , ' \0' , ' \0' , ' \0' ], |
588 | ], |
589 | context_len: [0, 0], |
590 | } |
591 | } |
592 | |
593 | #[inline ] |
594 | pub fn info_slice(&self) -> &[GlyphInfo] { |
595 | &self.info[..self.len] |
596 | } |
597 | |
598 | #[inline ] |
599 | pub fn info_slice_mut(&mut self) -> &mut [GlyphInfo] { |
600 | &mut self.info[..self.len] |
601 | } |
602 | |
603 | #[inline ] |
604 | pub fn out_info(&self) -> &[GlyphInfo] { |
605 | if self.have_separate_output { |
606 | bytemuck::cast_slice(self.pos.as_slice()) |
607 | } else { |
608 | &self.info |
609 | } |
610 | } |
611 | |
612 | #[inline ] |
613 | pub fn out_info_mut(&mut self) -> &mut [GlyphInfo] { |
614 | if self.have_separate_output { |
615 | bytemuck::cast_slice_mut(self.pos.as_mut_slice()) |
616 | } else { |
617 | &mut self.info |
618 | } |
619 | } |
620 | |
621 | #[inline ] |
622 | fn set_out_info(&mut self, i: usize, info: GlyphInfo) { |
623 | self.out_info_mut()[i] = info; |
624 | } |
625 | |
626 | #[inline ] |
627 | pub fn cur(&self, i: usize) -> &GlyphInfo { |
628 | &self.info[self.idx + i] |
629 | } |
630 | |
631 | #[inline ] |
632 | pub fn cur_mut(&mut self, i: usize) -> &mut GlyphInfo { |
633 | let idx = self.idx + i; |
634 | &mut self.info[idx] |
635 | } |
636 | |
637 | #[inline ] |
638 | pub fn cur_pos_mut(&mut self) -> &mut GlyphPosition { |
639 | let i = self.idx; |
640 | &mut self.pos[i] |
641 | } |
642 | |
643 | #[inline ] |
644 | pub fn prev(&self) -> &GlyphInfo { |
645 | let idx = self.out_len.saturating_sub(1); |
646 | &self.out_info()[idx] |
647 | } |
648 | |
649 | #[inline ] |
650 | pub fn prev_mut(&mut self) -> &mut GlyphInfo { |
651 | let idx = self.out_len.saturating_sub(1); |
652 | &mut self.out_info_mut()[idx] |
653 | } |
654 | |
655 | fn clear(&mut self) { |
656 | self.direction = Direction::Invalid; |
657 | self.script = None; |
658 | self.language = None; |
659 | self.scratch_flags = BufferScratchFlags::default(); |
660 | |
661 | self.successful = true; |
662 | self.have_output = false; |
663 | self.have_positions = false; |
664 | |
665 | self.idx = 0; |
666 | self.info.clear(); |
667 | self.pos.clear(); |
668 | self.len = 0; |
669 | self.out_len = 0; |
670 | self.have_separate_output = false; |
671 | |
672 | self.serial = 0; |
673 | |
674 | self.context = [ |
675 | [' \0' , ' \0' , ' \0' , ' \0' , ' \0' ], |
676 | [' \0' , ' \0' , ' \0' , ' \0' , ' \0' ], |
677 | ]; |
678 | self.context_len = [0, 0]; |
679 | } |
680 | |
681 | #[inline ] |
682 | pub fn backtrack_len(&self) -> usize { |
683 | if self.have_output { |
684 | self.out_len |
685 | } else { |
686 | self.idx |
687 | } |
688 | } |
689 | |
690 | #[inline ] |
691 | pub fn lookahead_len(&self) -> usize { |
692 | self.len - self.idx |
693 | } |
694 | |
695 | #[inline ] |
696 | fn next_serial(&mut self) -> u32 { |
697 | self.serial += 1; |
698 | self.serial |
699 | } |
700 | |
701 | fn add(&mut self, codepoint: u32, cluster: u32) { |
702 | self.ensure(self.len + 1); |
703 | |
704 | let i = self.len; |
705 | self.info[i] = GlyphInfo { |
706 | glyph_id: codepoint, |
707 | mask: 0, |
708 | cluster, |
709 | var1: 0, |
710 | var2: 0, |
711 | }; |
712 | |
713 | self.len += 1; |
714 | } |
715 | |
716 | #[inline ] |
717 | pub fn reverse(&mut self) { |
718 | if self.is_empty() { |
719 | return; |
720 | } |
721 | |
722 | self.reverse_range(0, self.len); |
723 | } |
724 | |
725 | pub fn reverse_range(&mut self, start: usize, end: usize) { |
726 | if end - start < 2 { |
727 | return; |
728 | } |
729 | |
730 | self.info[start..end].reverse(); |
731 | if self.have_positions { |
732 | self.pos[start..end].reverse(); |
733 | } |
734 | } |
735 | |
736 | #[inline ] |
737 | fn reset_clusters(&mut self) { |
738 | for (i, info) in self.info.iter_mut().enumerate() { |
739 | info.cluster = i as u32; |
740 | } |
741 | } |
742 | |
743 | pub fn guess_segment_properties(&mut self) { |
744 | if self.script.is_none() { |
745 | for info in &self.info { |
746 | match info.as_char().script() { |
747 | crate::script::COMMON | crate::script::INHERITED | crate::script::UNKNOWN => {} |
748 | s => { |
749 | self.script = Some(s); |
750 | break; |
751 | } |
752 | } |
753 | } |
754 | } |
755 | |
756 | if self.direction == Direction::Invalid { |
757 | if let Some(script) = self.script { |
758 | self.direction = Direction::from_script(script).unwrap_or_default(); |
759 | } |
760 | |
761 | if self.direction == Direction::Invalid { |
762 | self.direction = Direction::LeftToRight; |
763 | } |
764 | } |
765 | |
766 | // TODO: language must be set |
767 | } |
768 | |
769 | pub fn swap_buffers(&mut self) { |
770 | assert!(self.have_output); |
771 | |
772 | assert!(self.idx <= self.len); |
773 | if !self.successful { |
774 | self.have_output = false; |
775 | self.out_len = 0; |
776 | self.idx = 0; |
777 | return; |
778 | } |
779 | |
780 | self.next_glyphs(self.len - self.idx); |
781 | |
782 | if self.have_separate_output { |
783 | // Swap info and pos buffers. |
784 | let info: Vec<GlyphPosition> = bytemuck::cast_vec(core::mem::take(&mut self.info)); |
785 | let pos: Vec<GlyphInfo> = bytemuck::cast_vec(core::mem::take(&mut self.pos)); |
786 | self.pos = info; |
787 | self.info = pos; |
788 | } |
789 | |
790 | self.len = self.out_len; |
791 | |
792 | self.have_output = false; |
793 | self.out_len = 0; |
794 | self.idx = 0; |
795 | } |
796 | |
797 | pub fn clear_output(&mut self) { |
798 | self.have_output = true; |
799 | self.have_positions = false; |
800 | |
801 | self.idx = 0; |
802 | self.out_len = 0; |
803 | self.have_separate_output = false; |
804 | } |
805 | |
806 | pub fn clear_positions(&mut self) { |
807 | self.have_output = false; |
808 | self.have_positions = true; |
809 | |
810 | self.out_len = 0; |
811 | self.have_separate_output = false; |
812 | |
813 | for pos in &mut self.pos { |
814 | *pos = GlyphPosition::default(); |
815 | } |
816 | } |
817 | |
818 | pub fn replace_glyphs(&mut self, num_in: usize, num_out: usize, glyph_data: &[u32]) { |
819 | if !self.make_room_for(num_in, num_out) { |
820 | return; |
821 | } |
822 | |
823 | assert!(self.idx + num_in <= self.len); |
824 | |
825 | self.merge_clusters(self.idx, self.idx + num_in); |
826 | |
827 | let orig_info = self.info[self.idx]; |
828 | for i in 0..num_out { |
829 | let ii = self.out_len + i; |
830 | self.set_out_info(ii, orig_info); |
831 | self.out_info_mut()[ii].glyph_id = glyph_data[i]; |
832 | } |
833 | |
834 | self.idx += num_in; |
835 | self.out_len += num_out; |
836 | } |
837 | |
838 | pub fn replace_glyph(&mut self, glyph_index: u32) { |
839 | if self.have_separate_output || self.out_len != self.idx { |
840 | if !self.make_room_for(1, 1) { |
841 | return; |
842 | } |
843 | |
844 | self.set_out_info(self.out_len, self.info[self.idx]); |
845 | } |
846 | |
847 | let out_len = self.out_len; |
848 | self.out_info_mut()[out_len].glyph_id = glyph_index; |
849 | |
850 | self.idx += 1; |
851 | self.out_len += 1; |
852 | } |
853 | |
854 | pub fn output_glyph(&mut self, glyph_index: u32) { |
855 | if !self.make_room_for(0, 1) { |
856 | return; |
857 | } |
858 | |
859 | if self.idx == self.len && self.out_len == 0 { |
860 | return; |
861 | } |
862 | |
863 | let out_len = self.out_len; |
864 | if self.idx < self.len { |
865 | self.set_out_info(out_len, self.info[self.idx]); |
866 | } else { |
867 | let info = self.out_info()[out_len - 1]; |
868 | self.set_out_info(out_len, info); |
869 | } |
870 | |
871 | self.out_info_mut()[out_len].glyph_id = glyph_index; |
872 | |
873 | self.out_len += 1; |
874 | } |
875 | |
876 | pub fn output_info(&mut self, glyph_info: GlyphInfo) { |
877 | if !self.make_room_for(0, 1) { |
878 | return; |
879 | } |
880 | |
881 | self.set_out_info(self.out_len, glyph_info); |
882 | self.out_len += 1; |
883 | } |
884 | |
885 | pub fn output_char(&mut self, unichar: u32, glyph: u32) { |
886 | self.cur_mut(0).set_glyph_index(glyph); |
887 | // This is very confusing indeed. |
888 | self.output_glyph(unichar); |
889 | let mut flags = self.scratch_flags; |
890 | self.prev_mut().init_unicode_props(&mut flags); |
891 | self.scratch_flags = flags; |
892 | } |
893 | |
894 | /// Copies glyph at idx to output but doesn't advance idx. |
895 | pub fn copy_glyph(&mut self) { |
896 | if !self.make_room_for(0, 1) { |
897 | return; |
898 | } |
899 | |
900 | self.set_out_info(self.out_len, self.info[self.idx]); |
901 | self.out_len += 1; |
902 | } |
903 | |
904 | /// Copies glyph at idx to output and advance idx. |
905 | /// |
906 | /// If there's no output, just advance idx. |
907 | pub fn next_glyph(&mut self) { |
908 | if self.have_output { |
909 | if self.have_separate_output || self.out_len != self.idx { |
910 | if !self.make_room_for(1, 1) { |
911 | return; |
912 | } |
913 | |
914 | self.set_out_info(self.out_len, self.info[self.idx]); |
915 | } |
916 | |
917 | self.out_len += 1; |
918 | } |
919 | |
920 | self.idx += 1; |
921 | } |
922 | |
923 | /// Copies n glyphs at idx to output and advance idx. |
924 | /// |
925 | /// If there's no output, just advance idx. |
926 | pub fn next_glyphs(&mut self, n: usize) { |
927 | if self.have_output { |
928 | if self.have_separate_output || self.out_len != self.idx { |
929 | if !self.make_room_for(n, n) { |
930 | return; |
931 | } |
932 | |
933 | for i in 0..n { |
934 | self.set_out_info(self.out_len + i, self.info[self.idx + i]); |
935 | } |
936 | } |
937 | |
938 | self.out_len += n; |
939 | } |
940 | |
941 | self.idx += n; |
942 | } |
943 | |
944 | pub fn next_char(&mut self, glyph: u32) { |
945 | self.cur_mut(0).set_glyph_index(glyph); |
946 | self.next_glyph(); |
947 | } |
948 | |
949 | /// Advance idx without copying to output. |
950 | pub fn skip_glyph(&mut self) { |
951 | self.idx += 1; |
952 | } |
953 | |
954 | pub fn reset_masks(&mut self, mask: Mask) { |
955 | for info in &mut self.info[..self.len] { |
956 | info.mask = mask; |
957 | } |
958 | } |
959 | |
960 | pub fn set_masks(&mut self, mut value: Mask, mask: Mask, cluster_start: u32, cluster_end: u32) { |
961 | let not_mask = !mask; |
962 | value &= mask; |
963 | |
964 | if mask == 0 { |
965 | return; |
966 | } |
967 | |
968 | if cluster_start == 0 && cluster_end == core::u32::MAX { |
969 | for info in &mut self.info[..self.len] { |
970 | info.mask = (info.mask & not_mask) | value; |
971 | } |
972 | |
973 | return; |
974 | } |
975 | |
976 | for info in &mut self.info[..self.len] { |
977 | if cluster_start <= info.cluster && info.cluster < cluster_end { |
978 | info.mask = (info.mask & not_mask) | value; |
979 | } |
980 | } |
981 | } |
982 | |
983 | pub fn merge_clusters(&mut self, start: usize, end: usize) { |
984 | if end - start < 2 { |
985 | return; |
986 | } |
987 | |
988 | self.merge_clusters_impl(start, end) |
989 | } |
990 | |
991 | fn merge_clusters_impl(&mut self, mut start: usize, mut end: usize) { |
992 | if self.cluster_level == BufferClusterLevel::Characters { |
993 | self.unsafe_to_break(start, end); |
994 | return; |
995 | } |
996 | |
997 | let mut cluster = self.info[start].cluster; |
998 | |
999 | for i in start + 1..end { |
1000 | cluster = core::cmp::min(cluster, self.info[i].cluster); |
1001 | } |
1002 | |
1003 | // Extend end |
1004 | while end < self.len && self.info[end - 1].cluster == self.info[end].cluster { |
1005 | end += 1; |
1006 | } |
1007 | |
1008 | // Extend start |
1009 | while end < start && self.info[start - 1].cluster == self.info[start].cluster { |
1010 | start -= 1; |
1011 | } |
1012 | |
1013 | // If we hit the start of buffer, continue in out-buffer. |
1014 | if self.idx == start { |
1015 | let mut i = self.out_len; |
1016 | while i != 0 && self.out_info()[i - 1].cluster == self.info[start].cluster { |
1017 | Self::set_cluster(&mut self.out_info_mut()[i - 1], cluster, 0); |
1018 | i -= 1; |
1019 | } |
1020 | } |
1021 | |
1022 | for i in start..end { |
1023 | Self::set_cluster(&mut self.info[i], cluster, 0); |
1024 | } |
1025 | } |
1026 | |
1027 | pub fn merge_out_clusters(&mut self, mut start: usize, mut end: usize) { |
1028 | if self.cluster_level == BufferClusterLevel::Characters { |
1029 | return; |
1030 | } |
1031 | |
1032 | if end - start < 2 { |
1033 | return; |
1034 | } |
1035 | |
1036 | let mut cluster = self.out_info()[start].cluster; |
1037 | |
1038 | for i in start + 1..end { |
1039 | cluster = core::cmp::min(cluster, self.out_info()[i].cluster); |
1040 | } |
1041 | |
1042 | // Extend start |
1043 | while start != 0 && self.out_info()[start - 1].cluster == self.out_info()[start].cluster { |
1044 | start -= 1; |
1045 | } |
1046 | |
1047 | // Extend end |
1048 | while end < self.out_len && self.out_info()[end - 1].cluster == self.out_info()[end].cluster |
1049 | { |
1050 | end += 1; |
1051 | } |
1052 | |
1053 | // If we hit the start of buffer, continue in out-buffer. |
1054 | if end == self.out_len { |
1055 | let mut i = self.idx; |
1056 | while i < self.len && self.info[i].cluster == self.out_info()[end - 1].cluster { |
1057 | Self::set_cluster(&mut self.info[i], cluster, 0); |
1058 | i += 1; |
1059 | } |
1060 | } |
1061 | |
1062 | for i in start..end { |
1063 | Self::set_cluster(&mut self.out_info_mut()[i], cluster, 0); |
1064 | } |
1065 | } |
1066 | |
1067 | /// Merge clusters for deleting current glyph, and skip it. |
1068 | pub fn delete_glyph(&mut self) { |
1069 | let cluster = self.info[self.idx].cluster; |
1070 | |
1071 | if self.idx + 1 < self.len && cluster == self.info[self.idx + 1].cluster { |
1072 | // Cluster survives; do nothing. |
1073 | self.skip_glyph(); |
1074 | return; |
1075 | } |
1076 | |
1077 | if self.out_len != 0 { |
1078 | // Merge cluster backward. |
1079 | if cluster < self.out_info()[self.out_len - 1].cluster { |
1080 | let mask = self.info[self.idx].mask; |
1081 | let old_cluster = self.out_info()[self.out_len - 1].cluster; |
1082 | |
1083 | let mut i = self.out_len; |
1084 | while i != 0 && self.out_info()[i - 1].cluster == old_cluster { |
1085 | Self::set_cluster(&mut self.out_info_mut()[i - 1], cluster, mask); |
1086 | i -= 1; |
1087 | } |
1088 | } |
1089 | |
1090 | self.skip_glyph(); |
1091 | return; |
1092 | } |
1093 | |
1094 | if self.idx + 1 < self.len { |
1095 | // Merge cluster forward. |
1096 | self.merge_clusters(self.idx, self.idx + 2); |
1097 | } |
1098 | |
1099 | self.skip_glyph(); |
1100 | } |
1101 | |
1102 | pub fn delete_glyphs_inplace(&mut self, filter: impl Fn(&GlyphInfo) -> bool) { |
1103 | // Merge clusters and delete filtered glyphs. |
1104 | // NOTE! We can't use out-buffer as we have positioning data. |
1105 | let mut j = 0; |
1106 | |
1107 | for i in 0..self.len { |
1108 | if filter(&self.info[i]) { |
1109 | // Merge clusters. |
1110 | // Same logic as delete_glyph(), but for in-place removal |
1111 | |
1112 | let cluster = self.info[i].cluster; |
1113 | if i + 1 < self.len && cluster == self.info[i + 1].cluster { |
1114 | // Cluster survives; do nothing. |
1115 | continue; |
1116 | } |
1117 | |
1118 | if j != 0 { |
1119 | // Merge cluster backward. |
1120 | if cluster < self.info[j - 1].cluster { |
1121 | let mask = self.info[i].mask; |
1122 | let old_cluster = self.info[j - 1].cluster; |
1123 | |
1124 | let mut k = j; |
1125 | while k > 0 && self.info[k - 1].cluster == old_cluster { |
1126 | Self::set_cluster(&mut self.info[k - 1], cluster, mask); |
1127 | k -= 1; |
1128 | } |
1129 | } |
1130 | continue; |
1131 | } |
1132 | |
1133 | if i + 1 < self.len { |
1134 | // Merge cluster forward. |
1135 | self.merge_clusters(i, i + 2); |
1136 | } |
1137 | |
1138 | continue; |
1139 | } |
1140 | |
1141 | if j != i { |
1142 | self.info[j] = self.info[i]; |
1143 | self.pos[j] = self.pos[i]; |
1144 | } |
1145 | |
1146 | j += 1; |
1147 | } |
1148 | |
1149 | self.len = j; |
1150 | } |
1151 | |
1152 | pub fn unsafe_to_break(&mut self, start: usize, end: usize) { |
1153 | if end - start < 2 { |
1154 | return; |
1155 | } |
1156 | |
1157 | self.unsafe_to_break_impl(start, end); |
1158 | } |
1159 | |
1160 | fn unsafe_to_break_impl(&mut self, start: usize, end: usize) { |
1161 | let mut cluster = core::u32::MAX; |
1162 | cluster = Self::_unsafe_to_break_find_min_cluster(&self.info, start, end, cluster); |
1163 | let unsafe_to_break = Self::_unsafe_to_break_set_mask(&mut self.info, start, end, cluster); |
1164 | if unsafe_to_break { |
1165 | self.scratch_flags |= BufferScratchFlags::HAS_UNSAFE_TO_BREAK; |
1166 | } |
1167 | } |
1168 | |
1169 | pub fn unsafe_to_break_from_outbuffer(&mut self, start: usize, end: usize) { |
1170 | if !self.have_output { |
1171 | self.unsafe_to_break_impl(start, end); |
1172 | return; |
1173 | } |
1174 | |
1175 | assert!(start <= self.out_len); |
1176 | assert!(self.idx <= end); |
1177 | |
1178 | let mut cluster = core::u32::MAX; |
1179 | cluster = |
1180 | Self::_unsafe_to_break_find_min_cluster(self.out_info(), start, self.out_len, cluster); |
1181 | cluster = Self::_unsafe_to_break_find_min_cluster(&self.info, self.idx, end, cluster); |
1182 | let idx = self.idx; |
1183 | let out_len = self.out_len; |
1184 | let unsafe_to_break1 = |
1185 | Self::_unsafe_to_break_set_mask(self.out_info_mut(), start, out_len, cluster); |
1186 | let unsafe_to_break2 = Self::_unsafe_to_break_set_mask(&mut self.info, idx, end, cluster); |
1187 | |
1188 | if unsafe_to_break1 || unsafe_to_break2 { |
1189 | self.scratch_flags |= BufferScratchFlags::HAS_UNSAFE_TO_BREAK; |
1190 | } |
1191 | } |
1192 | |
1193 | pub fn move_to(&mut self, i: usize) -> bool { |
1194 | if !self.have_output { |
1195 | assert!(i <= self.len); |
1196 | self.idx = i; |
1197 | return true; |
1198 | } |
1199 | |
1200 | if !self.successful { |
1201 | return false; |
1202 | } |
1203 | |
1204 | assert!(i <= self.out_len + (self.len - self.idx)); |
1205 | |
1206 | if self.out_len < i { |
1207 | let count = i - self.out_len; |
1208 | if !self.make_room_for(count, count) { |
1209 | return false; |
1210 | } |
1211 | |
1212 | for j in 0..count { |
1213 | self.set_out_info(self.out_len + j, self.info[self.idx + j]); |
1214 | } |
1215 | |
1216 | self.idx += count; |
1217 | self.out_len += count; |
1218 | } else if self.out_len > i { |
1219 | // Tricky part: rewinding... |
1220 | let count = self.out_len - i; |
1221 | |
1222 | // This will blow in our face if memory allocation fails later |
1223 | // in this same lookup... |
1224 | // |
1225 | // We used to shift with extra 32 items. |
1226 | // But that would leave empty slots in the buffer in case of allocation |
1227 | // failures. See comments in shift_forward(). This can cause O(N^2) |
1228 | // behavior more severely than adding 32 empty slots can... |
1229 | if self.idx < count { |
1230 | self.shift_forward(count - self.idx); |
1231 | } |
1232 | |
1233 | assert!(self.idx >= count); |
1234 | |
1235 | self.idx -= count; |
1236 | self.out_len -= count; |
1237 | |
1238 | for j in 0..count { |
1239 | self.info[self.idx + j] = self.out_info()[self.out_len + j]; |
1240 | } |
1241 | } |
1242 | |
1243 | true |
1244 | } |
1245 | |
1246 | pub fn ensure(&mut self, size: usize) -> bool { |
1247 | if size < self.len { |
1248 | return true; |
1249 | } |
1250 | |
1251 | if size > self.max_len { |
1252 | self.successful = false; |
1253 | return false; |
1254 | } |
1255 | |
1256 | self.info.resize(size, GlyphInfo::default()); |
1257 | self.pos.resize(size, GlyphPosition::default()); |
1258 | true |
1259 | } |
1260 | |
1261 | pub fn set_len(&mut self, len: usize) { |
1262 | self.ensure(len); |
1263 | self.len = len; |
1264 | } |
1265 | |
1266 | fn make_room_for(&mut self, num_in: usize, num_out: usize) -> bool { |
1267 | if !self.ensure(self.out_len + num_out) { |
1268 | return false; |
1269 | } |
1270 | |
1271 | if !self.have_separate_output && self.out_len + num_out > self.idx + num_in { |
1272 | assert!(self.have_output); |
1273 | |
1274 | self.have_separate_output = true; |
1275 | for i in 0..self.out_len { |
1276 | self.set_out_info(i, self.info[i]); |
1277 | } |
1278 | } |
1279 | |
1280 | true |
1281 | } |
1282 | |
1283 | fn shift_forward(&mut self, count: usize) { |
1284 | assert!(self.have_output); |
1285 | self.ensure(self.len + count); |
1286 | |
1287 | for i in (0..(self.len - self.idx)).rev() { |
1288 | self.info[self.idx + count + i] = self.info[self.idx + i]; |
1289 | } |
1290 | |
1291 | if self.idx + count > self.len { |
1292 | for info in &mut self.info[self.len..self.idx + count] { |
1293 | *info = GlyphInfo::default(); |
1294 | } |
1295 | } |
1296 | |
1297 | self.len += count; |
1298 | self.idx += count; |
1299 | } |
1300 | |
1301 | fn clear_context(&mut self, side: usize) { |
1302 | self.context_len[side] = 0; |
1303 | } |
1304 | |
1305 | pub fn sort(&mut self, start: usize, end: usize, cmp: impl Fn(&GlyphInfo, &GlyphInfo) -> bool) { |
1306 | assert!(!self.have_positions); |
1307 | |
1308 | for i in start + 1..end { |
1309 | let mut j = i; |
1310 | while j > start && cmp(&self.info[j - 1], &self.info[i]) { |
1311 | j -= 1; |
1312 | } |
1313 | |
1314 | if i == j { |
1315 | continue; |
1316 | } |
1317 | |
1318 | // Move item i to occupy place for item j, shift what's in between. |
1319 | self.merge_clusters(j, i + 1); |
1320 | |
1321 | { |
1322 | let t = self.info[i]; |
1323 | for idx in (0..i - j).rev() { |
1324 | self.info[idx + j + 1] = self.info[idx + j]; |
1325 | } |
1326 | |
1327 | self.info[j] = t; |
1328 | } |
1329 | } |
1330 | } |
1331 | |
1332 | pub fn set_cluster(info: &mut GlyphInfo, cluster: u32, mask: Mask) { |
1333 | if info.cluster != cluster { |
1334 | if mask & glyph_flag::UNSAFE_TO_BREAK != 0 { |
1335 | info.mask |= glyph_flag::UNSAFE_TO_BREAK; |
1336 | } else { |
1337 | info.mask &= !glyph_flag::UNSAFE_TO_BREAK; |
1338 | } |
1339 | } |
1340 | |
1341 | info.cluster = cluster; |
1342 | } |
1343 | |
1344 | fn _unsafe_to_break_find_min_cluster( |
1345 | info: &[GlyphInfo], |
1346 | start: usize, |
1347 | end: usize, |
1348 | mut cluster: u32, |
1349 | ) -> u32 { |
1350 | for glyph_info in &info[start..end] { |
1351 | cluster = core::cmp::min(cluster, glyph_info.cluster); |
1352 | } |
1353 | |
1354 | cluster |
1355 | } |
1356 | |
1357 | fn _unsafe_to_break_set_mask( |
1358 | info: &mut [GlyphInfo], |
1359 | start: usize, |
1360 | end: usize, |
1361 | cluster: u32, |
1362 | ) -> bool { |
1363 | let mut unsafe_to_break = false; |
1364 | for glyph_info in &mut info[start..end] { |
1365 | if glyph_info.cluster != cluster { |
1366 | unsafe_to_break = true; |
1367 | glyph_info.mask |= glyph_flag::UNSAFE_TO_BREAK; |
1368 | } |
1369 | } |
1370 | |
1371 | unsafe_to_break |
1372 | } |
1373 | |
1374 | /// Checks that buffer contains no elements. |
1375 | pub fn is_empty(&self) -> bool { |
1376 | self.len == 0 |
1377 | } |
1378 | |
1379 | fn push_str(&mut self, text: &str) { |
1380 | self.ensure(self.len + text.chars().count()); |
1381 | |
1382 | for (i, c) in text.char_indices() { |
1383 | self.add(c as u32, i as u32); |
1384 | } |
1385 | } |
1386 | |
1387 | fn set_pre_context(&mut self, text: &str) { |
1388 | self.clear_context(0); |
1389 | for (i, c) in text.chars().rev().enumerate().take(CONTEXT_LENGTH) { |
1390 | self.context[0][i] = c; |
1391 | self.context_len[0] += 1; |
1392 | } |
1393 | } |
1394 | |
1395 | fn set_post_context(&mut self, text: &str) { |
1396 | self.clear_context(1); |
1397 | for (i, c) in text.chars().enumerate().take(CONTEXT_LENGTH) { |
1398 | self.context[1][i] = c; |
1399 | self.context_len[1] += 1; |
1400 | } |
1401 | } |
1402 | |
1403 | pub fn next_cluster(&self, mut start: usize) -> usize { |
1404 | if start >= self.len { |
1405 | return start; |
1406 | } |
1407 | |
1408 | let cluster = self.info[start].cluster; |
1409 | start += 1; |
1410 | while start < self.len && cluster == self.info[start].cluster { |
1411 | start += 1; |
1412 | } |
1413 | |
1414 | start |
1415 | } |
1416 | |
1417 | pub fn next_syllable(&self, mut start: usize) -> usize { |
1418 | if start >= self.len { |
1419 | return start; |
1420 | } |
1421 | |
1422 | let syllable = self.info[start].syllable(); |
1423 | start += 1; |
1424 | while start < self.len && syllable == self.info[start].syllable() { |
1425 | start += 1; |
1426 | } |
1427 | |
1428 | start |
1429 | } |
1430 | |
1431 | pub fn next_grapheme(&self, mut start: usize) -> usize { |
1432 | if start >= self.len { |
1433 | return start; |
1434 | } |
1435 | |
1436 | start += 1; |
1437 | while start < self.len && self.info[start].is_continuation() { |
1438 | start += 1; |
1439 | } |
1440 | |
1441 | start |
1442 | } |
1443 | |
1444 | #[inline ] |
1445 | pub fn allocate_lig_id(&mut self) -> u8 { |
1446 | let mut lig_id = self.next_serial() & 0x07; |
1447 | if lig_id == 0 { |
1448 | // In case of overflow. |
1449 | lig_id = self.next_serial() & 0x07; |
1450 | } |
1451 | lig_id as u8 |
1452 | } |
1453 | } |
1454 | |
1455 | // TODO: to iter if possible |
1456 | |
// Runs `$body` once per cluster of `$buffer`.
//
// `$start` and `$end` are declared by the macro as mutable locals holding
// the half-open range of the current cluster, as computed by
// `next_cluster`. Iteration stops once `$start` reaches `$buffer.len`.
// `$buffer` is evaluated again on every iteration.
macro_rules! foreach_cluster {
    ($buffer:expr, $start:ident, $end:ident, $($body:tt)*) => {{
        let mut $start = 0;
        let mut $end = $buffer.next_cluster(0);
        while $start < $buffer.len {
            $($body)*;
            // Advance to the range that follows the one just processed.
            $start = $end;
            $end = $buffer.next_cluster($start);
        }
    }};
}
1468 | |
// Runs `$body` once per syllable of `$buffer`.
//
// `$start` and `$end` are declared by the macro as mutable locals holding
// the half-open range of the current syllable, as computed by
// `next_syllable`. Iteration stops once `$start` reaches `$buffer.len`.
// `$buffer` is evaluated again on every iteration.
macro_rules! foreach_syllable {
    ($buffer:expr, $start:ident, $end:ident, $($body:tt)*) => {{
        let mut $start = 0;
        let mut $end = $buffer.next_syllable(0);
        while $start < $buffer.len {
            $($body)*;
            // Advance to the range that follows the one just processed.
            $start = $end;
            $end = $buffer.next_syllable($start);
        }
    }};
}
1480 | |
// Runs `$body` once per grapheme of `$buffer`.
//
// `$start` and `$end` are declared by the macro as mutable locals holding
// the half-open range of the current grapheme, as computed by
// `next_grapheme`. Iteration stops once `$start` reaches `$buffer.len`.
// `$buffer` is evaluated again on every iteration.
macro_rules! foreach_grapheme {
    ($buffer:expr, $start:ident, $end:ident, $($body:tt)*) => {{
        let mut $start = 0;
        let mut $end = $buffer.next_grapheme(0);
        while $start < $buffer.len {
            $($body)*;
            // Advance to the range that follows the one just processed.
            $start = $end;
            $end = $buffer.next_grapheme($start);
        }
    }};
}
1492 | |
bitflags::bitflags! {
    /// Unicode properties packed into a `u16`.
    ///
    /// `GENERAL_CATEGORY` is a five-bit field mask (the low five bits) rather
    /// than a single flag; the remaining constants are single-bit flags.
    #[derive (Default, Debug, Clone, Copy)]
    pub struct UnicodeProps: u16 {
        const GENERAL_CATEGORY = 0x001F;
        const IGNORABLE = 0x0020;
        // MONGOLIAN FREE VARIATION SELECTOR 1..3, or TAG characters
        const HIDDEN = 0x0040;
        const CONTINUATION = 0x0080;

        // If GEN_CAT=FORMAT, top byte masks:
        const CF_ZWJ = 0x0100;
        const CF_ZWNJ = 0x0200;
    }
}
1507 | |
bitflags::bitflags! {
    /// Per-glyph property flags packed into a `u16`.
    ///
    /// `CLASS_MASK` and `PRESERVE` are unions of the single-bit flags listed
    /// right before them, not flags of their own.
    #[derive (Default, Debug, Clone, Copy)]
    pub struct GlyphPropsFlags: u16 {
        // The following three match LookupFlags::Ignore* numbers.
        const BASE_GLYPH = 0x02;
        const LIGATURE = 0x04;
        const MARK = 0x08;
        // Union of the three glyph class bits above.
        const CLASS_MASK = Self::BASE_GLYPH.bits() | Self::LIGATURE.bits() | Self::MARK.bits();

        // The following are used internally; not derived from GDEF.
        const SUBSTITUTED = 0x10;
        const LIGATED = 0x20;
        const MULTIPLIED = 0x40;

        // Union of the three internal bits above.
        const PRESERVE = Self::SUBSTITUTED.bits() | Self::LIGATED.bits() | Self::MULTIPLIED.bits();
    }
}
1525 | |
bitflags::bitflags! {
    /// Flags for buffers.
    ///
    /// Note that bit 0 is not used: the lowest defined flag is `1 << 1`.
    #[derive (Default, Debug, Clone, Copy)]
    pub struct BufferFlags: u32 {
        /// Indicates that special handling of the beginning of text paragraph can be applied to this buffer. Should usually be set, unless you are passing to the buffer only part of the text without the full context.
        const BEGINNING_OF_TEXT = 1 << 1;
        /// Indicates that special handling of the end of text paragraph can be applied to this buffer, similar to [`BufferFlags::BEGINNING_OF_TEXT`].
        const END_OF_TEXT = 1 << 2;
        /// Indicates that characters with `Default_Ignorable` Unicode property should use the corresponding glyph from the font, instead of hiding them (done by replacing them with the space glyph and zeroing the advance width.) This flag takes precedence over [`BufferFlags::REMOVE_DEFAULT_IGNORABLES`].
        const PRESERVE_DEFAULT_IGNORABLES = 1 << 3;
        /// Indicates that characters with `Default_Ignorable` Unicode property should be removed from glyph string instead of hiding them (done by replacing them with the space glyph and zeroing the advance width.) [`BufferFlags::PRESERVE_DEFAULT_IGNORABLES`] takes precedence over this flag.
        const REMOVE_DEFAULT_IGNORABLES = 1 << 4;
        /// Indicates that a dotted circle should not be inserted in the rendering of incorrect character sequences (such as `<0905 093E>`).
        const DO_NOT_INSERT_DOTTED_CIRCLE = 1 << 5;
    }
}
1542 | |
bitflags::bitflags! {
    /// Scratch flags stored on a buffer.
    ///
    /// `HAS_UNSAFE_TO_BREAK` is raised by the `unsafe_to_break*` methods of
    /// the buffer whenever they flag at least one glyph.
    #[derive (Default, Debug, Clone, Copy)]
    pub struct BufferScratchFlags: u32 {
        const HAS_NON_ASCII = 0x00000001;
        const HAS_DEFAULT_IGNORABLES = 0x00000002;
        const HAS_SPACE_FALLBACK = 0x00000004;
        const HAS_GPOS_ATTACHMENT = 0x00000008;
        const HAS_UNSAFE_TO_BREAK = 0x00000010;
        const HAS_CGJ = 0x00000020;

        // Reserved for complex shapers' internal use.
        const COMPLEX0 = 0x01000000;
        const COMPLEX1 = 0x02000000;
        const COMPLEX2 = 0x04000000;
        const COMPLEX3 = 0x08000000;
    }
}
1560 | |
bitflags::bitflags! {
    /// Flags used for serialization with a `BufferSerializer`.
    ///
    /// The `NO_*` flags suppress a part of the output that is written by
    /// default, while `GLYPH_EXTENTS` and `GLYPH_FLAGS` enable a part that is
    /// omitted by default.
    #[derive (Default)]
    pub struct SerializeFlags: u8 {
        /// Do not serialize glyph cluster.
        const NO_CLUSTERS = 0b00000001;
        /// Do not serialize glyph position information.
        const NO_POSITIONS = 0b00000010;
        /// Do not serialize glyph name.
        const NO_GLYPH_NAMES = 0b00000100;
        /// Serialize glyph extents.
        const GLYPH_EXTENTS = 0b00001000;
        /// Serialize glyph flags.
        const GLYPH_FLAGS = 0b00010000;
        /// Do not serialize glyph advances, glyph offsets will reflect absolute
        /// glyph positions.
        const NO_ADVANCES = 0b00100000;
    }
}
1580 | |
1581 | /// A buffer that contains an input string ready for shaping. |
1582 | pub struct UnicodeBuffer(pub(crate) Buffer); |
1583 | |
1584 | impl UnicodeBuffer { |
1585 | /// Create a new `UnicodeBuffer`. |
1586 | #[inline ] |
1587 | pub fn new() -> UnicodeBuffer { |
1588 | UnicodeBuffer(Buffer::new()) |
1589 | } |
1590 | |
1591 | /// Returns the length of the data of the buffer. |
1592 | /// |
1593 | /// This corresponds to the number of unicode codepoints contained in the |
1594 | /// buffer. |
1595 | #[inline ] |
1596 | pub fn len(&self) -> usize { |
1597 | self.0.len |
1598 | } |
1599 | |
1600 | /// Returns `true` if the buffer contains no elements. |
1601 | #[inline ] |
1602 | pub fn is_empty(&self) -> bool { |
1603 | self.0.is_empty() |
1604 | } |
1605 | |
1606 | /// Pushes a string to a buffer. |
1607 | #[inline ] |
1608 | pub fn push_str(&mut self, str: &str) { |
1609 | self.0.push_str(str); |
1610 | } |
1611 | |
1612 | /// Sets the pre-context for this buffer. |
1613 | #[inline ] |
1614 | pub fn set_pre_context(&mut self, str: &str) { |
1615 | self.0.set_pre_context(str) |
1616 | } |
1617 | |
1618 | /// Sets the post-context for this buffer. |
1619 | #[inline ] |
1620 | pub fn set_post_context(&mut self, str: &str) { |
1621 | self.0.set_post_context(str) |
1622 | } |
1623 | |
1624 | /// Appends a character to a buffer with the given cluster value. |
1625 | #[inline ] |
1626 | pub fn add(&mut self, codepoint: char, cluster: u32) { |
1627 | self.0.add(codepoint as u32, cluster); |
1628 | self.0.context_len[1] = 0; |
1629 | } |
1630 | |
1631 | /// Set the text direction of the `Buffer`'s contents. |
1632 | #[inline ] |
1633 | pub fn set_direction(&mut self, direction: Direction) { |
1634 | self.0.direction = direction; |
1635 | } |
1636 | |
1637 | /// Returns the `Buffer`'s text direction. |
1638 | #[inline ] |
1639 | pub fn direction(&self) -> Direction { |
1640 | self.0.direction |
1641 | } |
1642 | |
1643 | /// Set the script from an ISO15924 tag. |
1644 | #[inline ] |
1645 | pub fn set_script(&mut self, script: Script) { |
1646 | self.0.script = Some(script); |
1647 | } |
1648 | |
1649 | /// Get the ISO15924 script tag. |
1650 | pub fn script(&self) -> Script { |
1651 | self.0.script.unwrap_or(script::UNKNOWN) |
1652 | } |
1653 | |
1654 | /// Set the buffer language. |
1655 | #[inline ] |
1656 | pub fn set_language(&mut self, lang: Language) { |
1657 | self.0.language = Some(lang); |
1658 | } |
1659 | |
1660 | /// Get the buffer language. |
1661 | #[inline ] |
1662 | pub fn language(&self) -> Option<Language> { |
1663 | self.0.language.clone() |
1664 | } |
1665 | |
1666 | /// Guess the segment properties (direction, language, script) for the |
1667 | /// current buffer. |
1668 | #[inline ] |
1669 | pub fn guess_segment_properties(&mut self) { |
1670 | self.0.guess_segment_properties() |
1671 | } |
1672 | |
1673 | /// Set the flags for this buffer. |
1674 | #[inline ] |
1675 | pub fn set_flags(&mut self, flags: BufferFlags) { |
1676 | self.0.flags = flags; |
1677 | } |
1678 | |
1679 | /// Get the flags for this buffer. |
1680 | #[inline ] |
1681 | pub fn flags(&self) -> BufferFlags { |
1682 | self.0.flags |
1683 | } |
1684 | |
1685 | /// Set the cluster level of the buffer. |
1686 | #[inline ] |
1687 | pub fn set_cluster_level(&mut self, cluster_level: BufferClusterLevel) { |
1688 | self.0.cluster_level = cluster_level |
1689 | } |
1690 | |
1691 | /// Retrieve the cluster level of the buffer. |
1692 | #[inline ] |
1693 | pub fn cluster_level(&self) -> BufferClusterLevel { |
1694 | self.0.cluster_level |
1695 | } |
1696 | |
1697 | /// Resets clusters. |
1698 | #[inline ] |
1699 | pub fn reset_clusters(&mut self) { |
1700 | self.0.reset_clusters(); |
1701 | } |
1702 | |
1703 | /// Clear the contents of the buffer. |
1704 | #[inline ] |
1705 | pub fn clear(&mut self) { |
1706 | self.0.clear() |
1707 | } |
1708 | } |
1709 | |
1710 | impl core::fmt::Debug for UnicodeBuffer { |
1711 | fn fmt(&self, fmt: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
1712 | fmt&mut DebugStruct<'_, '_>.debug_struct("UnicodeBuffer" ) |
1713 | .field("direction" , &self.direction()) |
1714 | .field("language" , &self.language()) |
1715 | .field("script" , &self.script()) |
1716 | .field(name:"cluster_level" , &self.cluster_level()) |
1717 | .finish() |
1718 | } |
1719 | } |
1720 | |
1721 | impl Default for UnicodeBuffer { |
1722 | fn default() -> UnicodeBuffer { |
1723 | UnicodeBuffer::new() |
1724 | } |
1725 | } |
1726 | |
1727 | /// A buffer that contains the results of the shaping process. |
1728 | pub struct GlyphBuffer(pub(crate) Buffer); |
1729 | |
1730 | impl GlyphBuffer { |
1731 | /// Returns the length of the data of the buffer. |
1732 | /// |
1733 | /// When called before shaping this is the number of unicode codepoints |
1734 | /// contained in the buffer. When called after shaping it returns the number |
1735 | /// of glyphs stored. |
1736 | #[inline ] |
1737 | pub fn len(&self) -> usize { |
1738 | self.0.len |
1739 | } |
1740 | |
1741 | /// Returns `true` if the buffer contains no elements. |
1742 | #[inline ] |
1743 | pub fn is_empty(&self) -> bool { |
1744 | self.0.is_empty() |
1745 | } |
1746 | |
1747 | /// Get the glyph infos. |
1748 | #[inline ] |
1749 | pub fn glyph_infos(&self) -> &[GlyphInfo] { |
1750 | &self.0.info[0..self.0.len] |
1751 | } |
1752 | |
1753 | /// Get the glyph positions. |
1754 | #[inline ] |
1755 | pub fn glyph_positions(&self) -> &[GlyphPosition] { |
1756 | &self.0.pos[0..self.0.len] |
1757 | } |
1758 | |
1759 | /// Clears the content of the glyph buffer and returns an empty |
1760 | /// `UnicodeBuffer` reusing the existing allocation. |
1761 | #[inline ] |
1762 | pub fn clear(mut self) -> UnicodeBuffer { |
1763 | self.0.clear(); |
1764 | UnicodeBuffer(self.0) |
1765 | } |
1766 | |
1767 | /// Converts the glyph buffer content into a string. |
1768 | pub fn serialize(&self, face: &Face, flags: SerializeFlags) -> String { |
1769 | self.serialize_impl(face, flags).unwrap_or_default() |
1770 | } |
1771 | |
1772 | fn serialize_impl( |
1773 | &self, |
1774 | face: &Face, |
1775 | flags: SerializeFlags, |
1776 | ) -> Result<String, core::fmt::Error> { |
1777 | use core::fmt::Write; |
1778 | |
1779 | let mut s = String::with_capacity(64); |
1780 | |
1781 | let info = self.glyph_infos(); |
1782 | let pos = self.glyph_positions(); |
1783 | let mut x = 0; |
1784 | let mut y = 0; |
1785 | for (info, pos) in info.iter().zip(pos) { |
1786 | if !flags.contains(SerializeFlags::NO_GLYPH_NAMES) { |
1787 | match face.glyph_name(info.as_glyph()) { |
1788 | Some(name) => s.push_str(name), |
1789 | None => write!(&mut s, "gid {}" , info.glyph_id)?, |
1790 | } |
1791 | } else { |
1792 | write!(&mut s, " {}" , info.glyph_id)?; |
1793 | } |
1794 | |
1795 | if !flags.contains(SerializeFlags::NO_CLUSTERS) { |
1796 | write!(&mut s, "= {}" , info.cluster)?; |
1797 | } |
1798 | |
1799 | if !flags.contains(SerializeFlags::NO_POSITIONS) { |
1800 | if x + pos.x_offset != 0 || y + pos.y_offset != 0 { |
1801 | write!(&mut s, "@ {}, {}" , x + pos.x_offset, y + pos.y_offset)?; |
1802 | } |
1803 | |
1804 | if !flags.contains(SerializeFlags::NO_ADVANCES) { |
1805 | write!(&mut s, "+ {}" , pos.x_advance)?; |
1806 | if pos.y_advance != 0 { |
1807 | write!(&mut s, ", {}" , pos.y_advance)?; |
1808 | } |
1809 | } |
1810 | } |
1811 | |
1812 | if flags.contains(SerializeFlags::GLYPH_FLAGS) { |
1813 | if info.mask & glyph_flag::DEFINED != 0 { |
1814 | write!(&mut s, "# {:X}" , info.mask & glyph_flag::DEFINED)?; |
1815 | } |
1816 | } |
1817 | |
1818 | if flags.contains(SerializeFlags::GLYPH_EXTENTS) { |
1819 | let extents = face.glyph_extents(info.as_glyph()).unwrap_or_default(); |
1820 | write!( |
1821 | &mut s, |
1822 | "< {}, {}, {}, {}>" , |
1823 | extents.x_bearing, extents.y_bearing, extents.width, extents.height |
1824 | )?; |
1825 | } |
1826 | |
1827 | if flags.contains(SerializeFlags::NO_ADVANCES) { |
1828 | x += pos.x_advance; |
1829 | y += pos.y_advance; |
1830 | } |
1831 | |
1832 | s.push('|' ); |
1833 | } |
1834 | |
1835 | // Remove last `|`. |
1836 | if !s.is_empty() { |
1837 | s.pop(); |
1838 | } |
1839 | |
1840 | Ok(s) |
1841 | } |
1842 | } |
1843 | |
1844 | impl core::fmt::Debug for GlyphBuffer { |
1845 | fn fmt(&self, fmt: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
1846 | fmt&mut DebugStruct<'_, '_>.debug_struct("GlyphBuffer" ) |
1847 | .field("glyph_positions" , &self.glyph_positions()) |
1848 | .field(name:"glyph_infos" , &self.glyph_infos()) |
1849 | .finish() |
1850 | } |
1851 | } |
1852 | |