1use alloc::{string::String, vec::Vec};
2use core::convert::TryFrom;
3use std::cmp::min;
4
5use ttf_parser::GlyphId;
6
7use crate::buffer::glyph_flag::{UNSAFE_TO_BREAK, UNSAFE_TO_CONCAT};
8use crate::face::GlyphExtents;
9use crate::unicode::{CharExt, GeneralCategory, GeneralCategoryExt, Space};
10use crate::{script, Direction, Face, Language, Mask, Script};
11
/// Number of characters stored in each of the buffer's pre- and post-context arrays.
const CONTEXT_LENGTH: usize = 5;
13
/// Per-glyph flags reported alongside shaping results.
pub mod glyph_flag {
    /// Indicates that if input text is broken at the beginning of the cluster
    /// this glyph is part of, then both sides need to be re-shaped, as the
    /// result might be different.
    ///
    /// On the flip side, when this flag is not present it is safe to break
    /// the glyph-run at the beginning of this cluster, and the two sides will
    /// represent the exact same result one would get if breaking input text
    /// at the beginning of this cluster and shaping the two sides separately.
    ///
    /// This can be used to optimize paragraph layout, by avoiding re-shaping
    /// of each line after line-breaking.
    pub const UNSAFE_TO_BREAK: u32 = 1 << 0;

    /// Indicates that if input text is changed on one side of the beginning
    /// of the cluster this glyph is part of, then the shaping results for the
    /// other side might change.
    ///
    /// Note that the absence of this flag will NOT by itself mean that it IS
    /// safe to concat text. Only two pieces of text that are both clear of
    /// this flag can be concatenated safely.
    ///
    /// This can be used to optimize paragraph layout, by avoiding re-shaping
    /// of each line after line-breaking, by limiting the reshaping to a small
    /// piece around the breaking position only, even if the breaking position
    /// carries `UNSAFE_TO_BREAK` or when hyphenation or other text
    /// transformation happens at the line-break position, in the following
    /// way:
    ///
    /// 1. Iterate back from the line-break position until the first cluster
    ///    start position that is NOT unsafe-to-concat.
    /// 2. Shape the segment from there till the end of line.
    /// 3. Check whether the resulting glyph-run also is clear of the
    ///    unsafe-to-concat at its start-of-text position; if it is, just
    ///    splice it into place and the line is shaped. If not, move on to a
    ///    position further back that is clear of unsafe-to-concat and retry
    ///    from there, and repeat.
    ///
    /// At the start of the next line a similar algorithm can be implemented:
    ///
    /// 1. Iterate forward from the line-break position until the first
    ///    cluster start position that is NOT unsafe-to-concat.
    /// 2. Shape the segment from the beginning of the line to that position.
    /// 3. Check whether the resulting glyph-run also is clear of the
    ///    unsafe-to-concat at its end-of-text position; if it is, just splice
    ///    it into place and the beginning is shaped. If not, move on to a
    ///    position further forward that is clear of unsafe-to-concat and
    ///    retry up to there, and repeat.
    ///
    /// A slight complication will arise in the implementation of the
    /// algorithm above, because while our buffer API has a way to return
    /// flags for the position corresponding to start-of-text, there is
    /// currently no position corresponding to end-of-text. This limitation
    /// can be alleviated by shaping more text than needed and looking for the
    /// unsafe-to-concat flag within text clusters.
    ///
    /// The `UNSAFE_TO_BREAK` flag will always imply this flag. To use this
    /// flag, you must enable the buffer flag `PRODUCE_UNSAFE_TO_CONCAT`
    /// during shaping, otherwise this flag will not be reliably produced.
    pub const UNSAFE_TO_CONCAT: u32 = 1 << 1;

    /// All the currently defined flags (bitwise OR of every flag above).
    pub const DEFINED: u32 = UNSAFE_TO_BREAK | UNSAFE_TO_CONCAT;
}
96
/// Holds the positions of the glyph in both horizontal and vertical directions.
///
/// All positions are relative to the current point.
///
/// The field order is significant: the type is `#[repr(C)]` and is
/// reinterpreted through `bytemuck` elsewhere in this file.
#[repr(C)]
#[derive(Debug, Default, Clone, Copy)]
pub struct GlyphPosition {
    /// How much the line advances after drawing this glyph when setting text
    /// in horizontal direction.
    pub x_advance: i32,
    /// How much the line advances after drawing this glyph when setting text
    /// in vertical direction.
    pub y_advance: i32,
    /// How much the glyph moves on the X-axis before drawing it; this should
    /// not affect how much the line advances.
    pub x_offset: i32,
    /// How much the glyph moves on the Y-axis before drawing it; this should
    /// not affect how much the line advances.
    pub y_offset: i32,
    // Internal scratch word; holds the attachment chain and attachment type
    // (see the accessors on `GlyphPosition`).
    var: u32,
}
117
118unsafe impl bytemuck::Zeroable for GlyphPosition {}
119unsafe impl bytemuck::Pod for GlyphPosition {}
120
121impl GlyphPosition {
122 #[inline]
123 pub(crate) fn attach_chain(&self) -> i16 {
124 // glyph to which this attaches to, relative to current glyphs;
125 // negative for going back, positive for forward.
126 let v: &[i16; 2] = bytemuck::cast_ref(&self.var);
127 v[0]
128 }
129
130 #[inline]
131 pub(crate) fn set_attach_chain(&mut self, n: i16) {
132 let v: &mut [i16; 2] = bytemuck::cast_mut(&mut self.var);
133 v[0] = n;
134 }
135
136 #[inline]
137 pub(crate) fn attach_type(&self) -> u8 {
138 // attachment type
139 // Note! if attach_chain() is zero, the value of attach_type() is irrelevant.
140 let v: &[u8; 4] = bytemuck::cast_ref(&self.var);
141 v[2]
142 }
143
144 #[inline]
145 pub(crate) fn set_attach_type(&mut self, n: u8) {
146 let v: &mut [u8; 4] = bytemuck::cast_mut(&mut self.var);
147 v[2] = n;
148 }
149}
150
151/// A glyph info.
152#[repr(C)]
153#[derive(Clone, Copy, Default, Debug)]
154pub struct GlyphInfo {
155 // NOTE: Stores a Unicode codepoint before shaping and a glyph ID after.
156 // Just like harfbuzz, we are using the same variable for two purposes.
157 // Occupies u32 as a codepoint and u16 as a glyph id.
158 /// A selected glyph.
159 ///
160 /// Guarantee to be <= `u16::MAX`.
161 pub glyph_id: u32,
162 pub(crate) mask: Mask,
163 /// An index to the start of the grapheme cluster in the original string.
164 ///
165 /// [Read more on clusters](https://harfbuzz.github.io/clusters.html).
166 pub cluster: u32,
167 pub(crate) var1: u32,
168 pub(crate) var2: u32,
169}
170
171unsafe impl bytemuck::Zeroable for GlyphInfo {}
172unsafe impl bytemuck::Pod for GlyphInfo {}
173
/// Bit in `lig_props` marking the glyph as the base of a ligature; when set,
/// the low nibble holds the component count instead of a component index.
const IS_LIG_BASE: u8 = 0x10;
175
176impl GlyphInfo {
177 /// Indicates that if input text is broken at the beginning of the cluster this glyph
178 /// is part of, then both sides need to be re-shaped, as the result might be different.
179 ///
180 /// On the flip side, it means that when this flag is not present,
181 /// then it's safe to break the glyph-run at the beginning of this cluster,
182 /// and the two sides represent the exact same result one would get if breaking input text
183 /// at the beginning of this cluster and shaping the two sides separately.
184 /// This can be used to optimize paragraph layout, by avoiding re-shaping of each line
185 /// after line-breaking, or limiting the reshaping to a small piece around
186 /// the breaking point only.
187 pub fn unsafe_to_break(&self) -> bool {
188 self.mask & glyph_flag::UNSAFE_TO_BREAK != 0
189 }
190
191 #[inline]
192 pub(crate) fn as_char(&self) -> char {
193 char::try_from(self.glyph_id).unwrap()
194 }
195
196 #[inline]
197 pub(crate) fn as_glyph(&self) -> GlyphId {
198 debug_assert!(self.glyph_id <= u32::from(u16::MAX));
199 GlyphId(self.glyph_id as u16)
200 }
201
202 // Var allocation: unicode_props
203 // Used during the entire shaping process to store unicode properties
204
205 pub(crate) fn complex_var_u8_category(&self) -> u8 {
206 let v: &[u8; 4] = bytemuck::cast_ref(&self.var2);
207 v[2]
208 }
209
210 pub(crate) fn set_complex_var_u8_category(&mut self, c: u8) {
211 let v: &mut [u8; 4] = bytemuck::cast_mut(&mut self.var2);
212 v[2] = c;
213 }
214
215 pub(crate) fn complex_var_u8_auxiliary(&self) -> u8 {
216 let v: &[u8; 4] = bytemuck::cast_ref(&self.var2);
217 v[3]
218 }
219
220 pub(crate) fn set_complex_var_u8_auxiliary(&mut self, c: u8) {
221 let v: &mut [u8; 4] = bytemuck::cast_mut(&mut self.var2);
222 v[3] = c;
223 }
224
225 #[inline]
226 fn unicode_props(&self) -> u16 {
227 let v: &[u16; 2] = bytemuck::cast_ref(&self.var2);
228 v[0]
229 }
230
231 #[inline]
232 fn set_unicode_props(&mut self, n: u16) {
233 let v: &mut [u16; 2] = bytemuck::cast_mut(&mut self.var2);
234 v[0] = n;
235 }
236
237 pub(crate) fn init_unicode_props(&mut self, scratch_flags: &mut BufferScratchFlags) {
238 let u = self.as_char();
239 let gc = u.general_category();
240 let mut props = gc.to_rb() as u16;
241
242 if u as u32 >= 0x80 {
243 *scratch_flags |= BufferScratchFlags::HAS_NON_ASCII;
244
245 if u.is_default_ignorable() {
246 props |= UnicodeProps::IGNORABLE.bits();
247 *scratch_flags |= BufferScratchFlags::HAS_DEFAULT_IGNORABLES;
248
249 match u as u32 {
250 0x200C => props |= UnicodeProps::CF_ZWNJ.bits(),
251 0x200D => props |= UnicodeProps::CF_ZWJ.bits(),
252
253 // Mongolian Free Variation Selectors need to be remembered
254 // because although we need to hide them like default-ignorables,
255 // they need to non-ignorable during shaping. This is similar to
256 // what we do for joiners in Indic-like shapers, but since the
257 // FVSes are GC=Mn, we have use a separate bit to remember them.
258 // Fixes:
259 // https://github.com/harfbuzz/harfbuzz/issues/234
260 0x180B..=0x180D | 0x180F => props |= UnicodeProps::HIDDEN.bits(),
261
262 // TAG characters need similar treatment. Fixes:
263 // https://github.com/harfbuzz/harfbuzz/issues/463
264 0xE0020..=0xE007F => props |= UnicodeProps::HIDDEN.bits(),
265
266 // COMBINING GRAPHEME JOINER should not be skipped; at least some times.
267 // https://github.com/harfbuzz/harfbuzz/issues/554
268 0x034F => {
269 props |= UnicodeProps::HIDDEN.bits();
270 *scratch_flags |= BufferScratchFlags::HAS_CGJ;
271 }
272
273 _ => {}
274 }
275 }
276
277 if gc.is_mark() {
278 props |= UnicodeProps::CONTINUATION.bits();
279 props |= (u.modified_combining_class() as u16) << 8;
280 }
281 }
282
283 self.set_unicode_props(props);
284 }
285
286 #[inline]
287 pub(crate) fn general_category(&self) -> GeneralCategory {
288 let n = self.unicode_props() & UnicodeProps::GENERAL_CATEGORY.bits();
289 GeneralCategory::from_rb(n as u32)
290 }
291
292 #[inline]
293 pub(crate) fn set_general_category(&mut self, gc: GeneralCategory) {
294 let gc = gc.to_rb();
295 let n =
296 (gc as u16) | (self.unicode_props() & (0xFF & !UnicodeProps::GENERAL_CATEGORY.bits()));
297 self.set_unicode_props(n);
298 }
299
300 #[inline]
301 pub(crate) fn space_fallback(&self) -> Option<Space> {
302 if self.general_category() == GeneralCategory::SpaceSeparator {
303 let n = (self.unicode_props() >> 8) as u8;
304 Some(n)
305 } else {
306 None
307 }
308 }
309
310 #[inline]
311 pub(crate) fn set_space_fallback(&mut self, space: Space) {
312 if self.general_category() == GeneralCategory::SpaceSeparator {
313 let n = ((space as u16) << 8) | (self.unicode_props() & 0xFF);
314 self.set_unicode_props(n);
315 }
316 }
317
318 #[inline]
319 pub(crate) fn is_unicode_mark(&self) -> bool {
320 self.general_category().is_mark()
321 }
322
323 #[inline]
324 pub(crate) fn is_zwnj(&self) -> bool {
325 self.general_category() == GeneralCategory::Format
326 && (self.unicode_props() & UnicodeProps::CF_ZWNJ.bits() != 0)
327 }
328
329 #[inline]
330 pub(crate) fn is_zwj(&self) -> bool {
331 self.general_category() == GeneralCategory::Format
332 && (self.unicode_props() & UnicodeProps::CF_ZWJ.bits() != 0)
333 }
334
335 #[inline]
336 pub(crate) fn modified_combining_class(&self) -> u8 {
337 if self.is_unicode_mark() {
338 (self.unicode_props() >> 8) as u8
339 } else {
340 0
341 }
342 }
343
344 #[inline]
345 pub(crate) fn set_modified_combining_class(&mut self, mcc: u8) {
346 if self.is_unicode_mark() {
347 let n = ((mcc as u16) << 8) | (self.unicode_props() & 0xFF);
348 self.set_unicode_props(n);
349 }
350 }
351
352 #[inline]
353 pub(crate) fn is_hidden(&self) -> bool {
354 self.unicode_props() & UnicodeProps::HIDDEN.bits() != 0
355 }
356
357 #[inline]
358 pub(crate) fn unhide(&mut self) {
359 let mut n = self.unicode_props();
360 n &= !UnicodeProps::HIDDEN.bits();
361 self.set_unicode_props(n);
362 }
363
364 #[inline]
365 pub(crate) fn set_continuation(&mut self) {
366 let mut n = self.unicode_props();
367 n |= UnicodeProps::CONTINUATION.bits();
368 self.set_unicode_props(n);
369 }
370
371 #[inline]
372 pub(crate) fn reset_continuation(&mut self) {
373 let mut n = self.unicode_props();
374 n &= !UnicodeProps::CONTINUATION.bits();
375 self.set_unicode_props(n);
376 }
377
378 #[inline]
379 pub(crate) fn is_continuation(&self) -> bool {
380 self.unicode_props() & UnicodeProps::CONTINUATION.bits() != 0
381 }
382
383 #[inline]
384 pub(crate) fn is_default_ignorable(&self) -> bool {
385 let n = self.unicode_props() & UnicodeProps::IGNORABLE.bits();
386 n != 0 && !self.is_substituted()
387 }
388
389 // Var allocation: lig_props (aka lig_id / lig_comp)
390 // Used during the GSUB/GPOS processing to track ligatures
391 //
392 // When a ligature is formed:
393 //
394 // - The ligature glyph and any marks in between all the same newly allocated
395 // lig_id,
396 // - The ligature glyph will get lig_num_comps set to the number of components
397 // - The marks get lig_comp > 0, reflecting which component of the ligature
398 // they were applied to.
399 // - This is used in GPOS to attach marks to the right component of a ligature
400 // in MarkLigPos,
401 // - Note that when marks are ligated together, much of the above is skipped
402 // and the current lig_id reused.
403 //
404 // When a multiple-substitution is done:
405 //
406 // - All resulting glyphs will have lig_id = 0,
407 // - The resulting glyphs will have lig_comp = 0, 1, 2, ... respectively.
408 // - This is used in GPOS to attach marks to the first component of a
409 // multiple substitution in MarkBasePos.
410 //
411 // The numbers are also used in GPOS to do mark-to-mark positioning only
412 // to marks that belong to the same component of the same ligature.
413
414 #[inline]
415 pub(crate) fn lig_props(&self) -> u8 {
416 let v: &[u8; 4] = bytemuck::cast_ref(&self.var1);
417 v[2]
418 }
419
420 #[inline]
421 pub(crate) fn set_lig_props(&mut self, n: u8) {
422 let v: &mut [u8; 4] = bytemuck::cast_mut(&mut self.var1);
423 v[2] = n;
424 }
425
426 pub(crate) fn set_lig_props_for_ligature(&mut self, lig_id: u8, lig_num_comps: u8) {
427 self.set_lig_props((lig_id << 5) | IS_LIG_BASE | (lig_num_comps & 0x0F));
428 }
429
430 pub(crate) fn set_lig_props_for_mark(&mut self, lig_id: u8, lig_comp: u8) {
431 self.set_lig_props((lig_id << 5) | (lig_comp & 0x0F));
432 }
433
434 pub(crate) fn set_lig_props_for_component(&mut self, lig_comp: u8) {
435 self.set_lig_props_for_mark(0, lig_comp)
436 }
437
438 #[inline]
439 pub(crate) fn lig_id(&self) -> u8 {
440 self.lig_props() >> 5
441 }
442
443 #[inline]
444 pub(crate) fn is_ligated_internal(&self) -> bool {
445 self.lig_props() & IS_LIG_BASE != 0
446 }
447
448 #[inline]
449 pub(crate) fn lig_comp(&self) -> u8 {
450 if self.is_ligated_internal() {
451 0
452 } else {
453 self.lig_props() & 0x0F
454 }
455 }
456
457 #[inline]
458 pub(crate) fn lig_num_comps(&self) -> u8 {
459 if self.glyph_props() & GlyphPropsFlags::LIGATURE.bits() != 0 && self.is_ligated_internal()
460 {
461 self.lig_props() & 0x0F
462 } else {
463 1
464 }
465 }
466
467 // Var allocation: glyph_props
468 // Used during the GSUB/GPOS processing to store GDEF glyph properties
469
470 #[inline]
471 pub(crate) fn glyph_props(&self) -> u16 {
472 let v: &[u16; 2] = bytemuck::cast_ref(&self.var1);
473 v[0]
474 }
475
476 #[inline]
477 pub(crate) fn set_glyph_props(&mut self, n: u16) {
478 let v: &mut [u16; 2] = bytemuck::cast_mut(&mut self.var1);
479 v[0] = n;
480 }
481
482 #[inline]
483 pub(crate) fn is_base_glyph(&self) -> bool {
484 self.glyph_props() & GlyphPropsFlags::BASE_GLYPH.bits() != 0
485 }
486
487 #[inline]
488 pub(crate) fn is_ligature(&self) -> bool {
489 self.glyph_props() & GlyphPropsFlags::LIGATURE.bits() != 0
490 }
491
492 #[inline]
493 pub(crate) fn is_mark(&self) -> bool {
494 self.glyph_props() & GlyphPropsFlags::MARK.bits() != 0
495 }
496
497 #[inline]
498 pub(crate) fn is_substituted(&self) -> bool {
499 self.glyph_props() & GlyphPropsFlags::SUBSTITUTED.bits() != 0
500 }
501
502 #[inline]
503 pub(crate) fn is_ligated(&self) -> bool {
504 self.glyph_props() & GlyphPropsFlags::LIGATED.bits() != 0
505 }
506
507 #[inline]
508 pub(crate) fn is_multiplied(&self) -> bool {
509 self.glyph_props() & GlyphPropsFlags::MULTIPLIED.bits() != 0
510 }
511
512 #[inline]
513 pub(crate) fn is_ligated_and_didnt_multiply(&self) -> bool {
514 self.is_ligated() && !self.is_multiplied()
515 }
516
517 #[inline]
518 pub(crate) fn clear_ligated_and_multiplied(&mut self) {
519 let mut n = self.glyph_props();
520 n &= !(GlyphPropsFlags::LIGATED | GlyphPropsFlags::MULTIPLIED).bits();
521 self.set_glyph_props(n);
522 }
523
524 #[inline]
525 pub(crate) fn clear_substituted(&mut self) {
526 let mut n = self.glyph_props();
527 n &= !GlyphPropsFlags::SUBSTITUTED.bits();
528 self.set_glyph_props(n);
529 }
530
531 // Var allocation: syllable
532 // Used during the GSUB/GPOS processing to store shaping boundaries
533
534 #[inline]
535 pub(crate) fn syllable(&self) -> u8 {
536 let v: &[u8; 4] = bytemuck::cast_ref(&self.var1);
537 v[3]
538 }
539
540 #[inline]
541 pub(crate) fn set_syllable(&mut self, n: u8) {
542 let v: &mut [u8; 4] = bytemuck::cast_mut(&mut self.var1);
543 v[3] = n;
544 }
545
546 // Var allocation: glyph_index
547 // Used during the normalization process to store glyph indices
548
549 #[inline]
550 pub(crate) fn glyph_index(&mut self) -> u32 {
551 self.var1
552 }
553
554 #[inline]
555 pub(crate) fn set_glyph_index(&mut self, n: u32) {
556 self.var1 = n;
557 }
558}
559
/// A cluster level.
///
/// Controls how the buffer merges cluster values; see
/// [clusters](https://harfbuzz.github.io/clusters.html).
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BufferClusterLevel {
    // The default level.
    MonotoneGraphemes,
    // The level at which `reverse_graphemes` also merges clusters.
    MonotoneCharacters,
    // At this level `merge_clusters` only flags the range unsafe-to-break
    // and `merge_out_clusters` does nothing.
    Characters,
}
568
569impl Default for BufferClusterLevel {
570 #[inline]
571 fn default() -> Self {
572 BufferClusterLevel::MonotoneGraphemes
573 }
574}
575
576pub struct Buffer {
577 // Information about how the text in the buffer should be treated.
578 pub flags: BufferFlags,
579 pub cluster_level: BufferClusterLevel,
580 pub invisible: Option<GlyphId>,
581
582 // Buffer contents.
583 pub direction: Direction,
584 pub script: Option<Script>,
585 pub language: Option<Language>,
586
587 /// Allocations successful.
588 pub successful: bool,
589 /// Whether we have an output buffer going on.
590 pub(crate) have_output: bool,
591 pub have_separate_output: bool,
592 /// Whether we have positions
593 pub have_positions: bool,
594
595 pub idx: usize,
596 pub len: usize,
597 pub out_len: usize,
598
599 pub info: Vec<GlyphInfo>,
600 pub pos: Vec<GlyphPosition>,
601
602 // Text before / after the main buffer contents.
603 // Always in Unicode, and ordered outward.
604 // Index 0 is for "pre-context", 1 for "post-context".
605 pub context: [[char; CONTEXT_LENGTH]; 2],
606 pub context_len: [usize; 2],
607
608 // Managed by enter / leave
609 pub serial: u8,
610 pub scratch_flags: BufferScratchFlags,
611 /// Maximum allowed len.
612 pub max_len: usize,
613 /// Maximum allowed operations.
614 pub max_ops: i32,
615}
616
617impl Buffer {
// Length limits. `MAX_LEN_DEFAULT` is the initial `max_len`; the factor and
// minimum are presumably used to derive a limit from the input size — their
// use is outside this part of the file.
pub const MAX_LEN_FACTOR: usize = 64;
pub const MAX_LEN_MIN: usize = 16_384;
// Shaping more than a billion chars? Let us know!
pub const MAX_LEN_DEFAULT: usize = 0x3FFF_FFFF;

// Operation-count limits, mirroring the length limits above.
pub const MAX_OPS_FACTOR: i32 = 1_024;
pub const MAX_OPS_MIN: i32 = 16_384;
// Shaping more than a billion operations? Let us know!
pub const MAX_OPS_DEFAULT: i32 = 0x1FFF_FFFF;
627
628 /// Creates a new `Buffer`.
629 pub fn new() -> Self {
630 Buffer {
631 flags: BufferFlags::empty(),
632 cluster_level: BufferClusterLevel::default(),
633 invisible: None,
634 scratch_flags: BufferScratchFlags::default(),
635 max_len: Self::MAX_LEN_DEFAULT,
636 max_ops: Self::MAX_OPS_DEFAULT,
637 direction: Direction::Invalid,
638 script: None,
639 language: None,
640 successful: true,
641 have_output: false,
642 have_positions: false,
643 idx: 0,
644 len: 0,
645 out_len: 0,
646 info: Vec::new(),
647 pos: Vec::new(),
648 have_separate_output: false,
649 serial: 0,
650 context: [
651 ['\0', '\0', '\0', '\0', '\0'],
652 ['\0', '\0', '\0', '\0', '\0'],
653 ],
654 context_len: [0, 0],
655 }
656 }
657
658 #[inline]
659 pub fn info_slice(&self) -> &[GlyphInfo] {
660 &self.info[..self.len]
661 }
662
663 #[inline]
664 pub fn info_slice_mut(&mut self) -> &mut [GlyphInfo] {
665 &mut self.info[..self.len]
666 }
667
668 #[inline]
669 pub fn out_info(&self) -> &[GlyphInfo] {
670 if self.have_separate_output {
671 bytemuck::cast_slice(self.pos.as_slice())
672 } else {
673 &self.info
674 }
675 }
676
677 #[inline]
678 pub fn out_info_mut(&mut self) -> &mut [GlyphInfo] {
679 if self.have_separate_output {
680 bytemuck::cast_slice_mut(self.pos.as_mut_slice())
681 } else {
682 &mut self.info
683 }
684 }
685
686 #[inline]
687 fn set_out_info(&mut self, i: usize, info: GlyphInfo) {
688 self.out_info_mut()[i] = info;
689 }
690
691 #[inline]
692 pub fn cur(&self, i: usize) -> &GlyphInfo {
693 &self.info[self.idx + i]
694 }
695
696 #[inline]
697 pub fn cur_mut(&mut self, i: usize) -> &mut GlyphInfo {
698 let idx = self.idx + i;
699 &mut self.info[idx]
700 }
701
702 #[inline]
703 pub fn cur_pos_mut(&mut self) -> &mut GlyphPosition {
704 let i = self.idx;
705 &mut self.pos[i]
706 }
707
708 #[inline]
709 pub fn prev(&self) -> &GlyphInfo {
710 let idx = self.out_len.saturating_sub(1);
711 &self.out_info()[idx]
712 }
713
714 #[inline]
715 pub fn prev_mut(&mut self) -> &mut GlyphInfo {
716 let idx = self.out_len.saturating_sub(1);
717 &mut self.out_info_mut()[idx]
718 }
719
720 fn clear(&mut self) {
721 self.direction = Direction::Invalid;
722 self.script = None;
723 self.language = None;
724
725 self.successful = true;
726 self.have_output = false;
727 self.have_positions = false;
728
729 self.idx = 0;
730 self.info.clear();
731 self.pos.clear();
732 self.len = 0;
733 self.out_len = 0;
734 self.have_separate_output = false;
735
736 self.context = [
737 ['\0', '\0', '\0', '\0', '\0'],
738 ['\0', '\0', '\0', '\0', '\0'],
739 ];
740 self.context_len = [0, 0];
741
742 self.serial = 0;
743 self.scratch_flags = BufferScratchFlags::default();
744 self.cluster_level = BufferClusterLevel::default();
745 }
746
747 #[inline]
748 pub fn backtrack_len(&self) -> usize {
749 if self.have_output {
750 self.out_len
751 } else {
752 self.idx
753 }
754 }
755
756 #[inline]
757 pub fn lookahead_len(&self) -> usize {
758 self.len - self.idx
759 }
760
761 #[inline]
762 fn next_serial(&mut self) -> u8 {
763 self.serial += 1;
764
765 if self.serial == 0 {
766 self.serial += 1;
767 }
768
769 self.serial
770 }
771
772 fn add(&mut self, codepoint: u32, cluster: u32) {
773 self.ensure(self.len + 1);
774
775 let i = self.len;
776 self.info[i] = GlyphInfo {
777 glyph_id: codepoint,
778 mask: 0,
779 cluster,
780 var1: 0,
781 var2: 0,
782 };
783
784 self.len += 1;
785 }
786
787 #[inline]
788 pub fn reverse(&mut self) {
789 if self.is_empty() {
790 return;
791 }
792
793 self.reverse_range(0, self.len);
794 }
795
796 pub fn reverse_range(&mut self, start: usize, end: usize) {
797 if end - start < 2 {
798 return;
799 }
800
801 self.info[start..end].reverse();
802 if self.have_positions {
803 self.pos[start..end].reverse();
804 }
805 }
806
807 pub fn reverse_groups<F>(&mut self, group: F, merge_clusters: bool)
808 where
809 F: Fn(&GlyphInfo, &GlyphInfo) -> bool,
810 {
811 if self.is_empty() {
812 return;
813 }
814
815 let mut start = 0;
816
817 for i in 1..self.len {
818 if !group(&self.info[i - 1], &self.info[i]) {
819 if merge_clusters {
820 self.merge_clusters(start, i);
821 }
822
823 self.reverse_range(start, i);
824 start = i;
825 }
826
827 if merge_clusters {
828 self.merge_clusters(start, i);
829 }
830
831 self.reverse_range(start, i);
832
833 self.reverse();
834 }
835 }
836
837 pub fn reverse_graphemes(&mut self) {
838 self.reverse_groups(
839 _grapheme_group_func,
840 self.cluster_level == BufferClusterLevel::MonotoneCharacters,
841 )
842 }
843
844 pub fn group_end<F>(&self, mut start: usize, group: F) -> usize
845 where
846 F: Fn(&GlyphInfo, &GlyphInfo) -> bool,
847 {
848 start += 1;
849
850 while start < self.len && group(&self.info[start - 1], &self.info[start]) {
851 start += 1;
852 }
853
854 start
855 }
856
857 #[inline]
858 fn reset_clusters(&mut self) {
859 for (i, info) in self.info.iter_mut().enumerate() {
860 info.cluster = i as u32;
861 }
862 }
863
864 pub fn guess_segment_properties(&mut self) {
865 if self.script.is_none() {
866 for info in &self.info {
867 match info.as_char().script() {
868 crate::script::COMMON | crate::script::INHERITED | crate::script::UNKNOWN => {}
869 s => {
870 self.script = Some(s);
871 break;
872 }
873 }
874 }
875 }
876
877 if self.direction == Direction::Invalid {
878 if let Some(script) = self.script {
879 self.direction = Direction::from_script(script).unwrap_or_default();
880 }
881
882 if self.direction == Direction::Invalid {
883 self.direction = Direction::LeftToRight;
884 }
885 }
886
887 // TODO: language must be set
888 }
889
890 pub fn sync(&mut self) {
891 assert!(self.have_output);
892
893 assert!(self.idx <= self.len);
894 if !self.successful {
895 self.have_output = false;
896 self.out_len = 0;
897 self.idx = 0;
898 return;
899 }
900
901 self.next_glyphs(self.len - self.idx);
902
903 if self.have_separate_output {
904 // Swap info and pos buffers.
905 let info: Vec<GlyphPosition> = bytemuck::cast_vec(core::mem::take(&mut self.info));
906 let pos: Vec<GlyphInfo> = bytemuck::cast_vec(core::mem::take(&mut self.pos));
907 self.pos = info;
908 self.info = pos;
909 }
910
911 self.len = self.out_len;
912
913 self.have_output = false;
914 self.out_len = 0;
915 self.idx = 0;
916 }
917
918 pub fn clear_output(&mut self) {
919 self.have_output = true;
920 self.have_positions = false;
921
922 self.idx = 0;
923 self.out_len = 0;
924 self.have_separate_output = false;
925 }
926
927 pub fn clear_positions(&mut self) {
928 self.have_output = false;
929 self.have_positions = true;
930
931 self.out_len = 0;
932 self.have_separate_output = false;
933
934 for pos in &mut self.pos {
935 *pos = GlyphPosition::default();
936 }
937 }
938
939 pub fn replace_glyphs(&mut self, num_in: usize, num_out: usize, glyph_data: &[u32]) {
940 if !self.make_room_for(num_in, num_out) {
941 return;
942 }
943
944 assert!(self.idx + num_in <= self.len);
945
946 self.merge_clusters(self.idx, self.idx + num_in);
947
948 let orig_info = self.info[self.idx];
949 for i in 0..num_out {
950 let ii = self.out_len + i;
951 self.set_out_info(ii, orig_info);
952 self.out_info_mut()[ii].glyph_id = glyph_data[i];
953 }
954
955 self.idx += num_in;
956 self.out_len += num_out;
957 }
958
959 pub fn replace_glyph(&mut self, glyph_index: u32) {
960 if self.have_separate_output || self.out_len != self.idx {
961 if !self.make_room_for(1, 1) {
962 return;
963 }
964
965 self.set_out_info(self.out_len, self.info[self.idx]);
966 }
967
968 let out_len = self.out_len;
969 self.out_info_mut()[out_len].glyph_id = glyph_index;
970
971 self.idx += 1;
972 self.out_len += 1;
973 }
974
975 pub fn output_glyph(&mut self, glyph_index: u32) {
976 if !self.make_room_for(0, 1) {
977 return;
978 }
979
980 if self.idx == self.len && self.out_len == 0 {
981 return;
982 }
983
984 let out_len = self.out_len;
985 if self.idx < self.len {
986 self.set_out_info(out_len, self.info[self.idx]);
987 } else {
988 let info = self.out_info()[out_len - 1];
989 self.set_out_info(out_len, info);
990 }
991
992 self.out_info_mut()[out_len].glyph_id = glyph_index;
993
994 self.out_len += 1;
995 }
996
997 pub fn output_info(&mut self, glyph_info: GlyphInfo) {
998 if !self.make_room_for(0, 1) {
999 return;
1000 }
1001
1002 self.set_out_info(self.out_len, glyph_info);
1003 self.out_len += 1;
1004 }
1005
1006 pub fn output_char(&mut self, unichar: u32, glyph: u32) {
1007 self.cur_mut(0).set_glyph_index(glyph);
1008 // This is very confusing indeed.
1009 self.output_glyph(unichar);
1010 let mut flags = self.scratch_flags;
1011 self.prev_mut().init_unicode_props(&mut flags);
1012 self.scratch_flags = flags;
1013 }
1014
1015 /// Copies glyph at idx to output but doesn't advance idx.
1016 pub fn copy_glyph(&mut self) {
1017 if !self.make_room_for(0, 1) {
1018 return;
1019 }
1020
1021 self.set_out_info(self.out_len, self.info[self.idx]);
1022 self.out_len += 1;
1023 }
1024
1025 /// Copies glyph at idx to output and advance idx.
1026 ///
1027 /// If there's no output, just advance idx.
1028 pub fn next_glyph(&mut self) {
1029 if self.have_output {
1030 if self.have_separate_output || self.out_len != self.idx {
1031 if !self.make_room_for(1, 1) {
1032 return;
1033 }
1034
1035 self.set_out_info(self.out_len, self.info[self.idx]);
1036 }
1037
1038 self.out_len += 1;
1039 }
1040
1041 self.idx += 1;
1042 }
1043
1044 /// Copies n glyphs at idx to output and advance idx.
1045 ///
1046 /// If there's no output, just advance idx.
1047 pub fn next_glyphs(&mut self, n: usize) {
1048 if self.have_output {
1049 if self.have_separate_output || self.out_len != self.idx {
1050 if !self.make_room_for(n, n) {
1051 return;
1052 }
1053
1054 for i in 0..n {
1055 self.set_out_info(self.out_len + i, self.info[self.idx + i]);
1056 }
1057 }
1058
1059 self.out_len += n;
1060 }
1061
1062 self.idx += n;
1063 }
1064
1065 pub fn next_char(&mut self, glyph: u32) {
1066 self.cur_mut(0).set_glyph_index(glyph);
1067 self.next_glyph();
1068 }
1069
1070 /// Advance idx without copying to output.
1071 pub fn skip_glyph(&mut self) {
1072 self.idx += 1;
1073 }
1074
1075 pub fn reset_masks(&mut self, mask: Mask) {
1076 for info in &mut self.info[..self.len] {
1077 info.mask = mask;
1078 }
1079 }
1080
1081 pub fn set_masks(&mut self, mut value: Mask, mask: Mask, cluster_start: u32, cluster_end: u32) {
1082 let not_mask = !mask;
1083 value &= mask;
1084
1085 if mask == 0 {
1086 return;
1087 }
1088
1089 if cluster_start == 0 && cluster_end == core::u32::MAX {
1090 for info in &mut self.info[..self.len] {
1091 info.mask = (info.mask & not_mask) | value;
1092 }
1093
1094 return;
1095 }
1096
1097 for info in &mut self.info[..self.len] {
1098 if cluster_start <= info.cluster && info.cluster < cluster_end {
1099 info.mask = (info.mask & not_mask) | value;
1100 }
1101 }
1102 }
1103
1104 pub fn merge_clusters(&mut self, start: usize, end: usize) {
1105 if end - start < 2 {
1106 return;
1107 }
1108
1109 self.merge_clusters_impl(start, end)
1110 }
1111
1112 fn merge_clusters_impl(&mut self, mut start: usize, mut end: usize) {
1113 if self.cluster_level == BufferClusterLevel::Characters {
1114 self.unsafe_to_break(Some(start), Some(end));
1115 return;
1116 }
1117
1118 let mut cluster = self.info[start].cluster;
1119
1120 for i in start + 1..end {
1121 cluster = core::cmp::min(cluster, self.info[i].cluster);
1122 }
1123
1124 // Extend end
1125 while end < self.len && self.info[end - 1].cluster == self.info[end].cluster {
1126 end += 1;
1127 }
1128
1129 // Extend start
1130 while end < start && self.info[start - 1].cluster == self.info[start].cluster {
1131 start -= 1;
1132 }
1133
1134 // If we hit the start of buffer, continue in out-buffer.
1135 if self.idx == start {
1136 let mut i = self.out_len;
1137 while i != 0 && self.out_info()[i - 1].cluster == self.info[start].cluster {
1138 Self::set_cluster(&mut self.out_info_mut()[i - 1], cluster, 0);
1139 i -= 1;
1140 }
1141 }
1142
1143 for i in start..end {
1144 Self::set_cluster(&mut self.info[i], cluster, 0);
1145 }
1146 }
1147
1148 pub fn merge_out_clusters(&mut self, mut start: usize, mut end: usize) {
1149 if self.cluster_level == BufferClusterLevel::Characters {
1150 return;
1151 }
1152
1153 if end - start < 2 {
1154 return;
1155 }
1156
1157 let mut cluster = self.out_info()[start].cluster;
1158
1159 for i in start + 1..end {
1160 cluster = core::cmp::min(cluster, self.out_info()[i].cluster);
1161 }
1162
1163 // Extend start
1164 while start != 0 && self.out_info()[start - 1].cluster == self.out_info()[start].cluster {
1165 start -= 1;
1166 }
1167
1168 // Extend end
1169 while end < self.out_len && self.out_info()[end - 1].cluster == self.out_info()[end].cluster
1170 {
1171 end += 1;
1172 }
1173
1174 // If we hit the start of buffer, continue in out-buffer.
1175 if end == self.out_len {
1176 let mut i = self.idx;
1177 while i < self.len && self.info[i].cluster == self.out_info()[end - 1].cluster {
1178 Self::set_cluster(&mut self.info[i], cluster, 0);
1179 i += 1;
1180 }
1181 }
1182
1183 for i in start..end {
1184 Self::set_cluster(&mut self.out_info_mut()[i], cluster, 0);
1185 }
1186 }
1187
1188 /// Merge clusters for deleting current glyph, and skip it.
1189 pub fn delete_glyph(&mut self) {
1190 let cluster = self.info[self.idx].cluster;
1191
1192 if self.idx + 1 < self.len && cluster == self.info[self.idx + 1].cluster {
1193 // Cluster survives; do nothing.
1194 self.skip_glyph();
1195 return;
1196 }
1197
1198 if self.out_len != 0 {
1199 // Merge cluster backward.
1200 if cluster < self.out_info()[self.out_len - 1].cluster {
1201 let mask = self.info[self.idx].mask;
1202 let old_cluster = self.out_info()[self.out_len - 1].cluster;
1203
1204 let mut i = self.out_len;
1205 while i != 0 && self.out_info()[i - 1].cluster == old_cluster {
1206 Self::set_cluster(&mut self.out_info_mut()[i - 1], cluster, mask);
1207 i -= 1;
1208 }
1209 }
1210
1211 self.skip_glyph();
1212 return;
1213 }
1214
1215 if self.idx + 1 < self.len {
1216 // Merge cluster forward.
1217 self.merge_clusters(self.idx, self.idx + 2);
1218 }
1219
1220 self.skip_glyph();
1221 }
1222
1223 pub fn delete_glyphs_inplace(&mut self, filter: impl Fn(&GlyphInfo) -> bool) {
1224 // Merge clusters and delete filtered glyphs.
1225 // NOTE! We can't use out-buffer as we have positioning data.
1226 let mut j = 0;
1227
1228 for i in 0..self.len {
1229 if filter(&self.info[i]) {
1230 // Merge clusters.
1231 // Same logic as delete_glyph(), but for in-place removal
1232
1233 let cluster = self.info[i].cluster;
1234 if i + 1 < self.len && cluster == self.info[i + 1].cluster {
1235 // Cluster survives; do nothing.
1236 continue;
1237 }
1238
1239 if j != 0 {
1240 // Merge cluster backward.
1241 if cluster < self.info[j - 1].cluster {
1242 let mask = self.info[i].mask;
1243 let old_cluster = self.info[j - 1].cluster;
1244
1245 let mut k = j;
1246 while k > 0 && self.info[k - 1].cluster == old_cluster {
1247 Self::set_cluster(&mut self.info[k - 1], cluster, mask);
1248 k -= 1;
1249 }
1250 }
1251 continue;
1252 }
1253
1254 if i + 1 < self.len {
1255 // Merge cluster forward.
1256 self.merge_clusters(i, i + 2);
1257 }
1258
1259 continue;
1260 }
1261
1262 if j != i {
1263 self.info[j] = self.info[i];
1264 self.pos[j] = self.pos[i];
1265 }
1266
1267 j += 1;
1268 }
1269
1270 self.len = j;
1271 }
1272
1273 pub fn unsafe_to_break(&mut self, start: Option<usize>, end: Option<usize>) {
1274 self._set_glyph_flags(
1275 UNSAFE_TO_BREAK | UNSAFE_TO_CONCAT,
1276 start,
1277 end,
1278 Some(true),
1279 None,
1280 );
1281 }
1282
1283 /// Adds glyph flags in mask to infos with clusters between start and end.
1284 /// The start index will be from out-buffer if from_out_buffer is true.
1285 /// If interior is true, then the cluster having the minimum value is skipped. */
1286 fn _set_glyph_flags(
1287 &mut self,
1288 mask: Mask,
1289 start: Option<usize>,
1290 end: Option<usize>,
1291 interior: Option<bool>,
1292 from_out_buffer: Option<bool>,
1293 ) {
1294 let start = start.unwrap_or(0);
1295 let end = min(end.unwrap_or(self.len), self.len);
1296 let interior = interior.unwrap_or(false);
1297 let from_out_buffer = from_out_buffer.unwrap_or(false);
1298
1299 if interior && !from_out_buffer && end - start < 2 {
1300 return;
1301 }
1302
1303 self.scratch_flags |= BufferScratchFlags::HAS_GLYPH_FLAGS;
1304
1305 if !from_out_buffer || !self.have_output {
1306 if !interior {
1307 for i in start..end {
1308 self.info[i].mask |= mask;
1309 }
1310 } else {
1311 let cluster = Self::_infos_find_min_cluster(&self.info, start, end, None);
1312 if Self::_infos_set_glyph_flags(&mut self.info, start, end, cluster, mask) {
1313 self.scratch_flags |= BufferScratchFlags::HAS_GLYPH_FLAGS;
1314 }
1315 }
1316 } else {
1317 assert!(start <= self.out_len);
1318 assert!(self.idx <= end);
1319
1320 if !interior {
1321 for i in start..self.out_len {
1322 self.out_info_mut()[i].mask |= mask;
1323 }
1324
1325 for i in self.idx..end {
1326 self.info[i].mask |= mask;
1327 }
1328 } else {
1329 let mut cluster = Self::_infos_find_min_cluster(&self.info, self.idx, end, None);
1330 cluster = Self::_infos_find_min_cluster(
1331 &self.out_info(),
1332 start,
1333 self.out_len,
1334 Some(cluster),
1335 );
1336
1337 let out_len = self.out_len;
1338 let first = Self::_infos_set_glyph_flags(
1339 &mut self.out_info_mut(),
1340 start,
1341 out_len,
1342 cluster,
1343 mask,
1344 );
1345 let second =
1346 Self::_infos_set_glyph_flags(&mut self.info, self.idx, end, cluster, mask);
1347
1348 if first || second {
1349 self.scratch_flags |= BufferScratchFlags::HAS_GLYPH_FLAGS;
1350 }
1351 }
1352 }
1353 }
1354
1355 pub fn unsafe_to_concat(&mut self, start: Option<usize>, end: Option<usize>) {
1356 if !self.flags.contains(BufferFlags::PRODUCE_UNSAFE_TO_CONCAT) {
1357 return;
1358 }
1359
1360 self._set_glyph_flags(UNSAFE_TO_CONCAT, start, end, Some(true), None);
1361 }
1362
1363 pub fn unsafe_to_break_from_outbuffer(&mut self, start: Option<usize>, end: Option<usize>) {
1364 if !self.flags.contains(BufferFlags::PRODUCE_UNSAFE_TO_CONCAT) {
1365 return;
1366 }
1367
1368 self._set_glyph_flags(
1369 UNSAFE_TO_BREAK | UNSAFE_TO_CONCAT,
1370 start,
1371 end,
1372 Some(true),
1373 Some(true),
1374 );
1375 }
1376
1377 pub fn unsafe_to_concat_from_outbuffer(&mut self, start: Option<usize>, end: Option<usize>) {
1378 self._set_glyph_flags(UNSAFE_TO_CONCAT, start, end, Some(false), Some(true));
1379 }
1380
1381 pub fn move_to(&mut self, i: usize) -> bool {
1382 if !self.have_output {
1383 assert!(i <= self.len);
1384 self.idx = i;
1385 return true;
1386 }
1387
1388 if !self.successful {
1389 return false;
1390 }
1391
1392 assert!(i <= self.out_len + (self.len - self.idx));
1393
1394 if self.out_len < i {
1395 let count = i - self.out_len;
1396 if !self.make_room_for(count, count) {
1397 return false;
1398 }
1399
1400 for j in 0..count {
1401 self.set_out_info(self.out_len + j, self.info[self.idx + j]);
1402 }
1403
1404 self.idx += count;
1405 self.out_len += count;
1406 } else if self.out_len > i {
1407 // Tricky part: rewinding...
1408 let count = self.out_len - i;
1409
1410 // This will blow in our face if memory allocation fails later
1411 // in this same lookup...
1412 //
1413 // We used to shift with extra 32 items.
1414 // But that would leave empty slots in the buffer in case of allocation
1415 // failures. See comments in shift_forward(). This can cause O(N^2)
1416 // behavior more severely than adding 32 empty slots can...
1417 if self.idx < count {
1418 self.shift_forward(count - self.idx);
1419 }
1420
1421 assert!(self.idx >= count);
1422
1423 self.idx -= count;
1424 self.out_len -= count;
1425
1426 for j in 0..count {
1427 self.info[self.idx + j] = self.out_info()[self.out_len + j];
1428 }
1429 }
1430
1431 true
1432 }
1433
1434 pub fn ensure(&mut self, size: usize) -> bool {
1435 if size < self.len {
1436 return true;
1437 }
1438
1439 if size > self.max_len {
1440 self.successful = false;
1441 return false;
1442 }
1443
1444 self.info.resize(size, GlyphInfo::default());
1445 self.pos.resize(size, GlyphPosition::default());
1446 true
1447 }
1448
1449 pub fn set_len(&mut self, len: usize) {
1450 self.ensure(len);
1451 self.len = len;
1452 }
1453
1454 fn make_room_for(&mut self, num_in: usize, num_out: usize) -> bool {
1455 if !self.ensure(self.out_len + num_out) {
1456 return false;
1457 }
1458
1459 if !self.have_separate_output && self.out_len + num_out > self.idx + num_in {
1460 assert!(self.have_output);
1461
1462 self.have_separate_output = true;
1463 for i in 0..self.out_len {
1464 self.set_out_info(i, self.info[i]);
1465 }
1466 }
1467
1468 true
1469 }
1470
1471 fn shift_forward(&mut self, count: usize) {
1472 assert!(self.have_output);
1473 self.ensure(self.len + count);
1474
1475 for i in (0..(self.len - self.idx)).rev() {
1476 self.info[self.idx + count + i] = self.info[self.idx + i];
1477 }
1478
1479 if self.idx + count > self.len {
1480 for info in &mut self.info[self.len..self.idx + count] {
1481 *info = GlyphInfo::default();
1482 }
1483 }
1484
1485 self.len += count;
1486 self.idx += count;
1487 }
1488
1489 fn clear_context(&mut self, side: usize) {
1490 self.context_len[side] = 0;
1491 }
1492
1493 pub fn sort(&mut self, start: usize, end: usize, cmp: impl Fn(&GlyphInfo, &GlyphInfo) -> bool) {
1494 assert!(!self.have_positions);
1495
1496 for i in start + 1..end {
1497 let mut j = i;
1498 while j > start && cmp(&self.info[j - 1], &self.info[i]) {
1499 j -= 1;
1500 }
1501
1502 if i == j {
1503 continue;
1504 }
1505
1506 // Move item i to occupy place for item j, shift what's in between.
1507 self.merge_clusters(j, i + 1);
1508
1509 {
1510 let t = self.info[i];
1511 for idx in (0..i - j).rev() {
1512 self.info[idx + j + 1] = self.info[idx + j];
1513 }
1514
1515 self.info[j] = t;
1516 }
1517 }
1518 }
1519
1520 pub fn set_cluster(info: &mut GlyphInfo, cluster: u32, mask: Mask) {
1521 if info.cluster != cluster {
1522 info.mask = (info.mask & !glyph_flag::DEFINED) | (mask & glyph_flag::DEFINED);
1523 }
1524
1525 info.cluster = cluster;
1526 }
1527
1528 // Called around shape()
1529 pub(crate) fn enter(&mut self) {
1530 self.serial = 0;
1531 self.scratch_flags = BufferScratchFlags::empty();
1532
1533 if let Some(len) = self.len.checked_mul(Buffer::MAX_LEN_FACTOR) {
1534 self.max_len = len.max(Buffer::MAX_LEN_MIN);
1535 }
1536
1537 if let Ok(len) = i32::try_from(self.len) {
1538 if let Some(ops) = len.checked_mul(Buffer::MAX_OPS_FACTOR) {
1539 self.max_ops = ops.max(Buffer::MAX_OPS_MIN);
1540 }
1541 }
1542 }
1543
1544 // Called around shape()
1545 pub(crate) fn leave(&mut self) {
1546 self.max_len = Buffer::MAX_LEN_DEFAULT;
1547 self.max_ops = Buffer::MAX_OPS_DEFAULT;
1548 self.serial = 0;
1549 }
1550
1551 fn _infos_find_min_cluster(
1552 info: &[GlyphInfo],
1553 start: usize,
1554 end: usize,
1555 cluster: Option<u32>,
1556 ) -> u32 {
1557 let mut cluster = cluster.unwrap_or(core::u32::MAX);
1558
1559 for glyph_info in &info[start..end] {
1560 cluster = core::cmp::min(cluster, glyph_info.cluster);
1561 }
1562
1563 cluster
1564 }
1565
1566 #[must_use]
1567 fn _infos_set_glyph_flags(
1568 info: &mut [GlyphInfo],
1569 start: usize,
1570 end: usize,
1571 cluster: u32,
1572 mask: Mask,
1573 ) -> bool {
1574 // NOTE: Because of problems with ownership, we don't pass the scratch flags to this
1575 // function, unlike in harfbuzz. Because of this, each time you call this function you
1576 // the caller needs to set the "BufferScratchFlags::HAS_GLYPH_FLAGS" scratch flag
1577 // themselves if the function returns true.
1578 let mut unsafe_to_break = false;
1579 for glyph_info in &mut info[start..end] {
1580 if glyph_info.cluster != cluster {
1581 glyph_info.mask |= mask;
1582 unsafe_to_break = true;
1583 }
1584 }
1585
1586 unsafe_to_break
1587 }
1588
1589 /// Checks that buffer contains no elements.
1590 pub fn is_empty(&self) -> bool {
1591 self.len == 0
1592 }
1593
1594 fn push_str(&mut self, text: &str) {
1595 self.ensure(self.len + text.chars().count());
1596
1597 for (i, c) in text.char_indices() {
1598 self.add(c as u32, i as u32);
1599 }
1600 }
1601
1602 fn set_pre_context(&mut self, text: &str) {
1603 self.clear_context(0);
1604 for (i, c) in text.chars().rev().enumerate().take(CONTEXT_LENGTH) {
1605 self.context[0][i] = c;
1606 self.context_len[0] += 1;
1607 }
1608 }
1609
1610 fn set_post_context(&mut self, text: &str) {
1611 self.clear_context(1);
1612 for (i, c) in text.chars().enumerate().take(CONTEXT_LENGTH) {
1613 self.context[1][i] = c;
1614 self.context_len[1] += 1;
1615 }
1616 }
1617
1618 pub fn next_syllable(&self, mut start: usize) -> usize {
1619 if start >= self.len {
1620 return start;
1621 }
1622
1623 let syllable = self.info[start].syllable();
1624 start += 1;
1625 while start < self.len && syllable == self.info[start].syllable() {
1626 start += 1;
1627 }
1628
1629 start
1630 }
1631
1632 #[inline]
1633 pub fn allocate_lig_id(&mut self) -> u8 {
1634 let mut lig_id = self.next_serial() & 0x07;
1635
1636 if lig_id == 0 {
1637 lig_id = self.allocate_lig_id();
1638 }
1639
1640 lig_id
1641 }
1642}
1643
1644pub(crate) fn _cluster_group_func(a: &GlyphInfo, b: &GlyphInfo) -> bool {
1645 a.cluster == b.cluster
1646}
1647
1648pub(crate) fn _grapheme_group_func(_: &GlyphInfo, b: &GlyphInfo) -> bool {
1649 b.is_continuation()
1650}
1651
1652// TODO: to iter if possible
1653
// Runs the body once per cluster of `$buffer`, with `$start..$end` bound to
// the index range of that cluster. Thin wrapper around `foreach_group!`.
macro_rules! foreach_cluster {
    ($buffer:expr, $start:ident, $end:ident, $($body:tt)*) => {
        foreach_group!(
            $buffer,
            $start,
            $end,
            crate::buffer::_cluster_group_func,
            $($body)*
        )
    };
}
1659
// Runs the body once per group of `$buffer`, with `$start..$end` bound to the
// index range of the group. Group boundaries come from
// `$buffer.group_end(start, $group_func)`. The buffer length is read once,
// before the first iteration.
macro_rules! foreach_group {
    ($buffer:expr, $start:ident, $end:ident, $group_func:expr, $($body:tt)*) => {{
        let total = $buffer.len;
        let mut $start = 0;
        let mut $end = match total {
            0 => 0,
            _ => $buffer.group_end(0, $group_func),
        };

        while $start < total {
            $($body)*;
            $start = $end;
            $end = $buffer.group_end($start, $group_func);
        }
    }};
}
1673
// Runs the body once per syllable of `$buffer`, with `$start..$end` bound to
// the index range of the syllable. Boundaries come from
// `$buffer.next_syllable`; the buffer length is re-read before every
// iteration.
macro_rules! foreach_syllable {
    ($buffer:expr, $start:ident, $end:ident, $($body:tt)*) => {{
        let (mut $start, mut $end) = (0, $buffer.next_syllable(0));

        while $start < $buffer.len {
            $($body)*;
            $start = $end;
            $end = $buffer.next_syllable($start);
        }
    }};
}
1685
// Runs the body once per grapheme of `$buffer`, with `$start..$end` bound to
// the index range of that grapheme. Thin wrapper around `foreach_group!`.
macro_rules! foreach_grapheme {
    ($buffer:expr, $start:ident, $end:ident, $($body:tt)*) => {
        foreach_group!(
            $buffer,
            $start,
            $end,
            crate::buffer::_grapheme_group_func,
            $($body)*
        )
    };
}
1691
1692bitflags::bitflags! {
1693 #[derive(Default, Debug, Clone, Copy)]
1694 pub struct UnicodeProps: u16 {
1695 const GENERAL_CATEGORY = 0x001F;
1696 const IGNORABLE = 0x0020;
1697 // MONGOLIAN FREE VARIATION SELECTOR 1..4, or TAG characters
1698 const HIDDEN = 0x0040;
1699 const CONTINUATION = 0x0080;
1700
1701 // If GEN_CAT=FORMAT, top byte masks:
1702 const CF_ZWJ = 0x0100;
1703 const CF_ZWNJ = 0x0200;
1704 }
1705}
1706
1707bitflags::bitflags! {
1708 #[derive(Default, Debug, Clone, Copy)]
1709 pub struct GlyphPropsFlags: u16 {
1710 // The following three match LookupFlags::Ignore* numbers.
1711 const BASE_GLYPH = 0x02;
1712 const LIGATURE = 0x04;
1713 const MARK = 0x08;
1714 const CLASS_MASK = Self::BASE_GLYPH.bits() | Self::LIGATURE.bits() | Self::MARK.bits();
1715
1716 // The following are used internally; not derived from GDEF.
1717 const SUBSTITUTED = 0x10;
1718 const LIGATED = 0x20;
1719 const MULTIPLIED = 0x40;
1720
1721 const PRESERVE = Self::SUBSTITUTED.bits() | Self::LIGATED.bits() | Self::MULTIPLIED.bits();
1722 }
1723}
1724
1725bitflags::bitflags! {
1726 /// Flags for buffers.
1727 #[derive(Default, Debug, Clone, Copy)]
1728 pub struct BufferFlags: u32 {
1729 /// Indicates that special handling of the beginning of text paragraph can be applied to this buffer. Should usually be set, unless you are passing to the buffer only part of the text without the full context.
1730 const BEGINNING_OF_TEXT = 1 << 1;
1731 /// Indicates that special handling of the end of text paragraph can be applied to this buffer, similar to [`BufferFlags::BEGINNING_OF_TEXT`].
1732 const END_OF_TEXT = 1 << 2;
1733 /// Indicates that characters with `Default_Ignorable` Unicode property should use the corresponding glyph from the font, instead of hiding them (done by replacing them with the space glyph and zeroing the advance width.) This flag takes precedence over [`BufferFlags::REMOVE_DEFAULT_IGNORABLES`].
1734 const PRESERVE_DEFAULT_IGNORABLES = 1 << 3;
1735 /// Indicates that characters with `Default_Ignorable` Unicode property should be removed from glyph string instead of hiding them (done by replacing them with the space glyph and zeroing the advance width.) [`BufferFlags::PRESERVE_DEFAULT_IGNORABLES`] takes precedence over this flag.
1736 const REMOVE_DEFAULT_IGNORABLES = 1 << 4;
1737 /// Indicates that a dotted circle should not be inserted in the rendering of incorrect character sequences (such as `<0905 093E>`).
1738 const DO_NOT_INSERT_DOTTED_CIRCLE = 1 << 5;
1739 /// Indicates that the shape() call and its variants should perform various verification processes on the results of the shaping operation on the buffer. If the verification fails, then either a buffer message is sent, if a message handler is installed on the buffer, or a message is written to standard error. In either case, the shaping result might be modified to show the failed output.
1740 const VERIFY = 1 << 6;
1741 /// Indicates that the `UNSAFE_TO_CONCAT` glyph-flag should be produced by the shaper. By default it will not be produced since it incurs a cost.
1742 const PRODUCE_UNSAFE_TO_CONCAT = 1 << 7;
1743 }
1744}
1745
1746bitflags::bitflags! {
1747 #[derive(Default, Debug, Clone, Copy)]
1748 pub struct BufferScratchFlags: u32 {
1749 const HAS_NON_ASCII = 0x00000001;
1750 const HAS_DEFAULT_IGNORABLES = 0x00000002;
1751 const HAS_SPACE_FALLBACK = 0x00000004;
1752 const HAS_GPOS_ATTACHMENT = 0x00000008;
1753 const HAS_CGJ = 0x00000010;
1754 const HAS_GLYPH_FLAGS = 0x00000020;
1755
1756 // Reserved for complex shapers' internal use.
1757 const COMPLEX0 = 0x01000000;
1758 const COMPLEX1 = 0x02000000;
1759 const COMPLEX2 = 0x04000000;
1760 const COMPLEX3 = 0x08000000;
1761 }
1762}
1763
1764bitflags::bitflags! {
1765 /// Flags used for serialization with a `BufferSerializer`.
1766 #[derive(Default)]
1767 pub struct SerializeFlags: u8 {
1768 /// Do not serialize glyph cluster.
1769 const NO_CLUSTERS = 0b00000001;
1770 /// Do not serialize glyph position information.
1771 const NO_POSITIONS = 0b00000010;
1772 /// Do no serialize glyph name.
1773 const NO_GLYPH_NAMES = 0b00000100;
1774 /// Serialize glyph extents.
1775 const GLYPH_EXTENTS = 0b00001000;
1776 /// Serialize glyph flags.
1777 const GLYPH_FLAGS = 0b00010000;
1778 /// Do not serialize glyph advances, glyph offsets will reflect absolute
1779 /// glyph positions.
1780 const NO_ADVANCES = 0b00100000;
1781 }
1782}
1783
1784/// A buffer that contains an input string ready for shaping.
1785pub struct UnicodeBuffer(pub(crate) Buffer);
1786
1787impl UnicodeBuffer {
1788 /// Create a new `UnicodeBuffer`.
1789 #[inline]
1790 pub fn new() -> UnicodeBuffer {
1791 UnicodeBuffer(Buffer::new())
1792 }
1793
1794 /// Returns the length of the data of the buffer.
1795 ///
1796 /// This corresponds to the number of unicode codepoints contained in the
1797 /// buffer.
1798 #[inline]
1799 pub fn len(&self) -> usize {
1800 self.0.len
1801 }
1802
1803 /// Returns `true` if the buffer contains no elements.
1804 #[inline]
1805 pub fn is_empty(&self) -> bool {
1806 self.0.is_empty()
1807 }
1808
1809 /// Pushes a string to a buffer.
1810 #[inline]
1811 pub fn push_str(&mut self, str: &str) {
1812 self.0.push_str(str);
1813 }
1814
1815 /// Sets the pre-context for this buffer.
1816 #[inline]
1817 pub fn set_pre_context(&mut self, str: &str) {
1818 self.0.set_pre_context(str)
1819 }
1820
1821 /// Sets the post-context for this buffer.
1822 #[inline]
1823 pub fn set_post_context(&mut self, str: &str) {
1824 self.0.set_post_context(str)
1825 }
1826
1827 /// Appends a character to a buffer with the given cluster value.
1828 #[inline]
1829 pub fn add(&mut self, codepoint: char, cluster: u32) {
1830 self.0.add(codepoint as u32, cluster);
1831 self.0.context_len[1] = 0;
1832 }
1833
1834 /// Set the text direction of the `Buffer`'s contents.
1835 #[inline]
1836 pub fn set_direction(&mut self, direction: Direction) {
1837 self.0.direction = direction;
1838 }
1839
1840 /// Returns the `Buffer`'s text direction.
1841 #[inline]
1842 pub fn direction(&self) -> Direction {
1843 self.0.direction
1844 }
1845
1846 /// Set the script from an ISO15924 tag.
1847 #[inline]
1848 pub fn set_script(&mut self, script: Script) {
1849 self.0.script = Some(script);
1850 }
1851
1852 /// Get the ISO15924 script tag.
1853 pub fn script(&self) -> Script {
1854 self.0.script.unwrap_or(script::UNKNOWN)
1855 }
1856
1857 /// Set the buffer language.
1858 #[inline]
1859 pub fn set_language(&mut self, lang: Language) {
1860 self.0.language = Some(lang);
1861 }
1862
1863 /// Get the buffer language.
1864 #[inline]
1865 pub fn language(&self) -> Option<Language> {
1866 self.0.language.clone()
1867 }
1868
1869 /// Guess the segment properties (direction, language, script) for the
1870 /// current buffer.
1871 #[inline]
1872 pub fn guess_segment_properties(&mut self) {
1873 self.0.guess_segment_properties()
1874 }
1875
1876 /// Set the flags for this buffer.
1877 #[inline]
1878 pub fn set_flags(&mut self, flags: BufferFlags) {
1879 self.0.flags = flags;
1880 }
1881
1882 /// Get the flags for this buffer.
1883 #[inline]
1884 pub fn flags(&self) -> BufferFlags {
1885 self.0.flags
1886 }
1887
1888 /// Set the cluster level of the buffer.
1889 #[inline]
1890 pub fn set_cluster_level(&mut self, cluster_level: BufferClusterLevel) {
1891 self.0.cluster_level = cluster_level
1892 }
1893
1894 /// Retrieve the cluster level of the buffer.
1895 #[inline]
1896 pub fn cluster_level(&self) -> BufferClusterLevel {
1897 self.0.cluster_level
1898 }
1899
1900 /// Resets clusters.
1901 #[inline]
1902 pub fn reset_clusters(&mut self) {
1903 self.0.reset_clusters();
1904 }
1905
1906 /// Clear the contents of the buffer.
1907 #[inline]
1908 pub fn clear(&mut self) {
1909 self.0.clear()
1910 }
1911}
1912
1913impl core::fmt::Debug for UnicodeBuffer {
1914 fn fmt(&self, fmt: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1915 fmt&mut DebugStruct<'_, '_>.debug_struct("UnicodeBuffer")
1916 .field("direction", &self.direction())
1917 .field("language", &self.language())
1918 .field("script", &self.script())
1919 .field(name:"cluster_level", &self.cluster_level())
1920 .finish()
1921 }
1922}
1923
1924impl Default for UnicodeBuffer {
1925 fn default() -> UnicodeBuffer {
1926 UnicodeBuffer::new()
1927 }
1928}
1929
1930/// A buffer that contains the results of the shaping process.
1931pub struct GlyphBuffer(pub(crate) Buffer);
1932
1933impl GlyphBuffer {
1934 /// Returns the length of the data of the buffer.
1935 ///
1936 /// When called before shaping this is the number of unicode codepoints
1937 /// contained in the buffer. When called after shaping it returns the number
1938 /// of glyphs stored.
1939 #[inline]
1940 pub fn len(&self) -> usize {
1941 self.0.len
1942 }
1943
1944 /// Returns `true` if the buffer contains no elements.
1945 #[inline]
1946 pub fn is_empty(&self) -> bool {
1947 self.0.is_empty()
1948 }
1949
1950 /// Get the glyph infos.
1951 #[inline]
1952 pub fn glyph_infos(&self) -> &[GlyphInfo] {
1953 &self.0.info[0..self.0.len]
1954 }
1955
1956 /// Get the glyph positions.
1957 #[inline]
1958 pub fn glyph_positions(&self) -> &[GlyphPosition] {
1959 &self.0.pos[0..self.0.len]
1960 }
1961
1962 /// Clears the content of the glyph buffer and returns an empty
1963 /// `UnicodeBuffer` reusing the existing allocation.
1964 #[inline]
1965 pub fn clear(mut self) -> UnicodeBuffer {
1966 self.0.clear();
1967 UnicodeBuffer(self.0)
1968 }
1969
1970 /// Converts the glyph buffer content into a string.
1971 pub fn serialize(&self, face: &Face, flags: SerializeFlags) -> String {
1972 self.serialize_impl(face, flags).unwrap_or_default()
1973 }
1974
1975 fn serialize_impl(
1976 &self,
1977 face: &Face,
1978 flags: SerializeFlags,
1979 ) -> Result<String, core::fmt::Error> {
1980 use core::fmt::Write;
1981
1982 let mut s = String::with_capacity(64);
1983
1984 let info = self.glyph_infos();
1985 let pos = self.glyph_positions();
1986 let mut x = 0;
1987 let mut y = 0;
1988 for (info, pos) in info.iter().zip(pos) {
1989 if !flags.contains(SerializeFlags::NO_GLYPH_NAMES) {
1990 match face.glyph_name(info.as_glyph()) {
1991 Some(name) => s.push_str(name),
1992 None => write!(&mut s, "gid{}", info.glyph_id)?,
1993 }
1994 } else {
1995 write!(&mut s, "{}", info.glyph_id)?;
1996 }
1997
1998 if !flags.contains(SerializeFlags::NO_CLUSTERS) {
1999 write!(&mut s, "={}", info.cluster)?;
2000 }
2001
2002 if !flags.contains(SerializeFlags::NO_POSITIONS) {
2003 if x + pos.x_offset != 0 || y + pos.y_offset != 0 {
2004 write!(&mut s, "@{},{}", x + pos.x_offset, y + pos.y_offset)?;
2005 }
2006
2007 if !flags.contains(SerializeFlags::NO_ADVANCES) {
2008 write!(&mut s, "+{}", pos.x_advance)?;
2009 if pos.y_advance != 0 {
2010 write!(&mut s, ",{}", pos.y_advance)?;
2011 }
2012 }
2013 }
2014
2015 if flags.contains(SerializeFlags::GLYPH_FLAGS) {
2016 if info.mask & glyph_flag::DEFINED != 0 {
2017 write!(&mut s, "#{:X}", info.mask & glyph_flag::DEFINED)?;
2018 }
2019 }
2020
2021 if flags.contains(SerializeFlags::GLYPH_EXTENTS) {
2022 let mut extents = GlyphExtents::default();
2023 face.glyph_extents(info.as_glyph(), &mut extents);
2024 write!(
2025 &mut s,
2026 "<{},{},{},{}>",
2027 extents.x_bearing, extents.y_bearing, extents.width, extents.height
2028 )?;
2029 }
2030
2031 if flags.contains(SerializeFlags::NO_ADVANCES) {
2032 x += pos.x_advance;
2033 y += pos.y_advance;
2034 }
2035
2036 s.push('|');
2037 }
2038
2039 // Remove last `|`.
2040 if !s.is_empty() {
2041 s.pop();
2042 }
2043
2044 Ok(s)
2045 }
2046}
2047
2048impl core::fmt::Debug for GlyphBuffer {
2049 fn fmt(&self, fmt: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2050 fmt&mut DebugStruct<'_, '_>.debug_struct("GlyphBuffer")
2051 .field("glyph_positions", &self.glyph_positions())
2052 .field(name:"glyph_infos", &self.glyph_infos())
2053 .finish()
2054 }
2055}
2056