1 | use alloc::{string::String, vec::Vec}; |
2 | use core::cmp::min; |
3 | use core::convert::TryFrom; |
4 | use ttf_parser::GlyphId; |
5 | |
6 | use super::buffer::glyph_flag::{SAFE_TO_INSERT_TATWEEL, UNSAFE_TO_BREAK, UNSAFE_TO_CONCAT}; |
7 | use super::face::hb_glyph_extents_t; |
8 | use super::unicode::{CharExt, GeneralCategoryExt}; |
9 | use super::{hb_font_t, hb_mask_t}; |
10 | use crate::hb::set_digest::{hb_set_digest_ext, hb_set_digest_t}; |
11 | use crate::{script, BufferClusterLevel, BufferFlags, Direction, Language, Script, SerializeFlags}; |
12 | |
13 | const CONTEXT_LENGTH: usize = 5; |
14 | |
15 | pub mod glyph_flag { |
16 | /// Indicates that if input text is broken at the |
17 | /// beginning of the cluster this glyph is part of, |
18 | /// then both sides need to be re-shaped, as the |
19 | /// result might be different. |
20 | /// |
21 | /// On the flip side, it means that when |
22 | /// this flag is not present, then it is safe |
23 | /// to break the glyph-run at the beginning of |
24 | /// this cluster, and the two sides will represent |
25 | /// the exact same result one would get if breaking |
26 | /// input text at the beginning of this cluster and |
27 | /// shaping the two sides separately. |
28 | /// |
29 | /// This can be used to optimize paragraph layout, |
30 | /// by avoiding re-shaping of each line after line-breaking. |
31 | pub const UNSAFE_TO_BREAK: u32 = 0x00000001; |
32 | /// Indicates that if input text is changed on one side |
33 | /// of the beginning of the cluster this glyph is part |
34 | /// of, then the shaping results for the other side |
35 | /// might change. |
36 | /// |
/// Note that the absence of this flag will NOT by
/// itself mean that it IS safe to concat text. Only
/// two pieces of text, both of which are clear of this
/// flag, can be concatenated safely.
41 | /// |
/// This can be used to optimize paragraph layout,
/// by avoiding re-shaping of each line after
/// line-breaking, limiting the reshaping to a
/// small piece around the breaking position only,
/// even if the breaking position carries the
/// UNSAFE_TO_BREAK flag, or when hyphenation or
/// another text transformation happens at the
/// line-break position, in the following way:
50 | /// |
/// 1. Iterate back from the line-break position until the first
///    cluster start position that is NOT unsafe-to-concat,
/// 2. shape the segment from there till the end of the line,
/// 3. check whether the resulting glyph-run is also clear of the
///    unsafe-to-concat flag at its start-of-text position; if it is,
///    just splice it into place and the line is shaped; if not, move
///    on to a position further back that is clear of unsafe-to-concat
///    and retry from there, and repeat.
62 | /// |
/// At the start of the next line a similar algorithm can be
/// implemented. That is:
/// 1. Iterate forward from the line-break position until the first
///    cluster start position that is NOT unsafe-to-concat,
/// 2. shape the segment from the beginning of the line to that
///    position,
/// 3. check whether the resulting glyph-run is also clear of the
///    unsafe-to-concat flag at its end-of-text position; if it is,
///    just splice it into place and the beginning is shaped; if not,
///    move on to a position further forward that is clear of
///    unsafe-to-concat and retry up to there, and repeat.
75 | /// |
76 | /// A slight complication will arise in the |
77 | /// implementation of the algorithm above, |
78 | /// because while |
79 | /// our buffer API has a way to return flags |
80 | /// for position corresponding to |
81 | /// start-of-text, there is currently no |
82 | /// position corresponding to end-of-text. |
83 | /// This limitation can be alleviated by |
84 | /// shaping more text than needed and |
85 | /// looking for unsafe-to-concat flag |
86 | /// within text clusters. |
87 | /// |
/// The UNSAFE_TO_BREAK flag always implies this flag.
/// To use this flag, you must enable the buffer flag
/// PRODUCE_UNSAFE_TO_CONCAT during shaping; otherwise
/// this glyph flag will not be reliably produced.
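///
/// A rough sketch of the first loop above, in terms of hypothetical
/// helpers (`cluster_flags`, `shape_range`, `first_cluster_flags`,
/// `splice`, `previous_safe_position`) that are not part of this crate:
///
/// ```ignore
/// // Back up to the nearest cluster start that is safe to concat.
/// let mut start = break_pos;
/// while start > 0 && cluster_flags(start) & UNSAFE_TO_CONCAT != 0 {
///     start -= 1;
/// }
/// loop {
///     // Re-shape only the tail of the line.
///     let reshaped = shape_range(text, start..line_end);
///     // If its first cluster is also safe to concat, splice it in.
///     if first_cluster_flags(&reshaped) & UNSAFE_TO_CONCAT == 0 {
///         splice(&mut line_glyphs, start..line_end, reshaped);
///         break;
///     }
///     // Otherwise back up further and retry.
///     start = previous_safe_position(start);
/// }
/// ```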
92 | pub const UNSAFE_TO_CONCAT: u32 = 0x00000002; |
93 | |
94 | /// In scripts that use elongation (Arabic, |
95 | /// Mongolian, Syriac, etc.), this flag signifies |
96 | /// that it is safe to insert a U+0640 TATWEEL |
97 | /// character *before* this cluster for elongation. |
98 | pub const SAFE_TO_INSERT_TATWEEL: u32 = 0x00000004; |
99 | |
100 | /// All the currently defined flags. |
101 | pub const DEFINED: u32 = 0x00000007; // OR of all defined flags |
102 | } |
103 | |
104 | /// Holds the positions of the glyph in both horizontal and vertical directions. |
105 | /// |
106 | /// All positions are relative to the current point. |
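///
/// A minimal sketch of how a renderer might consume these values, assuming
/// a hypothetical `draw_glyph` function and the glyph infos/positions
/// returned by shaping:
///
/// ```ignore
/// let (mut pen_x, mut pen_y) = (0i32, 0i32);
/// for (info, pos) in infos.iter().zip(positions) {
///     // Offsets displace the outline but do not move the pen.
///     draw_glyph(info.glyph_id, pen_x + pos.x_offset, pen_y + pos.y_offset);
///     // Advances move the pen for the next glyph.
///     pen_x += pos.x_advance;
///     pen_y += pos.y_advance;
/// }
/// ```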
107 | #[repr (C)] |
108 | #[derive (Clone, Copy, Default, Debug)] |
109 | pub struct GlyphPosition { |
110 | /// How much the line advances after drawing this glyph when setting text in |
111 | /// horizontal direction. |
112 | pub x_advance: i32, |
113 | /// How much the line advances after drawing this glyph when setting text in |
114 | /// vertical direction. |
115 | pub y_advance: i32, |
/// How much the glyph moves on the X-axis before drawing it; this should
/// not affect how much the line advances.
pub x_offset: i32,
/// How much the glyph moves on the Y-axis before drawing it; this should
/// not affect how much the line advances.
pub y_offset: i32,
122 | pub(crate) var: u32, |
123 | } |
124 | |
125 | unsafe impl bytemuck::Zeroable for GlyphPosition {} |
126 | unsafe impl bytemuck::Pod for GlyphPosition {} |
127 | |
128 | impl GlyphPosition { |
129 | #[inline ] |
130 | pub(crate) fn attach_chain(&self) -> i16 { |
// Glyph to which this one attaches, relative to the current glyph;
// negative for going back, positive for forward.
133 | let v: &[i16; 2] = bytemuck::cast_ref(&self.var); |
134 | v[0] |
135 | } |
136 | |
137 | #[inline ] |
138 | pub(crate) fn set_attach_chain(&mut self, n: i16) { |
139 | let v: &mut [i16; 2] = bytemuck::cast_mut(&mut self.var); |
140 | v[0] = n; |
141 | } |
142 | |
143 | #[inline ] |
144 | pub(crate) fn attach_type(&self) -> u8 { |
145 | // attachment type |
146 | // Note! if attach_chain() is zero, the value of attach_type() is irrelevant. |
147 | let v: &[u8; 4] = bytemuck::cast_ref(&self.var); |
148 | v[2] |
149 | } |
150 | |
151 | #[inline ] |
152 | pub(crate) fn set_attach_type(&mut self, n: u8) { |
153 | let v: &mut [u8; 4] = bytemuck::cast_mut(&mut self.var); |
154 | v[2] = n; |
155 | } |
156 | } |
157 | |
158 | /// A glyph info. |
159 | #[repr (C)] |
160 | #[derive (Clone, Copy, Default, Debug)] |
161 | pub struct hb_glyph_info_t { |
162 | // NOTE: Stores a Unicode codepoint before shaping and a glyph ID after. |
163 | // Just like harfbuzz, we are using the same variable for two purposes. |
164 | // Occupies u32 as a codepoint and u16 as a glyph id. |
165 | /// A selected glyph. |
166 | /// |
/// Guaranteed to be <= `u16::MAX`.
168 | pub glyph_id: u32, |
169 | pub(crate) mask: hb_mask_t, |
170 | /// An index to the start of the grapheme cluster in the original string. |
171 | /// |
172 | /// [Read more on clusters](https://harfbuzz.github.io/clusters.html). |
173 | pub cluster: u32, |
174 | pub(crate) var1: u32, |
175 | pub(crate) var2: u32, |
176 | } |
177 | |
178 | unsafe impl bytemuck::Zeroable for hb_glyph_info_t {} |
179 | unsafe impl bytemuck::Pod for hb_glyph_info_t {} |
180 | |
181 | impl hb_glyph_info_t { |
182 | /// Indicates that if input text is broken at the beginning of the cluster this glyph |
183 | /// is part of, then both sides need to be re-shaped, as the result might be different. |
184 | /// |
185 | /// On the flip side, it means that when this flag is not present, |
186 | /// then it's safe to break the glyph-run at the beginning of this cluster, |
187 | /// and the two sides represent the exact same result one would get if breaking input text |
188 | /// at the beginning of this cluster and shaping the two sides separately. |
189 | /// This can be used to optimize paragraph layout, by avoiding re-shaping of each line |
190 | /// after line-breaking, or limiting the reshaping to a small piece around |
191 | /// the breaking point only. |
192 | pub fn unsafe_to_break(&self) -> bool { |
193 | self.mask & glyph_flag::UNSAFE_TO_BREAK != 0 |
194 | } |
195 | |
196 | #[inline ] |
197 | pub(crate) fn as_char(&self) -> char { |
198 | char::try_from(self.glyph_id).unwrap() |
199 | } |
200 | |
201 | #[inline ] |
202 | pub(crate) fn as_glyph(&self) -> GlyphId { |
203 | debug_assert!(self.glyph_id <= u32::from(u16::MAX)); |
204 | GlyphId(self.glyph_id as u16) |
205 | } |
206 | |
207 | // Var allocation: unicode_props |
208 | // Used during the entire shaping process to store unicode properties |
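//
// Layout (see `UnicodeProps` at the bottom of this file): bits 0..=4 hold
// the general category, bits 5..=7 the IGNORABLE/HIDDEN/CONTINUATION flags,
// and the upper byte stores either the modified combining class (for marks)
// or the CF_* flags (for format characters).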
209 | |
210 | #[inline ] |
211 | pub(crate) fn unicode_props(&self) -> u16 { |
212 | let v: &[u16; 2] = bytemuck::cast_ref(&self.var2); |
213 | v[0] |
214 | } |
215 | |
216 | #[inline ] |
217 | pub(crate) fn set_unicode_props(&mut self, n: u16) { |
218 | let v: &mut [u16; 2] = bytemuck::cast_mut(&mut self.var2); |
219 | v[0] = n; |
220 | } |
221 | |
222 | pub(crate) fn init_unicode_props(&mut self, scratch_flags: &mut hb_buffer_scratch_flags_t) { |
223 | let u = self.as_char(); |
224 | let gc = u.general_category(); |
225 | let mut props = gc.to_rb() as u16; |
226 | |
227 | if u as u32 >= 0x80 { |
228 | *scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_NON_ASCII; |
229 | |
230 | if u.is_default_ignorable() { |
231 | props |= UnicodeProps::IGNORABLE.bits(); |
232 | *scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES; |
233 | |
234 | match u as u32 { |
235 | 0x200C => props |= UnicodeProps::CF_ZWNJ.bits(), |
236 | 0x200D => props |= UnicodeProps::CF_ZWJ.bits(), |
237 | |
// Mongolian Free Variation Selectors need to be remembered
// because although we need to hide them like default-ignorables,
// they need to be non-ignorable during shaping. This is similar to
// what we do for joiners in Indic-like shapers, but since the
// FVSes are GC=Mn, we have to use a separate bit to remember them.
243 | // Fixes: |
244 | // https://github.com/harfbuzz/harfbuzz/issues/234 |
245 | 0x180B..=0x180D | 0x180F => props |= UnicodeProps::HIDDEN.bits(), |
246 | |
247 | // TAG characters need similar treatment. Fixes: |
248 | // https://github.com/harfbuzz/harfbuzz/issues/463 |
249 | 0xE0020..=0xE007F => props |= UnicodeProps::HIDDEN.bits(), |
250 | |
251 | // COMBINING GRAPHEME JOINER should not be skipped during GSUB either. |
252 | // https://github.com/harfbuzz/harfbuzz/issues/554 |
253 | 0x034F => { |
254 | props |= UnicodeProps::HIDDEN.bits(); |
255 | *scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_CGJ; |
256 | } |
257 | |
258 | _ => {} |
259 | } |
260 | } |
261 | |
262 | if gc.is_mark() { |
263 | props |= UnicodeProps::CONTINUATION.bits(); |
264 | props |= (u.modified_combining_class() as u16) << 8; |
265 | } |
266 | } |
267 | |
268 | self.set_unicode_props(props); |
269 | } |
270 | |
271 | #[inline ] |
272 | pub(crate) fn unhide(&mut self) { |
273 | let mut n = self.unicode_props(); |
274 | n &= !UnicodeProps::HIDDEN.bits(); |
275 | self.set_unicode_props(n); |
276 | } |
277 | |
278 | #[inline ] |
279 | pub(crate) fn lig_props(&self) -> u8 { |
280 | let v: &[u8; 4] = bytemuck::cast_ref(&self.var1); |
281 | v[2] |
282 | } |
283 | |
284 | #[inline ] |
285 | pub(crate) fn set_lig_props(&mut self, n: u8) { |
286 | let v: &mut [u8; 4] = bytemuck::cast_mut(&mut self.var1); |
287 | v[2] = n; |
288 | } |
289 | |
290 | #[inline ] |
291 | pub(crate) fn glyph_props(&self) -> u16 { |
292 | let v: &[u16; 2] = bytemuck::cast_ref(&self.var1); |
293 | v[0] |
294 | } |
295 | |
296 | #[inline ] |
297 | pub(crate) fn set_glyph_props(&mut self, n: u16) { |
298 | let v: &mut [u16; 2] = bytemuck::cast_mut(&mut self.var1); |
299 | v[0] = n; |
300 | } |
301 | |
302 | #[inline ] |
303 | pub(crate) fn syllable(&self) -> u8 { |
304 | let v: &[u8; 4] = bytemuck::cast_ref(&self.var1); |
305 | v[3] |
306 | } |
307 | |
308 | #[inline ] |
309 | pub(crate) fn set_syllable(&mut self, n: u8) { |
310 | let v: &mut [u8; 4] = bytemuck::cast_mut(&mut self.var1); |
311 | v[3] = n; |
312 | } |
313 | |
314 | // Var allocation: glyph_index |
315 | // Used during the normalization process to store glyph indices |
316 | |
317 | #[inline ] |
318 | pub(crate) fn glyph_index(&mut self) -> u32 { |
319 | self.var1 |
320 | } |
321 | |
322 | #[inline ] |
323 | pub(crate) fn set_glyph_index(&mut self, n: u32) { |
324 | self.var1 = n; |
325 | } |
326 | } |
327 | |
328 | pub type hb_buffer_cluster_level_t = u32; |
329 | pub const HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES: u32 = 0; |
330 | pub const HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS: u32 = 1; |
331 | pub const HB_BUFFER_CLUSTER_LEVEL_CHARACTERS: u32 = 2; |
332 | pub const HB_BUFFER_CLUSTER_LEVEL_DEFAULT: u32 = HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES; |
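
// Cluster levels mirror HarfBuzz: the two "monotone" levels guarantee that
// cluster values never decrease along the buffer (merging at grapheme or
// character granularity), while the CHARACTERS level keeps one cluster per
// character and never merges them (see `merge_clusters_impl`).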
333 | |
334 | pub struct hb_buffer_t { |
335 | // Information about how the text in the buffer should be treated. |
336 | pub flags: BufferFlags, |
337 | pub cluster_level: hb_buffer_cluster_level_t, |
338 | pub invisible: Option<GlyphId>, |
339 | pub not_found_variation_selector: Option<u32>, |
340 | |
341 | // Buffer contents. |
342 | pub direction: Direction, |
343 | pub script: Option<Script>, |
344 | pub language: Option<Language>, |
345 | |
346 | /// Shaping failure |
347 | pub shaping_failed: bool, |
348 | |
349 | /// Allocations successful. |
350 | pub successful: bool, |
351 | /// Whether we have an output buffer going on. |
352 | pub(crate) have_output: bool, |
353 | pub have_separate_output: bool, |
354 | /// Whether we have positions |
355 | pub have_positions: bool, |
356 | |
357 | pub idx: usize, |
358 | pub len: usize, |
359 | pub out_len: usize, |
360 | |
361 | pub info: Vec<hb_glyph_info_t>, |
362 | pub pos: Vec<GlyphPosition>, |
363 | |
364 | // Text before / after the main buffer contents. |
365 | // Always in Unicode, and ordered outward. |
366 | // Index 0 is for "pre-context", 1 for "post-context". |
367 | pub context: [[char; CONTEXT_LENGTH]; 2], |
368 | pub context_len: [usize; 2], |
369 | |
370 | // Managed by enter / leave |
371 | pub serial: u8, |
372 | pub scratch_flags: hb_buffer_scratch_flags_t, |
373 | /// Maximum allowed len. |
374 | pub max_len: usize, |
375 | /// Maximum allowed operations. |
376 | pub max_ops: i32, |
377 | } |
378 | |
379 | impl hb_buffer_t { |
380 | pub const MAX_LEN_FACTOR: usize = 64; |
381 | pub const MAX_LEN_MIN: usize = 16384; |
382 | // Shaping more than a billion chars? Let us know! |
383 | pub const MAX_LEN_DEFAULT: usize = 0x3FFFFFFF; |
384 | |
385 | pub const MAX_OPS_FACTOR: i32 = 1024; |
386 | pub const MAX_OPS_MIN: i32 = 16384; |
387 | // Shaping more than a billion operations? Let us know! |
388 | pub const MAX_OPS_DEFAULT: i32 = 0x1FFFFFFF; |
389 | |
390 | /// Creates a new `Buffer`. |
391 | pub fn new() -> Self { |
392 | hb_buffer_t { |
393 | flags: BufferFlags::empty(), |
394 | cluster_level: HB_BUFFER_CLUSTER_LEVEL_DEFAULT, |
395 | invisible: None, |
396 | scratch_flags: HB_BUFFER_SCRATCH_FLAG_DEFAULT, |
397 | not_found_variation_selector: None, |
398 | max_len: Self::MAX_LEN_DEFAULT, |
399 | max_ops: Self::MAX_OPS_DEFAULT, |
400 | direction: Direction::Invalid, |
401 | script: None, |
402 | language: None, |
403 | shaping_failed: false, |
404 | successful: true, |
405 | have_output: false, |
406 | have_positions: false, |
407 | idx: 0, |
408 | len: 0, |
409 | out_len: 0, |
410 | info: Vec::new(), |
411 | pos: Vec::new(), |
412 | have_separate_output: false, |
413 | serial: 0, |
context: [
['\0', '\0', '\0', '\0', '\0'],
['\0', '\0', '\0', '\0', '\0'],
],
418 | context_len: [0, 0], |
419 | } |
420 | } |
421 | |
422 | #[inline ] |
423 | pub fn info_slice_mut(&mut self) -> &mut [hb_glyph_info_t] { |
424 | &mut self.info[..self.len] |
425 | } |
426 | |
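// While an output buffer is active, the produced glyphs are written either
// back into `info` (when input and output do not overlap) or, once
// `have_separate_output` is set by `make_room_for`, into `pos` reinterpreted
// as glyph infos via bytemuck; `sync()` swaps the two vectors back when done.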
427 | #[inline ] |
428 | pub fn out_info(&self) -> &[hb_glyph_info_t] { |
429 | if self.have_separate_output { |
430 | bytemuck::cast_slice(self.pos.as_slice()) |
431 | } else { |
432 | &self.info |
433 | } |
434 | } |
435 | |
436 | #[inline ] |
437 | pub fn out_info_mut(&mut self) -> &mut [hb_glyph_info_t] { |
438 | if self.have_separate_output { |
439 | bytemuck::cast_slice_mut(self.pos.as_mut_slice()) |
440 | } else { |
441 | &mut self.info |
442 | } |
443 | } |
444 | |
445 | #[inline ] |
446 | fn set_out_info(&mut self, i: usize, info: hb_glyph_info_t) { |
447 | self.out_info_mut()[i] = info; |
448 | } |
449 | |
450 | #[inline ] |
451 | pub fn cur(&self, i: usize) -> &hb_glyph_info_t { |
452 | &self.info[self.idx + i] |
453 | } |
454 | |
455 | #[inline ] |
456 | pub fn cur_mut(&mut self, i: usize) -> &mut hb_glyph_info_t { |
457 | let idx = self.idx + i; |
458 | &mut self.info[idx] |
459 | } |
460 | |
461 | #[inline ] |
462 | pub fn cur_pos_mut(&mut self) -> &mut GlyphPosition { |
463 | let i = self.idx; |
464 | &mut self.pos[i] |
465 | } |
466 | |
467 | #[inline ] |
468 | pub fn prev(&self) -> &hb_glyph_info_t { |
469 | let idx = self.out_len.saturating_sub(1); |
470 | &self.out_info()[idx] |
471 | } |
472 | |
473 | #[inline ] |
474 | pub fn prev_mut(&mut self) -> &mut hb_glyph_info_t { |
475 | let idx = self.out_len.saturating_sub(1); |
476 | &mut self.out_info_mut()[idx] |
477 | } |
478 | |
479 | pub fn digest(&self) -> hb_set_digest_t { |
480 | let mut digest = hb_set_digest_t::new(); |
481 | digest.add_array(self.info.iter().map(|i| GlyphId(i.glyph_id as u16))); |
482 | digest |
483 | } |
484 | |
485 | fn clear(&mut self) { |
486 | self.direction = Direction::Invalid; |
487 | self.script = None; |
488 | self.language = None; |
489 | |
490 | self.successful = true; |
491 | self.have_output = false; |
492 | self.have_positions = false; |
493 | |
494 | self.idx = 0; |
495 | self.info.clear(); |
496 | self.pos.clear(); |
497 | self.len = 0; |
498 | self.out_len = 0; |
499 | self.have_separate_output = false; |
500 | |
self.context = [
['\0', '\0', '\0', '\0', '\0'],
['\0', '\0', '\0', '\0', '\0'],
];
505 | self.context_len = [0, 0]; |
506 | |
507 | self.serial = 0; |
508 | self.scratch_flags = HB_BUFFER_SCRATCH_FLAG_DEFAULT; |
509 | self.cluster_level = HB_BUFFER_CLUSTER_LEVEL_DEFAULT; |
510 | self.not_found_variation_selector = None; |
511 | } |
512 | |
513 | #[inline ] |
514 | pub fn backtrack_len(&self) -> usize { |
515 | if self.have_output { |
516 | self.out_len |
517 | } else { |
518 | self.idx |
519 | } |
520 | } |
521 | |
522 | #[inline ] |
523 | pub fn lookahead_len(&self) -> usize { |
524 | self.len - self.idx |
525 | } |
526 | |
527 | #[inline ] |
528 | fn next_serial(&mut self) -> u8 { |
529 | // A `serial` overflow/wrap-around here is perfectly fine. |
530 | self.serial = self.serial.wrapping_add(1); |
531 | |
532 | if self.serial == 0 { |
533 | self.serial += 1; |
534 | } |
535 | |
536 | self.serial |
537 | } |
538 | |
539 | fn add(&mut self, codepoint: u32, cluster: u32) { |
540 | if !self.ensure(self.len + 1) { |
541 | return; |
542 | } |
543 | |
544 | let i = self.len; |
545 | self.info[i] = hb_glyph_info_t { |
546 | glyph_id: codepoint, |
547 | mask: 0, |
548 | cluster, |
549 | var1: 0, |
550 | var2: 0, |
551 | }; |
552 | |
553 | self.len += 1; |
554 | } |
555 | |
556 | #[inline ] |
557 | pub fn reverse(&mut self) { |
558 | if self.is_empty() { |
559 | return; |
560 | } |
561 | |
562 | self.reverse_range(0, self.len); |
563 | } |
564 | |
565 | pub fn reverse_range(&mut self, start: usize, end: usize) { |
566 | if end - start < 2 { |
567 | return; |
568 | } |
569 | |
570 | self.info[start..end].reverse(); |
571 | if self.have_positions { |
572 | self.pos[start..end].reverse(); |
573 | } |
574 | } |
575 | |
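// Reverses each group in place and then the whole buffer, so the net effect
// is that the order of the groups is reversed while the glyph order inside
// every group is preserved.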
576 | pub fn reverse_groups<F>(&mut self, group: F, merge_clusters: bool) |
577 | where |
578 | F: Fn(&hb_glyph_info_t, &hb_glyph_info_t) -> bool, |
579 | { |
580 | if self.is_empty() { |
581 | return; |
582 | } |
583 | |
584 | let mut start = 0; |
585 | let mut i = 1; |
586 | |
587 | while i < self.len { |
588 | if !group(&self.info[i - 1], &self.info[i]) { |
589 | if merge_clusters { |
590 | self.merge_clusters(start, i); |
591 | } |
592 | |
593 | self.reverse_range(start, i); |
594 | start = i; |
595 | } |
596 | |
597 | i += 1; |
598 | } |
599 | |
600 | if merge_clusters { |
601 | self.merge_clusters(start, i); |
602 | } |
603 | |
604 | self.reverse_range(start, i); |
605 | |
606 | self.reverse(); |
607 | } |
608 | |
609 | pub fn group_end<F>(&self, mut start: usize, group: F) -> usize |
610 | where |
611 | F: Fn(&hb_glyph_info_t, &hb_glyph_info_t) -> bool, |
612 | { |
613 | start += 1; |
614 | |
615 | while start < self.len && group(&self.info[start - 1], &self.info[start]) { |
616 | start += 1; |
617 | } |
618 | |
619 | start |
620 | } |
621 | |
622 | #[inline ] |
623 | fn reset_clusters(&mut self) { |
624 | for (i, info) in self.info.iter_mut().enumerate() { |
625 | info.cluster = i as u32; |
626 | } |
627 | } |
628 | |
629 | pub fn guess_segment_properties(&mut self) { |
630 | if self.script.is_none() { |
631 | for info in &self.info { |
632 | match info.as_char().script() { |
633 | crate::script::COMMON | crate::script::INHERITED | crate::script::UNKNOWN => {} |
634 | s => { |
635 | self.script = Some(s); |
636 | break; |
637 | } |
638 | } |
639 | } |
640 | } |
641 | |
642 | if self.direction == Direction::Invalid { |
643 | if let Some(script) = self.script { |
644 | self.direction = Direction::from_script(script).unwrap_or_default(); |
645 | } |
646 | |
647 | if self.direction == Direction::Invalid { |
648 | self.direction = Direction::LeftToRight; |
649 | } |
650 | } |
651 | |
652 | // TODO: language must be set |
653 | } |
654 | |
655 | pub fn sync(&mut self) -> bool { |
656 | assert!(self.have_output); |
657 | assert!(self.idx <= self.len); |
658 | |
659 | if !self.successful { |
660 | self.have_output = false; |
661 | self.out_len = 0; |
662 | self.idx = 0; |
663 | return false; |
664 | } |
665 | |
666 | self.next_glyphs(self.len - self.idx); |
667 | |
668 | if self.have_separate_output { |
669 | // Swap info and pos buffers. |
670 | let info: Vec<GlyphPosition> = bytemuck::cast_vec(core::mem::take(&mut self.info)); |
671 | let pos: Vec<hb_glyph_info_t> = bytemuck::cast_vec(core::mem::take(&mut self.pos)); |
672 | self.pos = info; |
673 | self.info = pos; |
674 | self.have_separate_output = false; |
675 | } |
676 | |
677 | self.len = self.out_len; |
678 | |
679 | self.have_output = false; |
680 | self.out_len = 0; |
681 | self.idx = 0; |
682 | true |
683 | } |
684 | |
685 | pub fn clear_output(&mut self) { |
686 | self.have_output = true; |
687 | self.have_positions = false; |
688 | |
689 | self.idx = 0; |
690 | self.out_len = 0; |
691 | self.have_separate_output = false; |
692 | } |
693 | |
694 | pub fn clear_positions(&mut self) { |
695 | self.have_output = false; |
696 | self.have_positions = true; |
697 | |
698 | self.out_len = 0; |
699 | self.have_separate_output = false; |
700 | |
701 | for pos in &mut self.pos { |
702 | *pos = GlyphPosition::default(); |
703 | } |
704 | } |
705 | |
706 | pub fn replace_glyphs(&mut self, num_in: usize, num_out: usize, glyph_data: &[u32]) { |
707 | if !self.make_room_for(num_in, num_out) { |
708 | return; |
709 | } |
710 | |
711 | assert!(self.idx + num_in <= self.len); |
712 | |
713 | self.merge_clusters(self.idx, self.idx + num_in); |
714 | |
715 | let orig_info = self.info[self.idx]; |
716 | for i in 0..num_out { |
717 | let ii = self.out_len + i; |
718 | self.set_out_info(ii, orig_info); |
719 | self.out_info_mut()[ii].glyph_id = glyph_data[i]; |
720 | } |
721 | |
722 | self.idx += num_in; |
723 | self.out_len += num_out; |
724 | } |
725 | |
726 | pub fn replace_glyph(&mut self, glyph_index: u32) { |
727 | if self.have_separate_output || self.out_len != self.idx { |
728 | if !self.make_room_for(1, 1) { |
729 | return; |
730 | } |
731 | |
732 | self.set_out_info(self.out_len, self.info[self.idx]); |
733 | } |
734 | |
735 | let out_len = self.out_len; |
736 | self.out_info_mut()[out_len].glyph_id = glyph_index; |
737 | |
738 | self.idx += 1; |
739 | self.out_len += 1; |
740 | } |
741 | |
742 | pub fn output_glyph(&mut self, glyph_index: u32) { |
743 | if !self.make_room_for(0, 1) { |
744 | return; |
745 | } |
746 | |
747 | if self.idx == self.len && self.out_len == 0 { |
748 | return; |
749 | } |
750 | |
751 | let out_len = self.out_len; |
752 | if self.idx < self.len { |
753 | self.set_out_info(out_len, self.info[self.idx]); |
754 | } else { |
755 | let info = self.out_info()[out_len - 1]; |
756 | self.set_out_info(out_len, info); |
757 | } |
758 | |
759 | self.out_info_mut()[out_len].glyph_id = glyph_index; |
760 | |
761 | self.out_len += 1; |
762 | } |
763 | |
764 | pub fn output_info(&mut self, glyph_info: hb_glyph_info_t) { |
765 | if !self.make_room_for(0, 1) { |
766 | return; |
767 | } |
768 | |
769 | self.set_out_info(self.out_len, glyph_info); |
770 | self.out_len += 1; |
771 | } |
772 | |
773 | /// Copies glyph at idx to output but doesn't advance idx. |
774 | pub fn copy_glyph(&mut self) { |
775 | if !self.make_room_for(0, 1) { |
776 | return; |
777 | } |
778 | |
779 | self.set_out_info(self.out_len, self.info[self.idx]); |
780 | self.out_len += 1; |
781 | } |
782 | |
783 | /// Copies glyph at idx to output and advance idx. |
784 | /// |
785 | /// If there's no output, just advance idx. |
786 | pub fn next_glyph(&mut self) { |
787 | if self.have_output { |
788 | if self.have_separate_output || self.out_len != self.idx { |
789 | if !self.make_room_for(1, 1) { |
790 | return; |
791 | } |
792 | |
793 | self.set_out_info(self.out_len, self.info[self.idx]); |
794 | } |
795 | |
796 | self.out_len += 1; |
797 | } |
798 | |
799 | self.idx += 1; |
800 | } |
801 | |
802 | /// Copies n glyphs at idx to output and advance idx. |
803 | /// |
804 | /// If there's no output, just advance idx. |
805 | pub fn next_glyphs(&mut self, n: usize) { |
806 | if self.have_output { |
807 | if self.have_separate_output || self.out_len != self.idx { |
808 | if !self.make_room_for(n, n) { |
809 | return; |
810 | } |
811 | |
812 | for i in 0..n { |
813 | self.set_out_info(self.out_len + i, self.info[self.idx + i]); |
814 | } |
815 | } |
816 | |
817 | self.out_len += n; |
818 | } |
819 | |
820 | self.idx += n; |
821 | } |
822 | |
823 | /// Advance idx without copying to output. |
824 | pub fn skip_glyph(&mut self) { |
825 | self.idx += 1; |
826 | } |
827 | |
828 | pub fn reset_masks(&mut self, mask: hb_mask_t) { |
829 | for info in &mut self.info[..self.len] { |
830 | info.mask = mask; |
831 | } |
832 | } |
833 | |
834 | pub fn set_masks( |
835 | &mut self, |
836 | mut value: hb_mask_t, |
837 | mask: hb_mask_t, |
838 | cluster_start: u32, |
839 | cluster_end: u32, |
840 | ) { |
841 | if mask == 0 { |
842 | return; |
843 | } |
844 | |
845 | let not_mask = !mask; |
846 | value &= mask; |
847 | |
848 | if cluster_start == 0 && cluster_end == u32::MAX { |
849 | for info in &mut self.info[..self.len] { |
850 | info.mask = (info.mask & not_mask) | value; |
851 | } |
852 | |
853 | return; |
854 | } |
855 | |
856 | for info in &mut self.info[..self.len] { |
857 | if cluster_start <= info.cluster && info.cluster < cluster_end { |
858 | info.mask = (info.mask & not_mask) | value; |
859 | } |
860 | } |
861 | } |
862 | |
863 | pub fn merge_clusters(&mut self, start: usize, end: usize) { |
864 | if end - start < 2 { |
865 | return; |
866 | } |
867 | |
868 | self.merge_clusters_impl(start, end) |
869 | } |
870 | |
871 | fn merge_clusters_impl(&mut self, mut start: usize, mut end: usize) { |
872 | if self.cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS { |
873 | self.unsafe_to_break(Some(start), Some(end)); |
874 | return; |
875 | } |
876 | |
877 | let mut cluster = self.info[start].cluster; |
878 | |
879 | for i in start + 1..end { |
880 | cluster = core::cmp::min(cluster, self.info[i].cluster); |
881 | } |
882 | |
883 | // Extend end |
884 | if cluster != self.info[end - 1].cluster { |
885 | while end < self.len && self.info[end - 1].cluster == self.info[end].cluster { |
886 | end += 1; |
887 | } |
888 | } |
889 | |
890 | // Extend start |
891 | if cluster != self.info[start].cluster { |
while self.idx < start && self.info[start - 1].cluster == self.info[start].cluster {
893 | start -= 1; |
894 | } |
895 | } |
896 | |
897 | // If we hit the start of buffer, continue in out-buffer. |
898 | if self.idx == start && self.info[start].cluster != cluster { |
899 | let mut i = self.out_len; |
900 | while i != 0 && self.out_info()[i - 1].cluster == self.info[start].cluster { |
901 | Self::set_cluster(&mut self.out_info_mut()[i - 1], cluster, 0); |
902 | i -= 1; |
903 | } |
904 | } |
905 | |
906 | for i in start..end { |
907 | Self::set_cluster(&mut self.info[i], cluster, 0); |
908 | } |
909 | } |
910 | |
911 | pub fn merge_out_clusters(&mut self, mut start: usize, mut end: usize) { |
912 | if self.cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS { |
913 | return; |
914 | } |
915 | |
916 | if end - start < 2 { |
917 | return; |
918 | } |
919 | |
920 | let mut cluster = self.out_info()[start].cluster; |
921 | |
922 | for i in start + 1..end { |
923 | cluster = core::cmp::min(cluster, self.out_info()[i].cluster); |
924 | } |
925 | |
926 | // Extend start |
927 | while start != 0 && self.out_info()[start - 1].cluster == self.out_info()[start].cluster { |
928 | start -= 1; |
929 | } |
930 | |
931 | // Extend end |
932 | while end < self.out_len && self.out_info()[end - 1].cluster == self.out_info()[end].cluster |
933 | { |
934 | end += 1; |
935 | } |
936 | |
// If we hit the end of the out-buffer, continue in the normal buffer.
938 | if end == self.out_len { |
939 | let mut i = self.idx; |
940 | while i < self.len && self.info[i].cluster == self.out_info()[end - 1].cluster { |
941 | Self::set_cluster(&mut self.info[i], cluster, 0); |
942 | i += 1; |
943 | } |
944 | } |
945 | |
946 | for i in start..end { |
947 | Self::set_cluster(&mut self.out_info_mut()[i], cluster, 0); |
948 | } |
949 | } |
950 | |
951 | /// Merge clusters for deleting current glyph, and skip it. |
952 | pub fn delete_glyph(&mut self) { |
953 | let cluster = self.info[self.idx].cluster; |
954 | |
955 | if (self.idx + 1 < self.len && cluster == self.info[self.idx + 1].cluster) |
956 | || (self.out_len != 0 && cluster == self.out_info()[self.out_len - 1].cluster) |
957 | { |
958 | // Cluster survives; do nothing. |
959 | self.skip_glyph(); |
960 | return; |
961 | } |
962 | |
963 | if self.out_len != 0 { |
964 | // Merge cluster backward. |
965 | if cluster < self.out_info()[self.out_len - 1].cluster { |
966 | let mask = self.info[self.idx].mask; |
967 | let old_cluster = self.out_info()[self.out_len - 1].cluster; |
968 | |
969 | let mut i = self.out_len; |
970 | while i != 0 && self.out_info()[i - 1].cluster == old_cluster { |
971 | Self::set_cluster(&mut self.out_info_mut()[i - 1], cluster, mask); |
972 | i -= 1; |
973 | } |
974 | } |
975 | |
976 | self.skip_glyph(); |
977 | return; |
978 | } |
979 | |
980 | if self.idx + 1 < self.len { |
981 | // Merge cluster forward. |
982 | self.merge_clusters(self.idx, self.idx + 2); |
983 | } |
984 | |
985 | self.skip_glyph(); |
986 | } |
987 | |
988 | pub fn delete_glyphs_inplace(&mut self, filter: impl Fn(&hb_glyph_info_t) -> bool) { |
989 | // Merge clusters and delete filtered glyphs. |
990 | // NOTE! We can't use out-buffer as we have positioning data. |
991 | let mut j = 0; |
992 | |
993 | for i in 0..self.len { |
994 | if filter(&self.info[i]) { |
995 | // Merge clusters. |
996 | // Same logic as delete_glyph(), but for in-place removal |
997 | |
998 | let cluster = self.info[i].cluster; |
999 | if i + 1 < self.len && cluster == self.info[i + 1].cluster { |
1000 | // Cluster survives; do nothing. |
1001 | continue; |
1002 | } |
1003 | |
1004 | if j != 0 { |
1005 | // Merge cluster backward. |
1006 | if cluster < self.info[j - 1].cluster { |
1007 | let mask = self.info[i].mask; |
1008 | let old_cluster = self.info[j - 1].cluster; |
1009 | |
1010 | let mut k = j; |
1011 | while k > 0 && self.info[k - 1].cluster == old_cluster { |
1012 | Self::set_cluster(&mut self.info[k - 1], cluster, mask); |
1013 | k -= 1; |
1014 | } |
1015 | } |
1016 | continue; |
1017 | } |
1018 | |
1019 | if i + 1 < self.len { |
1020 | // Merge cluster forward. |
1021 | self.merge_clusters(i, i + 2); |
1022 | } |
1023 | |
1024 | continue; |
1025 | } |
1026 | |
1027 | if j != i { |
1028 | self.info[j] = self.info[i]; |
1029 | self.pos[j] = self.pos[i]; |
1030 | } |
1031 | |
1032 | j += 1; |
1033 | } |
1034 | |
1035 | self.len = j; |
1036 | } |
1037 | |
1038 | pub fn unsafe_to_break(&mut self, start: Option<usize>, end: Option<usize>) { |
1039 | self._set_glyph_flags( |
1040 | UNSAFE_TO_BREAK | UNSAFE_TO_CONCAT, |
1041 | start, |
1042 | end, |
1043 | Some(true), |
1044 | None, |
1045 | ); |
1046 | } |
1047 | |
1048 | pub fn safe_to_insert_tatweel(&mut self, start: Option<usize>, end: Option<usize>) { |
1049 | if !self |
1050 | .flags |
1051 | .contains(BufferFlags::PRODUCE_SAFE_TO_INSERT_TATWEEL) |
1052 | { |
1053 | self.unsafe_to_break(start, end); |
1054 | return; |
1055 | } |
1056 | |
1057 | self._set_glyph_flags(SAFE_TO_INSERT_TATWEEL, start, end, Some(true), None); |
1058 | } |
1059 | |
/// Adds the glyph flags in `mask` to infos with clusters between `start` and `end`.
/// The `start` index refers to the out-buffer if `from_out_buffer` is true.
/// If `interior` is true, then the cluster having the minimum value is skipped.
1063 | fn _set_glyph_flags( |
1064 | &mut self, |
1065 | mask: hb_mask_t, |
1066 | start: Option<usize>, |
1067 | end: Option<usize>, |
1068 | interior: Option<bool>, |
1069 | from_out_buffer: Option<bool>, |
1070 | ) { |
1071 | let start = start.unwrap_or(0); |
1072 | let end = min(end.unwrap_or(self.len), self.len); |
1073 | let interior = interior.unwrap_or(false); |
1074 | let from_out_buffer = from_out_buffer.unwrap_or(false); |
1075 | |
1076 | if interior && !from_out_buffer && end - start < 2 { |
1077 | return; |
1078 | } |
1079 | |
1080 | self.scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS; |
1081 | |
1082 | if !from_out_buffer || !self.have_output { |
1083 | if !interior { |
1084 | for i in start..end { |
1085 | self.info[i].mask |= mask; |
1086 | } |
1087 | } else { |
1088 | let cluster = self._infos_find_min_cluster(&self.info, start, end, None); |
1089 | self._infos_set_glyph_flags(false, start, end, cluster, mask); |
1090 | } |
1091 | } else { |
1092 | assert!(start <= self.out_len); |
1093 | assert!(self.idx <= end); |
1094 | |
1095 | if !interior { |
1096 | for i in start..self.out_len { |
1097 | self.out_info_mut()[i].mask |= mask; |
1098 | } |
1099 | |
1100 | for i in self.idx..end { |
1101 | self.info[i].mask |= mask; |
1102 | } |
1103 | } else { |
1104 | let mut cluster = self._infos_find_min_cluster(&self.info, self.idx, end, None); |
1105 | cluster = self._infos_find_min_cluster( |
1106 | self.out_info(), |
1107 | start, |
1108 | self.out_len, |
1109 | Some(cluster), |
1110 | ); |
1111 | |
1112 | let out_len = self.out_len; |
1113 | self._infos_set_glyph_flags(true, start, out_len, cluster, mask); |
1114 | self._infos_set_glyph_flags(false, self.idx, end, cluster, mask); |
1115 | } |
1116 | } |
1117 | } |
1118 | |
1119 | pub fn unsafe_to_concat(&mut self, start: Option<usize>, end: Option<usize>) { |
1120 | if !self.flags.contains(BufferFlags::PRODUCE_UNSAFE_TO_CONCAT) { |
1121 | return; |
1122 | } |
1123 | |
1124 | self._set_glyph_flags(UNSAFE_TO_CONCAT, start, end, Some(false), None); |
1125 | } |
1126 | |
1127 | pub fn unsafe_to_break_from_outbuffer(&mut self, start: Option<usize>, end: Option<usize>) { |
1128 | self._set_glyph_flags( |
1129 | UNSAFE_TO_BREAK | UNSAFE_TO_CONCAT, |
1130 | start, |
1131 | end, |
1132 | Some(true), |
1133 | Some(true), |
1134 | ); |
1135 | } |
1136 | |
1137 | pub fn unsafe_to_concat_from_outbuffer(&mut self, start: Option<usize>, end: Option<usize>) { |
1138 | if !self.flags.contains(BufferFlags::PRODUCE_UNSAFE_TO_CONCAT) { |
1139 | return; |
1140 | } |
1141 | |
1142 | self._set_glyph_flags(UNSAFE_TO_CONCAT, start, end, Some(false), Some(true)); |
1143 | } |
1144 | |
1145 | pub fn move_to(&mut self, i: usize) -> bool { |
1146 | if !self.have_output { |
1147 | assert!(i <= self.len); |
1148 | self.idx = i; |
1149 | return true; |
1150 | } |
1151 | |
1152 | if !self.successful { |
1153 | return false; |
1154 | } |
1155 | |
1156 | assert!(i <= self.out_len + (self.len - self.idx)); |
1157 | |
1158 | if self.out_len < i { |
1159 | let count = i - self.out_len; |
1160 | if !self.make_room_for(count, count) { |
1161 | return false; |
1162 | } |
1163 | |
1164 | for j in 0..count { |
1165 | self.set_out_info(self.out_len + j, self.info[self.idx + j]); |
1166 | } |
1167 | |
1168 | self.idx += count; |
1169 | self.out_len += count; |
1170 | } else if self.out_len > i { |
1171 | // Tricky part: rewinding... |
1172 | let count = self.out_len - i; |
1173 | |
1174 | // This will blow in our face if memory allocation fails later |
1175 | // in this same lookup... |
1176 | // |
1177 | // We used to shift with extra 32 items. |
1178 | // But that would leave empty slots in the buffer in case of allocation |
1179 | // failures. See comments in shift_forward(). This can cause O(N^2) |
1180 | // behavior more severely than adding 32 empty slots can... |
1181 | if self.idx < count { |
1182 | self.shift_forward(count - self.idx); |
1183 | } |
1184 | |
1185 | assert!(self.idx >= count); |
1186 | |
1187 | self.idx -= count; |
1188 | self.out_len -= count; |
1189 | |
1190 | for j in 0..count { |
1191 | self.info[self.idx + j] = self.out_info()[self.out_len + j]; |
1192 | } |
1193 | } |
1194 | |
1195 | true |
1196 | } |
1197 | |
1198 | #[must_use ] |
1199 | pub fn ensure(&mut self, size: usize) -> bool { |
1200 | if size < self.len { |
1201 | return true; |
1202 | } |
1203 | |
1204 | if size > self.max_len { |
1205 | self.successful = false; |
1206 | return false; |
1207 | } |
1208 | |
1209 | self.info.resize(size, hb_glyph_info_t::default()); |
1210 | self.pos.resize(size, GlyphPosition::default()); |
1211 | true |
1212 | } |
1213 | |
1214 | #[must_use ] |
1215 | fn make_room_for(&mut self, num_in: usize, num_out: usize) -> bool { |
1216 | if !self.ensure(self.out_len + num_out) { |
1217 | return false; |
1218 | } |
1219 | |
1220 | if !self.have_separate_output && self.out_len + num_out > self.idx + num_in { |
1221 | assert!(self.have_output); |
1222 | |
1223 | self.have_separate_output = true; |
1224 | for i in 0..self.out_len { |
1225 | self.set_out_info(i, self.info[i]); |
1226 | } |
1227 | } |
1228 | |
1229 | true |
1230 | } |
1231 | |
1232 | fn shift_forward(&mut self, count: usize) { |
1233 | assert!(self.have_output); |
1234 | if !self.ensure(self.len + count) { |
1235 | return; |
1236 | } |
1237 | |
1238 | for i in (0..(self.len - self.idx)).rev() { |
1239 | self.info[self.idx + count + i] = self.info[self.idx + i]; |
1240 | } |
1241 | |
1242 | if self.idx + count > self.len { |
1243 | for info in &mut self.info[self.len..self.idx + count] { |
1244 | *info = hb_glyph_info_t::default(); |
1245 | } |
1246 | } |
1247 | |
1248 | self.len += count; |
1249 | self.idx += count; |
1250 | } |
1251 | |
1252 | fn clear_context(&mut self, side: usize) { |
1253 | self.context_len[side] = 0; |
1254 | } |
1255 | |
1256 | pub fn sort( |
1257 | &mut self, |
1258 | start: usize, |
1259 | end: usize, |
1260 | cmp: impl Fn(&hb_glyph_info_t, &hb_glyph_info_t) -> bool, |
1261 | ) { |
1262 | assert!(!self.have_positions); |
1263 | |
1264 | for i in start + 1..end { |
1265 | let mut j = i; |
1266 | while j > start && cmp(&self.info[j - 1], &self.info[i]) { |
1267 | j -= 1; |
1268 | } |
1269 | |
1270 | if i == j { |
1271 | continue; |
1272 | } |
1273 | |
1274 | // Move item i to occupy place for item j, shift what's in between. |
1275 | self.merge_clusters(j, i + 1); |
1276 | |
1277 | { |
1278 | let t = self.info[i]; |
1279 | for idx in (0..i - j).rev() { |
1280 | self.info[idx + j + 1] = self.info[idx + j]; |
1281 | } |
1282 | |
1283 | self.info[j] = t; |
1284 | } |
1285 | } |
1286 | } |
1287 | |
1288 | pub fn set_cluster(info: &mut hb_glyph_info_t, cluster: u32, mask: hb_mask_t) { |
1289 | if info.cluster != cluster { |
1290 | info.mask = (info.mask & !glyph_flag::DEFINED) | (mask & glyph_flag::DEFINED); |
1291 | } |
1292 | |
1293 | info.cluster = cluster; |
1294 | } |
1295 | |
1296 | // Called around shape() |
1297 | pub(crate) fn enter(&mut self) { |
1298 | self.serial = 0; |
1299 | self.shaping_failed = false; |
1300 | self.scratch_flags = HB_BUFFER_SCRATCH_FLAG_DEFAULT; |
1301 | |
1302 | if let Some(len) = self.len.checked_mul(hb_buffer_t::MAX_LEN_FACTOR) { |
1303 | self.max_len = len.max(hb_buffer_t::MAX_LEN_MIN); |
1304 | } |
1305 | |
1306 | if let Ok(len) = i32::try_from(self.len) { |
1307 | if let Some(ops) = len.checked_mul(hb_buffer_t::MAX_OPS_FACTOR) { |
1308 | self.max_ops = ops.max(hb_buffer_t::MAX_OPS_MIN); |
1309 | } |
1310 | } |
1311 | } |
1312 | |
1313 | // Called around shape() |
1314 | pub(crate) fn leave(&mut self) { |
1315 | self.max_len = hb_buffer_t::MAX_LEN_DEFAULT; |
1316 | self.max_ops = hb_buffer_t::MAX_OPS_DEFAULT; |
1317 | self.serial = 0; |
1318 | // Intentionally not resetting shaping_failed, such that it can be inspected. |
1319 | } |
1320 | |
1321 | fn _infos_find_min_cluster( |
1322 | &self, |
1323 | info: &[hb_glyph_info_t], |
1324 | start: usize, |
1325 | end: usize, |
1326 | cluster: Option<u32>, |
1327 | ) -> u32 { |
1328 | let mut cluster = cluster.unwrap_or(u32::MAX); |
1329 | |
1330 | if start == end { |
1331 | return cluster; |
1332 | } |
1333 | |
1334 | if self.cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS { |
1335 | for glyph_info in &info[start..end] { |
1336 | cluster = core::cmp::min(cluster, glyph_info.cluster); |
1337 | } |
1338 | } |
1339 | |
1340 | cluster.min(info[start].cluster.min(info[end - 1].cluster)) |
1341 | } |
1342 | |
1343 | fn _infos_set_glyph_flags( |
1344 | &mut self, |
1345 | out_info: bool, |
1346 | start: usize, |
1347 | end: usize, |
1348 | cluster: u32, |
1349 | mask: hb_mask_t, |
1350 | ) { |
1351 | let mut apply_scratch_flags = false; |
1352 | |
1353 | if start == end { |
1354 | return; |
1355 | } |
1356 | |
1357 | let cluster_level = self.cluster_level; |
1358 | |
1359 | let infos = if out_info { |
1360 | self.out_info_mut() |
1361 | } else { |
1362 | self.info.as_mut_slice() |
1363 | }; |
1364 | |
1365 | let cluster_first = infos[start].cluster; |
1366 | let cluster_last = infos[end - 1].cluster; |
1367 | |
1368 | if cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS |
1369 | || (cluster != cluster_first && cluster != cluster_last) |
1370 | { |
1371 | for i in start..end { |
1372 | if infos[i].cluster != cluster { |
1373 | apply_scratch_flags = true; |
1374 | infos[i].mask |= mask; |
1375 | } |
1376 | } |
1377 | |
1378 | if apply_scratch_flags { |
1379 | self.scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS; |
1380 | } |
1381 | |
1382 | return; |
1383 | } |
1384 | |
1385 | // Monotone clusters |
1386 | if cluster == cluster_first { |
1387 | let mut i = end; |
1388 | while start < i && infos[i - 1].cluster != cluster_first { |
1389 | if cluster != infos[i - 1].cluster { |
1390 | apply_scratch_flags = true; |
1391 | infos[i - 1].mask |= mask; |
1392 | } |
1393 | |
1394 | i -= 1; |
1395 | } |
1396 | } else { |
1397 | let mut i = start; |
1398 | while i < end && infos[i].cluster != cluster_last { |
1399 | if cluster != infos[i].cluster { |
1400 | apply_scratch_flags = true; |
1401 | infos[i].mask |= mask; |
1402 | } |
1403 | |
1404 | i += 1; |
1405 | } |
1406 | } |
1407 | |
1408 | if apply_scratch_flags { |
1409 | self.scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS; |
1410 | } |
1411 | } |
1412 | |
/// Checks whether the buffer contains no elements.
1414 | pub fn is_empty(&self) -> bool { |
1415 | self.len == 0 |
1416 | } |
1417 | |
1418 | fn push_str(&mut self, text: &str) { |
1419 | if !self.ensure(self.len + text.chars().count()) { |
1420 | return; |
1421 | } |
1422 | |
1423 | for (i, c) in text.char_indices() { |
1424 | self.add(c as u32, i as u32); |
1425 | } |
1426 | } |
1427 | |
1428 | fn set_pre_context(&mut self, text: &str) { |
1429 | self.clear_context(0); |
1430 | for (i, c) in text.chars().rev().enumerate().take(CONTEXT_LENGTH) { |
1431 | self.context[0][i] = c; |
1432 | self.context_len[0] += 1; |
1433 | } |
1434 | } |
1435 | |
1436 | fn set_post_context(&mut self, text: &str) { |
1437 | self.clear_context(1); |
1438 | for (i, c) in text.chars().enumerate().take(CONTEXT_LENGTH) { |
1439 | self.context[1][i] = c; |
1440 | self.context_len[1] += 1; |
1441 | } |
1442 | } |
1443 | |
1444 | pub fn next_syllable(&self, mut start: usize) -> usize { |
1445 | if start >= self.len { |
1446 | return start; |
1447 | } |
1448 | |
1449 | let syllable = self.info[start].syllable(); |
1450 | start += 1; |
1451 | while start < self.len && syllable == self.info[start].syllable() { |
1452 | start += 1; |
1453 | } |
1454 | |
1455 | start |
1456 | } |
1457 | |
1458 | #[inline ] |
1459 | pub fn allocate_lig_id(&mut self) -> u8 { |
1460 | let mut lig_id = self.next_serial() & 0x07; |
1461 | |
1462 | if lig_id == 0 { |
1463 | lig_id = self.allocate_lig_id(); |
1464 | } |
1465 | |
1466 | lig_id |
1467 | } |
1468 | } |
1469 | |
1470 | pub(crate) fn _cluster_group_func(a: &hb_glyph_info_t, b: &hb_glyph_info_t) -> bool { |
1471 | a.cluster == b.cluster |
1472 | } |
1473 | |
1474 | // TODO: to iter if possible |
1475 | |
1476 | macro_rules! foreach_cluster { |
1477 | ($buffer:expr, $start:ident, $end:ident, $($body:tt)*) => { |
1478 | foreach_group!($buffer, $start, $end, crate::hb::buffer::_cluster_group_func, $($body)*) |
1479 | }; |
1480 | } |
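
// Typical use inside a shaper, as a sketch (the body sees the `start..end`
// index range of one cluster at a time; `process` is a stand-in):
//
// foreach_cluster!(buffer, start, end, {
//     process(&mut buffer.info[start..end]);
// });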
1481 | |
1482 | macro_rules! foreach_group { |
1483 | ($buffer:expr, $start:ident, $end:ident, $group_func:expr, $($body:tt)*) => {{ |
1484 | let count = $buffer.len; |
1485 | let mut $start = 0; |
1486 | let mut $end = if count > 0 { $buffer.group_end(0, $group_func) } else { 0 }; |
1487 | |
1488 | while $start < count { |
1489 | $($body)*; |
1490 | $start = $end; |
1491 | $end = $buffer.group_end($start, $group_func); |
1492 | } |
1493 | }}; |
1494 | } |
1495 | |
1496 | macro_rules! foreach_syllable { |
1497 | ($buffer:expr, $start:ident, $end:ident, $($body:tt)*) => {{ |
1498 | let mut $start = 0; |
1499 | let mut $end = $buffer.next_syllable(0); |
1500 | while $start < $buffer.len { |
1501 | $($body)*; |
1502 | $start = $end; |
1503 | $end = $buffer.next_syllable($start); |
1504 | } |
1505 | }}; |
1506 | } |
1507 | |
1508 | macro_rules! foreach_grapheme { |
1509 | ($buffer:expr, $start:ident, $end:ident, $($body:tt)*) => { |
1510 | foreach_group!($buffer, $start, $end, crate::hb::ot_layout::_hb_grapheme_group_func, $($body)*) |
1511 | }; |
1512 | } |
1513 | |
1514 | bitflags::bitflags! { |
1515 | #[derive (Default, Debug, Clone, Copy)] |
1516 | pub struct UnicodeProps: u16 { |
1517 | const GENERAL_CATEGORY = 0x001F; |
1518 | const IGNORABLE = 0x0020; |
1519 | // MONGOLIAN FREE VARIATION SELECTOR 1..4, or TAG characters, or CGJ sometimes |
1520 | const HIDDEN = 0x0040; |
1521 | const CONTINUATION = 0x0080; |
1522 | |
1523 | // If GEN_CAT=FORMAT, top byte masks: |
1524 | const CF_ZWJ = 0x0100; |
1525 | const CF_ZWNJ = 0x0200; |
1526 | const CF_VS = 0x0400; |
1527 | } |
1528 | } |
1529 | |
1530 | bitflags::bitflags! { |
1531 | #[derive (Default, Debug, Clone, Copy)] |
1532 | pub struct GlyphPropsFlags: u16 { |
1533 | // The following three match LookupFlags::Ignore* numbers. |
1534 | const BASE_GLYPH = 0x02; |
1535 | const LIGATURE = 0x04; |
1536 | const MARK = 0x08; |
1537 | const CLASS_MASK = Self::BASE_GLYPH.bits() | Self::LIGATURE.bits() | Self::MARK.bits(); |
1538 | |
1539 | // The following are used internally; not derived from GDEF. |
1540 | const SUBSTITUTED = 0x10; |
1541 | const LIGATED = 0x20; |
1542 | const MULTIPLIED = 0x40; |
1543 | |
1544 | const PRESERVE = Self::SUBSTITUTED.bits() | Self::LIGATED.bits() | Self::MULTIPLIED.bits(); |
1545 | } |
1546 | } |
1547 | |
1548 | pub type hb_buffer_scratch_flags_t = u32; |
1549 | pub const HB_BUFFER_SCRATCH_FLAG_DEFAULT: u32 = 0x00000000; |
1550 | pub const HB_BUFFER_SCRATCH_FLAG_HAS_NON_ASCII: u32 = 0x00000001; |
1551 | pub const HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES: u32 = 0x00000002; |
1552 | pub const HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK: u32 = 0x00000004; |
1553 | pub const HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT: u32 = 0x00000008; |
1554 | pub const HB_BUFFER_SCRATCH_FLAG_HAS_CGJ: u32 = 0x00000010; |
1555 | pub const HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS: u32 = 0x00000020; |
1556 | pub const HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE: u32 = 0x00000040; |
1557 | pub const HB_BUFFER_SCRATCH_FLAG_HAS_VARIATION_SELECTOR_FALLBACK: u32 = 0x00000080; |
1558 | |
1559 | /* Reserved for shapers' internal use. */ |
1560 | pub const HB_BUFFER_SCRATCH_FLAG_SHAPER0: u32 = 0x01000000; |
1561 | // pub const HB_BUFFER_SCRATCH_FLAG_SHAPER1: u32 = 0x02000000; |
1562 | // pub const HB_BUFFER_SCRATCH_FLAG_SHAPER2: u32 = 0x04000000; |
1563 | // pub const HB_BUFFER_SCRATCH_FLAG_SHAPER3: u32 = 0x08000000; |
1564 | |
1565 | /// A buffer that contains an input string ready for shaping. |
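///
/// A typical flow, as a sketch that assumes this crate's top-level `shape`
/// entry point and an already-parsed `face`:
///
/// ```ignore
/// let mut buffer = UnicodeBuffer::new();
/// buffer.push_str("Hello world");
/// buffer.guess_segment_properties();
/// // Shaping consumes the UnicodeBuffer and yields a GlyphBuffer.
/// let glyphs = shape(&face, &[], buffer);
/// ```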
1566 | pub struct UnicodeBuffer(pub(crate) hb_buffer_t); |
1567 | |
1568 | impl UnicodeBuffer { |
1569 | /// Create a new `UnicodeBuffer`. |
1570 | #[inline ] |
1571 | pub fn new() -> UnicodeBuffer { |
1572 | UnicodeBuffer(hb_buffer_t::new()) |
1573 | } |
1574 | |
1575 | /// Returns the length of the data of the buffer. |
1576 | /// |
1577 | /// This corresponds to the number of unicode codepoints contained in the |
1578 | /// buffer. |
1579 | #[inline ] |
1580 | pub fn len(&self) -> usize { |
1581 | self.0.len |
1582 | } |
1583 | |
1584 | /// Returns `true` if the buffer contains no elements. |
1585 | #[inline ] |
1586 | pub fn is_empty(&self) -> bool { |
1587 | self.0.is_empty() |
1588 | } |
1589 | |
1590 | /// Pushes a string to a buffer. |
1591 | #[inline ] |
1592 | pub fn push_str(&mut self, str: &str) { |
1593 | self.0.push_str(str); |
1594 | } |
1595 | |
1596 | /// Sets the pre-context for this buffer. |
1597 | #[inline ] |
1598 | pub fn set_pre_context(&mut self, str: &str) { |
1599 | self.0.set_pre_context(str) |
1600 | } |
1601 | |
1602 | /// Sets the post-context for this buffer. |
1603 | #[inline ] |
1604 | pub fn set_post_context(&mut self, str: &str) { |
1605 | self.0.set_post_context(str) |
1606 | } |
1607 | |
1608 | /// Appends a character to a buffer with the given cluster value. |
1609 | #[inline ] |
1610 | pub fn add(&mut self, codepoint: char, cluster: u32) { |
1611 | self.0.add(codepoint as u32, cluster); |
1612 | self.0.context_len[1] = 0; |
1613 | } |
1614 | |
1615 | /// Set the text direction of the `Buffer`'s contents. |
1616 | #[inline ] |
1617 | pub fn set_direction(&mut self, direction: Direction) { |
1618 | self.0.direction = direction; |
1619 | } |
1620 | |
1621 | /// Returns the `Buffer`'s text direction. |
1622 | #[inline ] |
1623 | pub fn direction(&self) -> Direction { |
1624 | self.0.direction |
1625 | } |
1626 | |
1627 | /// Set the script from an ISO15924 tag. |
1628 | #[inline ] |
1629 | pub fn set_script(&mut self, script: Script) { |
1630 | self.0.script = Some(script); |
1631 | } |
1632 | |
1633 | /// Get the ISO15924 script tag. |
1634 | pub fn script(&self) -> Script { |
1635 | self.0.script.unwrap_or(script::UNKNOWN) |
1636 | } |
1637 | |
1638 | /// Set the buffer language. |
1639 | #[inline ] |
1640 | pub fn set_language(&mut self, lang: Language) { |
1641 | self.0.language = Some(lang); |
1642 | } |
1643 | |
1644 | /// Set the glyph value to replace not-found variation-selector characters with. |
1645 | #[inline ] |
1646 | pub fn set_not_found_variation_selector_glyph(&mut self, glyph: u32) { |
1647 | self.0.not_found_variation_selector = Some(glyph) |
1648 | } |
1649 | |
1650 | /// Get the buffer language. |
1651 | #[inline ] |
1652 | pub fn language(&self) -> Option<Language> { |
1653 | self.0.language.clone() |
1654 | } |
1655 | |
1656 | /// Guess the segment properties (direction, language, script) for the |
1657 | /// current buffer. |
1658 | #[inline ] |
1659 | pub fn guess_segment_properties(&mut self) { |
1660 | self.0.guess_segment_properties() |
1661 | } |
1662 | |
1663 | /// Set the flags for this buffer. |
1664 | #[inline ] |
1665 | pub fn set_flags(&mut self, flags: BufferFlags) { |
1666 | self.0.flags = flags; |
1667 | } |
1668 | |
1669 | /// Get the flags for this buffer. |
1670 | #[inline ] |
1671 | pub fn flags(&self) -> BufferFlags { |
1672 | self.0.flags |
1673 | } |
1674 | |
1675 | /// Set the cluster level of the buffer. |
1676 | #[inline ] |
1677 | pub fn set_cluster_level(&mut self, cluster_level: BufferClusterLevel) { |
1678 | self.0.cluster_level = match cluster_level { |
1679 | BufferClusterLevel::MonotoneGraphemes => HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES, |
1680 | BufferClusterLevel::MonotoneCharacters => HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS, |
1681 | BufferClusterLevel::Characters => HB_BUFFER_CLUSTER_LEVEL_CHARACTERS, |
1682 | } |
1683 | } |
1684 | |
1685 | /// Retrieve the cluster level of the buffer. |
1686 | #[inline ] |
1687 | pub fn cluster_level(&self) -> BufferClusterLevel { |
1688 | match self.0.cluster_level { |
1689 | HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES => BufferClusterLevel::MonotoneGraphemes, |
1690 | HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS => BufferClusterLevel::MonotoneCharacters, |
1691 | HB_BUFFER_CLUSTER_LEVEL_CHARACTERS => BufferClusterLevel::Characters, |
1692 | _ => BufferClusterLevel::MonotoneGraphemes, |
1693 | } |
1694 | } |
1695 | |
1696 | /// Resets clusters. |
1697 | #[inline ] |
1698 | pub fn reset_clusters(&mut self) { |
1699 | self.0.reset_clusters(); |
1700 | } |
1701 | |
1702 | /// Clear the contents of the buffer. |
1703 | #[inline ] |
1704 | pub fn clear(&mut self) { |
1705 | self.0.clear() |
1706 | } |
1707 | } |
1708 | |
1709 | impl core::fmt::Debug for UnicodeBuffer { |
1710 | fn fmt(&self, fmt: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
fmt.debug_struct("UnicodeBuffer")
.field("direction", &self.direction())
.field("language", &self.language())
.field("script", &self.script())
.field("cluster_level", &self.cluster_level())
1716 | .finish() |
1717 | } |
1718 | } |
1719 | |
1720 | impl Default for UnicodeBuffer { |
1721 | fn default() -> UnicodeBuffer { |
1722 | UnicodeBuffer::new() |
1723 | } |
1724 | } |
1725 | |
1726 | /// A buffer that contains the results of the shaping process. |
1727 | pub struct GlyphBuffer(pub(crate) hb_buffer_t); |
1728 | |
1729 | impl GlyphBuffer { |
1730 | /// Returns the length of the data of the buffer. |
1731 | /// |
1732 | /// When called before shaping this is the number of unicode codepoints |
1733 | /// contained in the buffer. When called after shaping it returns the number |
1734 | /// of glyphs stored. |
1735 | #[inline ] |
1736 | pub fn len(&self) -> usize { |
1737 | self.0.len |
1738 | } |
1739 | |
1740 | /// Returns `true` if the buffer contains no elements. |
1741 | #[inline ] |
1742 | pub fn is_empty(&self) -> bool { |
1743 | self.0.is_empty() |
1744 | } |
1745 | |
1746 | /// Get the glyph infos. |
1747 | #[inline ] |
1748 | pub fn glyph_infos(&self) -> &[hb_glyph_info_t] { |
1749 | &self.0.info[0..self.0.len] |
1750 | } |
1751 | |
1752 | /// Get the glyph positions. |
1753 | #[inline ] |
1754 | pub fn glyph_positions(&self) -> &[GlyphPosition] { |
1755 | &self.0.pos[0..self.0.len] |
1756 | } |
1757 | |
1758 | /// Clears the content of the glyph buffer and returns an empty |
1759 | /// `UnicodeBuffer` reusing the existing allocation. |
1760 | #[inline ] |
1761 | pub fn clear(mut self) -> UnicodeBuffer { |
1762 | self.0.clear(); |
1763 | UnicodeBuffer(self.0) |
1764 | } |
1765 | |
1766 | /// Converts the glyph buffer content into a string. |
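///
/// The output loosely follows HarfBuzz's buffer serialization format: one
/// `|`-separated entry per glyph, e.g. (illustrative only)
/// `uni0627=0+1156|uni0644=1@5,0+512`, where `=N` is the cluster, `@x,y`
/// the offsets, and `+x[,y]` the advances, with each part subject to `flags`.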
1767 | pub fn serialize(&self, face: &hb_font_t, flags: SerializeFlags) -> String { |
1768 | self.serialize_impl(face, flags).unwrap_or_default() |
1769 | } |
1770 | |
1771 | fn serialize_impl( |
1772 | &self, |
1773 | face: &hb_font_t, |
1774 | flags: SerializeFlags, |
1775 | ) -> Result<String, core::fmt::Error> { |
1776 | use core::fmt::Write; |
1777 | |
1778 | let mut s = String::with_capacity(64); |
1779 | |
1780 | let info = self.glyph_infos(); |
1781 | let pos = self.glyph_positions(); |
1782 | let mut x = 0; |
1783 | let mut y = 0; |
1784 | for (info, pos) in info.iter().zip(pos) { |
1785 | if !flags.contains(SerializeFlags::NO_GLYPH_NAMES) { |
1786 | match face.glyph_name(info.as_glyph()) { |
1787 | Some(name) => s.push_str(name), |
None => write!(&mut s, "gid{}", info.glyph_id)?,
1789 | } |
1790 | } else { |
write!(&mut s, "{}", info.glyph_id)?;
1792 | } |
1793 | |
1794 | if !flags.contains(SerializeFlags::NO_CLUSTERS) { |
write!(&mut s, "={}", info.cluster)?;
1796 | } |
1797 | |
1798 | if !flags.contains(SerializeFlags::NO_POSITIONS) { |
1799 | if x + pos.x_offset != 0 || y + pos.y_offset != 0 { |
write!(&mut s, "@{},{}", x + pos.x_offset, y + pos.y_offset)?;
1801 | } |
1802 | |
1803 | if !flags.contains(SerializeFlags::NO_ADVANCES) { |
write!(&mut s, "+{}", pos.x_advance)?;
if pos.y_advance != 0 {
write!(&mut s, ",{}", pos.y_advance)?;
1807 | } |
1808 | } |
1809 | } |
1810 | |
1811 | if flags.contains(SerializeFlags::GLYPH_FLAGS) { |
1812 | if info.mask & glyph_flag::DEFINED != 0 { |
write!(&mut s, "#{:X}", info.mask & glyph_flag::DEFINED)?;
1814 | } |
1815 | } |
1816 | |
1817 | if flags.contains(SerializeFlags::GLYPH_EXTENTS) { |
1818 | let mut extents = hb_glyph_extents_t::default(); |
1819 | face.glyph_extents(info.as_glyph(), &mut extents); |
1820 | write!( |
1821 | &mut s, |
"<{},{},{},{}>",
1823 | extents.x_bearing, extents.y_bearing, extents.width, extents.height |
1824 | )?; |
1825 | } |
1826 | |
1827 | if flags.contains(SerializeFlags::NO_ADVANCES) { |
1828 | x += pos.x_advance; |
1829 | y += pos.y_advance; |
1830 | } |
1831 | |
s.push('|');
1833 | } |
1834 | |
1835 | // Remove last `|`. |
1836 | if !s.is_empty() { |
1837 | s.pop(); |
1838 | } |
1839 | |
1840 | Ok(s) |
1841 | } |
1842 | } |
1843 | |
1844 | impl core::fmt::Debug for GlyphBuffer { |
1845 | fn fmt(&self, fmt: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
fmt.debug_struct("GlyphBuffer")
.field("glyph_positions", &self.glyph_positions())
.field("glyph_infos", &self.glyph_infos())
1849 | .finish() |
1850 | } |
1851 | } |
1852 | |