1use core::convert::TryFrom;
2
3use crate::buffer::{
4 glyph_flag, Buffer, BufferClusterLevel, BufferFlags, BufferScratchFlags, GlyphInfo,
5 GlyphPropsFlags,
6};
7use crate::complex::ZeroWidthMarksMode;
8use crate::plan::ShapePlan;
9use crate::unicode::{CharExt, GeneralCategory, GeneralCategoryExt};
10use crate::{
11 aat, fallback, normalize, ot, script, Direction, Face, Feature, GlyphBuffer, UnicodeBuffer,
12};
13
14/// Shapes the buffer content using provided font and features.
15///
16/// Consumes the buffer. You can then run [`GlyphBuffer::clear`] to get the [`UnicodeBuffer`] back
17/// without allocating a new one.
18///
19/// If you plan to shape multiple strings using the same [`Face`] prefer [`shape_with_plan`].
20/// This is because [`ShapePlan`] initialization is pretty slow and should preferably be called
21/// once for each [`Face`].
22pub fn shape(face: &Face, features: &[Feature], mut buffer: UnicodeBuffer) -> GlyphBuffer {
23 buffer.0.guess_segment_properties();
24 let plan: ShapePlan = ShapePlan::new(
25 face,
26 buffer.0.direction,
27 buffer.0.script,
28 language:buffer.0.language.as_ref(),
29 user_features:features,
30 );
31 shape_with_plan(face, &plan, buffer)
32}
33
34/// Shapes the buffer content using the provided font and plan.
35///
36/// Consumes the buffer. You can then run [`GlyphBuffer::clear`] to get the [`UnicodeBuffer`] back
37/// without allocating a new one.
38///
39/// It is up to the caller to ensure that the shape plan matches the properties of the provided
40/// buffer, otherwise the shaping result will likely be incorrect.
41///
42/// # Panics
43///
44/// Will panic when debugging assertions are enabled if the buffer and plan have mismatched
45/// properties.
46pub fn shape_with_plan(face: &Face, plan: &ShapePlan, buffer: UnicodeBuffer) -> GlyphBuffer {
47 let mut buffer: Buffer = buffer.0;
48 buffer.guess_segment_properties();
49
50 debug_assert_eq!(buffer.direction, plan.direction);
51 debug_assert_eq!(
52 buffer.script.unwrap_or(script::UNKNOWN),
53 plan.script.unwrap_or(script::UNKNOWN)
54 );
55
56 if buffer.len > 0 {
57 // Save the original direction, we use it later.
58 let target_direction: Direction = buffer.direction;
59 shape_internal(&mut ShapeContext {
60 plan,
61 face,
62 buffer: &mut buffer,
63 target_direction,
64 });
65 }
66
67 GlyphBuffer(buffer)
68}
69
70struct ShapeContext<'a> {
71 plan: &'a ShapePlan,
72 face: &'a Face<'a>,
73 buffer: &'a mut Buffer,
74 // Transient stuff
75 target_direction: Direction,
76}
77
78// Pull it all together!
79fn shape_internal(ctx: &mut ShapeContext) {
80 ctx.buffer.scratch_flags = BufferScratchFlags::empty();
81
82 if let Some(len) = ctx.buffer.len.checked_mul(Buffer::MAX_LEN_FACTOR) {
83 ctx.buffer.max_len = len.max(Buffer::MAX_LEN_MIN);
84 }
85
86 if let Ok(len) = i32::try_from(ctx.buffer.len) {
87 if let Some(ops) = len.checked_mul(Buffer::MAX_OPS_FACTOR) {
88 ctx.buffer.max_ops = ops.max(Buffer::MAX_OPS_MIN);
89 }
90 }
91
92 initialize_masks(ctx);
93 set_unicode_props(ctx.buffer);
94 insert_dotted_circle(ctx.buffer, ctx.face);
95
96 form_clusters(ctx.buffer);
97
98 ensure_native_direction(ctx.buffer);
99
100 if let Some(func) = ctx.plan.shaper.preprocess_text {
101 func(ctx.plan, ctx.face, ctx.buffer);
102 }
103
104 substitute_pre(ctx);
105 position(ctx);
106 substitute_post(ctx);
107
108 propagate_flags(ctx.buffer);
109
110 ctx.buffer.direction = ctx.target_direction;
111 ctx.buffer.max_len = Buffer::MAX_LEN_DEFAULT;
112 ctx.buffer.max_ops = Buffer::MAX_OPS_DEFAULT;
113}
114
115fn substitute_pre(ctx: &mut ShapeContext) {
116 substitute_default(ctx);
117 substitute_complex(ctx);
118}
119
120fn substitute_post(ctx: &mut ShapeContext) {
121 hide_default_ignorables(ctx.buffer, ctx.face);
122
123 if ctx.plan.apply_morx {
124 aat::remove_deleted_glyphs(ctx.buffer);
125 }
126
127 if let Some(func: fn(&ShapePlan, &Face<'_>, …)) = ctx.plan.shaper.postprocess_glyphs {
128 func(ctx.plan, ctx.face, ctx.buffer);
129 }
130}
131
132fn substitute_default(ctx: &mut ShapeContext) {
133 rotate_chars(ctx);
134
135 normalize::normalize(ctx.plan, ctx.face, ctx.buffer);
136
137 setup_masks(ctx);
138
139 // This is unfortunate to go here, but necessary...
140 if ctx.plan.fallback_mark_positioning {
141 fallback::recategorize_marks(ctx.plan, ctx.face, ctx.buffer);
142 }
143
144 map_glyphs_fast(ctx.buffer);
145}
146
147fn substitute_complex(ctx: &mut ShapeContext) {
148 ot::substitute_start(ctx.face, ctx.buffer);
149
150 if ctx.plan.fallback_glyph_classes {
151 synthesize_glyph_classes(ctx.buffer);
152 }
153
154 substitute_by_plan(ctx.plan, ctx.face, ctx.buffer);
155}
156
157fn substitute_by_plan(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) {
158 if plan.apply_morx {
159 aat::substitute(plan, face, buffer);
160 } else {
161 ot::substitute(plan, face, buffer);
162 }
163}
164
165fn position(ctx: &mut ShapeContext) {
166 ctx.buffer.clear_positions();
167
168 position_default(ctx);
169
170 position_complex(ctx);
171
172 if ctx.buffer.direction.is_backward() {
173 ctx.buffer.reverse();
174 }
175}
176
177fn position_default(ctx: &mut ShapeContext) {
178 let len = ctx.buffer.len;
179
180 if ctx.buffer.direction.is_horizontal() {
181 for (info, pos) in ctx.buffer.info[..len]
182 .iter()
183 .zip(&mut ctx.buffer.pos[..len])
184 {
185 pos.x_advance = ctx.face.glyph_h_advance(info.as_glyph());
186 }
187 } else {
188 for (info, pos) in ctx.buffer.info[..len]
189 .iter()
190 .zip(&mut ctx.buffer.pos[..len])
191 {
192 let glyph = info.as_glyph();
193 pos.y_advance = ctx.face.glyph_v_advance(glyph);
194 pos.x_offset -= ctx.face.glyph_h_origin(glyph);
195 pos.y_offset -= ctx.face.glyph_v_origin(glyph);
196 }
197 }
198
199 if ctx
200 .buffer
201 .scratch_flags
202 .contains(BufferScratchFlags::HAS_SPACE_FALLBACK)
203 {
204 fallback::adjust_spaces(ctx.plan, ctx.face, ctx.buffer);
205 }
206}
207
208fn position_complex(ctx: &mut ShapeContext) {
209 // If the font has no GPOS and direction is forward, then when
210 // zeroing mark widths, we shift the mark with it, such that the
211 // mark is positioned hanging over the previous glyph. When
212 // direction is backward we don't shift and it will end up
213 // hanging over the next glyph after the final reordering.
214 //
215 // Note: If fallback positioning happens, we don't care about
216 // this as it will be overriden.
217 let adjust_offsets_when_zeroing =
218 ctx.plan.adjust_mark_positioning_when_zeroing && ctx.buffer.direction.is_forward();
219
220 // We change glyph origin to what GPOS expects (horizontal), apply GPOS, change it back.
221
222 ot::position_start(ctx.face, ctx.buffer);
223
224 if ctx.plan.zero_marks
225 && ctx.plan.shaper.zero_width_marks == Some(ZeroWidthMarksMode::ByGdefEarly)
226 {
227 zero_mark_widths_by_gdef(ctx.buffer, adjust_offsets_when_zeroing);
228 }
229
230 position_by_plan(ctx.plan, ctx.face, ctx.buffer);
231
232 if ctx.plan.zero_marks
233 && ctx.plan.shaper.zero_width_marks == Some(ZeroWidthMarksMode::ByGdefLate)
234 {
235 zero_mark_widths_by_gdef(ctx.buffer, adjust_offsets_when_zeroing);
236 }
237
238 // Finish off. Has to follow a certain order.
239 ot::position_finish_advances(ctx.face, ctx.buffer);
240 zero_width_default_ignorables(ctx.buffer);
241
242 if ctx.plan.apply_morx {
243 aat::zero_width_deleted_glyphs(ctx.buffer);
244 }
245
246 ot::position_finish_offsets(ctx.face, ctx.buffer);
247
248 if ctx.plan.fallback_mark_positioning {
249 fallback::position_marks(ctx.plan, ctx.face, ctx.buffer, adjust_offsets_when_zeroing);
250 }
251}
252
253fn position_by_plan(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) {
254 if plan.apply_gpos {
255 ot::position(plan, face, buffer);
256 } else if plan.apply_kerx {
257 aat::position(plan, face, buffer);
258 }
259 if plan.apply_kern {
260 ot::kern(plan, face, buffer);
261 } else if plan.apply_fallback_kern {
262 fallback::kern(plan, face, buffer);
263 }
264
265 if plan.apply_trak {
266 aat::track(plan, face, buffer);
267 }
268}
269
270fn initialize_masks(ctx: &mut ShapeContext) {
271 let global_mask: u32 = ctx.plan.ot_map.global_mask();
272 ctx.buffer.reset_masks(global_mask);
273}
274
275fn setup_masks(ctx: &mut ShapeContext) {
276 setup_masks_fraction(ctx);
277
278 if let Some(func: fn(&ShapePlan, &Face<'_>, …)) = ctx.plan.shaper.setup_masks {
279 func(ctx.plan, ctx.face, ctx.buffer);
280 }
281
282 for feature: &Feature in &ctx.plan.user_features {
283 if !feature.is_global() {
284 let (mask: u32, shift: u32) = ctx.plan.ot_map.mask(feature_tag:feature.tag);
285 ctx.buffer
286 .set_masks(value:feature.value << shift, mask, cluster_start:feature.start, cluster_end:feature.end);
287 }
288 }
289}
290
291fn setup_masks_fraction(ctx: &mut ShapeContext) {
292 let buffer = &mut ctx.buffer;
293 if !buffer
294 .scratch_flags
295 .contains(BufferScratchFlags::HAS_NON_ASCII)
296 || !ctx.plan.has_frac
297 {
298 return;
299 }
300
301 let (pre_mask, post_mask) = if buffer.direction.is_forward() {
302 (
303 ctx.plan.numr_mask | ctx.plan.frac_mask,
304 ctx.plan.frac_mask | ctx.plan.dnom_mask,
305 )
306 } else {
307 (
308 ctx.plan.frac_mask | ctx.plan.dnom_mask,
309 ctx.plan.numr_mask | ctx.plan.frac_mask,
310 )
311 };
312
313 let len = buffer.len;
314 let mut i = 0;
315 while i < len {
316 // FRACTION SLASH
317 if buffer.info[i].glyph_id == 0x2044 {
318 let mut start = i;
319 while start > 0
320 && buffer.info[start - 1].general_category() == GeneralCategory::DecimalNumber
321 {
322 start -= 1;
323 }
324
325 let mut end = i + 1;
326 while end < len && buffer.info[end].general_category() == GeneralCategory::DecimalNumber
327 {
328 end += 1;
329 }
330
331 buffer.unsafe_to_break(start, end);
332
333 for info in &mut buffer.info[start..i] {
334 info.mask |= pre_mask;
335 }
336
337 buffer.info[i].mask |= ctx.plan.frac_mask;
338
339 for info in &mut buffer.info[i + 1..end] {
340 info.mask |= post_mask;
341 }
342
343 i = end;
344 } else {
345 i += 1;
346 }
347 }
348}
349
350fn set_unicode_props(buffer: &mut Buffer) {
351 // Implement enough of Unicode Graphemes here that shaping
352 // in reverse-direction wouldn't break graphemes. Namely,
353 // we mark all marks and ZWJ and ZWJ,Extended_Pictographic
354 // sequences as continuations. The foreach_grapheme()
355 // macro uses this bit.
356 //
357 // https://www.unicode.org/reports/tr29/#Regex_Definitions
358
359 let len = buffer.len;
360
361 let mut i = 0;
362 while i < len {
363 // Mutably borrow buffer.info[i] and immutably borrow
364 // buffer.info[i - 1] (if present) in a way that the borrow
365 // checker can understand.
366 let (prior, later) = buffer.info.split_at_mut(i);
367 let info = &mut later[0];
368 info.init_unicode_props(&mut buffer.scratch_flags);
369
370 // Marks are already set as continuation by the above line.
371 // Handle Emoji_Modifier and ZWJ-continuation.
372 if info.general_category() == GeneralCategory::ModifierSymbol
373 && matches!(info.glyph_id, 0x1F3FB..=0x1F3FF)
374 {
375 info.set_continuation();
376 } else if i != 0 && matches!(info.glyph_id, 0x1F1E6..=0x1F1FF) {
377 // Should never fail because we checked for i > 0.
378 // TODO: use let chains when they become stable
379 let prev = prior.last().unwrap();
380 if matches!(prev.glyph_id, 0x1F1E6..=0x1F1FF) && !prev.is_continuation() {
381 info.set_continuation();
382 }
383 } else if info.is_zwj() {
384 info.set_continuation();
385 if let Some(next) = buffer.info[..len].get_mut(i + 1) {
386 if next.as_char().is_emoji_extended_pictographic() {
387 next.init_unicode_props(&mut buffer.scratch_flags);
388 next.set_continuation();
389 i += 1;
390 }
391 }
392 } else if matches!(info.glyph_id, 0xE0020..=0xE007F) {
393 // Or part of the Other_Grapheme_Extend that is not marks.
394 // As of Unicode 11 that is just:
395 //
396 // 200C ; Other_Grapheme_Extend # Cf ZERO WIDTH NON-JOINER
397 // FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA
398 // SEMI-VOICED SOUND MARK E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG
399 //
400 // ZWNJ is special, we don't want to merge it as there's no need, and keeping
401 // it separate results in more granular clusters. Ignore Katakana for now.
402 // Tags are used for Emoji sub-region flag sequences:
403 // https://github.com/harfbuzz/harfbuzz/issues/1556
404 info.set_continuation();
405 }
406
407 i += 1;
408 }
409}
410
411fn insert_dotted_circle(buffer: &mut Buffer, face: &Face) {
412 if !bufferBufferFlags
413 .flags
414 .contains(BufferFlags::DO_NOT_INSERT_DOTTED_CIRCLE)
415 && buffer.flags.contains(BufferFlags::BEGINNING_OF_TEXT)
416 && buffer.context_len[0] == 0
417 && buffer.info[0].is_unicode_mark()
418 && face.has_glyph(0x25CC)
419 {
420 let mut info: GlyphInfo = GlyphInfo {
421 glyph_id: 0x25CC,
422 mask: buffer.cur(0).mask,
423 cluster: buffer.cur(0).cluster,
424 ..GlyphInfo::default()
425 };
426
427 info.init_unicode_props(&mut buffer.scratch_flags);
428 buffer.clear_output();
429 buffer.output_info(glyph_info:info);
430 buffer.swap_buffers();
431 }
432}
433
434fn form_clusters(buffer: &mut Buffer) {
435 if bufferBufferScratchFlags
436 .scratch_flags
437 .contains(BufferScratchFlags::HAS_NON_ASCII)
438 {
439 if buffer.cluster_level == BufferClusterLevel::MonotoneGraphemes {
440 foreach_grapheme!(buffer, start, end, { buffer.merge_clusters(start, end) });
441 } else {
442 foreach_grapheme!(buffer, start, end, {
443 buffer.unsafe_to_break(start, end);
444 });
445 }
446 }
447}
448
449fn ensure_native_direction(buffer: &mut Buffer) {
450 let dir = buffer.direction;
451 let mut hor = buffer
452 .script
453 .and_then(Direction::from_script)
454 .unwrap_or_default();
455
456 // Numeric runs in natively-RTL scripts are actually native-LTR, so we reset
457 // the horiz_dir if the run contains at least one decimal-number char, and no
458 // letter chars (ideally we should be checking for chars with strong
459 // directionality but hb-unicode currently lacks bidi categories).
460 //
461 // This allows digit sequences in Arabic etc to be shaped in "native"
462 // direction, so that features like ligatures will work as intended.
463 //
464 // https://github.com/harfbuzz/harfbuzz/issues/501
465
466 if hor == Direction::RightToLeft && dir == Direction::LeftToRight {
467 let mut found_number = false;
468 let mut found_letter = false;
469 for info in &buffer.info {
470 let gc = info.general_category();
471 if gc == GeneralCategory::DecimalNumber {
472 found_number = true;
473 } else if gc.is_letter() {
474 found_letter = true;
475 break;
476 }
477 }
478 if found_number && !found_letter {
479 hor = Direction::LeftToRight;
480 }
481 }
482
483 if (dir.is_horizontal() && dir != hor && hor != Direction::Invalid)
484 || (dir.is_vertical() && dir != Direction::TopToBottom)
485 {
486 if buffer.cluster_level == BufferClusterLevel::MonotoneCharacters {
487 foreach_grapheme!(buffer, start, end, {
488 buffer.merge_clusters(start, end);
489 buffer.reverse_range(start, end);
490 });
491 } else {
492 foreach_grapheme!(buffer, start, end, {
493 // form_clusters() merged clusters already, we don't merge.
494 buffer.reverse_range(start, end);
495 })
496 }
497
498 buffer.reverse();
499 buffer.direction = buffer.direction.reverse();
500 }
501}
502
503fn rotate_chars(ctx: &mut ShapeContext) {
504 let len = ctx.buffer.len;
505
506 if ctx.target_direction.is_backward() {
507 let rtlm_mask = ctx.plan.rtlm_mask;
508
509 for info in &mut ctx.buffer.info[..len] {
510 if let Some(c) = info.as_char().mirrored().map(u32::from) {
511 if ctx.face.has_glyph(c) {
512 info.glyph_id = c;
513 continue;
514 }
515 }
516 info.mask |= rtlm_mask;
517 }
518 }
519
520 if ctx.target_direction.is_vertical() && !ctx.plan.has_vert {
521 for info in &mut ctx.buffer.info[..len] {
522 if let Some(c) = info.as_char().vertical().map(u32::from) {
523 if ctx.face.has_glyph(c) {
524 info.glyph_id = c;
525 }
526 }
527 }
528 }
529}
530
531fn map_glyphs_fast(buffer: &mut Buffer) {
532 // Normalization process sets up glyph_index(), we just copy it.
533 let len: usize = buffer.len;
534 for info: &mut GlyphInfo in &mut buffer.info[..len] {
535 info.glyph_id = info.glyph_index();
536 }
537}
538
539fn synthesize_glyph_classes(buffer: &mut Buffer) {
540 let len: usize = buffer.len;
541 for info: &mut GlyphInfo in &mut buffer.info[..len] {
542 // Never mark default-ignorables as marks.
543 // They won't get in the way of lookups anyway,
544 // but having them as mark will cause them to be skipped
545 // over if the lookup-flag says so, but at least for the
546 // Mongolian variation selectors, looks like Uniscribe
547 // marks them as non-mark. Some Mongolian fonts without
548 // GDEF rely on this. Another notable character that
549 // this applies to is COMBINING GRAPHEME JOINER.
550 let class: GlyphPropsFlags = if info.general_category() != GeneralCategory::NonspacingMark
551 || info.is_default_ignorable()
552 {
553 GlyphPropsFlags::BASE_GLYPH
554 } else {
555 GlyphPropsFlags::MARK
556 };
557
558 info.set_glyph_props(class.bits());
559 }
560}
561
562fn zero_width_default_ignorables(buffer: &mut Buffer) {
563 if bufferBufferScratchFlags
564 .scratch_flags
565 .contains(BufferScratchFlags::HAS_DEFAULT_IGNORABLES)
566 && !bufferBufferFlags
567 .flags
568 .contains(BufferFlags::PRESERVE_DEFAULT_IGNORABLES)
569 && !bufferBufferFlags
570 .flags
571 .contains(BufferFlags::REMOVE_DEFAULT_IGNORABLES)
572 {
573 let len: usize = buffer.len;
574 for (info: &GlyphInfo, pos: &mut GlyphPosition) in buffer.info[..len].iter().zip(&mut buffer.pos[..len]) {
575 if info.is_default_ignorable() {
576 pos.x_advance = 0;
577 pos.y_advance = 0;
578 pos.x_offset = 0;
579 pos.y_offset = 0;
580 }
581 }
582 }
583}
584
585fn zero_mark_widths_by_gdef(buffer: &mut Buffer, adjust_offsets: bool) {
586 let len: usize = buffer.len;
587 for (info: &GlyphInfo, pos: &mut GlyphPosition) in buffer.info[..len].iter().zip(&mut buffer.pos[..len]) {
588 if info.is_mark() {
589 if adjust_offsets {
590 pos.x_offset -= pos.x_advance;
591 pos.y_offset -= pos.y_advance;
592 }
593
594 pos.x_advance = 0;
595 pos.y_advance = 0;
596 }
597 }
598}
599
600fn hide_default_ignorables(buffer: &mut Buffer, face: &Face) {
601 if buffer
602 .scratch_flags
603 .contains(BufferScratchFlags::HAS_DEFAULT_IGNORABLES)
604 && !buffer
605 .flags
606 .contains(BufferFlags::PRESERVE_DEFAULT_IGNORABLES)
607 {
608 if !buffer
609 .flags
610 .contains(BufferFlags::REMOVE_DEFAULT_IGNORABLES)
611 {
612 if let Some(invisible) = buffer
613 .invisible
614 .or_else(|| face.glyph_index(u32::from(' ')))
615 {
616 let len = buffer.len;
617 for info in &mut buffer.info[..len] {
618 if info.is_default_ignorable() {
619 info.glyph_id = u32::from(invisible.0);
620 }
621 }
622 return;
623 }
624 }
625
626 buffer.delete_glyphs_inplace(GlyphInfo::is_default_ignorable);
627 }
628}
629
630fn propagate_flags(buffer: &mut Buffer) {
631 // Propagate cluster-level glyph flags to be the same on all cluster glyphs.
632 // Simplifies using them.
633 if bufferBufferScratchFlags
634 .scratch_flags
635 .contains(BufferScratchFlags::HAS_UNSAFE_TO_BREAK)
636 {
637 foreach_cluster!(buffer, start, end, {
638 for info in &buffer.info[start..end] {
639 if info.mask & glyph_flag::UNSAFE_TO_BREAK != 0 {
640 for info in &mut buffer.info[start..end] {
641 info.mask |= glyph_flag::UNSAFE_TO_BREAK;
642 }
643 break;
644 }
645 }
646 });
647 }
648}
649