1use crate::buffer::{
2 glyph_flag, Buffer, BufferClusterLevel, BufferFlags, BufferScratchFlags, GlyphInfo,
3 GlyphPropsFlags,
4};
5use crate::complex::ZeroWidthMarksMode;
6use crate::plan::ShapePlan;
7use crate::unicode::{CharExt, GeneralCategory, GeneralCategoryExt};
8use crate::{
9 aat, fallback, normalize, ot, script, Direction, Face, Feature, GlyphBuffer, UnicodeBuffer,
10};
11
12/// Shapes the buffer content using provided font and features.
13///
14/// Consumes the buffer. You can then run [`GlyphBuffer::clear`] to get the [`UnicodeBuffer`] back
15/// without allocating a new one.
16///
17/// If you plan to shape multiple strings using the same [`Face`] prefer [`shape_with_plan`].
18/// This is because [`ShapePlan`] initialization is pretty slow and should preferably be called
19/// once for each [`Face`].
20pub fn shape(face: &Face, features: &[Feature], mut buffer: UnicodeBuffer) -> GlyphBuffer {
21 buffer.0.guess_segment_properties();
22 let plan: ShapePlan = ShapePlan::new(
23 face,
24 buffer.0.direction,
25 buffer.0.script,
26 language:buffer.0.language.as_ref(),
27 user_features:features,
28 );
29 shape_with_plan(face, &plan, buffer)
30}
31
32/// Shapes the buffer content using the provided font and plan.
33///
34/// Consumes the buffer. You can then run [`GlyphBuffer::clear`] to get the [`UnicodeBuffer`] back
35/// without allocating a new one.
36///
37/// It is up to the caller to ensure that the shape plan matches the properties of the provided
38/// buffer, otherwise the shaping result will likely be incorrect.
39///
40/// # Panics
41///
42/// Will panic when debugging assertions are enabled if the buffer and plan have mismatched
43/// properties.
44pub fn shape_with_plan(face: &Face, plan: &ShapePlan, buffer: UnicodeBuffer) -> GlyphBuffer {
45 let mut buffer: Buffer = buffer.0;
46 buffer.guess_segment_properties();
47
48 debug_assert_eq!(buffer.direction, plan.direction);
49 debug_assert_eq!(
50 buffer.script.unwrap_or(script::UNKNOWN),
51 plan.script.unwrap_or(script::UNKNOWN)
52 );
53
54 if buffer.len > 0 {
55 // Save the original direction, we use it later.
56 let target_direction: Direction = buffer.direction;
57 shape_internal(&mut ShapeContext {
58 plan,
59 face,
60 buffer: &mut buffer,
61 target_direction,
62 });
63 }
64
65 GlyphBuffer(buffer)
66}
67
68struct ShapeContext<'a> {
69 plan: &'a ShapePlan,
70 face: &'a Face<'a>,
71 buffer: &'a mut Buffer,
72 // Transient stuff
73 target_direction: Direction,
74}
75
76// Pull it all together!
77fn shape_internal(ctx: &mut ShapeContext) {
78 ctx.buffer.enter();
79
80 initialize_masks(ctx);
81 set_unicode_props(ctx.buffer);
82 insert_dotted_circle(ctx.buffer, ctx.face);
83
84 form_clusters(ctx.buffer);
85
86 ensure_native_direction(ctx.buffer);
87
88 if let Some(func: fn(&ShapePlan, &Face<'_>, …)) = ctx.plan.shaper.preprocess_text {
89 func(ctx.plan, ctx.face, ctx.buffer);
90 }
91
92 substitute_pre(ctx);
93 position(ctx);
94 substitute_post(ctx);
95
96 propagate_flags(ctx.buffer);
97
98 ctx.buffer.direction = ctx.target_direction;
99 ctx.buffer.leave();
100}
101
102fn substitute_pre(ctx: &mut ShapeContext) {
103 substitute_default(ctx);
104 substitute_complex(ctx);
105}
106
107fn substitute_post(ctx: &mut ShapeContext) {
108 hide_default_ignorables(ctx.buffer, ctx.face);
109
110 if ctx.plan.apply_morx {
111 aat::remove_deleted_glyphs(ctx.buffer);
112 }
113
114 if let Some(func: fn(&ShapePlan, &Face<'_>, …)) = ctx.plan.shaper.postprocess_glyphs {
115 func(ctx.plan, ctx.face, ctx.buffer);
116 }
117}
118
119fn substitute_default(ctx: &mut ShapeContext) {
120 rotate_chars(ctx);
121
122 normalize::normalize(ctx.plan, ctx.face, ctx.buffer);
123
124 setup_masks(ctx);
125
126 // This is unfortunate to go here, but necessary...
127 if ctx.plan.fallback_mark_positioning {
128 fallback::recategorize_marks(ctx.plan, ctx.face, ctx.buffer);
129 }
130
131 map_glyphs_fast(ctx.buffer);
132}
133
134fn substitute_complex(ctx: &mut ShapeContext) {
135 ot::substitute_start(ctx.face, ctx.buffer);
136
137 if ctx.plan.fallback_glyph_classes {
138 synthesize_glyph_classes(ctx.buffer);
139 }
140
141 substitute_by_plan(ctx.plan, ctx.face, ctx.buffer);
142}
143
144fn substitute_by_plan(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) {
145 if plan.apply_morx {
146 aat::substitute(plan, face, buffer);
147 } else {
148 ot::substitute(plan, face, buffer);
149 }
150}
151
152fn position(ctx: &mut ShapeContext) {
153 ctx.buffer.clear_positions();
154
155 position_default(ctx);
156
157 position_complex(ctx);
158
159 if ctx.buffer.direction.is_backward() {
160 ctx.buffer.reverse();
161 }
162}
163
164fn position_default(ctx: &mut ShapeContext) {
165 let len = ctx.buffer.len;
166
167 if ctx.buffer.direction.is_horizontal() {
168 for (info, pos) in ctx.buffer.info[..len]
169 .iter()
170 .zip(&mut ctx.buffer.pos[..len])
171 {
172 pos.x_advance = ctx.face.glyph_h_advance(info.as_glyph());
173 }
174 } else {
175 for (info, pos) in ctx.buffer.info[..len]
176 .iter()
177 .zip(&mut ctx.buffer.pos[..len])
178 {
179 let glyph = info.as_glyph();
180 pos.y_advance = ctx.face.glyph_v_advance(glyph);
181 pos.x_offset -= ctx.face.glyph_h_origin(glyph);
182 pos.y_offset -= ctx.face.glyph_v_origin(glyph);
183 }
184 }
185
186 if ctx
187 .buffer
188 .scratch_flags
189 .contains(BufferScratchFlags::HAS_SPACE_FALLBACK)
190 {
191 fallback::adjust_spaces(ctx.plan, ctx.face, ctx.buffer);
192 }
193}
194
195fn position_complex(ctx: &mut ShapeContext) {
196 // If the font has no GPOS and direction is forward, then when
197 // zeroing mark widths, we shift the mark with it, such that the
198 // mark is positioned hanging over the previous glyph. When
199 // direction is backward we don't shift and it will end up
200 // hanging over the next glyph after the final reordering.
201 //
202 // Note: If fallback positioning happens, we don't care about
203 // this as it will be overridden.
204 let adjust_offsets_when_zeroing =
205 ctx.plan.adjust_mark_positioning_when_zeroing && ctx.buffer.direction.is_forward();
206
207 // We change glyph origin to what GPOS expects (horizontal), apply GPOS, change it back.
208
209 ot::position_start(ctx.face, ctx.buffer);
210
211 if ctx.plan.zero_marks
212 && ctx.plan.shaper.zero_width_marks == Some(ZeroWidthMarksMode::ByGdefEarly)
213 {
214 zero_mark_widths_by_gdef(ctx.buffer, adjust_offsets_when_zeroing);
215 }
216
217 position_by_plan(ctx.plan, ctx.face, ctx.buffer);
218
219 if ctx.plan.zero_marks
220 && ctx.plan.shaper.zero_width_marks == Some(ZeroWidthMarksMode::ByGdefLate)
221 {
222 zero_mark_widths_by_gdef(ctx.buffer, adjust_offsets_when_zeroing);
223 }
224
225 // Finish off. Has to follow a certain order.
226 ot::position_finish_advances(ctx.face, ctx.buffer);
227 zero_width_default_ignorables(ctx.buffer);
228
229 if ctx.plan.apply_morx {
230 aat::zero_width_deleted_glyphs(ctx.buffer);
231 }
232
233 ot::position_finish_offsets(ctx.face, ctx.buffer);
234
235 if ctx.plan.fallback_mark_positioning {
236 fallback::position_marks(ctx.plan, ctx.face, ctx.buffer, adjust_offsets_when_zeroing);
237 }
238}
239
240fn position_by_plan(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) {
241 if plan.apply_gpos {
242 ot::position(plan, face, buffer);
243 } else if plan.apply_kerx {
244 aat::position(plan, face, buffer);
245 }
246 if plan.apply_kern {
247 ot::kern(plan, face, buffer);
248 } else if plan.apply_fallback_kern {
249 fallback::kern(plan, face, buffer);
250 }
251
252 if plan.apply_trak {
253 aat::track(plan, face, buffer);
254 }
255}
256
257fn initialize_masks(ctx: &mut ShapeContext) {
258 let global_mask: u32 = ctx.plan.ot_map.global_mask();
259 ctx.buffer.reset_masks(global_mask);
260}
261
262fn setup_masks(ctx: &mut ShapeContext) {
263 setup_masks_fraction(ctx);
264
265 if let Some(func: fn(&ShapePlan, &Face<'_>, …)) = ctx.plan.shaper.setup_masks {
266 func(ctx.plan, ctx.face, ctx.buffer);
267 }
268
269 for feature: &Feature in &ctx.plan.user_features {
270 if !feature.is_global() {
271 let (mask: u32, shift: u32) = ctx.plan.ot_map.mask(feature_tag:feature.tag);
272 ctx.buffer
273 .set_masks(value:feature.value << shift, mask, cluster_start:feature.start, cluster_end:feature.end);
274 }
275 }
276}
277
278fn setup_masks_fraction(ctx: &mut ShapeContext) {
279 let buffer = &mut ctx.buffer;
280 if !buffer
281 .scratch_flags
282 .contains(BufferScratchFlags::HAS_NON_ASCII)
283 || !ctx.plan.has_frac
284 {
285 return;
286 }
287
288 let (pre_mask, post_mask) = if buffer.direction.is_forward() {
289 (
290 ctx.plan.numr_mask | ctx.plan.frac_mask,
291 ctx.plan.frac_mask | ctx.plan.dnom_mask,
292 )
293 } else {
294 (
295 ctx.plan.frac_mask | ctx.plan.dnom_mask,
296 ctx.plan.numr_mask | ctx.plan.frac_mask,
297 )
298 };
299
300 let len = buffer.len;
301 let mut i = 0;
302 while i < len {
303 // FRACTION SLASH
304 if buffer.info[i].glyph_id == 0x2044 {
305 let mut start = i;
306 while start > 0
307 && buffer.info[start - 1].general_category() == GeneralCategory::DecimalNumber
308 {
309 start -= 1;
310 }
311
312 let mut end = i + 1;
313 while end < len && buffer.info[end].general_category() == GeneralCategory::DecimalNumber
314 {
315 end += 1;
316 }
317
318 buffer.unsafe_to_break(Some(start), Some(end));
319
320 for info in &mut buffer.info[start..i] {
321 info.mask |= pre_mask;
322 }
323
324 buffer.info[i].mask |= ctx.plan.frac_mask;
325
326 for info in &mut buffer.info[i + 1..end] {
327 info.mask |= post_mask;
328 }
329
330 i = end;
331 } else {
332 i += 1;
333 }
334 }
335}
336
337fn set_unicode_props(buffer: &mut Buffer) {
338 // Implement enough of Unicode Graphemes here that shaping
339 // in reverse-direction wouldn't break graphemes. Namely,
340 // we mark all marks and ZWJ and ZWJ,Extended_Pictographic
341 // sequences as continuations. The foreach_grapheme()
342 // macro uses this bit.
343 //
344 // https://www.unicode.org/reports/tr29/#Regex_Definitions
345
346 let len = buffer.len;
347
348 let mut i = 0;
349 while i < len {
350 // Mutably borrow buffer.info[i] and immutably borrow
351 // buffer.info[i - 1] (if present) in a way that the borrow
352 // checker can understand.
353 let (prior, later) = buffer.info.split_at_mut(i);
354 let info = &mut later[0];
355 info.init_unicode_props(&mut buffer.scratch_flags);
356
357 // Marks are already set as continuation by the above line.
358 // Handle Emoji_Modifier and ZWJ-continuation.
359 if info.general_category() == GeneralCategory::ModifierSymbol
360 && matches!(info.glyph_id, 0x1F3FB..=0x1F3FF)
361 {
362 info.set_continuation();
363 } else if i != 0 && matches!(info.glyph_id, 0x1F1E6..=0x1F1FF) {
364 // Should never fail because we checked for i > 0.
365 // TODO: use let chains when they become stable
366 let prev = prior.last().unwrap();
367 if matches!(prev.glyph_id, 0x1F1E6..=0x1F1FF) && !prev.is_continuation() {
368 info.set_continuation();
369 }
370 } else if info.is_zwj() {
371 info.set_continuation();
372 if let Some(next) = buffer.info[..len].get_mut(i + 1) {
373 if next.as_char().is_emoji_extended_pictographic() {
374 next.init_unicode_props(&mut buffer.scratch_flags);
375 next.set_continuation();
376 i += 1;
377 }
378 }
379 } else if matches!(info.glyph_id, 0xE0020..=0xE007F) {
380 // Or part of the Other_Grapheme_Extend that is not marks.
381 // As of Unicode 11 that is just:
382 //
383 // 200C ; Other_Grapheme_Extend # Cf ZERO WIDTH NON-JOINER
384 // FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA
385 // SEMI-VOICED SOUND MARK E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG
386 //
387 // ZWNJ is special, we don't want to merge it as there's no need, and keeping
388 // it separate results in more granular clusters. Ignore Katakana for now.
389 // Tags are used for Emoji sub-region flag sequences:
390 // https://github.com/harfbuzz/harfbuzz/issues/1556
391 info.set_continuation();
392 }
393
394 i += 1;
395 }
396}
397
398fn insert_dotted_circle(buffer: &mut Buffer, face: &Face) {
399 if !bufferBufferFlags
400 .flags
401 .contains(BufferFlags::DO_NOT_INSERT_DOTTED_CIRCLE)
402 && buffer.flags.contains(BufferFlags::BEGINNING_OF_TEXT)
403 && buffer.context_len[0] == 0
404 && buffer.info[0].is_unicode_mark()
405 && face.has_glyph(0x25CC)
406 {
407 let mut info: GlyphInfo = GlyphInfo {
408 glyph_id: 0x25CC,
409 mask: buffer.cur(0).mask,
410 cluster: buffer.cur(0).cluster,
411 ..GlyphInfo::default()
412 };
413
414 info.init_unicode_props(&mut buffer.scratch_flags);
415 buffer.clear_output();
416 buffer.output_info(glyph_info:info);
417 buffer.sync();
418 }
419}
420
421fn form_clusters(buffer: &mut Buffer) {
422 if bufferBufferScratchFlags
423 .scratch_flags
424 .contains(BufferScratchFlags::HAS_NON_ASCII)
425 {
426 if buffer.cluster_level == BufferClusterLevel::MonotoneGraphemes {
427 foreach_grapheme!(buffer, start, end, { buffer.merge_clusters(start, end) });
428 } else {
429 foreach_grapheme!(buffer, start, end, {
430 buffer.unsafe_to_break(Some(start), Some(end));
431 });
432 }
433 }
434}
435
436fn ensure_native_direction(buffer: &mut Buffer) {
437 let dir = buffer.direction;
438 let mut hor = buffer
439 .script
440 .and_then(Direction::from_script)
441 .unwrap_or_default();
442
443 // Numeric runs in natively-RTL scripts are actually native-LTR, so we reset
444 // the horiz_dir if the run contains at least one decimal-number char, and no
445 // letter chars (ideally we should be checking for chars with strong
446 // directionality but hb-unicode currently lacks bidi categories).
447 //
448 // This allows digit sequences in Arabic etc to be shaped in "native"
449 // direction, so that features like ligatures will work as intended.
450 //
451 // https://github.com/harfbuzz/harfbuzz/issues/501
452
453 if hor == Direction::RightToLeft && dir == Direction::LeftToRight {
454 let mut found_number = false;
455 let mut found_letter = false;
456 for info in &buffer.info {
457 let gc = info.general_category();
458 if gc == GeneralCategory::DecimalNumber {
459 found_number = true;
460 } else if gc.is_letter() {
461 found_letter = true;
462 break;
463 }
464 }
465 if found_number && !found_letter {
466 hor = Direction::LeftToRight;
467 }
468 }
469
470 // TODO vertical:
471 // The only BTT vertical script is Ogham, but it's not clear to me whether OpenType
472 // Ogham fonts are supposed to be implemented BTT or not. Need to research that
473 // first.
474 if (dir.is_horizontal() && dir != hor && hor != Direction::Invalid)
475 || (dir.is_vertical() && dir != Direction::TopToBottom)
476 {
477 buffer.reverse_graphemes();
478 buffer.direction = buffer.direction.reverse();
479 }
480}
481
482fn rotate_chars(ctx: &mut ShapeContext) {
483 let len = ctx.buffer.len;
484
485 if ctx.target_direction.is_backward() {
486 let rtlm_mask = ctx.plan.rtlm_mask;
487
488 for info in &mut ctx.buffer.info[..len] {
489 if let Some(c) = info.as_char().mirrored().map(u32::from) {
490 if ctx.face.has_glyph(c) {
491 info.glyph_id = c;
492 continue;
493 }
494 }
495 info.mask |= rtlm_mask;
496 }
497 }
498
499 if ctx.target_direction.is_vertical() && !ctx.plan.has_vert {
500 for info in &mut ctx.buffer.info[..len] {
501 if let Some(c) = info.as_char().vertical().map(u32::from) {
502 if ctx.face.has_glyph(c) {
503 info.glyph_id = c;
504 }
505 }
506 }
507 }
508}
509
510fn map_glyphs_fast(buffer: &mut Buffer) {
511 // Normalization process sets up glyph_index(), we just copy it.
512 let len: usize = buffer.len;
513 for info: &mut GlyphInfo in &mut buffer.info[..len] {
514 info.glyph_id = info.glyph_index();
515 }
516}
517
518fn synthesize_glyph_classes(buffer: &mut Buffer) {
519 let len: usize = buffer.len;
520 for info: &mut GlyphInfo in &mut buffer.info[..len] {
521 // Never mark default-ignorables as marks.
522 // They won't get in the way of lookups anyway,
523 // but having them as mark will cause them to be skipped
524 // over if the lookup-flag says so, but at least for the
525 // Mongolian variation selectors, looks like Uniscribe
526 // marks them as non-mark. Some Mongolian fonts without
527 // GDEF rely on this. Another notable character that
528 // this applies to is COMBINING GRAPHEME JOINER.
529 let class: GlyphPropsFlags = if info.general_category() != GeneralCategory::NonspacingMark
530 || info.is_default_ignorable()
531 {
532 GlyphPropsFlags::BASE_GLYPH
533 } else {
534 GlyphPropsFlags::MARK
535 };
536
537 info.set_glyph_props(class.bits());
538 }
539}
540
541fn zero_width_default_ignorables(buffer: &mut Buffer) {
542 if bufferBufferScratchFlags
543 .scratch_flags
544 .contains(BufferScratchFlags::HAS_DEFAULT_IGNORABLES)
545 && !bufferBufferFlags
546 .flags
547 .contains(BufferFlags::PRESERVE_DEFAULT_IGNORABLES)
548 && !bufferBufferFlags
549 .flags
550 .contains(BufferFlags::REMOVE_DEFAULT_IGNORABLES)
551 {
552 let len: usize = buffer.len;
553 for (info: &GlyphInfo, pos: &mut GlyphPosition) in buffer.info[..len].iter().zip(&mut buffer.pos[..len]) {
554 if info.is_default_ignorable() {
555 pos.x_advance = 0;
556 pos.y_advance = 0;
557 pos.x_offset = 0;
558 pos.y_offset = 0;
559 }
560 }
561 }
562}
563
564fn zero_mark_widths_by_gdef(buffer: &mut Buffer, adjust_offsets: bool) {
565 let len: usize = buffer.len;
566 for (info: &GlyphInfo, pos: &mut GlyphPosition) in buffer.info[..len].iter().zip(&mut buffer.pos[..len]) {
567 if info.is_mark() {
568 if adjust_offsets {
569 pos.x_offset -= pos.x_advance;
570 pos.y_offset -= pos.y_advance;
571 }
572
573 pos.x_advance = 0;
574 pos.y_advance = 0;
575 }
576 }
577}
578
579fn hide_default_ignorables(buffer: &mut Buffer, face: &Face) {
580 if buffer
581 .scratch_flags
582 .contains(BufferScratchFlags::HAS_DEFAULT_IGNORABLES)
583 && !buffer
584 .flags
585 .contains(BufferFlags::PRESERVE_DEFAULT_IGNORABLES)
586 {
587 if !buffer
588 .flags
589 .contains(BufferFlags::REMOVE_DEFAULT_IGNORABLES)
590 {
591 if let Some(invisible) = buffer
592 .invisible
593 .or_else(|| face.glyph_index(u32::from(' ')))
594 {
595 let len = buffer.len;
596 for info in &mut buffer.info[..len] {
597 if info.is_default_ignorable() {
598 info.glyph_id = u32::from(invisible.0);
599 }
600 }
601 return;
602 }
603 }
604
605 buffer.delete_glyphs_inplace(GlyphInfo::is_default_ignorable);
606 }
607}
608
609fn propagate_flags(buffer: &mut Buffer) {
610 // Propagate cluster-level glyph flags to be the same on all cluster glyphs.
611 // Simplifies using them.
612 if bufferBufferScratchFlags
613 .scratch_flags
614 .contains(BufferScratchFlags::HAS_GLYPH_FLAGS)
615 {
616 foreach_cluster!(buffer, start, end, {
617 let mut mask = 0;
618 for info in &buffer.info[start..end] {
619 mask |= info.mask * glyph_flag::DEFINED;
620 }
621
622 if mask != 0 {
623 for info in &mut buffer.info[start..end] {
624 info.mask |= mask;
625 }
626 }
627 });
628 }
629}
630