1 | use core::convert::TryFrom; |
2 | |
3 | use ttf_parser::gsub::*; |
4 | use ttf_parser::GlyphId; |
5 | |
6 | use crate::buffer::{Buffer, GlyphPropsFlags}; |
7 | use crate::plan::ShapePlan; |
8 | use crate::unicode::GeneralCategory; |
9 | use crate::Face; |
10 | |
11 | use super::apply::{Apply, ApplyContext, WouldApply, WouldApplyContext}; |
12 | use super::matching::{match_backtrack, match_glyph, match_input, match_lookahead}; |
13 | use super::{ |
14 | LayoutLookup, LayoutTable, Map, SubstLookup, SubstitutionTable, TableIndex, MAX_CONTEXT_LENGTH, |
15 | MAX_NESTING_LEVEL, |
16 | }; |
17 | use ttf_parser::opentype_layout::LookupIndex; |
18 | |
19 | /// Called before substitution lookups are performed, to ensure that glyph |
20 | /// class and other properties are set on the glyphs in the buffer. |
21 | pub fn substitute_start(face: &Face, buffer: &mut Buffer) { |
22 | set_glyph_props(face, buffer) |
23 | } |
24 | |
25 | pub fn substitute(plan: &ShapePlan, face: &Face, buffer: &mut Buffer) { |
26 | super::apply_layout_table(plan, face, buffer, table:face.gsub.as_ref()); |
27 | } |
28 | |
29 | fn set_glyph_props(face: &Face, buffer: &mut Buffer) { |
30 | let len: usize = buffer.len; |
31 | for info: &mut GlyphInfo in &mut buffer.info[..len] { |
32 | info.set_glyph_props(face.glyph_props(info.as_glyph())); |
33 | info.set_lig_props(0); |
34 | info.set_syllable(0); |
35 | } |
36 | } |
37 | |
38 | impl<'a> LayoutTable for SubstitutionTable<'a> { |
39 | const INDEX: TableIndex = TableIndex::GSUB; |
40 | const IN_PLACE: bool = false; |
41 | |
42 | type Lookup = SubstLookup<'a>; |
43 | |
44 | fn get_lookup(&self, index: LookupIndex) -> Option<&Self::Lookup> { |
45 | self.lookups.get(index:usize::from(index)) |
46 | } |
47 | } |
48 | |
49 | impl LayoutLookup for SubstLookup<'_> { |
50 | fn props(&self) -> u32 { |
51 | self.props |
52 | } |
53 | |
54 | fn is_reverse(&self) -> bool { |
55 | self.reverse |
56 | } |
57 | |
58 | fn covers(&self, glyph: GlyphId) -> bool { |
59 | self.coverage.contains(glyph) |
60 | } |
61 | } |
62 | |
63 | impl WouldApply for SubstLookup<'_> { |
64 | fn would_apply(&self, ctx: &WouldApplyContext) -> bool { |
65 | self.covers(glyph:ctx.glyphs[0]) |
66 | && self |
67 | .subtables |
68 | .iter() |
69 | .any(|subtable: &SubstitutionSubtable<'_>| subtable.would_apply(ctx)) |
70 | } |
71 | } |
72 | |
73 | impl Apply for SubstLookup<'_> { |
74 | fn apply(&self, ctx: &mut ApplyContext) -> Option<()> { |
75 | if self.covers(ctx.buffer.cur(0).as_glyph()) { |
76 | for subtable: &SubstitutionSubtable<'_> in &self.subtables { |
77 | if subtable.apply(ctx).is_some() { |
78 | return Some(()); |
79 | } |
80 | } |
81 | } |
82 | |
83 | None |
84 | } |
85 | } |
86 | |
87 | impl WouldApply for SubstitutionSubtable<'_> { |
88 | fn would_apply(&self, ctx: &WouldApplyContext) -> bool { |
89 | match self { |
90 | Self::Single(t: &SingleSubstitution<'_>) => t.would_apply(ctx), |
91 | Self::Multiple(t: &MultipleSubstitution<'_>) => t.would_apply(ctx), |
92 | Self::Alternate(t: &AlternateSubstitution<'_>) => t.would_apply(ctx), |
93 | Self::Ligature(t: &LigatureSubstitution<'_>) => t.would_apply(ctx), |
94 | Self::Context(t: &ContextLookup<'_>) => t.would_apply(ctx), |
95 | Self::ChainContext(t: &ChainedContextLookup<'_>) => t.would_apply(ctx), |
96 | Self::ReverseChainSingle(t: &ReverseChainSingleSubstitution<'_>) => t.would_apply(ctx), |
97 | } |
98 | } |
99 | } |
100 | |
101 | impl Apply for SubstitutionSubtable<'_> { |
102 | fn apply(&self, ctx: &mut ApplyContext) -> Option<()> { |
103 | match self { |
104 | Self::Single(t: &SingleSubstitution<'_>) => t.apply(ctx), |
105 | Self::Multiple(t: &MultipleSubstitution<'_>) => t.apply(ctx), |
106 | Self::Alternate(t: &AlternateSubstitution<'_>) => t.apply(ctx), |
107 | Self::Ligature(t: &LigatureSubstitution<'_>) => t.apply(ctx), |
108 | Self::Context(t: &ContextLookup<'_>) => t.apply(ctx), |
109 | Self::ChainContext(t: &ChainedContextLookup<'_>) => t.apply(ctx), |
110 | Self::ReverseChainSingle(t: &ReverseChainSingleSubstitution<'_>) => t.apply(ctx), |
111 | } |
112 | } |
113 | } |
114 | |
115 | impl WouldApply for SingleSubstitution<'_> { |
116 | fn would_apply(&self, ctx: &WouldApplyContext) -> bool { |
117 | ctx.glyphs.len() == 1 && self.coverage().get(glyph:ctx.glyphs[0]).is_some() |
118 | } |
119 | } |
120 | |
121 | impl Apply for SingleSubstitution<'_> { |
122 | fn apply(&self, ctx: &mut ApplyContext) -> Option<()> { |
123 | let glyph: GlyphId = ctx.buffer.cur(0).as_glyph(); |
124 | let subst: GlyphId = match *self { |
125 | Self::Format1 { coverage: Coverage<'_>, delta: i16 } => { |
126 | coverage.get(glyph)?; |
127 | // According to the Adobe Annotated OpenType Suite, result is always |
128 | // limited to 16bit, so we explicitly want to truncate. |
129 | GlyphId((i32::from(glyph.0) + i32::from(delta)) as u16) |
130 | } |
131 | Self::Format2 { |
132 | coverage: Coverage<'_>, |
133 | substitutes: LazyArray16<'_, GlyphId>, |
134 | } => { |
135 | let index: u16 = coverage.get(glyph)?; |
136 | substitutes.get(index)? |
137 | } |
138 | }; |
139 | |
140 | ctx.replace_glyph(glyph_id:subst); |
141 | Some(()) |
142 | } |
143 | } |
144 | |
145 | impl WouldApply for MultipleSubstitution<'_> { |
146 | fn would_apply(&self, ctx: &WouldApplyContext) -> bool { |
147 | ctx.glyphs.len() == 1 && self.coverage.get(glyph:ctx.glyphs[0]).is_some() |
148 | } |
149 | } |
150 | |
151 | impl Apply for MultipleSubstitution<'_> { |
152 | fn apply(&self, ctx: &mut ApplyContext) -> Option<()> { |
153 | let glyph: GlyphId = ctx.buffer.cur(0).as_glyph(); |
154 | let index: u16 = self.coverage.get(glyph)?; |
155 | let seq: Sequence<'_> = self.sequences.get(index)?; |
156 | seq.apply(ctx) |
157 | } |
158 | } |
159 | |
160 | impl Apply for Sequence<'_> { |
161 | fn apply(&self, ctx: &mut ApplyContext) -> Option<()> { |
162 | match self.substitutes.len() { |
163 | // Spec disallows this, but Uniscribe allows it. |
164 | // https://github.com/harfbuzz/harfbuzz/issues/253 |
165 | 0 => ctx.buffer.delete_glyph(), |
166 | |
167 | // Special-case to make it in-place and not consider this |
168 | // as a "multiplied" substitution. |
169 | 1 => ctx.replace_glyph(self.substitutes.get(0)?), |
170 | |
171 | _ => { |
172 | let class = if ctx.buffer.cur(0).is_ligature() { |
173 | GlyphPropsFlags::BASE_GLYPH |
174 | } else { |
175 | GlyphPropsFlags::empty() |
176 | }; |
177 | let lig_id = ctx.buffer.cur(0).lig_id(); |
178 | |
179 | for (i, subst) in self.substitutes.into_iter().enumerate() { |
180 | // If is attached to a ligature, don't disturb that. |
181 | // https://github.com/harfbuzz/harfbuzz/issues/3069 |
182 | if lig_id == 0 { |
183 | // Index is truncated to 4 bits anway, so we can safely cast to u8. |
184 | ctx.buffer.cur_mut(0).set_lig_props_for_component(i as u8); |
185 | } |
186 | ctx.output_glyph_for_component(subst, class); |
187 | } |
188 | |
189 | ctx.buffer.skip_glyph(); |
190 | } |
191 | } |
192 | Some(()) |
193 | } |
194 | } |
195 | |
196 | impl WouldApply for AlternateSubstitution<'_> { |
197 | fn would_apply(&self, ctx: &WouldApplyContext) -> bool { |
198 | ctx.glyphs.len() == 1 && self.coverage.get(glyph:ctx.glyphs[0]).is_some() |
199 | } |
200 | } |
201 | |
202 | impl Apply for AlternateSubstitution<'_> { |
203 | fn apply(&self, ctx: &mut ApplyContext) -> Option<()> { |
204 | let glyph: GlyphId = ctx.buffer.cur(0).as_glyph(); |
205 | let index: u16 = self.coverage.get(glyph)?; |
206 | let set: AlternateSet<'_> = self.alternate_sets.get(index)?; |
207 | set.apply(ctx) |
208 | } |
209 | } |
210 | |
211 | impl Apply for AlternateSet<'_> { |
212 | fn apply(&self, ctx: &mut ApplyContext) -> Option<()> { |
213 | let len = self.alternates.len(); |
214 | if len == 0 { |
215 | return None; |
216 | } |
217 | |
218 | let glyph_mask = ctx.buffer.cur(0).mask; |
219 | |
220 | // Note: This breaks badly if two features enabled this lookup together. |
221 | let shift = ctx.lookup_mask.trailing_zeros(); |
222 | let mut alt_index = (ctx.lookup_mask & glyph_mask) >> shift; |
223 | |
224 | // If alt_index is MAX_VALUE, randomize feature if it is the rand feature. |
225 | if alt_index == Map::MAX_VALUE && ctx.random { |
226 | // Maybe we can do better than unsafe-to-break all; but since we are |
227 | // changing random state, it would be hard to track that. Good 'nough. |
228 | ctx.buffer.unsafe_to_break(Some(0), Some(ctx.buffer.len)); |
229 | alt_index = ctx.random_number() % u32::from(len) + 1; |
230 | } |
231 | |
232 | let idx = u16::try_from(alt_index).ok()?.checked_sub(1)?; |
233 | ctx.replace_glyph(self.alternates.get(idx)?); |
234 | |
235 | Some(()) |
236 | } |
237 | } |
238 | |
239 | impl WouldApply for LigatureSubstitution<'_> { |
240 | fn would_apply(&self, ctx: &WouldApplyContext) -> bool { |
241 | self.coverage |
242 | .get(ctx.glyphs[0]) |
243 | .and_then(|index| self.ligature_sets.get(index)) |
244 | .map_or(default:false, |set: LazyOffsetArray16<'_, Ligature<'_>>| set.would_apply(ctx)) |
245 | } |
246 | } |
247 | |
248 | impl Apply for LigatureSubstitution<'_> { |
249 | fn apply(&self, ctx: &mut ApplyContext) -> Option<()> { |
250 | let glyph: GlyphId = ctx.buffer.cur(0).as_glyph(); |
251 | self.coverage |
252 | .get(glyph) |
253 | .and_then(|index: u16| self.ligature_sets.get(index)) |
254 | .and_then(|set: LazyOffsetArray16<'_, Ligature<'_>>| set.apply(ctx)) |
255 | } |
256 | } |
257 | |
258 | impl WouldApply for LigatureSet<'_> { |
259 | fn would_apply(&self, ctx: &WouldApplyContext) -> bool { |
260 | self.into_iter().any(|lig: Ligature<'_>| lig.would_apply(ctx)) |
261 | } |
262 | } |
263 | |
264 | impl Apply for LigatureSet<'_> { |
265 | fn apply(&self, ctx: &mut ApplyContext) -> Option<()> { |
266 | for lig: Ligature<'_> in self.into_iter() { |
267 | if lig.apply(ctx).is_some() { |
268 | return Some(()); |
269 | } |
270 | } |
271 | None |
272 | } |
273 | } |
274 | |
275 | impl WouldApply for Ligature<'_> { |
276 | fn would_apply(&self, ctx: &WouldApplyContext) -> bool { |
277 | ctx.glyphs.len() == usize::from(self.components.len()) + 1 |
278 | && self |
279 | .components |
280 | .into_iter() |
281 | .enumerate() |
282 | .all(|(i: usize, comp: GlyphId)| ctx.glyphs[i + 1] == comp) |
283 | } |
284 | } |
285 | |
286 | impl Apply for Ligature<'_> { |
287 | fn apply(&self, ctx: &mut ApplyContext) -> Option<()> { |
288 | // Special-case to make it in-place and not consider this |
289 | // as a "ligated" substitution. |
290 | if self.components.is_empty() { |
291 | ctx.replace_glyph(self.glyph); |
292 | Some(()) |
293 | } else { |
294 | let f = |glyph, num_items| { |
295 | let index = self.components.len() - num_items; |
296 | let value = self.components.get(index).unwrap(); |
297 | match_glyph(glyph, value.0) |
298 | }; |
299 | |
300 | let mut match_end = 0; |
301 | let mut match_positions = [0; MAX_CONTEXT_LENGTH]; |
302 | let mut total_component_count = 0; |
303 | |
304 | if !match_input( |
305 | ctx, |
306 | self.components.len(), |
307 | &f, |
308 | &mut match_end, |
309 | &mut match_positions, |
310 | Some(&mut total_component_count), |
311 | ) { |
312 | ctx.buffer |
313 | .unsafe_to_concat(Some(ctx.buffer.idx), Some(match_end)); |
314 | return None; |
315 | } |
316 | |
317 | let count = usize::from(self.components.len()) + 1; |
318 | ligate( |
319 | ctx, |
320 | count, |
321 | &match_positions, |
322 | match_end, |
323 | total_component_count, |
324 | self.glyph, |
325 | ); |
326 | return Some(()); |
327 | } |
328 | } |
329 | } |
330 | |
/// Replaces the matched glyphs with `lig_glyph` and fixes up the ligature
/// id/component bookkeeping of the surrounding glyphs.
///
/// * `count` - number of matched glyphs, including the first one.
/// * `match_positions` - buffer indices of the matched glyphs; only the first
///   `count` entries are read.
/// * `match_end` - end of the range whose clusters are merged.
/// * `total_component_count` - component count recorded on the new ligature.
/// * `lig_glyph` - glyph that replaces the first matched glyph.
fn ligate(
    ctx: &mut ApplyContext,
    // Including the first glyph
    count: usize,
    // Including the first glyph
    match_positions: &[usize; MAX_CONTEXT_LENGTH],
    match_end: usize,
    total_component_count: u8,
    lig_glyph: GlyphId,
) {
    // - If a base and one or more marks ligate, consider that as a base, NOT
    //   ligature, such that all following marks can still attach to it.
    //   https://github.com/harfbuzz/harfbuzz/issues/1109
    //
    // - If all components of the ligature were marks, we call this a mark ligature.
    //   If it *is* a mark ligature, we don't allocate a new ligature id, and leave
    //   the ligature to keep its old ligature id.  This will allow it to attach to
    //   a base ligature in GPOS.  Eg. if the sequence is: LAM,LAM,SHADDA,FATHA,HEH,
    //   and LAM,LAM,HEH form a ligature, they will leave SHADDA and FATHA with a
    //   ligature id and component value of 2.  Then if SHADDA,FATHA form a ligature
    //   later, we don't want them to lose their ligature id/component, otherwise
    //   GPOS will fail to correctly position the mark ligature on top of the
    //   LAM,LAM,HEH ligature.  See:
    //     https://bugzilla.gnome.org/show_bug.cgi?id=676343
    //
    // - If a ligature is formed of components that some of which are also ligatures
    //   themselves, and those ligature components had marks attached to *their*
    //   components, we have to attach the marks to the new ligature component
    //   positions!  Now *that*'s tricky!  And these marks may be following the
    //   last component of the whole sequence, so we should loop forward looking
    //   for them and update them.
    //
    //   Eg. the sequence is LAM,LAM,SHADDA,FATHA,HEH, and the font first forms a
    //   'calt' ligature of LAM,HEH, leaving the SHADDA and FATHA with a ligature
    //   id and component == 1.  Now, during 'liga', the LAM and the LAM-HEH ligature
    //   form a LAM-LAM-HEH ligature.  We need to reassign the SHADDA and FATHA to
    //   the new ligature with a component value of 2.
    //
    //   This in fact happened to a font...  See:
    //   https://bugzilla.gnome.org/show_bug.cgi?id=437633
    //

    let mut buffer = &mut ctx.buffer;
    buffer.merge_clusters(buffer.idx, match_end);

    // Classify the result: "base" when a base glyph is followed only by marks,
    // "mark" when every matched glyph is a mark. Any non-mark among the
    // trailing components clears both flags.
    let mut is_base_ligature = buffer.info[match_positions[0]].is_base_glyph();
    let mut is_mark_ligature = buffer.info[match_positions[0]].is_mark();
    for i in 1..count {
        if !buffer.info[match_positions[i]].is_mark() {
            is_base_ligature = false;
            is_mark_ligature = false;
        }
    }

    // Only a true ligature gets the LIGATURE class and a freshly allocated id.
    let is_ligature = !is_base_ligature && !is_mark_ligature;
    let class = if is_ligature {
        GlyphPropsFlags::LIGATURE
    } else {
        GlyphPropsFlags::empty()
    };
    let lig_id = if is_ligature {
        buffer.allocate_lig_id()
    } else {
        0
    };
    // Remember the ligature state of the first glyph before it is overwritten.
    let first = buffer.cur_mut(0);
    let mut last_lig_id = first.lig_id();
    let mut last_num_comps = first.lig_num_comps();
    let mut comps_so_far = last_num_comps;

    if is_ligature {
        first.set_lig_props_for_ligature(lig_id, total_component_count);
        // A nonspacing mark that starts a true ligature is recategorized as a letter.
        if first.general_category() == GeneralCategory::NonspacingMark {
            first.set_general_category(GeneralCategory::OtherLetter);
        }
    }

    ctx.replace_glyph_with_ligature(lig_glyph, class);
    // Re-borrow the buffer: the previous borrow had to end for the call on `ctx`.
    buffer = &mut ctx.buffer;

    for i in 1..count {
        // Walk over the glyphs lying between the previous and the next matched
        // component; the loop also stops once `buffer.successful` is false.
        while buffer.idx < match_positions[i] && buffer.successful {
            if is_ligature {
                let cur = buffer.cur_mut(0);
                let mut this_comp = cur.lig_comp();
                // A component of 0 is treated as the last component of the
                // previously seen glyph.
                if this_comp == 0 {
                    this_comp = last_num_comps;
                }
                // Rebase the component index onto the new ligature: offset by the
                // components before the previous glyph, clamped to its count.
                let new_lig_comp = comps_so_far - last_num_comps + this_comp.min(last_num_comps);
                cur.set_lig_props_for_mark(lig_id, new_lig_comp);
            }
            buffer.next_glyph();
        }

        // Record the state of the matched component and add its components
        // to the running total.
        let cur = buffer.cur(0);
        last_lig_id = cur.lig_id();
        last_num_comps = cur.lig_num_comps();
        comps_so_far += last_num_comps;

        // Skip the base glyph.
        buffer.idx += 1;
    }

    if !is_mark_ligature && last_lig_id != 0 {
        // Re-adjust components for any marks following.
        for i in buffer.idx..buffer.len {
            let info = &mut buffer.info[i];
            // Stop at the first glyph that does not belong to the last component's ligature.
            if last_lig_id != info.lig_id() {
                break;
            }

            // Stop at the first glyph whose component index is 0.
            let this_comp = info.lig_comp();
            if this_comp == 0 {
                break;
            }

            let new_lig_comp = comps_so_far - last_num_comps + this_comp.min(last_num_comps);
            info.set_lig_props_for_mark(lig_id, new_lig_comp)
        }
    }
}
452 | |
453 | impl WouldApply for ReverseChainSingleSubstitution<'_> { |
454 | fn would_apply(&self, ctx: &WouldApplyContext) -> bool { |
455 | ctx.glyphs.len() == 1 && self.coverage.get(glyph:ctx.glyphs[0]).is_some() |
456 | } |
457 | } |
458 | |
459 | impl Apply for ReverseChainSingleSubstitution<'_> { |
460 | fn apply(&self, ctx: &mut ApplyContext) -> Option<()> { |
461 | // No chaining to this type. |
462 | if ctx.nesting_level_left != MAX_NESTING_LEVEL { |
463 | return None; |
464 | } |
465 | |
466 | let glyph = ctx.buffer.cur(0).as_glyph(); |
467 | let index = self.coverage.get(glyph)?; |
468 | if index >= self.substitutes.len() { |
469 | return None; |
470 | } |
471 | |
472 | let subst = self.substitutes.get(index)?; |
473 | |
474 | let f1 = |glyph, num_items| { |
475 | let index = self.backtrack_coverages.len() - num_items; |
476 | let value = self.backtrack_coverages.get(index).unwrap(); |
477 | value.contains(glyph) |
478 | }; |
479 | |
480 | let f2 = |glyph, num_items| { |
481 | let index = self.lookahead_coverages.len() - num_items; |
482 | let value = self.lookahead_coverages.get(index).unwrap(); |
483 | value.contains(glyph) |
484 | }; |
485 | |
486 | let mut start_index = 0; |
487 | let mut end_index = 0; |
488 | |
489 | if match_backtrack(ctx, self.backtrack_coverages.len(), &f1, &mut start_index) { |
490 | if match_lookahead( |
491 | ctx, |
492 | self.lookahead_coverages.len(), |
493 | &f2, |
494 | ctx.buffer.idx + 1, |
495 | &mut end_index, |
496 | ) { |
497 | ctx.buffer |
498 | .unsafe_to_break_from_outbuffer(Some(start_index), Some(end_index)); |
499 | ctx.replace_glyph_inplace(subst); |
500 | |
501 | // Note: We DON'T decrease buffer.idx. The main loop does it |
502 | // for us. This is useful for preventing surprises if someone |
503 | // calls us through a Context lookup. |
504 | return Some(()); |
505 | } |
506 | } |
507 | |
508 | ctx.buffer |
509 | .unsafe_to_concat_from_outbuffer(Some(start_index), Some(end_index)); |
510 | return None; |
511 | } |
512 | } |
513 | |