1 | //! Matching of glyph patterns. |
2 | |
3 | use std::cmp::max; |
4 | use ttf_parser::GlyphId; |
5 | |
6 | use super::apply::ApplyContext; |
7 | use super::{TableIndex, MAX_CONTEXT_LENGTH}; |
8 | use crate::buffer::GlyphInfo; |
9 | use crate::Mask; |
10 | |
11 | pub type MatchFunc<'a> = dyn Fn(GlyphId, u16) -> bool + 'a; |
12 | |
13 | /// Value represents glyph id. |
14 | pub fn match_glyph(glyph: GlyphId, value: u16) -> bool { |
15 | glyph == GlyphId(value) |
16 | } |
17 | |
18 | pub fn match_input( |
19 | ctx: &mut ApplyContext, |
20 | input_len: u16, |
21 | match_func: &MatchingFunc, |
22 | end_position: &mut usize, |
23 | match_positions: &mut [usize; MAX_CONTEXT_LENGTH], |
24 | p_total_component_count: Option<&mut u8>, |
25 | ) -> bool { |
26 | // This is perhaps the trickiest part of OpenType... Remarks: |
27 | // |
28 | // - If all components of the ligature were marks, we call this a mark ligature. |
29 | // |
30 | // - If there is no GDEF, and the ligature is NOT a mark ligature, we categorize |
31 | // it as a ligature glyph. |
32 | // |
33 | // - Ligatures cannot be formed across glyphs attached to different components |
34 | // of previous ligatures. Eg. the sequence is LAM,SHADDA,LAM,FATHA,HEH, and |
35 | // LAM,LAM,HEH form a ligature, leaving SHADDA,FATHA next to eachother. |
36 | // However, it would be wrong to ligate that SHADDA,FATHA sequence. |
37 | // There are a couple of exceptions to this: |
38 | // |
39 | // o If a ligature tries ligating with marks that belong to it itself, go ahead, |
40 | // assuming that the font designer knows what they are doing (otherwise it can |
41 | // break Indic stuff when a matra wants to ligate with a conjunct, |
42 | // |
43 | // o If two marks want to ligate and they belong to different components of the |
44 | // same ligature glyph, and said ligature glyph is to be ignored according to |
45 | // mark-filtering rules, then allow. |
46 | // https://github.com/harfbuzz/harfbuzz/issues/545 |
47 | |
48 | #[derive (PartialEq)] |
49 | enum Ligbase { |
50 | NotChecked, |
51 | MayNotSkip, |
52 | MaySkip, |
53 | } |
54 | |
55 | let count = usize::from(input_len) + 1; |
56 | if count > MAX_CONTEXT_LENGTH { |
57 | return false; |
58 | } |
59 | |
60 | let mut iter = SkippyIter::new(ctx, ctx.buffer.idx, input_len, false); |
61 | iter.enable_matching(match_func); |
62 | |
63 | let first = ctx.buffer.cur(0); |
64 | let first_lig_id = first.lig_id(); |
65 | let first_lig_comp = first.lig_comp(); |
66 | let mut total_component_count = first.lig_num_comps(); |
67 | let mut ligbase = Ligbase::NotChecked; |
68 | |
69 | match_positions[0] = ctx.buffer.idx; |
70 | |
71 | for position in &mut match_positions[1..count] { |
72 | let mut unsafe_to = 0; |
73 | if !iter.next(Some(&mut unsafe_to)) { |
74 | *end_position = unsafe_to; |
75 | return false; |
76 | } |
77 | |
78 | *position = iter.index(); |
79 | |
80 | let this = ctx.buffer.info[iter.index()]; |
81 | let this_lig_id = this.lig_id(); |
82 | let this_lig_comp = this.lig_comp(); |
83 | |
84 | if first_lig_id != 0 && first_lig_comp != 0 { |
85 | // If first component was attached to a previous ligature component, |
86 | // all subsequent components should be attached to the same ligature |
87 | // component, otherwise we shouldn't ligate them... |
88 | if first_lig_id != this_lig_id || first_lig_comp != this_lig_comp { |
89 | // ...unless, we are attached to a base ligature and that base |
90 | // ligature is ignorable. |
91 | if ligbase == Ligbase::NotChecked { |
92 | let out = ctx.buffer.out_info(); |
93 | let mut j = ctx.buffer.out_len; |
94 | let mut found = false; |
95 | while j > 0 && out[j - 1].lig_id() == first_lig_id { |
96 | if out[j - 1].lig_comp() == 0 { |
97 | j -= 1; |
98 | found = true; |
99 | break; |
100 | } |
101 | j -= 1; |
102 | } |
103 | |
104 | ligbase = if found && iter.may_skip(&out[j]) == Some(true) { |
105 | Ligbase::MaySkip |
106 | } else { |
107 | Ligbase::MayNotSkip |
108 | }; |
109 | } |
110 | |
111 | if ligbase == Ligbase::MayNotSkip { |
112 | return false; |
113 | } |
114 | } |
115 | } else { |
116 | // If first component was NOT attached to a previous ligature component, |
117 | // all subsequent components should also NOT be attached to any ligature |
118 | // component, unless they are attached to the first component itself! |
119 | if this_lig_id != 0 && this_lig_comp != 0 && (this_lig_id != first_lig_id) { |
120 | return false; |
121 | } |
122 | } |
123 | |
124 | total_component_count += this.lig_num_comps(); |
125 | } |
126 | |
127 | *end_position = iter.index() + 1; |
128 | |
129 | if let Some(p_total_component_count) = p_total_component_count { |
130 | *p_total_component_count = total_component_count; |
131 | } |
132 | |
133 | true |
134 | } |
135 | |
136 | pub fn match_backtrack( |
137 | ctx: &mut ApplyContext, |
138 | backtrack_len: u16, |
139 | match_func: &MatchingFunc, |
140 | match_start: &mut usize, |
141 | ) -> bool { |
142 | let mut iter: SkippyIter<'_, '_> = SkippyIter::new(ctx, start_buf_index:ctx.buffer.backtrack_len(), num_items:backtrack_len, context_match:true); |
143 | iter.enable_matching(match_func); |
144 | |
145 | for _ in 0..backtrack_len { |
146 | let mut unsafe_from: usize = 0; |
147 | if !iter.prev(unsafe_from:Some(&mut unsafe_from)) { |
148 | *match_start = unsafe_from; |
149 | return false; |
150 | } |
151 | } |
152 | |
153 | *match_start = iter.index(); |
154 | true |
155 | } |
156 | |
157 | pub fn match_lookahead( |
158 | ctx: &mut ApplyContext, |
159 | lookahead_len: u16, |
160 | match_func: &MatchingFunc, |
161 | start_index: usize, |
162 | end_index: &mut usize, |
163 | ) -> bool { |
164 | let mut iter: SkippyIter<'_, '_> = SkippyIter::new(ctx, start_buf_index:start_index - 1, num_items:lookahead_len, context_match:true); |
165 | iter.enable_matching(match_func); |
166 | |
167 | for _ in 0..lookahead_len { |
168 | let mut unsafe_to: usize = 0; |
169 | if !iter.next(unsafe_to:Some(&mut unsafe_to)) { |
170 | *end_index = unsafe_to; |
171 | return false; |
172 | } |
173 | } |
174 | |
175 | *end_index = iter.index() + 1; |
176 | true |
177 | } |
178 | |
179 | pub type MatchingFunc<'a> = dyn Fn(GlyphId, u16) -> bool + 'a; |
180 | |
181 | pub struct SkippyIter<'a, 'b> { |
182 | ctx: &'a ApplyContext<'a, 'b>, |
183 | lookup_props: u32, |
184 | ignore_zwnj: bool, |
185 | ignore_zwj: bool, |
186 | mask: Mask, |
187 | syllable: u8, |
188 | matching: Option<&'a MatchingFunc<'a>>, |
189 | buf_len: usize, |
190 | buf_idx: usize, |
191 | num_items: u16, |
192 | } |
193 | |
194 | impl<'a, 'b> SkippyIter<'a, 'b> { |
195 | pub fn new( |
196 | ctx: &'a ApplyContext<'a, 'b>, |
197 | start_buf_index: usize, |
198 | num_items: u16, |
199 | context_match: bool, |
200 | ) -> Self { |
201 | SkippyIter { |
202 | ctx, |
203 | lookup_props: ctx.lookup_props, |
204 | // Ignore ZWNJ if we are matching GPOS, or matching GSUB context and asked to. |
205 | ignore_zwnj: ctx.table_index == TableIndex::GPOS || (context_match && ctx.auto_zwnj), |
206 | // Ignore ZWJ if we are matching context, or asked to. |
207 | ignore_zwj: context_match || ctx.auto_zwj, |
208 | mask: if context_match { |
209 | u32::MAX |
210 | } else { |
211 | ctx.lookup_mask |
212 | }, |
213 | syllable: if ctx.buffer.idx == start_buf_index { |
214 | ctx.buffer.cur(0).syllable() |
215 | } else { |
216 | 0 |
217 | }, |
218 | matching: None, |
219 | buf_len: ctx.buffer.len, |
220 | buf_idx: start_buf_index, |
221 | num_items, |
222 | } |
223 | } |
224 | |
225 | pub fn set_lookup_props(&mut self, lookup_props: u32) { |
226 | self.lookup_props = lookup_props; |
227 | } |
228 | |
229 | pub fn enable_matching(&mut self, func: &'a MatchingFunc<'a>) { |
230 | self.matching = Some(func); |
231 | } |
232 | |
233 | pub fn index(&self) -> usize { |
234 | self.buf_idx |
235 | } |
236 | |
237 | pub fn next(&mut self, unsafe_to: Option<&mut usize>) -> bool { |
238 | assert!(self.num_items > 0); |
239 | while self.buf_idx + usize::from(self.num_items) < self.buf_len { |
240 | self.buf_idx += 1; |
241 | let info = &self.ctx.buffer.info[self.buf_idx]; |
242 | |
243 | let skip = self.may_skip(info); |
244 | if skip == Some(true) { |
245 | continue; |
246 | } |
247 | |
248 | let matched = self.may_match(info); |
249 | if matched == Some(true) || (matched.is_none() && skip == Some(false)) { |
250 | self.num_items -= 1; |
251 | return true; |
252 | } |
253 | |
254 | if skip == Some(false) { |
255 | if let Some(unsafe_to) = unsafe_to { |
256 | *unsafe_to = self.buf_idx + 1; |
257 | } |
258 | |
259 | return false; |
260 | } |
261 | } |
262 | |
263 | if let Some(unsafe_to) = unsafe_to { |
264 | *unsafe_to = self.buf_idx + 1; |
265 | } |
266 | |
267 | false |
268 | } |
269 | |
270 | pub fn prev(&mut self, unsafe_from: Option<&mut usize>) -> bool { |
271 | assert!(self.num_items > 0); |
272 | while self.buf_idx >= usize::from(self.num_items) { |
273 | self.buf_idx -= 1; |
274 | let info = &self.ctx.buffer.out_info()[self.buf_idx]; |
275 | |
276 | let skip = self.may_skip(info); |
277 | if skip == Some(true) { |
278 | continue; |
279 | } |
280 | |
281 | let matched = self.may_match(info); |
282 | if matched == Some(true) || (matched.is_none() && skip == Some(false)) { |
283 | self.num_items -= 1; |
284 | return true; |
285 | } |
286 | |
287 | if skip == Some(false) { |
288 | if let Some(unsafe_from) = unsafe_from { |
289 | *unsafe_from = max(1, self.buf_idx) - 1; |
290 | } |
291 | |
292 | return false; |
293 | } |
294 | } |
295 | |
296 | if let Some(unsafe_from) = unsafe_from { |
297 | *unsafe_from = 0; |
298 | } |
299 | |
300 | false |
301 | } |
302 | |
303 | pub fn reject(&mut self) { |
304 | self.num_items += 1; |
305 | } |
306 | |
307 | fn may_match(&self, info: &GlyphInfo) -> Option<bool> { |
308 | if (info.mask & self.mask) != 0 && (self.syllable == 0 || self.syllable == info.syllable()) |
309 | { |
310 | self.matching.map(|f| f(info.as_glyph(), self.num_items)) |
311 | } else { |
312 | Some(false) |
313 | } |
314 | } |
315 | |
316 | fn may_skip(&self, info: &GlyphInfo) -> Option<bool> { |
317 | if !self.ctx.check_glyph_property(info, self.lookup_props) { |
318 | return Some(true); |
319 | } |
320 | |
321 | if !info.is_default_ignorable() |
322 | || info.is_hidden() |
323 | || (!self.ignore_zwnj && info.is_zwnj()) |
324 | || (!self.ignore_zwj && info.is_zwj()) |
325 | { |
326 | return Some(false); |
327 | } |
328 | |
329 | None |
330 | } |
331 | } |
332 | |