1 | //! Matching of glyph patterns. |
2 | |
3 | use ttf_parser::GlyphId; |
4 | |
5 | use super::apply::ApplyContext; |
6 | use super::{TableIndex, MAX_CONTEXT_LENGTH}; |
7 | use crate::buffer::GlyphInfo; |
8 | use crate::Mask; |
9 | |
/// Signature of a glyph-matching callback: receives a glyph and a `u16` value
/// and reports whether the glyph matches.
///
/// How the `u16` is interpreted is up to the concrete callback; for
/// [`match_glyph`] it is a glyph id. The alias has the same definition as
/// [`MatchingFunc`] further down in this module.
pub type MatchFunc<'a> = dyn Fn(GlyphId, u16) -> bool + 'a;
11 | |
12 | /// Value represents glyph id. |
13 | pub fn match_glyph(glyph: GlyphId, value: u16) -> bool { |
14 | glyph == GlyphId(value) |
15 | } |
16 | |
// TODO: Find out whether returning this by value is slow.
/// Outcome of a successful [`match_input`] call.
pub struct Matched {
    /// Number of buffer items spanned by the match: the index of the last
    /// matched glyph minus the buffer cursor, plus one. Glyphs that were
    /// skipped between matched glyphs are therefore included.
    pub len: usize,
    /// Buffer indices of the matched glyphs. Entry 0 is the buffer cursor;
    /// only the first `input_len + 1` entries are written, the rest stay `0`.
    pub positions: [usize; MAX_CONTEXT_LENGTH],
    /// Sum of `lig_num_comps()` over every matched glyph, the first included.
    pub total_component_count: u8,
}
23 | |
/// Matches the input sequence of a lookup against the buffer, starting at the
/// current glyph.
///
/// The glyph at `ctx.buffer.idx` is treated as the first, already accepted
/// item. The remaining `input_len` items are located with a [`SkippyIter`]
/// that consults `match_func`.
///
/// Returns `None` when:
/// - `input_len + 1` exceeds `MAX_CONTEXT_LENGTH`,
/// - the iterator cannot find one of the remaining items, or
/// - the ligature-attachment rules described in the body forbid the match.
///
/// On success returns a [`Matched`] holding the buffer positions of all
/// matched glyphs, the spanned length and the summed ligature component count.
pub fn match_input(
    ctx: &ApplyContext,
    input_len: u16,
    match_func: &MatchingFunc,
) -> Option<Matched> {
    // This is perhaps the trickiest part of OpenType... Remarks:
    //
    // - If all components of the ligature were marks, we call this a mark ligature.
    //
    // - If there is no GDEF, and the ligature is NOT a mark ligature, we categorize
    //   it as a ligature glyph.
    //
    // - Ligatures cannot be formed across glyphs attached to different components
    //   of previous ligatures. E.g. the sequence is LAM,SHADDA,LAM,FATHA,HEH, and
    //   LAM,LAM,HEH form a ligature, leaving SHADDA,FATHA next to each other.
    //   However, it would be wrong to ligate that SHADDA,FATHA sequence.
    //   There are a couple of exceptions to this:
    //
    //   o If a ligature tries ligating with marks that belong to it itself, go ahead,
    //     assuming that the font designer knows what they are doing (otherwise it can
    //     break Indic stuff when a matra wants to ligate with a conjunct).
    //
    //   o If two marks want to ligate and they belong to different components of the
    //     same ligature glyph, and said ligature glyph is to be ignored according to
    //     mark-filtering rules, then allow.
    //     https://github.com/harfbuzz/harfbuzz/issues/545

    // Lazily computed answer to "may the base ligature of the first glyph be
    // skipped?". It is evaluated at most once per call and then reused.
    #[derive (PartialEq)]
    enum Ligbase {
        NotChecked,
        MayNotSkip,
        MaySkip,
    }

    // `count` includes the first glyph, which sits at the buffer cursor.
    let count = usize::from(input_len) + 1;
    if count > MAX_CONTEXT_LENGTH {
        return None;
    }

    let mut iter = SkippyIter::new(ctx, ctx.buffer.idx, input_len, false);
    iter.enable_matching(match_func);

    let first = ctx.buffer.cur(0);
    let first_lig_id = first.lig_id();
    let first_lig_comp = first.lig_comp();
    let mut positions = [0; MAX_CONTEXT_LENGTH];
    // NOTE(review): this accumulator ends up in a `u8` field; confirm the sum
    // of component counts cannot exceed 255 for `MAX_CONTEXT_LENGTH` glyphs.
    let mut total_component_count = first.lig_num_comps();
    let mut ligbase = Ligbase::NotChecked;

    positions[0] = ctx.buffer.idx;

    // Fill positions 1..count; each step advances the iterator to the next match.
    for position in &mut positions[1..count] {
        if !iter.next() {
            return None;
        }

        *position = iter.index();

        let this = ctx.buffer.info[iter.index()];
        let this_lig_id = this.lig_id();
        let this_lig_comp = this.lig_comp();

        if first_lig_id != 0 && first_lig_comp != 0 {
            // If first component was attached to a previous ligature component,
            // all subsequent components should be attached to the same ligature
            // component, otherwise we shouldn't ligate them...
            if first_lig_id != this_lig_id || first_lig_comp != this_lig_comp {
                // ...unless, we are attached to a base ligature and that base
                // ligature is ignorable.
                if ligbase == Ligbase::NotChecked {
                    // Walk backwards through the output glyphs that share the
                    // first glyph's ligature id, looking for the one with
                    // component 0 (the base). On success `j` is its index.
                    let out = ctx.buffer.out_info();
                    let mut j = ctx.buffer.out_len;
                    let mut found = false;
                    while j > 0 && out[j - 1].lig_id() == first_lig_id {
                        if out[j - 1].lig_comp() == 0 {
                            j -= 1;
                            found = true;
                            break;
                        }
                        j -= 1;
                    }

                    // `found &&` short-circuits, so `out[j]` is only read when
                    // a base was actually located.
                    ligbase = if found && iter.may_skip(&out[j]) == Some(true) {
                        Ligbase::MaySkip
                    } else {
                        Ligbase::MayNotSkip
                    };
                }

                if ligbase == Ligbase::MayNotSkip {
                    return None;
                }
            }
        } else {
            // If first component was NOT attached to a previous ligature component,
            // all subsequent components should also NOT be attached to any ligature
            // component, unless they are attached to the first component itself!
            if this_lig_id != 0 && this_lig_comp != 0 && (this_lig_id != first_lig_id) {
                return None;
            }
        }

        total_component_count += this.lig_num_comps();
    }

    Some(Matched {
        // Span from the cursor to the last matched glyph, inclusive.
        len: iter.index() - ctx.buffer.idx + 1,
        positions,
        total_component_count,
    })
}
135 | |
136 | pub fn match_backtrack( |
137 | ctx: &ApplyContext, |
138 | backtrack_len: u16, |
139 | match_func: &MatchingFunc, |
140 | ) -> Option<usize> { |
141 | let mut iter: SkippyIter<'_, '_> = SkippyIter::new(ctx, start_buf_index:ctx.buffer.backtrack_len(), num_items:backtrack_len, context_match:true); |
142 | iter.enable_matching(match_func); |
143 | |
144 | for _ in 0..backtrack_len { |
145 | if !iter.prev() { |
146 | return None; |
147 | } |
148 | } |
149 | |
150 | Some(iter.index()) |
151 | } |
152 | |
153 | pub fn match_lookahead( |
154 | ctx: &ApplyContext, |
155 | lookahead_len: u16, |
156 | match_func: &MatchingFunc, |
157 | offset: usize, |
158 | ) -> Option<usize> { |
159 | let mut iter: SkippyIter<'_, '_> = SkippyIter::new(ctx, start_buf_index:ctx.buffer.idx + offset - 1, num_items:lookahead_len, context_match:true); |
160 | iter.enable_matching(match_func); |
161 | |
162 | for _ in 0..lookahead_len { |
163 | if !iter.next() { |
164 | return None; |
165 | } |
166 | } |
167 | |
168 | Some(iter.index() + 1) |
169 | } |
170 | |
/// Callback a [`SkippyIter`] consults to decide whether a glyph matches.
///
/// The iterator calls it with the candidate glyph and its current `num_items`
/// counter (the number of items it has still to match); the callback returns
/// `true` when the glyph is acceptable at that point.
pub type MatchingFunc<'a> = dyn Fn(GlyphId, u16) -> bool + 'a;
172 | |
/// Cursor over the glyph buffer that steps to the next or previous glyph
/// matching a pattern item, passing over glyphs that may be skipped.
pub struct SkippyIter<'a, 'b> {
    /// Apply context whose buffer is traversed.
    ctx: &'a ApplyContext<'a, 'b>,
    /// Lookup properties handed to `ctx.check_glyph_property` when deciding
    /// whether a glyph is skipped.
    lookup_props: u32,
    /// When `false`, a ZWNJ glyph is never treated as skippable.
    ignore_zwnj: bool,
    /// When `false`, a ZWJ glyph is never treated as skippable.
    ignore_zwj: bool,
    /// A glyph can only match if its mask shares at least one bit with this.
    mask: Mask,
    /// Syllable a glyph must belong to in order to match; `0` disables the check.
    syllable: u8,
    /// Optional matching callback; `None` until `enable_matching` is called.
    matching: Option<&'a MatchingFunc<'a>>,
    /// Buffer length captured at construction; bounds forward iteration.
    buf_len: usize,
    /// Index of the glyph the iterator currently rests on.
    buf_idx: usize,
    /// Number of pattern items still to be matched.
    num_items: u16,
}
185 | |
186 | impl<'a, 'b> SkippyIter<'a, 'b> { |
187 | pub fn new( |
188 | ctx: &'a ApplyContext<'a, 'b>, |
189 | start_buf_index: usize, |
190 | num_items: u16, |
191 | context_match: bool, |
192 | ) -> Self { |
193 | SkippyIter { |
194 | ctx, |
195 | lookup_props: ctx.lookup_props, |
196 | // Ignore ZWNJ if we are matching GPOS, or matching GSUB context and asked to. |
197 | ignore_zwnj: ctx.table_index == TableIndex::GPOS || (context_match && ctx.auto_zwnj), |
198 | // Ignore ZWJ if we are matching context, or asked to. |
199 | ignore_zwj: context_match || ctx.auto_zwj, |
200 | mask: if context_match { |
201 | u32::MAX |
202 | } else { |
203 | ctx.lookup_mask |
204 | }, |
205 | syllable: if ctx.buffer.idx == start_buf_index { |
206 | ctx.buffer.cur(0).syllable() |
207 | } else { |
208 | 0 |
209 | }, |
210 | matching: None, |
211 | buf_len: ctx.buffer.len, |
212 | buf_idx: start_buf_index, |
213 | num_items, |
214 | } |
215 | } |
216 | |
217 | pub fn set_lookup_props(&mut self, lookup_props: u32) { |
218 | self.lookup_props = lookup_props; |
219 | } |
220 | |
221 | pub fn enable_matching(&mut self, func: &'a MatchingFunc<'a>) { |
222 | self.matching = Some(func); |
223 | } |
224 | |
225 | pub fn index(&self) -> usize { |
226 | self.buf_idx |
227 | } |
228 | |
229 | pub fn next(&mut self) -> bool { |
230 | assert!(self.num_items > 0); |
231 | while self.buf_idx + usize::from(self.num_items) < self.buf_len { |
232 | self.buf_idx += 1; |
233 | let info = &self.ctx.buffer.info[self.buf_idx]; |
234 | |
235 | let skip = self.may_skip(info); |
236 | if skip == Some(true) { |
237 | continue; |
238 | } |
239 | |
240 | let matched = self.may_match(info); |
241 | if matched == Some(true) || (matched.is_none() && skip == Some(false)) { |
242 | self.num_items -= 1; |
243 | return true; |
244 | } |
245 | |
246 | if skip == Some(false) { |
247 | return false; |
248 | } |
249 | } |
250 | |
251 | false |
252 | } |
253 | |
254 | pub fn prev(&mut self) -> bool { |
255 | assert!(self.num_items > 0); |
256 | while self.buf_idx >= usize::from(self.num_items) { |
257 | self.buf_idx -= 1; |
258 | let info = &self.ctx.buffer.out_info()[self.buf_idx]; |
259 | |
260 | let skip = self.may_skip(info); |
261 | if skip == Some(true) { |
262 | continue; |
263 | } |
264 | |
265 | let matched = self.may_match(info); |
266 | if matched == Some(true) || (matched.is_none() && skip == Some(false)) { |
267 | self.num_items -= 1; |
268 | return true; |
269 | } |
270 | |
271 | if skip == Some(false) { |
272 | return false; |
273 | } |
274 | } |
275 | |
276 | false |
277 | } |
278 | |
279 | pub fn reject(&mut self) { |
280 | self.num_items += 1; |
281 | } |
282 | |
283 | fn may_match(&self, info: &GlyphInfo) -> Option<bool> { |
284 | if (info.mask & self.mask) != 0 && (self.syllable == 0 || self.syllable == info.syllable()) |
285 | { |
286 | self.matching.map(|f| f(info.as_glyph(), self.num_items)) |
287 | } else { |
288 | Some(false) |
289 | } |
290 | } |
291 | |
292 | fn may_skip(&self, info: &GlyphInfo) -> Option<bool> { |
293 | if !self.ctx.check_glyph_property(info, self.lookup_props) { |
294 | return Some(true); |
295 | } |
296 | |
297 | if !info.is_default_ignorable() |
298 | || info.is_hidden() |
299 | || (!self.ignore_zwnj && info.is_zwnj()) |
300 | || (!self.ignore_zwj && info.is_zwj()) |
301 | { |
302 | return Some(false); |
303 | } |
304 | |
305 | None |
306 | } |
307 | } |
308 | |