1//! Matching of glyph patterns.
2
3use ttf_parser::GlyphId;
4
5use super::apply::ApplyContext;
6use super::{TableIndex, MAX_CONTEXT_LENGTH};
7use crate::buffer::GlyphInfo;
8use crate::Mask;
9
/// Signature of a glyph-matching predicate: it receives a glyph and a `u16`
/// value and reports whether the two match (see [`match_glyph`] for a
/// function of this shape).
///
/// Structurally identical to the `MatchingFunc` alias declared further down in
/// this file.
pub type MatchFunc<'a> = dyn Fn(GlyphId, u16) -> bool + 'a;
11
12/// Value represents glyph id.
13pub fn match_glyph(glyph: GlyphId, value: u16) -> bool {
14 glyph == GlyphId(value)
15}
16
// TODO: Find out whether returning this by value is slow.
/// Result of a successful [`match_input`] call.
pub struct Matched {
    /// Number of buffer slots from the current glyph through the last matched
    /// glyph, inclusive. Glyphs skipped in between are therefore counted too.
    pub len: usize,
    /// Buffer indices of the matched glyphs. Entry 0 is the current buffer
    /// index; only the first `input_len + 1` entries are filled in by
    /// `match_input`, the remainder stay `0`.
    pub positions: [usize; MAX_CONTEXT_LENGTH],
    /// Sum of `lig_num_comps()` over every matched glyph.
    pub total_component_count: u8,
}
23
/// Matches the current glyph plus `input_len` following glyphs.
///
/// The current glyph (`ctx.buffer.idx`) is taken as already matched; the
/// remaining `input_len` glyphs are located with a [`SkippyIter`] driven by
/// `match_func`. For every located glyph the ligature id/component rules
/// described in the remarks below are enforced.
///
/// Returns `None` when:
/// - `input_len + 1` exceeds `MAX_CONTEXT_LENGTH`,
/// - the iterator cannot find the next matching glyph, or
/// - a glyph violates the ligature-component rules.
///
/// On success the returned [`Matched`] holds the buffer positions of all
/// matched glyphs, the spanned length and the accumulated component count.
pub fn match_input(
    ctx: &ApplyContext,
    input_len: u16,
    match_func: &MatchingFunc,
) -> Option<Matched> {
    // This is perhaps the trickiest part of OpenType... Remarks:
    //
    // - If all components of the ligature were marks, we call this a mark ligature.
    //
    // - If there is no GDEF, and the ligature is NOT a mark ligature, we categorize
    //   it as a ligature glyph.
    //
    // - Ligatures cannot be formed across glyphs attached to different components
    //   of previous ligatures. E.g. the sequence is LAM,SHADDA,LAM,FATHA,HEH, and
    //   LAM,LAM,HEH form a ligature, leaving SHADDA,FATHA next to each other.
    //   However, it would be wrong to ligate that SHADDA,FATHA sequence.
    //   There are a couple of exceptions to this:
    //
    //   o If a ligature tries ligating with marks that belong to it itself, go ahead,
    //     assuming that the font designer knows what they are doing (otherwise it can
    //     break Indic stuff when a matra wants to ligate with a conjunct).
    //
    //   o If two marks want to ligate and they belong to different components of the
    //     same ligature glyph, and said ligature glyph is to be ignored according to
    //     mark-filtering rules, then allow.
    //     https://github.com/harfbuzz/harfbuzz/issues/545

    // Lazily computed answer to "may the base ligature of the first glyph be
    // skipped?"; evaluated at most once per call.
    #[derive(PartialEq)]
    enum Ligbase {
        NotChecked,
        MayNotSkip,
        MaySkip,
    }

    // Total number of glyphs in the match, including the current one.
    let count = usize::from(input_len) + 1;
    if count > MAX_CONTEXT_LENGTH {
        return None;
    }

    // `false`: this is an input match, not a context (backtrack/lookahead) match.
    let mut iter = SkippyIter::new(ctx, ctx.buffer.idx, input_len, false);
    iter.enable_matching(match_func);

    let first = ctx.buffer.cur(0);
    let first_lig_id = first.lig_id();
    let first_lig_comp = first.lig_comp();
    let mut positions = [0; MAX_CONTEXT_LENGTH];
    let mut total_component_count = first.lig_num_comps();
    let mut ligbase = Ligbase::NotChecked;

    positions[0] = ctx.buffer.idx;

    // Fill in the positions of the remaining `input_len` glyphs.
    for position in &mut positions[1..count] {
        if !iter.next() {
            return None;
        }

        *position = iter.index();

        let this = ctx.buffer.info[iter.index()];
        let this_lig_id = this.lig_id();
        let this_lig_comp = this.lig_comp();

        if first_lig_id != 0 && first_lig_comp != 0 {
            // If first component was attached to a previous ligature component,
            // all subsequent components should be attached to the same ligature
            // component, otherwise we shouldn't ligate them...
            if first_lig_id != this_lig_id || first_lig_comp != this_lig_comp {
                // ...unless, we are attached to a base ligature and that base
                // ligature is ignorable.
                if ligbase == Ligbase::NotChecked {
                    // Walk backwards over the output glyphs that share the
                    // first glyph's ligature id, looking for the base glyph
                    // (the one whose ligature component is 0).
                    let out = ctx.buffer.out_info();
                    let mut j = ctx.buffer.out_len;
                    let mut found = false;
                    while j > 0 && out[j - 1].lig_id() == first_lig_id {
                        if out[j - 1].lig_comp() == 0 {
                            j -= 1;
                            found = true;
                            break;
                        }
                        j -= 1;
                    }

                    // `out[j]` is only evaluated when `found` is true, in which
                    // case `j` indexes the base glyph located above.
                    ligbase = if found && iter.may_skip(&out[j]) == Some(true) {
                        Ligbase::MaySkip
                    } else {
                        Ligbase::MayNotSkip
                    };
                }

                if ligbase == Ligbase::MayNotSkip {
                    return None;
                }
            }
        } else {
            // If first component was NOT attached to a previous ligature component,
            // all subsequent components should also NOT be attached to any ligature
            // component, unless they are attached to the first component itself!
            if this_lig_id != 0 && this_lig_comp != 0 && (this_lig_id != first_lig_id) {
                return None;
            }
        }

        total_component_count += this.lig_num_comps();
    }

    Some(Matched {
        // Span from the current glyph through the last matched glyph, inclusive.
        len: iter.index() - ctx.buffer.idx + 1,
        positions,
        total_component_count,
    })
}
135
136pub fn match_backtrack(
137 ctx: &ApplyContext,
138 backtrack_len: u16,
139 match_func: &MatchingFunc,
140) -> Option<usize> {
141 let mut iter: SkippyIter<'_, '_> = SkippyIter::new(ctx, start_buf_index:ctx.buffer.backtrack_len(), num_items:backtrack_len, context_match:true);
142 iter.enable_matching(match_func);
143
144 for _ in 0..backtrack_len {
145 if !iter.prev() {
146 return None;
147 }
148 }
149
150 Some(iter.index())
151}
152
153pub fn match_lookahead(
154 ctx: &ApplyContext,
155 lookahead_len: u16,
156 match_func: &MatchingFunc,
157 offset: usize,
158) -> Option<usize> {
159 let mut iter: SkippyIter<'_, '_> = SkippyIter::new(ctx, start_buf_index:ctx.buffer.idx + offset - 1, num_items:lookahead_len, context_match:true);
160 iter.enable_matching(match_func);
161
162 for _ in 0..lookahead_len {
163 if !iter.next() {
164 return None;
165 }
166 }
167
168 Some(iter.index() + 1)
169}
170
/// Predicate consulted by [`SkippyIter`] to decide whether a glyph matches.
///
/// It receives the candidate glyph and a `u16`; `SkippyIter` passes the number
/// of items it still has to match (`num_items`) as that second argument.
///
/// Structurally identical to the `MatchFunc` alias declared at the top of this
/// file.
pub type MatchingFunc<'a> = dyn Fn(GlyphId, u16) -> bool + 'a;
172
/// Cursor over the glyph buffer that steps forwards (`next`) or backwards
/// (`prev`) to the next glyph accepted by the lookup, passing over glyphs that
/// must or may be skipped.
pub struct SkippyIter<'a, 'b> {
    /// Apply context whose buffer is being traversed.
    ctx: &'a ApplyContext<'a, 'b>,
    /// Lookup properties handed to `check_glyph_property`; initialised from
    /// `ctx.lookup_props` and replaceable via `set_lookup_props`.
    lookup_props: u32,
    /// When `false`, a ZWNJ glyph is never treated as optionally skippable.
    ignore_zwnj: bool,
    /// When `false`, a ZWJ glyph is never treated as optionally skippable.
    ignore_zwj: bool,
    /// A glyph can only match if its mask shares a bit with this value.
    /// `u32::MAX` for context matching, `ctx.lookup_mask` otherwise.
    mask: Mask,
    /// If non-zero, only glyphs with this syllable value can match. Set to the
    /// current glyph's syllable when iteration starts at `ctx.buffer.idx`,
    /// otherwise `0`.
    syllable: u8,
    /// Optional matching predicate installed with `enable_matching`.
    matching: Option<&'a MatchingFunc<'a>>,
    /// Snapshot of `ctx.buffer.len` taken at construction; bounds `next`.
    buf_len: usize,
    /// Current buffer index of the cursor.
    buf_idx: usize,
    /// Number of items still to be matched; decremented on every successful
    /// `next`/`prev` and incremented by `reject`.
    num_items: u16,
}
185
186impl<'a, 'b> SkippyIter<'a, 'b> {
187 pub fn new(
188 ctx: &'a ApplyContext<'a, 'b>,
189 start_buf_index: usize,
190 num_items: u16,
191 context_match: bool,
192 ) -> Self {
193 SkippyIter {
194 ctx,
195 lookup_props: ctx.lookup_props,
196 // Ignore ZWNJ if we are matching GPOS, or matching GSUB context and asked to.
197 ignore_zwnj: ctx.table_index == TableIndex::GPOS || (context_match && ctx.auto_zwnj),
198 // Ignore ZWJ if we are matching context, or asked to.
199 ignore_zwj: context_match || ctx.auto_zwj,
200 mask: if context_match {
201 u32::MAX
202 } else {
203 ctx.lookup_mask
204 },
205 syllable: if ctx.buffer.idx == start_buf_index {
206 ctx.buffer.cur(0).syllable()
207 } else {
208 0
209 },
210 matching: None,
211 buf_len: ctx.buffer.len,
212 buf_idx: start_buf_index,
213 num_items,
214 }
215 }
216
217 pub fn set_lookup_props(&mut self, lookup_props: u32) {
218 self.lookup_props = lookup_props;
219 }
220
221 pub fn enable_matching(&mut self, func: &'a MatchingFunc<'a>) {
222 self.matching = Some(func);
223 }
224
225 pub fn index(&self) -> usize {
226 self.buf_idx
227 }
228
229 pub fn next(&mut self) -> bool {
230 assert!(self.num_items > 0);
231 while self.buf_idx + usize::from(self.num_items) < self.buf_len {
232 self.buf_idx += 1;
233 let info = &self.ctx.buffer.info[self.buf_idx];
234
235 let skip = self.may_skip(info);
236 if skip == Some(true) {
237 continue;
238 }
239
240 let matched = self.may_match(info);
241 if matched == Some(true) || (matched.is_none() && skip == Some(false)) {
242 self.num_items -= 1;
243 return true;
244 }
245
246 if skip == Some(false) {
247 return false;
248 }
249 }
250
251 false
252 }
253
254 pub fn prev(&mut self) -> bool {
255 assert!(self.num_items > 0);
256 while self.buf_idx >= usize::from(self.num_items) {
257 self.buf_idx -= 1;
258 let info = &self.ctx.buffer.out_info()[self.buf_idx];
259
260 let skip = self.may_skip(info);
261 if skip == Some(true) {
262 continue;
263 }
264
265 let matched = self.may_match(info);
266 if matched == Some(true) || (matched.is_none() && skip == Some(false)) {
267 self.num_items -= 1;
268 return true;
269 }
270
271 if skip == Some(false) {
272 return false;
273 }
274 }
275
276 false
277 }
278
279 pub fn reject(&mut self) {
280 self.num_items += 1;
281 }
282
283 fn may_match(&self, info: &GlyphInfo) -> Option<bool> {
284 if (info.mask & self.mask) != 0 && (self.syllable == 0 || self.syllable == info.syllable())
285 {
286 self.matching.map(|f| f(info.as_glyph(), self.num_items))
287 } else {
288 Some(false)
289 }
290 }
291
292 fn may_skip(&self, info: &GlyphInfo) -> Option<bool> {
293 if !self.ctx.check_glyph_property(info, self.lookup_props) {
294 return Some(true);
295 }
296
297 if !info.is_default_ignorable()
298 || info.is_hidden()
299 || (!self.ignore_zwnj && info.is_zwnj())
300 || (!self.ignore_zwj && info.is_zwj())
301 {
302 return Some(false);
303 }
304
305 None
306 }
307}
308