| 1 | //! Matching of glyph patterns. |
| 2 | |
| 3 | use ttf_parser::opentype_layout::*; |
| 4 | use ttf_parser::{GlyphId, LazyArray16}; |
| 5 | |
| 6 | use super::buffer::hb_glyph_info_t; |
| 7 | use super::buffer::{hb_buffer_t, GlyphPropsFlags}; |
| 8 | use super::hb_font_t; |
| 9 | use super::hb_mask_t; |
| 10 | use super::ot_layout::LayoutTable; |
| 11 | use super::ot_layout::*; |
| 12 | use super::ot_layout_common::*; |
| 13 | use super::unicode::hb_unicode_general_category_t; |
| 14 | |
| 15 | /// Value represents glyph id. |
| 16 | pub fn match_glyph(glyph: GlyphId, value: u16) -> bool { |
| 17 | glyph == GlyphId(value) |
| 18 | } |
| 19 | |
| 20 | pub fn match_input( |
| 21 | ctx: &mut hb_ot_apply_context_t, |
| 22 | input_len: u16, |
| 23 | match_func: &match_func_t, |
| 24 | end_position: &mut usize, |
| 25 | match_positions: &mut smallvec::SmallVec<[usize; 4]>, |
| 26 | p_total_component_count: Option<&mut u8>, |
| 27 | ) -> bool { |
| 28 | // This is perhaps the trickiest part of OpenType... Remarks: |
| 29 | // |
| 30 | // - If all components of the ligature were marks, we call this a mark ligature. |
| 31 | // |
| 32 | // - If there is no GDEF, and the ligature is NOT a mark ligature, we categorize |
| 33 | // it as a ligature glyph. |
| 34 | // |
| 35 | // - Ligatures cannot be formed across glyphs attached to different components |
| 36 | // of previous ligatures. Eg. the sequence is LAM,SHADDA,LAM,FATHA,HEH, and |
| 37 | // LAM,LAM,HEH form a ligature, leaving SHADDA,FATHA next to eachother. |
| 38 | // However, it would be wrong to ligate that SHADDA,FATHA sequence. |
| 39 | // There are a couple of exceptions to this: |
| 40 | // |
| 41 | // o If a ligature tries ligating with marks that belong to it itself, go ahead, |
| 42 | // assuming that the font designer knows what they are doing (otherwise it can |
| 43 | // break Indic stuff when a matra wants to ligate with a conjunct, |
| 44 | // |
| 45 | // o If two marks want to ligate and they belong to different components of the |
| 46 | // same ligature glyph, and said ligature glyph is to be ignored according to |
| 47 | // mark-filtering rules, then allow. |
| 48 | // https://github.com/harfbuzz/harfbuzz/issues/545 |
| 49 | |
| 50 | #[derive (PartialEq)] |
| 51 | enum Ligbase { |
| 52 | NotChecked, |
| 53 | MayNotSkip, |
| 54 | MaySkip, |
| 55 | } |
| 56 | |
| 57 | let count = usize::from(input_len) + 1; |
| 58 | if count > MAX_CONTEXT_LENGTH { |
| 59 | return false; |
| 60 | } |
| 61 | |
| 62 | if count > match_positions.len() { |
| 63 | match_positions.resize(count, 0); |
| 64 | } |
| 65 | |
| 66 | let mut iter = skipping_iterator_t::new(ctx, ctx.buffer.idx, false); |
| 67 | iter.set_glyph_data(0); |
| 68 | iter.enable_matching(match_func); |
| 69 | |
| 70 | let first = ctx.buffer.cur(0); |
| 71 | let first_lig_id = _hb_glyph_info_get_lig_id(first); |
| 72 | let first_lig_comp = _hb_glyph_info_get_lig_comp(first); |
| 73 | let mut total_component_count = 0; |
| 74 | let mut ligbase = Ligbase::NotChecked; |
| 75 | |
| 76 | for position in &mut match_positions[1..count] { |
| 77 | let mut unsafe_to = 0; |
| 78 | if !iter.next(Some(&mut unsafe_to)) { |
| 79 | *end_position = unsafe_to; |
| 80 | return false; |
| 81 | } |
| 82 | |
| 83 | *position = iter.index(); |
| 84 | |
| 85 | let this = ctx.buffer.info[iter.index()]; |
| 86 | let this_lig_id = _hb_glyph_info_get_lig_id(&this); |
| 87 | let this_lig_comp = _hb_glyph_info_get_lig_comp(&this); |
| 88 | |
| 89 | if first_lig_id != 0 && first_lig_comp != 0 { |
| 90 | // If first component was attached to a previous ligature component, |
| 91 | // all subsequent components should be attached to the same ligature |
| 92 | // component, otherwise we shouldn't ligate them... |
| 93 | if first_lig_id != this_lig_id || first_lig_comp != this_lig_comp { |
| 94 | // ...unless, we are attached to a base ligature and that base |
| 95 | // ligature is ignorable. |
| 96 | if ligbase == Ligbase::NotChecked { |
| 97 | let out = ctx.buffer.out_info(); |
| 98 | let mut j = ctx.buffer.out_len; |
| 99 | let mut found = false; |
| 100 | while j > 0 && _hb_glyph_info_get_lig_id(&out[j - 1]) == first_lig_id { |
| 101 | if _hb_glyph_info_get_lig_comp(&out[j - 1]) == 0 { |
| 102 | j -= 1; |
| 103 | found = true; |
| 104 | break; |
| 105 | } |
| 106 | j -= 1; |
| 107 | } |
| 108 | |
| 109 | ligbase = if found && iter.may_skip(&out[j]) == may_skip_t::SKIP_YES { |
| 110 | Ligbase::MaySkip |
| 111 | } else { |
| 112 | Ligbase::MayNotSkip |
| 113 | }; |
| 114 | } |
| 115 | |
| 116 | if ligbase == Ligbase::MayNotSkip { |
| 117 | return false; |
| 118 | } |
| 119 | } |
| 120 | } else { |
| 121 | // If first component was NOT attached to a previous ligature component, |
| 122 | // all subsequent components should also NOT be attached to any ligature |
| 123 | // component, unless they are attached to the first component itself! |
| 124 | if this_lig_id != 0 && this_lig_comp != 0 && (this_lig_id != first_lig_id) { |
| 125 | return false; |
| 126 | } |
| 127 | } |
| 128 | |
| 129 | total_component_count += _hb_glyph_info_get_lig_num_comps(&this); |
| 130 | } |
| 131 | |
| 132 | *end_position = iter.index() + 1; |
| 133 | |
| 134 | if let Some(p_total_component_count) = p_total_component_count { |
| 135 | total_component_count += _hb_glyph_info_get_lig_num_comps(first); |
| 136 | *p_total_component_count = total_component_count; |
| 137 | } |
| 138 | |
| 139 | match_positions[0] = ctx.buffer.idx; |
| 140 | |
| 141 | true |
| 142 | } |
| 143 | |
| 144 | pub fn match_backtrack( |
| 145 | ctx: &mut hb_ot_apply_context_t, |
| 146 | backtrack_len: u16, |
| 147 | match_func: &match_func_t, |
| 148 | match_start: &mut usize, |
| 149 | ) -> bool { |
| 150 | let mut iter: skipping_iterator_t<'_, '_> = skipping_iterator_t::new(ctx, start_buf_index:ctx.buffer.backtrack_len(), context_match:true); |
| 151 | iter.set_glyph_data(0); |
| 152 | iter.enable_matching(match_func); |
| 153 | |
| 154 | for _ in 0..backtrack_len { |
| 155 | let mut unsafe_from: usize = 0; |
| 156 | if !iter.prev(unsafe_from:Some(&mut unsafe_from)) { |
| 157 | *match_start = unsafe_from; |
| 158 | return false; |
| 159 | } |
| 160 | } |
| 161 | |
| 162 | *match_start = iter.index(); |
| 163 | true |
| 164 | } |
| 165 | |
| 166 | pub fn match_lookahead( |
| 167 | ctx: &mut hb_ot_apply_context_t, |
| 168 | lookahead_len: u16, |
| 169 | match_func: &match_func_t, |
| 170 | start_index: usize, |
| 171 | end_index: &mut usize, |
| 172 | ) -> bool { |
| 173 | // Function should always be called with a non-zero starting index |
| 174 | // c.f. https://github.com/harfbuzz/rustybuzz/issues/142 |
| 175 | assert!(start_index >= 1); |
| 176 | let mut iter: skipping_iterator_t<'_, '_> = skipping_iterator_t::new(ctx, start_buf_index:start_index - 1, context_match:true); |
| 177 | iter.set_glyph_data(0); |
| 178 | iter.enable_matching(match_func); |
| 179 | |
| 180 | for _ in 0..lookahead_len { |
| 181 | let mut unsafe_to: usize = 0; |
| 182 | if !iter.next(unsafe_to:Some(&mut unsafe_to)) { |
| 183 | *end_index = unsafe_to; |
| 184 | return false; |
| 185 | } |
| 186 | } |
| 187 | |
| 188 | *end_index = iter.index() + 1; |
| 189 | true |
| 190 | } |
| 191 | |
| 192 | pub type match_func_t<'a> = dyn Fn(GlyphId, u16) -> bool + 'a; |
| 193 | |
| 194 | // In harfbuzz, skipping iterator works quite differently than it works here. In harfbuzz, |
| 195 | // hb_ot_apply_context contains a skipping iterator that itself contains another reference to |
| 196 | // the apply_context, meaning that we have a circular reference. Due to ownership rules in Rust, |
| 197 | // we cannot copy this approach. Because of this, we basically create a new skipping iterator |
| 198 | // when needed, and we do not have the `reset` and `init` methods that exist in harfbuzz. This makes |
| 199 | // backporting related changes very hard, but it seems unavoidable, unfortunately. |
| 200 | pub struct skipping_iterator_t<'a, 'b> { |
| 201 | ctx: &'a hb_ot_apply_context_t<'a, 'b>, |
| 202 | lookup_props: u32, |
| 203 | ignore_zwnj: bool, |
| 204 | ignore_zwj: bool, |
| 205 | ignore_hidden: bool, |
| 206 | mask: hb_mask_t, |
| 207 | syllable: u8, |
| 208 | matching: Option<&'a match_func_t<'a>>, |
| 209 | buf_len: usize, |
| 210 | glyph_data: u16, |
| 211 | pub(crate) buf_idx: usize, |
| 212 | } |
| 213 | |
/// Final verdict of the skipping iterator for a single glyph.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum match_t {
    /// The glyph matches.
    MATCH,
    /// The glyph must not be skipped and does not match.
    NOT_MATCH,
    /// The glyph is stepped over.
    SKIP,
}
| 220 | |
/// Outcome of testing a glyph against mask, syllable and the match function.
#[derive(Clone, Copy, PartialEq, Eq)]
enum may_match_t {
    /// Mask or syllable excluded the glyph, or the match function rejected it.
    MATCH_NO,
    /// The match function accepted the glyph.
    MATCH_YES,
    /// No match function is installed, so nothing speaks against a match.
    MATCH_MAYBE,
}
| 227 | |
/// Outcome of asking whether a glyph may be stepped over.
#[derive(Clone, Copy, PartialEq, Eq)]
enum may_skip_t {
    /// The glyph must take part in matching.
    SKIP_NO,
    /// The glyph fails the lookup's glyph-property check and is always skipped.
    SKIP_YES,
    /// The glyph is a default ignorable that the iterator settings allow skipping.
    SKIP_MAYBE,
}
| 234 | |
| 235 | impl<'a, 'b> skipping_iterator_t<'a, 'b> { |
| 236 | pub fn new( |
| 237 | ctx: &'a hb_ot_apply_context_t<'a, 'b>, |
| 238 | start_buf_index: usize, |
| 239 | context_match: bool, |
| 240 | ) -> Self { |
| 241 | skipping_iterator_t { |
| 242 | ctx, |
| 243 | lookup_props: ctx.lookup_props, |
| 244 | // Ignore ZWNJ if we are matching GPOS, or matching GSUB context and asked to. |
| 245 | ignore_zwnj: ctx.table_index == TableIndex::GPOS || (context_match && ctx.auto_zwnj), |
| 246 | // Ignore ZWJ if we are matching context, or asked to. |
| 247 | ignore_zwj: context_match || ctx.auto_zwj, |
| 248 | // Ignore hidden glyphs (like CGJ) during GPOS. |
| 249 | ignore_hidden: ctx.table_index == TableIndex::GPOS, |
| 250 | mask: if context_match { |
| 251 | u32::MAX |
| 252 | } else { |
| 253 | ctx.lookup_mask() |
| 254 | }, |
| 255 | syllable: if ctx.buffer.idx == start_buf_index && ctx.per_syllable { |
| 256 | ctx.buffer.cur(0).syllable() |
| 257 | } else { |
| 258 | 0 |
| 259 | }, |
| 260 | glyph_data: 0, |
| 261 | matching: None, |
| 262 | buf_len: ctx.buffer.len, |
| 263 | buf_idx: start_buf_index, |
| 264 | } |
| 265 | } |
| 266 | |
| 267 | pub fn set_glyph_data(&mut self, glyph_data: u16) { |
| 268 | self.glyph_data = glyph_data |
| 269 | } |
| 270 | |
| 271 | fn advance_glyph_data(&mut self) { |
| 272 | self.glyph_data += 1; |
| 273 | } |
| 274 | |
| 275 | pub fn set_lookup_props(&mut self, lookup_props: u32) { |
| 276 | self.lookup_props = lookup_props; |
| 277 | } |
| 278 | |
| 279 | pub fn enable_matching(&mut self, func: &'a match_func_t<'a>) { |
| 280 | self.matching = Some(func); |
| 281 | } |
| 282 | |
| 283 | pub fn index(&self) -> usize { |
| 284 | self.buf_idx |
| 285 | } |
| 286 | |
| 287 | pub fn next(&mut self, unsafe_to: Option<&mut usize>) -> bool { |
| 288 | let stop = self.buf_len as i32 - 1; |
| 289 | |
| 290 | while (self.buf_idx as i32) < stop { |
| 291 | self.buf_idx += 1; |
| 292 | let info = &self.ctx.buffer.info[self.buf_idx]; |
| 293 | |
| 294 | match self.match_(info) { |
| 295 | match_t::MATCH => { |
| 296 | self.advance_glyph_data(); |
| 297 | return true; |
| 298 | } |
| 299 | match_t::NOT_MATCH => { |
| 300 | if let Some(unsafe_to) = unsafe_to { |
| 301 | *unsafe_to = self.buf_idx + 1; |
| 302 | } |
| 303 | |
| 304 | return false; |
| 305 | } |
| 306 | match_t::SKIP => continue, |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | if let Some(unsafe_to) = unsafe_to { |
| 311 | *unsafe_to = self.buf_idx + 1; |
| 312 | } |
| 313 | |
| 314 | false |
| 315 | } |
| 316 | |
| 317 | pub fn prev(&mut self, unsafe_from: Option<&mut usize>) -> bool { |
| 318 | let stop: usize = 0; |
| 319 | |
| 320 | while self.buf_idx > stop { |
| 321 | self.buf_idx -= 1; |
| 322 | let info = &self.ctx.buffer.out_info()[self.buf_idx]; |
| 323 | |
| 324 | match self.match_(info) { |
| 325 | match_t::MATCH => { |
| 326 | self.advance_glyph_data(); |
| 327 | return true; |
| 328 | } |
| 329 | match_t::NOT_MATCH => { |
| 330 | if let Some(unsafe_from) = unsafe_from { |
| 331 | *unsafe_from = self.buf_idx.max(1) - 1; |
| 332 | } |
| 333 | |
| 334 | return false; |
| 335 | } |
| 336 | match_t::SKIP => { |
| 337 | continue; |
| 338 | } |
| 339 | } |
| 340 | } |
| 341 | |
| 342 | if let Some(unsafe_from) = unsafe_from { |
| 343 | *unsafe_from = 0; |
| 344 | } |
| 345 | |
| 346 | false |
| 347 | } |
| 348 | |
| 349 | pub fn match_(&self, info: &hb_glyph_info_t) -> match_t { |
| 350 | let skip = self.may_skip(info); |
| 351 | |
| 352 | if skip == may_skip_t::SKIP_YES { |
| 353 | return match_t::SKIP; |
| 354 | } |
| 355 | |
| 356 | let _match = self.may_match(info); |
| 357 | |
| 358 | if _match == may_match_t::MATCH_YES |
| 359 | || (_match == may_match_t::MATCH_MAYBE && skip == may_skip_t::SKIP_NO) |
| 360 | { |
| 361 | return match_t::MATCH; |
| 362 | } |
| 363 | |
| 364 | if skip == may_skip_t::SKIP_NO { |
| 365 | return match_t::NOT_MATCH; |
| 366 | } |
| 367 | |
| 368 | match_t::SKIP |
| 369 | } |
| 370 | |
| 371 | fn may_match(&self, info: &hb_glyph_info_t) -> may_match_t { |
| 372 | if (info.mask & self.mask) == 0 || (self.syllable != 0 && self.syllable != info.syllable()) |
| 373 | { |
| 374 | return may_match_t::MATCH_NO; |
| 375 | } |
| 376 | |
| 377 | if let Some(match_func) = self.matching { |
| 378 | return if match_func(info.as_glyph(), self.glyph_data) { |
| 379 | may_match_t::MATCH_YES |
| 380 | } else { |
| 381 | may_match_t::MATCH_NO |
| 382 | }; |
| 383 | } |
| 384 | |
| 385 | may_match_t::MATCH_MAYBE |
| 386 | } |
| 387 | |
| 388 | fn may_skip(&self, info: &hb_glyph_info_t) -> may_skip_t { |
| 389 | if !self.ctx.check_glyph_property(info, self.lookup_props) { |
| 390 | return may_skip_t::SKIP_YES; |
| 391 | } |
| 392 | |
| 393 | if _hb_glyph_info_is_default_ignorable(info) |
| 394 | && (self.ignore_zwnj || !_hb_glyph_info_is_zwnj(info)) |
| 395 | && (self.ignore_zwj || !_hb_glyph_info_is_zwj(info)) |
| 396 | && (self.ignore_hidden || !_hb_glyph_info_is_hidden(info)) |
| 397 | { |
| 398 | return may_skip_t::SKIP_MAYBE; |
| 399 | } |
| 400 | |
| 401 | may_skip_t::SKIP_NO |
| 402 | } |
| 403 | } |
| 404 | |
| 405 | impl WouldApply for ContextLookup<'_> { |
| 406 | fn would_apply(&self, ctx: &WouldApplyContext) -> bool { |
| 407 | let glyph: GlyphId = ctx.glyphs[0]; |
| 408 | match *self { |
| 409 | Self::Format1 { coverage: Coverage<'_>, sets: LazyOffsetArray16<'_, LazyOffsetArray16<'_, …>> } => coverage |
| 410 | .get(glyph) |
| 411 | .and_then(|index| sets.get(index)) |
| 412 | .map_or(default:false, |set: LazyOffsetArray16<'_, SequenceRule<'_>>| set.would_apply(ctx, &match_glyph)), |
| 413 | Self::Format2 { classes: ClassDefinition<'_>, sets: LazyOffsetArray16<'_, LazyOffsetArray16<'_, …>>, .. } => { |
| 414 | let class: u16 = classes.get(glyph); |
| 415 | sets.get(class) |
| 416 | .map_or(default:false, |set: LazyOffsetArray16<'_, SequenceRule<'_>>| set.would_apply(ctx, &match_class(class_def:classes))) |
| 417 | } |
| 418 | Self::Format3 { coverages: LazyOffsetArray16<'_, Coverage<'_>>, .. } => { |
| 419 | ctx.glyphs.len() == usize::from(coverages.len()) + 1 |
| 420 | && coveragesimpl Iterator |
| 421 | .into_iter() |
| 422 | .enumerate() |
| 423 | .all(|(i: usize, coverage: Coverage<'_>)| coverage.get(glyph:ctx.glyphs[i + 1]).is_some()) |
| 424 | } |
| 425 | } |
| 426 | } |
| 427 | } |
| 428 | |
| 429 | impl Apply for ContextLookup<'_> { |
| 430 | fn apply(&self, ctx: &mut hb_ot_apply_context_t) -> Option<()> { |
| 431 | let glyph = ctx.buffer.cur(0).as_glyph(); |
| 432 | match *self { |
| 433 | Self::Format1 { coverage, sets } => { |
| 434 | coverage.get(glyph)?; |
| 435 | let set = coverage.get(glyph).and_then(|index| sets.get(index))?; |
| 436 | set.apply(ctx, &match_glyph) |
| 437 | } |
| 438 | Self::Format2 { |
| 439 | coverage, |
| 440 | classes, |
| 441 | sets, |
| 442 | } => { |
| 443 | coverage.get(glyph)?; |
| 444 | let class = classes.get(glyph); |
| 445 | let set = sets.get(class)?; |
| 446 | set.apply(ctx, &match_class(classes)) |
| 447 | } |
| 448 | Self::Format3 { |
| 449 | coverage, |
| 450 | coverages, |
| 451 | lookups, |
| 452 | } => { |
| 453 | coverage.get(glyph)?; |
| 454 | let coverages_len = coverages.len(); |
| 455 | |
| 456 | let match_func = |glyph, index| { |
| 457 | let coverage = coverages.get(index).unwrap(); |
| 458 | coverage.get(glyph).is_some() |
| 459 | }; |
| 460 | |
| 461 | let mut match_end = 0; |
| 462 | let mut match_positions = smallvec::SmallVec::from_elem(0, 4); |
| 463 | |
| 464 | if match_input( |
| 465 | ctx, |
| 466 | coverages_len, |
| 467 | &match_func, |
| 468 | &mut match_end, |
| 469 | &mut match_positions, |
| 470 | None, |
| 471 | ) { |
| 472 | ctx.buffer |
| 473 | .unsafe_to_break(Some(ctx.buffer.idx), Some(match_end)); |
| 474 | apply_lookup( |
| 475 | ctx, |
| 476 | usize::from(coverages_len), |
| 477 | &mut match_positions, |
| 478 | match_end, |
| 479 | lookups, |
| 480 | ); |
| 481 | Some(()) |
| 482 | } else { |
| 483 | ctx.buffer |
| 484 | .unsafe_to_concat(Some(ctx.buffer.idx), Some(match_end)); |
| 485 | None |
| 486 | } |
| 487 | } |
| 488 | } |
| 489 | } |
| 490 | } |
| 491 | |
| 492 | trait SequenceRuleSetExt { |
| 493 | fn would_apply(&self, ctx: &WouldApplyContext, match_func: &match_func_t) -> bool; |
| 494 | fn apply(&self, ctx: &mut hb_ot_apply_context_t, match_func: &match_func_t) -> Option<()>; |
| 495 | } |
| 496 | |
| 497 | impl SequenceRuleSetExt for SequenceRuleSet<'_> { |
| 498 | fn would_apply(&self, ctx: &WouldApplyContext, match_func: &match_func_t) -> bool { |
| 499 | self.into_iter() |
| 500 | .any(|rule: SequenceRule<'_>| rule.would_apply(ctx, match_func)) |
| 501 | } |
| 502 | |
| 503 | fn apply(&self, ctx: &mut hb_ot_apply_context_t, match_func: &match_func_t) -> Option<()> { |
| 504 | if self |
| 505 | .into_iter() |
| 506 | .any(|rule: SequenceRule<'_>| rule.apply(ctx, match_func).is_some()) |
| 507 | { |
| 508 | Some(()) |
| 509 | } else { |
| 510 | None |
| 511 | } |
| 512 | } |
| 513 | } |
| 514 | |
| 515 | trait SequenceRuleExt { |
| 516 | fn would_apply(&self, ctx: &WouldApplyContext, match_func: &match_func_t) -> bool; |
| 517 | fn apply(&self, ctx: &mut hb_ot_apply_context_t, match_func: &match_func_t) -> Option<()>; |
| 518 | } |
| 519 | |
| 520 | impl SequenceRuleExt for SequenceRule<'_> { |
| 521 | fn would_apply(&self, ctx: &WouldApplyContext, match_func: &match_func_t) -> bool { |
| 522 | ctx.glyphs.len() == usize::from(self.input.len()) + 1 |
| 523 | && self |
| 524 | .input |
| 525 | .into_iter() |
| 526 | .enumerate() |
| 527 | .all(|(i: usize, value: u16)| match_func(ctx.glyphs[i + 1], value)) |
| 528 | } |
| 529 | |
| 530 | fn apply(&self, ctx: &mut hb_ot_apply_context_t, match_func: &match_func_t) -> Option<()> { |
| 531 | apply_context(ctx, self.input, match_func, self.lookups) |
| 532 | |
| 533 | // TODO: Port optimized version from https://github.com/harfbuzz/harfbuzz/commit/645fabd10 |
| 534 | } |
| 535 | } |
| 536 | |
| 537 | impl WouldApply for ChainedContextLookup<'_> { |
| 538 | fn would_apply(&self, ctx: &WouldApplyContext) -> bool { |
| 539 | let glyph_id = ctx.glyphs[0]; |
| 540 | match *self { |
| 541 | Self::Format1 { coverage, sets } => coverage |
| 542 | .get(glyph_id) |
| 543 | .and_then(|index| sets.get(index)) |
| 544 | .map_or(false, |set| set.would_apply(ctx, &match_glyph)), |
| 545 | Self::Format2 { |
| 546 | input_classes, |
| 547 | sets, |
| 548 | .. |
| 549 | } => { |
| 550 | let class = input_classes.get(glyph_id); |
| 551 | sets.get(class).map_or(false, |set| { |
| 552 | set.would_apply(ctx, &match_class(input_classes)) |
| 553 | }) |
| 554 | } |
| 555 | Self::Format3 { |
| 556 | backtrack_coverages, |
| 557 | input_coverages, |
| 558 | lookahead_coverages, |
| 559 | .. |
| 560 | } => { |
| 561 | (!ctx.zero_context |
| 562 | || (backtrack_coverages.is_empty() && lookahead_coverages.is_empty())) |
| 563 | && (ctx.glyphs.len() == usize::from(input_coverages.len()) + 1 |
| 564 | && input_coverages |
| 565 | .into_iter() |
| 566 | .enumerate() |
| 567 | .all(|(i, coverage)| coverage.contains(ctx.glyphs[i + 1]))) |
| 568 | } |
| 569 | } |
| 570 | } |
| 571 | } |
| 572 | |
| 573 | impl Apply for ChainedContextLookup<'_> { |
| 574 | fn apply(&self, ctx: &mut hb_ot_apply_context_t) -> Option<()> { |
| 575 | let glyph = ctx.buffer.cur(0).as_glyph(); |
| 576 | match *self { |
| 577 | Self::Format1 { coverage, sets } => { |
| 578 | let index = coverage.get(glyph)?; |
| 579 | let set = sets.get(index)?; |
| 580 | set.apply(ctx, [&match_glyph, &match_glyph, &match_glyph]) |
| 581 | } |
| 582 | Self::Format2 { |
| 583 | coverage, |
| 584 | backtrack_classes, |
| 585 | input_classes, |
| 586 | lookahead_classes, |
| 587 | sets, |
| 588 | } => { |
| 589 | coverage.get(glyph)?; |
| 590 | let class = input_classes.get(glyph); |
| 591 | let set = sets.get(class)?; |
| 592 | set.apply( |
| 593 | ctx, |
| 594 | [ |
| 595 | &match_class(backtrack_classes), |
| 596 | &match_class(input_classes), |
| 597 | &match_class(lookahead_classes), |
| 598 | ], |
| 599 | ) |
| 600 | } |
| 601 | Self::Format3 { |
| 602 | coverage, |
| 603 | backtrack_coverages, |
| 604 | input_coverages, |
| 605 | lookahead_coverages, |
| 606 | lookups, |
| 607 | } => { |
| 608 | coverage.get(glyph)?; |
| 609 | |
| 610 | let back = |glyph, index| { |
| 611 | let coverage = backtrack_coverages.get(index).unwrap(); |
| 612 | coverage.contains(glyph) |
| 613 | }; |
| 614 | |
| 615 | let ahead = |glyph, index| { |
| 616 | let coverage = lookahead_coverages.get(index).unwrap(); |
| 617 | coverage.contains(glyph) |
| 618 | }; |
| 619 | |
| 620 | let input = |glyph, index| { |
| 621 | let coverage = input_coverages.get(index).unwrap(); |
| 622 | coverage.contains(glyph) |
| 623 | }; |
| 624 | |
| 625 | let mut end_index = ctx.buffer.idx; |
| 626 | let mut match_end = 0; |
| 627 | let mut match_positions = smallvec::SmallVec::from_elem(0, 4); |
| 628 | |
| 629 | let input_matches = match_input( |
| 630 | ctx, |
| 631 | input_coverages.len(), |
| 632 | &input, |
| 633 | &mut match_end, |
| 634 | &mut match_positions, |
| 635 | None, |
| 636 | ); |
| 637 | |
| 638 | if input_matches { |
| 639 | end_index = match_end; |
| 640 | } |
| 641 | |
| 642 | if !(input_matches |
| 643 | && match_lookahead( |
| 644 | ctx, |
| 645 | lookahead_coverages.len(), |
| 646 | &ahead, |
| 647 | match_end, |
| 648 | &mut end_index, |
| 649 | )) |
| 650 | { |
| 651 | ctx.buffer |
| 652 | .unsafe_to_concat(Some(ctx.buffer.idx), Some(end_index)); |
| 653 | return None; |
| 654 | } |
| 655 | |
| 656 | let mut start_index = ctx.buffer.out_len; |
| 657 | |
| 658 | if !match_backtrack(ctx, backtrack_coverages.len(), &back, &mut start_index) { |
| 659 | ctx.buffer |
| 660 | .unsafe_to_concat_from_outbuffer(Some(start_index), Some(end_index)); |
| 661 | return None; |
| 662 | } |
| 663 | |
| 664 | ctx.buffer |
| 665 | .unsafe_to_break_from_outbuffer(Some(start_index), Some(end_index)); |
| 666 | apply_lookup( |
| 667 | ctx, |
| 668 | usize::from(input_coverages.len()), |
| 669 | &mut match_positions, |
| 670 | match_end, |
| 671 | lookups, |
| 672 | ); |
| 673 | |
| 674 | Some(()) |
| 675 | } |
| 676 | } |
| 677 | } |
| 678 | } |
| 679 | |
| 680 | trait ChainRuleSetExt { |
| 681 | fn would_apply(&self, ctx: &WouldApplyContext, match_func: &match_func_t) -> bool; |
| 682 | fn apply(&self, ctx: &mut hb_ot_apply_context_t, match_funcs: [&match_func_t; 3]) |
| 683 | -> Option<()>; |
| 684 | } |
| 685 | |
| 686 | impl ChainRuleSetExt for ChainedSequenceRuleSet<'_> { |
| 687 | fn would_apply(&self, ctx: &WouldApplyContext, match_func: &match_func_t) -> bool { |
| 688 | self.into_iter() |
| 689 | .any(|rule: ChainedSequenceRule<'_>| rule.would_apply(ctx, match_func)) |
| 690 | } |
| 691 | |
| 692 | fn apply( |
| 693 | &self, |
| 694 | ctx: &mut hb_ot_apply_context_t, |
| 695 | match_funcs: [&match_func_t; 3], |
| 696 | ) -> Option<()> { |
| 697 | if self |
| 698 | .into_iter() |
| 699 | .any(|rule: ChainedSequenceRule<'_>| rule.apply(ctx, match_funcs).is_some()) |
| 700 | { |
| 701 | Some(()) |
| 702 | } else { |
| 703 | None |
| 704 | } |
| 705 | |
| 706 | // TODO: Port optimized version from https://github.com/harfbuzz/harfbuzz/commit/77080f86f |
| 707 | } |
| 708 | } |
| 709 | |
| 710 | trait ChainRuleExt { |
| 711 | fn would_apply(&self, ctx: &WouldApplyContext, match_func: &match_func_t) -> bool; |
| 712 | fn apply(&self, ctx: &mut hb_ot_apply_context_t, match_funcs: [&match_func_t; 3]) |
| 713 | -> Option<()>; |
| 714 | } |
| 715 | |
| 716 | impl ChainRuleExt for ChainedSequenceRule<'_> { |
| 717 | fn would_apply(&self, ctx: &WouldApplyContext, match_func: &match_func_t) -> bool { |
| 718 | (!ctx.zero_context || (self.backtrack.is_empty() && self.lookahead.is_empty())) |
| 719 | && (ctx.glyphs.len() == usize::from(self.input.len()) + 1 |
| 720 | && self |
| 721 | .input |
| 722 | .into_iter() |
| 723 | .enumerate() |
| 724 | .all(|(i, value)| match_func(ctx.glyphs[i + 1], value))) |
| 725 | } |
| 726 | |
| 727 | fn apply( |
| 728 | &self, |
| 729 | ctx: &mut hb_ot_apply_context_t, |
| 730 | match_funcs: [&match_func_t; 3], |
| 731 | ) -> Option<()> { |
| 732 | apply_chain_context( |
| 733 | ctx, |
| 734 | self.backtrack, |
| 735 | self.input, |
| 736 | self.lookahead, |
| 737 | match_funcs, |
| 738 | self.lookups, |
| 739 | ) |
| 740 | } |
| 741 | } |
| 742 | |
| 743 | fn apply_context( |
| 744 | ctx: &mut hb_ot_apply_context_t, |
| 745 | input: LazyArray16<u16>, |
| 746 | match_func: &match_func_t, |
| 747 | lookups: LazyArray16<SequenceLookupRecord>, |
| 748 | ) -> Option<()> { |
| 749 | let match_func = |glyph, index| { |
| 750 | let value = input.get(index).unwrap(); |
| 751 | match_func(glyph, value) |
| 752 | }; |
| 753 | |
| 754 | let mut match_end = 0; |
| 755 | let mut match_positions = smallvec::SmallVec::from_elem(0, 4); |
| 756 | |
| 757 | if match_input( |
| 758 | ctx, |
| 759 | input.len(), |
| 760 | &match_func, |
| 761 | &mut match_end, |
| 762 | &mut match_positions, |
| 763 | None, |
| 764 | ) { |
| 765 | ctx.buffer |
| 766 | .unsafe_to_break(Some(ctx.buffer.idx), Some(match_end)); |
| 767 | apply_lookup( |
| 768 | ctx, |
| 769 | usize::from(input.len()), |
| 770 | &mut match_positions, |
| 771 | match_end, |
| 772 | lookups, |
| 773 | ); |
| 774 | return Some(()); |
| 775 | } |
| 776 | |
| 777 | None |
| 778 | } |
| 779 | |
| 780 | fn apply_chain_context( |
| 781 | ctx: &mut hb_ot_apply_context_t, |
| 782 | backtrack: LazyArray16<u16>, |
| 783 | input: LazyArray16<u16>, |
| 784 | lookahead: LazyArray16<u16>, |
| 785 | match_funcs: [&match_func_t; 3], |
| 786 | lookups: LazyArray16<SequenceLookupRecord>, |
| 787 | ) -> Option<()> { |
| 788 | // NOTE: Whenever something in this method changes, we also need to |
| 789 | // change it in the `apply` implementation for ChainedContextLookup. |
| 790 | let f1 = |glyph, index| { |
| 791 | let value = backtrack.get(index).unwrap(); |
| 792 | match_funcs[0](glyph, value) |
| 793 | }; |
| 794 | |
| 795 | let f2 = |glyph, index| { |
| 796 | let value = lookahead.get(index).unwrap(); |
| 797 | match_funcs[2](glyph, value) |
| 798 | }; |
| 799 | |
| 800 | let f3 = |glyph, index| { |
| 801 | let value = input.get(index).unwrap(); |
| 802 | match_funcs[1](glyph, value) |
| 803 | }; |
| 804 | |
| 805 | let mut end_index = ctx.buffer.idx; |
| 806 | let mut match_end = 0; |
| 807 | let mut match_positions = smallvec::SmallVec::from_elem(0, 4); |
| 808 | |
| 809 | let input_matches = match_input( |
| 810 | ctx, |
| 811 | input.len(), |
| 812 | &f3, |
| 813 | &mut match_end, |
| 814 | &mut match_positions, |
| 815 | None, |
| 816 | ); |
| 817 | |
| 818 | if input_matches { |
| 819 | end_index = match_end; |
| 820 | } |
| 821 | |
| 822 | if !(input_matches && match_lookahead(ctx, lookahead.len(), &f2, match_end, &mut end_index)) { |
| 823 | ctx.buffer |
| 824 | .unsafe_to_concat(Some(ctx.buffer.idx), Some(end_index)); |
| 825 | return None; |
| 826 | } |
| 827 | |
| 828 | let mut start_index = ctx.buffer.out_len; |
| 829 | |
| 830 | if !match_backtrack(ctx, backtrack.len(), &f1, &mut start_index) { |
| 831 | ctx.buffer |
| 832 | .unsafe_to_concat_from_outbuffer(Some(start_index), Some(end_index)); |
| 833 | return None; |
| 834 | } |
| 835 | |
| 836 | ctx.buffer |
| 837 | .unsafe_to_break_from_outbuffer(Some(start_index), Some(end_index)); |
| 838 | apply_lookup( |
| 839 | ctx, |
| 840 | usize::from(input.len()), |
| 841 | &mut match_positions, |
| 842 | match_end, |
| 843 | lookups, |
| 844 | ); |
| 845 | |
| 846 | Some(()) |
| 847 | } |
| 848 | |
| 849 | fn apply_lookup( |
| 850 | ctx: &mut hb_ot_apply_context_t, |
| 851 | input_len: usize, |
| 852 | match_positions: &mut smallvec::SmallVec<[usize; 4]>, |
| 853 | match_end: usize, |
| 854 | lookups: LazyArray16<SequenceLookupRecord>, |
| 855 | ) { |
| 856 | let mut count = input_len + 1; |
| 857 | |
| 858 | if count > match_positions.len() { |
| 859 | match_positions.resize(count, 0); |
| 860 | } |
| 861 | |
| 862 | // All positions are distance from beginning of *output* buffer. |
| 863 | // Adjust. |
| 864 | let mut end: isize = { |
| 865 | let backtrack_len = ctx.buffer.backtrack_len(); |
| 866 | let delta = backtrack_len as isize - ctx.buffer.idx as isize; |
| 867 | |
| 868 | // Convert positions to new indexing. |
| 869 | for j in 0..count { |
| 870 | match_positions[j] = (match_positions[j] as isize + delta) as _; |
| 871 | } |
| 872 | |
| 873 | backtrack_len as isize + match_end as isize - ctx.buffer.idx as isize |
| 874 | }; |
| 875 | |
| 876 | for record in lookups { |
| 877 | if !ctx.buffer.successful { |
| 878 | break; |
| 879 | } |
| 880 | |
| 881 | let idx = usize::from(record.sequence_index); |
| 882 | if idx >= count { |
| 883 | continue; |
| 884 | } |
| 885 | |
| 886 | let orig_len = ctx.buffer.backtrack_len() + ctx.buffer.lookahead_len(); |
| 887 | |
| 888 | // This can happen if earlier recursed lookups deleted many entries. |
| 889 | if match_positions[idx] >= orig_len { |
| 890 | continue; |
| 891 | } |
| 892 | |
| 893 | if !ctx.buffer.move_to(match_positions[idx]) { |
| 894 | break; |
| 895 | } |
| 896 | |
| 897 | if ctx.buffer.max_ops <= 0 { |
| 898 | break; |
| 899 | } |
| 900 | |
| 901 | if ctx.recurse(record.lookup_list_index).is_none() { |
| 902 | continue; |
| 903 | } |
| 904 | |
| 905 | let new_len = ctx.buffer.backtrack_len() + ctx.buffer.lookahead_len(); |
| 906 | let mut delta = new_len as isize - orig_len as isize; |
| 907 | if delta == 0 { |
| 908 | continue; |
| 909 | } |
| 910 | |
| 911 | // Recursed lookup changed buffer len. Adjust. |
| 912 | // |
| 913 | // TODO: |
| 914 | // |
| 915 | // Right now, if buffer length increased by n, we assume n new glyphs |
| 916 | // were added right after the current position, and if buffer length |
| 917 | // was decreased by n, we assume n match positions after the current |
| 918 | // one where removed. The former (buffer length increased) case is |
| 919 | // fine, but the decrease case can be improved in at least two ways, |
| 920 | // both of which are significant: |
| 921 | // |
| 922 | // - If recursed-to lookup is MultipleSubst and buffer length |
| 923 | // decreased, then it's current match position that was deleted, |
| 924 | // NOT the one after it. |
| 925 | // |
| 926 | // - If buffer length was decreased by n, it does not necessarily |
| 927 | // mean that n match positions where removed, as there recursed-to |
| 928 | // lookup might had a different LookupFlag. Here's a constructed |
| 929 | // case of that: |
| 930 | // https://github.com/harfbuzz/harfbuzz/discussions/3538 |
| 931 | // |
| 932 | // It should be possible to construct tests for both of these cases. |
| 933 | |
| 934 | end += delta; |
| 935 | if end < match_positions[idx] as isize { |
| 936 | // End might end up being smaller than match_positions[idx] if the recursed |
| 937 | // lookup ended up removing many items. |
| 938 | // Just never rewind end beyond start of current position, since that is |
| 939 | // not possible in the recursed lookup. Also adjust delta as such. |
| 940 | // |
| 941 | // https://bugs.chromium.org/p/chromium/issues/detail?id=659496 |
| 942 | // https://github.com/harfbuzz/harfbuzz/issues/1611 |
| 943 | // |
| 944 | delta += match_positions[idx] as isize - end; |
| 945 | end = match_positions[idx] as isize; |
| 946 | } |
| 947 | |
| 948 | // next now is the position after the recursed lookup. |
| 949 | let mut next = idx + 1; |
| 950 | |
| 951 | if delta > 0 { |
| 952 | if delta as usize + count > MAX_CONTEXT_LENGTH { |
| 953 | break; |
| 954 | } |
| 955 | |
| 956 | if delta as usize + count > match_positions.len() { |
| 957 | let inner_max = (core::cmp::max(4, match_positions.len()) as f32 * 1.5) as usize; |
| 958 | match_positions.resize(core::cmp::max(delta as usize + count, inner_max), 0); |
| 959 | } |
| 960 | } else { |
| 961 | // NOTE: delta is non-positive. |
| 962 | delta = delta.max(next as isize - count as isize); |
| 963 | next = (next as isize - delta) as _; |
| 964 | } |
| 965 | |
| 966 | // Shift! |
| 967 | match_positions.copy_within(next..count, (next as isize + delta) as _); |
| 968 | next = (next as isize + delta) as _; |
| 969 | count = (count as isize + delta) as _; |
| 970 | |
| 971 | // Fill in new entries. |
| 972 | for j in idx + 1..next { |
| 973 | match_positions[j] = match_positions[j - 1] + 1; |
| 974 | } |
| 975 | |
| 976 | // And fixup the rest. |
| 977 | while next < count { |
| 978 | match_positions[next] = (match_positions[next] as isize + delta) as _; |
| 979 | next += 1; |
| 980 | } |
| 981 | } |
| 982 | |
| 983 | ctx.buffer.move_to(end.try_into().unwrap()); |
| 984 | } |
| 985 | |
| 986 | /// Value represents glyph class. |
| 987 | fn match_class(class_def: ClassDefinition<'_>) -> impl Fn(GlyphId, u16) -> bool + '_ { |
| 988 | move |glyph: GlyphId, value: u16| class_def.get(glyph) == value |
| 989 | } |
| 990 | |
/// Find out whether a lookup would be applied.
///
/// The query is read-only: it receives a [`WouldApplyContext`] by shared
/// reference and answers with a plain `bool`.
pub trait WouldApply {
    /// Whether the lookup would be applied to the glyphs described by `ctx`.
    fn would_apply(&self, ctx: &WouldApplyContext) -> bool;
}
| 996 | |
/// Apply a lookup.
///
/// Unlike [`WouldApply`], this receives the apply context mutably.
pub trait Apply {
    /// Apply the lookup using the state in `ctx`.
    ///
    /// NOTE(review): presumably returns `Some(())` when the lookup was applied
    /// and `None` otherwise — confirm against the implementors.
    fn apply(&self, ctx: &mut OT::hb_ot_apply_context_t) -> Option<()>;
}
| 1002 | |
/// Input handed to [`WouldApply::would_apply`].
pub struct WouldApplyContext<'a> {
    /// Glyph sequence the lookup is tested against.
    pub glyphs: &'a [GlyphId],
    // NOTE(review): presumably restricts the test to rules that need no
    // surrounding context — confirm against the implementors.
    pub zero_context: bool,
}
| 1007 | |
| 1008 | pub mod OT { |
| 1009 | use super::*; |
| 1010 | use crate::hb::set_digest::{hb_set_digest_ext, hb_set_digest_t}; |
| 1011 | |
| 1012 | pub struct hb_ot_apply_context_t<'a, 'b> { |
| 1013 | pub table_index: TableIndex, |
| 1014 | pub face: &'a hb_font_t<'b>, |
| 1015 | pub buffer: &'a mut hb_buffer_t, |
| 1016 | lookup_mask: hb_mask_t, |
| 1017 | pub per_syllable: bool, |
| 1018 | pub lookup_index: LookupIndex, |
| 1019 | pub lookup_props: u32, |
| 1020 | pub nesting_level_left: usize, |
| 1021 | pub auto_zwnj: bool, |
| 1022 | pub auto_zwj: bool, |
| 1023 | pub random: bool, |
| 1024 | pub random_state: u32, |
| 1025 | pub last_base: i32, |
| 1026 | pub last_base_until: u32, |
| 1027 | pub digest: hb_set_digest_t, |
| 1028 | } |
| 1029 | |
| 1030 | impl<'a, 'b> hb_ot_apply_context_t<'a, 'b> { |
| 1031 | pub fn new( |
| 1032 | table_index: TableIndex, |
| 1033 | face: &'a hb_font_t<'b>, |
| 1034 | buffer: &'a mut hb_buffer_t, |
| 1035 | ) -> Self { |
| 1036 | let buffer_digest = buffer.digest(); |
| 1037 | Self { |
| 1038 | table_index, |
| 1039 | face, |
| 1040 | buffer, |
| 1041 | lookup_mask: 1, |
| 1042 | per_syllable: false, |
| 1043 | lookup_index: u16::MAX, |
| 1044 | lookup_props: 0, |
| 1045 | nesting_level_left: MAX_NESTING_LEVEL, |
| 1046 | auto_zwnj: true, |
| 1047 | auto_zwj: true, |
| 1048 | random: false, |
| 1049 | random_state: 1, |
| 1050 | last_base: -1, |
| 1051 | last_base_until: 0, |
| 1052 | digest: buffer_digest, |
| 1053 | } |
| 1054 | } |
| 1055 | |
| 1056 | pub fn random_number(&mut self) -> u32 { |
| 1057 | // http://www.cplusplus.com/reference/random/minstd_rand/ |
| 1058 | self.random_state = self.random_state.wrapping_mul(48271) % 2147483647; |
| 1059 | self.random_state |
| 1060 | } |
| 1061 | |
| 1062 | pub fn set_lookup_mask(&mut self, mask: hb_mask_t) { |
| 1063 | self.lookup_mask = mask; |
| 1064 | self.last_base = -1; |
| 1065 | self.last_base_until = 0; |
| 1066 | } |
| 1067 | |
| 1068 | pub fn lookup_mask(&self) -> hb_mask_t { |
| 1069 | self.lookup_mask |
| 1070 | } |
| 1071 | |
| 1072 | pub fn recurse(&mut self, sub_lookup_index: LookupIndex) -> Option<()> { |
| 1073 | if self.nesting_level_left == 0 { |
| 1074 | self.buffer.shaping_failed = true; |
| 1075 | return None; |
| 1076 | } |
| 1077 | |
| 1078 | self.buffer.max_ops -= 1; |
| 1079 | if self.buffer.max_ops < 0 { |
| 1080 | self.buffer.shaping_failed = true; |
| 1081 | return None; |
| 1082 | } |
| 1083 | |
| 1084 | self.nesting_level_left -= 1; |
| 1085 | let saved_props = self.lookup_props; |
| 1086 | let saved_index = self.lookup_index; |
| 1087 | |
| 1088 | self.lookup_index = sub_lookup_index; |
| 1089 | let applied = match self.table_index { |
| 1090 | TableIndex::GSUB => self |
| 1091 | .face |
| 1092 | .gsub |
| 1093 | .as_ref() |
| 1094 | .and_then(|table| table.get_lookup(sub_lookup_index)) |
| 1095 | .and_then(|lookup| { |
| 1096 | self.lookup_props = lookup.props(); |
| 1097 | lookup.apply(self) |
| 1098 | }), |
| 1099 | TableIndex::GPOS => self |
| 1100 | .face |
| 1101 | .gpos |
| 1102 | .as_ref() |
| 1103 | .and_then(|table| table.get_lookup(sub_lookup_index)) |
| 1104 | .and_then(|lookup| { |
| 1105 | self.lookup_props = lookup.props(); |
| 1106 | lookup.apply(self) |
| 1107 | }), |
| 1108 | }; |
| 1109 | |
| 1110 | self.lookup_props = saved_props; |
| 1111 | self.lookup_index = saved_index; |
| 1112 | self.nesting_level_left += 1; |
| 1113 | applied |
| 1114 | } |
| 1115 | |
| 1116 | pub fn check_glyph_property(&self, info: &hb_glyph_info_t, match_props: u32) -> bool { |
| 1117 | let glyph_props = info.glyph_props(); |
| 1118 | |
| 1119 | // Lookup flags are lower 16-bit of match props. |
| 1120 | let lookup_flags = match_props as u16; |
| 1121 | |
| 1122 | // Not covered, if, for example, glyph class is ligature and |
| 1123 | // match_props includes LookupFlags::IgnoreLigatures |
| 1124 | if glyph_props & lookup_flags & lookup_flags::IGNORE_FLAGS != 0 { |
| 1125 | return false; |
| 1126 | } |
| 1127 | |
| 1128 | if glyph_props & GlyphPropsFlags::MARK.bits() != 0 { |
| 1129 | // If using mark filtering sets, the high short of |
| 1130 | // match_props has the set index. |
| 1131 | if lookup_flags & lookup_flags::USE_MARK_FILTERING_SET != 0 { |
| 1132 | let set_index = (match_props >> 16) as u16; |
| 1133 | // TODO: harfbuzz uses a digest here to speed things up if HB_NO_GDEF_CACHE |
| 1134 | // is enabled. But a bit harder to implement for us since it's taken care of by |
| 1135 | // ttf-parser |
| 1136 | if let Some(table) = self.face.tables().gdef { |
| 1137 | return table.is_mark_glyph(info.as_glyph(), Some(set_index)); |
| 1138 | } else { |
| 1139 | return false; |
| 1140 | } |
| 1141 | } |
| 1142 | |
| 1143 | // The second byte of match_props has the meaning |
| 1144 | // "ignore marks of attachment type different than |
| 1145 | // the attachment type specified." |
| 1146 | if lookup_flags & lookup_flags::MARK_ATTACHMENT_TYPE_MASK != 0 { |
| 1147 | return (lookup_flags & lookup_flags::MARK_ATTACHMENT_TYPE_MASK) |
| 1148 | == (glyph_props & lookup_flags::MARK_ATTACHMENT_TYPE_MASK); |
| 1149 | } |
| 1150 | } |
| 1151 | |
| 1152 | true |
| 1153 | } |
| 1154 | |
| 1155 | fn set_glyph_class( |
| 1156 | &mut self, |
| 1157 | glyph_id: GlyphId, |
| 1158 | class_guess: GlyphPropsFlags, |
| 1159 | ligature: bool, |
| 1160 | component: bool, |
| 1161 | ) { |
| 1162 | self.digest.add(glyph_id); |
| 1163 | |
| 1164 | let cur = self.buffer.cur_mut(0); |
| 1165 | let mut props = cur.glyph_props(); |
| 1166 | |
| 1167 | props |= GlyphPropsFlags::SUBSTITUTED.bits(); |
| 1168 | |
| 1169 | if ligature { |
| 1170 | props |= GlyphPropsFlags::LIGATED.bits(); |
| 1171 | // In the only place that the MULTIPLIED bit is used, Uniscribe |
| 1172 | // seems to only care about the "last" transformation between |
| 1173 | // Ligature and Multiple substitutions. Ie. if you ligate, expand, |
| 1174 | // and ligate again, it forgives the multiplication and acts as |
| 1175 | // if only ligation happened. As such, clear MULTIPLIED bit. |
| 1176 | props &= !GlyphPropsFlags::MULTIPLIED.bits(); |
| 1177 | } |
| 1178 | |
| 1179 | if component { |
| 1180 | props |= GlyphPropsFlags::MULTIPLIED.bits(); |
| 1181 | } |
| 1182 | |
| 1183 | let has_glyph_classes = self |
| 1184 | .face |
| 1185 | .tables() |
| 1186 | .gdef |
| 1187 | .map_or(false, |table| table.has_glyph_classes()); |
| 1188 | |
| 1189 | if has_glyph_classes { |
| 1190 | props &= GlyphPropsFlags::PRESERVE.bits(); |
| 1191 | cur.set_glyph_props(props | self.face.glyph_props(glyph_id)); |
| 1192 | } else if !class_guess.is_empty() { |
| 1193 | props &= GlyphPropsFlags::PRESERVE.bits(); |
| 1194 | cur.set_glyph_props(props | class_guess.bits()); |
| 1195 | } else { |
| 1196 | cur.set_glyph_props(props); |
| 1197 | } |
| 1198 | } |
| 1199 | |
| 1200 | pub fn replace_glyph(&mut self, glyph_id: GlyphId) { |
| 1201 | self.set_glyph_class(glyph_id, GlyphPropsFlags::empty(), false, false); |
| 1202 | self.buffer.replace_glyph(u32::from(glyph_id.0)); |
| 1203 | } |
| 1204 | |
| 1205 | pub fn replace_glyph_inplace(&mut self, glyph_id: GlyphId) { |
| 1206 | self.set_glyph_class(glyph_id, GlyphPropsFlags::empty(), false, false); |
| 1207 | self.buffer.cur_mut(0).glyph_id = u32::from(glyph_id.0); |
| 1208 | } |
| 1209 | |
| 1210 | pub fn replace_glyph_with_ligature( |
| 1211 | &mut self, |
| 1212 | glyph_id: GlyphId, |
| 1213 | class_guess: GlyphPropsFlags, |
| 1214 | ) { |
| 1215 | self.set_glyph_class(glyph_id, class_guess, true, false); |
| 1216 | self.buffer.replace_glyph(u32::from(glyph_id.0)); |
| 1217 | } |
| 1218 | |
| 1219 | pub fn output_glyph_for_component( |
| 1220 | &mut self, |
| 1221 | glyph_id: GlyphId, |
| 1222 | class_guess: GlyphPropsFlags, |
| 1223 | ) { |
| 1224 | self.set_glyph_class(glyph_id, class_guess, false, true); |
| 1225 | self.buffer.output_glyph(u32::from(glyph_id.0)); |
| 1226 | } |
| 1227 | } |
| 1228 | } |
| 1229 | |
| 1230 | use OT::hb_ot_apply_context_t; |
| 1231 | |
/// Replaces the matched glyph sequence with the ligature glyph `lig_glyph`.
///
/// The glyph at the current buffer position is replaced by `lig_glyph`; the
/// remaining matched positions are skipped, and the glyphs in between (and,
/// where applicable, marks following the last component) have their ligature
/// id / component updated so they stay attached to the right component.
///
/// * `count` - number of matched glyphs, including the first one.
/// * `match_positions` - buffer positions of the matched glyphs, including
///   the first one.
/// * `match_end` - end of the range whose clusters are merged, starting at
///   the current buffer position.
/// * `total_component_count` - component count stored on the new ligature.
/// * `lig_glyph` - the ligature glyph to substitute.
///
/// # Panics
///
/// Panics if the running component count ever drops below the component
/// count of the last component (guards against unsigned wrap-around).
pub fn ligate_input(
    ctx: &mut hb_ot_apply_context_t,
    // Including the first glyph
    count: usize,
    // Including the first glyph
    match_positions: &smallvec::SmallVec<[usize; 4]>,
    match_end: usize,
    total_component_count: u8,
    lig_glyph: GlyphId,
) {
    // - If a base and one or more marks ligate, consider that as a base, NOT
    //   ligature, such that all following marks can still attach to it.
    //   https://github.com/harfbuzz/harfbuzz/issues/1109
    //
    // - If all components of the ligature were marks, we call this a mark ligature.
    //   If it *is* a mark ligature, we don't allocate a new ligature id, and leave
    //   the ligature to keep its old ligature id. This will allow it to attach to
    //   a base ligature in GPOS. Eg. if the sequence is: LAM,LAM,SHADDA,FATHA,HEH,
    //   and LAM,LAM,HEH form a ligature, they will leave SHADDA and FATHA with a
    //   ligature id and component value of 2. Then if SHADDA,FATHA form a ligature
    //   later, we don't want them to lose their ligature id/component, otherwise
    //   GPOS will fail to correctly position the mark ligature on top of the
    //   LAM,LAM,HEH ligature. See:
    //   https://bugzilla.gnome.org/show_bug.cgi?id=676343
    //
    // - If a ligature is formed of components that some of which are also ligatures
    //   themselves, and those ligature components had marks attached to *their*
    //   components, we have to attach the marks to the new ligature component
    //   positions! Now *that*'s tricky! And these marks may be following the
    //   last component of the whole sequence, so we should loop forward looking
    //   for them and update them.
    //
    //   Eg. the sequence is LAM,LAM,SHADDA,FATHA,HEH, and the font first forms a
    //   'calt' ligature of LAM,HEH, leaving the SHADDA and FATHA with a ligature
    //   id and component == 1. Now, during 'liga', the LAM and the LAM-HEH ligature
    //   form a LAM-LAM-HEH ligature. We need to reassign the SHADDA and FATHA to
    //   the new ligature with a component value of 2.
    //
    //   This in fact happened to a font... See:
    //   https://bugzilla.gnome.org/show_bug.cgi?id=437633
    //

    let mut buffer = &mut ctx.buffer;
    buffer.merge_clusters(buffer.idx, match_end);

    // Classify the result: "base ligature" = base glyph followed only by marks,
    // "mark ligature" = marks only. Any non-mark after the first glyph rules
    // out both.
    let mut is_base_ligature = _hb_glyph_info_is_base_glyph(&buffer.info[match_positions[0]]);
    let mut is_mark_ligature = _hb_glyph_info_is_mark(&buffer.info[match_positions[0]]);
    for i in 1..count {
        if !_hb_glyph_info_is_mark(&buffer.info[match_positions[i]]) {
            is_base_ligature = false;
            is_mark_ligature = false;
        }
    }

    // Only a "real" ligature gets the LIGATURE class guess and a fresh ligature id.
    let is_ligature = !is_base_ligature && !is_mark_ligature;
    let class = if is_ligature {
        GlyphPropsFlags::LIGATURE
    } else {
        GlyphPropsFlags::empty()
    };
    let lig_id = if is_ligature {
        buffer.allocate_lig_id()
    } else {
        0
    };
    // Remember the ligature properties of the first component before they are
    // overwritten below.
    let first = buffer.cur_mut(0);
    let mut last_lig_id = _hb_glyph_info_get_lig_id(first);
    let mut last_num_comps = _hb_glyph_info_get_lig_num_comps(first);
    let mut comps_so_far = last_num_comps;

    if is_ligature {
        _hb_glyph_info_set_lig_props_for_ligature(first, lig_id, total_component_count);
        // A ligature that starts with a nonspacing mark is re-categorized as a letter.
        if _hb_glyph_info_get_general_category(first)
            == hb_unicode_general_category_t::NonspacingMark
        {
            _hb_glyph_info_set_general_category(first, hb_unicode_general_category_t::OtherLetter);
        }
    }

    ctx.replace_glyph_with_ligature(lig_glyph, class);
    // Re-borrow: the previous borrow of the buffer ended so `ctx` could be used above.
    buffer = &mut ctx.buffer;

    for i in 1..count {
        // Walk over the glyphs that sit between the previous component and the
        // next matched component.
        while buffer.idx < match_positions[i] && buffer.successful {
            if is_ligature {
                let cur = buffer.cur_mut(0);
                let mut this_comp = _hb_glyph_info_get_lig_comp(cur);
                if this_comp == 0 {
                    this_comp = last_num_comps;
                }
                // Avoid the potential for a wrap-around bug when subtracting from an unsigned integer
                // c.f. https://github.com/harfbuzz/rustybuzz/issues/142
                assert!(comps_so_far >= last_num_comps);
                let new_lig_comp = comps_so_far - last_num_comps + this_comp.min(last_num_comps);
                _hb_glyph_info_set_lig_props_for_mark(cur, lig_id, new_lig_comp);
            }
            buffer.next_glyph();
        }

        // Account for the component that is about to be skipped.
        let cur = buffer.cur(0);
        last_lig_id = _hb_glyph_info_get_lig_id(cur);
        last_num_comps = _hb_glyph_info_get_lig_num_comps(cur);
        comps_so_far += last_num_comps;

        // Skip the base glyph.
        buffer.idx += 1;
    }

    if !is_mark_ligature && last_lig_id != 0 {
        // Re-adjust components for any marks following.
        for i in buffer.idx..buffer.len {
            let info = &mut buffer.info[i];
            // Stop at the first glyph that does not belong to the last component's ligature.
            if last_lig_id != _hb_glyph_info_get_lig_id(info) {
                break;
            }

            let this_comp = _hb_glyph_info_get_lig_comp(info);
            if this_comp == 0 {
                break;
            }

            // Avoid the potential for a wrap-around bug when subtracting from an unsigned integer
            // c.f. https://github.com/harfbuzz/rustybuzz/issues/142
            assert!(comps_so_far >= last_num_comps);
            let new_lig_comp = comps_so_far - last_num_comps + this_comp.min(last_num_comps);
            _hb_glyph_info_set_lig_props_for_mark(info, lig_id, new_lig_comp)
        }
    }
}
| 1361 | |