| 1 | // This table has a pretty complex parsing algorithm. |
| 2 | // A detailed explanation can be found here: |
| 3 | // https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table |
| 4 | // https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6cmap.html |
| 5 | // https://github.com/fonttools/fonttools/blob/a360252709a3d65f899915db0a5bd753007fdbb7/Lib/fontTools/ttLib/tables/_c_m_a_p.py#L360 |
| 6 | |
| 7 | use core::convert::TryFrom; |
| 8 | |
| 9 | use crate::parser::{FromData, LazyArray16, Stream}; |
| 10 | use crate::GlyphId; |
| 11 | |
/// A single record of the format 2 `subHeaders` array.
///
/// Each record describes the contiguous range of low bytes that is mapped
/// for one group of character codes.
#[derive(Clone, Copy)]
struct SubHeaderRecord {
    /// The first low byte covered by this record.
    first_code: u16,
    /// How many consecutive low bytes, starting at `first_code`, are covered.
    entry_count: u16,
    /// Signed delta added to a non-zero value read from the glyph index array.
    id_delta: i16,
    /// Byte distance from this field's own position to the glyph index array
    /// entry that corresponds to `first_code`.
    id_range_offset: u16,
}
| 19 | |
| 20 | impl FromData for SubHeaderRecord { |
| 21 | const SIZE: usize = 8; |
| 22 | |
| 23 | #[inline ] |
| 24 | fn parse(data: &[u8]) -> Option<Self> { |
| 25 | let mut s: Stream<'_> = Stream::new(data); |
| 26 | Some(SubHeaderRecord { |
| 27 | first_code: s.read::<u16>()?, |
| 28 | entry_count: s.read::<u16>()?, |
| 29 | id_delta: s.read::<i16>()?, |
| 30 | id_range_offset: s.read::<u16>()?, |
| 31 | }) |
| 32 | } |
| 33 | } |
| 34 | |
| 35 | /// A [format 2](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table) |
| 36 | /// subtable. |
| 37 | #[derive (Clone, Copy)] |
| 38 | pub struct Subtable2<'a> { |
| 39 | sub_header_keys: LazyArray16<'a, u16>, |
| 40 | sub_headers_offset: usize, |
| 41 | sub_headers: LazyArray16<'a, SubHeaderRecord>, |
| 42 | // The whole subtable data. |
| 43 | data: &'a [u8], |
| 44 | } |
| 45 | |
| 46 | impl<'a> Subtable2<'a> { |
| 47 | /// Parses a subtable from raw data. |
| 48 | pub fn parse(data: &'a [u8]) -> Option<Self> { |
| 49 | let mut s = Stream::new(data); |
| 50 | s.skip::<u16>(); // format |
| 51 | s.skip::<u16>(); // length |
| 52 | s.skip::<u16>(); // language |
| 53 | let sub_header_keys = s.read_array16::<u16>(256)?; |
| 54 | // The maximum index in a sub_header_keys is a sub_headers count. |
| 55 | let sub_headers_count = sub_header_keys.into_iter().map(|n| n / 8).max()? + 1; |
| 56 | |
| 57 | // Remember sub_headers offset before reading. Will be used later. |
| 58 | let sub_headers_offset = s.offset(); |
| 59 | let sub_headers = s.read_array16::<SubHeaderRecord>(sub_headers_count)?; |
| 60 | |
| 61 | Some(Self { |
| 62 | sub_header_keys, |
| 63 | sub_headers_offset, |
| 64 | sub_headers, |
| 65 | data, |
| 66 | }) |
| 67 | } |
| 68 | |
| 69 | /// Returns a glyph index for a code point. |
| 70 | /// |
| 71 | /// Returns `None` when `code_point` is larger than `u16`. |
| 72 | pub fn glyph_index(&self, code_point: u32) -> Option<GlyphId> { |
| 73 | // This subtable supports code points only in a u16 range. |
| 74 | let code_point = u16::try_from(code_point).ok()?; |
| 75 | |
| 76 | let code_point = code_point; |
| 77 | let high_byte = code_point >> 8; |
| 78 | let low_byte = code_point & 0x00FF; |
| 79 | |
| 80 | let i = if code_point < 0xff { |
| 81 | // 'SubHeader 0 is special: it is used for single-byte character codes.' |
| 82 | 0 |
| 83 | } else { |
| 84 | // 'Array that maps high bytes to subHeaders: value is subHeader index × 8.' |
| 85 | self.sub_header_keys.get(high_byte)? / 8 |
| 86 | }; |
| 87 | |
| 88 | let sub_header = self.sub_headers.get(i)?; |
| 89 | |
| 90 | let first_code = sub_header.first_code; |
| 91 | let range_end = first_code.checked_add(sub_header.entry_count)?; |
| 92 | if low_byte < first_code || low_byte >= range_end { |
| 93 | return None; |
| 94 | } |
| 95 | |
| 96 | // SubHeaderRecord::id_range_offset points to SubHeaderRecord::first_code |
| 97 | // in the glyphIndexArray. So we have to advance to our code point. |
| 98 | let index_offset = usize::from(low_byte.checked_sub(first_code)?) * u16::SIZE; |
| 99 | |
| 100 | // 'The value of the idRangeOffset is the number of bytes |
| 101 | // past the actual location of the idRangeOffset'. |
| 102 | let offset = self.sub_headers_offset |
| 103 | // Advance to required subheader. |
| 104 | + SubHeaderRecord::SIZE * usize::from(i + 1) |
| 105 | // Move back to idRangeOffset start. |
| 106 | - u16::SIZE |
| 107 | // Use defined offset. |
| 108 | + usize::from(sub_header.id_range_offset) |
| 109 | // Advance to required index in the glyphIndexArray. |
| 110 | + index_offset; |
| 111 | |
| 112 | let glyph: u16 = Stream::read_at(self.data, offset)?; |
| 113 | if glyph == 0 { |
| 114 | return None; |
| 115 | } |
| 116 | |
| 117 | u16::try_from((i32::from(glyph) + i32::from(sub_header.id_delta)) % 65536) |
| 118 | .ok() |
| 119 | .map(GlyphId) |
| 120 | } |
| 121 | |
| 122 | /// Calls `f` for each codepoint defined in this table. |
| 123 | pub fn codepoints(&self, f: impl FnMut(u32)) { |
| 124 | let _ = self.codepoints_inner(f); |
| 125 | } |
| 126 | |
| 127 | #[inline ] |
| 128 | fn codepoints_inner(&self, mut f: impl FnMut(u32)) -> Option<()> { |
| 129 | for first_byte in 0u16..256 { |
| 130 | let i = self.sub_header_keys.get(first_byte)? / 8; |
| 131 | let sub_header = self.sub_headers.get(i)?; |
| 132 | let first_code = sub_header.first_code; |
| 133 | |
| 134 | if i == 0 { |
| 135 | // This is a single byte code. |
| 136 | let range_end = first_code.checked_add(sub_header.entry_count)?; |
| 137 | if first_byte >= first_code && first_byte < range_end { |
| 138 | f(u32::from(first_byte)); |
| 139 | } |
| 140 | } else { |
| 141 | // This is a two byte code. |
| 142 | let base = first_code.checked_add(first_byte << 8)?; |
| 143 | for k in 0..sub_header.entry_count { |
| 144 | let code_point = base.checked_add(k)?; |
| 145 | f(u32::from(code_point)); |
| 146 | } |
| 147 | } |
| 148 | } |
| 149 | |
| 150 | Some(()) |
| 151 | } |
| 152 | } |
| 153 | |
| 154 | impl core::fmt::Debug for Subtable2<'_> { |
| 155 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
| 156 | write!(f, "Subtable2 {{ ... }}" ) |
| 157 | } |
| 158 | } |
| 159 | |