1 | // This table has a pretty complex parsing algorithm. |
2 | // A detailed explanation can be found here: |
3 | // https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table |
4 | // https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6cmap.html |
5 | // https://github.com/fonttools/fonttools/blob/a360252709a3d65f899915db0a5bd753007fdbb7/Lib/fontTools/ttLib/tables/_c_m_a_p.py#L360 |
6 | |
7 | use core::convert::TryFrom; |
8 | |
9 | use crate::parser::{FromData, LazyArray16, Stream}; |
10 | use crate::GlyphId; |
11 | |
/// One sub-header record of a format 2 subtable.
///
/// Field names mirror the `firstCode`, `entryCount`, `idDelta` and
/// `idRangeOffset` fields described in the specifications linked at the top
/// of this file.
#[derive(Clone, Copy)]
struct SubHeaderRecord {
    /// First low-byte value covered by this sub-header.
    first_code: u16,
    /// Number of consecutive low-byte values covered, starting at `first_code`.
    entry_count: u16,
    /// Signed delta added to a non-zero value read from the glyph index array.
    id_delta: i16,
    /// Byte distance from this field's own location to the glyph index array
    /// entry that corresponds to `first_code`.
    id_range_offset: u16,
}
19 | |
20 | impl FromData for SubHeaderRecord { |
21 | const SIZE: usize = 8; |
22 | |
23 | #[inline ] |
24 | fn parse(data: &[u8]) -> Option<Self> { |
25 | let mut s: Stream<'_> = Stream::new(data); |
26 | Some(SubHeaderRecord { |
27 | first_code: s.read::<u16>()?, |
28 | entry_count: s.read::<u16>()?, |
29 | id_delta: s.read::<i16>()?, |
30 | id_range_offset: s.read::<u16>()?, |
31 | }) |
32 | } |
33 | } |
34 | |
35 | /// A [format 2](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table) |
36 | /// subtable. |
37 | #[derive (Clone, Copy)] |
38 | pub struct Subtable2<'a> { |
39 | sub_header_keys: LazyArray16<'a, u16>, |
40 | sub_headers_offset: usize, |
41 | sub_headers: LazyArray16<'a, SubHeaderRecord>, |
42 | // The whole subtable data. |
43 | data: &'a [u8], |
44 | } |
45 | |
46 | impl<'a> Subtable2<'a> { |
47 | /// Parses a subtable from raw data. |
48 | pub fn parse(data: &'a [u8]) -> Option<Self> { |
49 | let mut s = Stream::new(data); |
50 | s.skip::<u16>(); // format |
51 | s.skip::<u16>(); // length |
52 | s.skip::<u16>(); // language |
53 | let sub_header_keys = s.read_array16::<u16>(256)?; |
54 | // The maximum index in a sub_header_keys is a sub_headers count. |
55 | let sub_headers_count = sub_header_keys.into_iter().map(|n| n / 8).max()? + 1; |
56 | |
57 | // Remember sub_headers offset before reading. Will be used later. |
58 | let sub_headers_offset = s.offset(); |
59 | let sub_headers = s.read_array16::<SubHeaderRecord>(sub_headers_count)?; |
60 | |
61 | Some(Self { |
62 | sub_header_keys, |
63 | sub_headers_offset, |
64 | sub_headers, |
65 | data, |
66 | }) |
67 | } |
68 | |
69 | /// Returns a glyph index for a code point. |
70 | /// |
71 | /// Returns `None` when `code_point` is larger than `u16`. |
72 | pub fn glyph_index(&self, code_point: u32) -> Option<GlyphId> { |
73 | // This subtable supports code points only in a u16 range. |
74 | let code_point = u16::try_from(code_point).ok()?; |
75 | let high_byte = code_point >> 8; |
76 | let low_byte = code_point & 0x00FF; |
77 | |
78 | let i = if code_point < 0xff { |
79 | // 'SubHeader 0 is special: it is used for single-byte character codes.' |
80 | 0 |
81 | } else { |
82 | // 'Array that maps high bytes to subHeaders: value is subHeader index × 8.' |
83 | self.sub_header_keys.get(high_byte)? / 8 |
84 | }; |
85 | |
86 | let sub_header = self.sub_headers.get(i)?; |
87 | |
88 | let first_code = sub_header.first_code; |
89 | let range_end = first_code.checked_add(sub_header.entry_count)?; |
90 | if low_byte < first_code || low_byte >= range_end { |
91 | return None; |
92 | } |
93 | |
94 | // SubHeaderRecord::id_range_offset points to SubHeaderRecord::first_code |
95 | // in the glyphIndexArray. So we have to advance to our code point. |
96 | let index_offset = usize::from(low_byte.checked_sub(first_code)?) * u16::SIZE; |
97 | |
98 | // 'The value of the idRangeOffset is the number of bytes |
99 | // past the actual location of the idRangeOffset'. |
100 | let offset = self.sub_headers_offset |
101 | // Advance to required subheader. |
102 | + SubHeaderRecord::SIZE * usize::from(i + 1) |
103 | // Move back to idRangeOffset start. |
104 | - u16::SIZE |
105 | // Use defined offset. |
106 | + usize::from(sub_header.id_range_offset) |
107 | // Advance to required index in the glyphIndexArray. |
108 | + index_offset; |
109 | |
110 | let glyph: u16 = Stream::read_at(self.data, offset)?; |
111 | if glyph == 0 { |
112 | return None; |
113 | } |
114 | |
115 | u16::try_from((i32::from(glyph) + i32::from(sub_header.id_delta)) % 65536) |
116 | .ok() |
117 | .map(GlyphId) |
118 | } |
119 | |
120 | /// Calls `f` for each codepoint defined in this table. |
121 | pub fn codepoints(&self, f: impl FnMut(u32)) { |
122 | let _ = self.codepoints_inner(f); |
123 | } |
124 | |
125 | #[inline ] |
126 | fn codepoints_inner(&self, mut f: impl FnMut(u32)) -> Option<()> { |
127 | for first_byte in 0u16..256 { |
128 | let i = self.sub_header_keys.get(first_byte)? / 8; |
129 | let sub_header = self.sub_headers.get(i)?; |
130 | let first_code = sub_header.first_code; |
131 | |
132 | if i == 0 { |
133 | // This is a single byte code. |
134 | let range_end = first_code.checked_add(sub_header.entry_count)?; |
135 | if first_byte >= first_code && first_byte < range_end { |
136 | f(u32::from(first_byte)); |
137 | } |
138 | } else { |
139 | // This is a two byte code. |
140 | let base = first_code.checked_add(first_byte << 8)?; |
141 | for k in 0..sub_header.entry_count { |
142 | let code_point = base.checked_add(k)?; |
143 | f(u32::from(code_point)); |
144 | } |
145 | } |
146 | } |
147 | |
148 | Some(()) |
149 | } |
150 | } |
151 | |
152 | impl core::fmt::Debug for Subtable2<'_> { |
153 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
154 | write!(f, "Subtable2 {{ ... }}" ) |
155 | } |
156 | } |
157 | |