1 | // This table has a pretty complex parsing algorithm. |
2 | // A detailed explanation can be found here: |
3 | // https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table |
4 | // https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6cmap.html |
5 | // https://github.com/fonttools/fonttools/blob/a360252709a3d65f899915db0a5bd753007fdbb7/Lib/fontTools/ttLib/tables/_c_m_a_p.py#L360 |
6 | |
7 | use core::convert::TryFrom; |
8 | |
9 | use crate::parser::{FromData, LazyArray16, Stream}; |
10 | use crate::GlyphId; |
11 | |
/// A single record of the format 2 `subHeaders` array.
///
/// Each record describes how the low bytes of a character code are mapped
/// into the glyph index array.
#[derive(Clone, Copy)]
struct SubHeaderRecord {
    /// First low byte covered by this sub-header.
    first_code: u16,
    /// Number of consecutive low bytes covered, starting at `first_code`.
    entry_count: u16,
    /// Signed delta added to every non-zero value read from the glyph index array.
    id_delta: i16,
    /// Byte distance from this field's own position to the glyph index
    /// array entry that corresponds to `first_code`.
    id_range_offset: u16,
}
19 | |
20 | impl FromData for SubHeaderRecord { |
21 | const SIZE: usize = 8; |
22 | |
23 | #[inline ] |
24 | fn parse(data: &[u8]) -> Option<Self> { |
25 | let mut s: Stream<'_> = Stream::new(data); |
26 | Some(SubHeaderRecord { |
27 | first_code: s.read::<u16>()?, |
28 | entry_count: s.read::<u16>()?, |
29 | id_delta: s.read::<i16>()?, |
30 | id_range_offset: s.read::<u16>()?, |
31 | }) |
32 | } |
33 | } |
34 | |
35 | /// A [format 2](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table) |
36 | /// subtable. |
37 | #[derive (Clone, Copy)] |
38 | pub struct Subtable2<'a> { |
39 | sub_header_keys: LazyArray16<'a, u16>, |
40 | sub_headers_offset: usize, |
41 | sub_headers: LazyArray16<'a, SubHeaderRecord>, |
42 | // The whole subtable data. |
43 | data: &'a [u8], |
44 | } |
45 | |
46 | impl<'a> Subtable2<'a> { |
47 | /// Parses a subtable from raw data. |
48 | pub fn parse(data: &'a [u8]) -> Option<Self> { |
49 | let mut s = Stream::new(data); |
50 | s.skip::<u16>(); // format |
51 | s.skip::<u16>(); // length |
52 | s.skip::<u16>(); // language |
53 | let sub_header_keys = s.read_array16::<u16>(256)?; |
54 | // The maximum index in a sub_header_keys is a sub_headers count. |
55 | let sub_headers_count = sub_header_keys.into_iter().map(|n| n / 8).max()? + 1; |
56 | |
57 | // Remember sub_headers offset before reading. Will be used later. |
58 | let sub_headers_offset = s.offset(); |
59 | let sub_headers = s.read_array16::<SubHeaderRecord>(sub_headers_count)?; |
60 | |
61 | Some(Self { |
62 | sub_header_keys, |
63 | sub_headers_offset, |
64 | sub_headers, |
65 | data, |
66 | }) |
67 | } |
68 | |
69 | /// Returns a glyph index for a code point. |
70 | /// |
71 | /// Returns `None` when `code_point` is larger than `u16`. |
72 | pub fn glyph_index(&self, code_point: u32) -> Option<GlyphId> { |
73 | // This subtable supports code points only in a u16 range. |
74 | let code_point = u16::try_from(code_point).ok()?; |
75 | |
76 | let code_point = code_point; |
77 | let high_byte = code_point >> 8; |
78 | let low_byte = code_point & 0x00FF; |
79 | |
80 | let i = if code_point < 0xff { |
81 | // 'SubHeader 0 is special: it is used for single-byte character codes.' |
82 | 0 |
83 | } else { |
84 | // 'Array that maps high bytes to subHeaders: value is subHeader index × 8.' |
85 | self.sub_header_keys.get(high_byte)? / 8 |
86 | }; |
87 | |
88 | let sub_header = self.sub_headers.get(i)?; |
89 | |
90 | let first_code = sub_header.first_code; |
91 | let range_end = first_code.checked_add(sub_header.entry_count)?; |
92 | if low_byte < first_code || low_byte >= range_end { |
93 | return None; |
94 | } |
95 | |
96 | // SubHeaderRecord::id_range_offset points to SubHeaderRecord::first_code |
97 | // in the glyphIndexArray. So we have to advance to our code point. |
98 | let index_offset = usize::from(low_byte.checked_sub(first_code)?) * u16::SIZE; |
99 | |
100 | // 'The value of the idRangeOffset is the number of bytes |
101 | // past the actual location of the idRangeOffset'. |
102 | let offset = self.sub_headers_offset |
103 | // Advance to required subheader. |
104 | + SubHeaderRecord::SIZE * usize::from(i + 1) |
105 | // Move back to idRangeOffset start. |
106 | - u16::SIZE |
107 | // Use defined offset. |
108 | + usize::from(sub_header.id_range_offset) |
109 | // Advance to required index in the glyphIndexArray. |
110 | + index_offset; |
111 | |
112 | let glyph: u16 = Stream::read_at(self.data, offset)?; |
113 | if glyph == 0 { |
114 | return None; |
115 | } |
116 | |
117 | u16::try_from((i32::from(glyph) + i32::from(sub_header.id_delta)) % 65536) |
118 | .ok() |
119 | .map(GlyphId) |
120 | } |
121 | |
122 | /// Calls `f` for each codepoint defined in this table. |
123 | pub fn codepoints(&self, f: impl FnMut(u32)) { |
124 | let _ = self.codepoints_inner(f); |
125 | } |
126 | |
127 | #[inline ] |
128 | fn codepoints_inner(&self, mut f: impl FnMut(u32)) -> Option<()> { |
129 | for first_byte in 0u16..256 { |
130 | let i = self.sub_header_keys.get(first_byte)? / 8; |
131 | let sub_header = self.sub_headers.get(i)?; |
132 | let first_code = sub_header.first_code; |
133 | |
134 | if i == 0 { |
135 | // This is a single byte code. |
136 | let range_end = first_code.checked_add(sub_header.entry_count)?; |
137 | if first_byte >= first_code && first_byte < range_end { |
138 | f(u32::from(first_byte)); |
139 | } |
140 | } else { |
141 | // This is a two byte code. |
142 | let base = first_code.checked_add(first_byte << 8)?; |
143 | for k in 0..sub_header.entry_count { |
144 | let code_point = base.checked_add(k)?; |
145 | f(u32::from(code_point)); |
146 | } |
147 | } |
148 | } |
149 | |
150 | Some(()) |
151 | } |
152 | } |
153 | |
154 | impl core::fmt::Debug for Subtable2<'_> { |
155 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
156 | write!(f, "Subtable2 {{ ... }}" ) |
157 | } |
158 | } |
159 | |