1// This table has a pretty complex parsing algorithm.
2// A detailed explanation can be found here:
3// https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table
4// https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6cmap.html
5// https://github.com/fonttools/fonttools/blob/a360252709a3d65f899915db0a5bd753007fdbb7/Lib/fontTools/ttLib/tables/_c_m_a_p.py#L360
6
7use core::convert::TryFrom;
8
9use crate::parser::{FromData, LazyArray16, Stream};
10use crate::GlyphId;
11
/// One sub-header record of a format 2 subtable.
///
/// A sub-header describes a contiguous run of low-byte values and where the
/// glyph indices for that run are stored.
#[derive(Clone, Copy)]
struct SubHeaderRecord {
    /// First low-byte value covered by this sub-header.
    first_code: u16,
    /// Number of consecutive low-byte values covered, starting at `first_code`.
    entry_count: u16,
    /// Delta added to every non-zero glyph index read through this sub-header.
    id_delta: i16,
    /// Byte distance from the location of this very field to the glyph index
    /// entry that corresponds to `first_code`.
    id_range_offset: u16,
}
19
20impl FromData for SubHeaderRecord {
21 const SIZE: usize = 8;
22
23 #[inline]
24 fn parse(data: &[u8]) -> Option<Self> {
25 let mut s: Stream<'_> = Stream::new(data);
26 Some(SubHeaderRecord {
27 first_code: s.read::<u16>()?,
28 entry_count: s.read::<u16>()?,
29 id_delta: s.read::<i16>()?,
30 id_range_offset: s.read::<u16>()?,
31 })
32 }
33}
34
35/// A [format 2](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table)
36/// subtable.
37#[derive(Clone, Copy)]
38pub struct Subtable2<'a> {
39 sub_header_keys: LazyArray16<'a, u16>,
40 sub_headers_offset: usize,
41 sub_headers: LazyArray16<'a, SubHeaderRecord>,
42 // The whole subtable data.
43 data: &'a [u8],
44}
45
46impl<'a> Subtable2<'a> {
47 /// Parses a subtable from raw data.
48 pub fn parse(data: &'a [u8]) -> Option<Self> {
49 let mut s = Stream::new(data);
50 s.skip::<u16>(); // format
51 s.skip::<u16>(); // length
52 s.skip::<u16>(); // language
53 let sub_header_keys = s.read_array16::<u16>(256)?;
54 // The maximum index in a sub_header_keys is a sub_headers count.
55 let sub_headers_count = sub_header_keys.into_iter().map(|n| n / 8).max()? + 1;
56
57 // Remember sub_headers offset before reading. Will be used later.
58 let sub_headers_offset = s.offset();
59 let sub_headers = s.read_array16::<SubHeaderRecord>(sub_headers_count)?;
60
61 Some(Self {
62 sub_header_keys,
63 sub_headers_offset,
64 sub_headers,
65 data,
66 })
67 }
68
69 /// Returns a glyph index for a code point.
70 ///
71 /// Returns `None` when `code_point` is larger than `u16`.
72 pub fn glyph_index(&self, code_point: u32) -> Option<GlyphId> {
73 // This subtable supports code points only in a u16 range.
74 let code_point = u16::try_from(code_point).ok()?;
75
76 let code_point = code_point;
77 let high_byte = code_point >> 8;
78 let low_byte = code_point & 0x00FF;
79
80 let i = if code_point < 0xff {
81 // 'SubHeader 0 is special: it is used for single-byte character codes.'
82 0
83 } else {
84 // 'Array that maps high bytes to subHeaders: value is subHeader index × 8.'
85 self.sub_header_keys.get(high_byte)? / 8
86 };
87
88 let sub_header = self.sub_headers.get(i)?;
89
90 let first_code = sub_header.first_code;
91 let range_end = first_code.checked_add(sub_header.entry_count)?;
92 if low_byte < first_code || low_byte >= range_end {
93 return None;
94 }
95
96 // SubHeaderRecord::id_range_offset points to SubHeaderRecord::first_code
97 // in the glyphIndexArray. So we have to advance to our code point.
98 let index_offset = usize::from(low_byte.checked_sub(first_code)?) * u16::SIZE;
99
100 // 'The value of the idRangeOffset is the number of bytes
101 // past the actual location of the idRangeOffset'.
102 let offset = self.sub_headers_offset
103 // Advance to required subheader.
104 + SubHeaderRecord::SIZE * usize::from(i + 1)
105 // Move back to idRangeOffset start.
106 - u16::SIZE
107 // Use defined offset.
108 + usize::from(sub_header.id_range_offset)
109 // Advance to required index in the glyphIndexArray.
110 + index_offset;
111
112 let glyph: u16 = Stream::read_at(self.data, offset)?;
113 if glyph == 0 {
114 return None;
115 }
116
117 u16::try_from((i32::from(glyph) + i32::from(sub_header.id_delta)) % 65536)
118 .ok()
119 .map(GlyphId)
120 }
121
122 /// Calls `f` for each codepoint defined in this table.
123 pub fn codepoints(&self, f: impl FnMut(u32)) {
124 let _ = self.codepoints_inner(f);
125 }
126
127 #[inline]
128 fn codepoints_inner(&self, mut f: impl FnMut(u32)) -> Option<()> {
129 for first_byte in 0u16..256 {
130 let i = self.sub_header_keys.get(first_byte)? / 8;
131 let sub_header = self.sub_headers.get(i)?;
132 let first_code = sub_header.first_code;
133
134 if i == 0 {
135 // This is a single byte code.
136 let range_end = first_code.checked_add(sub_header.entry_count)?;
137 if first_byte >= first_code && first_byte < range_end {
138 f(u32::from(first_byte));
139 }
140 } else {
141 // This is a two byte code.
142 let base = first_code.checked_add(first_byte << 8)?;
143 for k in 0..sub_header.entry_count {
144 let code_point = base.checked_add(k)?;
145 f(u32::from(code_point));
146 }
147 }
148 }
149
150 Some(())
151 }
152}
153
154impl core::fmt::Debug for Subtable2<'_> {
155 fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
156 write!(f, "Subtable2 {{ ... }}")
157 }
158}
159