format2.rs source code [crates/ttf_parser/src/tables/cmap/format2.rs]

1	// This table has a pretty complex parsing algorithm.
2	// A detailed explanation can be found here:
3	// https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table
4	// https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6cmap.html
5	// https://github.com/fonttools/fonttools/blob/a360252709a3d65f899915db0a5bd753007fdbb7/Lib/fontTools/ttLib/tables/_c_m_a_p.py#L360
6
7	use core::convert::TryFrom;
8
9	use crate::parser::{FromData, LazyArray16, Stream};
10	use crate::GlyphId;
11
/// One entry of the format 2 sub-header array.
///
/// A sub-header describes how the low byte of a character code is mapped
/// into the glyph index array.
#[derive(Copy, Clone)]
struct SubHeaderRecord {
    /// First low-byte value covered by this sub-header.
    first_code: u16,
    /// Number of consecutive low-byte values covered, starting at `first_code`.
    entry_count: u16,
    /// Signed delta added to a non-zero value read from the glyph index array.
    id_delta: i16,
    /// Byte distance from the location of this field to the glyph index
    /// array entry that corresponds to `first_code`.
    id_range_offset: u16,
}
19
20	impl FromData for SubHeaderRecord {
21	const SIZE: usize = `8`;
22
23	#[inline]
24	fn parse(data: &[u8]) -> Option<Self> {
25	let mut s: Stream<'_> = Stream::new(data);
26	Some(SubHeaderRecord {
27	first_code: s.read::<u16>()?,
28	entry_count: s.read::<u16>()?,
29	id_delta: s.read::<i16>()?,
30	id_range_offset: s.read::<u16>()?,
31	})
32	}
33	}
34
35	/// A [format 2](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table)
36	/// subtable.
37	#[derive(Clone, Copy)]
38	pub struct Subtable2<'a> {
39	sub_header_keys: LazyArray16<'a, u16>,
40	sub_headers_offset: usize,
41	sub_headers: LazyArray16<'a, SubHeaderRecord>,
42	// The whole subtable data.
43	data: &'a [u8],
44	}
45
46	impl<'a> Subtable2<'a> {
47	/// Parses a subtable from raw data.
48	pub fn parse(data: &'a [u8]) -> Option<Self> {
49	let mut s = Stream::new(data);
50	s.skip::<u16>(); // format
51	s.skip::<u16>(); // length
52	s.skip::<u16>(); // language
53	let sub_header_keys = s.read_array16::<u16>(`256`)?;
54	// The maximum index in a sub_header_keys is a sub_headers count.
55	let sub_headers_count = sub_header_keys.into_iter().map(\|n\| n / `8`).max()? + `1`;
56
57	// Remember sub_headers offset before reading. Will be used later.
58	let sub_headers_offset = s.offset();
59	let sub_headers = s.read_array16::<SubHeaderRecord>(sub_headers_count)?;
60
61	Some(Self {
62	sub_header_keys,
63	sub_headers_offset,
64	sub_headers,
65	data,
66	})
67	}
68
69	/// Returns a glyph index for a code point.
70	///
71	/// Returns `None` when `code_point` is larger than `u16`.
72	pub fn glyph_index(&self, code_point: u32) -> Option<GlyphId> {
73	// This subtable supports code points only in a u16 range.
74	let code_point = u16::try_from(code_point).ok()?;
75	let high_byte = code_point >> `8`;
76	let low_byte = code_point & `0x00FF`;
77
78	let i = if code_point < `0xff` {
79	// 'SubHeader 0 is special: it is used for single-byte character codes.'
80	`0`
81	} else {
82	// 'Array that maps high bytes to subHeaders: value is subHeader index × 8.'
83	self.sub_header_keys.get(high_byte)? / `8`
84	};
85
86	let sub_header = self.sub_headers.get(i)?;
87
88	let first_code = sub_header.first_code;
89	let range_end = first_code.checked_add(sub_header.entry_count)?;
90	if low_byte < first_code \|\| low_byte >= range_end {
91	return None;
92	}
93
94	// SubHeaderRecord::id_range_offset points to SubHeaderRecord::first_code
95	// in the glyphIndexArray. So we have to advance to our code point.
96	let index_offset = usize::from(low_byte.checked_sub(first_code)?) * u16::SIZE;
97
98	// 'The value of the idRangeOffset is the number of bytes
99	// past the actual location of the idRangeOffset'.
100	let offset = self.sub_headers_offset
101	// Advance to required subheader.
102	+ SubHeaderRecord::SIZE * usize::from(i + `1`)
103	// Move back to idRangeOffset start.
104	- u16::SIZE
105	// Use defined offset.
106	+ usize::from(sub_header.id_range_offset)
107	// Advance to required index in the glyphIndexArray.
108	+ index_offset;
109
110	let glyph: u16 = Stream::read_at(self.data, offset)?;
111	if glyph == `0` {
112	return None;
113	}
114
115	u16::try_from((i32::from(glyph) + i32::from(sub_header.id_delta)) % `65536`)
116	.ok()
117	.map(GlyphId)
118	}
119
120	/// Calls `f` for each codepoint defined in this table.
121	pub fn codepoints(&self, f: impl FnMut(u32)) {
122	let _ = self.codepoints_inner(f);
123	}
124
125	#[inline]
126	fn codepoints_inner(&self, mut f: impl FnMut(u32)) -> Option<()> {
127	for first_byte in `0u16`..`256` {
128	let i = self.sub_header_keys.get(first_byte)? / `8`;
129	let sub_header = self.sub_headers.get(i)?;
130	let first_code = sub_header.first_code;
131
132	if i == `0` {
133	// This is a single byte code.
134	let range_end = first_code.checked_add(sub_header.entry_count)?;
135	if first_byte >= first_code && first_byte < range_end {
136	f(u32::from(first_byte));
137	}
138	} else {
139	// This is a two byte code.
140	let base = first_code.checked_add(first_byte << `8`)?;
141	for k in `0`..sub_header.entry_count {
142	let code_point = base.checked_add(k)?;
143	f(u32::from(code_point));
144	}
145	}
146	}
147
148	Some(())
149	}
150	}
151
152	impl core::fmt::Debug for Subtable2<'_> {
153	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
154	write!(f, "Subtable2 `{{` ... `}}`")
155	}
156	}
157