format2.rs source code [crates/ttf-parser-0.20.0/src/tables/cmap/format2.rs]

1	// This table has a pretty complex parsing algorithm.
2	// A detailed explanation can be found here:
3	// https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table
4	// https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6cmap.html
5	// https://github.com/fonttools/fonttools/blob/a360252709a3d65f899915db0a5bd753007fdbb7/Lib/fontTools/ttLib/tables/_c_m_a_p.py#L360
6
7	use core::convert::TryFrom;
8
9	use crate::parser::{FromData, LazyArray16, Stream};
10	use crate::GlyphId;
11
/// A single sub-header record of a format 2 subtable.
///
/// Each record describes a contiguous range of low bytes and where the
/// glyph indices for that range are stored.
#[derive(Copy, Clone)]
struct SubHeaderRecord {
    /// First low byte covered by this sub-header.
    first_code: u16,
    /// Number of consecutive low bytes covered, starting at `first_code`.
    entry_count: u16,
    /// Signed delta added to every non-zero value read from the glyph index array.
    id_delta: i16,
    /// Byte distance from this field's own location to the glyph index array
    /// entry that corresponds to `first_code`.
    id_range_offset: u16,
}
19
20	impl FromData for SubHeaderRecord {
21	const SIZE: usize = `8`;
22
23	#[inline]
24	fn parse(data: &[u8]) -> Option<Self> {
25	let mut s: Stream<'_> = Stream::new(data);
26	Some(SubHeaderRecord {
27	first_code: s.read::<u16>()?,
28	entry_count: s.read::<u16>()?,
29	id_delta: s.read::<i16>()?,
30	id_range_offset: s.read::<u16>()?,
31	})
32	}
33	}
34
35	/// A [format 2](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table)
36	/// subtable.
37	#[derive(Clone, Copy)]
38	pub struct Subtable2<'a> {
39	sub_header_keys: LazyArray16<'a, u16>,
40	sub_headers_offset: usize,
41	sub_headers: LazyArray16<'a, SubHeaderRecord>,
42	// The whole subtable data.
43	data: &'a [u8],
44	}
45
46	impl<'a> Subtable2<'a> {
47	/// Parses a subtable from raw data.
48	pub fn parse(data: &'a [u8]) -> Option<Self> {
49	let mut s = Stream::new(data);
50	s.skip::<u16>(); // format
51	s.skip::<u16>(); // length
52	s.skip::<u16>(); // language
53	let sub_header_keys = s.read_array16::<u16>(`256`)?;
54	// The maximum index in a sub_header_keys is a sub_headers count.
55	let sub_headers_count = sub_header_keys.into_iter().map(\|n\| n / `8`).max()? + `1`;
56
57	// Remember sub_headers offset before reading. Will be used later.
58	let sub_headers_offset = s.offset();
59	let sub_headers = s.read_array16::<SubHeaderRecord>(sub_headers_count)?;
60
61	Some(Self {
62	sub_header_keys,
63	sub_headers_offset,
64	sub_headers,
65	data,
66	})
67	}
68
69	/// Returns a glyph index for a code point.
70	///
71	/// Returns `None` when `code_point` is larger than `u16`.
72	pub fn glyph_index(&self, code_point: u32) -> Option<GlyphId> {
73	// This subtable supports code points only in a u16 range.
74	let code_point = u16::try_from(code_point).ok()?;
75
76	let code_point = code_point;
77	let high_byte = code_point >> `8`;
78	let low_byte = code_point & `0x00FF`;
79
80	let i = if code_point < `0xff` {
81	// 'SubHeader 0 is special: it is used for single-byte character codes.'
82	`0`
83	} else {
84	// 'Array that maps high bytes to subHeaders: value is subHeader index × 8.'
85	self.sub_header_keys.get(high_byte)? / `8`
86	};
87
88	let sub_header = self.sub_headers.get(i)?;
89
90	let first_code = sub_header.first_code;
91	let range_end = first_code.checked_add(sub_header.entry_count)?;
92	if low_byte < first_code \|\| low_byte >= range_end {
93	return None;
94	}
95
96	// SubHeaderRecord::id_range_offset points to SubHeaderRecord::first_code
97	// in the glyphIndexArray. So we have to advance to our code point.
98	let index_offset = usize::from(low_byte.checked_sub(first_code)?) * u16::SIZE;
99
100	// 'The value of the idRangeOffset is the number of bytes
101	// past the actual location of the idRangeOffset'.
102	let offset = self.sub_headers_offset
103	// Advance to required subheader.
104	+ SubHeaderRecord::SIZE * usize::from(i + `1`)
105	// Move back to idRangeOffset start.
106	- u16::SIZE
107	// Use defined offset.
108	+ usize::from(sub_header.id_range_offset)
109	// Advance to required index in the glyphIndexArray.
110	+ index_offset;
111
112	let glyph: u16 = Stream::read_at(self.data, offset)?;
113	if glyph == `0` {
114	return None;
115	}
116
117	u16::try_from((i32::from(glyph) + i32::from(sub_header.id_delta)) % `65536`)
118	.ok()
119	.map(GlyphId)
120	}
121
122	/// Calls `f` for each codepoint defined in this table.
123	pub fn codepoints(&self, f: impl FnMut(u32)) {
124	let _ = self.codepoints_inner(f);
125	}
126
127	#[inline]
128	fn codepoints_inner(&self, mut f: impl FnMut(u32)) -> Option<()> {
129	for first_byte in `0u16`..`256` {
130	let i = self.sub_header_keys.get(first_byte)? / `8`;
131	let sub_header = self.sub_headers.get(i)?;
132	let first_code = sub_header.first_code;
133
134	if i == `0` {
135	// This is a single byte code.
136	let range_end = first_code.checked_add(sub_header.entry_count)?;
137	if first_byte >= first_code && first_byte < range_end {
138	f(u32::from(first_byte));
139	}
140	} else {
141	// This is a two byte code.
142	let base = first_code.checked_add(first_byte << `8`)?;
143	for k in `0`..sub_header.entry_count {
144	let code_point = base.checked_add(k)?;
145	f(u32::from(code_point));
146	}
147	}
148	}
149
150	Some(())
151	}
152	}
153
154	impl core::fmt::Debug for Subtable2<'_> {
155	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
156	write!(f, "Subtable2 `{{` ... `}}`")
157	}
158	}
159