dict.rs source code [crates/ttf-parser-0.17.1/src/tables/cff/dict.rs]

1	use core::convert::TryFrom;
2	use core::ops::Range;
3
4	use crate::Stream;
5
// Values taken from the Adobe Technical Note #5176, chapter 4 "DICT Data".

/// First byte of a two-byte DICT operator; the actual operator id follows it.
const TWO_BYTE_OPERATOR_MARK: u8 = 12;
/// Size of the scratch buffer that holds the textual form of a real number operand.
const FLOAT_STACK_LEN: usize = 64;
/// Nibble value that terminates a packed real number operand.
const END_OF_FLOAT_FLAG: u8 = 0x0F;
10
/// A DICT operator id.
///
/// One-byte operators are stored as is. Two-byte operators are stored as
/// `1200 + second_byte`, e.g. the byte pair `12 3` becomes `1203`.
#[derive(Clone, Copy, Debug)]
pub struct Operator(pub u16);

impl Operator {
    /// Returns the raw operator id.
    #[inline]
    pub fn get(self) -> u16 {
        let Operator(id) = self;
        id
    }
}
20
/// An incremental parser of CFF DICT data.
///
/// Operators are located first; the operands that precede an operator
/// are decoded only on request.
pub struct DictionaryParser<'a> {
    /// The raw DICT bytes.
    data: &'a [u8],
    /// Offset in `data` from which the next operator search starts.
    offset: usize,
    /// Offset in `data` at which the operands of the last found operator start.
    operands_offset: usize,
    /// Caller-provided storage for decoded operands.
    ///
    /// CFF operands are either `i32` or `f32` values. They are stored as `f64`
    /// because `f32` cannot represent the whole `i32` range. The alternative,
    /// an enum of `i32`/`f32`, would also take 8 bytes per value,
    /// so a plain `f64` is the simpler choice.
    operands: &'a mut [f64],
    /// Number of valid entries at the start of `operands`.
    operands_len: u16,
}
37
38	impl<'a> DictionaryParser<'a> {
39	#[inline]
40	pub fn new(data: &'a [u8], operands_buffer: &'a mut [f64]) -> Self {
41	DictionaryParser {
42	data,
43	offset: `0`,
44	operands_offset: `0`,
45	operands: operands_buffer,
46	operands_len: `0`,
47	}
48	}
49
50	#[inline(never)]
51	pub fn parse_next(&mut self) -> Option<Operator> {
52	let mut s = Stream::new_at(self.data, self.offset)?;
53	self.operands_offset = self.offset;
54	while !s.at_end() {
55	let b = s.read::<u8>()?;
56	// 0..=21 bytes are operators.
57	if is_dict_one_byte_op(b) {
58	let mut operator = u16::from(b);
59
60	// Check that operator is two byte long.
61	if b == TWO_BYTE_OPERATOR_MARK {
62	// Use a 1200 'prefix' to make two byte operators more readable.
63	// 12 3 => 1203
64	operator = `1200` + u16::from(s.read::<u8>()?);
65	}
66
67	self.offset = s.offset();
68	return Some(Operator(operator));
69	} else {
70	skip_number(b, &mut s)?;
71	}
72	}
73
74	None
75	}
76
77	/// Parses operands of the current operator.
78	///
79	/// In the DICT structure, operands are defined before an operator.
80	/// So we are trying to find an operator first and the we can actually parse the operands.
81	///
82	/// Since this methods is pretty expensive and we do not care about most of the operators,
83	/// we can speed up parsing by parsing operands only for required operators.
84	///
85	/// We still have to "skip" operands during operators search (see `skip_number()`),
86	/// but it's still faster that a naive method.
87	pub fn parse_operands(&mut self) -> Option<()> {
88	let mut s = Stream::new_at(self.data, self.operands_offset)?;
89	self.operands_len = `0`;
90	while !s.at_end() {
91	let b = s.read::<u8>()?;
92	// 0..=21 bytes are operators.
93	if is_dict_one_byte_op(b) {
94	break;
95	} else {
96	let op = parse_number(b, &mut s)?;
97	self.operands[usize::from(self.operands_len)] = op;
98	self.operands_len += `1`;
99
100	if usize::from(self.operands_len) >= self.operands.len() {
101	break;
102	}
103	}
104	}
105
106	Some(())
107	}
108
109	#[inline]
110	pub fn operands(&self) -> &[f64] {
111	&self.operands[..usize::from(self.operands_len)]
112	}
113
114	#[inline]
115	pub fn parse_number(&mut self) -> Option<f64> {
116	self.parse_operands()?;
117	self.operands().get(`0`).cloned()
118	}
119
120	#[inline]
121	pub fn parse_offset(&mut self) -> Option<usize> {
122	self.parse_operands()?;
123	let operands = self.operands();
124	if operands.len() == `1` {
125	usize::try_from(operands[`0`] as i32).ok()
126	} else {
127	None
128	}
129	}
130
131	#[inline]
132	pub fn parse_range(&mut self) -> Option<Range<usize>> {
133	self.parse_operands()?;
134	let operands = self.operands();
135	if operands.len() == `2` {
136	let len = usize::try_from(operands[`0`] as i32).ok()?;
137	let start = usize::try_from(operands[`1`] as i32).ok()?;
138	let end = start.checked_add(len)?;
139	Some(start..end)
140	} else {
141	None
142	}
143	}
144	}
145
/// Checks whether `b` should be handled as a DICT operator byte
/// rather than as the first byte of an operand.
///
/// Based on the Adobe Technical Note #5176, Appendix H "CFF DICT Encoding".
/// Only `28..=30` and `32..=254` start an operand (a number).
/// Every other value, including the reserved ones (`31`, `255`),
/// is reported as an operator.
pub fn is_dict_one_byte_op(b: u8) -> bool {
    match b {
        // Operand (number) prefixes.
        28..=30 | 32..=254 => false,
        // 0..=27 plus the reserved 31 and 255.
        _ => true,
    }
}
157
158	// Adobe Technical Note #5177, Table 3 Operand Encoding
159	pub fn parse_number(b0: u8, s: &mut Stream) -> Option<f64> {
160	match b0 {
161	`28` => {
162	let n = i32::from(s.read::<i16>()?);
163	Some(f64::from(n))
164	}
165	`29` => {
166	let n = s.read::<i32>()?;
167	Some(f64::from(n))
168	}
169	`30` => parse_float(s),
170	`32`..=`246` => {
171	let n = i32::from(b0) - `139`;
172	Some(f64::from(n))
173	}
174	`247`..=`250` => {
175	let b1 = i32::from(s.read::<u8>()?);
176	let n = (i32::from(b0) - `247`) * `256` + b1 + `108`;
177	Some(f64::from(n))
178	}
179	`251`..=`254` => {
180	let b1 = i32::from(s.read::<u8>()?);
181	let n = -(i32::from(b0) - `251`) * `256` - b1 - `108`;
182	Some(f64::from(n))
183	}
184	_ => None,
185	}
186	}
187
188	fn parse_float(s: &mut Stream) -> Option<f64> {
189	let mut data = [`0u8`; FLOAT_STACK_LEN];
190	let mut idx = `0`;
191
192	loop {
193	let b1: u8 = s.read()?;
194	let nibble1 = b1 >> `4`;
195	let nibble2 = b1 & `15`;
196
197	if nibble1 == END_OF_FLOAT_FLAG {
198	break;
199	}
200
201	idx = parse_float_nibble(nibble1, idx, &mut data)?;
202
203	if nibble2 == END_OF_FLOAT_FLAG {
204	break;
205	}
206
207	idx = parse_float_nibble(nibble2, idx, &mut data)?;
208	}
209
210	let s = core::str::from_utf8(&data[..idx]).ok()?;
211	let n = s.parse().ok()?;
212	Some(n)
213	}
214
215	// Adobe Technical Note #5176, Table 5 Nibble Definitions
216	fn parse_float_nibble(nibble: u8, mut idx: usize, data: &mut [u8]) -> Option<usize> {
217	if idx == FLOAT_STACK_LEN {
218	return None;
219	}
220
221	match nibble {
222	`0`..=`9` => {
223	data[idx] = b'0' + nibble;
224	}
225	`10` => {
226	data[idx] = b'.';
227	}
228	`11` => {
229	data[idx] = b'E';
230	}
231	`12` => {
232	if idx + `1` == FLOAT_STACK_LEN {
233	return None;
234	}
235
236	data[idx] = b'E';
237	idx += `1`;
238	data[idx] = b'-';
239	}
240	`13` => {
241	return None;
242	}
243	`14` => {
244	data[idx] = b'-';
245	}
246	_ => {
247	return None;
248	}
249	}
250
251	idx += `1`;
252	Some(idx)
253	}
254
255	// Just like `parse_number`, but doesn't actually parses the data.
256	pub fn skip_number(b0: u8, s: &mut Stream) -> Option<()> {
257	match b0 {
258	`28` => s.skip::<u16>(),
259	`29` => s.skip::<u32>(),
260	`30` => {
261	while !s.at_end() {
262	let b1: u8 = s.read::<u8>()?;
263	let nibble1: u8 = b1 >> `4`;
264	let nibble2: u8 = b1 & `15`;
265	if nibble1 == END_OF_FLOAT_FLAG \|\| nibble2 == END_OF_FLOAT_FLAG {
266	break;
267	}
268	}
269	}
270	`32`..=`246` => {}
271	`247`..=`250` => s.skip::<u8>(),
272	`251`..=`254` => s.skip::<u8>(),
273	_ => return None,
274	}
275
276	Some(())
277	}
278
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks integer operand decoding for the two-, three- and five-byte forms,
    /// with one positive and one negative value each.
    #[test]
    fn parse_dict_number() {
        // (first byte, remaining bytes, expected value)
        let cases: [(u8, &[u8], f64); 6] = [
            (0xFA, &[0x7C], 1000.0),
            (0xFE, &[0x7C], -1000.0),
            (0x1C, &[0x27, 0x10], 10000.0),
            (0x1C, &[0xD8, 0xF0], -10000.0),
            (0x1D, &[0x00, 0x01, 0x86, 0xA0], 100000.0),
            (0x1D, &[0xFF, 0xFE, 0x79, 0x60], -100000.0),
        ];

        for &(b0, tail, expected) in cases.iter() {
            assert_eq!(parse_number(b0, &mut Stream::new(tail)).unwrap(), expected);
        }
    }
}
311