1use core::convert::TryFrom;
2use core::ops::Range;
3
4use crate::Stream;
5
// Limits and markers according to the Adobe Technical Note #5176, chapter 4 DICT Data.

/// First byte of a two-byte operator; the byte that follows selects the operator.
const TWO_BYTE_OPERATOR_MARK: u8 = 0x0C;
/// Size, in bytes, of the scratch buffer used to build the text form of a real number.
const FLOAT_STACK_LEN: usize = 64;
/// Nibble value that terminates a real number operand.
const END_OF_FLOAT_FLAG: u8 = 0x0F;
10
/// A DICT operator code.
///
/// The wrapped value is whatever the parser stored: a one-byte operator keeps
/// its byte value, a two-byte operator is stored as `1200 + second byte`.
#[derive(Clone, Copy, Debug)]
pub struct Operator(pub u16);

impl Operator {
    /// Returns the wrapped operator code.
    #[inline]
    pub fn get(self) -> u16 {
        let Operator(code) = self;
        code
    }
}
20
/// A parser state for DICT data that stores operands in a caller-provided buffer.
pub struct DictionaryParser<'a> {
    /// The raw DICT bytes.
    data: &'a [u8],
    /// The current offset into `data`.
    offset: usize,
    /// Offset in `data` where the operands of the last found operator start.
    operands_offset: usize,
    /// Storage for the parsed operands.
    ///
    /// While CFF can contain only i32 and f32 values, operands are stored as f64
    /// because f32 cannot represent the whole i32 range.
    /// The alternative would be an enum of i32/f32, but in both cases the type size
    /// would be 8 bytes, so plain f64 is simpler.
    operands: &'a mut [f64],
    /// The number of valid entries at the start of `operands`.
    operands_len: u16,
}
37
38impl<'a> DictionaryParser<'a> {
39 #[inline]
40 pub fn new(data: &'a [u8], operands_buffer: &'a mut [f64]) -> Self {
41 DictionaryParser {
42 data,
43 offset: 0,
44 operands_offset: 0,
45 operands: operands_buffer,
46 operands_len: 0,
47 }
48 }
49
50 #[inline(never)]
51 pub fn parse_next(&mut self) -> Option<Operator> {
52 let mut s = Stream::new_at(self.data, self.offset)?;
53 self.operands_offset = self.offset;
54 while !s.at_end() {
55 let b = s.read::<u8>()?;
56 // 0..=21 bytes are operators.
57 if is_dict_one_byte_op(b) {
58 let mut operator = u16::from(b);
59
60 // Check that operator is two byte long.
61 if b == TWO_BYTE_OPERATOR_MARK {
62 // Use a 1200 'prefix' to make two byte operators more readable.
63 // 12 3 => 1203
64 operator = 1200 + u16::from(s.read::<u8>()?);
65 }
66
67 self.offset = s.offset();
68 return Some(Operator(operator));
69 } else {
70 skip_number(b, &mut s)?;
71 }
72 }
73
74 None
75 }
76
77 /// Parses operands of the current operator.
78 ///
79 /// In the DICT structure, operands are defined before an operator.
80 /// So we are trying to find an operator first and the we can actually parse the operands.
81 ///
82 /// Since this methods is pretty expensive and we do not care about most of the operators,
83 /// we can speed up parsing by parsing operands only for required operators.
84 ///
85 /// We still have to "skip" operands during operators search (see `skip_number()`),
86 /// but it's still faster that a naive method.
87 pub fn parse_operands(&mut self) -> Option<()> {
88 let mut s = Stream::new_at(self.data, self.operands_offset)?;
89 self.operands_len = 0;
90 while !s.at_end() {
91 let b = s.read::<u8>()?;
92 // 0..=21 bytes are operators.
93 if is_dict_one_byte_op(b) {
94 break;
95 } else {
96 let op = parse_number(b, &mut s)?;
97 self.operands[usize::from(self.operands_len)] = op;
98 self.operands_len += 1;
99
100 if usize::from(self.operands_len) >= self.operands.len() {
101 break;
102 }
103 }
104 }
105
106 Some(())
107 }
108
109 #[inline]
110 pub fn operands(&self) -> &[f64] {
111 &self.operands[..usize::from(self.operands_len)]
112 }
113
114 #[inline]
115 pub fn parse_number(&mut self) -> Option<f64> {
116 self.parse_operands()?;
117 self.operands().get(0).cloned()
118 }
119
120 #[inline]
121 pub fn parse_offset(&mut self) -> Option<usize> {
122 self.parse_operands()?;
123 let operands = self.operands();
124 if operands.len() == 1 {
125 usize::try_from(operands[0] as i32).ok()
126 } else {
127 None
128 }
129 }
130
131 #[inline]
132 pub fn parse_range(&mut self) -> Option<Range<usize>> {
133 self.parse_operands()?;
134 let operands = self.operands();
135 if operands.len() == 2 {
136 let len = usize::try_from(operands[0] as i32).ok()?;
137 let start = usize::try_from(operands[1] as i32).ok()?;
138 let end = start.checked_add(len)?;
139 Some(start..end)
140 } else {
141 None
142 }
143 }
144}
145
// One-byte CFF DICT Operators according to the
// Adobe Technical Note #5176, Appendix H CFF DICT Encoding.
/// Checks whether `b` should be treated as an operator byte rather than the start of a number.
///
/// Bytes `28..=30` and `32..=254` start numbers. Every other byte, including
/// the reserved values `31` and `255`, is reported as an operator.
pub fn is_dict_one_byte_op(b: u8) -> bool {
    match b {
        // Numbers.
        28..=30 | 32..=254 => false,
        // Operators and reserved bytes: 0..=27, 31 and 255.
        _ => true,
    }
}
157
158// Adobe Technical Note #5177, Table 3 Operand Encoding
159pub fn parse_number(b0: u8, s: &mut Stream) -> Option<f64> {
160 match b0 {
161 28 => {
162 let n = i32::from(s.read::<i16>()?);
163 Some(f64::from(n))
164 }
165 29 => {
166 let n = s.read::<i32>()?;
167 Some(f64::from(n))
168 }
169 30 => parse_float(s),
170 32..=246 => {
171 let n = i32::from(b0) - 139;
172 Some(f64::from(n))
173 }
174 247..=250 => {
175 let b1 = i32::from(s.read::<u8>()?);
176 let n = (i32::from(b0) - 247) * 256 + b1 + 108;
177 Some(f64::from(n))
178 }
179 251..=254 => {
180 let b1 = i32::from(s.read::<u8>()?);
181 let n = -(i32::from(b0) - 251) * 256 - b1 - 108;
182 Some(f64::from(n))
183 }
184 _ => None,
185 }
186}
187
188fn parse_float(s: &mut Stream) -> Option<f64> {
189 let mut data = [0u8; FLOAT_STACK_LEN];
190 let mut idx = 0;
191
192 loop {
193 let b1: u8 = s.read()?;
194 let nibble1 = b1 >> 4;
195 let nibble2 = b1 & 15;
196
197 if nibble1 == END_OF_FLOAT_FLAG {
198 break;
199 }
200
201 idx = parse_float_nibble(nibble1, idx, &mut data)?;
202
203 if nibble2 == END_OF_FLOAT_FLAG {
204 break;
205 }
206
207 idx = parse_float_nibble(nibble2, idx, &mut data)?;
208 }
209
210 let s = core::str::from_utf8(&data[..idx]).ok()?;
211 let n = s.parse().ok()?;
212 Some(n)
213}
214
215// Adobe Technical Note #5176, Table 5 Nibble Definitions
216fn parse_float_nibble(nibble: u8, mut idx: usize, data: &mut [u8]) -> Option<usize> {
217 if idx == FLOAT_STACK_LEN {
218 return None;
219 }
220
221 match nibble {
222 0..=9 => {
223 data[idx] = b'0' + nibble;
224 }
225 10 => {
226 data[idx] = b'.';
227 }
228 11 => {
229 data[idx] = b'E';
230 }
231 12 => {
232 if idx + 1 == FLOAT_STACK_LEN {
233 return None;
234 }
235
236 data[idx] = b'E';
237 idx += 1;
238 data[idx] = b'-';
239 }
240 13 => {
241 return None;
242 }
243 14 => {
244 data[idx] = b'-';
245 }
246 _ => {
247 return None;
248 }
249 }
250
251 idx += 1;
252 Some(idx)
253}
254
255// Just like `parse_number`, but doesn't actually parses the data.
256pub fn skip_number(b0: u8, s: &mut Stream) -> Option<()> {
257 match b0 {
258 28 => s.skip::<u16>(),
259 29 => s.skip::<u32>(),
260 30 => {
261 while !s.at_end() {
262 let b1: u8 = s.read::<u8>()?;
263 let nibble1: u8 = b1 >> 4;
264 let nibble2: u8 = b1 & 15;
265 if nibble1 == END_OF_FLOAT_FLAG || nibble2 == END_OF_FLOAT_FLAG {
266 break;
267 }
268 }
269 }
270 32..=246 => {}
271 247..=250 => s.skip::<u8>(),
272 251..=254 => s.skip::<u8>(),
273 _ => return None,
274 }
275
276 Some(())
277}
278
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `parse_number` against positive and negative values in the
    /// two-byte, 16-bit and 32-bit integer encodings.
    #[test]
    fn parse_dict_number() {
        // (first byte, remaining bytes, expected value)
        let cases: [(u8, &[u8], f64); 6] = [
            (0xFA, &[0x7C], 1000.0),
            (0xFE, &[0x7C], -1000.0),
            (0x1C, &[0x27, 0x10], 10000.0),
            (0x1C, &[0xD8, 0xF0], -10000.0),
            (0x1D, &[0x00, 0x01, 0x86, 0xA0], 100000.0),
            (0x1D, &[0xFF, 0xFE, 0x79, 0x60], -100000.0),
        ];

        for &(b0, rest, expected) in cases.iter() {
            let mut stream = Stream::new(rest);
            assert_eq!(parse_number(b0, &mut stream).unwrap(), expected);
        }
    }
}
311