1 | use core::convert::TryFrom; |
2 | use core::ops::Range; |
3 | |
4 | use crate::Stream; |
5 | |
// Limits according to the Adobe Technical Note #5176, chapter 4 DICT Data.

// First byte of a two-byte operator; the byte that follows selects the operator.
const TWO_BYTE_OPERATOR_MARK: u8 = 12;
// Size of the scratch buffer that holds the textual form of a real number operand.
const FLOAT_STACK_LEN: usize = 64;
// Nibble value that terminates a real number operand.
const END_OF_FLOAT_FLAG: u8 = 0xf;
10 | |
/// A DICT operator code.
///
/// One-byte operators are stored as their byte value. Two-byte operators
/// are stored as `1200 + second byte`, e.g. `12 3` becomes `1203`.
#[derive(Clone, Copy, Debug)]
pub struct Operator(pub u16);

impl Operator {
    /// Returns the numeric operator code.
    #[inline]
    pub fn get(self) -> u16 {
        let Operator(code) = self;
        code
    }
}
20 | |
/// An incremental parser for CFF DICT data.
///
/// In a DICT, operands are stored before the operator they belong to.
/// The parser first locates the next operator and remembers where its
/// operands started, so the operands can be decoded later on demand.
pub struct DictionaryParser<'a> {
    // The raw DICT bytes.
    data: &'a [u8],
    // The current offset: the position in `data` where the search
    // for the next operator starts.
    offset: usize,
    // Offset to the start of the operands of the last found operator.
    operands_offset: usize,
    // Actual operands (caller-provided storage).
    //
    // While CFF can contain only i32 and f32 values, we have to store operands as f64
    // since f32 cannot represent the whole i32 range.
    // That leaves a choice between storing operands as f64 or as an enum of i32/f32.
    // In both cases the type size would be 8 bytes, so it's easier to simply use f64.
    operands: &'a mut [f64],
    // The number of valid operands in the `operands` array.
    operands_len: u16,
}
37 | |
38 | impl<'a> DictionaryParser<'a> { |
39 | #[inline ] |
40 | pub fn new(data: &'a [u8], operands_buffer: &'a mut [f64]) -> Self { |
41 | DictionaryParser { |
42 | data, |
43 | offset: 0, |
44 | operands_offset: 0, |
45 | operands: operands_buffer, |
46 | operands_len: 0, |
47 | } |
48 | } |
49 | |
50 | #[inline (never)] |
51 | pub fn parse_next(&mut self) -> Option<Operator> { |
52 | let mut s = Stream::new_at(self.data, self.offset)?; |
53 | self.operands_offset = self.offset; |
54 | while !s.at_end() { |
55 | let b = s.read::<u8>()?; |
56 | // 0..=21 bytes are operators. |
57 | if is_dict_one_byte_op(b) { |
58 | let mut operator = u16::from(b); |
59 | |
60 | // Check that operator is two byte long. |
61 | if b == TWO_BYTE_OPERATOR_MARK { |
62 | // Use a 1200 'prefix' to make two byte operators more readable. |
63 | // 12 3 => 1203 |
64 | operator = 1200 + u16::from(s.read::<u8>()?); |
65 | } |
66 | |
67 | self.offset = s.offset(); |
68 | return Some(Operator(operator)); |
69 | } else { |
70 | skip_number(b, &mut s)?; |
71 | } |
72 | } |
73 | |
74 | None |
75 | } |
76 | |
77 | /// Parses operands of the current operator. |
78 | /// |
79 | /// In the DICT structure, operands are defined before an operator. |
80 | /// So we are trying to find an operator first and the we can actually parse the operands. |
81 | /// |
82 | /// Since this methods is pretty expensive and we do not care about most of the operators, |
83 | /// we can speed up parsing by parsing operands only for required operators. |
84 | /// |
85 | /// We still have to "skip" operands during operators search (see `skip_number()`), |
86 | /// but it's still faster that a naive method. |
87 | pub fn parse_operands(&mut self) -> Option<()> { |
88 | let mut s = Stream::new_at(self.data, self.operands_offset)?; |
89 | self.operands_len = 0; |
90 | while !s.at_end() { |
91 | let b = s.read::<u8>()?; |
92 | // 0..=21 bytes are operators. |
93 | if is_dict_one_byte_op(b) { |
94 | break; |
95 | } else { |
96 | let op = parse_number(b, &mut s)?; |
97 | self.operands[usize::from(self.operands_len)] = op; |
98 | self.operands_len += 1; |
99 | |
100 | if usize::from(self.operands_len) >= self.operands.len() { |
101 | break; |
102 | } |
103 | } |
104 | } |
105 | |
106 | Some(()) |
107 | } |
108 | |
109 | #[inline ] |
110 | pub fn operands(&self) -> &[f64] { |
111 | &self.operands[..usize::from(self.operands_len)] |
112 | } |
113 | |
114 | #[inline ] |
115 | pub fn parse_number(&mut self) -> Option<f64> { |
116 | self.parse_operands()?; |
117 | self.operands().get(0).cloned() |
118 | } |
119 | |
120 | #[inline ] |
121 | pub fn parse_offset(&mut self) -> Option<usize> { |
122 | self.parse_operands()?; |
123 | let operands = self.operands(); |
124 | if operands.len() == 1 { |
125 | usize::try_from(operands[0] as i32).ok() |
126 | } else { |
127 | None |
128 | } |
129 | } |
130 | |
131 | #[inline ] |
132 | pub fn parse_range(&mut self) -> Option<Range<usize>> { |
133 | self.parse_operands()?; |
134 | let operands = self.operands(); |
135 | if operands.len() == 2 { |
136 | let len = usize::try_from(operands[0] as i32).ok()?; |
137 | let start = usize::try_from(operands[1] as i32).ok()?; |
138 | let end = start.checked_add(len)?; |
139 | Some(start..end) |
140 | } else { |
141 | None |
142 | } |
143 | } |
144 | } |
145 | |
// One-byte CFF DICT Operators according to the
// Adobe Technical Note #5176, Appendix H CFF DICT Encoding.
//
// Returns `true` for every byte that does not start an operand.
// This includes the reserved bytes 31 and 255.
pub fn is_dict_one_byte_op(b: u8) -> bool {
    match b {
        // Bytes that start a number (operand).
        28..=30 | 32..=254 => false,
        // Everything else: operators and reserved bytes.
        0..=27 | 31 | 255 => true,
    }
}
157 | |
158 | // Adobe Technical Note #5177, Table 3 Operand Encoding |
159 | pub fn parse_number(b0: u8, s: &mut Stream) -> Option<f64> { |
160 | match b0 { |
161 | 28 => { |
162 | let n = i32::from(s.read::<i16>()?); |
163 | Some(f64::from(n)) |
164 | } |
165 | 29 => { |
166 | let n = s.read::<i32>()?; |
167 | Some(f64::from(n)) |
168 | } |
169 | 30 => parse_float(s), |
170 | 32..=246 => { |
171 | let n = i32::from(b0) - 139; |
172 | Some(f64::from(n)) |
173 | } |
174 | 247..=250 => { |
175 | let b1 = i32::from(s.read::<u8>()?); |
176 | let n = (i32::from(b0) - 247) * 256 + b1 + 108; |
177 | Some(f64::from(n)) |
178 | } |
179 | 251..=254 => { |
180 | let b1 = i32::from(s.read::<u8>()?); |
181 | let n = -(i32::from(b0) - 251) * 256 - b1 - 108; |
182 | Some(f64::from(n)) |
183 | } |
184 | _ => None, |
185 | } |
186 | } |
187 | |
188 | fn parse_float(s: &mut Stream) -> Option<f64> { |
189 | let mut data = [0u8; FLOAT_STACK_LEN]; |
190 | let mut idx = 0; |
191 | |
192 | loop { |
193 | let b1: u8 = s.read()?; |
194 | let nibble1 = b1 >> 4; |
195 | let nibble2 = b1 & 15; |
196 | |
197 | if nibble1 == END_OF_FLOAT_FLAG { |
198 | break; |
199 | } |
200 | |
201 | idx = parse_float_nibble(nibble1, idx, &mut data)?; |
202 | |
203 | if nibble2 == END_OF_FLOAT_FLAG { |
204 | break; |
205 | } |
206 | |
207 | idx = parse_float_nibble(nibble2, idx, &mut data)?; |
208 | } |
209 | |
210 | let s = core::str::from_utf8(&data[..idx]).ok()?; |
211 | let n = s.parse().ok()?; |
212 | Some(n) |
213 | } |
214 | |
// Adobe Technical Note #5176, Table 5 Nibble Definitions
//
// Appends the ASCII form of `nibble` to `data` starting at `idx` and
// returns the index right after the written text.
//
// Returns `None` for a reserved nibble (13), for the end-of-number nibble (15),
// for values above 15 and when the text does not fit into `data`.
// Nothing is written in those cases.
fn parse_float_nibble(nibble: u8, idx: usize, data: &mut [u8]) -> Option<usize> {
    // The text for a nibble is at most two bytes long ("E-").
    let (text, len): ([u8; 2], usize) = match nibble {
        0..=9 => ([b'0' + nibble, 0], 1),
        10 => ([b'.', 0], 1),
        11 => ([b'E', 0], 1),
        12 => ([b'E', b'-'], 2),
        14 => ([b'-', 0], 1),
        _ => return None,
    };

    // Bounds are checked against the buffer that was actually passed in,
    // so a short buffer or an out-of-range `idx` yields `None` instead of a panic.
    let end = idx.checked_add(len)?;
    data.get_mut(idx..end)?.copy_from_slice(&text[..len]);
    Some(end)
}
254 | |
255 | // Just like `parse_number`, but doesn't actually parses the data. |
256 | pub fn skip_number(b0: u8, s: &mut Stream) -> Option<()> { |
257 | match b0 { |
258 | 28 => s.skip::<u16>(), |
259 | 29 => s.skip::<u32>(), |
260 | 30 => { |
261 | while !s.at_end() { |
262 | let b1: u8 = s.read::<u8>()?; |
263 | let nibble1: u8 = b1 >> 4; |
264 | let nibble2: u8 = b1 & 15; |
265 | if nibble1 == END_OF_FLOAT_FLAG || nibble2 == END_OF_FLOAT_FLAG { |
266 | break; |
267 | } |
268 | } |
269 | } |
270 | 32..=246 => {} |
271 | 247..=250 => s.skip::<u8>(), |
272 | 251..=254 => s.skip::<u8>(), |
273 | _ => return None, |
274 | } |
275 | |
276 | Some(()) |
277 | } |
278 | |
#[cfg(test)]
mod tests {
    use super::*;

    // Checks every integer operand encoding with a positive and a negative value.
    #[test]
    fn parse_dict_number() {
        // (first byte, bytes that follow it, expected value)
        let cases: [(u8, &[u8], f64); 6] = [
            // Two-byte forms.
            (0xFA, &[0x7C], 1000.0),
            (0xFE, &[0x7C], -1000.0),
            // 16-bit integers.
            (0x1C, &[0x27, 0x10], 10000.0),
            (0x1C, &[0xD8, 0xF0], -10000.0),
            // 32-bit integers.
            (0x1D, &[0x00, 0x01, 0x86, 0xA0], 100000.0),
            (0x1D, &[0xFF, 0xFE, 0x79, 0x60], -100000.0),
        ];

        for &(b0, rest, expected) in cases.iter() {
            let mut s = Stream::new(rest);
            assert_eq!(parse_number(b0, &mut s).unwrap(), expected);
        }
    }
}
311 | |