| 1 | use core::convert::TryFrom; |
| 2 | use core::ops::Range; |
| 3 | |
| 4 | use crate::Stream; |
| 5 | |
// Limits according to the Adobe Technical Note #5176, chapter 4 DICT Data.

// First byte of a two-byte operator; the byte that follows it selects the operator.
const TWO_BYTE_OPERATOR_MARK: u8 = 12;
// Capacity, in bytes, of the scratch buffer in which the textual form of a
// real-number operand is assembled before it is parsed.
const FLOAT_STACK_LEN: usize = 64;
// Nibble value that terminates a real-number operand.
const END_OF_FLOAT_FLAG: u8 = 0xf;
| 10 | |
/// A DICT operator code.
///
/// One-byte operators are stored as their byte value. Two-byte operators
/// (introduced by the byte 12) are stored as `1200 + second byte`,
/// so `12 3` becomes `1203`.
#[derive(Clone, Copy, Debug)]
pub struct Operator(pub u16);

impl Operator {
    /// Returns the numeric operator code.
    #[inline]
    pub fn get(self) -> u16 {
        let Operator(code) = self;
        code
    }
}
| 20 | |
/// A parser for CFF DICT data.
///
/// Operators are located one at a time with `parse_next`; the operands that
/// precede the current operator are decoded only on request, into a
/// caller-provided buffer.
pub struct DictionaryParser<'a> {
    // The raw DICT bytes being parsed.
    data: &'a [u8],
    // The current offset: the position right after the last operator that was returned,
    // which is where the next operator search starts.
    offset: usize,
    // Offset to the last operands start, i.e. where the operands
    // of the most recently searched operator begin.
    operands_offset: usize,
    // Actual operands.
    //
    // While CFF can contain only i32 and f32 values, we have to store operands as f64
    // since f32 cannot represent the whole i32 range.
    // Meaning we have a choice of storing operands as f64 or as enum of i32/f32.
    // In both cases the type size would be 8 bytes, so it's easier to simply use f64.
    operands: &'a mut [f64],
    // The number of valid operands currently stored at the start of the `operands` array.
    operands_len: u16,
}
| 37 | |
| 38 | impl<'a> DictionaryParser<'a> { |
| 39 | #[inline ] |
| 40 | pub fn new(data: &'a [u8], operands_buffer: &'a mut [f64]) -> Self { |
| 41 | DictionaryParser { |
| 42 | data, |
| 43 | offset: 0, |
| 44 | operands_offset: 0, |
| 45 | operands: operands_buffer, |
| 46 | operands_len: 0, |
| 47 | } |
| 48 | } |
| 49 | |
| 50 | #[inline (never)] |
| 51 | pub fn parse_next(&mut self) -> Option<Operator> { |
| 52 | let mut s = Stream::new_at(self.data, self.offset)?; |
| 53 | self.operands_offset = self.offset; |
| 54 | while !s.at_end() { |
| 55 | let b = s.read::<u8>()?; |
| 56 | // 0..=21 bytes are operators. |
| 57 | if is_dict_one_byte_op(b) { |
| 58 | let mut operator = u16::from(b); |
| 59 | |
| 60 | // Check that operator is two byte long. |
| 61 | if b == TWO_BYTE_OPERATOR_MARK { |
| 62 | // Use a 1200 'prefix' to make two byte operators more readable. |
| 63 | // 12 3 => 1203 |
| 64 | operator = 1200 + u16::from(s.read::<u8>()?); |
| 65 | } |
| 66 | |
| 67 | self.offset = s.offset(); |
| 68 | return Some(Operator(operator)); |
| 69 | } else { |
| 70 | skip_number(b, &mut s)?; |
| 71 | } |
| 72 | } |
| 73 | |
| 74 | None |
| 75 | } |
| 76 | |
| 77 | /// Parses operands of the current operator. |
| 78 | /// |
| 79 | /// In the DICT structure, operands are defined before an operator. |
| 80 | /// So we are trying to find an operator first and the we can actually parse the operands. |
| 81 | /// |
| 82 | /// Since this methods is pretty expensive and we do not care about most of the operators, |
| 83 | /// we can speed up parsing by parsing operands only for required operators. |
| 84 | /// |
| 85 | /// We still have to "skip" operands during operators search (see `skip_number()`), |
| 86 | /// but it's still faster that a naive method. |
| 87 | pub fn parse_operands(&mut self) -> Option<()> { |
| 88 | let mut s = Stream::new_at(self.data, self.operands_offset)?; |
| 89 | self.operands_len = 0; |
| 90 | while !s.at_end() { |
| 91 | let b = s.read::<u8>()?; |
| 92 | // 0..=21 bytes are operators. |
| 93 | if is_dict_one_byte_op(b) { |
| 94 | break; |
| 95 | } else { |
| 96 | let op = parse_number(b, &mut s)?; |
| 97 | self.operands[usize::from(self.operands_len)] = op; |
| 98 | self.operands_len += 1; |
| 99 | |
| 100 | if usize::from(self.operands_len) >= self.operands.len() { |
| 101 | break; |
| 102 | } |
| 103 | } |
| 104 | } |
| 105 | |
| 106 | Some(()) |
| 107 | } |
| 108 | |
| 109 | #[inline ] |
| 110 | pub fn operands(&self) -> &[f64] { |
| 111 | &self.operands[..usize::from(self.operands_len)] |
| 112 | } |
| 113 | |
| 114 | #[inline ] |
| 115 | pub fn parse_number(&mut self) -> Option<f64> { |
| 116 | self.parse_operands()?; |
| 117 | self.operands().get(0).cloned() |
| 118 | } |
| 119 | |
| 120 | #[inline ] |
| 121 | pub fn parse_offset(&mut self) -> Option<usize> { |
| 122 | self.parse_operands()?; |
| 123 | let operands = self.operands(); |
| 124 | if operands.len() == 1 { |
| 125 | usize::try_from(operands[0] as i32).ok() |
| 126 | } else { |
| 127 | None |
| 128 | } |
| 129 | } |
| 130 | |
| 131 | #[inline ] |
| 132 | pub fn parse_range(&mut self) -> Option<Range<usize>> { |
| 133 | self.parse_operands()?; |
| 134 | let operands = self.operands(); |
| 135 | if operands.len() == 2 { |
| 136 | let len = usize::try_from(operands[0] as i32).ok()?; |
| 137 | let start = usize::try_from(operands[1] as i32).ok()?; |
| 138 | let end = start.checked_add(len)?; |
| 139 | Some(start..end) |
| 140 | } else { |
| 141 | None |
| 142 | } |
| 143 | } |
| 144 | } |
| 145 | |
/// Checks whether `b` is a one-byte CFF DICT operator (or a reserved value)
/// rather than the first byte of an operand.
///
/// According to the Adobe Technical Note #5176, Appendix H CFF DICT Encoding,
/// only the bytes 28..=30 and 32..=254 start a number. Everything else
/// (0..=27, plus the reserved 31 and 255) is reported as an operator.
pub fn is_dict_one_byte_op(b: u8) -> bool {
    let starts_number = matches!(b, 28..=30 | 32..=254);
    !starts_number
}
| 157 | |
| 158 | // Adobe Technical Note #5177, Table 3 Operand Encoding |
| 159 | pub fn parse_number(b0: u8, s: &mut Stream) -> Option<f64> { |
| 160 | match b0 { |
| 161 | 28 => { |
| 162 | let n = i32::from(s.read::<i16>()?); |
| 163 | Some(f64::from(n)) |
| 164 | } |
| 165 | 29 => { |
| 166 | let n = s.read::<i32>()?; |
| 167 | Some(f64::from(n)) |
| 168 | } |
| 169 | 30 => parse_float(s), |
| 170 | 32..=246 => { |
| 171 | let n = i32::from(b0) - 139; |
| 172 | Some(f64::from(n)) |
| 173 | } |
| 174 | 247..=250 => { |
| 175 | let b1 = i32::from(s.read::<u8>()?); |
| 176 | let n = (i32::from(b0) - 247) * 256 + b1 + 108; |
| 177 | Some(f64::from(n)) |
| 178 | } |
| 179 | 251..=254 => { |
| 180 | let b1 = i32::from(s.read::<u8>()?); |
| 181 | let n = -(i32::from(b0) - 251) * 256 - b1 - 108; |
| 182 | Some(f64::from(n)) |
| 183 | } |
| 184 | _ => None, |
| 185 | } |
| 186 | } |
| 187 | |
| 188 | fn parse_float(s: &mut Stream) -> Option<f64> { |
| 189 | let mut data = [0u8; FLOAT_STACK_LEN]; |
| 190 | let mut idx = 0; |
| 191 | |
| 192 | loop { |
| 193 | let b1: u8 = s.read()?; |
| 194 | let nibble1 = b1 >> 4; |
| 195 | let nibble2 = b1 & 15; |
| 196 | |
| 197 | if nibble1 == END_OF_FLOAT_FLAG { |
| 198 | break; |
| 199 | } |
| 200 | |
| 201 | idx = parse_float_nibble(nibble1, idx, &mut data)?; |
| 202 | |
| 203 | if nibble2 == END_OF_FLOAT_FLAG { |
| 204 | break; |
| 205 | } |
| 206 | |
| 207 | idx = parse_float_nibble(nibble2, idx, &mut data)?; |
| 208 | } |
| 209 | |
| 210 | let s = core::str::from_utf8(&data[..idx]).ok()?; |
| 211 | let n = s.parse().ok()?; |
| 212 | Some(n) |
| 213 | } |
| 214 | |
| 215 | // Adobe Technical Note #5176, Table 5 Nibble Definitions |
| 216 | fn parse_float_nibble(nibble: u8, mut idx: usize, data: &mut [u8]) -> Option<usize> { |
| 217 | if idx == FLOAT_STACK_LEN { |
| 218 | return None; |
| 219 | } |
| 220 | |
| 221 | match nibble { |
| 222 | 0..=9 => { |
| 223 | data[idx] = b'0' + nibble; |
| 224 | } |
| 225 | 10 => { |
| 226 | data[idx] = b'.' ; |
| 227 | } |
| 228 | 11 => { |
| 229 | data[idx] = b'E' ; |
| 230 | } |
| 231 | 12 => { |
| 232 | if idx + 1 == FLOAT_STACK_LEN { |
| 233 | return None; |
| 234 | } |
| 235 | |
| 236 | data[idx] = b'E' ; |
| 237 | idx += 1; |
| 238 | data[idx] = b'-' ; |
| 239 | } |
| 240 | 13 => { |
| 241 | return None; |
| 242 | } |
| 243 | 14 => { |
| 244 | data[idx] = b'-' ; |
| 245 | } |
| 246 | _ => { |
| 247 | return None; |
| 248 | } |
| 249 | } |
| 250 | |
| 251 | idx += 1; |
| 252 | Some(idx) |
| 253 | } |
| 254 | |
| 255 | // Just like `parse_number`, but doesn't actually parses the data. |
| 256 | pub fn skip_number(b0: u8, s: &mut Stream) -> Option<()> { |
| 257 | match b0 { |
| 258 | 28 => s.skip::<u16>(), |
| 259 | 29 => s.skip::<u32>(), |
| 260 | 30 => { |
| 261 | while !s.at_end() { |
| 262 | let b1: u8 = s.read::<u8>()?; |
| 263 | let nibble1: u8 = b1 >> 4; |
| 264 | let nibble2: u8 = b1 & 15; |
| 265 | if nibble1 == END_OF_FLOAT_FLAG || nibble2 == END_OF_FLOAT_FLAG { |
| 266 | break; |
| 267 | } |
| 268 | } |
| 269 | } |
| 270 | 32..=246 => {} |
| 271 | 247..=250 => s.skip::<u8>(), |
| 272 | 251..=254 => s.skip::<u8>(), |
| 273 | _ => return None, |
| 274 | } |
| 275 | |
| 276 | Some(()) |
| 277 | } |
| 278 | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the integer operand encodings: the two-byte forms (positive and
    /// negative), the 16-bit form and the 32-bit form.
    #[test]
    fn parse_dict_number() {
        // (first byte, bytes that follow it, expected value)
        let cases: [(u8, &[u8], f64); 6] = [
            (0xFA, &[0x7C], 1000.0),
            (0xFE, &[0x7C], -1000.0),
            (0x1C, &[0x27, 0x10], 10000.0),
            (0x1C, &[0xD8, 0xF0], -10000.0),
            (0x1D, &[0x00, 0x01, 0x86, 0xA0], 100000.0),
            (0x1D, &[0xFF, 0xFE, 0x79, 0x60], -100000.0),
        ];

        for &(b0, rest, expected) in cases.iter() {
            let mut stream = Stream::new(rest);
            assert_eq!(parse_number(b0, &mut stream).unwrap(), expected);
        }
    }
}
| 311 | |