| 1 | use std::cell::{Cell, RefCell}; | 
| 2 | use std::cmp; | 
|---|
| 3 | use std::convert::TryFrom; | 
|---|
| 4 | use std::fs; | 
|---|
| 5 | use std::io::prelude::*; | 
|---|
| 6 | use std::io::{self, SeekFrom}; | 
|---|
| 7 | use std::marker; | 
|---|
| 8 | use std::path::Path; | 
|---|
| 9 |  | 
|---|
| 10 | use crate::entry::{EntryFields, EntryIo}; | 
|---|
| 11 | use crate::error::TarError; | 
|---|
| 12 | use crate::other; | 
|---|
| 13 | use crate::pax::*; | 
|---|
| 14 | use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header}; | 
|---|
| 15 |  | 
|---|
| 16 | /// A top-level representation of an archive file. | 
|---|
| 17 | /// | 
|---|
| 18 | /// This archive can have an entry added to it and it can be iterated over. | 
|---|
| 19 | pub struct Archive<R: ?Sized + Read> { | 
|---|
| 20 | inner: ArchiveInner<R>, | 
|---|
| 21 | } | 
|---|
| 22 |  | 
|---|
/// Shared state behind an `Archive`: the reader, the number of bytes consumed
/// from it so far, and the options applied to entries when unpacking.
pub struct ArchiveInner<R: ?Sized> {
    /// Current position in the underlying reader. Advanced by every read made
    /// through `&ArchiveInner` and overwritten with the resulting offset after
    /// a seek.
    pos: Cell<u64>,
    /// Permission mask handed to each entry (see `Archive::set_mask`).
    mask: u32,
    /// Whether extended attributes are unpacked.
    unpack_xattrs: bool,
    /// Whether extended permission bits are preserved when unpacking.
    preserve_permissions: bool,
    /// Whether numeric owner ids are preserved when unpacking.
    preserve_ownerships: bool,
    /// Whether entry mtimes are preserved when unpacking.
    preserve_mtime: bool,
    /// Whether existing files and symlinks are overwritten on extraction.
    overwrite: bool,
    /// Whether all-zero header blocks are skipped instead of ending iteration.
    ignore_zeros: bool,
    /// The underlying reader. This must stay the last field: it is the only
    /// field mentioning `R`, which is what lets `ArchiveInner<R>` be coerced
    /// to `ArchiveInner<dyn Read>`.
    obj: RefCell<R>,
}
|---|
| 34 |  | 
|---|
| 35 | /// An iterator over the entries of an archive. | 
|---|
| 36 | pub struct Entries<'a, R: 'a + Read> { | 
|---|
| 37 | fields: EntriesFields<'a>, | 
|---|
| 38 | _ignored: marker::PhantomData<&'a Archive<R>>, | 
|---|
| 39 | } | 
|---|
| 40 |  | 
|---|
/// Object-safe combination of `Read` and `Seek`, so that a seekable reader can
/// be named as a single trait object (`dyn SeekRead`).
trait SeekRead: Read + Seek {}

/// Every reader that can also seek is a `SeekRead`.
impl<T> SeekRead for T where T: Read + Seek {}
|---|
| 43 |  | 
|---|
| 44 | struct EntriesFields<'a> { | 
|---|
| 45 | archive: &'a Archive<dyn Read + 'a>, | 
|---|
| 46 | seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>, | 
|---|
| 47 | next: u64, | 
|---|
| 48 | done: bool, | 
|---|
| 49 | raw: bool, | 
|---|
| 50 | } | 
|---|
| 51 |  | 
|---|
| 52 | impl<R: Read> Archive<R> { | 
|---|
| 53 | /// Create a new archive with the underlying object as the reader. | 
|---|
| 54 | pub fn new(obj: R) -> Archive<R> { | 
|---|
| 55 | Archive { | 
|---|
| 56 | inner: ArchiveInner { | 
|---|
| 57 | mask: u32::MIN, | 
|---|
| 58 | unpack_xattrs: false, | 
|---|
| 59 | preserve_permissions: false, | 
|---|
| 60 | preserve_ownerships: false, | 
|---|
| 61 | preserve_mtime: true, | 
|---|
| 62 | overwrite: true, | 
|---|
| 63 | ignore_zeros: false, | 
|---|
| 64 | obj: RefCell::new(obj), | 
|---|
| 65 | pos: Cell::new(0), | 
|---|
| 66 | }, | 
|---|
| 67 | } | 
|---|
| 68 | } | 
|---|
| 69 |  | 
|---|
| 70 | /// Unwrap this archive, returning the underlying object. | 
|---|
| 71 | pub fn into_inner(self) -> R { | 
|---|
| 72 | self.inner.obj.into_inner() | 
|---|
| 73 | } | 
|---|
| 74 |  | 
|---|
| 75 | /// Construct an iterator over the entries in this archive. | 
|---|
| 76 | /// | 
|---|
| 77 | /// Note that care must be taken to consider each entry within an archive in | 
|---|
| 78 | /// sequence. If entries are processed out of sequence (from what the | 
|---|
| 79 | /// iterator returns), then the contents read for each entry may be | 
|---|
| 80 | /// corrupted. | 
|---|
| 81 | pub fn entries(&mut self) -> io::Result<Entries<R>> { | 
|---|
| 82 | let me: &mut Archive<dyn Read> = self; | 
|---|
| 83 | me._entries(None).map(|fields| Entries { | 
|---|
| 84 | fields: fields, | 
|---|
| 85 | _ignored: marker::PhantomData, | 
|---|
| 86 | }) | 
|---|
| 87 | } | 
|---|
| 88 |  | 
|---|
| 89 | /// Unpacks the contents tarball into the specified `dst`. | 
|---|
| 90 | /// | 
|---|
| 91 | /// This function will iterate over the entire contents of this tarball, | 
|---|
| 92 | /// extracting each file in turn to the location specified by the entry's | 
|---|
| 93 | /// path name. | 
|---|
| 94 | /// | 
|---|
| 95 | /// This operation is relatively sensitive in that it will not write files | 
|---|
| 96 | /// outside of the path specified by `dst`. Files in the archive which have | 
|---|
| 97 | /// a '..' in their path are skipped during the unpacking process. | 
|---|
| 98 | /// | 
|---|
| 99 | /// # Examples | 
|---|
| 100 | /// | 
|---|
| 101 | /// ```no_run | 
|---|
| 102 | /// use std::fs::File; | 
|---|
| 103 | /// use tar::Archive; | 
|---|
| 104 | /// | 
|---|
| 105 | /// let mut ar = Archive::new(File::open( "foo.tar").unwrap()); | 
|---|
| 106 | /// ar.unpack( "foo").unwrap(); | 
|---|
| 107 | /// ``` | 
|---|
| 108 | pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> { | 
|---|
| 109 | let me: &mut Archive<dyn Read> = self; | 
|---|
| 110 | me._unpack(dst.as_ref()) | 
|---|
| 111 | } | 
|---|
| 112 |  | 
|---|
| 113 | /// Set the mask of the permission bits when unpacking this entry. | 
|---|
| 114 | /// | 
|---|
| 115 | /// The mask will be inverted when applying against a mode, similar to how | 
|---|
| 116 | /// `umask` works on Unix. In logical notation it looks like: | 
|---|
| 117 | /// | 
|---|
| 118 | /// ```text | 
|---|
| 119 | /// new_mode = old_mode & (~mask) | 
|---|
| 120 | /// ``` | 
|---|
| 121 | /// | 
|---|
| 122 | /// The mask is 0 by default and is currently only implemented on Unix. | 
|---|
| 123 | pub fn set_mask(&mut self, mask: u32) { | 
|---|
| 124 | self.inner.mask = mask; | 
|---|
| 125 | } | 
|---|
| 126 |  | 
|---|
| 127 | /// Indicate whether extended file attributes (xattrs on Unix) are preserved | 
|---|
| 128 | /// when unpacking this archive. | 
|---|
| 129 | /// | 
|---|
| 130 | /// This flag is disabled by default and is currently only implemented on | 
|---|
| 131 | /// Unix using xattr support. This may eventually be implemented for | 
|---|
| 132 | /// Windows, however, if other archive implementations are found which do | 
|---|
| 133 | /// this as well. | 
|---|
| 134 | pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) { | 
|---|
| 135 | self.inner.unpack_xattrs = unpack_xattrs; | 
|---|
| 136 | } | 
|---|
| 137 |  | 
|---|
| 138 | /// Indicate whether extended permissions (like suid on Unix) are preserved | 
|---|
| 139 | /// when unpacking this entry. | 
|---|
| 140 | /// | 
|---|
| 141 | /// This flag is disabled by default and is currently only implemented on | 
|---|
| 142 | /// Unix. | 
|---|
| 143 | pub fn set_preserve_permissions(&mut self, preserve: bool) { | 
|---|
| 144 | self.inner.preserve_permissions = preserve; | 
|---|
| 145 | } | 
|---|
| 146 |  | 
|---|
| 147 | /// Indicate whether numeric ownership ids (like uid and gid on Unix) | 
|---|
| 148 | /// are preserved when unpacking this entry. | 
|---|
| 149 | /// | 
|---|
| 150 | /// This flag is disabled by default and is currently only implemented on | 
|---|
| 151 | /// Unix. | 
|---|
| 152 | pub fn set_preserve_ownerships(&mut self, preserve: bool) { | 
|---|
| 153 | self.inner.preserve_ownerships = preserve; | 
|---|
| 154 | } | 
|---|
| 155 |  | 
|---|
| 156 | /// Indicate whether files and symlinks should be overwritten on extraction. | 
|---|
| 157 | pub fn set_overwrite(&mut self, overwrite: bool) { | 
|---|
| 158 | self.inner.overwrite = overwrite; | 
|---|
| 159 | } | 
|---|
| 160 |  | 
|---|
| 161 | /// Indicate whether access time information is preserved when unpacking | 
|---|
| 162 | /// this entry. | 
|---|
| 163 | /// | 
|---|
| 164 | /// This flag is enabled by default. | 
|---|
| 165 | pub fn set_preserve_mtime(&mut self, preserve: bool) { | 
|---|
| 166 | self.inner.preserve_mtime = preserve; | 
|---|
| 167 | } | 
|---|
| 168 |  | 
|---|
| 169 | /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more | 
|---|
| 170 | /// entries. | 
|---|
| 171 | /// | 
|---|
| 172 | /// This can be used in case multiple tar archives have been concatenated together. | 
|---|
| 173 | pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) { | 
|---|
| 174 | self.inner.ignore_zeros = ignore_zeros; | 
|---|
| 175 | } | 
|---|
| 176 | } | 
|---|
| 177 |  | 
|---|
| 178 | impl<R: Seek + Read> Archive<R> { | 
|---|
| 179 | /// Construct an iterator over the entries in this archive for a seekable | 
|---|
| 180 | /// reader. Seek will be used to efficiently skip over file contents. | 
|---|
| 181 | /// | 
|---|
| 182 | /// Note that care must be taken to consider each entry within an archive in | 
|---|
| 183 | /// sequence. If entries are processed out of sequence (from what the | 
|---|
| 184 | /// iterator returns), then the contents read for each entry may be | 
|---|
| 185 | /// corrupted. | 
|---|
| 186 | pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> { | 
|---|
| 187 | let me: &Archive<dyn Read> = self; | 
|---|
| 188 | let me_seekable: &Archive<dyn SeekRead> = self; | 
|---|
| 189 | me._entries(Some(me_seekable)).map(|fields: EntriesFields<'_>| Entries { | 
|---|
| 190 | fields: fields, | 
|---|
| 191 | _ignored: marker::PhantomData, | 
|---|
| 192 | }) | 
|---|
| 193 | } | 
|---|
| 194 | } | 
|---|
| 195 |  | 
|---|
| 196 | impl Archive<dyn Read + '_> { | 
|---|
| 197 | fn _entries<'a>( | 
|---|
| 198 | &'a self, | 
|---|
| 199 | seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>, | 
|---|
| 200 | ) -> io::Result<EntriesFields<'a>> { | 
|---|
| 201 | if self.inner.pos.get() != 0 { | 
|---|
| 202 | return Err(other( | 
|---|
| 203 | "cannot call entries unless archive is at \ | 
|---|
| 204 |                  position 0", | 
|---|
| 205 | )); | 
|---|
| 206 | } | 
|---|
| 207 | Ok(EntriesFields { | 
|---|
| 208 | archive: self, | 
|---|
| 209 | seekable_archive, | 
|---|
| 210 | done: false, | 
|---|
| 211 | next: 0, | 
|---|
| 212 | raw: false, | 
|---|
| 213 | }) | 
|---|
| 214 | } | 
|---|
| 215 |  | 
|---|
| 216 | fn _unpack(&mut self, dst: &Path) -> io::Result<()> { | 
|---|
| 217 | if dst.symlink_metadata().is_err() { | 
|---|
| 218 | fs::create_dir_all(&dst) | 
|---|
| 219 | .map_err(|e| TarError::new(format!( "failed to create `{} `", dst.display()), e))?; | 
|---|
| 220 | } | 
|---|
| 221 |  | 
|---|
| 222 | // Canonicalizing the dst directory will prepend the path with '\\?\' | 
|---|
| 223 | // on windows which will allow windows APIs to treat the path as an | 
|---|
| 224 | // extended-length path with a 32,767 character limit. Otherwise all | 
|---|
| 225 | // unpacked paths over 260 characters will fail on creation with a | 
|---|
| 226 | // NotFound exception. | 
|---|
| 227 | let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf()); | 
|---|
| 228 |  | 
|---|
| 229 | // Delay any directory entries until the end (they will be created if needed by | 
|---|
| 230 | // descendants), to ensure that directory permissions do not interfer with descendant | 
|---|
| 231 | // extraction. | 
|---|
| 232 | let mut directories = Vec::new(); | 
|---|
| 233 | for entry in self._entries(None)? { | 
|---|
| 234 | let mut file = entry.map_err(|e| TarError::new( "failed to iterate over archive", e))?; | 
|---|
| 235 | if file.header().entry_type() == crate::EntryType::Directory { | 
|---|
| 236 | directories.push(file); | 
|---|
| 237 | } else { | 
|---|
| 238 | file.unpack_in(dst)?; | 
|---|
| 239 | } | 
|---|
| 240 | } | 
|---|
| 241 |  | 
|---|
| 242 | // Apply the directories. | 
|---|
| 243 | // | 
|---|
| 244 | // Note: the order of application is important to permissions. That is, we must traverse | 
|---|
| 245 | // the filesystem graph in topological ordering or else we risk not being able to create | 
|---|
| 246 | // child directories within those of more restrictive permissions. See [0] for details. | 
|---|
| 247 | // | 
|---|
| 248 | // [0]: <https://github.com/alexcrichton/tar-rs/issues/242> | 
|---|
| 249 | directories.sort_by(|a, b| b.path_bytes().cmp(&a.path_bytes())); | 
|---|
| 250 | for mut dir in directories { | 
|---|
| 251 | dir.unpack_in(dst)?; | 
|---|
| 252 | } | 
|---|
| 253 |  | 
|---|
| 254 | Ok(()) | 
|---|
| 255 | } | 
|---|
| 256 | } | 
|---|
| 257 |  | 
|---|
| 258 | impl<'a, R: Read> Entries<'a, R> { | 
|---|
| 259 | /// Indicates whether this iterator will return raw entries or not. | 
|---|
| 260 | /// | 
|---|
| 261 | /// If the raw list of entries are returned, then no preprocessing happens | 
|---|
| 262 | /// on account of this library, for example taking into account GNU long name | 
|---|
| 263 | /// or long link archive members. Raw iteration is disabled by default. | 
|---|
| 264 | pub fn raw(self, raw: bool) -> Entries<'a, R> { | 
|---|
| 265 | Entries { | 
|---|
| 266 | fields: EntriesFields { | 
|---|
| 267 | raw: raw, | 
|---|
| 268 | ..self.fields | 
|---|
| 269 | }, | 
|---|
| 270 | _ignored: marker::PhantomData, | 
|---|
| 271 | } | 
|---|
| 272 | } | 
|---|
| 273 | } | 
|---|
| 274 | impl<'a, R: Read> Iterator for Entries<'a, R> { | 
|---|
| 275 | type Item = io::Result<Entry<'a, R>>; | 
|---|
| 276 |  | 
|---|
| 277 | fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> { | 
|---|
| 278 | self.fields | 
|---|
| 279 | .next() | 
|---|
| 280 | .map(|result: Result, …>| result.map(|e: Entry<'a, Empty>| EntryFields::from(entry:e).into_entry())) | 
|---|
| 281 | } | 
|---|
| 282 | } | 
|---|
| 283 |  | 
|---|
| 284 | impl<'a> EntriesFields<'a> { | 
|---|
| 285 | fn next_entry_raw( | 
|---|
| 286 | &mut self, | 
|---|
| 287 | pax_extensions: Option<&[u8]>, | 
|---|
| 288 | ) -> io::Result<Option<Entry<'a, io::Empty>>> { | 
|---|
| 289 | let mut header = Header::new_old(); | 
|---|
| 290 | let mut header_pos = self.next; | 
|---|
| 291 | loop { | 
|---|
| 292 | // Seek to the start of the next header in the archive | 
|---|
| 293 | let delta = self.next - self.archive.inner.pos.get(); | 
|---|
| 294 | self.skip(delta)?; | 
|---|
| 295 |  | 
|---|
| 296 | // EOF is an indicator that we are at the end of the archive. | 
|---|
| 297 | if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? { | 
|---|
| 298 | return Ok(None); | 
|---|
| 299 | } | 
|---|
| 300 |  | 
|---|
| 301 | // If a header is not all zeros, we have another valid header. | 
|---|
| 302 | // Otherwise, check if we are ignoring zeros and continue, or break as if this is the | 
|---|
| 303 | // end of the archive. | 
|---|
| 304 | if !header.as_bytes().iter().all(|i| *i == 0) { | 
|---|
| 305 | self.next += 512; | 
|---|
| 306 | break; | 
|---|
| 307 | } | 
|---|
| 308 |  | 
|---|
| 309 | if !self.archive.inner.ignore_zeros { | 
|---|
| 310 | return Ok(None); | 
|---|
| 311 | } | 
|---|
| 312 | self.next += 512; | 
|---|
| 313 | header_pos = self.next; | 
|---|
| 314 | } | 
|---|
| 315 |  | 
|---|
| 316 | // Make sure the checksum is ok | 
|---|
| 317 | let sum = header.as_bytes()[..148] | 
|---|
| 318 | .iter() | 
|---|
| 319 | .chain(&header.as_bytes()[156..]) | 
|---|
| 320 | .fold(0, |a, b| a + (*b as u32)) | 
|---|
| 321 | + 8 * 32; | 
|---|
| 322 | let cksum = header.cksum()?; | 
|---|
| 323 | if sum != cksum { | 
|---|
| 324 | return Err(other( "archive header checksum mismatch")); | 
|---|
| 325 | } | 
|---|
| 326 |  | 
|---|
| 327 | let mut pax_size: Option<u64> = None; | 
|---|
| 328 | if let Some(pax_extensions_ref) = &pax_extensions { | 
|---|
| 329 | pax_size = pax_extensions_value(pax_extensions_ref, PAX_SIZE); | 
|---|
| 330 |  | 
|---|
| 331 | if let Some(pax_uid) = pax_extensions_value(pax_extensions_ref, PAX_UID) { | 
|---|
| 332 | header.set_uid(pax_uid); | 
|---|
| 333 | } | 
|---|
| 334 |  | 
|---|
| 335 | if let Some(pax_gid) = pax_extensions_value(pax_extensions_ref, PAX_GID) { | 
|---|
| 336 | header.set_gid(pax_gid); | 
|---|
| 337 | } | 
|---|
| 338 | } | 
|---|
| 339 |  | 
|---|
| 340 | let file_pos = self.next; | 
|---|
| 341 | let mut size = header.entry_size()?; | 
|---|
| 342 | if size == 0 { | 
|---|
| 343 | if let Some(pax_size) = pax_size { | 
|---|
| 344 | size = pax_size; | 
|---|
| 345 | } | 
|---|
| 346 | } | 
|---|
| 347 | let ret = EntryFields { | 
|---|
| 348 | size: size, | 
|---|
| 349 | header_pos: header_pos, | 
|---|
| 350 | file_pos: file_pos, | 
|---|
| 351 | data: vec![EntryIo::Data((&self.archive.inner).take(size))], | 
|---|
| 352 | header: header, | 
|---|
| 353 | long_pathname: None, | 
|---|
| 354 | long_linkname: None, | 
|---|
| 355 | pax_extensions: None, | 
|---|
| 356 | mask: self.archive.inner.mask, | 
|---|
| 357 | unpack_xattrs: self.archive.inner.unpack_xattrs, | 
|---|
| 358 | preserve_permissions: self.archive.inner.preserve_permissions, | 
|---|
| 359 | preserve_mtime: self.archive.inner.preserve_mtime, | 
|---|
| 360 | overwrite: self.archive.inner.overwrite, | 
|---|
| 361 | preserve_ownerships: self.archive.inner.preserve_ownerships, | 
|---|
| 362 | }; | 
|---|
| 363 |  | 
|---|
| 364 | // Store where the next entry is, rounding up by 512 bytes (the size of | 
|---|
| 365 | // a header); | 
|---|
| 366 | let size = size | 
|---|
| 367 | .checked_add(511) | 
|---|
| 368 | .ok_or_else(|| other( "size overflow"))?; | 
|---|
| 369 | self.next = self | 
|---|
| 370 | .next | 
|---|
| 371 | .checked_add(size & !(512 - 1)) | 
|---|
| 372 | .ok_or_else(|| other( "size overflow"))?; | 
|---|
| 373 |  | 
|---|
| 374 | Ok(Some(ret.into_entry())) | 
|---|
| 375 | } | 
|---|
| 376 |  | 
|---|
| 377 | fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> { | 
|---|
| 378 | if self.raw { | 
|---|
| 379 | return self.next_entry_raw(None); | 
|---|
| 380 | } | 
|---|
| 381 |  | 
|---|
| 382 | let mut gnu_longname = None; | 
|---|
| 383 | let mut gnu_longlink = None; | 
|---|
| 384 | let mut pax_extensions = None; | 
|---|
| 385 | let mut processed = 0; | 
|---|
| 386 | loop { | 
|---|
| 387 | processed += 1; | 
|---|
| 388 | let entry = match self.next_entry_raw(pax_extensions.as_deref())? { | 
|---|
| 389 | Some(entry) => entry, | 
|---|
| 390 | None if processed > 1 => { | 
|---|
| 391 | return Err(other( | 
|---|
| 392 | "members found describing a future member \ | 
|---|
| 393 |                          but no future member found", | 
|---|
| 394 | )); | 
|---|
| 395 | } | 
|---|
| 396 | None => return Ok(None), | 
|---|
| 397 | }; | 
|---|
| 398 |  | 
|---|
| 399 | let is_recognized_header = | 
|---|
| 400 | entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some(); | 
|---|
| 401 |  | 
|---|
| 402 | if is_recognized_header && entry.header().entry_type().is_gnu_longname() { | 
|---|
| 403 | if gnu_longname.is_some() { | 
|---|
| 404 | return Err(other( | 
|---|
| 405 | "two long name entries describing \ | 
|---|
| 406 |                          the same member", | 
|---|
| 407 | )); | 
|---|
| 408 | } | 
|---|
| 409 | gnu_longname = Some(EntryFields::from(entry).read_all()?); | 
|---|
| 410 | continue; | 
|---|
| 411 | } | 
|---|
| 412 |  | 
|---|
| 413 | if is_recognized_header && entry.header().entry_type().is_gnu_longlink() { | 
|---|
| 414 | if gnu_longlink.is_some() { | 
|---|
| 415 | return Err(other( | 
|---|
| 416 | "two long name entries describing \ | 
|---|
| 417 |                          the same member", | 
|---|
| 418 | )); | 
|---|
| 419 | } | 
|---|
| 420 | gnu_longlink = Some(EntryFields::from(entry).read_all()?); | 
|---|
| 421 | continue; | 
|---|
| 422 | } | 
|---|
| 423 |  | 
|---|
| 424 | if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() { | 
|---|
| 425 | if pax_extensions.is_some() { | 
|---|
| 426 | return Err(other( | 
|---|
| 427 | "two pax extensions entries describing \ | 
|---|
| 428 |                          the same member", | 
|---|
| 429 | )); | 
|---|
| 430 | } | 
|---|
| 431 | pax_extensions = Some(EntryFields::from(entry).read_all()?); | 
|---|
| 432 | continue; | 
|---|
| 433 | } | 
|---|
| 434 |  | 
|---|
| 435 | let mut fields = EntryFields::from(entry); | 
|---|
| 436 | fields.long_pathname = gnu_longname; | 
|---|
| 437 | fields.long_linkname = gnu_longlink; | 
|---|
| 438 | fields.pax_extensions = pax_extensions; | 
|---|
| 439 | self.parse_sparse_header(&mut fields)?; | 
|---|
| 440 | return Ok(Some(fields.into_entry())); | 
|---|
| 441 | } | 
|---|
| 442 | } | 
|---|
| 443 |  | 
|---|
| 444 | fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> { | 
|---|
| 445 | if !entry.header.entry_type().is_gnu_sparse() { | 
|---|
| 446 | return Ok(()); | 
|---|
| 447 | } | 
|---|
| 448 | let gnu = match entry.header.as_gnu() { | 
|---|
| 449 | Some(gnu) => gnu, | 
|---|
| 450 | None => return Err(other( "sparse entry type listed but not GNU header")), | 
|---|
| 451 | }; | 
|---|
| 452 |  | 
|---|
| 453 | // Sparse files are represented internally as a list of blocks that are | 
|---|
| 454 | // read. Blocks are either a bunch of 0's or they're data from the | 
|---|
| 455 | // underlying archive. | 
|---|
| 456 | // | 
|---|
| 457 | // Blocks of a sparse file are described by the `GnuSparseHeader` | 
|---|
| 458 | // structure, some of which are contained in `GnuHeader` but some of | 
|---|
| 459 | // which may also be contained after the first header in further | 
|---|
| 460 | // headers. | 
|---|
| 461 | // | 
|---|
| 462 | // We read off all the blocks here and use the `add_block` function to | 
|---|
| 463 | // incrementally add them to the list of I/O block (in `entry.data`). | 
|---|
| 464 | // The `add_block` function also validates that each chunk comes after | 
|---|
| 465 | // the previous, we don't overrun the end of the file, and each block is | 
|---|
| 466 | // aligned to a 512-byte boundary in the archive itself. | 
|---|
| 467 | // | 
|---|
| 468 | // At the end we verify that the sparse file size (`Header::size`) is | 
|---|
| 469 | // the same as the current offset (described by the list of blocks) as | 
|---|
| 470 | // well as the amount of data read equals the size of the entry | 
|---|
| 471 | // (`Header::entry_size`). | 
|---|
| 472 | entry.data.truncate(0); | 
|---|
| 473 |  | 
|---|
| 474 | let mut cur = 0; | 
|---|
| 475 | let mut remaining = entry.size; | 
|---|
| 476 | { | 
|---|
| 477 | let data = &mut entry.data; | 
|---|
| 478 | let reader = &self.archive.inner; | 
|---|
| 479 | let size = entry.size; | 
|---|
| 480 | let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> { | 
|---|
| 481 | if block.is_empty() { | 
|---|
| 482 | return Ok(()); | 
|---|
| 483 | } | 
|---|
| 484 | let off = block.offset()?; | 
|---|
| 485 | let len = block.length()?; | 
|---|
| 486 | if len != 0 && (size - remaining) % 512 != 0 { | 
|---|
| 487 | return Err(other( | 
|---|
| 488 | "previous block in sparse file was not \ | 
|---|
| 489 |                          aligned to 512-byte boundary", | 
|---|
| 490 | )); | 
|---|
| 491 | } else if off < cur { | 
|---|
| 492 | return Err(other( | 
|---|
| 493 | "out of order or overlapping sparse \ | 
|---|
| 494 |                          blocks", | 
|---|
| 495 | )); | 
|---|
| 496 | } else if cur < off { | 
|---|
| 497 | let block = io::repeat(0).take(off - cur); | 
|---|
| 498 | data.push(EntryIo::Pad(block)); | 
|---|
| 499 | } | 
|---|
| 500 | cur = off | 
|---|
| 501 | .checked_add(len) | 
|---|
| 502 | .ok_or_else(|| other( "more bytes listed in sparse file than u64 can hold"))?; | 
|---|
| 503 | remaining = remaining.checked_sub(len).ok_or_else(|| { | 
|---|
| 504 | other( | 
|---|
| 505 | "sparse file consumed more data than the header \ | 
|---|
| 506 |                          listed", | 
|---|
| 507 | ) | 
|---|
| 508 | })?; | 
|---|
| 509 | data.push(EntryIo::Data(reader.take(len))); | 
|---|
| 510 | Ok(()) | 
|---|
| 511 | }; | 
|---|
| 512 | for block in gnu.sparse.iter() { | 
|---|
| 513 | add_block(block)? | 
|---|
| 514 | } | 
|---|
| 515 | if gnu.is_extended() { | 
|---|
| 516 | let mut ext = GnuExtSparseHeader::new(); | 
|---|
| 517 | ext.isextended[0] = 1; | 
|---|
| 518 | while ext.is_extended() { | 
|---|
| 519 | if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? { | 
|---|
| 520 | return Err(other( "failed to read extension")); | 
|---|
| 521 | } | 
|---|
| 522 |  | 
|---|
| 523 | self.next += 512; | 
|---|
| 524 | for block in ext.sparse.iter() { | 
|---|
| 525 | add_block(block)?; | 
|---|
| 526 | } | 
|---|
| 527 | } | 
|---|
| 528 | } | 
|---|
| 529 | } | 
|---|
| 530 | if cur != gnu.real_size()? { | 
|---|
| 531 | return Err(other( | 
|---|
| 532 | "mismatch in sparse file chunks and \ | 
|---|
| 533 |                  size in header", | 
|---|
| 534 | )); | 
|---|
| 535 | } | 
|---|
| 536 | entry.size = cur; | 
|---|
| 537 | if remaining > 0 { | 
|---|
| 538 | return Err(other( | 
|---|
| 539 | "mismatch in sparse file chunks and \ | 
|---|
| 540 |                  entry size in header", | 
|---|
| 541 | )); | 
|---|
| 542 | } | 
|---|
| 543 | Ok(()) | 
|---|
| 544 | } | 
|---|
| 545 |  | 
|---|
| 546 | fn skip(&mut self, mut amt: u64) -> io::Result<()> { | 
|---|
| 547 | if let Some(seekable_archive) = self.seekable_archive { | 
|---|
| 548 | let pos = io::SeekFrom::Current( | 
|---|
| 549 | i64::try_from(amt).map_err(|_| other( "seek position out of bounds"))?, | 
|---|
| 550 | ); | 
|---|
| 551 | (&seekable_archive.inner).seek(pos)?; | 
|---|
| 552 | } else { | 
|---|
| 553 | let mut buf = [0u8; 4096 * 8]; | 
|---|
| 554 | while amt > 0 { | 
|---|
| 555 | let n = cmp::min(amt, buf.len() as u64); | 
|---|
| 556 | let n = (&self.archive.inner).read(&mut buf[..n as usize])?; | 
|---|
| 557 | if n == 0 { | 
|---|
| 558 | return Err(other( "unexpected EOF during skip")); | 
|---|
| 559 | } | 
|---|
| 560 | amt -= n as u64; | 
|---|
| 561 | } | 
|---|
| 562 | } | 
|---|
| 563 | Ok(()) | 
|---|
| 564 | } | 
|---|
| 565 | } | 
|---|
| 566 |  | 
|---|
| 567 | impl<'a> Iterator for EntriesFields<'a> { | 
|---|
| 568 | type Item = io::Result<Entry<'a, io::Empty>>; | 
|---|
| 569 |  | 
|---|
| 570 | fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> { | 
|---|
| 571 | if self.done { | 
|---|
| 572 | None | 
|---|
| 573 | } else { | 
|---|
| 574 | match self.next_entry() { | 
|---|
| 575 | Ok(Some(e: Entry<'a, Empty>)) => Some(Ok(e)), | 
|---|
| 576 | Ok(None) => { | 
|---|
| 577 | self.done = true; | 
|---|
| 578 | None | 
|---|
| 579 | } | 
|---|
| 580 | Err(e: Error) => { | 
|---|
| 581 | self.done = true; | 
|---|
| 582 | Some(Err(e)) | 
|---|
| 583 | } | 
|---|
| 584 | } | 
|---|
| 585 | } | 
|---|
| 586 | } | 
|---|
| 587 | } | 
|---|
| 588 |  | 
|---|
| 589 | impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> { | 
|---|
| 590 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { | 
|---|
| 591 | let i: usize = self.obj.borrow_mut().read(buf:into)?; | 
|---|
| 592 | self.pos.set(self.pos.get() + i as u64); | 
|---|
| 593 | Ok(i) | 
|---|
| 594 | } | 
|---|
| 595 | } | 
|---|
| 596 |  | 
|---|
| 597 | impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner<R> { | 
|---|
| 598 | fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> { | 
|---|
| 599 | let pos: u64 = self.obj.borrow_mut().seek(pos)?; | 
|---|
| 600 | self.pos.set(val:pos); | 
|---|
| 601 | Ok(pos) | 
|---|
| 602 | } | 
|---|
| 603 | } | 
|---|
| 604 |  | 
|---|
| 605 | /// Try to fill the buffer from the reader. | 
|---|
| 606 | /// | 
|---|
| 607 | /// If the reader reaches its end before filling the buffer at all, returns `false`. | 
|---|
| 608 | /// Otherwise returns `true`. | 
|---|
| 609 | fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> { | 
|---|
| 610 | let mut read: usize = 0; | 
|---|
| 611 | while read < buf.len() { | 
|---|
| 612 | match r.read(&mut buf[read..])? { | 
|---|
| 613 | 0 => { | 
|---|
| 614 | if read == 0 { | 
|---|
| 615 | return Ok(false); | 
|---|
| 616 | } | 
|---|
| 617 |  | 
|---|
| 618 | return Err(other(msg: "failed to read entire block")); | 
|---|
| 619 | } | 
|---|
| 620 | n: usize => read += n, | 
|---|
| 621 | } | 
|---|
| 622 | } | 
|---|
| 623 | Ok(true) | 
|---|
| 624 | } | 
|---|
| 625 |  | 
|---|