1use std::cell::{Cell, RefCell};
2use std::cmp;
3use std::convert::TryFrom;
4use std::fs;
5use std::io::prelude::*;
6use std::io::{self, SeekFrom};
7use std::marker;
8use std::path::Path;
9
10use crate::entry::{EntryFields, EntryIo};
11use crate::error::TarError;
12use crate::other;
13use crate::pax::*;
14use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header};
15
/// A top-level representation of an archive file.
///
/// This archive can have an entry added to it and it can be iterated over.
pub struct Archive<R: ?Sized + Read> {
    // The only field: everything the archive knows lives in `ArchiveInner`, so
    // the helpers in this file can work on a type-erased `Archive<dyn Read>`.
    inner: ArchiveInner<R>,
}
22
/// The state behind an [`Archive`]: the wrapped reader, the current stream
/// position and the options applied when entries are unpacked.
///
/// `pos` and `obj` use interior mutability (`Cell` / `RefCell`) because the
/// `Read` and `Seek` implementations in this file are written for
/// `&ArchiveInner<R>`, i.e. they operate through a shared reference.
pub struct ArchiveInner<R: ?Sized> {
    /// Stream position: increased by the byte count of every read and
    /// replaced by the result of every seek.
    pos: Cell<u64>,
    /// Permission mask configured through `Archive::set_mask`.
    mask: u32,
    /// Configured through `Archive::set_unpack_xattrs`.
    unpack_xattrs: bool,
    /// Configured through `Archive::set_preserve_permissions`.
    preserve_permissions: bool,
    /// Configured through `Archive::set_preserve_ownerships`.
    preserve_ownerships: bool,
    /// Configured through `Archive::set_preserve_mtime`.
    preserve_mtime: bool,
    /// Configured through `Archive::set_overwrite`.
    overwrite: bool,
    /// When `false`, an all-zero header block ends iteration; when `true`,
    /// such blocks are stepped over.
    ignore_zeros: bool,
    /// The underlying reader. Kept last so that `R` may be unsized.
    obj: RefCell<R>,
}
34
/// An iterator over the entries of an archive.
///
/// This is a thin, typed wrapper: the iteration itself is performed by the
/// type-erased `EntriesFields`, and each produced entry is converted back to
/// an `Entry<'a, R>`.
pub struct Entries<'a, R: 'a + Read> {
    // Type-erased iteration state.
    fields: EntriesFields<'a>,
    // Records the reader type `R` and the borrow of the archive without
    // storing either.
    _ignored: marker::PhantomData<&'a Archive<R>>,
}
40
/// Helper trait combining `Read` and `Seek`, so that a seekable reader can be
/// referred to through a single trait object (`dyn SeekRead`).
trait SeekRead: Read + Seek {}
// Blanket implementation: every `Read + Seek` type is a `SeekRead`.
impl<R: Read + Seek> SeekRead for R {}
43
/// Type-erased state of an iteration over the entries of an archive.
struct EntriesFields<'a> {
    /// The archive being iterated; all header and data reads go through it.
    archive: &'a Archive<dyn Read + 'a>,
    /// The same archive viewed as seekable, when available. If present,
    /// `skip` seeks forward instead of reading and discarding bytes.
    seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
    /// Offset of the next header to read.
    next: u64,
    /// Set once iteration has ended (end of archive or an error); afterwards
    /// the iterator only yields `None`.
    done: bool,
    /// When `true`, raw entries are returned without interpreting GNU long
    /// name/link or PAX extension members.
    raw: bool,
}
51
52impl<R: Read> Archive<R> {
53 /// Create a new archive with the underlying object as the reader.
54 pub fn new(obj: R) -> Archive<R> {
55 Archive {
56 inner: ArchiveInner {
57 mask: u32::MIN,
58 unpack_xattrs: false,
59 preserve_permissions: false,
60 preserve_ownerships: false,
61 preserve_mtime: true,
62 overwrite: true,
63 ignore_zeros: false,
64 obj: RefCell::new(obj),
65 pos: Cell::new(0),
66 },
67 }
68 }
69
70 /// Unwrap this archive, returning the underlying object.
71 pub fn into_inner(self) -> R {
72 self.inner.obj.into_inner()
73 }
74
75 /// Construct an iterator over the entries in this archive.
76 ///
77 /// Note that care must be taken to consider each entry within an archive in
78 /// sequence. If entries are processed out of sequence (from what the
79 /// iterator returns), then the contents read for each entry may be
80 /// corrupted.
81 pub fn entries(&mut self) -> io::Result<Entries<R>> {
82 let me: &mut Archive<dyn Read> = self;
83 me._entries(None).map(|fields| Entries {
84 fields: fields,
85 _ignored: marker::PhantomData,
86 })
87 }
88
89 /// Unpacks the contents tarball into the specified `dst`.
90 ///
91 /// This function will iterate over the entire contents of this tarball,
92 /// extracting each file in turn to the location specified by the entry's
93 /// path name.
94 ///
95 /// This operation is relatively sensitive in that it will not write files
96 /// outside of the path specified by `dst`. Files in the archive which have
97 /// a '..' in their path are skipped during the unpacking process.
98 ///
99 /// # Examples
100 ///
101 /// ```no_run
102 /// use std::fs::File;
103 /// use tar::Archive;
104 ///
105 /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
106 /// ar.unpack("foo").unwrap();
107 /// ```
108 pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> {
109 let me: &mut Archive<dyn Read> = self;
110 me._unpack(dst.as_ref())
111 }
112
113 /// Set the mask of the permission bits when unpacking this entry.
114 ///
115 /// The mask will be inverted when applying against a mode, similar to how
116 /// `umask` works on Unix. In logical notation it looks like:
117 ///
118 /// ```text
119 /// new_mode = old_mode & (~mask)
120 /// ```
121 ///
122 /// The mask is 0 by default and is currently only implemented on Unix.
123 pub fn set_mask(&mut self, mask: u32) {
124 self.inner.mask = mask;
125 }
126
127 /// Indicate whether extended file attributes (xattrs on Unix) are preserved
128 /// when unpacking this archive.
129 ///
130 /// This flag is disabled by default and is currently only implemented on
131 /// Unix using xattr support. This may eventually be implemented for
132 /// Windows, however, if other archive implementations are found which do
133 /// this as well.
134 pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
135 self.inner.unpack_xattrs = unpack_xattrs;
136 }
137
138 /// Indicate whether extended permissions (like suid on Unix) are preserved
139 /// when unpacking this entry.
140 ///
141 /// This flag is disabled by default and is currently only implemented on
142 /// Unix.
143 pub fn set_preserve_permissions(&mut self, preserve: bool) {
144 self.inner.preserve_permissions = preserve;
145 }
146
147 /// Indicate whether numeric ownership ids (like uid and gid on Unix)
148 /// are preserved when unpacking this entry.
149 ///
150 /// This flag is disabled by default and is currently only implemented on
151 /// Unix.
152 pub fn set_preserve_ownerships(&mut self, preserve: bool) {
153 self.inner.preserve_ownerships = preserve;
154 }
155
156 /// Indicate whether files and symlinks should be overwritten on extraction.
157 pub fn set_overwrite(&mut self, overwrite: bool) {
158 self.inner.overwrite = overwrite;
159 }
160
161 /// Indicate whether access time information is preserved when unpacking
162 /// this entry.
163 ///
164 /// This flag is enabled by default.
165 pub fn set_preserve_mtime(&mut self, preserve: bool) {
166 self.inner.preserve_mtime = preserve;
167 }
168
169 /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more
170 /// entries.
171 ///
172 /// This can be used in case multiple tar archives have been concatenated together.
173 pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) {
174 self.inner.ignore_zeros = ignore_zeros;
175 }
176}
177
178impl<R: Seek + Read> Archive<R> {
179 /// Construct an iterator over the entries in this archive for a seekable
180 /// reader. Seek will be used to efficiently skip over file contents.
181 ///
182 /// Note that care must be taken to consider each entry within an archive in
183 /// sequence. If entries are processed out of sequence (from what the
184 /// iterator returns), then the contents read for each entry may be
185 /// corrupted.
186 pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> {
187 let me: &Archive<dyn Read> = self;
188 let me_seekable: &Archive<dyn SeekRead> = self;
189 me._entries(Some(me_seekable)).map(|fields: EntriesFields<'_>| Entries {
190 fields: fields,
191 _ignored: marker::PhantomData,
192 })
193 }
194}
195
196impl Archive<dyn Read + '_> {
197 fn _entries<'a>(
198 &'a self,
199 seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
200 ) -> io::Result<EntriesFields<'a>> {
201 if self.inner.pos.get() != 0 {
202 return Err(other(
203 "cannot call entries unless archive is at \
204 position 0",
205 ));
206 }
207 Ok(EntriesFields {
208 archive: self,
209 seekable_archive,
210 done: false,
211 next: 0,
212 raw: false,
213 })
214 }
215
216 fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
217 if dst.symlink_metadata().is_err() {
218 fs::create_dir_all(&dst)
219 .map_err(|e| TarError::new(format!("failed to create `{}`", dst.display()), e))?;
220 }
221
222 // Canonicalizing the dst directory will prepend the path with '\\?\'
223 // on windows which will allow windows APIs to treat the path as an
224 // extended-length path with a 32,767 character limit. Otherwise all
225 // unpacked paths over 260 characters will fail on creation with a
226 // NotFound exception.
227 let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf());
228
229 // Delay any directory entries until the end (they will be created if needed by
230 // descendants), to ensure that directory permissions do not interfer with descendant
231 // extraction.
232 let mut directories = Vec::new();
233 for entry in self._entries(None)? {
234 let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
235 if file.header().entry_type() == crate::EntryType::Directory {
236 directories.push(file);
237 } else {
238 file.unpack_in(dst)?;
239 }
240 }
241
242 // Apply the directories.
243 //
244 // Note: the order of application is important to permissions. That is, we must traverse
245 // the filesystem graph in topological ordering or else we risk not being able to create
246 // child directories within those of more restrictive permissions. See [0] for details.
247 //
248 // [0]: <https://github.com/alexcrichton/tar-rs/issues/242>
249 directories.sort_by(|a, b| b.path_bytes().cmp(&a.path_bytes()));
250 for mut dir in directories {
251 dir.unpack_in(dst)?;
252 }
253
254 Ok(())
255 }
256}
257
258impl<'a, R: Read> Entries<'a, R> {
259 /// Indicates whether this iterator will return raw entries or not.
260 ///
261 /// If the raw list of entries are returned, then no preprocessing happens
262 /// on account of this library, for example taking into account GNU long name
263 /// or long link archive members. Raw iteration is disabled by default.
264 pub fn raw(self, raw: bool) -> Entries<'a, R> {
265 Entries {
266 fields: EntriesFields {
267 raw: raw,
268 ..self.fields
269 },
270 _ignored: marker::PhantomData,
271 }
272 }
273}
274impl<'a, R: Read> Iterator for Entries<'a, R> {
275 type Item = io::Result<Entry<'a, R>>;
276
277 fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> {
278 self.fields
279 .next()
280 .map(|result: Result, …>| result.map(|e: Entry<'a, Empty>| EntryFields::from(entry:e).into_entry()))
281 }
282}
283
284impl<'a> EntriesFields<'a> {
285 fn next_entry_raw(
286 &mut self,
287 pax_extensions: Option<&[u8]>,
288 ) -> io::Result<Option<Entry<'a, io::Empty>>> {
289 let mut header = Header::new_old();
290 let mut header_pos = self.next;
291 loop {
292 // Seek to the start of the next header in the archive
293 let delta = self.next - self.archive.inner.pos.get();
294 self.skip(delta)?;
295
296 // EOF is an indicator that we are at the end of the archive.
297 if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
298 return Ok(None);
299 }
300
301 // If a header is not all zeros, we have another valid header.
302 // Otherwise, check if we are ignoring zeros and continue, or break as if this is the
303 // end of the archive.
304 if !header.as_bytes().iter().all(|i| *i == 0) {
305 self.next += 512;
306 break;
307 }
308
309 if !self.archive.inner.ignore_zeros {
310 return Ok(None);
311 }
312 self.next += 512;
313 header_pos = self.next;
314 }
315
316 // Make sure the checksum is ok
317 let sum = header.as_bytes()[..148]
318 .iter()
319 .chain(&header.as_bytes()[156..])
320 .fold(0, |a, b| a + (*b as u32))
321 + 8 * 32;
322 let cksum = header.cksum()?;
323 if sum != cksum {
324 return Err(other("archive header checksum mismatch"));
325 }
326
327 let mut pax_size: Option<u64> = None;
328 if let Some(pax_extensions_ref) = &pax_extensions {
329 pax_size = pax_extensions_value(pax_extensions_ref, PAX_SIZE);
330
331 if let Some(pax_uid) = pax_extensions_value(pax_extensions_ref, PAX_UID) {
332 header.set_uid(pax_uid);
333 }
334
335 if let Some(pax_gid) = pax_extensions_value(pax_extensions_ref, PAX_GID) {
336 header.set_gid(pax_gid);
337 }
338 }
339
340 let file_pos = self.next;
341 let mut size = header.entry_size()?;
342 if size == 0 {
343 if let Some(pax_size) = pax_size {
344 size = pax_size;
345 }
346 }
347 let ret = EntryFields {
348 size: size,
349 header_pos: header_pos,
350 file_pos: file_pos,
351 data: vec![EntryIo::Data((&self.archive.inner).take(size))],
352 header: header,
353 long_pathname: None,
354 long_linkname: None,
355 pax_extensions: None,
356 mask: self.archive.inner.mask,
357 unpack_xattrs: self.archive.inner.unpack_xattrs,
358 preserve_permissions: self.archive.inner.preserve_permissions,
359 preserve_mtime: self.archive.inner.preserve_mtime,
360 overwrite: self.archive.inner.overwrite,
361 preserve_ownerships: self.archive.inner.preserve_ownerships,
362 };
363
364 // Store where the next entry is, rounding up by 512 bytes (the size of
365 // a header);
366 let size = size
367 .checked_add(511)
368 .ok_or_else(|| other("size overflow"))?;
369 self.next = self
370 .next
371 .checked_add(size & !(512 - 1))
372 .ok_or_else(|| other("size overflow"))?;
373
374 Ok(Some(ret.into_entry()))
375 }
376
377 fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> {
378 if self.raw {
379 return self.next_entry_raw(None);
380 }
381
382 let mut gnu_longname = None;
383 let mut gnu_longlink = None;
384 let mut pax_extensions = None;
385 let mut processed = 0;
386 loop {
387 processed += 1;
388 let entry = match self.next_entry_raw(pax_extensions.as_deref())? {
389 Some(entry) => entry,
390 None if processed > 1 => {
391 return Err(other(
392 "members found describing a future member \
393 but no future member found",
394 ));
395 }
396 None => return Ok(None),
397 };
398
399 let is_recognized_header =
400 entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some();
401
402 if is_recognized_header && entry.header().entry_type().is_gnu_longname() {
403 if gnu_longname.is_some() {
404 return Err(other(
405 "two long name entries describing \
406 the same member",
407 ));
408 }
409 gnu_longname = Some(EntryFields::from(entry).read_all()?);
410 continue;
411 }
412
413 if is_recognized_header && entry.header().entry_type().is_gnu_longlink() {
414 if gnu_longlink.is_some() {
415 return Err(other(
416 "two long name entries describing \
417 the same member",
418 ));
419 }
420 gnu_longlink = Some(EntryFields::from(entry).read_all()?);
421 continue;
422 }
423
424 if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() {
425 if pax_extensions.is_some() {
426 return Err(other(
427 "two pax extensions entries describing \
428 the same member",
429 ));
430 }
431 pax_extensions = Some(EntryFields::from(entry).read_all()?);
432 continue;
433 }
434
435 let mut fields = EntryFields::from(entry);
436 fields.long_pathname = gnu_longname;
437 fields.long_linkname = gnu_longlink;
438 fields.pax_extensions = pax_extensions;
439 self.parse_sparse_header(&mut fields)?;
440 return Ok(Some(fields.into_entry()));
441 }
442 }
443
444 fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
445 if !entry.header.entry_type().is_gnu_sparse() {
446 return Ok(());
447 }
448 let gnu = match entry.header.as_gnu() {
449 Some(gnu) => gnu,
450 None => return Err(other("sparse entry type listed but not GNU header")),
451 };
452
453 // Sparse files are represented internally as a list of blocks that are
454 // read. Blocks are either a bunch of 0's or they're data from the
455 // underlying archive.
456 //
457 // Blocks of a sparse file are described by the `GnuSparseHeader`
458 // structure, some of which are contained in `GnuHeader` but some of
459 // which may also be contained after the first header in further
460 // headers.
461 //
462 // We read off all the blocks here and use the `add_block` function to
463 // incrementally add them to the list of I/O block (in `entry.data`).
464 // The `add_block` function also validates that each chunk comes after
465 // the previous, we don't overrun the end of the file, and each block is
466 // aligned to a 512-byte boundary in the archive itself.
467 //
468 // At the end we verify that the sparse file size (`Header::size`) is
469 // the same as the current offset (described by the list of blocks) as
470 // well as the amount of data read equals the size of the entry
471 // (`Header::entry_size`).
472 entry.data.truncate(0);
473
474 let mut cur = 0;
475 let mut remaining = entry.size;
476 {
477 let data = &mut entry.data;
478 let reader = &self.archive.inner;
479 let size = entry.size;
480 let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
481 if block.is_empty() {
482 return Ok(());
483 }
484 let off = block.offset()?;
485 let len = block.length()?;
486 if len != 0 && (size - remaining) % 512 != 0 {
487 return Err(other(
488 "previous block in sparse file was not \
489 aligned to 512-byte boundary",
490 ));
491 } else if off < cur {
492 return Err(other(
493 "out of order or overlapping sparse \
494 blocks",
495 ));
496 } else if cur < off {
497 let block = io::repeat(0).take(off - cur);
498 data.push(EntryIo::Pad(block));
499 }
500 cur = off
501 .checked_add(len)
502 .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?;
503 remaining = remaining.checked_sub(len).ok_or_else(|| {
504 other(
505 "sparse file consumed more data than the header \
506 listed",
507 )
508 })?;
509 data.push(EntryIo::Data(reader.take(len)));
510 Ok(())
511 };
512 for block in gnu.sparse.iter() {
513 add_block(block)?
514 }
515 if gnu.is_extended() {
516 let mut ext = GnuExtSparseHeader::new();
517 ext.isextended[0] = 1;
518 while ext.is_extended() {
519 if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
520 return Err(other("failed to read extension"));
521 }
522
523 self.next += 512;
524 for block in ext.sparse.iter() {
525 add_block(block)?;
526 }
527 }
528 }
529 }
530 if cur != gnu.real_size()? {
531 return Err(other(
532 "mismatch in sparse file chunks and \
533 size in header",
534 ));
535 }
536 entry.size = cur;
537 if remaining > 0 {
538 return Err(other(
539 "mismatch in sparse file chunks and \
540 entry size in header",
541 ));
542 }
543 Ok(())
544 }
545
546 fn skip(&mut self, mut amt: u64) -> io::Result<()> {
547 if let Some(seekable_archive) = self.seekable_archive {
548 let pos = io::SeekFrom::Current(
549 i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?,
550 );
551 (&seekable_archive.inner).seek(pos)?;
552 } else {
553 let mut buf = [0u8; 4096 * 8];
554 while amt > 0 {
555 let n = cmp::min(amt, buf.len() as u64);
556 let n = (&self.archive.inner).read(&mut buf[..n as usize])?;
557 if n == 0 {
558 return Err(other("unexpected EOF during skip"));
559 }
560 amt -= n as u64;
561 }
562 }
563 Ok(())
564 }
565}
566
567impl<'a> Iterator for EntriesFields<'a> {
568 type Item = io::Result<Entry<'a, io::Empty>>;
569
570 fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> {
571 if self.done {
572 None
573 } else {
574 match self.next_entry() {
575 Ok(Some(e: Entry<'a, Empty>)) => Some(Ok(e)),
576 Ok(None) => {
577 self.done = true;
578 None
579 }
580 Err(e: Error) => {
581 self.done = true;
582 Some(Err(e))
583 }
584 }
585 }
586 }
587}
588
589impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> {
590 fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
591 let i: usize = self.obj.borrow_mut().read(buf:into)?;
592 self.pos.set(self.pos.get() + i as u64);
593 Ok(i)
594 }
595}
596
597impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner<R> {
598 fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
599 let pos: u64 = self.obj.borrow_mut().seek(pos)?;
600 self.pos.set(val:pos);
601 Ok(pos)
602 }
603}
604
605/// Try to fill the buffer from the reader.
606///
607/// If the reader reaches its end before filling the buffer at all, returns `false`.
608/// Otherwise returns `true`.
609fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> {
610 let mut read: usize = 0;
611 while read < buf.len() {
612 match r.read(&mut buf[read..])? {
613 0 => {
614 if read == 0 {
615 return Ok(false);
616 }
617
618 return Err(other(msg:"failed to read entire block"));
619 }
620 n: usize => read += n,
621 }
622 }
623 Ok(true)
624}
625