1use std::cell::{Cell, RefCell};
2use std::cmp;
3use std::convert::TryFrom;
4use std::fs;
5use std::io::prelude::*;
6use std::io::{self, SeekFrom};
7use std::marker;
8use std::path::Path;
9
10use crate::entry::{EntryFields, EntryIo};
11use crate::error::TarError;
12use crate::header::BLOCK_SIZE;
13use crate::other;
14use crate::pax::*;
15use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header};
16
/// A top-level representation of an archive file.
///
/// This archive can have an entry added to it and it can be iterated over.
///
/// `R` may be unsized: the methods in this file coerce `Archive<R>` into
/// `Archive<dyn Read>` so that the iteration and unpacking logic is not
/// generic over the reader type.
pub struct Archive<R: ?Sized + Read> {
    // Reader, position counter and unpack options. This is the only field,
    // which is what allows the struct to be coerced to `Archive<dyn Read>`.
    inner: ArchiveInner<R>,
}
23
/// State shared by an `Archive` and the entries it hands out: the wrapped
/// reader, the current read position, and the options copied into each entry
/// when it is created.
pub struct ArchiveInner<R: ?Sized> {
    // Position in the underlying object. Advanced by the number of bytes
    // returned from each read through `&ArchiveInner`, and overwritten with the
    // value returned by each seek through `&ArchiveInner`.
    pos: Cell<u64>,
    // Permission-bit mask, see `Archive::set_mask`.
    mask: u32,
    // See `Archive::set_unpack_xattrs`.
    unpack_xattrs: bool,
    // See `Archive::set_preserve_permissions`.
    preserve_permissions: bool,
    // See `Archive::set_preserve_ownerships`.
    preserve_ownerships: bool,
    // See `Archive::set_preserve_mtime`.
    preserve_mtime: bool,
    // See `Archive::set_overwrite`.
    overwrite: bool,
    // When set, all-zero header blocks are skipped instead of ending iteration;
    // see `Archive::set_ignore_zeros`.
    ignore_zeros: bool,
    // The underlying reader. It is wrapped in a `RefCell` because reads and
    // seeks go through a shared `&ArchiveInner`. This must remain the last
    // field: it is the only one mentioning `R`, which may be unsized
    // (`dyn Read`).
    obj: RefCell<R>,
}
35
/// An iterator over the entries of an archive.
///
/// Yields `io::Result<Entry<'a, R>>` items; the actual work is delegated to
/// the non-generic `EntriesFields`.
pub struct Entries<'a, R: 'a + Read> {
    // Type-erased iteration state (it refers to the archive as `dyn Read`).
    fields: EntriesFields<'a>,
    // `fields` does not mention `R`, so this marker carries the reader type
    // (and the borrow of the archive) in the iterator's type.
    _ignored: marker::PhantomData<&'a Archive<R>>,
}
41
/// Object-safe combination of [`Read`] and [`Seek`].
///
/// A trait object can only name one non-auto trait, so this helper exists to
/// make `dyn SeekRead` usable wherever a reader that can also seek is needed.
trait SeekRead: Read + Seek {}

/// Every type that is both readable and seekable is automatically `SeekRead`.
impl<R> SeekRead for R where R: Read + Seek {}
44
/// Non-generic iteration state behind `Entries`.
///
/// The archive is referenced through trait objects so the header-parsing code
/// is not generic over the reader type.
struct EntriesFields<'a> {
    // The archive being iterated, used for all reads.
    archive: &'a Archive<dyn Read + 'a>,
    // The same archive viewed as a seekable reader. When `Some`, `skip` seeks
    // forward instead of reading and discarding bytes.
    seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
    // Archive position of the next header block to read.
    next: u64,
    // Set once iteration has reported the end of the archive or an error;
    // after that `next()` always returns `None`.
    done: bool,
    // When true, `next_entry` returns raw entries without folding GNU long
    // name/link or PAX extension members into the entry that follows them.
    raw: bool,
}
52
53impl<R: Read> Archive<R> {
54 /// Create a new archive with the underlying object as the reader.
55 pub fn new(obj: R) -> Archive<R> {
56 Archive {
57 inner: ArchiveInner {
58 mask: u32::MIN,
59 unpack_xattrs: false,
60 preserve_permissions: false,
61 preserve_ownerships: false,
62 preserve_mtime: true,
63 overwrite: true,
64 ignore_zeros: false,
65 obj: RefCell::new(obj),
66 pos: Cell::new(0),
67 },
68 }
69 }
70
71 /// Unwrap this archive, returning the underlying object.
72 pub fn into_inner(self) -> R {
73 self.inner.obj.into_inner()
74 }
75
76 /// Construct an iterator over the entries in this archive.
77 ///
78 /// Note that care must be taken to consider each entry within an archive in
79 /// sequence. If entries are processed out of sequence (from what the
80 /// iterator returns), then the contents read for each entry may be
81 /// corrupted.
82 pub fn entries(&mut self) -> io::Result<Entries<R>> {
83 let me: &mut Archive<dyn Read> = self;
84 me._entries(None).map(|fields| Entries {
85 fields: fields,
86 _ignored: marker::PhantomData,
87 })
88 }
89
90 /// Unpacks the contents tarball into the specified `dst`.
91 ///
92 /// This function will iterate over the entire contents of this tarball,
93 /// extracting each file in turn to the location specified by the entry's
94 /// path name.
95 ///
96 /// This operation is relatively sensitive in that it will not write files
97 /// outside of the path specified by `dst`. Files in the archive which have
98 /// a '..' in their path are skipped during the unpacking process.
99 ///
100 /// # Examples
101 ///
102 /// ```no_run
103 /// use std::fs::File;
104 /// use tar::Archive;
105 ///
106 /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
107 /// ar.unpack("foo").unwrap();
108 /// ```
109 pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> {
110 let me: &mut Archive<dyn Read> = self;
111 me._unpack(dst.as_ref())
112 }
113
114 /// Set the mask of the permission bits when unpacking this entry.
115 ///
116 /// The mask will be inverted when applying against a mode, similar to how
117 /// `umask` works on Unix. In logical notation it looks like:
118 ///
119 /// ```text
120 /// new_mode = old_mode & (~mask)
121 /// ```
122 ///
123 /// The mask is 0 by default and is currently only implemented on Unix.
124 pub fn set_mask(&mut self, mask: u32) {
125 self.inner.mask = mask;
126 }
127
128 /// Indicate whether extended file attributes (xattrs on Unix) are preserved
129 /// when unpacking this archive.
130 ///
131 /// This flag is disabled by default and is currently only implemented on
132 /// Unix using xattr support. This may eventually be implemented for
133 /// Windows, however, if other archive implementations are found which do
134 /// this as well.
135 pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
136 self.inner.unpack_xattrs = unpack_xattrs;
137 }
138
139 /// Indicate whether extended permissions (like suid on Unix) are preserved
140 /// when unpacking this entry.
141 ///
142 /// This flag is disabled by default and is currently only implemented on
143 /// Unix.
144 pub fn set_preserve_permissions(&mut self, preserve: bool) {
145 self.inner.preserve_permissions = preserve;
146 }
147
148 /// Indicate whether numeric ownership ids (like uid and gid on Unix)
149 /// are preserved when unpacking this entry.
150 ///
151 /// This flag is disabled by default and is currently only implemented on
152 /// Unix.
153 pub fn set_preserve_ownerships(&mut self, preserve: bool) {
154 self.inner.preserve_ownerships = preserve;
155 }
156
157 /// Indicate whether files and symlinks should be overwritten on extraction.
158 pub fn set_overwrite(&mut self, overwrite: bool) {
159 self.inner.overwrite = overwrite;
160 }
161
162 /// Indicate whether access time information is preserved when unpacking
163 /// this entry.
164 ///
165 /// This flag is enabled by default.
166 pub fn set_preserve_mtime(&mut self, preserve: bool) {
167 self.inner.preserve_mtime = preserve;
168 }
169
170 /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more
171 /// entries.
172 ///
173 /// This can be used in case multiple tar archives have been concatenated together.
174 pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) {
175 self.inner.ignore_zeros = ignore_zeros;
176 }
177}
178
179impl<R: Seek + Read> Archive<R> {
180 /// Construct an iterator over the entries in this archive for a seekable
181 /// reader. Seek will be used to efficiently skip over file contents.
182 ///
183 /// Note that care must be taken to consider each entry within an archive in
184 /// sequence. If entries are processed out of sequence (from what the
185 /// iterator returns), then the contents read for each entry may be
186 /// corrupted.
187 pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> {
188 let me: &Archive<dyn Read> = self;
189 let me_seekable: &Archive<dyn SeekRead> = self;
190 me._entries(Some(me_seekable)).map(|fields: EntriesFields<'_>| Entries {
191 fields: fields,
192 _ignored: marker::PhantomData,
193 })
194 }
195}
196
197impl Archive<dyn Read + '_> {
198 fn _entries<'a>(
199 &'a self,
200 seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
201 ) -> io::Result<EntriesFields<'a>> {
202 if self.inner.pos.get() != 0 {
203 return Err(other(
204 "cannot call entries unless archive is at \
205 position 0",
206 ));
207 }
208 Ok(EntriesFields {
209 archive: self,
210 seekable_archive,
211 done: false,
212 next: 0,
213 raw: false,
214 })
215 }
216
217 fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
218 if dst.symlink_metadata().is_err() {
219 fs::create_dir_all(&dst)
220 .map_err(|e| TarError::new(format!("failed to create `{}`", dst.display()), e))?;
221 }
222
223 // Canonicalizing the dst directory will prepend the path with '\\?\'
224 // on windows which will allow windows APIs to treat the path as an
225 // extended-length path with a 32,767 character limit. Otherwise all
226 // unpacked paths over 260 characters will fail on creation with a
227 // NotFound exception.
228 let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf());
229
230 // Delay any directory entries until the end (they will be created if needed by
231 // descendants), to ensure that directory permissions do not interfer with descendant
232 // extraction.
233 let mut directories = Vec::new();
234 for entry in self._entries(None)? {
235 let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
236 if file.header().entry_type() == crate::EntryType::Directory {
237 directories.push(file);
238 } else {
239 file.unpack_in(dst)?;
240 }
241 }
242
243 // Apply the directories.
244 //
245 // Note: the order of application is important to permissions. That is, we must traverse
246 // the filesystem graph in topological ordering or else we risk not being able to create
247 // child directories within those of more restrictive permissions. See [0] for details.
248 //
249 // [0]: <https://github.com/alexcrichton/tar-rs/issues/242>
250 directories.sort_by(|a, b| b.path_bytes().cmp(&a.path_bytes()));
251 for mut dir in directories {
252 dir.unpack_in(dst)?;
253 }
254
255 Ok(())
256 }
257}
258
259impl<'a, R: Read> Entries<'a, R> {
260 /// Indicates whether this iterator will return raw entries or not.
261 ///
262 /// If the raw list of entries is returned, then no preprocessing happens
263 /// on account of this library, for example taking into account GNU long name
264 /// or long link archive members. Raw iteration is disabled by default.
265 pub fn raw(self, raw: bool) -> Entries<'a, R> {
266 Entries {
267 fields: EntriesFields {
268 raw: raw,
269 ..self.fields
270 },
271 _ignored: marker::PhantomData,
272 }
273 }
274}
275impl<'a, R: Read> Iterator for Entries<'a, R> {
276 type Item = io::Result<Entry<'a, R>>;
277
278 fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> {
279 self.fields
280 .next()
281 .map(|result: Result, …>| result.map(|e: Entry<'a, Empty>| EntryFields::from(entry:e).into_entry()))
282 }
283}
284
285impl<'a> EntriesFields<'a> {
286 fn next_entry_raw(
287 &mut self,
288 pax_extensions: Option<&[u8]>,
289 ) -> io::Result<Option<Entry<'a, io::Empty>>> {
290 let mut header = Header::new_old();
291 let mut header_pos = self.next;
292 loop {
293 // Seek to the start of the next header in the archive
294 let delta = self.next - self.archive.inner.pos.get();
295 self.skip(delta)?;
296
297 // EOF is an indicator that we are at the end of the archive.
298 if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
299 return Ok(None);
300 }
301
302 // If a header is not all zeros, we have another valid header.
303 // Otherwise, check if we are ignoring zeros and continue, or break as if this is the
304 // end of the archive.
305 if !header.as_bytes().iter().all(|i| *i == 0) {
306 self.next += BLOCK_SIZE;
307 break;
308 }
309
310 if !self.archive.inner.ignore_zeros {
311 return Ok(None);
312 }
313 self.next += BLOCK_SIZE;
314 header_pos = self.next;
315 }
316
317 // Make sure the checksum is ok
318 let sum = header.as_bytes()[..148]
319 .iter()
320 .chain(&header.as_bytes()[156..])
321 .fold(0, |a, b| a + (*b as u32))
322 + 8 * 32;
323 let cksum = header.cksum()?;
324 if sum != cksum {
325 return Err(other("archive header checksum mismatch"));
326 }
327
328 let mut pax_size: Option<u64> = None;
329 if let Some(pax_extensions_ref) = &pax_extensions {
330 pax_size = pax_extensions_value(pax_extensions_ref, PAX_SIZE);
331
332 if let Some(pax_uid) = pax_extensions_value(pax_extensions_ref, PAX_UID) {
333 header.set_uid(pax_uid);
334 }
335
336 if let Some(pax_gid) = pax_extensions_value(pax_extensions_ref, PAX_GID) {
337 header.set_gid(pax_gid);
338 }
339 }
340
341 let file_pos = self.next;
342 let mut size = header.entry_size()?;
343 if size == 0 {
344 if let Some(pax_size) = pax_size {
345 size = pax_size;
346 }
347 }
348 let ret = EntryFields {
349 size: size,
350 header_pos: header_pos,
351 file_pos: file_pos,
352 data: vec![EntryIo::Data((&self.archive.inner).take(size))],
353 header: header,
354 long_pathname: None,
355 long_linkname: None,
356 pax_extensions: None,
357 mask: self.archive.inner.mask,
358 unpack_xattrs: self.archive.inner.unpack_xattrs,
359 preserve_permissions: self.archive.inner.preserve_permissions,
360 preserve_mtime: self.archive.inner.preserve_mtime,
361 overwrite: self.archive.inner.overwrite,
362 preserve_ownerships: self.archive.inner.preserve_ownerships,
363 };
364
365 // Store where the next entry is, rounding up by 512 bytes (the size of
366 // a header);
367 let size = size
368 .checked_add(BLOCK_SIZE - 1)
369 .ok_or_else(|| other("size overflow"))?;
370 self.next = self
371 .next
372 .checked_add(size & !(BLOCK_SIZE - 1))
373 .ok_or_else(|| other("size overflow"))?;
374
375 Ok(Some(ret.into_entry()))
376 }
377
378 fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> {
379 if self.raw {
380 return self.next_entry_raw(None);
381 }
382
383 let mut gnu_longname = None;
384 let mut gnu_longlink = None;
385 let mut pax_extensions = None;
386 let mut processed = 0;
387 loop {
388 processed += 1;
389 let entry = match self.next_entry_raw(pax_extensions.as_deref())? {
390 Some(entry) => entry,
391 None if processed > 1 => {
392 return Err(other(
393 "members found describing a future member \
394 but no future member found",
395 ));
396 }
397 None => return Ok(None),
398 };
399
400 let is_recognized_header =
401 entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some();
402
403 if is_recognized_header && entry.header().entry_type().is_gnu_longname() {
404 if gnu_longname.is_some() {
405 return Err(other(
406 "two long name entries describing \
407 the same member",
408 ));
409 }
410 gnu_longname = Some(EntryFields::from(entry).read_all()?);
411 continue;
412 }
413
414 if is_recognized_header && entry.header().entry_type().is_gnu_longlink() {
415 if gnu_longlink.is_some() {
416 return Err(other(
417 "two long name entries describing \
418 the same member",
419 ));
420 }
421 gnu_longlink = Some(EntryFields::from(entry).read_all()?);
422 continue;
423 }
424
425 if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() {
426 if pax_extensions.is_some() {
427 return Err(other(
428 "two pax extensions entries describing \
429 the same member",
430 ));
431 }
432 pax_extensions = Some(EntryFields::from(entry).read_all()?);
433 continue;
434 }
435
436 let mut fields = EntryFields::from(entry);
437 fields.long_pathname = gnu_longname;
438 fields.long_linkname = gnu_longlink;
439 fields.pax_extensions = pax_extensions;
440 self.parse_sparse_header(&mut fields)?;
441 return Ok(Some(fields.into_entry()));
442 }
443 }
444
445 fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
446 if !entry.header.entry_type().is_gnu_sparse() {
447 return Ok(());
448 }
449 let gnu = match entry.header.as_gnu() {
450 Some(gnu) => gnu,
451 None => return Err(other("sparse entry type listed but not GNU header")),
452 };
453
454 // Sparse files are represented internally as a list of blocks that are
455 // read. Blocks are either a bunch of 0's or they're data from the
456 // underlying archive.
457 //
458 // Blocks of a sparse file are described by the `GnuSparseHeader`
459 // structure, some of which are contained in `GnuHeader` but some of
460 // which may also be contained after the first header in further
461 // headers.
462 //
463 // We read off all the blocks here and use the `add_block` function to
464 // incrementally add them to the list of I/O block (in `entry.data`).
465 // The `add_block` function also validates that each chunk comes after
466 // the previous, we don't overrun the end of the file, and each block is
467 // aligned to a 512-byte boundary in the archive itself.
468 //
469 // At the end we verify that the sparse file size (`Header::size`) is
470 // the same as the current offset (described by the list of blocks) as
471 // well as the amount of data read equals the size of the entry
472 // (`Header::entry_size`).
473 entry.data.truncate(0);
474
475 let mut cur = 0;
476 let mut remaining = entry.size;
477 {
478 let data = &mut entry.data;
479 let reader = &self.archive.inner;
480 let size = entry.size;
481 let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
482 if block.is_empty() {
483 return Ok(());
484 }
485 let off = block.offset()?;
486 let len = block.length()?;
487 if len != 0 && (size - remaining) % BLOCK_SIZE != 0 {
488 return Err(other(
489 "previous block in sparse file was not \
490 aligned to 512-byte boundary",
491 ));
492 } else if off < cur {
493 return Err(other(
494 "out of order or overlapping sparse \
495 blocks",
496 ));
497 } else if cur < off {
498 let block = io::repeat(0).take(off - cur);
499 data.push(EntryIo::Pad(block));
500 }
501 cur = off
502 .checked_add(len)
503 .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?;
504 remaining = remaining.checked_sub(len).ok_or_else(|| {
505 other(
506 "sparse file consumed more data than the header \
507 listed",
508 )
509 })?;
510 data.push(EntryIo::Data(reader.take(len)));
511 Ok(())
512 };
513 for block in gnu.sparse.iter() {
514 add_block(block)?
515 }
516 if gnu.is_extended() {
517 let mut ext = GnuExtSparseHeader::new();
518 ext.isextended[0] = 1;
519 while ext.is_extended() {
520 if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
521 return Err(other("failed to read extension"));
522 }
523
524 self.next += BLOCK_SIZE;
525 for block in ext.sparse.iter() {
526 add_block(block)?;
527 }
528 }
529 }
530 }
531 if cur != gnu.real_size()? {
532 return Err(other(
533 "mismatch in sparse file chunks and \
534 size in header",
535 ));
536 }
537 entry.size = cur;
538 if remaining > 0 {
539 return Err(other(
540 "mismatch in sparse file chunks and \
541 entry size in header",
542 ));
543 }
544 Ok(())
545 }
546
547 fn skip(&mut self, mut amt: u64) -> io::Result<()> {
548 if let Some(seekable_archive) = self.seekable_archive {
549 let pos = io::SeekFrom::Current(
550 i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?,
551 );
552 (&seekable_archive.inner).seek(pos)?;
553 } else {
554 let mut buf = [0u8; 4096 * 8];
555 while amt > 0 {
556 let n = cmp::min(amt, buf.len() as u64);
557 let n = (&self.archive.inner).read(&mut buf[..n as usize])?;
558 if n == 0 {
559 return Err(other("unexpected EOF during skip"));
560 }
561 amt -= n as u64;
562 }
563 }
564 Ok(())
565 }
566}
567
568impl<'a> Iterator for EntriesFields<'a> {
569 type Item = io::Result<Entry<'a, io::Empty>>;
570
571 fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> {
572 if self.done {
573 None
574 } else {
575 match self.next_entry() {
576 Ok(Some(e: Entry<'a, Empty>)) => Some(Ok(e)),
577 Ok(None) => {
578 self.done = true;
579 None
580 }
581 Err(e: Error) => {
582 self.done = true;
583 Some(Err(e))
584 }
585 }
586 }
587 }
588}
589
590impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> {
591 fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
592 let i: usize = self.obj.borrow_mut().read(buf:into)?;
593 self.pos.set(self.pos.get() + i as u64);
594 Ok(i)
595 }
596}
597
598impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner<R> {
599 fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
600 let pos: u64 = self.obj.borrow_mut().seek(pos)?;
601 self.pos.set(val:pos);
602 Ok(pos)
603 }
604}
605
606/// Try to fill the buffer from the reader.
607///
608/// If the reader reaches its end before filling the buffer at all, returns `false`.
609/// Otherwise returns `true`.
610fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> {
611 let mut read: usize = 0;
612 while read < buf.len() {
613 match r.read(&mut buf[read..])? {
614 0 => {
615 if read == 0 {
616 return Ok(false);
617 }
618
619 return Err(other(msg:"failed to read entire block"));
620 }
621 n: usize => read += n,
622 }
623 }
624 Ok(true)
625}
626