use std::cell::{Cell, RefCell};
use std::cmp;
use std::convert::TryFrom;
use std::fs;
use std::io::prelude::*;
use std::io::{self, SeekFrom};
use std::marker;
use std::path::Path;

use crate::entry::{EntryFields, EntryIo};
use crate::error::TarError;
use crate::other;
use crate::pax::pax_extensions_size;
use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header};

/// A top-level representation of an archive file.
///
/// This archive can have an entry added to it and it can be iterated over.
pub struct Archive<R: ?Sized + Read> {
    inner: ArchiveInner<R>,
}

pub struct ArchiveInner<R: ?Sized> {
    pos: Cell<u64>,
    unpack_xattrs: bool,
    preserve_permissions: bool,
    preserve_mtime: bool,
    overwrite: bool,
    ignore_zeros: bool,
    obj: RefCell<R>,
}

/// An iterator over the entries of an archive.
pub struct Entries<'a, R: 'a + Read> {
    fields: EntriesFields<'a>,
    _ignored: marker::PhantomData<&'a Archive<R>>,
}

trait SeekRead: Read + Seek {}
impl<R: Read + Seek> SeekRead for R {}

struct EntriesFields<'a> {
    archive: &'a Archive<dyn Read + 'a>,
    seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
    next: u64,
    done: bool,
    raw: bool,
}

impl<R: Read> Archive<R> {
    /// Create a new archive with the underlying object as the reader.
    pub fn new(obj: R) -> Archive<R> {
        Archive {
            inner: ArchiveInner {
                unpack_xattrs: false,
                preserve_permissions: false,
                preserve_mtime: true,
                overwrite: true,
                ignore_zeros: false,
                obj: RefCell::new(obj),
                pos: Cell::new(0),
            },
        }
    }

    /// Unwrap this archive, returning the underlying object.
    pub fn into_inner(self) -> R {
        self.inner.obj.into_inner()
    }

    /// Construct an iterator over the entries in this archive.
    ///
    /// Note that care must be taken to consider each entry within an archive in
    /// sequence. If entries are processed out of sequence (from what the
    /// iterator returns), then the contents read for each entry may be
    /// corrupted.
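    ///
    /// # Examples
    ///
    /// A minimal sketch of sequential iteration, printing each entry's path
    /// (the `foo.tar` file name is illustrative):
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    /// for entry in ar.entries().unwrap() {
    ///     let entry = entry.unwrap();
    ///     println!("{}", entry.path().unwrap().display());
    /// }
    /// ```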
    pub fn entries(&mut self) -> io::Result<Entries<R>> {
        let me: &mut Archive<dyn Read> = self;
        me._entries(None).map(|fields| Entries {
            fields: fields,
            _ignored: marker::PhantomData,
        })
    }

    /// Unpacks the contents of the tarball into the specified `dst`.
    ///
    /// This function will iterate over the entire contents of this tarball,
    /// extracting each file in turn to the location specified by the entry's
    /// path name.
    ///
    /// This operation is relatively sensitive in that it will not write files
    /// outside of the path specified by `dst`. Files in the archive which have
    /// a '..' in their path are skipped during the unpacking process.
    ///
    /// # Examples
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    /// ar.unpack("foo").unwrap();
    /// ```
    pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> {
        let me: &mut Archive<dyn Read> = self;
        me._unpack(dst.as_ref())
    }

    /// Indicate whether extended file attributes (xattrs on Unix) are preserved
    /// when unpacking this archive.
    ///
    /// This flag is disabled by default and is currently only implemented on
    /// Unix using xattr support. This may eventually be implemented for
    /// Windows, however, if other archive implementations are found which do
    /// this as well.
    pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
        self.inner.unpack_xattrs = unpack_xattrs;
    }

    /// Indicate whether extended permissions (like suid on Unix) are preserved
    /// when unpacking this entry.
    ///
    /// This flag is disabled by default and is currently only implemented on
    /// Unix.
    pub fn set_preserve_permissions(&mut self, preserve: bool) {
        self.inner.preserve_permissions = preserve;
    }

    /// Indicate whether files and symlinks should be overwritten on extraction.
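    ///
    /// # Examples
    ///
    /// A sketch of unpacking without overwriting anything that already exists
    /// in the destination (`foo.tar` and `dst` are illustrative paths):
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    /// ar.set_overwrite(false);
    /// ar.unpack("dst").unwrap();
    /// ```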
    pub fn set_overwrite(&mut self, overwrite: bool) {
        self.inner.overwrite = overwrite;
    }

    /// Indicate whether modification time (mtime) information is preserved
    /// when unpacking this entry.
    ///
    /// This flag is enabled by default.
    pub fn set_preserve_mtime(&mut self, preserve: bool) {
        self.inner.preserve_mtime = preserve;
    }

    /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more
    /// entries.
    ///
    /// This can be used in case multiple tar archives have been concatenated together.
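    ///
    /// # Examples
    ///
    /// A sketch of reading a file assumed to contain several tar archives
    /// concatenated back to back (the `combined.tar` name is illustrative):
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("combined.tar").unwrap());
    /// ar.set_ignore_zeros(true);
    /// for entry in ar.entries().unwrap() {
    ///     println!("{}", entry.unwrap().path().unwrap().display());
    /// }
    /// ```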
    pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) {
        self.inner.ignore_zeros = ignore_zeros;
    }
}

impl<R: Seek + Read> Archive<R> {
    /// Construct an iterator over the entries in this archive for a seekable
    /// reader. Seek will be used to efficiently skip over file contents.
    ///
    /// Note that care must be taken to consider each entry within an archive in
    /// sequence. If entries are processed out of sequence (from what the
    /// iterator returns), then the contents read for each entry may be
    /// corrupted.
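    ///
    /// # Examples
    ///
    /// A minimal sketch using a `File`, which implements both `Read` and
    /// `Seek`, so entry contents can be skipped over rather than read
    /// (the `foo.tar` file name is illustrative):
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    /// for entry in ar.entries_with_seek().unwrap() {
    ///     println!("{}", entry.unwrap().path().unwrap().display());
    /// }
    /// ```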
    pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> {
        let me: &Archive<dyn Read> = self;
        let me_seekable: &Archive<dyn SeekRead> = self;
        me._entries(Some(me_seekable)).map(|fields| Entries {
            fields: fields,
            _ignored: marker::PhantomData,
        })
    }
}

impl Archive<dyn Read + '_> {
    fn _entries<'a>(
        &'a self,
        seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
    ) -> io::Result<EntriesFields<'a>> {
        if self.inner.pos.get() != 0 {
            return Err(other(
                "cannot call entries unless archive is at \
                 position 0",
            ));
        }
        Ok(EntriesFields {
            archive: self,
            seekable_archive,
            done: false,
            next: 0,
            raw: false,
        })
    }

    fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
        if dst.symlink_metadata().is_err() {
            fs::create_dir_all(&dst)
                .map_err(|e| TarError::new(format!("failed to create `{}`", dst.display()), e))?;
        }

        // Canonicalizing the dst directory will prepend the path with '\\?\'
        // on windows which will allow windows APIs to treat the path as an
        // extended-length path with a 32,767 character limit. Otherwise all
        // unpacked paths over 260 characters will fail on creation with a
        // NotFound exception.
        let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf());

        // Delay any directory entries until the end (they will be created if needed by
        // descendants), to ensure that directory permissions do not interfere with descendant
        // extraction.
        let mut directories = Vec::new();
        for entry in self._entries(None)? {
            let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
            if file.header().entry_type() == crate::EntryType::Directory {
                directories.push(file);
            } else {
                file.unpack_in(dst)?;
            }
        }
        for mut dir in directories {
            dir.unpack_in(dst)?;
        }

        Ok(())
    }
}

impl<'a, R: Read> Entries<'a, R> {
    /// Indicates whether this iterator will return raw entries or not.
    ///
    /// If the raw list of entries is returned, then no preprocessing is
    /// performed by this library, for example taking into account GNU long name
    /// or long link archive members. Raw iteration is disabled by default.
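    ///
    /// # Examples
    ///
    /// A sketch of raw iteration, which also yields the GNU long-name,
    /// long-link, and PAX extension members that non-raw iteration folds into
    /// the entry they describe (`foo.tar` is an illustrative file name):
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    /// for entry in ar.entries().unwrap().raw(true) {
    ///     let entry = entry.unwrap();
    ///     println!("{:?}", entry.header().entry_type());
    /// }
    /// ```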
    pub fn raw(self, raw: bool) -> Entries<'a, R> {
        Entries {
            fields: EntriesFields {
                raw: raw,
                ..self.fields
            },
            _ignored: marker::PhantomData,
        }
    }
}

impl<'a, R: Read> Iterator for Entries<'a, R> {
    type Item = io::Result<Entry<'a, R>>;

    fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> {
        self.fields
            .next()
            .map(|result| result.map(|e| EntryFields::from(e).into_entry()))
    }
}

impl<'a> EntriesFields<'a> {
    fn next_entry_raw(
        &mut self,
        pax_size: Option<u64>,
    ) -> io::Result<Option<Entry<'a, io::Empty>>> {
        let mut header = Header::new_old();
        let mut header_pos = self.next;
        loop {
            // Seek to the start of the next header in the archive
            let delta = self.next - self.archive.inner.pos.get();
            self.skip(delta)?;

            // EOF is an indicator that we are at the end of the archive.
            if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
                return Ok(None);
            }

            // If a header is not all zeros, we have another valid header.
            // Otherwise, check if we are ignoring zeros and continue, or break as if this is the
            // end of the archive.
            if !header.as_bytes().iter().all(|i| *i == 0) {
                self.next += 512;
                break;
            }

            if !self.archive.inner.ignore_zeros {
                return Ok(None);
            }
            self.next += 512;
            header_pos = self.next;
        }

        // Make sure the checksum is ok
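        // (The checksum field itself, bytes 148..156, is counted as eight
        // ASCII spaces, which is where the `8 * 32` added below comes from.)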
        let sum = header.as_bytes()[..148]
            .iter()
            .chain(&header.as_bytes()[156..])
            .fold(0, |a, b| a + (*b as u32))
            + 8 * 32;
        let cksum = header.cksum()?;
        if sum != cksum {
            return Err(other("archive header checksum mismatch"));
        }

        let file_pos = self.next;
        let mut size = header.entry_size()?;
        if size == 0 {
            if let Some(pax_size) = pax_size {
                size = pax_size;
            }
        }
        let ret = EntryFields {
            size: size,
            header_pos: header_pos,
            file_pos: file_pos,
            data: vec![EntryIo::Data((&self.archive.inner).take(size))],
            header: header,
            long_pathname: None,
            long_linkname: None,
            pax_extensions: None,
            unpack_xattrs: self.archive.inner.unpack_xattrs,
            preserve_permissions: self.archive.inner.preserve_permissions,
            preserve_mtime: self.archive.inner.preserve_mtime,
            overwrite: self.archive.inner.overwrite,
        };

        // Store where the next entry is, rounding up by 512 bytes (the size of
        // a header).
        let size = size
            .checked_add(511)
            .ok_or_else(|| other("size overflow"))?;
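        // `& !(512 - 1)` clears the low nine bits, so combined with the
        // `checked_add(511)` above this rounds the size up to a multiple of 512.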
        self.next = self
            .next
            .checked_add(size & !(512 - 1))
            .ok_or_else(|| other("size overflow"))?;

        Ok(Some(ret.into_entry()))
    }

    fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> {
        if self.raw {
            return self.next_entry_raw(None);
        }

        let mut gnu_longname = None;
        let mut gnu_longlink = None;
        let mut pax_extensions = None;
        let mut pax_size = None;
        let mut processed = 0;
        loop {
            processed += 1;
            let entry = match self.next_entry_raw(pax_size)? {
                Some(entry) => entry,
                None if processed > 1 => {
                    return Err(other(
                        "members found describing a future member \
                         but no future member found",
                    ));
                }
                None => return Ok(None),
            };

            let is_recognized_header =
                entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some();

            if is_recognized_header && entry.header().entry_type().is_gnu_longname() {
                if gnu_longname.is_some() {
                    return Err(other(
                        "two long name entries describing \
                         the same member",
                    ));
                }
                gnu_longname = Some(EntryFields::from(entry).read_all()?);
                continue;
            }

            if is_recognized_header && entry.header().entry_type().is_gnu_longlink() {
                if gnu_longlink.is_some() {
                    return Err(other(
                        "two long link entries describing \
                         the same member",
                    ));
                }
                gnu_longlink = Some(EntryFields::from(entry).read_all()?);
                continue;
            }

            if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() {
                if pax_extensions.is_some() {
                    return Err(other(
                        "two pax extensions entries describing \
                         the same member",
                    ));
                }
                pax_extensions = Some(EntryFields::from(entry).read_all()?);
                if let Some(pax_extensions_ref) = &pax_extensions {
                    pax_size = pax_extensions_size(pax_extensions_ref);
                }
                continue;
            }

            let mut fields = EntryFields::from(entry);
            fields.long_pathname = gnu_longname;
            fields.long_linkname = gnu_longlink;
            fields.pax_extensions = pax_extensions;
            self.parse_sparse_header(&mut fields)?;
            return Ok(Some(fields.into_entry()));
        }
    }

    fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
        if !entry.header.entry_type().is_gnu_sparse() {
            return Ok(());
        }
        let gnu = match entry.header.as_gnu() {
            Some(gnu) => gnu,
            None => return Err(other("sparse entry type listed but not GNU header")),
        };

        // Sparse files are represented internally as a list of blocks that are
        // read. Blocks are either a bunch of 0's or they're data from the
        // underlying archive.
        //
        // Blocks of a sparse file are described by the `GnuSparseHeader`
        // structure, some of which are contained in `GnuHeader` but some of
        // which may also be contained after the first header in further
        // headers.
        //
        // We read off all the blocks here and use the `add_block` function to
        // incrementally add them to the list of I/O blocks (in `entry.data`).
        // The `add_block` function also validates that each chunk comes after
        // the previous one, that we don't overrun the end of the file, and that
        // each block is aligned to a 512-byte boundary in the archive itself.
        //
        // At the end we verify that the sparse file size (`Header::size`) is
        // the same as the current offset (described by the list of blocks) as
        // well as that the amount of data read equals the size of the entry
        // (`Header::entry_size`).
        entry.data.truncate(0);

        let mut cur = 0;
        let mut remaining = entry.size;
        {
            let data = &mut entry.data;
            let reader = &self.archive.inner;
            let size = entry.size;
            let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
                if block.is_empty() {
                    return Ok(());
                }
                let off = block.offset()?;
                let len = block.length()?;
                if len != 0 && (size - remaining) % 512 != 0 {
                    return Err(other(
                        "previous block in sparse file was not \
                         aligned to 512-byte boundary",
                    ));
                } else if off < cur {
                    return Err(other(
                        "out of order or overlapping sparse \
                         blocks",
                    ));
                } else if cur < off {
                    let block = io::repeat(0).take(off - cur);
                    data.push(EntryIo::Pad(block));
                }
                cur = off
                    .checked_add(len)
                    .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?;
                remaining = remaining.checked_sub(len).ok_or_else(|| {
                    other(
                        "sparse file consumed more data than the header \
                         listed",
                    )
                })?;
                data.push(EntryIo::Data(reader.take(len)));
                Ok(())
            };
            for block in gnu.sparse.iter() {
                add_block(block)?
            }
            if gnu.is_extended() {
                let mut ext = GnuExtSparseHeader::new();
                ext.isextended[0] = 1;
                while ext.is_extended() {
                    if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
                        return Err(other("failed to read extension"));
                    }

                    self.next += 512;
                    for block in ext.sparse.iter() {
                        add_block(block)?;
                    }
                }
            }
        }
        if cur != gnu.real_size()? {
            return Err(other(
                "mismatch in sparse file chunks and \
                 size in header",
            ));
        }
        entry.size = cur;
        if remaining > 0 {
            return Err(other(
                "mismatch in sparse file chunks and \
                 entry size in header",
            ));
        }
        Ok(())
    }

    fn skip(&mut self, mut amt: u64) -> io::Result<()> {
        if let Some(seekable_archive) = self.seekable_archive {
            let pos = io::SeekFrom::Current(
                i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?,
            );
            (&seekable_archive.inner).seek(pos)?;
        } else {
            let mut buf = [0u8; 4096 * 8];
            while amt > 0 {
                let n = cmp::min(amt, buf.len() as u64);
                let n = (&self.archive.inner).read(&mut buf[..n as usize])?;
                if n == 0 {
                    return Err(other("unexpected EOF during skip"));
                }
                amt -= n as u64;
            }
        }
        Ok(())
    }
}

impl<'a> Iterator for EntriesFields<'a> {
    type Item = io::Result<Entry<'a, io::Empty>>;

    fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> {
        if self.done {
            None
        } else {
            match self.next_entry() {
                Ok(Some(e)) => Some(Ok(e)),
                Ok(None) => {
                    self.done = true;
                    None
                }
                Err(e) => {
                    self.done = true;
                    Some(Err(e))
                }
            }
        }
    }
}

impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> {
    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
        let i = self.obj.borrow_mut().read(into)?;
        self.pos.set(self.pos.get() + i as u64);
        Ok(i)
    }
}

impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner<R> {
    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
        let pos = self.obj.borrow_mut().seek(pos)?;
        self.pos.set(pos);
        Ok(pos)
    }
}

/// Try to fill the buffer from the reader.
///
/// If the reader reaches its end before filling the buffer at all, returns `false`.
/// Otherwise returns `true`.
fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> {
    let mut read = 0;
    while read < buf.len() {
        match r.read(&mut buf[read..])? {
            0 => {
                if read == 0 {
                    return Ok(false);
                }

                return Err(other("failed to read entire block"));
            }
            n => read += n,
        }
    }
    Ok(true)
}