1 | use std::cell::{Cell, RefCell}; |
2 | use std::cmp; |
3 | use std::convert::TryFrom; |
4 | use std::fs; |
5 | use std::io::prelude::*; |
6 | use std::io::{self, SeekFrom}; |
7 | use std::marker; |
8 | use std::path::Path; |
9 | |
10 | use crate::entry::{EntryFields, EntryIo}; |
11 | use crate::error::TarError; |
12 | use crate::header::BLOCK_SIZE; |
13 | use crate::other; |
14 | use crate::pax::*; |
15 | use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header}; |
16 | |
17 | /// A top-level representation of an archive file. |
18 | /// |
19 | /// This archive can have an entry added to it and it can be iterated over. |
/// A top-level representation of an archive file.
///
/// This archive can have an entry added to it and it can be iterated over.
pub struct Archive<R: ?Sized + Read> {
    // All state (reader, position, unpack options) lives in `ArchiveInner`,
    // which is `?Sized`-friendly so the crate can work with
    // `Archive<dyn Read>` internally.
    inner: ArchiveInner<R>,
}
23 | |
/// Shared internals of an [`Archive`]: the wrapped reader plus the options
/// that control unpacking. Interior mutability (`Cell`/`RefCell`) lets entry
/// readers operate through a shared `&ArchiveInner`.
pub struct ArchiveInner<R: ?Sized> {
    /// Number of bytes consumed from `obj` so far.
    pos: Cell<u64>,
    /// Permission-bit mask applied (inverted, like `umask`) when unpacking.
    mask: u32,
    /// Whether to restore extended attributes (xattrs) when unpacking.
    unpack_xattrs: bool,
    /// Whether to restore extended permission bits (e.g. suid) when unpacking.
    preserve_permissions: bool,
    /// Whether to restore numeric uid/gid ownership when unpacking.
    preserve_ownerships: bool,
    /// Whether to restore file modification times when unpacking.
    preserve_mtime: bool,
    /// Whether extraction may overwrite existing files and symlinks.
    overwrite: bool,
    /// Whether all-zero header blocks are skipped rather than treated as the
    /// end of the archive (useful for concatenated archives).
    ignore_zeros: bool,
    /// The underlying reader.
    obj: RefCell<R>,
}
35 | |
36 | /// An iterator over the entries of an archive. |
/// An iterator over the entries of an archive.
pub struct Entries<'a, R: 'a + Read> {
    // Type-erased iteration state; the concrete reader type `R` is retained
    // only via `PhantomData` so yielded entries can be re-tagged with it.
    fields: EntriesFields<'a>,
    _ignored: marker::PhantomData<&'a Archive<R>>,
}
41 | |
/// Combined `Read + Seek` trait object, so a seekable archive can be stored
/// type-erased (see `EntriesFields::seekable_archive`) and used to skip over
/// file contents efficiently. Blanket-implemented for every `Read + Seek`.
trait SeekRead: Read + Seek {}
impl<R: Read + Seek> SeekRead for R {}
44 | |
/// Type-erased iteration state backing [`Entries`].
struct EntriesFields<'a> {
    /// The archive being iterated.
    archive: &'a Archive<dyn Read + 'a>,
    /// The same archive viewed as seekable, when created via
    /// `entries_with_seek`; enables `seek`-based skipping of file contents.
    seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
    /// Absolute byte offset of the next header block to read.
    next: u64,
    /// Set once the end of the archive (or an error) has been reached.
    done: bool,
    /// Whether raw iteration (no GNU long name/link or PAX preprocessing)
    /// was requested.
    raw: bool,
}
52 | |
53 | impl<R: Read> Archive<R> { |
54 | /// Create a new archive with the underlying object as the reader. |
55 | pub fn new(obj: R) -> Archive<R> { |
56 | Archive { |
57 | inner: ArchiveInner { |
58 | mask: u32::MIN, |
59 | unpack_xattrs: false, |
60 | preserve_permissions: false, |
61 | preserve_ownerships: false, |
62 | preserve_mtime: true, |
63 | overwrite: true, |
64 | ignore_zeros: false, |
65 | obj: RefCell::new(obj), |
66 | pos: Cell::new(0), |
67 | }, |
68 | } |
69 | } |
70 | |
71 | /// Unwrap this archive, returning the underlying object. |
72 | pub fn into_inner(self) -> R { |
73 | self.inner.obj.into_inner() |
74 | } |
75 | |
76 | /// Construct an iterator over the entries in this archive. |
77 | /// |
78 | /// Note that care must be taken to consider each entry within an archive in |
79 | /// sequence. If entries are processed out of sequence (from what the |
80 | /// iterator returns), then the contents read for each entry may be |
81 | /// corrupted. |
82 | pub fn entries(&mut self) -> io::Result<Entries<R>> { |
83 | let me: &mut Archive<dyn Read> = self; |
84 | me._entries(None).map(|fields| Entries { |
85 | fields: fields, |
86 | _ignored: marker::PhantomData, |
87 | }) |
88 | } |
89 | |
90 | /// Unpacks the contents tarball into the specified `dst`. |
91 | /// |
92 | /// This function will iterate over the entire contents of this tarball, |
93 | /// extracting each file in turn to the location specified by the entry's |
94 | /// path name. |
95 | /// |
96 | /// This operation is relatively sensitive in that it will not write files |
97 | /// outside of the path specified by `dst`. Files in the archive which have |
98 | /// a '..' in their path are skipped during the unpacking process. |
99 | /// |
100 | /// # Examples |
101 | /// |
102 | /// ```no_run |
103 | /// use std::fs::File; |
104 | /// use tar::Archive; |
105 | /// |
106 | /// let mut ar = Archive::new(File::open("foo.tar" ).unwrap()); |
107 | /// ar.unpack("foo" ).unwrap(); |
108 | /// ``` |
109 | pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> { |
110 | let me: &mut Archive<dyn Read> = self; |
111 | me._unpack(dst.as_ref()) |
112 | } |
113 | |
114 | /// Set the mask of the permission bits when unpacking this entry. |
115 | /// |
116 | /// The mask will be inverted when applying against a mode, similar to how |
117 | /// `umask` works on Unix. In logical notation it looks like: |
118 | /// |
119 | /// ```text |
120 | /// new_mode = old_mode & (~mask) |
121 | /// ``` |
122 | /// |
123 | /// The mask is 0 by default and is currently only implemented on Unix. |
124 | pub fn set_mask(&mut self, mask: u32) { |
125 | self.inner.mask = mask; |
126 | } |
127 | |
128 | /// Indicate whether extended file attributes (xattrs on Unix) are preserved |
129 | /// when unpacking this archive. |
130 | /// |
131 | /// This flag is disabled by default and is currently only implemented on |
132 | /// Unix using xattr support. This may eventually be implemented for |
133 | /// Windows, however, if other archive implementations are found which do |
134 | /// this as well. |
135 | pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) { |
136 | self.inner.unpack_xattrs = unpack_xattrs; |
137 | } |
138 | |
139 | /// Indicate whether extended permissions (like suid on Unix) are preserved |
140 | /// when unpacking this entry. |
141 | /// |
142 | /// This flag is disabled by default and is currently only implemented on |
143 | /// Unix. |
144 | pub fn set_preserve_permissions(&mut self, preserve: bool) { |
145 | self.inner.preserve_permissions = preserve; |
146 | } |
147 | |
148 | /// Indicate whether numeric ownership ids (like uid and gid on Unix) |
149 | /// are preserved when unpacking this entry. |
150 | /// |
151 | /// This flag is disabled by default and is currently only implemented on |
152 | /// Unix. |
153 | pub fn set_preserve_ownerships(&mut self, preserve: bool) { |
154 | self.inner.preserve_ownerships = preserve; |
155 | } |
156 | |
157 | /// Indicate whether files and symlinks should be overwritten on extraction. |
158 | pub fn set_overwrite(&mut self, overwrite: bool) { |
159 | self.inner.overwrite = overwrite; |
160 | } |
161 | |
162 | /// Indicate whether access time information is preserved when unpacking |
163 | /// this entry. |
164 | /// |
165 | /// This flag is enabled by default. |
166 | pub fn set_preserve_mtime(&mut self, preserve: bool) { |
167 | self.inner.preserve_mtime = preserve; |
168 | } |
169 | |
170 | /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more |
171 | /// entries. |
172 | /// |
173 | /// This can be used in case multiple tar archives have been concatenated together. |
174 | pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) { |
175 | self.inner.ignore_zeros = ignore_zeros; |
176 | } |
177 | } |
178 | |
179 | impl<R: Seek + Read> Archive<R> { |
180 | /// Construct an iterator over the entries in this archive for a seekable |
181 | /// reader. Seek will be used to efficiently skip over file contents. |
182 | /// |
183 | /// Note that care must be taken to consider each entry within an archive in |
184 | /// sequence. If entries are processed out of sequence (from what the |
185 | /// iterator returns), then the contents read for each entry may be |
186 | /// corrupted. |
187 | pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> { |
188 | let me: &Archive<dyn Read> = self; |
189 | let me_seekable: &Archive<dyn SeekRead> = self; |
190 | me._entries(Some(me_seekable)).map(|fields: EntriesFields<'_>| Entries { |
191 | fields: fields, |
192 | _ignored: marker::PhantomData, |
193 | }) |
194 | } |
195 | } |
196 | |
197 | impl Archive<dyn Read + '_> { |
198 | fn _entries<'a>( |
199 | &'a self, |
200 | seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>, |
201 | ) -> io::Result<EntriesFields<'a>> { |
202 | if self.inner.pos.get() != 0 { |
203 | return Err(other( |
204 | "cannot call entries unless archive is at \ |
205 | position 0" , |
206 | )); |
207 | } |
208 | Ok(EntriesFields { |
209 | archive: self, |
210 | seekable_archive, |
211 | done: false, |
212 | next: 0, |
213 | raw: false, |
214 | }) |
215 | } |
216 | |
217 | fn _unpack(&mut self, dst: &Path) -> io::Result<()> { |
218 | if dst.symlink_metadata().is_err() { |
219 | fs::create_dir_all(&dst) |
220 | .map_err(|e| TarError::new(format!("failed to create ` {}`" , dst.display()), e))?; |
221 | } |
222 | |
223 | // Canonicalizing the dst directory will prepend the path with '\\?\' |
224 | // on windows which will allow windows APIs to treat the path as an |
225 | // extended-length path with a 32,767 character limit. Otherwise all |
226 | // unpacked paths over 260 characters will fail on creation with a |
227 | // NotFound exception. |
228 | let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf()); |
229 | |
230 | // Delay any directory entries until the end (they will be created if needed by |
231 | // descendants), to ensure that directory permissions do not interfer with descendant |
232 | // extraction. |
233 | let mut directories = Vec::new(); |
234 | for entry in self._entries(None)? { |
235 | let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive" , e))?; |
236 | if file.header().entry_type() == crate::EntryType::Directory { |
237 | directories.push(file); |
238 | } else { |
239 | file.unpack_in(dst)?; |
240 | } |
241 | } |
242 | |
243 | // Apply the directories. |
244 | // |
245 | // Note: the order of application is important to permissions. That is, we must traverse |
246 | // the filesystem graph in topological ordering or else we risk not being able to create |
247 | // child directories within those of more restrictive permissions. See [0] for details. |
248 | // |
249 | // [0]: <https://github.com/alexcrichton/tar-rs/issues/242> |
250 | directories.sort_by(|a, b| b.path_bytes().cmp(&a.path_bytes())); |
251 | for mut dir in directories { |
252 | dir.unpack_in(dst)?; |
253 | } |
254 | |
255 | Ok(()) |
256 | } |
257 | } |
258 | |
259 | impl<'a, R: Read> Entries<'a, R> { |
260 | /// Indicates whether this iterator will return raw entries or not. |
261 | /// |
262 | /// If the raw list of entries is returned, then no preprocessing happens |
263 | /// on account of this library, for example taking into account GNU long name |
264 | /// or long link archive members. Raw iteration is disabled by default. |
265 | pub fn raw(self, raw: bool) -> Entries<'a, R> { |
266 | Entries { |
267 | fields: EntriesFields { |
268 | raw: raw, |
269 | ..self.fields |
270 | }, |
271 | _ignored: marker::PhantomData, |
272 | } |
273 | } |
274 | } |
275 | impl<'a, R: Read> Iterator for Entries<'a, R> { |
276 | type Item = io::Result<Entry<'a, R>>; |
277 | |
278 | fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> { |
279 | self.fields |
280 | .next() |
281 | .map(|result: Result, …>| result.map(|e: Entry<'a, Empty>| EntryFields::from(entry:e).into_entry())) |
282 | } |
283 | } |
284 | |
285 | impl<'a> EntriesFields<'a> { |
286 | fn next_entry_raw( |
287 | &mut self, |
288 | pax_extensions: Option<&[u8]>, |
289 | ) -> io::Result<Option<Entry<'a, io::Empty>>> { |
290 | let mut header = Header::new_old(); |
291 | let mut header_pos = self.next; |
292 | loop { |
293 | // Seek to the start of the next header in the archive |
294 | let delta = self.next - self.archive.inner.pos.get(); |
295 | self.skip(delta)?; |
296 | |
297 | // EOF is an indicator that we are at the end of the archive. |
298 | if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? { |
299 | return Ok(None); |
300 | } |
301 | |
302 | // If a header is not all zeros, we have another valid header. |
303 | // Otherwise, check if we are ignoring zeros and continue, or break as if this is the |
304 | // end of the archive. |
305 | if !header.as_bytes().iter().all(|i| *i == 0) { |
306 | self.next += BLOCK_SIZE; |
307 | break; |
308 | } |
309 | |
310 | if !self.archive.inner.ignore_zeros { |
311 | return Ok(None); |
312 | } |
313 | self.next += BLOCK_SIZE; |
314 | header_pos = self.next; |
315 | } |
316 | |
317 | // Make sure the checksum is ok |
318 | let sum = header.as_bytes()[..148] |
319 | .iter() |
320 | .chain(&header.as_bytes()[156..]) |
321 | .fold(0, |a, b| a + (*b as u32)) |
322 | + 8 * 32; |
323 | let cksum = header.cksum()?; |
324 | if sum != cksum { |
325 | return Err(other("archive header checksum mismatch" )); |
326 | } |
327 | |
328 | let mut pax_size: Option<u64> = None; |
329 | if let Some(pax_extensions_ref) = &pax_extensions { |
330 | pax_size = pax_extensions_value(pax_extensions_ref, PAX_SIZE); |
331 | |
332 | if let Some(pax_uid) = pax_extensions_value(pax_extensions_ref, PAX_UID) { |
333 | header.set_uid(pax_uid); |
334 | } |
335 | |
336 | if let Some(pax_gid) = pax_extensions_value(pax_extensions_ref, PAX_GID) { |
337 | header.set_gid(pax_gid); |
338 | } |
339 | } |
340 | |
341 | let file_pos = self.next; |
342 | let mut size = header.entry_size()?; |
343 | if size == 0 { |
344 | if let Some(pax_size) = pax_size { |
345 | size = pax_size; |
346 | } |
347 | } |
348 | let ret = EntryFields { |
349 | size: size, |
350 | header_pos: header_pos, |
351 | file_pos: file_pos, |
352 | data: vec![EntryIo::Data((&self.archive.inner).take(size))], |
353 | header: header, |
354 | long_pathname: None, |
355 | long_linkname: None, |
356 | pax_extensions: None, |
357 | mask: self.archive.inner.mask, |
358 | unpack_xattrs: self.archive.inner.unpack_xattrs, |
359 | preserve_permissions: self.archive.inner.preserve_permissions, |
360 | preserve_mtime: self.archive.inner.preserve_mtime, |
361 | overwrite: self.archive.inner.overwrite, |
362 | preserve_ownerships: self.archive.inner.preserve_ownerships, |
363 | }; |
364 | |
365 | // Store where the next entry is, rounding up by 512 bytes (the size of |
366 | // a header); |
367 | let size = size |
368 | .checked_add(BLOCK_SIZE - 1) |
369 | .ok_or_else(|| other("size overflow" ))?; |
370 | self.next = self |
371 | .next |
372 | .checked_add(size & !(BLOCK_SIZE - 1)) |
373 | .ok_or_else(|| other("size overflow" ))?; |
374 | |
375 | Ok(Some(ret.into_entry())) |
376 | } |
377 | |
378 | fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> { |
379 | if self.raw { |
380 | return self.next_entry_raw(None); |
381 | } |
382 | |
383 | let mut gnu_longname = None; |
384 | let mut gnu_longlink = None; |
385 | let mut pax_extensions = None; |
386 | let mut processed = 0; |
387 | loop { |
388 | processed += 1; |
389 | let entry = match self.next_entry_raw(pax_extensions.as_deref())? { |
390 | Some(entry) => entry, |
391 | None if processed > 1 => { |
392 | return Err(other( |
393 | "members found describing a future member \ |
394 | but no future member found" , |
395 | )); |
396 | } |
397 | None => return Ok(None), |
398 | }; |
399 | |
400 | let is_recognized_header = |
401 | entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some(); |
402 | |
403 | if is_recognized_header && entry.header().entry_type().is_gnu_longname() { |
404 | if gnu_longname.is_some() { |
405 | return Err(other( |
406 | "two long name entries describing \ |
407 | the same member" , |
408 | )); |
409 | } |
410 | gnu_longname = Some(EntryFields::from(entry).read_all()?); |
411 | continue; |
412 | } |
413 | |
414 | if is_recognized_header && entry.header().entry_type().is_gnu_longlink() { |
415 | if gnu_longlink.is_some() { |
416 | return Err(other( |
417 | "two long name entries describing \ |
418 | the same member" , |
419 | )); |
420 | } |
421 | gnu_longlink = Some(EntryFields::from(entry).read_all()?); |
422 | continue; |
423 | } |
424 | |
425 | if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() { |
426 | if pax_extensions.is_some() { |
427 | return Err(other( |
428 | "two pax extensions entries describing \ |
429 | the same member" , |
430 | )); |
431 | } |
432 | pax_extensions = Some(EntryFields::from(entry).read_all()?); |
433 | continue; |
434 | } |
435 | |
436 | let mut fields = EntryFields::from(entry); |
437 | fields.long_pathname = gnu_longname; |
438 | fields.long_linkname = gnu_longlink; |
439 | fields.pax_extensions = pax_extensions; |
440 | self.parse_sparse_header(&mut fields)?; |
441 | return Ok(Some(fields.into_entry())); |
442 | } |
443 | } |
444 | |
445 | fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> { |
446 | if !entry.header.entry_type().is_gnu_sparse() { |
447 | return Ok(()); |
448 | } |
449 | let gnu = match entry.header.as_gnu() { |
450 | Some(gnu) => gnu, |
451 | None => return Err(other("sparse entry type listed but not GNU header" )), |
452 | }; |
453 | |
454 | // Sparse files are represented internally as a list of blocks that are |
455 | // read. Blocks are either a bunch of 0's or they're data from the |
456 | // underlying archive. |
457 | // |
458 | // Blocks of a sparse file are described by the `GnuSparseHeader` |
459 | // structure, some of which are contained in `GnuHeader` but some of |
460 | // which may also be contained after the first header in further |
461 | // headers. |
462 | // |
463 | // We read off all the blocks here and use the `add_block` function to |
464 | // incrementally add them to the list of I/O block (in `entry.data`). |
465 | // The `add_block` function also validates that each chunk comes after |
466 | // the previous, we don't overrun the end of the file, and each block is |
467 | // aligned to a 512-byte boundary in the archive itself. |
468 | // |
469 | // At the end we verify that the sparse file size (`Header::size`) is |
470 | // the same as the current offset (described by the list of blocks) as |
471 | // well as the amount of data read equals the size of the entry |
472 | // (`Header::entry_size`). |
473 | entry.data.truncate(0); |
474 | |
475 | let mut cur = 0; |
476 | let mut remaining = entry.size; |
477 | { |
478 | let data = &mut entry.data; |
479 | let reader = &self.archive.inner; |
480 | let size = entry.size; |
481 | let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> { |
482 | if block.is_empty() { |
483 | return Ok(()); |
484 | } |
485 | let off = block.offset()?; |
486 | let len = block.length()?; |
487 | if len != 0 && (size - remaining) % BLOCK_SIZE != 0 { |
488 | return Err(other( |
489 | "previous block in sparse file was not \ |
490 | aligned to 512-byte boundary" , |
491 | )); |
492 | } else if off < cur { |
493 | return Err(other( |
494 | "out of order or overlapping sparse \ |
495 | blocks" , |
496 | )); |
497 | } else if cur < off { |
498 | let block = io::repeat(0).take(off - cur); |
499 | data.push(EntryIo::Pad(block)); |
500 | } |
501 | cur = off |
502 | .checked_add(len) |
503 | .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold" ))?; |
504 | remaining = remaining.checked_sub(len).ok_or_else(|| { |
505 | other( |
506 | "sparse file consumed more data than the header \ |
507 | listed" , |
508 | ) |
509 | })?; |
510 | data.push(EntryIo::Data(reader.take(len))); |
511 | Ok(()) |
512 | }; |
513 | for block in gnu.sparse.iter() { |
514 | add_block(block)? |
515 | } |
516 | if gnu.is_extended() { |
517 | let mut ext = GnuExtSparseHeader::new(); |
518 | ext.isextended[0] = 1; |
519 | while ext.is_extended() { |
520 | if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? { |
521 | return Err(other("failed to read extension" )); |
522 | } |
523 | |
524 | self.next += BLOCK_SIZE; |
525 | for block in ext.sparse.iter() { |
526 | add_block(block)?; |
527 | } |
528 | } |
529 | } |
530 | } |
531 | if cur != gnu.real_size()? { |
532 | return Err(other( |
533 | "mismatch in sparse file chunks and \ |
534 | size in header" , |
535 | )); |
536 | } |
537 | entry.size = cur; |
538 | if remaining > 0 { |
539 | return Err(other( |
540 | "mismatch in sparse file chunks and \ |
541 | entry size in header" , |
542 | )); |
543 | } |
544 | Ok(()) |
545 | } |
546 | |
547 | fn skip(&mut self, mut amt: u64) -> io::Result<()> { |
548 | if let Some(seekable_archive) = self.seekable_archive { |
549 | let pos = io::SeekFrom::Current( |
550 | i64::try_from(amt).map_err(|_| other("seek position out of bounds" ))?, |
551 | ); |
552 | (&seekable_archive.inner).seek(pos)?; |
553 | } else { |
554 | let mut buf = [0u8; 4096 * 8]; |
555 | while amt > 0 { |
556 | let n = cmp::min(amt, buf.len() as u64); |
557 | let n = (&self.archive.inner).read(&mut buf[..n as usize])?; |
558 | if n == 0 { |
559 | return Err(other("unexpected EOF during skip" )); |
560 | } |
561 | amt -= n as u64; |
562 | } |
563 | } |
564 | Ok(()) |
565 | } |
566 | } |
567 | |
568 | impl<'a> Iterator for EntriesFields<'a> { |
569 | type Item = io::Result<Entry<'a, io::Empty>>; |
570 | |
571 | fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> { |
572 | if self.done { |
573 | None |
574 | } else { |
575 | match self.next_entry() { |
576 | Ok(Some(e: Entry<'a, Empty>)) => Some(Ok(e)), |
577 | Ok(None) => { |
578 | self.done = true; |
579 | None |
580 | } |
581 | Err(e: Error) => { |
582 | self.done = true; |
583 | Some(Err(e)) |
584 | } |
585 | } |
586 | } |
587 | } |
588 | } |
589 | |
590 | impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> { |
591 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
592 | let i: usize = self.obj.borrow_mut().read(buf:into)?; |
593 | self.pos.set(self.pos.get() + i as u64); |
594 | Ok(i) |
595 | } |
596 | } |
597 | |
598 | impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner<R> { |
599 | fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> { |
600 | let pos: u64 = self.obj.borrow_mut().seek(pos)?; |
601 | self.pos.set(val:pos); |
602 | Ok(pos) |
603 | } |
604 | } |
605 | |
606 | /// Try to fill the buffer from the reader. |
607 | /// |
608 | /// If the reader reaches its end before filling the buffer at all, returns `false`. |
609 | /// Otherwise returns `true`. |
610 | fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> { |
611 | let mut read: usize = 0; |
612 | while read < buf.len() { |
613 | match r.read(&mut buf[read..])? { |
614 | 0 => { |
615 | if read == 0 { |
616 | return Ok(false); |
617 | } |
618 | |
619 | return Err(other(msg:"failed to read entire block" )); |
620 | } |
621 | n: usize => read += n, |
622 | } |
623 | } |
624 | Ok(true) |
625 | } |
626 | |