use std::cell::{Cell, RefCell};
use std::cmp;
use std::convert::TryFrom;
use std::fs;
use std::io::prelude::*;
use std::io::{self, SeekFrom};
use std::marker;
use std::path::Path;

use crate::entry::{EntryFields, EntryIo};
use crate::error::TarError;
use crate::other;
use crate::pax::*;
use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header};

/// A top-level representation of an archive file.
///
/// This archive can be iterated over to inspect or unpack each of its entries.
pub struct Archive<R: ?Sized + Read> {
    inner: ArchiveInner<R>,
}

pub struct ArchiveInner<R: ?Sized> {
    pos: Cell<u64>,
    mask: u32,
    unpack_xattrs: bool,
    preserve_permissions: bool,
    preserve_ownerships: bool,
    preserve_mtime: bool,
    overwrite: bool,
    ignore_zeros: bool,
    obj: RefCell<R>,
}

/// An iterator over the entries of an archive.
pub struct Entries<'a, R: 'a + Read> {
    fields: EntriesFields<'a>,
    _ignored: marker::PhantomData<&'a Archive<R>>,
}

trait SeekRead: Read + Seek {}
impl<R: Read + Seek> SeekRead for R {}

struct EntriesFields<'a> {
    archive: &'a Archive<dyn Read + 'a>,
    seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
    next: u64,
    done: bool,
    raw: bool,
}

impl<R: Read> Archive<R> {
    /// Create a new archive with the underlying object as the reader.
    pub fn new(obj: R) -> Archive<R> {
        Archive {
            inner: ArchiveInner {
                mask: u32::MIN,
                unpack_xattrs: false,
                preserve_permissions: false,
                preserve_ownerships: false,
                preserve_mtime: true,
                overwrite: true,
                ignore_zeros: false,
                obj: RefCell::new(obj),
                pos: Cell::new(0),
            },
        }
    }

    /// Unwrap this archive, returning the underlying object.
    pub fn into_inner(self) -> R {
        self.inner.obj.into_inner()
    }

    /// Construct an iterator over the entries in this archive.
    ///
    /// Note that care must be taken to consider each entry within an archive in
    /// sequence. If entries are processed out of sequence (from what the
    /// iterator returns), then the contents read for each entry may be
    /// corrupted.
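    ///
    /// # Examples
    ///
    /// A minimal sketch, assuming a `foo.tar` archive exists on disk:
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    /// for entry in ar.entries().unwrap() {
    ///     // Process each entry before advancing the iterator.
    ///     let entry = entry.unwrap();
    ///     println!("{}", entry.path().unwrap().display());
    /// }
    /// ```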
    pub fn entries(&mut self) -> io::Result<Entries<R>> {
        let me: &mut Archive<dyn Read> = self;
        me._entries(None).map(|fields| Entries {
            fields: fields,
            _ignored: marker::PhantomData,
        })
    }

    /// Unpacks the contents of this tarball into the specified `dst`.
    ///
    /// This function will iterate over the entire contents of this tarball,
    /// extracting each file in turn to the location specified by the entry's
    /// path name.
    ///
    /// This operation is relatively sensitive in that it will not write files
    /// outside of the path specified by `dst`. Files in the archive which have
    /// a '..' in their path are skipped during the unpacking process.
    ///
    /// # Examples
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    /// ar.unpack("foo").unwrap();
    /// ```
    pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> {
        let me: &mut Archive<dyn Read> = self;
        me._unpack(dst.as_ref())
    }

    /// Set the mask of the permission bits when unpacking this entry.
    ///
    /// The mask will be inverted when applying against a mode, similar to how
    /// `umask` works on Unix. In logical notation it looks like:
    ///
    /// ```text
    /// new_mode = old_mode & (~mask)
    /// ```
    ///
    /// The mask is 0 by default and is currently only implemented on Unix.
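    ///
    /// # Examples
    ///
    /// A minimal sketch, assuming a `foo.tar` archive exists on disk. Since
    /// `new_mode = old_mode & (~mask)`, a mask of `0o022` clears the group and
    /// other write bits, much as a `umask` of `022` would:
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    /// ar.set_mask(0o022);
    /// ar.unpack("foo").unwrap();
    /// ```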
    pub fn set_mask(&mut self, mask: u32) {
        self.inner.mask = mask;
    }

    /// Indicate whether extended file attributes (xattrs on Unix) are preserved
    /// when unpacking this archive.
    ///
    /// This flag is disabled by default and is currently only implemented on
    /// Unix using xattr support. This may eventually be implemented for
    /// Windows, however, if other archive implementations are found which do
    /// this as well.
    pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
        self.inner.unpack_xattrs = unpack_xattrs;
    }

    /// Indicate whether extended permissions (like suid on Unix) are preserved
    /// when unpacking this entry.
    ///
    /// This flag is disabled by default and is currently only implemented on
    /// Unix.
    pub fn set_preserve_permissions(&mut self, preserve: bool) {
        self.inner.preserve_permissions = preserve;
    }

    /// Indicate whether numeric ownership ids (like uid and gid on Unix)
    /// are preserved when unpacking this entry.
    ///
    /// This flag is disabled by default and is currently only implemented on
    /// Unix.
    pub fn set_preserve_ownerships(&mut self, preserve: bool) {
        self.inner.preserve_ownerships = preserve;
    }

    /// Indicate whether files and symlinks should be overwritten on extraction.
    pub fn set_overwrite(&mut self, overwrite: bool) {
        self.inner.overwrite = overwrite;
    }

    /// Indicate whether modification time (mtime) information is preserved
    /// when unpacking this entry.
    ///
    /// This flag is enabled by default.
    pub fn set_preserve_mtime(&mut self, preserve: bool) {
        self.inner.preserve_mtime = preserve;
    }

    /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more
    /// entries.
    ///
    /// This can be used in case multiple tar archives have been concatenated together.
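    ///
    /// # Examples
    ///
    /// A minimal sketch, assuming two hypothetical archives `a.tar` and
    /// `b.tar` that are concatenated into one stream (here via `Read::chain`):
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use std::io::Read;
    /// use tar::Archive;
    ///
    /// let a = File::open("a.tar").unwrap();
    /// let b = File::open("b.tar").unwrap();
    /// let mut ar = Archive::new(a.chain(b));
    /// ar.set_ignore_zeros(true);
    /// for entry in ar.entries().unwrap() {
    ///     println!("{}", entry.unwrap().path().unwrap().display());
    /// }
    /// ```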
    pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) {
        self.inner.ignore_zeros = ignore_zeros;
    }
}

impl<R: Seek + Read> Archive<R> {
    /// Construct an iterator over the entries in this archive for a seekable
    /// reader. Seek will be used to efficiently skip over file contents.
    ///
    /// Note that care must be taken to consider each entry within an archive in
    /// sequence. If entries are processed out of sequence (from what the
    /// iterator returns), then the contents read for each entry may be
    /// corrupted.
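    ///
    /// # Examples
    ///
    /// A minimal sketch, assuming a `foo.tar` archive exists on disk. `File`
    /// implements both `Read` and `Seek`, so unread file contents are skipped
    /// with a seek rather than read and discarded:
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    /// for entry in ar.entries_with_seek().unwrap() {
    ///     println!("{}", entry.unwrap().path().unwrap().display());
    /// }
    /// ```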
    pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> {
        let me: &Archive<dyn Read> = self;
        let me_seekable: &Archive<dyn SeekRead> = self;
        me._entries(Some(me_seekable)).map(|fields| Entries {
            fields: fields,
            _ignored: marker::PhantomData,
        })
    }
}

impl Archive<dyn Read + '_> {
    fn _entries<'a>(
        &'a self,
        seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
    ) -> io::Result<EntriesFields<'a>> {
        if self.inner.pos.get() != 0 {
            return Err(other(
                "cannot call entries unless archive is at \
                 position 0",
            ));
        }
        Ok(EntriesFields {
            archive: self,
            seekable_archive,
            done: false,
            next: 0,
            raw: false,
        })
    }

    fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
        if dst.symlink_metadata().is_err() {
            fs::create_dir_all(&dst)
                .map_err(|e| TarError::new(format!("failed to create `{}`", dst.display()), e))?;
        }

        // Canonicalizing the dst directory will prepend the path with '\\?\'
        // on Windows, which allows Windows APIs to treat the path as an
        // extended-length path with a 32,767 character limit. Otherwise all
        // unpacked paths over 260 characters will fail on creation with a
        // NotFound error.
        let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf());

        // Delay any directory entries until the end (they will be created if needed by
        // descendants), to ensure that directory permissions do not interfere with descendant
        // extraction.
        let mut directories = Vec::new();
        for entry in self._entries(None)? {
            let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
            if file.header().entry_type() == crate::EntryType::Directory {
                directories.push(file);
            } else {
                file.unpack_in(dst)?;
            }
        }
        for mut dir in directories {
            dir.unpack_in(dst)?;
        }

        Ok(())
    }
}

impl<'a, R: Read> Entries<'a, R> {
    /// Indicates whether this iterator will return raw entries or not.
    ///
    /// If the raw list of entries is returned, then no preprocessing happens
    /// on account of this library, for example taking into account GNU long name
    /// or long link archive members. Raw iteration is disabled by default.
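    ///
    /// # Examples
    ///
    /// A minimal sketch, assuming a `foo.tar` archive exists on disk; raw
    /// iteration also yields the GNU long-name/long-link and pax extension
    /// members that would otherwise be folded into the entries they describe:
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    /// for entry in ar.entries().unwrap().raw(true) {
    ///     let entry = entry.unwrap();
    ///     println!("{}", entry.path().unwrap().display());
    /// }
    /// ```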
    pub fn raw(self, raw: bool) -> Entries<'a, R> {
        Entries {
            fields: EntriesFields {
                raw: raw,
                ..self.fields
            },
            _ignored: marker::PhantomData,
        }
    }
}

impl<'a, R: Read> Iterator for Entries<'a, R> {
    type Item = io::Result<Entry<'a, R>>;

    fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> {
        self.fields
            .next()
            .map(|result| result.map(|e| EntryFields::from(e).into_entry()))
    }
}

impl<'a> EntriesFields<'a> {
    fn next_entry_raw(
        &mut self,
        pax_extensions: Option<&[u8]>,
    ) -> io::Result<Option<Entry<'a, io::Empty>>> {
        let mut header = Header::new_old();
        let mut header_pos = self.next;
        loop {
            // Seek to the start of the next header in the archive
            let delta = self.next - self.archive.inner.pos.get();
            self.skip(delta)?;

            // EOF is an indicator that we are at the end of the archive.
            if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
                return Ok(None);
            }

            // If a header is not all zeros, we have another valid header.
            // Otherwise, check if we are ignoring zeros and continue, or break as if this is the
            // end of the archive.
            if !header.as_bytes().iter().all(|i| *i == 0) {
                self.next += 512;
                break;
            }

            if !self.archive.inner.ignore_zeros {
                return Ok(None);
            }
            self.next += 512;
            header_pos = self.next;
        }

        // Make sure the checksum is ok
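        // The checksum is computed over the whole 512-byte header with the
        // 8-byte checksum field itself counted as ASCII spaces (0x20 == 32),
        // hence the `+ 8 * 32` term below.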
        let sum = header.as_bytes()[..148]
            .iter()
            .chain(&header.as_bytes()[156..])
            .fold(0, |a, b| a + (*b as u32))
            + 8 * 32;
        let cksum = header.cksum()?;
        if sum != cksum {
            return Err(other("archive header checksum mismatch"));
        }

        let mut pax_size: Option<u64> = None;
        if let Some(pax_extensions_ref) = &pax_extensions {
            pax_size = pax_extensions_value(pax_extensions_ref, PAX_SIZE);

            if let Some(pax_uid) = pax_extensions_value(pax_extensions_ref, PAX_UID) {
                header.set_uid(pax_uid);
            }

            if let Some(pax_gid) = pax_extensions_value(pax_extensions_ref, PAX_GID) {
                header.set_gid(pax_gid);
            }
        }

        let file_pos = self.next;
        let mut size = header.entry_size()?;
        if size == 0 {
            if let Some(pax_size) = pax_size {
                size = pax_size;
            }
        }
        let ret = EntryFields {
            size: size,
            header_pos: header_pos,
            file_pos: file_pos,
            data: vec![EntryIo::Data((&self.archive.inner).take(size))],
            header: header,
            long_pathname: None,
            long_linkname: None,
            pax_extensions: None,
            mask: self.archive.inner.mask,
            unpack_xattrs: self.archive.inner.unpack_xattrs,
            preserve_permissions: self.archive.inner.preserve_permissions,
            preserve_mtime: self.archive.inner.preserve_mtime,
            overwrite: self.archive.inner.overwrite,
            preserve_ownerships: self.archive.inner.preserve_ownerships,
        };

        // Store where the next entry is, rounding up by 512 bytes (the size of
        // a header).
        let size = size
            .checked_add(511)
            .ok_or_else(|| other("size overflow"))?;
        self.next = self
            .next
            .checked_add(size & !(512 - 1))
            .ok_or_else(|| other("size overflow"))?;

        Ok(Some(ret.into_entry()))
    }

    fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> {
        if self.raw {
            return self.next_entry_raw(None);
        }

        let mut gnu_longname = None;
        let mut gnu_longlink = None;
        let mut pax_extensions = None;
        let mut processed = 0;
        loop {
            processed += 1;
            let entry = match self.next_entry_raw(pax_extensions.as_deref())? {
                Some(entry) => entry,
                None if processed > 1 => {
                    return Err(other(
                        "members found describing a future member \
                         but no future member found",
                    ));
                }
                None => return Ok(None),
            };

            let is_recognized_header =
                entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some();

            if is_recognized_header && entry.header().entry_type().is_gnu_longname() {
                if gnu_longname.is_some() {
                    return Err(other(
                        "two long name entries describing \
                         the same member",
                    ));
                }
                gnu_longname = Some(EntryFields::from(entry).read_all()?);
                continue;
            }

            if is_recognized_header && entry.header().entry_type().is_gnu_longlink() {
                if gnu_longlink.is_some() {
                    return Err(other(
                        "two long link entries describing \
                         the same member",
                    ));
                }
                gnu_longlink = Some(EntryFields::from(entry).read_all()?);
                continue;
            }

            if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() {
                if pax_extensions.is_some() {
                    return Err(other(
                        "two pax extensions entries describing \
                         the same member",
                    ));
                }
                pax_extensions = Some(EntryFields::from(entry).read_all()?);
                continue;
            }

            let mut fields = EntryFields::from(entry);
            fields.long_pathname = gnu_longname;
            fields.long_linkname = gnu_longlink;
            fields.pax_extensions = pax_extensions;
            self.parse_sparse_header(&mut fields)?;
            return Ok(Some(fields.into_entry()));
        }
    }

    fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
        if !entry.header.entry_type().is_gnu_sparse() {
            return Ok(());
        }
        let gnu = match entry.header.as_gnu() {
            Some(gnu) => gnu,
            None => return Err(other("sparse entry type listed but not GNU header")),
        };

        // Sparse files are represented internally as a list of blocks that are
        // read. Blocks are either a bunch of 0's or they're data from the
        // underlying archive.
        //
        // Blocks of a sparse file are described by the `GnuSparseHeader`
        // structure, some of which are contained in `GnuHeader` but some of
        // which may also be contained after the first header in further
        // headers.
        //
        // We read off all the blocks here and use the `add_block` function to
        // incrementally add them to the list of I/O blocks (in `entry.data`).
        // The `add_block` function also validates that each chunk comes after
        // the previous one, that we don't overrun the end of the file, and
        // that each block is aligned to a 512-byte boundary in the archive
        // itself.
        //
        // At the end we verify that the sparse file size (`Header::size`) is
        // the same as the current offset (described by the list of blocks) and
        // that the amount of data read equals the size of the entry
        // (`Header::entry_size`).
        entry.data.truncate(0);

        let mut cur = 0;
        let mut remaining = entry.size;
        {
            let data = &mut entry.data;
            let reader = &self.archive.inner;
            let size = entry.size;
            let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
                if block.is_empty() {
                    return Ok(());
                }
                let off = block.offset()?;
                let len = block.length()?;
                if len != 0 && (size - remaining) % 512 != 0 {
                    return Err(other(
                        "previous block in sparse file was not \
                         aligned to 512-byte boundary",
                    ));
                } else if off < cur {
                    return Err(other(
                        "out of order or overlapping sparse \
                         blocks",
                    ));
                } else if cur < off {
                    let block = io::repeat(0).take(off - cur);
                    data.push(EntryIo::Pad(block));
                }
                cur = off
                    .checked_add(len)
                    .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?;
                remaining = remaining.checked_sub(len).ok_or_else(|| {
                    other(
                        "sparse file consumed more data than the header \
                         listed",
                    )
                })?;
                data.push(EntryIo::Data(reader.take(len)));
                Ok(())
            };
            for block in gnu.sparse.iter() {
                add_block(block)?
            }
            if gnu.is_extended() {
                let mut ext = GnuExtSparseHeader::new();
                ext.isextended[0] = 1;
                while ext.is_extended() {
                    if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
                        return Err(other("failed to read extension"));
                    }

                    self.next += 512;
                    for block in ext.sparse.iter() {
                        add_block(block)?;
                    }
                }
            }
        }
        if cur != gnu.real_size()? {
            return Err(other(
                "mismatch in sparse file chunks and \
                 size in header",
            ));
        }
        entry.size = cur;
        if remaining > 0 {
            return Err(other(
                "mismatch in sparse file chunks and \
                 entry size in header",
            ));
        }
        Ok(())
    }

    fn skip(&mut self, mut amt: u64) -> io::Result<()> {
        if let Some(seekable_archive) = self.seekable_archive {
            let pos = io::SeekFrom::Current(
                i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?,
            );
            (&seekable_archive.inner).seek(pos)?;
        } else {
            let mut buf = [0u8; 4096 * 8];
            while amt > 0 {
                let n = cmp::min(amt, buf.len() as u64);
                let n = (&self.archive.inner).read(&mut buf[..n as usize])?;
                if n == 0 {
                    return Err(other("unexpected EOF during skip"));
                }
                amt -= n as u64;
            }
        }
        Ok(())
    }
}

impl<'a> Iterator for EntriesFields<'a> {
    type Item = io::Result<Entry<'a, io::Empty>>;

    fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> {
        if self.done {
            None
        } else {
            match self.next_entry() {
                Ok(Some(e)) => Some(Ok(e)),
                Ok(None) => {
                    self.done = true;
                    None
                }
                Err(e) => {
                    self.done = true;
                    Some(Err(e))
                }
            }
        }
    }
}

impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> {
    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
        let i = self.obj.borrow_mut().read(into)?;
        self.pos.set(self.pos.get() + i as u64);
        Ok(i)
    }
}

impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner<R> {
    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
        let pos = self.obj.borrow_mut().seek(pos)?;
        self.pos.set(pos);
        Ok(pos)
    }
}

/// Try to fill the buffer from the reader.
///
/// If the reader reaches its end before filling the buffer at all, returns `false`.
/// Otherwise returns `true`.
fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> {
    let mut read = 0;
    while read < buf.len() {
        match r.read(&mut buf[read..])? {
            0 => {
                if read == 0 {
                    return Ok(false);
                }

                return Err(other("failed to read entire block"));
            }
            n => read += n,
        }
    }
    Ok(true)
}