1 | use std::cell::{Cell, RefCell}; |
2 | use std::cmp; |
3 | use std::convert::TryFrom; |
4 | use std::fs; |
5 | use std::io::prelude::*; |
6 | use std::io::{self, SeekFrom}; |
7 | use std::marker; |
8 | use std::path::Path; |
9 | |
10 | use crate::entry::{EntryFields, EntryIo}; |
11 | use crate::error::TarError; |
12 | use crate::other; |
13 | use crate::pax::pax_extensions_size; |
14 | use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header}; |
15 | |
/// A top-level representation of an archive file.
///
/// This archive can have an entry added to it and it can be iterated over.
pub struct Archive<R: ?Sized + Read> {
    // Interior state shared with every `Entry` handed out by iteration.
    inner: ArchiveInner<R>,
}
22 | |
/// Shared archive state: the underlying reader plus unpacking options.
///
/// `pos` and `obj` use interior mutability (`Cell`/`RefCell`) so that
/// reads can be performed through a shared `&ArchiveInner` (see the
/// `Read`/`Seek` impls for `&ArchiveInner<R>` below).
pub struct ArchiveInner<R: ?Sized> {
    // Current byte offset within the archive stream; updated by every read/seek.
    pos: Cell<u64>,
    // Restore extended file attributes (xattrs) when unpacking.
    unpack_xattrs: bool,
    // Restore extended permission bits (e.g. suid) when unpacking.
    preserve_permissions: bool,
    // Restore modification times when unpacking.
    preserve_mtime: bool,
    // Allow overwriting existing files/symlinks on extraction.
    overwrite: bool,
    // Treat all-zero headers as padding to skip rather than end-of-archive.
    ignore_zeros: bool,
    // The underlying reader; `R: ?Sized` permits `dyn Read` here.
    obj: RefCell<R>,
}
32 | |
/// An iterator over the entries of an archive.
pub struct Entries<'a, R: 'a + Read> {
    // Type-erased implementation; `Entries` only re-attaches the `R` type.
    fields: EntriesFields<'a>,
    // Ties the iterator's lifetime/type to the borrowed archive.
    _ignored: marker::PhantomData<&'a Archive<R>>,
}
38 | |
// Trait alias for readers that can also seek; the blanket impl makes any
// `Read + Seek` type coercible to `dyn SeekRead` for efficient skipping.
trait SeekRead: Read + Seek {}
impl<R: Read + Seek> SeekRead for R {}
41 | |
// Non-generic core of the `Entries` iterator, working over trait objects so
// the parsing code is compiled once regardless of the concrete reader type.
struct EntriesFields<'a> {
    // The archive being iterated, type-erased to `dyn Read`.
    archive: &'a Archive<dyn Read + 'a>,
    // Same archive as a seekable trait object, when `entries_with_seek` was
    // used; enables `skip` to seek instead of reading and discarding.
    seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
    // Absolute offset of the next header to parse.
    next: u64,
    // Set once EOF or an error is hit; the iterator then yields `None`.
    done: bool,
    // When true, no GNU long-name/long-link or PAX preprocessing is done.
    raw: bool,
}
49 | |
50 | impl<R: Read> Archive<R> { |
51 | /// Create a new archive with the underlying object as the reader. |
52 | pub fn new(obj: R) -> Archive<R> { |
53 | Archive { |
54 | inner: ArchiveInner { |
55 | unpack_xattrs: false, |
56 | preserve_permissions: false, |
57 | preserve_mtime: true, |
58 | overwrite: true, |
59 | ignore_zeros: false, |
60 | obj: RefCell::new(obj), |
61 | pos: Cell::new(0), |
62 | }, |
63 | } |
64 | } |
65 | |
66 | /// Unwrap this archive, returning the underlying object. |
67 | pub fn into_inner(self) -> R { |
68 | self.inner.obj.into_inner() |
69 | } |
70 | |
71 | /// Construct an iterator over the entries in this archive. |
72 | /// |
73 | /// Note that care must be taken to consider each entry within an archive in |
74 | /// sequence. If entries are processed out of sequence (from what the |
75 | /// iterator returns), then the contents read for each entry may be |
76 | /// corrupted. |
77 | pub fn entries(&mut self) -> io::Result<Entries<R>> { |
78 | let me: &mut Archive<dyn Read> = self; |
79 | me._entries(None).map(|fields| Entries { |
80 | fields: fields, |
81 | _ignored: marker::PhantomData, |
82 | }) |
83 | } |
84 | |
85 | /// Unpacks the contents tarball into the specified `dst`. |
86 | /// |
87 | /// This function will iterate over the entire contents of this tarball, |
88 | /// extracting each file in turn to the location specified by the entry's |
89 | /// path name. |
90 | /// |
91 | /// This operation is relatively sensitive in that it will not write files |
92 | /// outside of the path specified by `dst`. Files in the archive which have |
93 | /// a '..' in their path are skipped during the unpacking process. |
94 | /// |
95 | /// # Examples |
96 | /// |
97 | /// ```no_run |
98 | /// use std::fs::File; |
99 | /// use tar::Archive; |
100 | /// |
101 | /// let mut ar = Archive::new(File::open("foo.tar" ).unwrap()); |
102 | /// ar.unpack("foo" ).unwrap(); |
103 | /// ``` |
104 | pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> { |
105 | let me: &mut Archive<dyn Read> = self; |
106 | me._unpack(dst.as_ref()) |
107 | } |
108 | |
109 | /// Indicate whether extended file attributes (xattrs on Unix) are preserved |
110 | /// when unpacking this archive. |
111 | /// |
112 | /// This flag is disabled by default and is currently only implemented on |
113 | /// Unix using xattr support. This may eventually be implemented for |
114 | /// Windows, however, if other archive implementations are found which do |
115 | /// this as well. |
116 | pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) { |
117 | self.inner.unpack_xattrs = unpack_xattrs; |
118 | } |
119 | |
120 | /// Indicate whether extended permissions (like suid on Unix) are preserved |
121 | /// when unpacking this entry. |
122 | /// |
123 | /// This flag is disabled by default and is currently only implemented on |
124 | /// Unix. |
125 | pub fn set_preserve_permissions(&mut self, preserve: bool) { |
126 | self.inner.preserve_permissions = preserve; |
127 | } |
128 | |
129 | /// Indicate whether files and symlinks should be overwritten on extraction. |
130 | pub fn set_overwrite(&mut self, overwrite: bool) { |
131 | self.inner.overwrite = overwrite; |
132 | } |
133 | |
134 | /// Indicate whether access time information is preserved when unpacking |
135 | /// this entry. |
136 | /// |
137 | /// This flag is enabled by default. |
138 | pub fn set_preserve_mtime(&mut self, preserve: bool) { |
139 | self.inner.preserve_mtime = preserve; |
140 | } |
141 | |
142 | /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more |
143 | /// entries. |
144 | /// |
145 | /// This can be used in case multiple tar archives have been concatenated together. |
146 | pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) { |
147 | self.inner.ignore_zeros = ignore_zeros; |
148 | } |
149 | } |
150 | |
151 | impl<R: Seek + Read> Archive<R> { |
152 | /// Construct an iterator over the entries in this archive for a seekable |
153 | /// reader. Seek will be used to efficiently skip over file contents. |
154 | /// |
155 | /// Note that care must be taken to consider each entry within an archive in |
156 | /// sequence. If entries are processed out of sequence (from what the |
157 | /// iterator returns), then the contents read for each entry may be |
158 | /// corrupted. |
159 | pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> { |
160 | let me: &Archive<dyn Read> = self; |
161 | let me_seekable: &Archive<dyn SeekRead> = self; |
162 | me._entries(Some(me_seekable)).map(|fields: EntriesFields<'_>| Entries { |
163 | fields: fields, |
164 | _ignored: marker::PhantomData, |
165 | }) |
166 | } |
167 | } |
168 | |
169 | impl Archive<dyn Read + '_> { |
170 | fn _entries<'a>( |
171 | &'a self, |
172 | seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>, |
173 | ) -> io::Result<EntriesFields<'a>> { |
174 | if self.inner.pos.get() != 0 { |
175 | return Err(other( |
176 | "cannot call entries unless archive is at \ |
177 | position 0" , |
178 | )); |
179 | } |
180 | Ok(EntriesFields { |
181 | archive: self, |
182 | seekable_archive, |
183 | done: false, |
184 | next: 0, |
185 | raw: false, |
186 | }) |
187 | } |
188 | |
189 | fn _unpack(&mut self, dst: &Path) -> io::Result<()> { |
190 | if dst.symlink_metadata().is_err() { |
191 | fs::create_dir_all(&dst) |
192 | .map_err(|e| TarError::new(format!("failed to create ` {}`" , dst.display()), e))?; |
193 | } |
194 | |
195 | // Canonicalizing the dst directory will prepend the path with '\\?\' |
196 | // on windows which will allow windows APIs to treat the path as an |
197 | // extended-length path with a 32,767 character limit. Otherwise all |
198 | // unpacked paths over 260 characters will fail on creation with a |
199 | // NotFound exception. |
200 | let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf()); |
201 | |
202 | // Delay any directory entries until the end (they will be created if needed by |
203 | // descendants), to ensure that directory permissions do not interfer with descendant |
204 | // extraction. |
205 | let mut directories = Vec::new(); |
206 | for entry in self._entries(None)? { |
207 | let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive" , e))?; |
208 | if file.header().entry_type() == crate::EntryType::Directory { |
209 | directories.push(file); |
210 | } else { |
211 | file.unpack_in(dst)?; |
212 | } |
213 | } |
214 | for mut dir in directories { |
215 | dir.unpack_in(dst)?; |
216 | } |
217 | |
218 | Ok(()) |
219 | } |
220 | } |
221 | |
222 | impl<'a, R: Read> Entries<'a, R> { |
223 | /// Indicates whether this iterator will return raw entries or not. |
224 | /// |
225 | /// If the raw list of entries are returned, then no preprocessing happens |
226 | /// on account of this library, for example taking into account GNU long name |
227 | /// or long link archive members. Raw iteration is disabled by default. |
228 | pub fn raw(self, raw: bool) -> Entries<'a, R> { |
229 | Entries { |
230 | fields: EntriesFields { |
231 | raw: raw, |
232 | ..self.fields |
233 | }, |
234 | _ignored: marker::PhantomData, |
235 | } |
236 | } |
237 | } |
238 | impl<'a, R: Read> Iterator for Entries<'a, R> { |
239 | type Item = io::Result<Entry<'a, R>>; |
240 | |
241 | fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> { |
242 | self.fields |
243 | .next() |
244 | .map(|result: Result, …>| result.map(|e: Entry<'_, Empty>| EntryFields::from(entry:e).into_entry())) |
245 | } |
246 | } |
247 | |
impl<'a> EntriesFields<'a> {
    /// Reads the next 512-byte header and returns the corresponding entry
    /// without any GNU/PAX meta-entry post-processing.
    ///
    /// `pax_size` overrides a zero size field with the size parsed from a
    /// preceding PAX extension record. Returns `Ok(None)` at end of archive.
    fn next_entry_raw(
        &mut self,
        pax_size: Option<u64>,
    ) -> io::Result<Option<Entry<'a, io::Empty>>> {
        let mut header = Header::new_old();
        let mut header_pos = self.next;
        loop {
            // Seek to the start of the next header in the archive
            let delta = self.next - self.archive.inner.pos.get();
            self.skip(delta)?;

            // EOF is an indicator that we are at the end of the archive.
            if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
                return Ok(None);
            }

            // If a header is not all zeros, we have another valid header.
            // Otherwise, check if we are ignoring zeros and continue, or break as if this is the
            // end of the archive.
            if !header.as_bytes().iter().all(|i| *i == 0) {
                self.next += 512;
                break;
            }

            if !self.archive.inner.ignore_zeros {
                return Ok(None);
            }
            self.next += 512;
            header_pos = self.next;
        }

        // Make sure the checksum is ok
        //
        // Per the tar format, the checksum is the unsigned byte sum of the
        // whole header with the 8-byte checksum field itself (bytes 148..156)
        // counted as ASCII spaces — hence skipping that range and adding
        // `8 * 32` instead.
        let sum = header.as_bytes()[..148]
            .iter()
            .chain(&header.as_bytes()[156..])
            .fold(0, |a, b| a + (*b as u32))
            + 8 * 32;
        let cksum = header.cksum()?;
        if sum != cksum {
            return Err(other("archive header checksum mismatch"));
        }

        let file_pos = self.next;
        let mut size = header.entry_size()?;
        // A zero size may mean the real size lives in a PAX record that was
        // parsed earlier; substitute it if so.
        if size == 0 {
            if let Some(pax_size) = pax_size {
                size = pax_size;
            }
        }
        let ret = EntryFields {
            size: size,
            header_pos: header_pos,
            file_pos: file_pos,
            // The entry reads its contents lazily from the shared archive
            // reader, limited to this entry's size.
            data: vec![EntryIo::Data((&self.archive.inner).take(size))],
            header: header,
            long_pathname: None,
            long_linkname: None,
            pax_extensions: None,
            unpack_xattrs: self.archive.inner.unpack_xattrs,
            preserve_permissions: self.archive.inner.preserve_permissions,
            preserve_mtime: self.archive.inner.preserve_mtime,
            overwrite: self.archive.inner.overwrite,
        };

        // Store where the next entry is, rounding up by 512 bytes (the size of
        // a header); checked arithmetic guards against maliciously huge sizes.
        let size = size
            .checked_add(511)
            .ok_or_else(|| other("size overflow"))?;
        self.next = self
            .next
            .checked_add(size & !(512 - 1))
            .ok_or_else(|| other("size overflow"))?;

        Ok(Some(ret.into_entry()))
    }

    /// Reads the next logical entry, folding GNU long-name/long-link and PAX
    /// extension meta-entries into the member they describe.
    fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> {
        if self.raw {
            // Raw mode: hand back every physical entry untouched.
            return self.next_entry_raw(None);
        }

        let mut gnu_longname = None;
        let mut gnu_longlink = None;
        let mut pax_extensions = None;
        let mut pax_size = None;
        let mut processed = 0;
        loop {
            processed += 1;
            // `processed > 1` means we already consumed a meta-entry, so
            // hitting EOF here leaves it describing nothing — an error.
            let entry = match self.next_entry_raw(pax_size)? {
                Some(entry) => entry,
                None if processed > 1 => {
                    return Err(other(
                        "members found describing a future member \
                         but no future member found",
                    ));
                }
                None => return Ok(None),
            };

            // Meta-entries are only honored for GNU or ustar headers.
            let is_recognized_header =
                entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some();

            if is_recognized_header && entry.header().entry_type().is_gnu_longname() {
                if gnu_longname.is_some() {
                    return Err(other(
                        "two long name entries describing \
                         the same member",
                    ));
                }
                gnu_longname = Some(EntryFields::from(entry).read_all()?);
                continue;
            }

            if is_recognized_header && entry.header().entry_type().is_gnu_longlink() {
                if gnu_longlink.is_some() {
                    return Err(other(
                        "two long name entries describing \
                         the same member",
                    ));
                }
                gnu_longlink = Some(EntryFields::from(entry).read_all()?);
                continue;
            }

            if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() {
                if pax_extensions.is_some() {
                    return Err(other(
                        "two pax extensions entries describing \
                         the same member",
                    ));
                }
                pax_extensions = Some(EntryFields::from(entry).read_all()?);
                // Extract a possible `size` record now so the *next* raw
                // entry can use it if its own size field is zero.
                if let Some(pax_extensions_ref) = &pax_extensions {
                    pax_size = pax_extensions_size(pax_extensions_ref);
                }
                continue;
            }

            // A real member: attach everything collected from the preceding
            // meta-entries and expand sparse data if present.
            let mut fields = EntryFields::from(entry);
            fields.long_pathname = gnu_longname;
            fields.long_linkname = gnu_longlink;
            fields.pax_extensions = pax_extensions;
            self.parse_sparse_header(&mut fields)?;
            return Ok(Some(fields.into_entry()));
        }
    }

    /// Expands a GNU sparse entry's block list into `entry.data`.
    ///
    /// No-op for non-sparse entries. For sparse entries this replaces the
    /// single data segment with alternating zero-padding and real data
    /// segments, reading any extended sparse headers that follow the main
    /// header.
    fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
        if !entry.header.entry_type().is_gnu_sparse() {
            return Ok(());
        }
        let gnu = match entry.header.as_gnu() {
            Some(gnu) => gnu,
            None => return Err(other("sparse entry type listed but not GNU header")),
        };

        // Sparse files are represented internally as a list of blocks that are
        // read. Blocks are either a bunch of 0's or they're data from the
        // underlying archive.
        //
        // Blocks of a sparse file are described by the `GnuSparseHeader`
        // structure, some of which are contained in `GnuHeader` but some of
        // which may also be contained after the first header in further
        // headers.
        //
        // We read off all the blocks here and use the `add_block` function to
        // incrementally add them to the list of I/O block (in `entry.data`).
        // The `add_block` function also validates that each chunk comes after
        // the previous, we don't overrun the end of the file, and each block is
        // aligned to a 512-byte boundary in the archive itself.
        //
        // At the end we verify that the sparse file size (`Header::size`) is
        // the same as the current offset (described by the list of blocks) as
        // well as the amount of data read equals the size of the entry
        // (`Header::entry_size`).
        entry.data.truncate(0);

        let mut cur = 0;
        let mut remaining = entry.size;
        {
            let data = &mut entry.data;
            let reader = &self.archive.inner;
            let size = entry.size;
            let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
                if block.is_empty() {
                    return Ok(());
                }
                let off = block.offset()?;
                let len = block.length()?;
                if len != 0 && (size - remaining) % 512 != 0 {
                    return Err(other(
                        "previous block in sparse file was not \
                         aligned to 512-byte boundary",
                    ));
                } else if off < cur {
                    return Err(other(
                        "out of order or overlapping sparse \
                         blocks",
                    ));
                } else if cur < off {
                    // Gap between blocks: synthesize zeros rather than
                    // storing them in the archive.
                    let block = io::repeat(0).take(off - cur);
                    data.push(EntryIo::Pad(block));
                }
                cur = off
                    .checked_add(len)
                    .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?;
                remaining = remaining.checked_sub(len).ok_or_else(|| {
                    other(
                        "sparse file consumed more data than the header \
                         listed",
                    )
                })?;
                data.push(EntryIo::Data(reader.take(len)));
                Ok(())
            };
            for block in gnu.sparse.iter() {
                add_block(block)?
            }
            if gnu.is_extended() {
                // Seed the loop: pretend an extension follows, then keep
                // consuming 512-byte extended sparse headers while each one
                // flags that another follows it.
                let mut ext = GnuExtSparseHeader::new();
                ext.isextended[0] = 1;
                while ext.is_extended() {
                    if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
                        return Err(other("failed to read extension"));
                    }

                    self.next += 512;
                    for block in ext.sparse.iter() {
                        add_block(block)?;
                    }
                }
            }
        }
        if cur != gnu.real_size()? {
            return Err(other(
                "mismatch in sparse file chunks and \
                 size in header",
            ));
        }
        // The logical size of the file is its expanded (real) size.
        entry.size = cur;
        if remaining > 0 {
            return Err(other(
                "mismatch in sparse file chunks and \
                 entry size in header",
            ));
        }
        Ok(())
    }

    /// Advances the archive position by `amt` bytes: seeks when the reader
    /// supports it, otherwise reads into a scratch buffer and discards.
    fn skip(&mut self, mut amt: u64) -> io::Result<()> {
        if let Some(seekable_archive) = self.seekable_archive {
            let pos = io::SeekFrom::Current(
                i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?,
            );
            (&seekable_archive.inner).seek(pos)?;
        } else {
            let mut buf = [0u8; 4096 * 8];
            while amt > 0 {
                let n = cmp::min(amt, buf.len() as u64);
                let n = (&self.archive.inner).read(&mut buf[..n as usize])?;
                if n == 0 {
                    return Err(other("unexpected EOF during skip"));
                }
                amt -= n as u64;
            }
        }
        Ok(())
    }
}
519 | |
520 | impl<'a> Iterator for EntriesFields<'a> { |
521 | type Item = io::Result<Entry<'a, io::Empty>>; |
522 | |
523 | fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> { |
524 | if self.done { |
525 | None |
526 | } else { |
527 | match self.next_entry() { |
528 | Ok(Some(e: Entry<'_, Empty>)) => Some(Ok(e)), |
529 | Ok(None) => { |
530 | self.done = true; |
531 | None |
532 | } |
533 | Err(e: Error) => { |
534 | self.done = true; |
535 | Some(Err(e)) |
536 | } |
537 | } |
538 | } |
539 | } |
540 | } |
541 | |
542 | impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> { |
543 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
544 | let i: usize = self.obj.borrow_mut().read(buf:into)?; |
545 | self.pos.set(self.pos.get() + i as u64); |
546 | Ok(i) |
547 | } |
548 | } |
549 | |
550 | impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner<R> { |
551 | fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> { |
552 | let pos: u64 = self.obj.borrow_mut().seek(pos)?; |
553 | self.pos.set(val:pos); |
554 | Ok(pos) |
555 | } |
556 | } |
557 | |
558 | /// Try to fill the buffer from the reader. |
559 | /// |
560 | /// If the reader reaches its end before filling the buffer at all, returns `false`. |
561 | /// Otherwise returns `true`. |
562 | fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> { |
563 | let mut read: usize = 0; |
564 | while read < buf.len() { |
565 | match r.read(&mut buf[read..])? { |
566 | 0 => { |
567 | if read == 0 { |
568 | return Ok(false); |
569 | } |
570 | |
571 | return Err(other(msg:"failed to read entire block" )); |
572 | } |
573 | n: usize => read += n, |
574 | } |
575 | } |
576 | Ok(true) |
577 | } |
578 | |