1 | use std::cell::{Cell, RefCell}; |
2 | use std::cmp; |
3 | use std::convert::TryFrom; |
4 | use std::fs; |
5 | use std::io::prelude::*; |
6 | use std::io::{self, SeekFrom}; |
7 | use std::marker; |
8 | use std::path::Path; |
9 | |
10 | use crate::entry::{EntryFields, EntryIo}; |
11 | use crate::error::TarError; |
12 | use crate::header::BLOCK_SIZE; |
13 | use crate::other; |
14 | use crate::pax::*; |
15 | use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header}; |
16 | |
17 | /// A top-level representation of an archive file. |
18 | /// |
19 | /// This archive can have an entry added to it and it can be iterated over. |
/// A top-level representation of an archive file.
///
/// This archive can have an entry added to it and it can be iterated over.
pub struct Archive<R: ?Sized + Read> {
    // All state (reader, position, unpack options) lives in `ArchiveInner`,
    // which is `?Sized`-friendly so the crate can work with
    // `Archive<dyn Read>` internally.
    inner: ArchiveInner<R>,
}
23 | |
/// Shared internals of an [`Archive`]: the wrapped reader plus the options
/// that control unpacking. Interior mutability (`Cell`/`RefCell`) lets entry
/// readers operate through a shared `&ArchiveInner`.
pub struct ArchiveInner<R: ?Sized> {
    /// Number of bytes consumed from `obj` so far.
    pos: Cell<u64>,
    /// Permission-bit mask applied (inverted, like `umask`) when unpacking.
    mask: u32,
    /// Whether to restore extended attributes (xattrs) when unpacking.
    unpack_xattrs: bool,
    /// Whether to restore extended permission bits (e.g. suid) when unpacking.
    preserve_permissions: bool,
    /// Whether to restore numeric uid/gid ownership when unpacking.
    preserve_ownerships: bool,
    /// Whether to restore file modification times when unpacking.
    preserve_mtime: bool,
    /// Whether extraction may overwrite existing files and symlinks.
    overwrite: bool,
    /// Whether all-zero header blocks are skipped rather than treated as the
    /// end of the archive (useful for concatenated archives).
    ignore_zeros: bool,
    /// The underlying reader.
    obj: RefCell<R>,
}
35 | |
36 | /// An iterator over the entries of an archive. |
/// An iterator over the entries of an archive.
pub struct Entries<'a, R: 'a + Read> {
    // Type-erased iteration state; the concrete reader type `R` is retained
    // only via `PhantomData` so yielded entries can be re-tagged with it.
    fields: EntriesFields<'a>,
    _ignored: marker::PhantomData<&'a Archive<R>>,
}
41 | |
/// Combined `Read + Seek` trait object, so a seekable archive can be stored
/// type-erased (see `EntriesFields::seekable_archive`) and used to skip over
/// file contents efficiently. Blanket-implemented for every `Read + Seek`.
trait SeekRead: Read + Seek {}
impl<R: Read + Seek> SeekRead for R {}
44 | |
/// Type-erased iteration state backing [`Entries`].
struct EntriesFields<'a> {
    /// The archive being iterated.
    archive: &'a Archive<dyn Read + 'a>,
    /// The same archive viewed as seekable, when created via
    /// `entries_with_seek`; enables `seek`-based skipping of file contents.
    seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
    /// Absolute byte offset of the next header block to read.
    next: u64,
    /// Set once the end of the archive (or an error) has been reached.
    done: bool,
    /// Whether raw iteration (no GNU long name/link or PAX preprocessing)
    /// was requested.
    raw: bool,
}
52 | |
53 | impl<R: Read> Archive<R> { |
54 | /// Create a new archive with the underlying object as the reader. |
55 | pub fn new(obj: R) -> Archive<R> { |
56 | Archive { |
57 | inner: ArchiveInner { |
58 | mask: u32::MIN, |
59 | unpack_xattrs: false, |
60 | preserve_permissions: false, |
61 | preserve_ownerships: false, |
62 | preserve_mtime: true, |
63 | overwrite: true, |
64 | ignore_zeros: false, |
65 | obj: RefCell::new(obj), |
66 | pos: Cell::new(0), |
67 | }, |
68 | } |
69 | } |
70 | |
71 | /// Unwrap this archive, returning the underlying object. |
72 | pub fn into_inner(self) -> R { |
73 | self.inner.obj.into_inner() |
74 | } |
75 | |
76 | /// Construct an iterator over the entries in this archive. |
77 | /// |
78 | /// Note that care must be taken to consider each entry within an archive in |
79 | /// sequence. If entries are processed out of sequence (from what the |
80 | /// iterator returns), then the contents read for each entry may be |
81 | /// corrupted. |
82 | pub fn entries(&mut self) -> io::Result<Entries<R>> { |
83 | let me: &mut Archive<dyn Read> = self; |
84 | me._entries(None).map(|fields| Entries { |
85 | fields: fields, |
86 | _ignored: marker::PhantomData, |
87 | }) |
88 | } |
89 | |
90 | /// Unpacks the contents tarball into the specified `dst`. |
91 | /// |
92 | /// This function will iterate over the entire contents of this tarball, |
93 | /// extracting each file in turn to the location specified by the entry's |
94 | /// path name. |
95 | /// |
96 | /// This operation is relatively sensitive in that it will not write files |
97 | /// outside of the path specified by `dst`. Files in the archive which have |
98 | /// a '..' in their path are skipped during the unpacking process. |
99 | /// |
100 | /// # Examples |
101 | /// |
102 | /// ```no_run |
103 | /// use std::fs::File; |
104 | /// use tar::Archive; |
105 | /// |
106 | /// let mut ar = Archive::new(File::open("foo.tar" ).unwrap()); |
107 | /// ar.unpack("foo" ).unwrap(); |
108 | /// ``` |
109 | pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> { |
110 | let me: &mut Archive<dyn Read> = self; |
111 | me._unpack(dst.as_ref()) |
112 | } |
113 | |
114 | /// Set the mask of the permission bits when unpacking this entry. |
115 | /// |
116 | /// The mask will be inverted when applying against a mode, similar to how |
117 | /// `umask` works on Unix. In logical notation it looks like: |
118 | /// |
119 | /// ```text |
120 | /// new_mode = old_mode & (~mask) |
121 | /// ``` |
122 | /// |
123 | /// The mask is 0 by default and is currently only implemented on Unix. |
124 | pub fn set_mask(&mut self, mask: u32) { |
125 | self.inner.mask = mask; |
126 | } |
127 | |
128 | /// Indicate whether extended file attributes (xattrs on Unix) are preserved |
129 | /// when unpacking this archive. |
130 | /// |
131 | /// This flag is disabled by default and is currently only implemented on |
132 | /// Unix using xattr support. This may eventually be implemented for |
133 | /// Windows, however, if other archive implementations are found which do |
134 | /// this as well. |
135 | pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) { |
136 | self.inner.unpack_xattrs = unpack_xattrs; |
137 | } |
138 | |
139 | /// Indicate whether extended permissions (like suid on Unix) are preserved |
140 | /// when unpacking this entry. |
141 | /// |
142 | /// This flag is disabled by default and is currently only implemented on |
143 | /// Unix. |
144 | pub fn set_preserve_permissions(&mut self, preserve: bool) { |
145 | self.inner.preserve_permissions = preserve; |
146 | } |
147 | |
148 | /// Indicate whether numeric ownership ids (like uid and gid on Unix) |
149 | /// are preserved when unpacking this entry. |
150 | /// |
151 | /// This flag is disabled by default and is currently only implemented on |
152 | /// Unix. |
153 | pub fn set_preserve_ownerships(&mut self, preserve: bool) { |
154 | self.inner.preserve_ownerships = preserve; |
155 | } |
156 | |
157 | /// Indicate whether files and symlinks should be overwritten on extraction. |
158 | pub fn set_overwrite(&mut self, overwrite: bool) { |
159 | self.inner.overwrite = overwrite; |
160 | } |
161 | |
162 | /// Indicate whether access time information is preserved when unpacking |
163 | /// this entry. |
164 | /// |
165 | /// This flag is enabled by default. |
166 | pub fn set_preserve_mtime(&mut self, preserve: bool) { |
167 | self.inner.preserve_mtime = preserve; |
168 | } |
169 | |
170 | /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more |
171 | /// entries. |
172 | /// |
173 | /// This can be used in case multiple tar archives have been concatenated together. |
174 | pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) { |
175 | self.inner.ignore_zeros = ignore_zeros; |
176 | } |
177 | } |
178 | |
179 | impl<R: Seek + Read> Archive<R> { |
180 | /// Construct an iterator over the entries in this archive for a seekable |
181 | /// reader. Seek will be used to efficiently skip over file contents. |
182 | /// |
183 | /// Note that care must be taken to consider each entry within an archive in |
184 | /// sequence. If entries are processed out of sequence (from what the |
185 | /// iterator returns), then the contents read for each entry may be |
186 | /// corrupted. |
187 | pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> { |
188 | let me: &Archive<dyn Read> = self; |
189 | let me_seekable: &Archive<dyn SeekRead> = self; |
190 | me._entries(Some(me_seekable)).map(|fields: EntriesFields<'_>| Entries { |
191 | fields: fields, |
192 | _ignored: marker::PhantomData, |
193 | }) |
194 | } |
195 | } |
196 | |
197 | impl Archive<dyn Read + '_> { |
198 | fn _entries<'a>( |
199 | &'a self, |
200 | seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>, |
201 | ) -> io::Result<EntriesFields<'a>> { |
202 | if self.inner.pos.get() != 0 { |
203 | return Err(other( |
204 | "cannot call entries unless archive is at \ |
205 | position 0" , |
206 | )); |
207 | } |
208 | Ok(EntriesFields { |
209 | archive: self, |
210 | seekable_archive, |
211 | done: false, |
212 | next: 0, |
213 | raw: false, |
214 | }) |
215 | } |
216 | |
217 | fn _unpack(&mut self, dst: &Path) -> io::Result<()> { |
218 | if dst.symlink_metadata().is_err() { |
219 | fs::create_dir_all(&dst) |
220 | .map_err(|e| TarError::new(format!("failed to create ` {}`" , dst.display()), e))?; |
221 | } |
222 | |
223 | // Canonicalizing the dst directory will prepend the path with '\\?\' |
224 | // on windows which will allow windows APIs to treat the path as an |
225 | // extended-length path with a 32,767 character limit. Otherwise all |
226 | // unpacked paths over 260 characters will fail on creation with a |
227 | // NotFound exception. |
228 | let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf()); |
229 | |
230 | // Delay any directory entries until the end (they will be created if needed by |
231 | // descendants), to ensure that directory permissions do not interfer with descendant |
232 | // extraction. |
233 | let mut directories = Vec::new(); |
234 | for entry in self._entries(None)? { |
235 | let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive" , e))?; |
236 | if file.header().entry_type() == crate::EntryType::Directory { |
237 | directories.push(file); |
238 | } else { |
239 | file.unpack_in(dst)?; |
240 | } |
241 | } |
242 | |
243 | // Apply the directories. |
244 | // |
245 | // Note: the order of application is important to permissions. That is, we must traverse |
246 | // the filesystem graph in topological ordering or else we risk not being able to create |
247 | // child directories within those of more restrictive permissions. See [0] for details. |
248 | // |
249 | // [0]: <https://github.com/alexcrichton/tar-rs/issues/242> |
250 | directories.sort_by(|a, b| b.path_bytes().cmp(&a.path_bytes())); |
251 | for mut dir in directories { |
252 | dir.unpack_in(dst)?; |
253 | } |
254 | |
255 | Ok(()) |
256 | } |
257 | } |
258 | |
259 | impl<'a, R: Read> Entries<'a, R> { |
260 | /// Indicates whether this iterator will return raw entries or not. |
261 | /// |
262 | /// If the raw list of entries is returned, then no preprocessing happens |
263 | /// on account of this library, for example taking into account GNU long name |
264 | /// or long link archive members. Raw iteration is disabled by default. |
265 | pub fn raw(self, raw: bool) -> Entries<'a, R> { |
266 | Entries { |
267 | fields: EntriesFields { |
268 | raw: raw, |
269 | ..self.fields |
270 | }, |
271 | _ignored: marker::PhantomData, |
272 | } |
273 | } |
274 | } |
275 | impl<'a, R: Read> Iterator for Entries<'a, R> { |
276 | type Item = io::Result<Entry<'a, R>>; |
277 | |
278 | fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> { |
279 | self.fields |
280 | .next() |
281 | .map(|result: Result, …>| result.map(|e: Entry<'a, Empty>| EntryFields::from(entry:e).into_entry())) |
282 | } |
283 | } |
284 | |
285 | impl<'a> EntriesFields<'a> { |
286 | fn next_entry_raw( |
287 | &mut self, |
288 | pax_extensions: Option<&[u8]>, |
289 | ) -> io::Result<Option<Entry<'a, io::Empty>>> { |
290 | let mut header = Header::new_old(); |
291 | let mut header_pos = self.next; |
292 | loop { |
293 | // Seek to the start of the next header in the archive |
294 | let delta = self.next - self.archive.inner.pos.get(); |
295 | self.skip(delta)?; |
296 | |
297 | // EOF is an indicator that we are at the end of the archive. |
298 | if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? { |
299 | return Ok(None); |
300 | } |
301 | |
302 | // If a header is not all zeros, we have another valid header. |
303 | // Otherwise, check if we are ignoring zeros and continue, or break as if this is the |
304 | // end of the archive. |
305 | if !header.as_bytes().iter().all(|i| *i == 0) { |
306 | self.next += BLOCK_SIZE; |
307 | break; |
308 | } |
309 | |
310 | if !self.archive.inner.ignore_zeros { |
311 | return Ok(None); |
312 | } |
313 | self.next += BLOCK_SIZE; |
314 | header_pos = self.next; |
315 | } |
316 | |
317 | // Make sure the checksum is ok |
318 | let sum = header.as_bytes()[..148] |
319 | .iter() |
320 | .chain(&header.as_bytes()[156..]) |
321 | .fold(0, |a, b| a + (*b as u32)) |
322 | + 8 * 32; |
323 | let cksum = header.cksum()?; |
324 | if sum != cksum { |
325 | return Err(other("archive header checksum mismatch" )); |
326 | } |
327 | |
328 | let mut pax_size: Option<u64> = None; |
329 | if let Some(pax_extensions_ref) = &pax_extensions { |
330 | pax_size = pax_extensions_value(pax_extensions_ref, PAX_SIZE); |
331 | |
332 | if let Some(pax_uid) = pax_extensions_value(pax_extensions_ref, PAX_UID) { |
333 | header.set_uid(pax_uid); |
334 | } |
335 | |
336 | if let Some(pax_gid) = pax_extensions_value(pax_extensions_ref, PAX_GID) { |
337 | header.set_gid(pax_gid); |
338 | } |
339 | } |
340 | |
341 | let file_pos = self.next; |
342 | let mut size = header.entry_size()?; |
343 | if size == 0 { |
344 | if let Some(pax_size) = pax_size { |
345 | size = pax_size; |
346 | } |
347 | } |
348 | let ret = EntryFields { |
349 | size: size, |
350 | header_pos: header_pos, |
351 | file_pos: file_pos, |
352 | data: vec![EntryIo::Data((&self.archive.inner).take(size))], |
353 | header: header, |
354 | long_pathname: None, |
355 | long_linkname: None, |
356 | pax_extensions: None, |
357 | mask: self.archive.inner.mask, |
358 | unpack_xattrs: self.archive.inner.unpack_xattrs, |
359 | preserve_permissions: self.archive.inner.preserve_permissions, |
360 | preserve_mtime: self.archive.inner.preserve_mtime, |
361 | overwrite: self.archive.inner.overwrite, |
362 | preserve_ownerships: self.archive.inner.preserve_ownerships, |
363 | }; |
364 | |
365 | // Store where the next entry is, rounding up by 512 bytes (the size of |
366 | // a header); |
367 | let size = size |
368 | .checked_add(BLOCK_SIZE - 1) |
369 | .ok_or_else(|| other("size overflow" ))?; |
370 | self.next = self |
371 | .next |
372 | .checked_add(size & !(BLOCK_SIZE - 1)) |
373 | .ok_or_else(|| other("size overflow" ))?; |
374 | |
375 | Ok(Some(ret.into_entry())) |
376 | } |
377 | |
378 | fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> { |
379 | if self.raw { |
380 | return self.next_entry_raw(None); |
381 | } |
382 | |
383 | let mut gnu_longname = None; |
384 | let mut gnu_longlink = None; |
385 | let mut pax_extensions = None; |
386 | let mut processed = 0; |
387 | loop { |
388 | processed += 1; |
389 | let entry = match self.next_entry_raw(pax_extensions.as_deref())? { |
390 | Some(entry) => entry, |
391 | None if processed > 1 => { |
392 | return Err(other( |
393 | "members found describing a future member \ |
394 | but no future member found" , |
395 | )); |
396 | } |
397 | None => return Ok(None), |
398 | }; |
399 | |
400 | let is_recognized_header = |
401 | entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some(); |
402 | |
403 | if is_recognized_header && entry.header().entry_type().is_gnu_longname() { |
404 | if gnu_longname.is_some() { |
405 | return Err(other( |
406 | "two long name entries describing \ |
407 | the same member" , |
408 | )); |
409 | } |
410 | gnu_longname = Some(EntryFields::from(entry).read_all()?); |
411 | continue; |
412 | } |
413 | |
414 | if is_recognized_header && entry.header().entry_type().is_gnu_longlink() { |
415 | if gnu_longlink.is_some() { |
416 | return Err(other( |
417 | "two long name entries describing \ |
418 | the same member" , |
419 | )); |
420 | } |
421 | gnu_longlink = Some(EntryFields::from(entry).read_all()?); |
422 | continue; |
423 | } |
424 | |
425 | if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() { |
426 | if pax_extensions.is_some() { |
427 | return Err(other( |
428 | "two pax extensions entries describing \ |
429 | the same member" , |
430 | )); |
431 | } |
432 | pax_extensions = Some(EntryFields::from(entry).read_all()?); |
433 | continue; |
434 | } |
435 | |
436 | let mut fields = EntryFields::from(entry); |
437 | fields.long_pathname = gnu_longname; |
438 | fields.long_linkname = gnu_longlink; |
439 | fields.pax_extensions = pax_extensions; |
440 | self.parse_sparse_header(&mut fields)?; |
441 | return Ok(Some(fields.into_entry())); |
442 | } |
443 | } |
444 | |
445 | fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> { |
446 | if !entry.header.entry_type().is_gnu_sparse() { |
447 | return Ok(()); |
448 | } |
449 | let gnu = match entry.header.as_gnu() { |
450 | Some(gnu) => gnu, |
451 | None => return Err(other("sparse entry type listed but not GNU header" )), |
452 | }; |
453 | |
454 | // Sparse files are represented internally as a list of blocks that are |
455 | // read. Blocks are either a bunch of 0's or they're data from the |
456 | // underlying archive. |
457 | // |
458 | // Blocks of a sparse file are described by the `GnuSparseHeader` |
459 | // structure, some of which are contained in `GnuHeader` but some of |
460 | // which may also be contained after the first header in further |
461 | // headers. |
462 | // |
463 | // We read off all the blocks here and use the `add_block` function to |
464 | // incrementally add them to the list of I/O block (in `entry.data`). |
465 | // The `add_block` function also validates that each chunk comes after |
466 | // the previous, we don't overrun the end of the file, and each block is |
467 | // aligned to a 512-byte boundary in the archive itself. |
468 | // |
469 | // At the end we verify that the sparse file size (`Header::size`) is |
470 | // the same as the current offset (described by the list of blocks) as |
471 | // well as the amount of data read equals the size of the entry |
472 | // (`Header::entry_size`). |
473 | entry.data.truncate(0); |
474 | |
475 | let mut cur = 0; |
476 | let mut remaining = entry.size; |
477 | { |
478 | let data = &mut entry.data; |
479 | let reader = &self.archive.inner; |
480 | let size = entry.size; |
481 | let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> { |
482 | if block.is_empty() { |
483 | return Ok(()); |
484 | } |
485 | let off = block.offset()?; |
486 | let len = block.length()?; |
487 | if len != 0 && (size - remaining) % BLOCK_SIZE != 0 { |
488 | return Err(other( |
489 | "previous block in sparse file was not \ |
490 | aligned to 512-byte boundary" , |
491 | )); |
492 | } else if off < cur { |
493 | return Err(other( |
494 | "out of order or overlapping sparse \ |
495 | blocks" , |
496 | )); |
497 | } else if cur < off { |
498 | let block = io::repeat(0).take(off - cur); |
499 | data.push(EntryIo::Pad(block)); |
500 | } |
501 | cur = off |
502 | .checked_add(len) |
503 | .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold" ))?; |
504 | remaining = remaining.checked_sub(len).ok_or_else(|| { |
505 | other( |
506 | "sparse file consumed more data than the header \ |
507 | listed" , |
508 | ) |
509 | })?; |
510 | data.push(EntryIo::Data(reader.take(len))); |
511 | Ok(()) |
512 | }; |
513 | for block in gnu.sparse.iter() { |
514 | add_block(block)? |
515 | } |
516 | if gnu.is_extended() { |
517 | let mut ext = GnuExtSparseHeader::new(); |
518 | ext.isextended[0] = 1; |
519 | while ext.is_extended() { |
520 | if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? { |
521 | return Err(other("failed to read extension" )); |
522 | } |
523 | |
524 | self.next += BLOCK_SIZE; |
525 | for block in ext.sparse.iter() { |
526 | add_block(block)?; |
527 | } |
528 | } |
529 | } |
530 | } |
531 | if cur != gnu.real_size()? { |
532 | return Err(other( |
533 | "mismatch in sparse file chunks and \ |
534 | size in header" , |
535 | )); |
536 | } |
537 | entry.size = cur; |
538 | if remaining > 0 { |
539 | return Err(other( |
540 | "mismatch in sparse file chunks and \ |
541 | entry size in header" , |
542 | )); |
543 | } |
544 | Ok(()) |
545 | } |
546 | |
547 | fn skip(&mut self, mut amt: u64) -> io::Result<()> { |
548 | if let Some(seekable_archive) = self.seekable_archive { |
549 | let pos = io::SeekFrom::Current( |
550 | i64::try_from(amt).map_err(|_| other("seek position out of bounds" ))?, |
551 | ); |
552 | (&seekable_archive.inner).seek(pos)?; |
553 | } else { |
554 | let mut buf = [0u8; 4096 * 8]; |
555 | while amt > 0 { |
556 | let n = cmp::min(amt, buf.len() as u64); |
557 | let n = (&self.archive.inner).read(&mut buf[..n as usize])?; |
558 | if n == 0 { |
559 | return Err(other("unexpected EOF during skip" )); |
560 | } |
561 | amt -= n as u64; |
562 | } |
563 | } |
564 | Ok(()) |
565 | } |
566 | } |
567 | |
568 | impl<'a> Iterator for EntriesFields<'a> { |
569 | type Item = io::Result<Entry<'a, io::Empty>>; |
570 | |
571 | fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> { |
572 | if self.done { |
573 | None |
574 | } else { |
575 | match self.next_entry() { |
576 | Ok(Some(e: Entry<'a, Empty>)) => Some(Ok(e)), |
577 | Ok(None) => { |
578 | self.done = true; |
579 | None |
580 | } |
581 | Err(e: Error) => { |
582 | self.done = true; |
583 | Some(Err(e)) |
584 | } |
585 | } |
586 | } |
587 | } |
588 | } |
589 | |
590 | impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> { |
591 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
592 | let i: usize = self.obj.borrow_mut().read(buf:into)?; |
593 | self.pos.set(self.pos.get() + i as u64); |
594 | Ok(i) |
595 | } |
596 | } |
597 | |
598 | impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner<R> { |
599 | fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> { |
600 | let pos: u64 = self.obj.borrow_mut().seek(pos)?; |
601 | self.pos.set(val:pos); |
602 | Ok(pos) |
603 | } |
604 | } |
605 | |
606 | /// Try to fill the buffer from the reader. |
607 | /// |
608 | /// If the reader reaches its end before filling the buffer at all, returns `false`. |
609 | /// Otherwise returns `true`. |
610 | fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> { |
611 | let mut read: usize = 0; |
612 | while read < buf.len() { |
613 | match r.read(&mut buf[read..])? { |
614 | 0 => { |
615 | if read == 0 { |
616 | return Ok(false); |
617 | } |
618 | |
619 | return Err(other(msg:"failed to read entire block" )); |
620 | } |
621 | n: usize => read += n, |
622 | } |
623 | } |
624 | Ok(true) |
625 | } |
626 | |