archive.rs source code [crates/tar/src/archive.rs]

1	use std::cell::{Cell, RefCell};
2	use std::cmp;
3	use std::convert::TryFrom;
4	use std::fs;
5	use std::io::prelude::*;
6	use std::io::{self, SeekFrom};
7	use std::marker;
8	use std::path::Path;
9
10	use crate::entry::{EntryFields, EntryIo};
11	use crate::error::TarError;
12	use crate::other;
13	use crate::pax::*;
14	use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header};
15
16	/// A top-level representation of an archive file.
17	///
18	/// This archive can have an entry added to it and it can be iterated over.
19	pub struct Archive<R: ?Sized + Read> {
20	inner: ArchiveInner<R>,
21	}
22
/// Shared state of an [`Archive`]: the unpack options, the current read
/// position and the wrapped reader.
pub struct ArchiveInner<R: ?Sized> {
    /// Position within the reader as tracked by the `Read`/`Seek`
    /// implementations for `&ArchiveInner`.
    pos: Cell<u64>,
    /// Permission-bit mask applied when unpacking.
    mask: u32,
    /// Whether extended attributes are unpacked.
    unpack_xattrs: bool,
    /// Whether extended permissions are preserved when unpacking.
    preserve_permissions: bool,
    /// Whether numeric ownership ids are preserved when unpacking.
    preserve_ownerships: bool,
    /// Whether modification times are preserved when unpacking.
    preserve_mtime: bool,
    /// Whether existing files and symlinks are overwritten on extraction.
    overwrite: bool,
    /// Whether all-zero header blocks are skipped instead of ending the
    /// archive.
    ignore_zeros: bool,
    /// The wrapped reader. It sits behind a `RefCell` so it can be driven
    /// through a shared reference, and it must stay the last field because
    /// `R` may be unsized.
    obj: RefCell<R>,
}
34
35	/// An iterator over the entries of an archive.
36	pub struct Entries<'a, R: 'a + Read> {
37	fields: EntriesFields<'a>,
38	_ignored: marker::PhantomData<&'a Archive<R>>,
39	}
40
/// Helper trait combining `Read` and `Seek` so that both can be named in a
/// single trait object (`dyn SeekRead`).
trait SeekRead: Read + Seek {}

// Blanket implementation: anything that can both read and seek qualifies.
impl<T: Read + Seek> SeekRead for T {}
43
44	struct EntriesFields<'a> {
45	archive: &'a Archive<dyn Read + 'a>,
46	seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
47	next: u64,
48	done: bool,
49	raw: bool,
50	}
51
52	impl<R: Read> Archive<R> {
53	/// Create a new archive with the underlying object as the reader.
54	pub fn new(obj: R) -> Archive<R> {
55	Archive {
56	inner: ArchiveInner {
57	mask: u32::MIN,
58	unpack_xattrs: `false`,
59	preserve_permissions: `false`,
60	preserve_ownerships: `false`,
61	preserve_mtime: `true`,
62	overwrite: `true`,
63	ignore_zeros: `false`,
64	obj: RefCell::new(obj),
65	pos: Cell::new(`0`),
66	},
67	}
68	}
69
70	/// Unwrap this archive, returning the underlying object.
71	pub fn into_inner(self) -> R {
72	self.inner.obj.into_inner()
73	}
74
75	/// Construct an iterator over the entries in this archive.
76	///
77	/// Note that care must be taken to consider each entry within an archive in
78	/// sequence. If entries are processed out of sequence (from what the
79	/// iterator returns), then the contents read for each entry may be
80	/// corrupted.
81	pub fn entries(&mut self) -> io::Result<Entries<R>> {
82	let me: &mut Archive<dyn Read> = self;
83	me._entries(None).map(\|fields\| Entries {
84	fields: fields,
85	_ignored: marker::PhantomData,
86	})
87	}
88
89	/// Unpacks the contents tarball into the specified `dst`.
90	///
91	/// This function will iterate over the entire contents of this tarball,
92	/// extracting each file in turn to the location specified by the entry's
93	/// path name.
94	///
95	/// This operation is relatively sensitive in that it will not write files
96	/// outside of the path specified by `dst`. Files in the archive which have
97	/// a '..' in their path are skipped during the unpacking process.
98	///
99	/// # Examples
100	///
101	/// ```no_run
102	/// use std::fs::File;
103	/// use tar::Archive;
104	///
105	/// let mut ar = Archive::new(File::open("foo.tar").unwrap());
106	/// ar.unpack("foo").unwrap();
107	/// ```
108	pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> {
109	let me: &mut Archive<dyn Read> = self;
110	me._unpack(dst.as_ref())
111	}
112
113	/// Set the mask of the permission bits when unpacking this entry.
114	///
115	/// The mask will be inverted when applying against a mode, similar to how
116	/// `umask` works on Unix. In logical notation it looks like:
117	///
118	/// ```text
119	/// new_mode = old_mode & (~mask)
120	/// ```
121	///
122	/// The mask is 0 by default and is currently only implemented on Unix.
123	pub fn set_mask(&mut self, mask: u32) {
124	self.inner.mask = mask;
125	}
126
127	/// Indicate whether extended file attributes (xattrs on Unix) are preserved
128	/// when unpacking this archive.
129	///
130	/// This flag is disabled by default and is currently only implemented on
131	/// Unix using xattr support. This may eventually be implemented for
132	/// Windows, however, if other archive implementations are found which do
133	/// this as well.
134	pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
135	self.inner.unpack_xattrs = unpack_xattrs;
136	}
137
138	/// Indicate whether extended permissions (like suid on Unix) are preserved
139	/// when unpacking this entry.
140	///
141	/// This flag is disabled by default and is currently only implemented on
142	/// Unix.
143	pub fn set_preserve_permissions(&mut self, preserve: bool) {
144	self.inner.preserve_permissions = preserve;
145	}
146
147	/// Indicate whether numeric ownership ids (like uid and gid on Unix)
148	/// are preserved when unpacking this entry.
149	///
150	/// This flag is disabled by default and is currently only implemented on
151	/// Unix.
152	pub fn set_preserve_ownerships(&mut self, preserve: bool) {
153	self.inner.preserve_ownerships = preserve;
154	}
155
156	/// Indicate whether files and symlinks should be overwritten on extraction.
157	pub fn set_overwrite(&mut self, overwrite: bool) {
158	self.inner.overwrite = overwrite;
159	}
160
161	/// Indicate whether access time information is preserved when unpacking
162	/// this entry.
163	///
164	/// This flag is enabled by default.
165	pub fn set_preserve_mtime(&mut self, preserve: bool) {
166	self.inner.preserve_mtime = preserve;
167	}
168
169	/// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more
170	/// entries.
171	///
172	/// This can be used in case multiple tar archives have been concatenated together.
173	pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) {
174	self.inner.ignore_zeros = ignore_zeros;
175	}
176	}
177
178	impl<R: Seek + Read> Archive<R> {
179	/// Construct an iterator over the entries in this archive for a seekable
180	/// reader. Seek will be used to efficiently skip over file contents.
181	///
182	/// Note that care must be taken to consider each entry within an archive in
183	/// sequence. If entries are processed out of sequence (from what the
184	/// iterator returns), then the contents read for each entry may be
185	/// corrupted.
186	pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> {
187	let me: &Archive<dyn Read> = self;
188	let me_seekable: &Archive<dyn SeekRead> = self;
189	me._entries(Some(me_seekable)).map(\|fields: EntriesFields<'_>\| Entries {
190	fields: fields,
191	_ignored: marker::PhantomData,
192	})
193	}
194	}
195
196	impl Archive<dyn Read + '_> {
197	fn _entries<'a>(
198	&'a self,
199	seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
200	) -> io::Result<EntriesFields<'a>> {
201	if self.inner.pos.get() != `0` {
202	return Err(other(
203	"cannot call entries unless archive is at \
204	position 0",
205	));
206	}
207	Ok(EntriesFields {
208	archive: self,
209	seekable_archive,
210	done: `false`,
211	next: `0`,
212	raw: `false`,
213	})
214	}
215
216	fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
217	if dst.symlink_metadata().is_err() {
218	fs::create_dir_all(&dst)
219	.map_err(\|e\| TarError::new(format!("failed to create `{}`", dst.display()), e))?;
220	}
221
222	// Canonicalizing the dst directory will prepend the path with '\\?\'
223	// on windows which will allow windows APIs to treat the path as an
224	// extended-length path with a 32,767 character limit. Otherwise all
225	// unpacked paths over 260 characters will fail on creation with a
226	// NotFound exception.
227	let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf());
228
229	// Delay any directory entries until the end (they will be created if needed by
230	// descendants), to ensure that directory permissions do not interfer with descendant
231	// extraction.
232	let mut directories = Vec::new();
233	for entry in self._entries(None)? {
234	let mut file = entry.map_err(\|e\| TarError::new("failed to iterate over archive", e))?;
235	if file.header().entry_type() == crate::EntryType::Directory {
236	directories.push(file);
237	} else {
238	file.unpack_in(dst)?;
239	}
240	}
241
242	// Apply the directories.
243	//
244	// Note: the order of application is important to permissions. That is, we must traverse
245	// the filesystem graph in topological ordering or else we risk not being able to create
246	// child directories within those of more restrictive permissions. See [0] for details.
247	//
248	// [0]: <https://github.com/alexcrichton/tar-rs/issues/242>
249	directories.sort_by(\|a, b\| b.path_bytes().cmp(&a.path_bytes()));
250	for mut dir in directories {
251	dir.unpack_in(dst)?;
252	}
253
254	Ok(())
255	}
256	}
257
258	impl<'a, R: Read> Entries<'a, R> {
259	/// Indicates whether this iterator will return raw entries or not.
260	///
261	/// If the raw list of entries are returned, then no preprocessing happens
262	/// on account of this library, for example taking into account GNU long name
263	/// or long link archive members. Raw iteration is disabled by default.
264	pub fn raw(self, raw: bool) -> Entries<'a, R> {
265	Entries {
266	fields: EntriesFields {
267	raw: raw,
268	..self.fields
269	},
270	_ignored: marker::PhantomData,
271	}
272	}
273	}
274	impl<'a, R: Read> Iterator for Entries<'a, R> {
275	type Item = io::Result<Entry<'a, R>>;
276
277	fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> {
278	self.fields
279	.next()
280	.map(\|result: Result, …>\| result.map(\|e: Entry<'a, Empty>\| EntryFields::from(entry:e).into_entry()))
281	}
282	}
283
284	impl<'a> EntriesFields<'a> {
285	fn next_entry_raw(
286	&mut self,
287	pax_extensions: Option<&[u8]>,
288	) -> io::Result<Option<Entry<'a, io::Empty>>> {
289	let mut header = Header::new_old();
290	let mut header_pos = self.next;
291	loop {
292	// Seek to the start of the next header in the archive
293	let delta = self.next - self.archive.inner.pos.get();
294	self.skip(delta)?;
295
296	// EOF is an indicator that we are at the end of the archive.
297	if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
298	return Ok(None);
299	}
300
301	// If a header is not all zeros, we have another valid header.
302	// Otherwise, check if we are ignoring zeros and continue, or break as if this is the
303	// end of the archive.
304	if !header.as_bytes().iter().all(\|i\| *i == `0`) {
305	self.next += `512`;
306	break;
307	}
308
309	if !self.archive.inner.ignore_zeros {
310	return Ok(None);
311	}
312	self.next += `512`;
313	header_pos = self.next;
314	}
315
316	// Make sure the checksum is ok
317	let sum = header.as_bytes()[..`148`]
318	.iter()
319	.chain(&header.as_bytes()[`156`..])
320	.fold(`0`, \|a, b\| a + (b as u32*))
321	+ `8` * `32`;
322	let cksum = header.cksum()?;
323	if sum != cksum {
324	return Err(other("archive header checksum mismatch"));
325	}
326
327	let mut pax_size: Option<u64> = None;
328	if let Some(pax_extensions_ref) = &pax_extensions {
329	pax_size = pax_extensions_value(pax_extensions_ref, PAX_SIZE);
330
331	if let Some(pax_uid) = pax_extensions_value(pax_extensions_ref, PAX_UID) {
332	header.set_uid(pax_uid);
333	}
334
335	if let Some(pax_gid) = pax_extensions_value(pax_extensions_ref, PAX_GID) {
336	header.set_gid(pax_gid);
337	}
338	}
339
340	let file_pos = self.next;
341	let mut size = header.entry_size()?;
342	if size == `0` {
343	if let Some(pax_size) = pax_size {
344	size = pax_size;
345	}
346	}
347	let ret = EntryFields {
348	size: size,
349	header_pos: header_pos,
350	file_pos: file_pos,
351	data: vec![EntryIo::Data((&self.archive.inner).take(size))],
352	header: header,
353	long_pathname: None,
354	long_linkname: None,
355	pax_extensions: None,
356	mask: self.archive.inner.mask,
357	unpack_xattrs: self.archive.inner.unpack_xattrs,
358	preserve_permissions: self.archive.inner.preserve_permissions,
359	preserve_mtime: self.archive.inner.preserve_mtime,
360	overwrite: self.archive.inner.overwrite,
361	preserve_ownerships: self.archive.inner.preserve_ownerships,
362	};
363
364	// Store where the next entry is, rounding up by 512 bytes (the size of
365	// a header);
366	let size = size
367	.checked_add(`511`)
368	.ok_or_else(\|\| other("size overflow"))?;
369	self.next = self
370	.next
371	.checked_add(size & !(`512` - `1`))
372	.ok_or_else(\|\| other("size overflow"))?;
373
374	Ok(Some(ret.into_entry()))
375	}
376
377	fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> {
378	if self.raw {
379	return self.next_entry_raw(None);
380	}
381
382	let mut gnu_longname = None;
383	let mut gnu_longlink = None;
384	let mut pax_extensions = None;
385	let mut processed = `0`;
386	loop {
387	processed += `1`;
388	let entry = match self.next_entry_raw(pax_extensions.as_deref())? {
389	Some(entry) => entry,
390	None if processed > `1` => {
391	return Err(other(
392	"members found describing a future member \
393	but no future member found",
394	));
395	}
396	None => return Ok(None),
397	};
398
399	let is_recognized_header =
400	entry.header().as_gnu().is_some() \|\| entry.header().as_ustar().is_some();
401
402	if is_recognized_header && entry.header().entry_type().is_gnu_longname() {
403	if gnu_longname.is_some() {
404	return Err(other(
405	"two long name entries describing \
406	the same member",
407	));
408	}
409	gnu_longname = Some(EntryFields::from(entry).read_all()?);
410	continue;
411	}
412
413	if is_recognized_header && entry.header().entry_type().is_gnu_longlink() {
414	if gnu_longlink.is_some() {
415	return Err(other(
416	"two long name entries describing \
417	the same member",
418	));
419	}
420	gnu_longlink = Some(EntryFields::from(entry).read_all()?);
421	continue;
422	}
423
424	if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() {
425	if pax_extensions.is_some() {
426	return Err(other(
427	"two pax extensions entries describing \
428	the same member",
429	));
430	}
431	pax_extensions = Some(EntryFields::from(entry).read_all()?);
432	continue;
433	}
434
435	let mut fields = EntryFields::from(entry);
436	fields.long_pathname = gnu_longname;
437	fields.long_linkname = gnu_longlink;
438	fields.pax_extensions = pax_extensions;
439	self.parse_sparse_header(&mut fields)?;
440	return Ok(Some(fields.into_entry()));
441	}
442	}
443
444	fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
445	if !entry.header.entry_type().is_gnu_sparse() {
446	return Ok(());
447	}
448	let gnu = match entry.header.as_gnu() {
449	Some(gnu) => gnu,
450	None => return Err(other("sparse entry type listed but not GNU header")),
451	};
452
453	// Sparse files are represented internally as a list of blocks that are
454	// read. Blocks are either a bunch of 0's or they're data from the
455	// underlying archive.
456	//
457	// Blocks of a sparse file are described by the `GnuSparseHeader`
458	// structure, some of which are contained in `GnuHeader` but some of
459	// which may also be contained after the first header in further
460	// headers.
461	//
462	// We read off all the blocks here and use the `add_block` function to
463	// incrementally add them to the list of I/O block (in `entry.data`).
464	// The `add_block` function also validates that each chunk comes after
465	// the previous, we don't overrun the end of the file, and each block is
466	// aligned to a 512-byte boundary in the archive itself.
467	//
468	// At the end we verify that the sparse file size (`Header::size`) is
469	// the same as the current offset (described by the list of blocks) as
470	// well as the amount of data read equals the size of the entry
471	// (`Header::entry_size`).
472	entry.data.truncate(`0`);
473
474	let mut cur = `0`;
475	let mut remaining = entry.size;
476	{
477	let data = &mut entry.data;
478	let reader = &self.archive.inner;
479	let size = entry.size;
480	let mut add_block = \|block: &GnuSparseHeader\| -> io::Result<_> {
481	if block.is_empty() {
482	return Ok(());
483	}
484	let off = block.offset()?;
485	let len = block.length()?;
486	if len != `0` && (size - remaining) % `512` != `0` {
487	return Err(other(
488	"previous block in sparse file was not \
489	aligned to 512-byte boundary",
490	));
491	} else if off < cur {
492	return Err(other(
493	"out of order or overlapping sparse \
494	blocks",
495	));
496	} else if cur < off {
497	let block = io::repeat(`0`).take(off - cur);
498	data.push(EntryIo::Pad(block));
499	}
500	cur = off
501	.checked_add(len)
502	.ok_or_else(\|\| other("more bytes listed in sparse file than u64 can hold"))?;
503	remaining = remaining.checked_sub(len).ok_or_else(\|\| {
504	other(
505	"sparse file consumed more data than the header \
506	listed",
507	)
508	})?;
509	data.push(EntryIo::Data(reader.take(len)));
510	Ok(())
511	};
512	for block in gnu.sparse.iter() {
513	add_block(block)?
514	}
515	if gnu.is_extended() {
516	let mut ext = GnuExtSparseHeader::new();
517	ext.isextended[`0`] = `1`;
518	while ext.is_extended() {
519	if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
520	return Err(other("failed to read extension"));
521	}
522
523	self.next += `512`;
524	for block in ext.sparse.iter() {
525	add_block(block)?;
526	}
527	}
528	}
529	}
530	if cur != gnu.real_size()? {
531	return Err(other(
532	"mismatch in sparse file chunks and \
533	size in header",
534	));
535	}
536	entry.size = cur;
537	if remaining > `0` {
538	return Err(other(
539	"mismatch in sparse file chunks and \
540	entry size in header",
541	));
542	}
543	Ok(())
544	}
545
546	fn skip(&mut self, mut amt: u64) -> io::Result<()> {
547	if let Some(seekable_archive) = self.seekable_archive {
548	let pos = io::SeekFrom::Current(
549	i64::try_from(amt).map_err(\|_\| other("seek position out of bounds"))?,
550	);
551	(&seekable_archive.inner).seek(pos)?;
552	} else {
553	let mut buf = [`0u8`; `4096` * `8`];
554	while amt > `0` {
555	let n = cmp::min(amt, buf.len() as u64);
556	let n = (&self.archive.inner).read(&mut buf[..n as usize])?;
557	if n == `0` {
558	return Err(other("unexpected EOF during skip"));
559	}
560	amt -= n as u64;
561	}
562	}
563	Ok(())
564	}
565	}
566
567	impl<'a> Iterator for EntriesFields<'a> {
568	type Item = io::Result<Entry<'a, io::Empty>>;
569
570	fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> {
571	if self.done {
572	None
573	} else {
574	match self.next_entry() {
575	Ok(Some(e: Entry<'a, Empty>)) => Some(Ok(e)),
576	Ok(None) => {
577	self.done = `true`;
578	None
579	}
580	Err(e: Error) => {
581	self.done = `true`;
582	Some(Err(e))
583	}
584	}
585	}
586	}
587	}
588
589	impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> {
590	fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
591	let i: usize = self.obj.borrow_mut().read(buf:into)?;
592	self.pos.set(self.pos.get() + i as u64);
593	Ok(i)
594	}
595	}
596
597	impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner<R> {
598	fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
599	let pos: u64 = self.obj.borrow_mut().seek(pos)?;
600	self.pos.set(val:pos);
601	Ok(pos)
602	}
603	}
604
605	/// Try to fill the buffer from the reader.
606	///
607	/// If the reader reaches its end before filling the buffer at all, returns `false`.
608	/// Otherwise returns `true`.
609	fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> {
610	let mut read: usize = `0`;
611	while read < buf.len() {
612	match r.read(&mut buf[read..])? {
613	`0` => {
614	if read == `0` {
615	return Ok(`false`);
616	}
617
618	return Err(other(msg:"failed to read entire block"));
619	}
620	n: usize => read += n,
621	}
622	}
623	Ok(`true`)
624	}
625