1 | use std::borrow::Cow; |
2 | use std::cmp; |
3 | use std::fs; |
4 | use std::fs::OpenOptions; |
5 | use std::io::prelude::*; |
6 | use std::io::{self, Error, ErrorKind, SeekFrom}; |
7 | use std::marker; |
8 | use std::path::{Component, Path, PathBuf}; |
9 | |
10 | use filetime::{self, FileTime}; |
11 | |
12 | use crate::archive::ArchiveInner; |
13 | use crate::error::TarError; |
14 | use crate::header::bytes2path; |
15 | use crate::other; |
16 | use crate::{Archive, Header, PaxExtensions}; |
17 | |
18 | /// A read-only view into an entry of an archive. |
19 | /// |
20 | /// This structure is a window into a portion of a borrowed archive which can |
21 | /// be inspected. It acts as a file handle by implementing the Reader trait. An |
22 | /// entry cannot be rewritten once inserted into an archive. |
23 | pub struct Entry<'a, R: 'a + Read> { |
24 | fields: EntryFields<'a>, |
25 | _ignored: marker::PhantomData<&'a Archive<R>>, |
26 | } |
27 | |
28 | // private implementation detail of `Entry`, but concrete (no type parameters) |
29 | // and also all-public to be constructed from other modules. |
30 | pub struct EntryFields<'a> { |
31 | pub long_pathname: Option<Vec<u8>>, |
32 | pub long_linkname: Option<Vec<u8>>, |
33 | pub pax_extensions: Option<Vec<u8>>, |
34 | pub mask: u32, |
35 | pub header: Header, |
36 | pub size: u64, |
37 | pub header_pos: u64, |
38 | pub file_pos: u64, |
39 | pub data: Vec<EntryIo<'a>>, |
40 | pub unpack_xattrs: bool, |
41 | pub preserve_permissions: bool, |
42 | pub preserve_ownerships: bool, |
43 | pub preserve_mtime: bool, |
44 | pub overwrite: bool, |
45 | } |
46 | |
47 | pub enum EntryIo<'a> { |
48 | Pad(io::Take<io::Repeat>), |
49 | Data(io::Take<&'a ArchiveInner<dyn Read + 'a>>), |
50 | } |
51 | |
/// The result of unpacking a single entry.
///
/// The unpacked item is handed back so callers can apply their own additional
/// handling. Only regular files carry a payload today; further variants (for
/// links, directories, and so on) may be added in the future.
#[derive(Debug)]
pub enum Unpacked {
    /// A regular file was written; this is its open handle.
    File(std::fs::File),
    /// Something other than a regular file (directory, hardlink, symlink, or
    /// other node) was unpacked.
    #[doc(hidden)]
    __Nonexhaustive,
}
63 | |
64 | impl<'a, R: Read> Entry<'a, R> { |
65 | /// Returns the path name for this entry. |
66 | /// |
67 | /// This method may fail if the pathname is not valid Unicode and this is |
68 | /// called on a Windows platform. |
69 | /// |
70 | /// Note that this function will convert any `\` characters to directory |
71 | /// separators, and it will not always return the same value as |
72 | /// `self.header().path()` as some archive formats have support for longer |
73 | /// path names described in separate entries. |
74 | /// |
75 | /// It is recommended to use this method instead of inspecting the `header` |
76 | /// directly to ensure that various archive formats are handled correctly. |
77 | pub fn path(&self) -> io::Result<Cow<Path>> { |
78 | self.fields.path() |
79 | } |
80 | |
81 | /// Returns the raw bytes listed for this entry. |
82 | /// |
83 | /// Note that this function will convert any `\` characters to directory |
84 | /// separators, and it will not always return the same value as |
85 | /// `self.header().path_bytes()` as some archive formats have support for |
86 | /// longer path names described in separate entries. |
87 | pub fn path_bytes(&self) -> Cow<[u8]> { |
88 | self.fields.path_bytes() |
89 | } |
90 | |
91 | /// Returns the link name for this entry, if any is found. |
92 | /// |
93 | /// This method may fail if the pathname is not valid Unicode and this is |
94 | /// called on a Windows platform. `Ok(None)` being returned, however, |
95 | /// indicates that the link name was not present. |
96 | /// |
97 | /// Note that this function will convert any `\` characters to directory |
98 | /// separators, and it will not always return the same value as |
99 | /// `self.header().link_name()` as some archive formats have support for |
100 | /// longer path names described in separate entries. |
101 | /// |
102 | /// It is recommended to use this method instead of inspecting the `header` |
103 | /// directly to ensure that various archive formats are handled correctly. |
104 | pub fn link_name(&self) -> io::Result<Option<Cow<Path>>> { |
105 | self.fields.link_name() |
106 | } |
107 | |
108 | /// Returns the link name for this entry, in bytes, if listed. |
109 | /// |
110 | /// Note that this will not always return the same value as |
111 | /// `self.header().link_name_bytes()` as some archive formats have support for |
112 | /// longer path names described in separate entries. |
113 | pub fn link_name_bytes(&self) -> Option<Cow<[u8]>> { |
114 | self.fields.link_name_bytes() |
115 | } |
116 | |
117 | /// Returns an iterator over the pax extensions contained in this entry. |
118 | /// |
119 | /// Pax extensions are a form of archive where extra metadata is stored in |
120 | /// key/value pairs in entries before the entry they're intended to |
121 | /// describe. For example this can be used to describe long file name or |
122 | /// other metadata like atime/ctime/mtime in more precision. |
123 | /// |
124 | /// The returned iterator will yield key/value pairs for each extension. |
125 | /// |
126 | /// `None` will be returned if this entry does not indicate that it itself |
127 | /// contains extensions, or if there were no previous extensions describing |
128 | /// it. |
129 | /// |
130 | /// Note that global pax extensions are intended to be applied to all |
131 | /// archive entries. |
132 | /// |
133 | /// Also note that this function will read the entire entry if the entry |
134 | /// itself is a list of extensions. |
135 | pub fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions>> { |
136 | self.fields.pax_extensions() |
137 | } |
138 | |
139 | /// Returns access to the header of this entry in the archive. |
140 | /// |
141 | /// This provides access to the metadata for this entry in the archive. |
142 | pub fn header(&self) -> &Header { |
143 | &self.fields.header |
144 | } |
145 | |
146 | /// Returns access to the size of this entry in the archive. |
147 | /// |
148 | /// In the event the size is stored in a pax extension, that size value |
149 | /// will be referenced. Otherwise, the entry size will be stored in the header. |
150 | pub fn size(&self) -> u64 { |
151 | self.fields.size |
152 | } |
153 | |
154 | /// Returns the starting position, in bytes, of the header of this entry in |
155 | /// the archive. |
156 | /// |
157 | /// The header is always a contiguous section of 512 bytes, so if the |
158 | /// underlying reader implements `Seek`, then the slice from `header_pos` to |
159 | /// `header_pos + 512` contains the raw header bytes. |
160 | pub fn raw_header_position(&self) -> u64 { |
161 | self.fields.header_pos |
162 | } |
163 | |
164 | /// Returns the starting position, in bytes, of the file of this entry in |
165 | /// the archive. |
166 | /// |
167 | /// If the file of this entry is continuous (e.g. not a sparse file), and |
168 | /// if the underlying reader implements `Seek`, then the slice from |
169 | /// `file_pos` to `file_pos + entry_size` contains the raw file bytes. |
170 | pub fn raw_file_position(&self) -> u64 { |
171 | self.fields.file_pos |
172 | } |
173 | |
174 | /// Writes this file to the specified location. |
175 | /// |
176 | /// This function will write the entire contents of this file into the |
177 | /// location specified by `dst`. Metadata will also be propagated to the |
178 | /// path `dst`. |
179 | /// |
180 | /// This function will create a file at the path `dst`, and it is required |
181 | /// that the intermediate directories are created. Any existing file at the |
182 | /// location `dst` will be overwritten. |
183 | /// |
184 | /// > **Note**: This function does not have as many sanity checks as |
185 | /// > `Archive::unpack` or `Entry::unpack_in`. As a result if you're |
186 | /// > thinking of unpacking untrusted tarballs you may want to review the |
187 | /// > implementations of the previous two functions and perhaps implement |
188 | /// > similar logic yourself. |
189 | /// |
190 | /// # Examples |
191 | /// |
192 | /// ```no_run |
193 | /// use std::fs::File; |
194 | /// use tar::Archive; |
195 | /// |
196 | /// let mut ar = Archive::new(File::open("foo.tar" ).unwrap()); |
197 | /// |
198 | /// for (i, file) in ar.entries().unwrap().enumerate() { |
199 | /// let mut file = file.unwrap(); |
200 | /// file.unpack(format!("file-{}" , i)).unwrap(); |
201 | /// } |
202 | /// ``` |
203 | pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<Unpacked> { |
204 | self.fields.unpack(None, dst.as_ref()) |
205 | } |
206 | |
207 | /// Extracts this file under the specified path, avoiding security issues. |
208 | /// |
209 | /// This function will write the entire contents of this file into the |
210 | /// location obtained by appending the path of this file in the archive to |
211 | /// `dst`, creating any intermediate directories if needed. Metadata will |
212 | /// also be propagated to the path `dst`. Any existing file at the location |
213 | /// `dst` will be overwritten. |
214 | /// |
215 | /// This function carefully avoids writing outside of `dst`. If the file has |
216 | /// a '..' in its path, this function will skip it and return false. |
217 | /// |
218 | /// # Examples |
219 | /// |
220 | /// ```no_run |
221 | /// use std::fs::File; |
222 | /// use tar::Archive; |
223 | /// |
224 | /// let mut ar = Archive::new(File::open("foo.tar" ).unwrap()); |
225 | /// |
226 | /// for (i, file) in ar.entries().unwrap().enumerate() { |
227 | /// let mut file = file.unwrap(); |
228 | /// file.unpack_in("target" ).unwrap(); |
229 | /// } |
230 | /// ``` |
231 | pub fn unpack_in<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<bool> { |
232 | self.fields.unpack_in(dst.as_ref()) |
233 | } |
234 | |
235 | /// Set the mask of the permission bits when unpacking this entry. |
236 | /// |
237 | /// The mask will be inverted when applying against a mode, similar to how |
238 | /// `umask` works on Unix. In logical notation it looks like: |
239 | /// |
240 | /// ```text |
241 | /// new_mode = old_mode & (~mask) |
242 | /// ``` |
243 | /// |
244 | /// The mask is 0 by default and is currently only implemented on Unix. |
245 | pub fn set_mask(&mut self, mask: u32) { |
246 | self.fields.mask = mask; |
247 | } |
248 | |
249 | /// Indicate whether extended file attributes (xattrs on Unix) are preserved |
250 | /// when unpacking this entry. |
251 | /// |
252 | /// This flag is disabled by default and is currently only implemented on |
253 | /// Unix using xattr support. This may eventually be implemented for |
254 | /// Windows, however, if other archive implementations are found which do |
255 | /// this as well. |
256 | pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) { |
257 | self.fields.unpack_xattrs = unpack_xattrs; |
258 | } |
259 | |
260 | /// Indicate whether extended permissions (like suid on Unix) are preserved |
261 | /// when unpacking this entry. |
262 | /// |
263 | /// This flag is disabled by default and is currently only implemented on |
264 | /// Unix. |
265 | pub fn set_preserve_permissions(&mut self, preserve: bool) { |
266 | self.fields.preserve_permissions = preserve; |
267 | } |
268 | |
269 | /// Indicate whether access time information is preserved when unpacking |
270 | /// this entry. |
271 | /// |
272 | /// This flag is enabled by default. |
273 | pub fn set_preserve_mtime(&mut self, preserve: bool) { |
274 | self.fields.preserve_mtime = preserve; |
275 | } |
276 | } |
277 | |
278 | impl<'a, R: Read> Read for Entry<'a, R> { |
279 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
280 | self.fields.read(buf:into) |
281 | } |
282 | } |
283 | |
284 | impl<'a> EntryFields<'a> { |
285 | pub fn from<R: Read>(entry: Entry<R>) -> EntryFields { |
286 | entry.fields |
287 | } |
288 | |
289 | pub fn into_entry<R: Read>(self) -> Entry<'a, R> { |
290 | Entry { |
291 | fields: self, |
292 | _ignored: marker::PhantomData, |
293 | } |
294 | } |
295 | |
296 | pub fn read_all(&mut self) -> io::Result<Vec<u8>> { |
297 | // Preallocate some data but don't let ourselves get too crazy now. |
298 | let cap = cmp::min(self.size, 128 * 1024); |
299 | let mut v = Vec::with_capacity(cap as usize); |
300 | self.read_to_end(&mut v).map(|_| v) |
301 | } |
302 | |
303 | fn path(&self) -> io::Result<Cow<Path>> { |
304 | bytes2path(self.path_bytes()) |
305 | } |
306 | |
307 | fn path_bytes(&self) -> Cow<[u8]> { |
308 | match self.long_pathname { |
309 | Some(ref bytes) => { |
310 | if let Some(&0) = bytes.last() { |
311 | Cow::Borrowed(&bytes[..bytes.len() - 1]) |
312 | } else { |
313 | Cow::Borrowed(bytes) |
314 | } |
315 | } |
316 | None => { |
317 | if let Some(ref pax) = self.pax_extensions { |
318 | let pax = PaxExtensions::new(pax) |
319 | .filter_map(|f| f.ok()) |
320 | .find(|f| f.key_bytes() == b"path" ) |
321 | .map(|f| f.value_bytes()); |
322 | if let Some(field) = pax { |
323 | return Cow::Borrowed(field); |
324 | } |
325 | } |
326 | self.header.path_bytes() |
327 | } |
328 | } |
329 | } |
330 | |
331 | /// Gets the path in a "lossy" way, used for error reporting ONLY. |
332 | fn path_lossy(&self) -> String { |
333 | String::from_utf8_lossy(&self.path_bytes()).to_string() |
334 | } |
335 | |
336 | fn link_name(&self) -> io::Result<Option<Cow<Path>>> { |
337 | match self.link_name_bytes() { |
338 | Some(bytes) => bytes2path(bytes).map(Some), |
339 | None => Ok(None), |
340 | } |
341 | } |
342 | |
343 | fn link_name_bytes(&self) -> Option<Cow<[u8]>> { |
344 | match self.long_linkname { |
345 | Some(ref bytes) => { |
346 | if let Some(&0) = bytes.last() { |
347 | Some(Cow::Borrowed(&bytes[..bytes.len() - 1])) |
348 | } else { |
349 | Some(Cow::Borrowed(bytes)) |
350 | } |
351 | } |
352 | None => { |
353 | if let Some(ref pax) = self.pax_extensions { |
354 | let pax = PaxExtensions::new(pax) |
355 | .filter_map(|f| f.ok()) |
356 | .find(|f| f.key_bytes() == b"linkpath" ) |
357 | .map(|f| f.value_bytes()); |
358 | if let Some(field) = pax { |
359 | return Some(Cow::Borrowed(field)); |
360 | } |
361 | } |
362 | self.header.link_name_bytes() |
363 | } |
364 | } |
365 | } |
366 | |
367 | fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions>> { |
368 | if self.pax_extensions.is_none() { |
369 | if !self.header.entry_type().is_pax_global_extensions() |
370 | && !self.header.entry_type().is_pax_local_extensions() |
371 | { |
372 | return Ok(None); |
373 | } |
374 | self.pax_extensions = Some(self.read_all()?); |
375 | } |
376 | Ok(Some(PaxExtensions::new( |
377 | self.pax_extensions.as_ref().unwrap(), |
378 | ))) |
379 | } |
380 | |
381 | fn unpack_in(&mut self, dst: &Path) -> io::Result<bool> { |
382 | // Notes regarding bsdtar 2.8.3 / libarchive 2.8.3: |
383 | // * Leading '/'s are trimmed. For example, `///test` is treated as |
384 | // `test`. |
385 | // * If the filename contains '..', then the file is skipped when |
386 | // extracting the tarball. |
387 | // * '//' within a filename is effectively skipped. An error is |
388 | // logged, but otherwise the effect is as if any two or more |
389 | // adjacent '/'s within the filename were consolidated into one |
390 | // '/'. |
391 | // |
392 | // Most of this is handled by the `path` module of the standard |
393 | // library, but we specially handle a few cases here as well. |
394 | |
395 | let mut file_dst = dst.to_path_buf(); |
396 | { |
397 | let path = self.path().map_err(|e| { |
398 | TarError::new( |
399 | format!("invalid path in entry header: {}" , self.path_lossy()), |
400 | e, |
401 | ) |
402 | })?; |
403 | for part in path.components() { |
404 | match part { |
405 | // Leading '/' characters, root paths, and '.' |
406 | // components are just ignored and treated as "empty |
407 | // components" |
408 | Component::Prefix(..) | Component::RootDir | Component::CurDir => continue, |
409 | |
410 | // If any part of the filename is '..', then skip over |
411 | // unpacking the file to prevent directory traversal |
412 | // security issues. See, e.g.: CVE-2001-1267, |
413 | // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131 |
414 | Component::ParentDir => return Ok(false), |
415 | |
416 | Component::Normal(part) => file_dst.push(part), |
417 | } |
418 | } |
419 | } |
420 | |
421 | // Skip cases where only slashes or '.' parts were seen, because |
422 | // this is effectively an empty filename. |
423 | if *dst == *file_dst { |
424 | return Ok(true); |
425 | } |
426 | |
427 | // Skip entries without a parent (i.e. outside of FS root) |
428 | let parent = match file_dst.parent() { |
429 | Some(p) => p, |
430 | None => return Ok(false), |
431 | }; |
432 | |
433 | self.ensure_dir_created(&dst, parent) |
434 | .map_err(|e| TarError::new(format!("failed to create ` {}`" , parent.display()), e))?; |
435 | |
436 | let canon_target = self.validate_inside_dst(&dst, parent)?; |
437 | |
438 | self.unpack(Some(&canon_target), &file_dst) |
439 | .map_err(|e| TarError::new(format!("failed to unpack ` {}`" , file_dst.display()), e))?; |
440 | |
441 | Ok(true) |
442 | } |
443 | |
444 | /// Unpack as destination directory `dst`. |
445 | fn unpack_dir(&mut self, dst: &Path) -> io::Result<()> { |
446 | // If the directory already exists just let it slide |
447 | fs::create_dir(dst).or_else(|err| { |
448 | if err.kind() == ErrorKind::AlreadyExists { |
449 | let prev = fs::metadata(dst); |
450 | if prev.map(|m| m.is_dir()).unwrap_or(false) { |
451 | return Ok(()); |
452 | } |
453 | } |
454 | Err(Error::new( |
455 | err.kind(), |
456 | format!(" {} when creating dir {}" , err, dst.display()), |
457 | )) |
458 | }) |
459 | } |
460 | |
461 | /// Returns access to the header of this entry in the archive. |
462 | fn unpack(&mut self, target_base: Option<&Path>, dst: &Path) -> io::Result<Unpacked> { |
463 | fn set_perms_ownerships( |
464 | dst: &Path, |
465 | f: Option<&mut std::fs::File>, |
466 | header: &Header, |
467 | mask: u32, |
468 | perms: bool, |
469 | ownerships: bool, |
470 | ) -> io::Result<()> { |
471 | // ownerships need to be set first to avoid stripping SUID bits in the permissions ... |
472 | if ownerships { |
473 | set_ownerships(dst, &f, header.uid()?, header.gid()?)?; |
474 | } |
475 | // ... then set permissions, SUID bits set here is kept |
476 | if let Ok(mode) = header.mode() { |
477 | set_perms(dst, f, mode, mask, perms)?; |
478 | } |
479 | |
480 | Ok(()) |
481 | } |
482 | |
483 | fn get_mtime(header: &Header) -> Option<FileTime> { |
484 | header.mtime().ok().map(|mtime| { |
485 | // For some more information on this see the comments in |
486 | // `Header::fill_platform_from`, but the general idea is that |
487 | // we're trying to avoid 0-mtime files coming out of archives |
488 | // since some tools don't ingest them well. Perhaps one day |
489 | // when Cargo stops working with 0-mtime archives we can remove |
490 | // this. |
491 | let mtime = if mtime == 0 { 1 } else { mtime }; |
492 | FileTime::from_unix_time(mtime as i64, 0) |
493 | }) |
494 | } |
495 | |
496 | let kind = self.header.entry_type(); |
497 | |
498 | if kind.is_dir() { |
499 | self.unpack_dir(dst)?; |
500 | set_perms_ownerships( |
501 | dst, |
502 | None, |
503 | &self.header, |
504 | self.mask, |
505 | self.preserve_permissions, |
506 | self.preserve_ownerships, |
507 | )?; |
508 | return Ok(Unpacked::__Nonexhaustive); |
509 | } else if kind.is_hard_link() || kind.is_symlink() { |
510 | let src = match self.link_name()? { |
511 | Some(name) => name, |
512 | None => { |
513 | return Err(other(&format!( |
514 | "hard link listed for {} but no link name found" , |
515 | String::from_utf8_lossy(self.header.as_bytes()) |
516 | ))); |
517 | } |
518 | }; |
519 | |
520 | if src.iter().count() == 0 { |
521 | return Err(other(&format!( |
522 | "symlink destination for {} is empty" , |
523 | String::from_utf8_lossy(self.header.as_bytes()) |
524 | ))); |
525 | } |
526 | |
527 | if kind.is_hard_link() { |
528 | let link_src = match target_base { |
529 | // If we're unpacking within a directory then ensure that |
530 | // the destination of this hard link is both present and |
531 | // inside our own directory. This is needed because we want |
532 | // to make sure to not overwrite anything outside the root. |
533 | // |
534 | // Note that this logic is only needed for hard links |
535 | // currently. With symlinks the `validate_inside_dst` which |
536 | // happens before this method as part of `unpack_in` will |
537 | // use canonicalization to ensure this guarantee. For hard |
538 | // links though they're canonicalized to their existing path |
539 | // so we need to validate at this time. |
540 | Some(ref p) => { |
541 | let link_src = p.join(src); |
542 | self.validate_inside_dst(p, &link_src)?; |
543 | link_src |
544 | } |
545 | None => src.into_owned(), |
546 | }; |
547 | fs::hard_link(&link_src, dst).map_err(|err| { |
548 | Error::new( |
549 | err.kind(), |
550 | format!( |
551 | " {} when hard linking {} to {}" , |
552 | err, |
553 | link_src.display(), |
554 | dst.display() |
555 | ), |
556 | ) |
557 | })?; |
558 | } else { |
559 | symlink(&src, dst) |
560 | .or_else(|err_io| { |
561 | if err_io.kind() == io::ErrorKind::AlreadyExists && self.overwrite { |
562 | // remove dest and try once more |
563 | std::fs::remove_file(dst).and_then(|()| symlink(&src, dst)) |
564 | } else { |
565 | Err(err_io) |
566 | } |
567 | }) |
568 | .map_err(|err| { |
569 | Error::new( |
570 | err.kind(), |
571 | format!( |
572 | " {} when symlinking {} to {}" , |
573 | err, |
574 | src.display(), |
575 | dst.display() |
576 | ), |
577 | ) |
578 | })?; |
579 | // While permissions on symlinks are meaningless on most systems, the ownership |
580 | // of symlinks is important as it dictates the access control to the symlink |
581 | // itself. |
582 | if self.preserve_ownerships { |
583 | set_ownerships(dst, &None, self.header.uid()?, self.header.gid()?)?; |
584 | } |
585 | if self.preserve_mtime { |
586 | if let Some(mtime) = get_mtime(&self.header) { |
587 | filetime::set_symlink_file_times(dst, mtime, mtime).map_err(|e| { |
588 | TarError::new(format!("failed to set mtime for ` {}`" , dst.display()), e) |
589 | })?; |
590 | } |
591 | } |
592 | } |
593 | return Ok(Unpacked::__Nonexhaustive); |
594 | |
595 | #[cfg (target_arch = "wasm32" )] |
596 | #[allow (unused_variables)] |
597 | fn symlink(src: &Path, dst: &Path) -> io::Result<()> { |
598 | Err(io::Error::new(io::ErrorKind::Other, "Not implemented" )) |
599 | } |
600 | |
601 | #[cfg (windows)] |
602 | fn symlink(src: &Path, dst: &Path) -> io::Result<()> { |
603 | ::std::os::windows::fs::symlink_file(src, dst) |
604 | } |
605 | |
606 | #[cfg (unix)] |
607 | fn symlink(src: &Path, dst: &Path) -> io::Result<()> { |
608 | ::std::os::unix::fs::symlink(src, dst) |
609 | } |
610 | } else if kind.is_pax_global_extensions() |
611 | || kind.is_pax_local_extensions() |
612 | || kind.is_gnu_longname() |
613 | || kind.is_gnu_longlink() |
614 | { |
615 | return Ok(Unpacked::__Nonexhaustive); |
616 | }; |
617 | |
618 | // Old BSD-tar compatibility. |
619 | // Names that have a trailing slash should be treated as a directory. |
620 | // Only applies to old headers. |
621 | if self.header.as_ustar().is_none() && self.path_bytes().ends_with(b"/" ) { |
622 | self.unpack_dir(dst)?; |
623 | set_perms_ownerships( |
624 | dst, |
625 | None, |
626 | &self.header, |
627 | self.mask, |
628 | self.preserve_permissions, |
629 | self.preserve_ownerships, |
630 | )?; |
631 | return Ok(Unpacked::__Nonexhaustive); |
632 | } |
633 | |
634 | // Note the lack of `else` clause above. According to the FreeBSD |
635 | // documentation: |
636 | // |
637 | // > A POSIX-compliant implementation must treat any unrecognized |
638 | // > typeflag value as a regular file. |
639 | // |
640 | // As a result if we don't recognize the kind we just write out the file |
641 | // as we would normally. |
642 | |
643 | // Ensure we write a new file rather than overwriting in-place which |
644 | // is attackable; if an existing file is found unlink it. |
645 | fn open(dst: &Path) -> io::Result<std::fs::File> { |
646 | OpenOptions::new().write(true).create_new(true).open(dst) |
647 | } |
648 | let mut f = (|| -> io::Result<std::fs::File> { |
649 | let mut f = open(dst).or_else(|err| { |
650 | if err.kind() != ErrorKind::AlreadyExists { |
651 | Err(err) |
652 | } else if self.overwrite { |
653 | match fs::remove_file(dst) { |
654 | Ok(()) => open(dst), |
655 | Err(ref e) if e.kind() == io::ErrorKind::NotFound => open(dst), |
656 | Err(e) => Err(e), |
657 | } |
658 | } else { |
659 | Err(err) |
660 | } |
661 | })?; |
662 | for io in self.data.drain(..) { |
663 | match io { |
664 | EntryIo::Data(mut d) => { |
665 | let expected = d.limit(); |
666 | if io::copy(&mut d, &mut f)? != expected { |
667 | return Err(other("failed to write entire file" )); |
668 | } |
669 | } |
670 | EntryIo::Pad(d) => { |
671 | // TODO: checked cast to i64 |
672 | let to = SeekFrom::Current(d.limit() as i64); |
673 | let size = f.seek(to)?; |
674 | f.set_len(size)?; |
675 | } |
676 | } |
677 | } |
678 | Ok(f) |
679 | })() |
680 | .map_err(|e| { |
681 | let header = self.header.path_bytes(); |
682 | TarError::new( |
683 | format!( |
684 | "failed to unpack ` {}` into ` {}`" , |
685 | String::from_utf8_lossy(&header), |
686 | dst.display() |
687 | ), |
688 | e, |
689 | ) |
690 | })?; |
691 | |
692 | if self.preserve_mtime { |
693 | if let Some(mtime) = get_mtime(&self.header) { |
694 | filetime::set_file_handle_times(&f, Some(mtime), Some(mtime)).map_err(|e| { |
695 | TarError::new(format!("failed to set mtime for ` {}`" , dst.display()), e) |
696 | })?; |
697 | } |
698 | } |
699 | set_perms_ownerships( |
700 | dst, |
701 | Some(&mut f), |
702 | &self.header, |
703 | self.mask, |
704 | self.preserve_permissions, |
705 | self.preserve_ownerships, |
706 | )?; |
707 | if self.unpack_xattrs { |
708 | set_xattrs(self, dst)?; |
709 | } |
710 | return Ok(Unpacked::File(f)); |
711 | |
712 | fn set_ownerships( |
713 | dst: &Path, |
714 | f: &Option<&mut std::fs::File>, |
715 | uid: u64, |
716 | gid: u64, |
717 | ) -> Result<(), TarError> { |
718 | _set_ownerships(dst, f, uid, gid).map_err(|e| { |
719 | TarError::new( |
720 | format!( |
721 | "failed to set ownerships to uid= {:?}, gid= {:?} \ |
722 | for ` {}`" , |
723 | uid, |
724 | gid, |
725 | dst.display() |
726 | ), |
727 | e, |
728 | ) |
729 | }) |
730 | } |
731 | |
732 | #[cfg (unix)] |
733 | fn _set_ownerships( |
734 | dst: &Path, |
735 | f: &Option<&mut std::fs::File>, |
736 | uid: u64, |
737 | gid: u64, |
738 | ) -> io::Result<()> { |
739 | use std::os::unix::prelude::*; |
740 | |
741 | let uid: libc::uid_t = uid.try_into().map_err(|_| { |
742 | io::Error::new(io::ErrorKind::Other, format!("UID {} is too large!" , uid)) |
743 | })?; |
744 | let gid: libc::gid_t = gid.try_into().map_err(|_| { |
745 | io::Error::new(io::ErrorKind::Other, format!("GID {} is too large!" , gid)) |
746 | })?; |
747 | match f { |
748 | Some(f) => unsafe { |
749 | let fd = f.as_raw_fd(); |
750 | if libc::fchown(fd, uid, gid) != 0 { |
751 | Err(io::Error::last_os_error()) |
752 | } else { |
753 | Ok(()) |
754 | } |
755 | }, |
756 | None => unsafe { |
757 | let path = std::ffi::CString::new(dst.as_os_str().as_bytes()).map_err(|e| { |
758 | io::Error::new( |
759 | io::ErrorKind::Other, |
760 | format!("path contains null character: {:?}" , e), |
761 | ) |
762 | })?; |
763 | if libc::lchown(path.as_ptr(), uid, gid) != 0 { |
764 | Err(io::Error::last_os_error()) |
765 | } else { |
766 | Ok(()) |
767 | } |
768 | }, |
769 | } |
770 | } |
771 | |
772 | // Windows does not support posix numeric ownership IDs |
773 | #[cfg (any(windows, target_arch = "wasm32" ))] |
774 | fn _set_ownerships( |
775 | _: &Path, |
776 | _: &Option<&mut std::fs::File>, |
777 | _: u64, |
778 | _: u64, |
779 | ) -> io::Result<()> { |
780 | Ok(()) |
781 | } |
782 | |
783 | fn set_perms( |
784 | dst: &Path, |
785 | f: Option<&mut std::fs::File>, |
786 | mode: u32, |
787 | mask: u32, |
788 | preserve: bool, |
789 | ) -> Result<(), TarError> { |
790 | _set_perms(dst, f, mode, mask, preserve).map_err(|e| { |
791 | TarError::new( |
792 | format!( |
793 | "failed to set permissions to {:o} \ |
794 | for ` {}`" , |
795 | mode, |
796 | dst.display() |
797 | ), |
798 | e, |
799 | ) |
800 | }) |
801 | } |
802 | |
803 | #[cfg (unix)] |
804 | fn _set_perms( |
805 | dst: &Path, |
806 | f: Option<&mut std::fs::File>, |
807 | mode: u32, |
808 | mask: u32, |
809 | preserve: bool, |
810 | ) -> io::Result<()> { |
811 | use std::os::unix::prelude::*; |
812 | |
813 | let mode = if preserve { mode } else { mode & 0o777 }; |
814 | let mode = mode & !mask; |
815 | let perm = fs::Permissions::from_mode(mode as _); |
816 | match f { |
817 | Some(f) => f.set_permissions(perm), |
818 | None => fs::set_permissions(dst, perm), |
819 | } |
820 | } |
821 | |
822 | #[cfg (windows)] |
823 | fn _set_perms( |
824 | dst: &Path, |
825 | f: Option<&mut std::fs::File>, |
826 | mode: u32, |
827 | _mask: u32, |
828 | _preserve: bool, |
829 | ) -> io::Result<()> { |
830 | if mode & 0o200 == 0o200 { |
831 | return Ok(()); |
832 | } |
833 | match f { |
834 | Some(f) => { |
835 | let mut perm = f.metadata()?.permissions(); |
836 | perm.set_readonly(true); |
837 | f.set_permissions(perm) |
838 | } |
839 | None => { |
840 | let mut perm = fs::metadata(dst)?.permissions(); |
841 | perm.set_readonly(true); |
842 | fs::set_permissions(dst, perm) |
843 | } |
844 | } |
845 | } |
846 | |
847 | #[cfg (target_arch = "wasm32" )] |
848 | #[allow (unused_variables)] |
849 | fn _set_perms( |
850 | dst: &Path, |
851 | f: Option<&mut std::fs::File>, |
852 | mode: u32, |
853 | mask: u32, |
854 | _preserve: bool, |
855 | ) -> io::Result<()> { |
856 | Err(io::Error::new(io::ErrorKind::Other, "Not implemented" )) |
857 | } |
858 | |
859 | #[cfg (all(unix, feature = "xattr" ))] |
860 | fn set_xattrs(me: &mut EntryFields, dst: &Path) -> io::Result<()> { |
861 | use std::ffi::OsStr; |
862 | use std::os::unix::prelude::*; |
863 | |
864 | let exts = match me.pax_extensions() { |
865 | Ok(Some(e)) => e, |
866 | _ => return Ok(()), |
867 | }; |
868 | let exts = exts |
869 | .filter_map(|e| e.ok()) |
870 | .filter_map(|e| { |
871 | let key = e.key_bytes(); |
872 | let prefix = crate::pax::PAX_SCHILYXATTR.as_bytes(); |
873 | key.strip_prefix(prefix).map(|rest| (rest, e)) |
874 | }) |
875 | .map(|(key, e)| (OsStr::from_bytes(key), e.value_bytes())); |
876 | |
877 | for (key, value) in exts { |
878 | xattr::set(dst, key, value).map_err(|e| { |
879 | TarError::new( |
880 | format!( |
881 | "failed to set extended \ |
882 | attributes to {}. \ |
883 | Xattrs: key= {:?}, value= {:?}." , |
884 | dst.display(), |
885 | key, |
886 | String::from_utf8_lossy(value) |
887 | ), |
888 | e, |
889 | ) |
890 | })?; |
891 | } |
892 | |
893 | Ok(()) |
894 | } |
895 | // Windows does not completely support posix xattrs |
896 | // https://en.wikipedia.org/wiki/Extended_file_attributes#Windows_NT |
897 | #[cfg (any(windows, not(feature = "xattr" ), target_arch = "wasm32" ))] |
898 | fn set_xattrs(_: &mut EntryFields, _: &Path) -> io::Result<()> { |
899 | Ok(()) |
900 | } |
901 | } |
902 | |
903 | fn ensure_dir_created(&self, dst: &Path, dir: &Path) -> io::Result<()> { |
904 | let mut ancestor = dir; |
905 | let mut dirs_to_create = Vec::new(); |
906 | while ancestor.symlink_metadata().is_err() { |
907 | dirs_to_create.push(ancestor); |
908 | if let Some(parent) = ancestor.parent() { |
909 | ancestor = parent; |
910 | } else { |
911 | break; |
912 | } |
913 | } |
914 | for ancestor in dirs_to_create.into_iter().rev() { |
915 | if let Some(parent) = ancestor.parent() { |
916 | self.validate_inside_dst(dst, parent)?; |
917 | } |
918 | fs::create_dir_all(ancestor)?; |
919 | } |
920 | Ok(()) |
921 | } |
922 | |
923 | fn validate_inside_dst(&self, dst: &Path, file_dst: &Path) -> io::Result<PathBuf> { |
924 | // Abort if target (canonical) parent is outside of `dst` |
925 | let canon_parent = file_dst.canonicalize().map_err(|err| { |
926 | Error::new( |
927 | err.kind(), |
928 | format!(" {} while canonicalizing {}" , err, file_dst.display()), |
929 | ) |
930 | })?; |
931 | let canon_target = dst.canonicalize().map_err(|err| { |
932 | Error::new( |
933 | err.kind(), |
934 | format!(" {} while canonicalizing {}" , err, dst.display()), |
935 | ) |
936 | })?; |
937 | if !canon_parent.starts_with(&canon_target) { |
938 | let err = TarError::new( |
939 | format!( |
940 | "trying to unpack outside of destination path: {}" , |
941 | canon_target.display() |
942 | ), |
943 | // TODO: use ErrorKind::InvalidInput here? (minor breaking change) |
944 | Error::new(ErrorKind::Other, "Invalid argument" ), |
945 | ); |
946 | return Err(err.into()); |
947 | } |
948 | Ok(canon_target) |
949 | } |
950 | } |
951 | |
952 | impl<'a> Read for EntryFields<'a> { |
953 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
954 | loop { |
955 | match self.data.get_mut(index:0).map(|io: &mut EntryIo<'a>| io.read(buf:into)) { |
956 | Some(Ok(0)) => { |
957 | self.data.remove(index:0); |
958 | } |
959 | Some(r: Result) => return r, |
960 | None => return Ok(0), |
961 | } |
962 | } |
963 | } |
964 | } |
965 | |
966 | impl<'a> Read for EntryIo<'a> { |
967 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
968 | match *self { |
969 | EntryIo::Pad(ref mut io: &mut Take) => io.read(buf:into), |
970 | EntryIo::Data(ref mut io: &mut Take<&ArchiveInner>) => io.read(buf:into), |
971 | } |
972 | } |
973 | } |
974 | |