1 | use std::borrow::Cow; |
2 | use std::cmp; |
3 | use std::fs; |
4 | use std::fs::OpenOptions; |
5 | use std::io::prelude::*; |
6 | use std::io::{self, Error, ErrorKind, SeekFrom}; |
7 | use std::marker; |
8 | use std::path::{Component, Path, PathBuf}; |
9 | |
10 | use filetime::{self, FileTime}; |
11 | |
12 | use crate::archive::ArchiveInner; |
13 | use crate::error::TarError; |
14 | use crate::header::bytes2path; |
15 | use crate::other; |
16 | use crate::{Archive, Header, PaxExtensions}; |
17 | |
18 | /// A read-only view into an entry of an archive. |
19 | /// |
20 | /// This structure is a window into a portion of a borrowed archive which can |
21 | /// be inspected. It acts as a file handle by implementing the Reader trait. An |
22 | /// entry cannot be rewritten once inserted into an archive. |
23 | pub struct Entry<'a, R: 'a + Read> { |
24 | fields: EntryFields<'a>, |
25 | _ignored: marker::PhantomData<&'a Archive<R>>, |
26 | } |
27 | |
28 | // private implementation detail of `Entry`, but concrete (no type parameters) |
29 | // and also all-public to be constructed from other modules. |
30 | pub struct EntryFields<'a> { |
31 | pub long_pathname: Option<Vec<u8>>, |
32 | pub long_linkname: Option<Vec<u8>>, |
33 | pub pax_extensions: Option<Vec<u8>>, |
34 | pub mask: u32, |
35 | pub header: Header, |
36 | pub size: u64, |
37 | pub header_pos: u64, |
38 | pub file_pos: u64, |
39 | pub data: Vec<EntryIo<'a>>, |
40 | pub unpack_xattrs: bool, |
41 | pub preserve_permissions: bool, |
42 | pub preserve_ownerships: bool, |
43 | pub preserve_mtime: bool, |
44 | pub overwrite: bool, |
45 | } |
46 | |
47 | pub enum EntryIo<'a> { |
48 | Pad(io::Take<io::Repeat>), |
49 | Data(io::Take<&'a ArchiveInner<dyn Read + 'a>>), |
50 | } |
51 | |
/// The result of unpacking an entry, handed back so callers can do their own
/// follow-up work on whatever was created.
///
/// Only regular files carry a payload today; further variants (links,
/// directories, ...) may be added later.
#[derive(Debug)]
pub enum Unpacked {
    /// A regular file was written; this is its open handle.
    File(fs::File),
    /// Something other than a regular file (directory, hard link, symlink,
    /// or other node) was handled.
    #[doc(hidden)]
    __Nonexhaustive,
}
63 | |
64 | impl<'a, R: Read> Entry<'a, R> { |
65 | /// Returns the path name for this entry. |
66 | /// |
67 | /// This method may fail if the pathname is not valid Unicode and this is |
68 | /// called on a Windows platform. |
69 | /// |
70 | /// Note that this function will convert any `\` characters to directory |
71 | /// separators, and it will not always return the same value as |
72 | /// `self.header().path()` as some archive formats have support for longer |
73 | /// path names described in separate entries. |
74 | /// |
75 | /// It is recommended to use this method instead of inspecting the `header` |
76 | /// directly to ensure that various archive formats are handled correctly. |
77 | pub fn path(&self) -> io::Result<Cow<Path>> { |
78 | self.fields.path() |
79 | } |
80 | |
81 | /// Returns the raw bytes listed for this entry. |
82 | /// |
83 | /// Note that this function will convert any `\` characters to directory |
84 | /// separators, and it will not always return the same value as |
85 | /// `self.header().path_bytes()` as some archive formats have support for |
86 | /// longer path names described in separate entries. |
87 | pub fn path_bytes(&self) -> Cow<[u8]> { |
88 | self.fields.path_bytes() |
89 | } |
90 | |
91 | /// Returns the link name for this entry, if any is found. |
92 | /// |
93 | /// This method may fail if the pathname is not valid Unicode and this is |
94 | /// called on a Windows platform. `Ok(None)` being returned, however, |
95 | /// indicates that the link name was not present. |
96 | /// |
97 | /// Note that this function will convert any `\` characters to directory |
98 | /// separators, and it will not always return the same value as |
99 | /// `self.header().link_name()` as some archive formats have support for |
100 | /// longer path names described in separate entries. |
101 | /// |
102 | /// It is recommended to use this method instead of inspecting the `header` |
103 | /// directly to ensure that various archive formats are handled correctly. |
104 | pub fn link_name(&self) -> io::Result<Option<Cow<Path>>> { |
105 | self.fields.link_name() |
106 | } |
107 | |
108 | /// Returns the link name for this entry, in bytes, if listed. |
109 | /// |
110 | /// Note that this will not always return the same value as |
111 | /// `self.header().link_name_bytes()` as some archive formats have support for |
112 | /// longer path names described in separate entries. |
113 | pub fn link_name_bytes(&self) -> Option<Cow<[u8]>> { |
114 | self.fields.link_name_bytes() |
115 | } |
116 | |
117 | /// Returns an iterator over the pax extensions contained in this entry. |
118 | /// |
119 | /// Pax extensions are a form of archive where extra metadata is stored in |
120 | /// key/value pairs in entries before the entry they're intended to |
121 | /// describe. For example this can be used to describe long file name or |
122 | /// other metadata like atime/ctime/mtime in more precision. |
123 | /// |
124 | /// The returned iterator will yield key/value pairs for each extension. |
125 | /// |
126 | /// `None` will be returned if this entry does not indicate that it itself |
127 | /// contains extensions, or if there were no previous extensions describing |
128 | /// it. |
129 | /// |
130 | /// Note that global pax extensions are intended to be applied to all |
131 | /// archive entries. |
132 | /// |
133 | /// Also note that this function will read the entire entry if the entry |
134 | /// itself is a list of extensions. |
135 | pub fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions>> { |
136 | self.fields.pax_extensions() |
137 | } |
138 | |
139 | /// Returns access to the header of this entry in the archive. |
140 | /// |
141 | /// This provides access to the metadata for this entry in the archive. |
142 | pub fn header(&self) -> &Header { |
143 | &self.fields.header |
144 | } |
145 | |
146 | /// Returns access to the size of this entry in the archive. |
147 | /// |
148 | /// In the event the size is stored in a pax extension, that size value |
149 | /// will be referenced. Otherwise, the entry size will be stored in the header. |
150 | pub fn size(&self) -> u64 { |
151 | self.fields.size |
152 | } |
153 | |
154 | /// Returns the starting position, in bytes, of the header of this entry in |
155 | /// the archive. |
156 | /// |
157 | /// The header is always a contiguous section of 512 bytes, so if the |
158 | /// underlying reader implements `Seek`, then the slice from `header_pos` to |
159 | /// `header_pos + 512` contains the raw header bytes. |
160 | pub fn raw_header_position(&self) -> u64 { |
161 | self.fields.header_pos |
162 | } |
163 | |
164 | /// Returns the starting position, in bytes, of the file of this entry in |
165 | /// the archive. |
166 | /// |
167 | /// If the file of this entry is continuous (e.g. not a sparse file), and |
168 | /// if the underlying reader implements `Seek`, then the slice from |
169 | /// `file_pos` to `file_pos + entry_size` contains the raw file bytes. |
170 | pub fn raw_file_position(&self) -> u64 { |
171 | self.fields.file_pos |
172 | } |
173 | |
174 | /// Writes this file to the specified location. |
175 | /// |
176 | /// This function will write the entire contents of this file into the |
177 | /// location specified by `dst`. Metadata will also be propagated to the |
178 | /// path `dst`. |
179 | /// |
180 | /// This function will create a file at the path `dst`, and it is required |
181 | /// that the intermediate directories are created. Any existing file at the |
182 | /// location `dst` will be overwritten. |
183 | /// |
184 | /// > **Note**: This function does not have as many sanity checks as |
185 | /// > `Archive::unpack` or `Entry::unpack_in`. As a result if you're |
186 | /// > thinking of unpacking untrusted tarballs you may want to review the |
187 | /// > implementations of the previous two functions and perhaps implement |
188 | /// > similar logic yourself. |
189 | /// |
190 | /// # Examples |
191 | /// |
192 | /// ```no_run |
193 | /// use std::fs::File; |
194 | /// use tar::Archive; |
195 | /// |
196 | /// let mut ar = Archive::new(File::open("foo.tar" ).unwrap()); |
197 | /// |
198 | /// for (i, file) in ar.entries().unwrap().enumerate() { |
199 | /// let mut file = file.unwrap(); |
200 | /// file.unpack(format!("file-{}" , i)).unwrap(); |
201 | /// } |
202 | /// ``` |
203 | pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<Unpacked> { |
204 | self.fields.unpack(None, dst.as_ref()) |
205 | } |
206 | |
207 | /// Extracts this file under the specified path, avoiding security issues. |
208 | /// |
209 | /// This function will write the entire contents of this file into the |
210 | /// location obtained by appending the path of this file in the archive to |
211 | /// `dst`, creating any intermediate directories if needed. Metadata will |
212 | /// also be propagated to the path `dst`. Any existing file at the location |
213 | /// `dst` will be overwritten. |
214 | /// |
215 | /// This function carefully avoids writing outside of `dst`. If the file has |
216 | /// a '..' in its path, this function will skip it and return false. |
217 | /// |
218 | /// # Examples |
219 | /// |
220 | /// ```no_run |
221 | /// use std::fs::File; |
222 | /// use tar::Archive; |
223 | /// |
224 | /// let mut ar = Archive::new(File::open("foo.tar" ).unwrap()); |
225 | /// |
226 | /// for (i, file) in ar.entries().unwrap().enumerate() { |
227 | /// let mut file = file.unwrap(); |
228 | /// file.unpack_in("target" ).unwrap(); |
229 | /// } |
230 | /// ``` |
231 | pub fn unpack_in<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<bool> { |
232 | self.fields.unpack_in(dst.as_ref()) |
233 | } |
234 | |
235 | /// Set the mask of the permission bits when unpacking this entry. |
236 | /// |
237 | /// The mask will be inverted when applying against a mode, similar to how |
238 | /// `umask` works on Unix. In logical notation it looks like: |
239 | /// |
240 | /// ```text |
241 | /// new_mode = old_mode & (~mask) |
242 | /// ``` |
243 | /// |
244 | /// The mask is 0 by default and is currently only implemented on Unix. |
245 | pub fn set_mask(&mut self, mask: u32) { |
246 | self.fields.mask = mask; |
247 | } |
248 | |
249 | /// Indicate whether extended file attributes (xattrs on Unix) are preserved |
250 | /// when unpacking this entry. |
251 | /// |
252 | /// This flag is disabled by default and is currently only implemented on |
253 | /// Unix using xattr support. This may eventually be implemented for |
254 | /// Windows, however, if other archive implementations are found which do |
255 | /// this as well. |
256 | pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) { |
257 | self.fields.unpack_xattrs = unpack_xattrs; |
258 | } |
259 | |
260 | /// Indicate whether extended permissions (like suid on Unix) are preserved |
261 | /// when unpacking this entry. |
262 | /// |
263 | /// This flag is disabled by default and is currently only implemented on |
264 | /// Unix. |
265 | pub fn set_preserve_permissions(&mut self, preserve: bool) { |
266 | self.fields.preserve_permissions = preserve; |
267 | } |
268 | |
269 | /// Indicate whether access time information is preserved when unpacking |
270 | /// this entry. |
271 | /// |
272 | /// This flag is enabled by default. |
273 | pub fn set_preserve_mtime(&mut self, preserve: bool) { |
274 | self.fields.preserve_mtime = preserve; |
275 | } |
276 | } |
277 | |
278 | impl<'a, R: Read> Read for Entry<'a, R> { |
279 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
280 | self.fields.read(buf:into) |
281 | } |
282 | } |
283 | |
284 | impl<'a> EntryFields<'a> { |
285 | pub fn from<R: Read>(entry: Entry<R>) -> EntryFields { |
286 | entry.fields |
287 | } |
288 | |
289 | pub fn into_entry<R: Read>(self) -> Entry<'a, R> { |
290 | Entry { |
291 | fields: self, |
292 | _ignored: marker::PhantomData, |
293 | } |
294 | } |
295 | |
296 | pub fn read_all(&mut self) -> io::Result<Vec<u8>> { |
297 | // Preallocate some data but don't let ourselves get too crazy now. |
298 | let cap = cmp::min(self.size, 128 * 1024); |
299 | let mut v = Vec::with_capacity(cap as usize); |
300 | self.read_to_end(&mut v).map(|_| v) |
301 | } |
302 | |
303 | fn path(&self) -> io::Result<Cow<Path>> { |
304 | bytes2path(self.path_bytes()) |
305 | } |
306 | |
307 | fn path_bytes(&self) -> Cow<[u8]> { |
308 | match self.long_pathname { |
309 | Some(ref bytes) => { |
310 | if let Some(&0) = bytes.last() { |
311 | Cow::Borrowed(&bytes[..bytes.len() - 1]) |
312 | } else { |
313 | Cow::Borrowed(bytes) |
314 | } |
315 | } |
316 | None => { |
317 | if let Some(ref pax) = self.pax_extensions { |
318 | let pax = PaxExtensions::new(pax) |
319 | .filter_map(|f| f.ok()) |
320 | .find(|f| f.key_bytes() == b"path" ) |
321 | .map(|f| f.value_bytes()); |
322 | if let Some(field) = pax { |
323 | return Cow::Borrowed(field); |
324 | } |
325 | } |
326 | self.header.path_bytes() |
327 | } |
328 | } |
329 | } |
330 | |
331 | /// Gets the path in a "lossy" way, used for error reporting ONLY. |
332 | fn path_lossy(&self) -> String { |
333 | String::from_utf8_lossy(&self.path_bytes()).to_string() |
334 | } |
335 | |
336 | fn link_name(&self) -> io::Result<Option<Cow<Path>>> { |
337 | match self.link_name_bytes() { |
338 | Some(bytes) => bytes2path(bytes).map(Some), |
339 | None => Ok(None), |
340 | } |
341 | } |
342 | |
343 | fn link_name_bytes(&self) -> Option<Cow<[u8]>> { |
344 | match self.long_linkname { |
345 | Some(ref bytes) => { |
346 | if let Some(&0) = bytes.last() { |
347 | Some(Cow::Borrowed(&bytes[..bytes.len() - 1])) |
348 | } else { |
349 | Some(Cow::Borrowed(bytes)) |
350 | } |
351 | } |
352 | None => { |
353 | if let Some(ref pax) = self.pax_extensions { |
354 | let pax = PaxExtensions::new(pax) |
355 | .filter_map(|f| f.ok()) |
356 | .find(|f| f.key_bytes() == b"linkpath" ) |
357 | .map(|f| f.value_bytes()); |
358 | if let Some(field) = pax { |
359 | return Some(Cow::Borrowed(field)); |
360 | } |
361 | } |
362 | self.header.link_name_bytes() |
363 | } |
364 | } |
365 | } |
366 | |
367 | fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions>> { |
368 | if self.pax_extensions.is_none() { |
369 | if !self.header.entry_type().is_pax_global_extensions() |
370 | && !self.header.entry_type().is_pax_local_extensions() |
371 | { |
372 | return Ok(None); |
373 | } |
374 | self.pax_extensions = Some(self.read_all()?); |
375 | } |
376 | Ok(Some(PaxExtensions::new( |
377 | self.pax_extensions.as_ref().unwrap(), |
378 | ))) |
379 | } |
380 | |
381 | fn unpack_in(&mut self, dst: &Path) -> io::Result<bool> { |
382 | // Notes regarding bsdtar 2.8.3 / libarchive 2.8.3: |
383 | // * Leading '/'s are trimmed. For example, `///test` is treated as |
384 | // `test`. |
385 | // * If the filename contains '..', then the file is skipped when |
386 | // extracting the tarball. |
387 | // * '//' within a filename is effectively skipped. An error is |
388 | // logged, but otherwise the effect is as if any two or more |
389 | // adjacent '/'s within the filename were consolidated into one |
390 | // '/'. |
391 | // |
392 | // Most of this is handled by the `path` module of the standard |
393 | // library, but we specially handle a few cases here as well. |
394 | |
395 | let mut file_dst = dst.to_path_buf(); |
396 | { |
397 | let path = self.path().map_err(|e| { |
398 | TarError::new( |
399 | format!("invalid path in entry header: {}" , self.path_lossy()), |
400 | e, |
401 | ) |
402 | })?; |
403 | for part in path.components() { |
404 | match part { |
405 | // Leading '/' characters, root paths, and '.' |
406 | // components are just ignored and treated as "empty |
407 | // components" |
408 | Component::Prefix(..) | Component::RootDir | Component::CurDir => continue, |
409 | |
410 | // If any part of the filename is '..', then skip over |
411 | // unpacking the file to prevent directory traversal |
412 | // security issues. See, e.g.: CVE-2001-1267, |
413 | // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131 |
414 | Component::ParentDir => return Ok(false), |
415 | |
416 | Component::Normal(part) => file_dst.push(part), |
417 | } |
418 | } |
419 | } |
420 | |
421 | // Skip cases where only slashes or '.' parts were seen, because |
422 | // this is effectively an empty filename. |
423 | if *dst == *file_dst { |
424 | return Ok(true); |
425 | } |
426 | |
427 | // Skip entries without a parent (i.e. outside of FS root) |
428 | let parent = match file_dst.parent() { |
429 | Some(p) => p, |
430 | None => return Ok(false), |
431 | }; |
432 | |
433 | self.ensure_dir_created(&dst, parent) |
434 | .map_err(|e| TarError::new(format!("failed to create ` {}`" , parent.display()), e))?; |
435 | |
436 | let canon_target = self.validate_inside_dst(&dst, parent)?; |
437 | |
438 | self.unpack(Some(&canon_target), &file_dst) |
439 | .map_err(|e| TarError::new(format!("failed to unpack ` {}`" , file_dst.display()), e))?; |
440 | |
441 | Ok(true) |
442 | } |
443 | |
444 | /// Unpack as destination directory `dst`. |
445 | fn unpack_dir(&mut self, dst: &Path) -> io::Result<()> { |
446 | // If the directory already exists just let it slide |
447 | fs::create_dir(dst).or_else(|err| { |
448 | if err.kind() == ErrorKind::AlreadyExists { |
449 | let prev = fs::metadata(dst); |
450 | if prev.map(|m| m.is_dir()).unwrap_or(false) { |
451 | return Ok(()); |
452 | } |
453 | } |
454 | Err(Error::new( |
455 | err.kind(), |
456 | format!(" {} when creating dir {}" , err, dst.display()), |
457 | )) |
458 | }) |
459 | } |
460 | |
461 | /// Returns access to the header of this entry in the archive. |
462 | fn unpack(&mut self, target_base: Option<&Path>, dst: &Path) -> io::Result<Unpacked> { |
463 | fn set_perms_ownerships( |
464 | dst: &Path, |
465 | f: Option<&mut std::fs::File>, |
466 | header: &Header, |
467 | mask: u32, |
468 | perms: bool, |
469 | ownerships: bool, |
470 | ) -> io::Result<()> { |
471 | // ownerships need to be set first to avoid stripping SUID bits in the permissions ... |
472 | if ownerships { |
473 | set_ownerships(dst, &f, header.uid()?, header.gid()?)?; |
474 | } |
475 | // ... then set permissions, SUID bits set here is kept |
476 | if let Ok(mode) = header.mode() { |
477 | set_perms(dst, f, mode, mask, perms)?; |
478 | } |
479 | |
480 | Ok(()) |
481 | } |
482 | |
483 | fn get_mtime(header: &Header) -> Option<FileTime> { |
484 | header.mtime().ok().map(|mtime| { |
485 | // For some more information on this see the comments in |
486 | // `Header::fill_platform_from`, but the general idea is that |
487 | // we're trying to avoid 0-mtime files coming out of archives |
488 | // since some tools don't ingest them well. Perhaps one day |
489 | // when Cargo stops working with 0-mtime archives we can remove |
490 | // this. |
491 | let mtime = if mtime == 0 { 1 } else { mtime }; |
492 | FileTime::from_unix_time(mtime as i64, 0) |
493 | }) |
494 | } |
495 | |
496 | let kind = self.header.entry_type(); |
497 | |
498 | if kind.is_dir() { |
499 | self.unpack_dir(dst)?; |
500 | set_perms_ownerships( |
501 | dst, |
502 | None, |
503 | &self.header, |
504 | self.mask, |
505 | self.preserve_permissions, |
506 | self.preserve_ownerships, |
507 | )?; |
508 | return Ok(Unpacked::__Nonexhaustive); |
509 | } else if kind.is_hard_link() || kind.is_symlink() { |
510 | let src = match self.link_name()? { |
511 | Some(name) => name, |
512 | None => { |
513 | return Err(other(&format!( |
514 | "hard link listed for {} but no link name found" , |
515 | String::from_utf8_lossy(self.header.as_bytes()) |
516 | ))); |
517 | } |
518 | }; |
519 | |
520 | if src.iter().count() == 0 { |
521 | return Err(other(&format!( |
522 | "symlink destination for {} is empty" , |
523 | String::from_utf8_lossy(self.header.as_bytes()) |
524 | ))); |
525 | } |
526 | |
527 | if kind.is_hard_link() { |
528 | let link_src = match target_base { |
529 | // If we're unpacking within a directory then ensure that |
530 | // the destination of this hard link is both present and |
531 | // inside our own directory. This is needed because we want |
532 | // to make sure to not overwrite anything outside the root. |
533 | // |
534 | // Note that this logic is only needed for hard links |
535 | // currently. With symlinks the `validate_inside_dst` which |
536 | // happens before this method as part of `unpack_in` will |
537 | // use canonicalization to ensure this guarantee. For hard |
538 | // links though they're canonicalized to their existing path |
539 | // so we need to validate at this time. |
540 | Some(ref p) => { |
541 | let link_src = p.join(src); |
542 | self.validate_inside_dst(p, &link_src)?; |
543 | link_src |
544 | } |
545 | None => src.into_owned(), |
546 | }; |
547 | fs::hard_link(&link_src, dst).map_err(|err| { |
548 | Error::new( |
549 | err.kind(), |
550 | format!( |
551 | " {} when hard linking {} to {}" , |
552 | err, |
553 | link_src.display(), |
554 | dst.display() |
555 | ), |
556 | ) |
557 | })?; |
558 | } else { |
559 | symlink(&src, dst) |
560 | .or_else(|err_io| { |
561 | if err_io.kind() == io::ErrorKind::AlreadyExists && self.overwrite { |
562 | // remove dest and try once more |
563 | std::fs::remove_file(dst).and_then(|()| symlink(&src, dst)) |
564 | } else { |
565 | Err(err_io) |
566 | } |
567 | }) |
568 | .map_err(|err| { |
569 | Error::new( |
570 | err.kind(), |
571 | format!( |
572 | " {} when symlinking {} to {}" , |
573 | err, |
574 | src.display(), |
575 | dst.display() |
576 | ), |
577 | ) |
578 | })?; |
579 | if self.preserve_mtime { |
580 | if let Some(mtime) = get_mtime(&self.header) { |
581 | filetime::set_symlink_file_times(dst, mtime, mtime).map_err(|e| { |
582 | TarError::new(format!("failed to set mtime for ` {}`" , dst.display()), e) |
583 | })?; |
584 | } |
585 | } |
586 | } |
587 | return Ok(Unpacked::__Nonexhaustive); |
588 | |
589 | #[cfg (target_arch = "wasm32" )] |
590 | #[allow (unused_variables)] |
591 | fn symlink(src: &Path, dst: &Path) -> io::Result<()> { |
592 | Err(io::Error::new(io::ErrorKind::Other, "Not implemented" )) |
593 | } |
594 | |
595 | #[cfg (windows)] |
596 | fn symlink(src: &Path, dst: &Path) -> io::Result<()> { |
597 | ::std::os::windows::fs::symlink_file(src, dst) |
598 | } |
599 | |
600 | #[cfg (unix)] |
601 | fn symlink(src: &Path, dst: &Path) -> io::Result<()> { |
602 | ::std::os::unix::fs::symlink(src, dst) |
603 | } |
604 | } else if kind.is_pax_global_extensions() |
605 | || kind.is_pax_local_extensions() |
606 | || kind.is_gnu_longname() |
607 | || kind.is_gnu_longlink() |
608 | { |
609 | return Ok(Unpacked::__Nonexhaustive); |
610 | }; |
611 | |
612 | // Old BSD-tar compatibility. |
613 | // Names that have a trailing slash should be treated as a directory. |
614 | // Only applies to old headers. |
615 | if self.header.as_ustar().is_none() && self.path_bytes().ends_with(b"/" ) { |
616 | self.unpack_dir(dst)?; |
617 | set_perms_ownerships( |
618 | dst, |
619 | None, |
620 | &self.header, |
621 | self.mask, |
622 | self.preserve_permissions, |
623 | self.preserve_ownerships, |
624 | )?; |
625 | return Ok(Unpacked::__Nonexhaustive); |
626 | } |
627 | |
628 | // Note the lack of `else` clause above. According to the FreeBSD |
629 | // documentation: |
630 | // |
631 | // > A POSIX-compliant implementation must treat any unrecognized |
632 | // > typeflag value as a regular file. |
633 | // |
634 | // As a result if we don't recognize the kind we just write out the file |
635 | // as we would normally. |
636 | |
637 | // Ensure we write a new file rather than overwriting in-place which |
638 | // is attackable; if an existing file is found unlink it. |
639 | fn open(dst: &Path) -> io::Result<std::fs::File> { |
640 | OpenOptions::new().write(true).create_new(true).open(dst) |
641 | } |
642 | let mut f = (|| -> io::Result<std::fs::File> { |
643 | let mut f = open(dst).or_else(|err| { |
644 | if err.kind() != ErrorKind::AlreadyExists { |
645 | Err(err) |
646 | } else if self.overwrite { |
647 | match fs::remove_file(dst) { |
648 | Ok(()) => open(dst), |
649 | Err(ref e) if e.kind() == io::ErrorKind::NotFound => open(dst), |
650 | Err(e) => Err(e), |
651 | } |
652 | } else { |
653 | Err(err) |
654 | } |
655 | })?; |
656 | for io in self.data.drain(..) { |
657 | match io { |
658 | EntryIo::Data(mut d) => { |
659 | let expected = d.limit(); |
660 | if io::copy(&mut d, &mut f)? != expected { |
661 | return Err(other("failed to write entire file" )); |
662 | } |
663 | } |
664 | EntryIo::Pad(d) => { |
665 | // TODO: checked cast to i64 |
666 | let to = SeekFrom::Current(d.limit() as i64); |
667 | let size = f.seek(to)?; |
668 | f.set_len(size)?; |
669 | } |
670 | } |
671 | } |
672 | Ok(f) |
673 | })() |
674 | .map_err(|e| { |
675 | let header = self.header.path_bytes(); |
676 | TarError::new( |
677 | format!( |
678 | "failed to unpack ` {}` into ` {}`" , |
679 | String::from_utf8_lossy(&header), |
680 | dst.display() |
681 | ), |
682 | e, |
683 | ) |
684 | })?; |
685 | |
686 | if self.preserve_mtime { |
687 | if let Some(mtime) = get_mtime(&self.header) { |
688 | filetime::set_file_handle_times(&f, Some(mtime), Some(mtime)).map_err(|e| { |
689 | TarError::new(format!("failed to set mtime for ` {}`" , dst.display()), e) |
690 | })?; |
691 | } |
692 | } |
693 | set_perms_ownerships( |
694 | dst, |
695 | Some(&mut f), |
696 | &self.header, |
697 | self.mask, |
698 | self.preserve_permissions, |
699 | self.preserve_ownerships, |
700 | )?; |
701 | if self.unpack_xattrs { |
702 | set_xattrs(self, dst)?; |
703 | } |
704 | return Ok(Unpacked::File(f)); |
705 | |
706 | fn set_ownerships( |
707 | dst: &Path, |
708 | f: &Option<&mut std::fs::File>, |
709 | uid: u64, |
710 | gid: u64, |
711 | ) -> Result<(), TarError> { |
712 | _set_ownerships(dst, f, uid, gid).map_err(|e| { |
713 | TarError::new( |
714 | format!( |
715 | "failed to set ownerships to uid= {:?}, gid= {:?} \ |
716 | for ` {}`" , |
717 | uid, |
718 | gid, |
719 | dst.display() |
720 | ), |
721 | e, |
722 | ) |
723 | }) |
724 | } |
725 | |
726 | #[cfg (unix)] |
727 | fn _set_ownerships( |
728 | dst: &Path, |
729 | f: &Option<&mut std::fs::File>, |
730 | uid: u64, |
731 | gid: u64, |
732 | ) -> io::Result<()> { |
733 | use std::convert::TryInto; |
734 | use std::os::unix::prelude::*; |
735 | |
736 | let uid: libc::uid_t = uid.try_into().map_err(|_| { |
737 | io::Error::new(io::ErrorKind::Other, format!("UID {} is too large!" , uid)) |
738 | })?; |
739 | let gid: libc::gid_t = gid.try_into().map_err(|_| { |
740 | io::Error::new(io::ErrorKind::Other, format!("GID {} is too large!" , gid)) |
741 | })?; |
742 | match f { |
743 | Some(f) => unsafe { |
744 | let fd = f.as_raw_fd(); |
745 | if libc::fchown(fd, uid, gid) != 0 { |
746 | Err(io::Error::last_os_error()) |
747 | } else { |
748 | Ok(()) |
749 | } |
750 | }, |
751 | None => unsafe { |
752 | let path = std::ffi::CString::new(dst.as_os_str().as_bytes()).map_err(|e| { |
753 | io::Error::new( |
754 | io::ErrorKind::Other, |
755 | format!("path contains null character: {:?}" , e), |
756 | ) |
757 | })?; |
758 | if libc::lchown(path.as_ptr(), uid, gid) != 0 { |
759 | Err(io::Error::last_os_error()) |
760 | } else { |
761 | Ok(()) |
762 | } |
763 | }, |
764 | } |
765 | } |
766 | |
767 | // Windows does not support posix numeric ownership IDs |
768 | #[cfg (any(windows, target_arch = "wasm32" ))] |
769 | fn _set_ownerships( |
770 | _: &Path, |
771 | _: &Option<&mut std::fs::File>, |
772 | _: u64, |
773 | _: u64, |
774 | ) -> io::Result<()> { |
775 | Ok(()) |
776 | } |
777 | |
778 | fn set_perms( |
779 | dst: &Path, |
780 | f: Option<&mut std::fs::File>, |
781 | mode: u32, |
782 | mask: u32, |
783 | preserve: bool, |
784 | ) -> Result<(), TarError> { |
785 | _set_perms(dst, f, mode, mask, preserve).map_err(|e| { |
786 | TarError::new( |
787 | format!( |
788 | "failed to set permissions to {:o} \ |
789 | for ` {}`" , |
790 | mode, |
791 | dst.display() |
792 | ), |
793 | e, |
794 | ) |
795 | }) |
796 | } |
797 | |
798 | #[cfg (unix)] |
799 | fn _set_perms( |
800 | dst: &Path, |
801 | f: Option<&mut std::fs::File>, |
802 | mode: u32, |
803 | mask: u32, |
804 | preserve: bool, |
805 | ) -> io::Result<()> { |
806 | use std::os::unix::prelude::*; |
807 | |
808 | let mode = if preserve { mode } else { mode & 0o777 }; |
809 | let mode = mode & !mask; |
810 | let perm = fs::Permissions::from_mode(mode as _); |
811 | match f { |
812 | Some(f) => f.set_permissions(perm), |
813 | None => fs::set_permissions(dst, perm), |
814 | } |
815 | } |
816 | |
817 | #[cfg (windows)] |
818 | fn _set_perms( |
819 | dst: &Path, |
820 | f: Option<&mut std::fs::File>, |
821 | mode: u32, |
822 | _mask: u32, |
823 | _preserve: bool, |
824 | ) -> io::Result<()> { |
825 | if mode & 0o200 == 0o200 { |
826 | return Ok(()); |
827 | } |
828 | match f { |
829 | Some(f) => { |
830 | let mut perm = f.metadata()?.permissions(); |
831 | perm.set_readonly(true); |
832 | f.set_permissions(perm) |
833 | } |
834 | None => { |
835 | let mut perm = fs::metadata(dst)?.permissions(); |
836 | perm.set_readonly(true); |
837 | fs::set_permissions(dst, perm) |
838 | } |
839 | } |
840 | } |
841 | |
842 | #[cfg (target_arch = "wasm32" )] |
843 | #[allow (unused_variables)] |
844 | fn _set_perms( |
845 | dst: &Path, |
846 | f: Option<&mut std::fs::File>, |
847 | mode: u32, |
848 | mask: u32, |
849 | _preserve: bool, |
850 | ) -> io::Result<()> { |
851 | Err(io::Error::new(io::ErrorKind::Other, "Not implemented" )) |
852 | } |
853 | |
854 | #[cfg (all(unix, feature = "xattr" ))] |
855 | fn set_xattrs(me: &mut EntryFields, dst: &Path) -> io::Result<()> { |
856 | use std::ffi::OsStr; |
857 | use std::os::unix::prelude::*; |
858 | |
859 | let exts = match me.pax_extensions() { |
860 | Ok(Some(e)) => e, |
861 | _ => return Ok(()), |
862 | }; |
863 | let exts = exts |
864 | .filter_map(|e| e.ok()) |
865 | .filter_map(|e| { |
866 | let key = e.key_bytes(); |
867 | let prefix = b"SCHILY.xattr." ; |
868 | if key.starts_with(prefix) { |
869 | Some((&key[prefix.len()..], e)) |
870 | } else { |
871 | None |
872 | } |
873 | }) |
874 | .map(|(key, e)| (OsStr::from_bytes(key), e.value_bytes())); |
875 | |
876 | for (key, value) in exts { |
877 | xattr::set(dst, key, value).map_err(|e| { |
878 | TarError::new( |
879 | format!( |
880 | "failed to set extended \ |
881 | attributes to {}. \ |
882 | Xattrs: key= {:?}, value= {:?}." , |
883 | dst.display(), |
884 | key, |
885 | String::from_utf8_lossy(value) |
886 | ), |
887 | e, |
888 | ) |
889 | })?; |
890 | } |
891 | |
892 | Ok(()) |
893 | } |
894 | // Windows does not completely support posix xattrs |
895 | // https://en.wikipedia.org/wiki/Extended_file_attributes#Windows_NT |
896 | #[cfg (any(windows, not(feature = "xattr" ), target_arch = "wasm32" ))] |
897 | fn set_xattrs(_: &mut EntryFields, _: &Path) -> io::Result<()> { |
898 | Ok(()) |
899 | } |
900 | } |
901 | |
902 | fn ensure_dir_created(&self, dst: &Path, dir: &Path) -> io::Result<()> { |
903 | let mut ancestor = dir; |
904 | let mut dirs_to_create = Vec::new(); |
905 | while ancestor.symlink_metadata().is_err() { |
906 | dirs_to_create.push(ancestor); |
907 | if let Some(parent) = ancestor.parent() { |
908 | ancestor = parent; |
909 | } else { |
910 | break; |
911 | } |
912 | } |
913 | for ancestor in dirs_to_create.into_iter().rev() { |
914 | if let Some(parent) = ancestor.parent() { |
915 | self.validate_inside_dst(dst, parent)?; |
916 | } |
917 | fs::create_dir_all(ancestor)?; |
918 | } |
919 | Ok(()) |
920 | } |
921 | |
922 | fn validate_inside_dst(&self, dst: &Path, file_dst: &Path) -> io::Result<PathBuf> { |
923 | // Abort if target (canonical) parent is outside of `dst` |
924 | let canon_parent = file_dst.canonicalize().map_err(|err| { |
925 | Error::new( |
926 | err.kind(), |
927 | format!(" {} while canonicalizing {}" , err, file_dst.display()), |
928 | ) |
929 | })?; |
930 | let canon_target = dst.canonicalize().map_err(|err| { |
931 | Error::new( |
932 | err.kind(), |
933 | format!(" {} while canonicalizing {}" , err, dst.display()), |
934 | ) |
935 | })?; |
936 | if !canon_parent.starts_with(&canon_target) { |
937 | let err = TarError::new( |
938 | format!( |
939 | "trying to unpack outside of destination path: {}" , |
940 | canon_target.display() |
941 | ), |
942 | // TODO: use ErrorKind::InvalidInput here? (minor breaking change) |
943 | Error::new(ErrorKind::Other, "Invalid argument" ), |
944 | ); |
945 | return Err(err.into()); |
946 | } |
947 | Ok(canon_target) |
948 | } |
949 | } |
950 | |
951 | impl<'a> Read for EntryFields<'a> { |
952 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
953 | loop { |
954 | match self.data.get_mut(index:0).map(|io: &mut EntryIo<'_>| io.read(buf:into)) { |
955 | Some(Ok(0)) => { |
956 | self.data.remove(index:0); |
957 | } |
958 | Some(r: Result) => return r, |
959 | None => return Ok(0), |
960 | } |
961 | } |
962 | } |
963 | } |
964 | |
965 | impl<'a> Read for EntryIo<'a> { |
966 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
967 | match *self { |
968 | EntryIo::Pad(ref mut io: &mut Take) => io.read(buf:into), |
969 | EntryIo::Data(ref mut io: &mut Take<&ArchiveInner>) => io.read(buf:into), |
970 | } |
971 | } |
972 | } |
973 | |