| 1 | use std::borrow::Cow; |
| 2 | use std::cmp; |
| 3 | use std::fs; |
| 4 | use std::fs::OpenOptions; |
| 5 | use std::io::prelude::*; |
| 6 | use std::io::{self, Error, ErrorKind, SeekFrom}; |
| 7 | use std::marker; |
| 8 | use std::path::{Component, Path, PathBuf}; |
| 9 | |
| 10 | use filetime::{self, FileTime}; |
| 11 | |
| 12 | use crate::archive::ArchiveInner; |
| 13 | use crate::error::TarError; |
| 14 | use crate::header::bytes2path; |
| 15 | use crate::other; |
| 16 | use crate::{Archive, Header, PaxExtensions}; |
| 17 | |
/// A read-only view into an entry of an archive.
///
/// This structure is a window into a portion of a borrowed archive which can
/// be inspected. It acts as a file handle by implementing the `Read` trait. An
/// entry cannot be rewritten once inserted into an archive.
pub struct Entry<'a, R: 'a + Read> {
    // Concrete, non-generic state; the methods on `Entry` delegate to it.
    fields: EntryFields<'a>,
    // Zero-sized marker that ties this entry to a borrow of `Archive<R>`
    // without storing a reference to the archive itself.
    _ignored: marker::PhantomData<&'a Archive<R>>,
}
| 27 | |
// Private implementation detail of `Entry`, but concrete (no type parameters)
// and also all-public so that it can be constructed from other modules.
pub struct EntryFields<'a> {
    /// Long path override. When set, `path_bytes` returns it (minus one
    /// trailing NUL byte, if present) in preference to any other source.
    pub long_pathname: Option<Vec<u8>>,
    /// Long link-name override. When set, `link_name_bytes` returns it (minus
    /// one trailing NUL byte, if present) in preference to any other source.
    pub long_linkname: Option<Vec<u8>>,
    /// Raw pax extension data, parsed on demand with `PaxExtensions::new`.
    /// Filled lazily by `pax_extensions()` when the entry itself is a pax
    /// extension record.
    pub pax_extensions: Option<Vec<u8>>,
    /// Permission bits to clear when unpacking (`mode & !mask`).
    pub mask: u32,
    /// The header of this entry.
    pub header: Header,
    /// Size reported by `Entry::size`; also used as a preallocation hint in
    /// `read_all`.
    pub size: u64,
    /// Value reported by `Entry::raw_header_position`.
    pub header_pos: u64,
    /// Value reported by `Entry::raw_file_position`.
    pub file_pos: u64,
    /// Segments that make up the entry's contents; they are read front to
    /// back by the `Read` implementation and drained by `unpack`.
    pub data: Vec<EntryIo<'a>>,
    /// Whether `unpack` applies extended attributes found in pax extensions.
    pub unpack_xattrs: bool,
    /// Whether `unpack` keeps mode bits beyond `0o777` (Unix only).
    pub preserve_permissions: bool,
    /// Whether `unpack` applies the uid/gid recorded in the header.
    pub preserve_ownerships: bool,
    /// Whether `unpack` applies the mtime recorded in the header.
    pub preserve_mtime: bool,
    /// Whether `unpack` may remove an existing file or symlink at the
    /// destination and retry instead of failing with `AlreadyExists`.
    pub overwrite: bool,
}
| 46 | |
/// One segment of an entry's contents; `EntryFields::data` holds a sequence
/// of these.
pub enum EntryIo<'a> {
    /// A bounded run of a repeated byte. When unpacking to a file this is not
    /// written out; the file position is advanced by the run's length and the
    /// file is extended to that position instead.
    /// NOTE(review): the fill byte is chosen where the value is constructed,
    /// outside this file; presumably zero.
    Pad(io::Take<io::Repeat>),
    /// A bounded read of bytes taken from the underlying archive reader.
    Data(io::Take<&'a ArchiveInner<dyn Read + 'a>>),
}
| 51 | |
/// When unpacking items the unpacked thing is returned to allow custom
/// additional handling by users. Today only the `File` is returned; in the
/// future the enum may be extended with kinds for links, directories, etc.
#[derive(Debug)]
pub enum Unpacked {
    /// A file was unpacked.
    File(std::fs::File),
    /// A directory, hardlink, symlink, or other node was unpacked.
    ///
    /// Also returned for pax and GNU long-name/long-link metadata entries,
    /// for which nothing is written to disk.
    #[doc(hidden)]
    __Nonexhaustive,
}
| 63 | |
impl<'a, R: Read> Entry<'a, R> {
    /// Returns the path name for this entry.
    ///
    /// This method may fail if the pathname is not valid Unicode and this is
    /// called on a Windows platform.
    ///
    /// Note that this function will convert any `\` characters to directory
    /// separators, and it will not always return the same value as
    /// `self.header().path()` as some archive formats have support for longer
    /// path names described in separate entries.
    ///
    /// It is recommended to use this method instead of inspecting the `header`
    /// directly to ensure that various archive formats are handled correctly.
    pub fn path(&self) -> io::Result<Cow<Path>> {
        self.fields.path()
    }

    /// Returns the raw bytes of the path listed for this entry.
    ///
    /// Note that this function will convert any `\` characters to directory
    /// separators, and it will not always return the same value as
    /// `self.header().path_bytes()` as some archive formats have support for
    /// longer path names described in separate entries.
    pub fn path_bytes(&self) -> Cow<[u8]> {
        self.fields.path_bytes()
    }

    /// Returns the link name for this entry, if any is found.
    ///
    /// This method may fail if the pathname is not valid Unicode and this is
    /// called on a Windows platform. `Ok(None)` being returned, however,
    /// indicates that the link name was not present.
    ///
    /// Note that this function will convert any `\` characters to directory
    /// separators, and it will not always return the same value as
    /// `self.header().link_name()` as some archive formats have support for
    /// longer path names described in separate entries.
    ///
    /// It is recommended to use this method instead of inspecting the `header`
    /// directly to ensure that various archive formats are handled correctly.
    pub fn link_name(&self) -> io::Result<Option<Cow<Path>>> {
        self.fields.link_name()
    }

    /// Returns the link name for this entry, in bytes, if listed.
    ///
    /// Note that this will not always return the same value as
    /// `self.header().link_name_bytes()` as some archive formats have support for
    /// longer path names described in separate entries.
    pub fn link_name_bytes(&self) -> Option<Cow<[u8]>> {
        self.fields.link_name_bytes()
    }

    /// Returns an iterator over the pax extensions contained in this entry.
    ///
    /// Pax extensions are a form of archive where extra metadata is stored in
    /// key/value pairs in entries before the entry they're intended to
    /// describe. For example this can be used to describe long file names or
    /// other metadata like atime/ctime/mtime in more precision.
    ///
    /// The returned iterator will yield key/value pairs for each extension.
    ///
    /// `None` will be returned if this entry does not indicate that it itself
    /// contains extensions, or if there were no previous extensions describing
    /// it.
    ///
    /// Note that global pax extensions are intended to be applied to all
    /// archive entries.
    ///
    /// Also note that this function will read the entire entry if the entry
    /// itself is a list of extensions.
    pub fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions>> {
        self.fields.pax_extensions()
    }

    /// Returns access to the header of this entry in the archive.
    ///
    /// This provides access to the metadata for this entry in the archive.
    pub fn header(&self) -> &Header {
        &self.fields.header
    }

    /// Returns the size of this entry in the archive.
    ///
    /// In the event the size is stored in a pax extension, that size value
    /// will be referenced. Otherwise, the entry size will be stored in the header.
    pub fn size(&self) -> u64 {
        self.fields.size
    }

    /// Returns the starting position, in bytes, of the header of this entry in
    /// the archive.
    ///
    /// The header is always a contiguous section of 512 bytes, so if the
    /// underlying reader implements `Seek`, then the slice from `header_pos` to
    /// `header_pos + 512` contains the raw header bytes.
    pub fn raw_header_position(&self) -> u64 {
        self.fields.header_pos
    }

    /// Returns the starting position, in bytes, of the file of this entry in
    /// the archive.
    ///
    /// If the file of this entry is continuous (e.g. not a sparse file), and
    /// if the underlying reader implements `Seek`, then the slice from
    /// `file_pos` to `file_pos + entry_size` contains the raw file bytes.
    pub fn raw_file_position(&self) -> u64 {
        self.fields.file_pos
    }

    /// Writes this file to the specified location.
    ///
    /// This function will write the entire contents of this file into the
    /// location specified by `dst`. Metadata will also be propagated to the
    /// path `dst`.
    ///
    /// This function will create a file at the path `dst`, and it is required
    /// that the intermediate directories are created. If a file already exists
    /// at `dst`, it is removed and recreated only when overwriting is enabled
    /// for this entry; otherwise unpacking fails.
    ///
    /// > **Note**: This function does not have as many sanity checks as
    /// > `Archive::unpack` or `Entry::unpack_in`. As a result if you're
    /// > thinking of unpacking untrusted tarballs you may want to review the
    /// > implementations of the previous two functions and perhaps implement
    /// > similar logic yourself.
    ///
    /// # Examples
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    ///
    /// for (i, file) in ar.entries().unwrap().enumerate() {
    ///     let mut file = file.unwrap();
    ///     file.unpack(format!("file-{}", i)).unwrap();
    /// }
    /// ```
    pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<Unpacked> {
        self.fields.unpack(None, dst.as_ref())
    }

    /// Extracts this file under the specified path, avoiding security issues.
    ///
    /// This function will write the entire contents of this file into the
    /// location obtained by appending the path of this file in the archive to
    /// `dst`, creating any intermediate directories if needed. Metadata will
    /// also be propagated to the resulting path.
    ///
    /// This function carefully avoids writing outside of `dst`. If the file has
    /// a '..' in its path, this function will skip it and return `Ok(false)`.
    /// `Ok(true)` is returned otherwise, including when the entry's path is
    /// effectively empty and nothing is written.
    ///
    /// # Examples
    ///
    /// ```no_run
    /// use std::fs::File;
    /// use tar::Archive;
    ///
    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
    ///
    /// for file in ar.entries().unwrap() {
    ///     let mut file = file.unwrap();
    ///     file.unpack_in("target").unwrap();
    /// }
    /// ```
    pub fn unpack_in<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<bool> {
        self.fields.unpack_in(dst.as_ref())
    }

    /// Set the mask of the permission bits when unpacking this entry.
    ///
    /// The mask will be inverted when applying against a mode, similar to how
    /// `umask` works on Unix. In logical notation it looks like:
    ///
    /// ```text
    /// new_mode = old_mode & (~mask)
    /// ```
    ///
    /// The mask is 0 by default and is currently only implemented on Unix.
    pub fn set_mask(&mut self, mask: u32) {
        self.fields.mask = mask;
    }

    /// Indicate whether extended file attributes (xattrs on Unix) are preserved
    /// when unpacking this entry.
    ///
    /// This flag is disabled by default and is currently only implemented on
    /// Unix using xattr support. This may eventually be implemented for
    /// Windows, however, if other archive implementations are found which do
    /// this as well.
    pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
        self.fields.unpack_xattrs = unpack_xattrs;
    }

    /// Indicate whether extended permissions (like suid on Unix) are preserved
    /// when unpacking this entry.
    ///
    /// This flag is disabled by default and is currently only implemented on
    /// Unix.
    pub fn set_preserve_permissions(&mut self, preserve: bool) {
        self.fields.preserve_permissions = preserve;
    }

    /// Indicate whether modification time (mtime) information is preserved
    /// when unpacking this entry.
    ///
    /// This flag is enabled by default.
    pub fn set_preserve_mtime(&mut self, preserve: bool) {
        self.fields.preserve_mtime = preserve;
    }
}
| 277 | |
| 278 | impl<'a, R: Read> Read for Entry<'a, R> { |
| 279 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
| 280 | self.fields.read(buf:into) |
| 281 | } |
| 282 | } |
| 283 | |
| 284 | impl<'a> EntryFields<'a> { |
| 285 | pub fn from<R: Read>(entry: Entry<R>) -> EntryFields { |
| 286 | entry.fields |
| 287 | } |
| 288 | |
| 289 | pub fn into_entry<R: Read>(self) -> Entry<'a, R> { |
| 290 | Entry { |
| 291 | fields: self, |
| 292 | _ignored: marker::PhantomData, |
| 293 | } |
| 294 | } |
| 295 | |
| 296 | pub fn read_all(&mut self) -> io::Result<Vec<u8>> { |
| 297 | // Preallocate some data but don't let ourselves get too crazy now. |
| 298 | let cap = cmp::min(self.size, 128 * 1024); |
| 299 | let mut v = Vec::with_capacity(cap as usize); |
| 300 | self.read_to_end(&mut v).map(|_| v) |
| 301 | } |
| 302 | |
| 303 | fn path(&self) -> io::Result<Cow<Path>> { |
| 304 | bytes2path(self.path_bytes()) |
| 305 | } |
| 306 | |
| 307 | fn path_bytes(&self) -> Cow<[u8]> { |
| 308 | match self.long_pathname { |
| 309 | Some(ref bytes) => { |
| 310 | if let Some(&0) = bytes.last() { |
| 311 | Cow::Borrowed(&bytes[..bytes.len() - 1]) |
| 312 | } else { |
| 313 | Cow::Borrowed(bytes) |
| 314 | } |
| 315 | } |
| 316 | None => { |
| 317 | if let Some(ref pax) = self.pax_extensions { |
| 318 | let pax = PaxExtensions::new(pax) |
| 319 | .filter_map(|f| f.ok()) |
| 320 | .find(|f| f.key_bytes() == b"path" ) |
| 321 | .map(|f| f.value_bytes()); |
| 322 | if let Some(field) = pax { |
| 323 | return Cow::Borrowed(field); |
| 324 | } |
| 325 | } |
| 326 | self.header.path_bytes() |
| 327 | } |
| 328 | } |
| 329 | } |
| 330 | |
| 331 | /// Gets the path in a "lossy" way, used for error reporting ONLY. |
| 332 | fn path_lossy(&self) -> String { |
| 333 | String::from_utf8_lossy(&self.path_bytes()).to_string() |
| 334 | } |
| 335 | |
| 336 | fn link_name(&self) -> io::Result<Option<Cow<Path>>> { |
| 337 | match self.link_name_bytes() { |
| 338 | Some(bytes) => bytes2path(bytes).map(Some), |
| 339 | None => Ok(None), |
| 340 | } |
| 341 | } |
| 342 | |
| 343 | fn link_name_bytes(&self) -> Option<Cow<[u8]>> { |
| 344 | match self.long_linkname { |
| 345 | Some(ref bytes) => { |
| 346 | if let Some(&0) = bytes.last() { |
| 347 | Some(Cow::Borrowed(&bytes[..bytes.len() - 1])) |
| 348 | } else { |
| 349 | Some(Cow::Borrowed(bytes)) |
| 350 | } |
| 351 | } |
| 352 | None => { |
| 353 | if let Some(ref pax) = self.pax_extensions { |
| 354 | let pax = PaxExtensions::new(pax) |
| 355 | .filter_map(|f| f.ok()) |
| 356 | .find(|f| f.key_bytes() == b"linkpath" ) |
| 357 | .map(|f| f.value_bytes()); |
| 358 | if let Some(field) = pax { |
| 359 | return Some(Cow::Borrowed(field)); |
| 360 | } |
| 361 | } |
| 362 | self.header.link_name_bytes() |
| 363 | } |
| 364 | } |
| 365 | } |
| 366 | |
| 367 | fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions>> { |
| 368 | if self.pax_extensions.is_none() { |
| 369 | if !self.header.entry_type().is_pax_global_extensions() |
| 370 | && !self.header.entry_type().is_pax_local_extensions() |
| 371 | { |
| 372 | return Ok(None); |
| 373 | } |
| 374 | self.pax_extensions = Some(self.read_all()?); |
| 375 | } |
| 376 | Ok(Some(PaxExtensions::new( |
| 377 | self.pax_extensions.as_ref().unwrap(), |
| 378 | ))) |
| 379 | } |
| 380 | |
| 381 | fn unpack_in(&mut self, dst: &Path) -> io::Result<bool> { |
| 382 | // Notes regarding bsdtar 2.8.3 / libarchive 2.8.3: |
| 383 | // * Leading '/'s are trimmed. For example, `///test` is treated as |
| 384 | // `test`. |
| 385 | // * If the filename contains '..', then the file is skipped when |
| 386 | // extracting the tarball. |
| 387 | // * '//' within a filename is effectively skipped. An error is |
| 388 | // logged, but otherwise the effect is as if any two or more |
| 389 | // adjacent '/'s within the filename were consolidated into one |
| 390 | // '/'. |
| 391 | // |
| 392 | // Most of this is handled by the `path` module of the standard |
| 393 | // library, but we specially handle a few cases here as well. |
| 394 | |
| 395 | let mut file_dst = dst.to_path_buf(); |
| 396 | { |
| 397 | let path = self.path().map_err(|e| { |
| 398 | TarError::new( |
| 399 | format!("invalid path in entry header: {}" , self.path_lossy()), |
| 400 | e, |
| 401 | ) |
| 402 | })?; |
| 403 | for part in path.components() { |
| 404 | match part { |
| 405 | // Leading '/' characters, root paths, and '.' |
| 406 | // components are just ignored and treated as "empty |
| 407 | // components" |
| 408 | Component::Prefix(..) | Component::RootDir | Component::CurDir => continue, |
| 409 | |
| 410 | // If any part of the filename is '..', then skip over |
| 411 | // unpacking the file to prevent directory traversal |
| 412 | // security issues. See, e.g.: CVE-2001-1267, |
| 413 | // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131 |
| 414 | Component::ParentDir => return Ok(false), |
| 415 | |
| 416 | Component::Normal(part) => file_dst.push(part), |
| 417 | } |
| 418 | } |
| 419 | } |
| 420 | |
| 421 | // Skip cases where only slashes or '.' parts were seen, because |
| 422 | // this is effectively an empty filename. |
| 423 | if *dst == *file_dst { |
| 424 | return Ok(true); |
| 425 | } |
| 426 | |
| 427 | // Skip entries without a parent (i.e. outside of FS root) |
| 428 | let parent = match file_dst.parent() { |
| 429 | Some(p) => p, |
| 430 | None => return Ok(false), |
| 431 | }; |
| 432 | |
| 433 | self.ensure_dir_created(&dst, parent) |
| 434 | .map_err(|e| TarError::new(format!("failed to create ` {}`" , parent.display()), e))?; |
| 435 | |
| 436 | let canon_target = self.validate_inside_dst(&dst, parent)?; |
| 437 | |
| 438 | self.unpack(Some(&canon_target), &file_dst) |
| 439 | .map_err(|e| TarError::new(format!("failed to unpack ` {}`" , file_dst.display()), e))?; |
| 440 | |
| 441 | Ok(true) |
| 442 | } |
| 443 | |
| 444 | /// Unpack as destination directory `dst`. |
| 445 | fn unpack_dir(&mut self, dst: &Path) -> io::Result<()> { |
| 446 | // If the directory already exists just let it slide |
| 447 | fs::create_dir(dst).or_else(|err| { |
| 448 | if err.kind() == ErrorKind::AlreadyExists { |
| 449 | let prev = fs::metadata(dst); |
| 450 | if prev.map(|m| m.is_dir()).unwrap_or(false) { |
| 451 | return Ok(()); |
| 452 | } |
| 453 | } |
| 454 | Err(Error::new( |
| 455 | err.kind(), |
| 456 | format!(" {} when creating dir {}" , err, dst.display()), |
| 457 | )) |
| 458 | }) |
| 459 | } |
| 460 | |
| 461 | /// Returns access to the header of this entry in the archive. |
| 462 | fn unpack(&mut self, target_base: Option<&Path>, dst: &Path) -> io::Result<Unpacked> { |
| 463 | fn set_perms_ownerships( |
| 464 | dst: &Path, |
| 465 | f: Option<&mut std::fs::File>, |
| 466 | header: &Header, |
| 467 | mask: u32, |
| 468 | perms: bool, |
| 469 | ownerships: bool, |
| 470 | ) -> io::Result<()> { |
| 471 | // ownerships need to be set first to avoid stripping SUID bits in the permissions ... |
| 472 | if ownerships { |
| 473 | set_ownerships(dst, &f, header.uid()?, header.gid()?)?; |
| 474 | } |
| 475 | // ... then set permissions, SUID bits set here is kept |
| 476 | if let Ok(mode) = header.mode() { |
| 477 | set_perms(dst, f, mode, mask, perms)?; |
| 478 | } |
| 479 | |
| 480 | Ok(()) |
| 481 | } |
| 482 | |
| 483 | fn get_mtime(header: &Header) -> Option<FileTime> { |
| 484 | header.mtime().ok().map(|mtime| { |
| 485 | // For some more information on this see the comments in |
| 486 | // `Header::fill_platform_from`, but the general idea is that |
| 487 | // we're trying to avoid 0-mtime files coming out of archives |
| 488 | // since some tools don't ingest them well. Perhaps one day |
| 489 | // when Cargo stops working with 0-mtime archives we can remove |
| 490 | // this. |
| 491 | let mtime = if mtime == 0 { 1 } else { mtime }; |
| 492 | FileTime::from_unix_time(mtime as i64, 0) |
| 493 | }) |
| 494 | } |
| 495 | |
| 496 | let kind = self.header.entry_type(); |
| 497 | |
| 498 | if kind.is_dir() { |
| 499 | self.unpack_dir(dst)?; |
| 500 | set_perms_ownerships( |
| 501 | dst, |
| 502 | None, |
| 503 | &self.header, |
| 504 | self.mask, |
| 505 | self.preserve_permissions, |
| 506 | self.preserve_ownerships, |
| 507 | )?; |
| 508 | return Ok(Unpacked::__Nonexhaustive); |
| 509 | } else if kind.is_hard_link() || kind.is_symlink() { |
| 510 | let src = match self.link_name()? { |
| 511 | Some(name) => name, |
| 512 | None => { |
| 513 | return Err(other(&format!( |
| 514 | "hard link listed for {} but no link name found" , |
| 515 | String::from_utf8_lossy(self.header.as_bytes()) |
| 516 | ))); |
| 517 | } |
| 518 | }; |
| 519 | |
| 520 | if src.iter().count() == 0 { |
| 521 | return Err(other(&format!( |
| 522 | "symlink destination for {} is empty" , |
| 523 | String::from_utf8_lossy(self.header.as_bytes()) |
| 524 | ))); |
| 525 | } |
| 526 | |
| 527 | if kind.is_hard_link() { |
| 528 | let link_src = match target_base { |
| 529 | // If we're unpacking within a directory then ensure that |
| 530 | // the destination of this hard link is both present and |
| 531 | // inside our own directory. This is needed because we want |
| 532 | // to make sure to not overwrite anything outside the root. |
| 533 | // |
| 534 | // Note that this logic is only needed for hard links |
| 535 | // currently. With symlinks the `validate_inside_dst` which |
| 536 | // happens before this method as part of `unpack_in` will |
| 537 | // use canonicalization to ensure this guarantee. For hard |
| 538 | // links though they're canonicalized to their existing path |
| 539 | // so we need to validate at this time. |
| 540 | Some(ref p) => { |
| 541 | let link_src = p.join(src); |
| 542 | self.validate_inside_dst(p, &link_src)?; |
| 543 | link_src |
| 544 | } |
| 545 | None => src.into_owned(), |
| 546 | }; |
| 547 | fs::hard_link(&link_src, dst).map_err(|err| { |
| 548 | Error::new( |
| 549 | err.kind(), |
| 550 | format!( |
| 551 | " {} when hard linking {} to {}" , |
| 552 | err, |
| 553 | link_src.display(), |
| 554 | dst.display() |
| 555 | ), |
| 556 | ) |
| 557 | })?; |
| 558 | } else { |
| 559 | symlink(&src, dst) |
| 560 | .or_else(|err_io| { |
| 561 | if err_io.kind() == io::ErrorKind::AlreadyExists && self.overwrite { |
| 562 | // remove dest and try once more |
| 563 | std::fs::remove_file(dst).and_then(|()| symlink(&src, dst)) |
| 564 | } else { |
| 565 | Err(err_io) |
| 566 | } |
| 567 | }) |
| 568 | .map_err(|err| { |
| 569 | Error::new( |
| 570 | err.kind(), |
| 571 | format!( |
| 572 | " {} when symlinking {} to {}" , |
| 573 | err, |
| 574 | src.display(), |
| 575 | dst.display() |
| 576 | ), |
| 577 | ) |
| 578 | })?; |
| 579 | // While permissions on symlinks are meaningless on most systems, the ownership |
| 580 | // of symlinks is important as it dictates the access control to the symlink |
| 581 | // itself. |
| 582 | if self.preserve_ownerships { |
| 583 | set_ownerships(dst, &None, self.header.uid()?, self.header.gid()?)?; |
| 584 | } |
| 585 | if self.preserve_mtime { |
| 586 | if let Some(mtime) = get_mtime(&self.header) { |
| 587 | filetime::set_symlink_file_times(dst, mtime, mtime).map_err(|e| { |
| 588 | TarError::new(format!("failed to set mtime for ` {}`" , dst.display()), e) |
| 589 | })?; |
| 590 | } |
| 591 | } |
| 592 | } |
| 593 | return Ok(Unpacked::__Nonexhaustive); |
| 594 | |
| 595 | #[cfg (target_arch = "wasm32" )] |
| 596 | #[allow (unused_variables)] |
| 597 | fn symlink(src: &Path, dst: &Path) -> io::Result<()> { |
| 598 | Err(io::Error::new(io::ErrorKind::Other, "Not implemented" )) |
| 599 | } |
| 600 | |
| 601 | #[cfg (windows)] |
| 602 | fn symlink(src: &Path, dst: &Path) -> io::Result<()> { |
| 603 | ::std::os::windows::fs::symlink_file(src, dst) |
| 604 | } |
| 605 | |
| 606 | #[cfg (unix)] |
| 607 | fn symlink(src: &Path, dst: &Path) -> io::Result<()> { |
| 608 | ::std::os::unix::fs::symlink(src, dst) |
| 609 | } |
| 610 | } else if kind.is_pax_global_extensions() |
| 611 | || kind.is_pax_local_extensions() |
| 612 | || kind.is_gnu_longname() |
| 613 | || kind.is_gnu_longlink() |
| 614 | { |
| 615 | return Ok(Unpacked::__Nonexhaustive); |
| 616 | }; |
| 617 | |
| 618 | // Old BSD-tar compatibility. |
| 619 | // Names that have a trailing slash should be treated as a directory. |
| 620 | // Only applies to old headers. |
| 621 | if self.header.as_ustar().is_none() && self.path_bytes().ends_with(b"/" ) { |
| 622 | self.unpack_dir(dst)?; |
| 623 | set_perms_ownerships( |
| 624 | dst, |
| 625 | None, |
| 626 | &self.header, |
| 627 | self.mask, |
| 628 | self.preserve_permissions, |
| 629 | self.preserve_ownerships, |
| 630 | )?; |
| 631 | return Ok(Unpacked::__Nonexhaustive); |
| 632 | } |
| 633 | |
| 634 | // Note the lack of `else` clause above. According to the FreeBSD |
| 635 | // documentation: |
| 636 | // |
| 637 | // > A POSIX-compliant implementation must treat any unrecognized |
| 638 | // > typeflag value as a regular file. |
| 639 | // |
| 640 | // As a result if we don't recognize the kind we just write out the file |
| 641 | // as we would normally. |
| 642 | |
| 643 | // Ensure we write a new file rather than overwriting in-place which |
| 644 | // is attackable; if an existing file is found unlink it. |
| 645 | fn open(dst: &Path) -> io::Result<std::fs::File> { |
| 646 | OpenOptions::new().write(true).create_new(true).open(dst) |
| 647 | } |
| 648 | let mut f = (|| -> io::Result<std::fs::File> { |
| 649 | let mut f = open(dst).or_else(|err| { |
| 650 | if err.kind() != ErrorKind::AlreadyExists { |
| 651 | Err(err) |
| 652 | } else if self.overwrite { |
| 653 | match fs::remove_file(dst) { |
| 654 | Ok(()) => open(dst), |
| 655 | Err(ref e) if e.kind() == io::ErrorKind::NotFound => open(dst), |
| 656 | Err(e) => Err(e), |
| 657 | } |
| 658 | } else { |
| 659 | Err(err) |
| 660 | } |
| 661 | })?; |
| 662 | for io in self.data.drain(..) { |
| 663 | match io { |
| 664 | EntryIo::Data(mut d) => { |
| 665 | let expected = d.limit(); |
| 666 | if io::copy(&mut d, &mut f)? != expected { |
| 667 | return Err(other("failed to write entire file" )); |
| 668 | } |
| 669 | } |
| 670 | EntryIo::Pad(d) => { |
| 671 | // TODO: checked cast to i64 |
| 672 | let to = SeekFrom::Current(d.limit() as i64); |
| 673 | let size = f.seek(to)?; |
| 674 | f.set_len(size)?; |
| 675 | } |
| 676 | } |
| 677 | } |
| 678 | Ok(f) |
| 679 | })() |
| 680 | .map_err(|e| { |
| 681 | let header = self.header.path_bytes(); |
| 682 | TarError::new( |
| 683 | format!( |
| 684 | "failed to unpack ` {}` into ` {}`" , |
| 685 | String::from_utf8_lossy(&header), |
| 686 | dst.display() |
| 687 | ), |
| 688 | e, |
| 689 | ) |
| 690 | })?; |
| 691 | |
| 692 | if self.preserve_mtime { |
| 693 | if let Some(mtime) = get_mtime(&self.header) { |
| 694 | filetime::set_file_handle_times(&f, Some(mtime), Some(mtime)).map_err(|e| { |
| 695 | TarError::new(format!("failed to set mtime for ` {}`" , dst.display()), e) |
| 696 | })?; |
| 697 | } |
| 698 | } |
| 699 | set_perms_ownerships( |
| 700 | dst, |
| 701 | Some(&mut f), |
| 702 | &self.header, |
| 703 | self.mask, |
| 704 | self.preserve_permissions, |
| 705 | self.preserve_ownerships, |
| 706 | )?; |
| 707 | if self.unpack_xattrs { |
| 708 | set_xattrs(self, dst)?; |
| 709 | } |
| 710 | return Ok(Unpacked::File(f)); |
| 711 | |
| 712 | fn set_ownerships( |
| 713 | dst: &Path, |
| 714 | f: &Option<&mut std::fs::File>, |
| 715 | uid: u64, |
| 716 | gid: u64, |
| 717 | ) -> Result<(), TarError> { |
| 718 | _set_ownerships(dst, f, uid, gid).map_err(|e| { |
| 719 | TarError::new( |
| 720 | format!( |
| 721 | "failed to set ownerships to uid= {:?}, gid= {:?} \ |
| 722 | for ` {}`" , |
| 723 | uid, |
| 724 | gid, |
| 725 | dst.display() |
| 726 | ), |
| 727 | e, |
| 728 | ) |
| 729 | }) |
| 730 | } |
| 731 | |
| 732 | #[cfg (unix)] |
| 733 | fn _set_ownerships( |
| 734 | dst: &Path, |
| 735 | f: &Option<&mut std::fs::File>, |
| 736 | uid: u64, |
| 737 | gid: u64, |
| 738 | ) -> io::Result<()> { |
| 739 | use std::os::unix::prelude::*; |
| 740 | |
| 741 | let uid: libc::uid_t = uid.try_into().map_err(|_| { |
| 742 | io::Error::new(io::ErrorKind::Other, format!("UID {} is too large!" , uid)) |
| 743 | })?; |
| 744 | let gid: libc::gid_t = gid.try_into().map_err(|_| { |
| 745 | io::Error::new(io::ErrorKind::Other, format!("GID {} is too large!" , gid)) |
| 746 | })?; |
| 747 | match f { |
| 748 | Some(f) => unsafe { |
| 749 | let fd = f.as_raw_fd(); |
| 750 | if libc::fchown(fd, uid, gid) != 0 { |
| 751 | Err(io::Error::last_os_error()) |
| 752 | } else { |
| 753 | Ok(()) |
| 754 | } |
| 755 | }, |
| 756 | None => unsafe { |
| 757 | let path = std::ffi::CString::new(dst.as_os_str().as_bytes()).map_err(|e| { |
| 758 | io::Error::new( |
| 759 | io::ErrorKind::Other, |
| 760 | format!("path contains null character: {:?}" , e), |
| 761 | ) |
| 762 | })?; |
| 763 | if libc::lchown(path.as_ptr(), uid, gid) != 0 { |
| 764 | Err(io::Error::last_os_error()) |
| 765 | } else { |
| 766 | Ok(()) |
| 767 | } |
| 768 | }, |
| 769 | } |
| 770 | } |
| 771 | |
| 772 | // Windows does not support posix numeric ownership IDs |
| 773 | #[cfg (any(windows, target_arch = "wasm32" ))] |
| 774 | fn _set_ownerships( |
| 775 | _: &Path, |
| 776 | _: &Option<&mut std::fs::File>, |
| 777 | _: u64, |
| 778 | _: u64, |
| 779 | ) -> io::Result<()> { |
| 780 | Ok(()) |
| 781 | } |
| 782 | |
| 783 | fn set_perms( |
| 784 | dst: &Path, |
| 785 | f: Option<&mut std::fs::File>, |
| 786 | mode: u32, |
| 787 | mask: u32, |
| 788 | preserve: bool, |
| 789 | ) -> Result<(), TarError> { |
| 790 | _set_perms(dst, f, mode, mask, preserve).map_err(|e| { |
| 791 | TarError::new( |
| 792 | format!( |
| 793 | "failed to set permissions to {:o} \ |
| 794 | for ` {}`" , |
| 795 | mode, |
| 796 | dst.display() |
| 797 | ), |
| 798 | e, |
| 799 | ) |
| 800 | }) |
| 801 | } |
| 802 | |
| 803 | #[cfg (unix)] |
| 804 | fn _set_perms( |
| 805 | dst: &Path, |
| 806 | f: Option<&mut std::fs::File>, |
| 807 | mode: u32, |
| 808 | mask: u32, |
| 809 | preserve: bool, |
| 810 | ) -> io::Result<()> { |
| 811 | use std::os::unix::prelude::*; |
| 812 | |
| 813 | let mode = if preserve { mode } else { mode & 0o777 }; |
| 814 | let mode = mode & !mask; |
| 815 | let perm = fs::Permissions::from_mode(mode as _); |
| 816 | match f { |
| 817 | Some(f) => f.set_permissions(perm), |
| 818 | None => fs::set_permissions(dst, perm), |
| 819 | } |
| 820 | } |
| 821 | |
| 822 | #[cfg (windows)] |
| 823 | fn _set_perms( |
| 824 | dst: &Path, |
| 825 | f: Option<&mut std::fs::File>, |
| 826 | mode: u32, |
| 827 | _mask: u32, |
| 828 | _preserve: bool, |
| 829 | ) -> io::Result<()> { |
| 830 | if mode & 0o200 == 0o200 { |
| 831 | return Ok(()); |
| 832 | } |
| 833 | match f { |
| 834 | Some(f) => { |
| 835 | let mut perm = f.metadata()?.permissions(); |
| 836 | perm.set_readonly(true); |
| 837 | f.set_permissions(perm) |
| 838 | } |
| 839 | None => { |
| 840 | let mut perm = fs::metadata(dst)?.permissions(); |
| 841 | perm.set_readonly(true); |
| 842 | fs::set_permissions(dst, perm) |
| 843 | } |
| 844 | } |
| 845 | } |
| 846 | |
| 847 | #[cfg (target_arch = "wasm32" )] |
| 848 | #[allow (unused_variables)] |
| 849 | fn _set_perms( |
| 850 | dst: &Path, |
| 851 | f: Option<&mut std::fs::File>, |
| 852 | mode: u32, |
| 853 | mask: u32, |
| 854 | _preserve: bool, |
| 855 | ) -> io::Result<()> { |
| 856 | Err(io::Error::new(io::ErrorKind::Other, "Not implemented" )) |
| 857 | } |
| 858 | |
| 859 | #[cfg (all(unix, feature = "xattr" ))] |
| 860 | fn set_xattrs(me: &mut EntryFields, dst: &Path) -> io::Result<()> { |
| 861 | use std::ffi::OsStr; |
| 862 | use std::os::unix::prelude::*; |
| 863 | |
| 864 | let exts = match me.pax_extensions() { |
| 865 | Ok(Some(e)) => e, |
| 866 | _ => return Ok(()), |
| 867 | }; |
| 868 | let exts = exts |
| 869 | .filter_map(|e| e.ok()) |
| 870 | .filter_map(|e| { |
| 871 | let key = e.key_bytes(); |
| 872 | let prefix = crate::pax::PAX_SCHILYXATTR.as_bytes(); |
| 873 | key.strip_prefix(prefix).map(|rest| (rest, e)) |
| 874 | }) |
| 875 | .map(|(key, e)| (OsStr::from_bytes(key), e.value_bytes())); |
| 876 | |
| 877 | for (key, value) in exts { |
| 878 | xattr::set(dst, key, value).map_err(|e| { |
| 879 | TarError::new( |
| 880 | format!( |
| 881 | "failed to set extended \ |
| 882 | attributes to {}. \ |
| 883 | Xattrs: key= {:?}, value= {:?}." , |
| 884 | dst.display(), |
| 885 | key, |
| 886 | String::from_utf8_lossy(value) |
| 887 | ), |
| 888 | e, |
| 889 | ) |
| 890 | })?; |
| 891 | } |
| 892 | |
| 893 | Ok(()) |
| 894 | } |
| 895 | // Windows does not completely support posix xattrs |
| 896 | // https://en.wikipedia.org/wiki/Extended_file_attributes#Windows_NT |
| 897 | #[cfg (any(windows, not(feature = "xattr" ), target_arch = "wasm32" ))] |
| 898 | fn set_xattrs(_: &mut EntryFields, _: &Path) -> io::Result<()> { |
| 899 | Ok(()) |
| 900 | } |
| 901 | } |
| 902 | |
| 903 | fn ensure_dir_created(&self, dst: &Path, dir: &Path) -> io::Result<()> { |
| 904 | let mut ancestor = dir; |
| 905 | let mut dirs_to_create = Vec::new(); |
| 906 | while ancestor.symlink_metadata().is_err() { |
| 907 | dirs_to_create.push(ancestor); |
| 908 | if let Some(parent) = ancestor.parent() { |
| 909 | ancestor = parent; |
| 910 | } else { |
| 911 | break; |
| 912 | } |
| 913 | } |
| 914 | for ancestor in dirs_to_create.into_iter().rev() { |
| 915 | if let Some(parent) = ancestor.parent() { |
| 916 | self.validate_inside_dst(dst, parent)?; |
| 917 | } |
| 918 | fs::create_dir_all(ancestor)?; |
| 919 | } |
| 920 | Ok(()) |
| 921 | } |
| 922 | |
| 923 | fn validate_inside_dst(&self, dst: &Path, file_dst: &Path) -> io::Result<PathBuf> { |
| 924 | // Abort if target (canonical) parent is outside of `dst` |
| 925 | let canon_parent = file_dst.canonicalize().map_err(|err| { |
| 926 | Error::new( |
| 927 | err.kind(), |
| 928 | format!(" {} while canonicalizing {}" , err, file_dst.display()), |
| 929 | ) |
| 930 | })?; |
| 931 | let canon_target = dst.canonicalize().map_err(|err| { |
| 932 | Error::new( |
| 933 | err.kind(), |
| 934 | format!(" {} while canonicalizing {}" , err, dst.display()), |
| 935 | ) |
| 936 | })?; |
| 937 | if !canon_parent.starts_with(&canon_target) { |
| 938 | let err = TarError::new( |
| 939 | format!( |
| 940 | "trying to unpack outside of destination path: {}" , |
| 941 | canon_target.display() |
| 942 | ), |
| 943 | // TODO: use ErrorKind::InvalidInput here? (minor breaking change) |
| 944 | Error::new(ErrorKind::Other, "Invalid argument" ), |
| 945 | ); |
| 946 | return Err(err.into()); |
| 947 | } |
| 948 | Ok(canon_target) |
| 949 | } |
| 950 | } |
| 951 | |
| 952 | impl<'a> Read for EntryFields<'a> { |
| 953 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
| 954 | loop { |
| 955 | match self.data.get_mut(index:0).map(|io: &mut EntryIo<'a>| io.read(buf:into)) { |
| 956 | Some(Ok(0)) => { |
| 957 | self.data.remove(index:0); |
| 958 | } |
| 959 | Some(r: Result) => return r, |
| 960 | None => return Ok(0), |
| 961 | } |
| 962 | } |
| 963 | } |
| 964 | } |
| 965 | |
| 966 | impl<'a> Read for EntryIo<'a> { |
| 967 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
| 968 | match *self { |
| 969 | EntryIo::Pad(ref mut io: &mut Take) => io.read(buf:into), |
| 970 | EntryIo::Data(ref mut io: &mut Take<&ArchiveInner>) => io.read(buf:into), |
| 971 | } |
| 972 | } |
| 973 | } |
| 974 | |