1use std::fs;
2use std::io;
3use std::io::prelude::*;
4use std::path::Path;
5use std::str;
6
7use crate::header::GNU_SPARSE_HEADERS_COUNT;
8use crate::header::{path2bytes, HeaderMode};
9use crate::GnuExtSparseHeader;
10use crate::{other, EntryType, Header};
11
12/// A structure for building archives
13///
14/// This structure has methods for building up an archive from scratch into any
15/// arbitrary writer.
16pub struct Builder<W: Write> {
17 options: BuilderOptions,
18 finished: bool,
19 obj: Option<W>,
20}
21
22#[derive(Clone, Copy)]
23struct BuilderOptions {
24 mode: HeaderMode,
25 follow: bool,
26 sparse: bool,
27}
28
29impl<W: Write> Builder<W> {
30 /// Create a new archive builder with the underlying object as the
31 /// destination of all data written. The builder will use
32 /// `HeaderMode::Complete` by default.
33 pub fn new(obj: W) -> Builder<W> {
34 Builder {
35 options: BuilderOptions {
36 mode: HeaderMode::Complete,
37 follow: true,
38 sparse: true,
39 },
40 finished: false,
41 obj: Some(obj),
42 }
43 }
44
45 /// Changes the HeaderMode that will be used when reading fs Metadata for
46 /// methods that implicitly read metadata for an input Path. Notably, this
47 /// does _not_ apply to `append(Header)`.
48 pub fn mode(&mut self, mode: HeaderMode) {
49 self.options.mode = mode;
50 }
51
52 /// Follow symlinks, archiving the contents of the file they point to rather
53 /// than adding a symlink to the archive. Defaults to true.
54 ///
55 /// When true, it exhibits the same behavior as GNU `tar` command's
56 /// `--dereference` or `-h` options <https://man7.org/linux/man-pages/man1/tar.1.html>.
57 pub fn follow_symlinks(&mut self, follow: bool) {
58 self.options.follow = follow;
59 }
60
61 /// Handle sparse files efficiently, if supported by the underlying
62 /// filesystem. When true, sparse file information is read from disk and
63 /// empty segments are omitted from the archive. Defaults to true.
64 pub fn sparse(&mut self, sparse: bool) {
65 self.options.sparse = sparse;
66 }
67
68 /// Gets shared reference to the underlying object.
69 pub fn get_ref(&self) -> &W {
70 self.obj.as_ref().unwrap()
71 }
72
73 /// Gets mutable reference to the underlying object.
74 ///
75 /// Note that care must be taken while writing to the underlying
76 /// object. But, e.g. `get_mut().flush()` is claimed to be safe and
77 /// useful in the situations when one needs to be ensured that
78 /// tar entry was flushed to the disk.
79 pub fn get_mut(&mut self) -> &mut W {
80 self.obj.as_mut().unwrap()
81 }
82
83 /// Unwrap this archive, returning the underlying object.
84 ///
85 /// This function will finish writing the archive if the `finish` function
86 /// hasn't yet been called, returning any I/O error which happens during
87 /// that operation.
88 pub fn into_inner(mut self) -> io::Result<W> {
89 if !self.finished {
90 self.finish()?;
91 }
92 Ok(self.obj.take().unwrap())
93 }
94
95 /// Adds a new entry to this archive.
96 ///
97 /// This function will append the header specified, followed by contents of
98 /// the stream specified by `data`. To produce a valid archive the `size`
99 /// field of `header` must be the same as the length of the stream that's
100 /// being written. Additionally the checksum for the header should have been
101 /// set via the `set_cksum` method.
102 ///
103 /// Note that this will not attempt to seek the archive to a valid position,
104 /// so if the archive is in the middle of a read or some other similar
105 /// operation then this may corrupt the archive.
106 ///
107 /// Also note that after all entries have been written to an archive the
108 /// `finish` function needs to be called to finish writing the archive.
109 ///
110 /// # Errors
111 ///
112 /// This function will return an error for any intermittent I/O error which
113 /// occurs when either reading or writing.
114 ///
115 /// # Examples
116 ///
117 /// ```
118 /// use tar::{Builder, Header};
119 ///
120 /// let mut header = Header::new_gnu();
121 /// header.set_path("foo").unwrap();
122 /// header.set_size(4);
123 /// header.set_cksum();
124 ///
125 /// let mut data: &[u8] = &[1, 2, 3, 4];
126 ///
127 /// let mut ar = Builder::new(Vec::new());
128 /// ar.append(&header, data).unwrap();
129 /// let data = ar.into_inner().unwrap();
130 /// ```
131 pub fn append<R: Read>(&mut self, header: &Header, mut data: R) -> io::Result<()> {
132 append(self.get_mut(), header, &mut data)
133 }
134
135 /// Adds a new entry to this archive with the specified path.
136 ///
137 /// This function will set the specified path in the given header, which may
138 /// require appending a GNU long-name extension entry to the archive first.
139 /// The checksum for the header will be automatically updated via the
140 /// `set_cksum` method after setting the path. No other metadata in the
141 /// header will be modified.
142 ///
143 /// Then it will append the header, followed by contents of the stream
144 /// specified by `data`. To produce a valid archive the `size` field of
145 /// `header` must be the same as the length of the stream that's being
146 /// written.
147 ///
148 /// Note that this will not attempt to seek the archive to a valid position,
149 /// so if the archive is in the middle of a read or some other similar
150 /// operation then this may corrupt the archive.
151 ///
152 /// Also note that after all entries have been written to an archive the
153 /// `finish` function needs to be called to finish writing the archive.
154 ///
155 /// # Errors
156 ///
157 /// This function will return an error for any intermittent I/O error which
158 /// occurs when either reading or writing.
159 ///
160 /// # Examples
161 ///
162 /// ```
163 /// use tar::{Builder, Header};
164 ///
165 /// let mut header = Header::new_gnu();
166 /// header.set_size(4);
167 /// header.set_cksum();
168 ///
169 /// let mut data: &[u8] = &[1, 2, 3, 4];
170 ///
171 /// let mut ar = Builder::new(Vec::new());
172 /// ar.append_data(&mut header, "really/long/path/to/foo", data).unwrap();
173 /// let data = ar.into_inner().unwrap();
174 /// ```
175 pub fn append_data<P: AsRef<Path>, R: Read>(
176 &mut self,
177 header: &mut Header,
178 path: P,
179 data: R,
180 ) -> io::Result<()> {
181 prepare_header_path(self.get_mut(), header, path.as_ref())?;
182 header.set_cksum();
183 self.append(&header, data)
184 }
185
186 /// Adds a new entry to this archive and returns an [`EntryWriter`] for
187 /// adding its contents.
188 ///
189 /// This function is similar to [`Self::append_data`] but returns a
190 /// [`io::Write`] implementation instead of taking data as a parameter.
191 ///
192 /// Similar constraints around the position of the archive and completion
193 /// apply as with [`Self::append_data`]. It requires the underlying writer
194 /// to implement [`Seek`] to update the header after writing the data.
195 ///
196 /// # Errors
197 ///
198 /// This function will return an error for any intermittent I/O error which
199 /// occurs when either reading or writing.
200 ///
201 /// # Examples
202 ///
203 /// ```
204 /// use std::io::Cursor;
205 /// use std::io::Write as _;
206 /// use tar::{Builder, Header};
207 ///
208 /// let mut header = Header::new_gnu();
209 ///
210 /// let mut ar = Builder::new(Cursor::new(Vec::new()));
211 /// let mut entry = ar.append_writer(&mut header, "hi.txt").unwrap();
212 /// entry.write_all(b"Hello, ").unwrap();
213 /// entry.write_all(b"world!\n").unwrap();
214 /// entry.finish().unwrap();
215 /// ```
216 pub fn append_writer<'a, P: AsRef<Path>>(
217 &'a mut self,
218 header: &'a mut Header,
219 path: P,
220 ) -> io::Result<EntryWriter<'a>>
221 where
222 W: Seek,
223 {
224 EntryWriter::start(self.get_mut(), header, path.as_ref())
225 }
226
227 /// Adds a new link (symbolic or hard) entry to this archive with the specified path and target.
228 ///
229 /// This function is similar to [`Self::append_data`] which supports long filenames,
230 /// but also supports long link targets using GNU extensions if necessary.
231 /// You must set the entry type to either [`EntryType::Link`] or [`EntryType::Symlink`].
232 /// The `set_cksum` method will be invoked after setting the path. No other metadata in the
233 /// header will be modified.
234 ///
235 /// If you are intending to use GNU extensions, you must use this method over calling
236 /// [`Header::set_link_name`] because that function will fail on long links.
237 ///
238 /// Similar constraints around the position of the archive and completion
239 /// apply as with [`Self::append_data`].
240 ///
241 /// # Errors
242 ///
243 /// This function will return an error for any intermittent I/O error which
244 /// occurs when either reading or writing.
245 ///
246 /// # Examples
247 ///
248 /// ```
249 /// use tar::{Builder, Header, EntryType};
250 ///
251 /// let mut ar = Builder::new(Vec::new());
252 /// let mut header = Header::new_gnu();
253 /// header.set_username("foo");
254 /// header.set_entry_type(EntryType::Symlink);
255 /// header.set_size(0);
256 /// ar.append_link(&mut header, "really/long/path/to/foo", "other/really/long/target").unwrap();
257 /// let data = ar.into_inner().unwrap();
258 /// ```
259 pub fn append_link<P: AsRef<Path>, T: AsRef<Path>>(
260 &mut self,
261 header: &mut Header,
262 path: P,
263 target: T,
264 ) -> io::Result<()> {
265 self._append_link(header, path.as_ref(), target.as_ref())
266 }
267
268 fn _append_link(&mut self, header: &mut Header, path: &Path, target: &Path) -> io::Result<()> {
269 prepare_header_path(self.get_mut(), header, path)?;
270 prepare_header_link(self.get_mut(), header, target)?;
271 header.set_cksum();
272 self.append(&header, std::io::empty())
273 }
274
275 /// Adds a file on the local filesystem to this archive.
276 ///
277 /// This function will open the file specified by `path` and insert the file
278 /// into the archive with the appropriate metadata set, returning any I/O
279 /// error which occurs while writing. The path name for the file inside of
280 /// this archive will be the same as `path`, and it is required that the
281 /// path is a relative path.
282 ///
283 /// Note that this will not attempt to seek the archive to a valid position,
284 /// so if the archive is in the middle of a read or some other similar
285 /// operation then this may corrupt the archive.
286 ///
287 /// Also note that after all files have been written to an archive the
288 /// `finish` function needs to be called to finish writing the archive.
289 ///
290 /// # Examples
291 ///
292 /// ```no_run
293 /// use tar::Builder;
294 ///
295 /// let mut ar = Builder::new(Vec::new());
296 ///
297 /// ar.append_path("foo/bar.txt").unwrap();
298 /// ```
299 pub fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
300 let options = self.options;
301 append_path_with_name(self.get_mut(), path.as_ref(), None, options)
302 }
303
304 /// Adds a file on the local filesystem to this archive under another name.
305 ///
306 /// This function will open the file specified by `path` and insert the file
307 /// into the archive as `name` with appropriate metadata set, returning any
/// I/O error which occurs while writing. The path name for the file inside
/// of this archive will be `name`, which is required to be a relative path.
310 ///
311 /// Note that this will not attempt to seek the archive to a valid position,
312 /// so if the archive is in the middle of a read or some other similar
313 /// operation then this may corrupt the archive.
314 ///
/// Note that if `path` is a directory, this will just add an entry for the
/// directory itself to the archive, rather than the contents of the directory.
317 ///
318 /// Also note that after all files have been written to an archive the
319 /// `finish` function needs to be called to finish writing the archive.
320 ///
321 /// # Examples
322 ///
323 /// ```no_run
324 /// use tar::Builder;
325 ///
326 /// let mut ar = Builder::new(Vec::new());
327 ///
328 /// // Insert the local file "foo/bar.txt" in the archive but with the name
329 /// // "bar/foo.txt".
330 /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").unwrap();
331 /// ```
332 pub fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
333 &mut self,
334 path: P,
335 name: N,
336 ) -> io::Result<()> {
337 let options = self.options;
338 append_path_with_name(self.get_mut(), path.as_ref(), Some(name.as_ref()), options)
339 }
340
341 /// Adds a file to this archive with the given path as the name of the file
342 /// in the archive.
343 ///
344 /// This will use the metadata of `file` to populate a `Header`, and it will
345 /// then append the file to the archive with the name `path`.
346 ///
347 /// Note that this will not attempt to seek the archive to a valid position,
348 /// so if the archive is in the middle of a read or some other similar
349 /// operation then this may corrupt the archive.
350 ///
351 /// Also note that after all files have been written to an archive the
352 /// `finish` function needs to be called to finish writing the archive.
353 ///
354 /// # Examples
355 ///
356 /// ```no_run
357 /// use std::fs::File;
358 /// use tar::Builder;
359 ///
360 /// let mut ar = Builder::new(Vec::new());
361 ///
362 /// // Open the file at one location, but insert it into the archive with a
363 /// // different name.
364 /// let mut f = File::open("foo/bar/baz.txt").unwrap();
365 /// ar.append_file("bar/baz.txt", &mut f).unwrap();
366 /// ```
367 pub fn append_file<P: AsRef<Path>>(&mut self, path: P, file: &mut fs::File) -> io::Result<()> {
368 let options = self.options;
369 append_file(self.get_mut(), path.as_ref(), file, options)
370 }
371
372 /// Adds a directory to this archive with the given path as the name of the
373 /// directory in the archive.
374 ///
375 /// This will use `stat` to populate a `Header`, and it will then append the
376 /// directory to the archive with the name `path`.
377 ///
378 /// Note that this will not attempt to seek the archive to a valid position,
379 /// so if the archive is in the middle of a read or some other similar
380 /// operation then this may corrupt the archive.
381 ///
382 /// Note this will not add the contents of the directory to the archive.
/// See `append_dir_all` for recursively adding the contents of the directory.
384 ///
385 /// Also note that after all files have been written to an archive the
386 /// `finish` function needs to be called to finish writing the archive.
387 ///
388 /// # Examples
389 ///
390 /// ```
391 /// use std::fs;
392 /// use tar::Builder;
393 ///
394 /// let mut ar = Builder::new(Vec::new());
395 ///
396 /// // Use the directory at one location, but insert it into the archive
397 /// // with a different name.
398 /// ar.append_dir("bardir", ".").unwrap();
399 /// ```
400 pub fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
401 where
402 P: AsRef<Path>,
403 Q: AsRef<Path>,
404 {
405 let options = self.options;
406 append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), options)
407 }
408
409 /// Adds a directory and all of its contents (recursively) to this archive
410 /// with the given path as the name of the directory in the archive.
411 ///
412 /// Note that this will not attempt to seek the archive to a valid position,
413 /// so if the archive is in the middle of a read or some other similar
414 /// operation then this may corrupt the archive.
415 ///
416 /// Also note that after all files have been written to an archive the
417 /// `finish` or `into_inner` function needs to be called to finish
418 /// writing the archive.
419 ///
420 /// # Examples
421 ///
422 /// ```
423 /// use std::fs;
424 /// use tar::Builder;
425 ///
426 /// let mut ar = Builder::new(Vec::new());
427 ///
428 /// // Use the directory at one location ("."), but insert it into the archive
429 /// // with a different name ("bardir").
430 /// ar.append_dir_all("bardir", ".").unwrap();
431 /// ar.finish().unwrap();
432 /// ```
433 ///
434 /// Use `append_dir_all` with an empty string as the first path argument to
435 /// create an archive from all files in a directory without renaming.
436 ///
437 /// ```
438 /// use std::fs;
439 /// use std::path::PathBuf;
440 /// use tar::{Archive, Builder};
441 ///
442 /// let tmpdir = tempfile::tempdir().unwrap();
443 /// let path = tmpdir.path();
444 /// fs::write(path.join("a.txt"), b"hello").unwrap();
445 /// fs::write(path.join("b.txt"), b"world").unwrap();
446 ///
447 /// // Create a tarball from the files in the directory
448 /// let mut ar = Builder::new(Vec::new());
449 /// ar.append_dir_all("", path).unwrap();
450 ///
451 /// // List files in the archive
452 /// let archive = ar.into_inner().unwrap();
453 /// let archived_files = Archive::new(archive.as_slice())
454 /// .entries()
455 /// .unwrap()
456 /// .map(|entry| entry.unwrap().path().unwrap().into_owned())
457 /// .collect::<Vec<_>>();
458 ///
459 /// assert!(archived_files.contains(&PathBuf::from("a.txt")));
460 /// assert!(archived_files.contains(&PathBuf::from("b.txt")));
461 /// ```
462 pub fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
463 where
464 P: AsRef<Path>,
465 Q: AsRef<Path>,
466 {
467 let options = self.options;
468 append_dir_all(self.get_mut(), path.as_ref(), src_path.as_ref(), options)
469 }
470
471 /// Finish writing this archive, emitting the termination sections.
472 ///
473 /// This function should only be called when the archive has been written
474 /// entirely and if an I/O error happens the underlying object still needs
475 /// to be acquired.
476 ///
477 /// In most situations the `into_inner` method should be preferred.
478 pub fn finish(&mut self) -> io::Result<()> {
479 if self.finished {
480 return Ok(());
481 }
482 self.finished = true;
483 self.get_mut().write_all(&[0; 1024])
484 }
485}
486
/// Combination of [`Write`] and [`Seek`] that can be used as a single trait
/// object (`dyn SeekWrite`).
trait SeekWrite: Write + Seek {
    /// Views this writer as a plain `Write` trait object, for helpers that
    /// only accept `&mut dyn Write`.
    fn as_write(&mut self) -> &mut dyn Write;
}

/// Every seekable writer is a `SeekWrite`.
impl<S> SeekWrite for S
where
    S: Write + Seek,
{
    fn as_write(&mut self) -> &mut dyn Write {
        self
    }
}
496
497/// A writer for a single entry in a tar archive.
498///
499/// This struct is returned by [`Builder::append_writer`] and provides a
500/// [`Write`] implementation for adding content to an archive entry.
501///
502/// After writing all data to the entry, it must be finalized either by
503/// explicitly calling [`EntryWriter::finish`] or by letting it drop.
504pub struct EntryWriter<'a> {
505 // NOTE: Do not add any fields here which require Drop!
506 // See the comment below in finish().
507 obj: &'a mut dyn SeekWrite,
508 header: &'a mut Header,
509 written: u64,
510}
511
512impl EntryWriter<'_> {
513 fn start<'a>(
514 obj: &'a mut dyn SeekWrite,
515 header: &'a mut Header,
516 path: &Path,
517 ) -> io::Result<EntryWriter<'a>> {
518 prepare_header_path(obj.as_write(), header, path)?;
519
520 // Reserve space for header, will be overwritten once data is written.
521 obj.write_all([0u8; 512].as_ref())?;
522
523 Ok(EntryWriter {
524 obj,
525 header,
526 written: 0,
527 })
528 }
529
530 /// Finish writing the current entry in the archive.
531 pub fn finish(self) -> io::Result<()> {
532 // NOTE: This is an optimization for "fallible destructuring".
533 // We want finish() to return an error, but we also need to invoke
534 // cleanup in our Drop handler, which will run unconditionally
535 // and try to do the same work.
536 // By using ManuallyDrop, we suppress that drop. However, this would
537 // be a memory leak if we ever had any struct members which required
538 // Drop - which we don't right now.
539 // But if we ever gain one, we will need to change to use e.g. Option<>
540 // around some of the fields or have a `bool finished` etc.
541 let mut this = std::mem::ManuallyDrop::new(self);
542 this.do_finish()
543 }
544
545 fn do_finish(&mut self) -> io::Result<()> {
546 // Pad with zeros if necessary.
547 let buf = [0u8; 512];
548 let remaining = u64::wrapping_sub(512, self.written) % 512;
549 self.obj.write_all(&buf[..remaining as usize])?;
550 let written = (self.written + remaining) as i64;
551
552 // Seek back to the header position.
553 self.obj.seek(io::SeekFrom::Current(-written - 512))?;
554
555 self.header.set_size(self.written);
556 self.header.set_cksum();
557 self.obj.write_all(self.header.as_bytes())?;
558
559 // Seek forward to restore the position.
560 self.obj.seek(io::SeekFrom::Current(written))?;
561
562 Ok(())
563 }
564}
565
566impl Write for EntryWriter<'_> {
567 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
568 let len: usize = self.obj.write(buf)?;
569 self.written += len as u64;
570 Ok(len)
571 }
572
573 fn flush(&mut self) -> io::Result<()> {
574 self.obj.flush()
575 }
576}
577
578impl Drop for EntryWriter<'_> {
579 fn drop(&mut self) {
580 let _ = self.do_finish();
581 }
582}
583
584fn append(mut dst: &mut dyn Write, header: &Header, mut data: &mut dyn Read) -> io::Result<()> {
585 dst.write_all(buf:header.as_bytes())?;
586 let len: u64 = io::copy(&mut data, &mut dst)?;
587 pad_zeroes(&mut dst, len)?;
588 Ok(())
589}
590
/// Writes the zero bytes needed to extend `len` bytes of data to a multiple
/// of 512. Nothing is written when `len` is already a multiple of 512.
fn pad_zeroes(dst: &mut dyn Write, len: u64) -> io::Result<()> {
    let tail = len % 512;
    if tail == 0 {
        return Ok(());
    }
    let zeroes = [0u8; 512];
    let missing = (512 - tail) as usize;
    dst.write_all(&zeroes[..missing])
}
599
600fn append_path_with_name(
601 dst: &mut dyn Write,
602 path: &Path,
603 name: Option<&Path>,
604 options: BuilderOptions,
605) -> io::Result<()> {
606 let stat = if options.follow {
607 fs::metadata(path).map_err(|err| {
608 io::Error::new(
609 err.kind(),
610 format!("{} when getting metadata for {}", err, path.display()),
611 )
612 })?
613 } else {
614 fs::symlink_metadata(path).map_err(|err| {
615 io::Error::new(
616 err.kind(),
617 format!("{} when getting metadata for {}", err, path.display()),
618 )
619 })?
620 };
621 let ar_name = name.unwrap_or(path);
622 if stat.is_file() {
623 append_file(dst, ar_name, &mut fs::File::open(path)?, options)
624 } else if stat.is_dir() {
625 append_fs(dst, ar_name, &stat, options.mode, None)
626 } else if stat.file_type().is_symlink() {
627 let link_name = fs::read_link(path)?;
628 append_fs(dst, ar_name, &stat, options.mode, Some(&link_name))
629 } else {
630 #[cfg(unix)]
631 {
632 append_special(dst, path, &stat, options.mode)
633 }
634 #[cfg(not(unix))]
635 {
636 Err(other(&format!("{} has unknown file type", path.display())))
637 }
638 }
639}
640
641#[cfg(unix)]
642fn append_special(
643 dst: &mut dyn Write,
644 path: &Path,
645 stat: &fs::Metadata,
646 mode: HeaderMode,
647) -> io::Result<()> {
648 use ::std::os::unix::fs::{FileTypeExt, MetadataExt};
649
650 let file_type = stat.file_type();
651 let entry_type;
652 if file_type.is_socket() {
653 // sockets can't be archived
654 return Err(other(&format!(
655 "{}: socket can not be archived",
656 path.display()
657 )));
658 } else if file_type.is_fifo() {
659 entry_type = EntryType::Fifo;
660 } else if file_type.is_char_device() {
661 entry_type = EntryType::Char;
662 } else if file_type.is_block_device() {
663 entry_type = EntryType::Block;
664 } else {
665 return Err(other(&format!("{} has unknown file type", path.display())));
666 }
667
668 let mut header = Header::new_gnu();
669 header.set_metadata_in_mode(stat, mode);
670 prepare_header_path(dst, &mut header, path)?;
671
672 header.set_entry_type(entry_type);
673 let dev_id = stat.rdev();
674 let dev_major = ((dev_id >> 32) & 0xffff_f000) | ((dev_id >> 8) & 0x0000_0fff);
675 let dev_minor = ((dev_id >> 12) & 0xffff_ff00) | ((dev_id) & 0x0000_00ff);
676 header.set_device_major(dev_major as u32)?;
677 header.set_device_minor(dev_minor as u32)?;
678
679 header.set_cksum();
680 dst.write_all(header.as_bytes())?;
681
682 Ok(())
683}
684
685fn append_file(
686 dst: &mut dyn Write,
687 path: &Path,
688 file: &mut fs::File,
689 options: BuilderOptions,
690) -> io::Result<()> {
691 let stat = file.metadata()?;
692 let mut header = Header::new_gnu();
693
694 prepare_header_path(dst, &mut header, path)?;
695 header.set_metadata_in_mode(&stat, options.mode);
696 let sparse_entries = if options.sparse {
697 prepare_header_sparse(file, &stat, &mut header)?
698 } else {
699 None
700 };
701 header.set_cksum();
702 dst.write_all(header.as_bytes())?;
703
704 if let Some(sparse_entries) = sparse_entries {
705 append_extended_sparse_headers(dst, &sparse_entries)?;
706 for entry in sparse_entries.entries {
707 file.seek(io::SeekFrom::Start(entry.offset))?;
708 io::copy(&mut file.take(entry.num_bytes), dst)?;
709 }
710 pad_zeroes(dst, sparse_entries.on_disk_size)?;
711 } else {
712 let len = io::copy(file, dst)?;
713 pad_zeroes(dst, len)?;
714 }
715
716 Ok(())
717}
718
719fn append_dir(
720 dst: &mut dyn Write,
721 path: &Path,
722 src_path: &Path,
723 options: BuilderOptions,
724) -> io::Result<()> {
725 let stat: Metadata = fs::metadata(src_path)?;
726 append_fs(dst, path, &stat, options.mode, link_name:None)
727}
728
729fn prepare_header(size: u64, entry_type: u8) -> Header {
730 let mut header: Header = Header::new_gnu();
731 let name: &'static [u8; 13] = b"././@LongLink";
732 header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
733 header.set_mode(0o644);
734 header.set_uid(0);
735 header.set_gid(0);
736 header.set_mtime(0);
737 // + 1 to be compliant with GNU tar
738 header.set_size(size + 1);
739 header.set_entry_type(ty:EntryType::new(byte:entry_type));
740 header.set_cksum();
741 header
742}
743
744fn prepare_header_path(dst: &mut dyn Write, header: &mut Header, path: &Path) -> io::Result<()> {
745 // Try to encode the path directly in the header, but if it ends up not
746 // working (probably because it's too long) then try to use the GNU-specific
747 // long name extension by emitting an entry which indicates that it's the
748 // filename.
749 if let Err(e) = header.set_path(path) {
750 let data = path2bytes(&path)?;
751 let max = header.as_old().name.len();
752 // Since `e` isn't specific enough to let us know the path is indeed too
753 // long, verify it first before using the extension.
754 if data.len() < max {
755 return Err(e);
756 }
757 let header2 = prepare_header(data.len() as u64, b'L');
758 // null-terminated string
759 let mut data2 = data.chain(io::repeat(0).take(1));
760 append(dst, &header2, &mut data2)?;
761
762 // Truncate the path to store in the header we're about to emit to
763 // ensure we've got something at least mentioned. Note that we use
764 // `str`-encoding to be compatible with Windows, but in general the
765 // entry in the header itself shouldn't matter too much since extraction
766 // doesn't look at it.
767 let truncated = match str::from_utf8(&data[..max]) {
768 Ok(s) => s,
769 Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(),
770 };
771 header.set_path(truncated)?;
772 }
773 Ok(())
774}
775
776fn prepare_header_link(
777 dst: &mut dyn Write,
778 header: &mut Header,
779 link_name: &Path,
780) -> io::Result<()> {
781 // Same as previous function but for linkname
782 if let Err(e: Error) = header.set_link_name(&link_name) {
783 let data: Cow<'_, [u8]> = path2bytes(&link_name)?;
784 if data.len() < header.as_old().linkname.len() {
785 return Err(e);
786 }
787 let header2: Header = prepare_header(size:data.len() as u64, entry_type:b'K');
788 let mut data2: Chain<&[u8], Take> = data.chain(next:io::repeat(0).take(limit:1));
789 append(dst, &header2, &mut data2)?;
790 }
791 Ok(())
792}
793
794fn prepare_header_sparse(
795 file: &mut fs::File,
796 stat: &fs::Metadata,
797 header: &mut Header,
798) -> io::Result<Option<SparseEntries>> {
799 let entries: SparseEntries = match find_sparse_entries(file, stat)? {
800 Some(entries: SparseEntries) => entries,
801 _ => return Ok(None),
802 };
803
804 header.set_entry_type(ty:EntryType::GNUSparse);
805 header.set_size(entries.on_disk_size);
806
807 // Write the first 4 (GNU_SPARSE_HEADERS_COUNT) entries to the given header.
808 // The remaining entries will be written as subsequent extended headers. See
809 // https://www.gnu.org/software/tar/manual/html_section/Sparse-Formats.html#Old-GNU-Format
810 // for details on the format.
811 let gnu_header: &mut &mut GnuHeader = &mut header.as_gnu_mut().unwrap();
812 gnu_header.set_real_size(entries.size());
813
814 for (entry: &SparseEntry, header_entry: &mut GnuSparseHeader) in std::iter::zip(&entries.entries, &mut gnu_header.sparse) {
815 header_entry.set_offset(entry.offset);
816 header_entry.set_length(entry.num_bytes);
817 }
818 gnu_header.set_is_extended(entries.entries.len() > gnu_header.sparse.len());
819
820 Ok(Some(entries))
821}
822
823/// Write extra sparse headers into `dst` for those entries that did not fit in the main header.
824fn append_extended_sparse_headers(dst: &mut dyn Write, entries: &SparseEntries) -> io::Result<()> {
825 // The first `GNU_SPARSE_HEADERS_COUNT` entries are written to the main header, so skip them.
826 let mut it = entries
827 .entries
828 .iter()
829 .skip(GNU_SPARSE_HEADERS_COUNT)
830 .peekable();
831
832 // Each GnuExtSparseHeader can hold up to fixed number of sparse entries (21).
833 // So we pack entries into multiple headers if necessary.
834 while it.peek().is_some() {
835 let mut ext_header = GnuExtSparseHeader::new();
836 for header_entry in ext_header.sparse.iter_mut() {
837 if let Some(entry) = it.next() {
838 header_entry.set_offset(entry.offset);
839 header_entry.set_length(entry.num_bytes);
840 } else {
841 break;
842 }
843 }
844 ext_header.set_is_extended(it.peek().is_some());
845 dst.write_all(ext_header.as_bytes())?;
846 }
847
848 Ok(())
849}
850
851fn append_fs(
852 dst: &mut dyn Write,
853 path: &Path,
854 meta: &fs::Metadata,
855 mode: HeaderMode,
856 link_name: Option<&Path>,
857) -> io::Result<()> {
858 let mut header: Header = Header::new_gnu();
859
860 prepare_header_path(dst, &mut header, path)?;
861 header.set_metadata_in_mode(meta, mode);
862 if let Some(link_name: &Path) = link_name {
863 prepare_header_link(dst, &mut header, link_name)?;
864 }
865 header.set_cksum();
866 dst.write_all(buf:header.as_bytes())
867}
868
869fn append_dir_all(
870 dst: &mut dyn Write,
871 path: &Path,
872 src_path: &Path,
873 options: BuilderOptions,
874) -> io::Result<()> {
875 let mut stack = vec![(src_path.to_path_buf(), true, false)];
876 while let Some((src, is_dir, is_symlink)) = stack.pop() {
877 let dest = path.join(src.strip_prefix(&src_path).unwrap());
878 // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
879 if is_dir || (is_symlink && options.follow && src.is_dir()) {
880 for entry in fs::read_dir(&src)? {
881 let entry = entry?;
882 let file_type = entry.file_type()?;
883 stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
884 }
885 if dest != Path::new("") {
886 append_dir(dst, &dest, &src, options)?;
887 }
888 } else if !options.follow && is_symlink {
889 let stat = fs::symlink_metadata(&src)?;
890 let link_name = fs::read_link(&src)?;
891 append_fs(dst, &dest, &stat, options.mode, Some(&link_name))?;
892 } else {
893 #[cfg(unix)]
894 {
895 let stat = fs::metadata(&src)?;
896 if !stat.is_file() {
897 append_special(dst, &dest, &stat, options.mode)?;
898 continue;
899 }
900 }
901 append_file(dst, &dest, &mut fs::File::open(src)?, options)?;
902 }
903 }
904 Ok(())
905}
906
/// The data segments of a sparse file.
#[derive(Debug, Clone, PartialEq, Eq)]
struct SparseEntries {
    // Data segments; `size` relies on the last one ending the file.
    entries: Vec<SparseEntry>,
    // Total number of data bytes that are stored in the archive.
    on_disk_size: u64,
}

impl SparseEntries {
    /// End offset of the last entry (`offset + num_bytes`), or 0 if there
    /// are no entries. Used as the real size in the GNU sparse header.
    fn size(&self) -> u64 {
        self.entries.last().map_or(0, |e| e.offset + e.num_bytes)
    }
}

/// One data segment of a sparse file.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
struct SparseEntry {
    // Position of the segment within the file.
    offset: u64,
    // Length of the segment in bytes.
    num_bytes: u64,
}
924
925/// Find sparse entries in a file. Returns:
926/// * `Ok(Some(_))` if the file is sparse.
927/// * `Ok(None)` if the file is not sparse, or if the file system does not
928/// support sparse files.
929/// * `Err(_)` if an error occurred. The lack of support for sparse files is not
930/// considered an error. It might return an error if the file is modified
931/// while reading.
932fn find_sparse_entries(
933 file: &mut fs::File,
934 stat: &fs::Metadata,
935) -> io::Result<Option<SparseEntries>> {
936 #[cfg(not(any(target_os = "android", target_os = "freebsd", target_os = "linux")))]
937 {
938 let _ = file;
939 let _ = stat;
940 Ok(None)
941 }
942
943 #[cfg(any(target_os = "android", target_os = "freebsd", target_os = "linux"))]
944 find_sparse_entries_seek(file, stat)
945}
946
/// Implementation of `find_sparse_entries` using `SEEK_HOLE` and `SEEK_DATA`.
///
/// Walks the file by alternating `lseek(SEEK_DATA)` and `lseek(SEEK_HOLE)`,
/// recording one `SparseEntry` per data segment, and terminates the list with
/// a zero-length entry positioned at `stat.size()`.
///
/// Returns:
/// * `Ok(None)` if the file is empty, has no holes, or hole detection is not
///   available for it.
/// * `Ok(Some(_))` with the data map otherwise. A non-empty file without any
///   allocated blocks yields a map holding only the terminating entry.
/// * `Err(_)` if `lseek` fails unexpectedly or reports offsets that do not
///   advance, which suggests the file changed while it was being scanned.
///
/// Once the scan loop has started, the file offset is rewound to the start of
/// the file before any `Ok(..)` value is returned.
#[cfg(any(target_os = "android", target_os = "freebsd", target_os = "linux"))]
fn find_sparse_entries_seek(
    file: &mut fs::File,
    stat: &fs::Metadata,
) -> io::Result<Option<SparseEntries>> {
    use std::os::unix::fs::MetadataExt as _;
    use std::os::unix::io::AsRawFd as _;

    // Thin wrapper around the C `lseek` that reports failure as the raw
    // `errno` value, so that callers can match on constants such as `ENXIO`.
    fn lseek(file: &fs::File, offset: i64, whence: libc::c_int) -> Result<i64, i32> {
        // Linux and Android go through `lseek64`; the remaining targets use
        // plain `lseek`.
        #[cfg(any(target_os = "linux", target_os = "android"))]
        let lseek = libc::lseek64;
        #[cfg(not(any(target_os = "linux", target_os = "android")))]
        let lseek = libc::lseek;

        // SAFETY: the call only receives a file descriptor and two integers;
        // the descriptor comes from `file`, which is borrowed for the whole
        // call.
        match unsafe { lseek(file.as_raw_fd(), offset, whence) } {
            // `last_os_error` is built from the OS error code, so
            // `raw_os_error` is always `Some` here.
            -1 => Err(io::Error::last_os_error().raw_os_error().unwrap()),
            off => Ok(off),
        }
    }

    // No blocks allocated at all: there is nothing to scan.
    if stat.blocks() == 0 {
        return Ok(if stat.size() == 0 {
            // Empty file.
            None
        } else {
            // Fully sparse file: the map consists of the terminating
            // zero-length entry only.
            Some(SparseEntries {
                entries: vec![SparseEntry {
                    offset: stat.size(),
                    num_bytes: 0,
                }],
                on_disk_size: 0,
            })
        });
    }

    // On most Unices, we need to read `_PC_MIN_HOLE_SIZE` to see if the file
    // system supports `SEEK_HOLE`.
    // FreeBSD: https://man.freebsd.org/cgi/man.cgi?query=lseek&sektion=2&manpath=FreeBSD+14.1-STABLE
    // SAFETY: `fpathconf` only receives a file descriptor, taken from the
    // borrowed `file`, and an integer constant.
    #[cfg(not(any(target_os = "linux", target_os = "android")))]
    if unsafe { libc::fpathconf(file.as_raw_fd(), libc::_PC_MIN_HOLE_SIZE) } == -1 {
        return Ok(None);
    }

    // Linux is the only UNIX-like without support for `_PC_MIN_HOLE_SIZE`, so
    // instead we try to call `lseek` and see if it fails.
    #[cfg(any(target_os = "linux", target_os = "android"))]
    match lseek(file, 0, libc::SEEK_HOLE) {
        Ok(_) => (),
        Err(libc::ENXIO) => {
            // The file is empty. Treat it as non-sparse.
            return Ok(None);
        }
        // Any other failure is treated as "hole detection unavailable"
        // rather than as an error.
        Err(_) => return Ok(None),
    }

    let mut entries = Vec::new();
    let mut on_disk_size = 0;
    // Current scan position. Each iteration first moves it to the start of
    // the next data segment and then to the end of that segment.
    let mut off_s = 0;
    loop {
        // Step 1: find where the next data segment starts.
        //
        //  off_s=0      │      off_s               │  off_s
        //    ↓          │        ↓                 │    ↓
        //    | DATA |…  │  ……………| HOLE | DATA |…   │   …|×EOF×
        //    ↑          │    ↑   ↑      ↑          │
        //   (a)         │   (b) (c)    (d)         │       (e)
        match lseek(file, off_s, libc::SEEK_DATA) {
            Ok(0) if off_s == 0 => (), // (a) The file starts with data.
            Ok(off) if off < off_s => {
                // (b) Unlikely.
                return Err(std::io::Error::new(
                    io::ErrorKind::Other,
                    "lseek(SEEK_DATA) went backwards",
                ));
            }
            Ok(off) if off == off_s => {
                // (c) The data at the same offset as the hole.
                return Err(std::io::Error::new(
                    io::ErrorKind::Other,
                    "lseek(SEEK_DATA) did not advance. \
                     Did the file change while appending?",
                ));
            }
            Ok(off) => off_s = off, // (d) Skip the hole; data starts at `off`.
            Err(libc::ENXIO) => break, // (e) Reached the end of the file.
            Err(errno) => return Err(io::Error::from_raw_os_error(errno)),
        };

        // Step 2: find where the data segment starting at `off_s` ends.
        //
        //  off_s=0          │      off_s               │  off_s
        //    ↓              │        ↓                 │    ↓
        //    | DATA |×EOF×  │  ……………| DATA | HOLE |…   │   …|×EOF×
        //           ↑       │    ↑   ↑      ↑          │
        //          (a)      │   (b) (c)    (d)         │       (e)
        match lseek(file, off_s, libc::SEEK_HOLE) {
            Ok(off_e) if off_s == 0 && (off_e as u64) == stat.size() => {
                // (a) The file is not sparse. Rewind so the caller can read
                // it from the beginning.
                file.seek(io::SeekFrom::Start(0))?;
                return Ok(None);
            }
            Ok(off_e) if off_e < off_s => {
                // (b) Unlikely.
                return Err(std::io::Error::new(
                    io::ErrorKind::Other,
                    "lseek(SEEK_HOLE) went backwards",
                ));
            }
            Ok(off_e) if off_e == off_s => {
                // (c) The hole at the same offset as the data.
                return Err(std::io::Error::new(
                    io::ErrorKind::Other,
                    "lseek(SEEK_HOLE) did not advance. \
                     Did the file change while appending?",
                ));
            }
            Ok(off_e) => {
                // (d) Found a hole or reached the end of the file (implicit
                // zero-length hole). Record the data segment `off_s..off_e`.
                entries.push(SparseEntry {
                    offset: off_s as u64,
                    num_bytes: off_e as u64 - off_s as u64,
                });
                on_disk_size += off_e as u64 - off_s as u64;
                off_s = off_e;
            }
            Err(libc::ENXIO) => {
                // (e) off_s was already beyond the end of the file.
                return Err(std::io::Error::new(
                    io::ErrorKind::Other,
                    "lseek(SEEK_HOLE) returned ENXIO. \
                     Did the file change while appending?",
                ));
            }
            Err(errno) => return Err(io::Error::from_raw_os_error(errno)),
        };
    }

    // The scan must not have run past the size recorded in `stat`.
    if off_s as u64 > stat.size() {
        return Err(std::io::Error::new(
            io::ErrorKind::Other,
            "lseek(SEEK_DATA) went beyond the end of the file. \
             Did the file change while appending?",
        ));
    }

    // Add a final zero-length entry. It is required if the file ends with a
    // hole, and redundant otherwise. However, we add it unconditionally to
    // mimic GNU tar behavior.
    entries.push(SparseEntry {
        offset: stat.size(),
        num_bytes: 0,
    });

    // Rewind so the caller can read the file from the beginning.
    file.seek(io::SeekFrom::Start(0))?;

    Ok(Some(SparseEntries {
        entries,
        on_disk_size,
    }))
}
1106
// Finishes the archive when the builder goes out of scope.
impl<W: Write> Drop for Builder<W> {
    fn drop(&mut self) {
        // `drop` has no way to report a failure, so the result of `finish`
        // is deliberately discarded.
        // NOTE(review): callers that care about the outcome presumably need
        // to call `finish` themselves before the builder is dropped; confirm.
        let _ = self.finish();
    }
}
1112
// Unit tests for sparse-file detection.
#[cfg(test)]
mod tests {
    use super::*;

    /// Size of one block in the test layouts below.
    /// Should be multiple of 4KiB on ext4, multiple of 32KiB on FreeBSD/UFS.
    const SPARSE_BLOCK_SIZE: u64 = 32768;

    /// Builds files with known data/hole layouts in a temporary file and
    /// checks what `find_sparse_entries` reports for each of them.
    #[test]
    fn test_find_sparse_entries() {
        // Each case pairs a human-readable sketch of the layout (`#` marks
        // data; used only in failure messages) with the data map that layout
        // corresponds to, including the terminating zero-length entry.
        let cases: &[(&str, &[SparseEntry])] = &[
            ("|", &[]),
            (
                "| | | | |",
                &[SparseEntry {
                    offset: 4 * SPARSE_BLOCK_SIZE,
                    num_bytes: 0,
                }],
            ),
            (
                "|####|####|####|####|",
                &[
                    SparseEntry {
                        offset: 0,
                        num_bytes: 4 * SPARSE_BLOCK_SIZE,
                    },
                    SparseEntry {
                        offset: 4 * SPARSE_BLOCK_SIZE,
                        num_bytes: 0,
                    },
                ],
            ),
            (
                "|####|####| | |",
                &[
                    SparseEntry {
                        offset: 0,
                        num_bytes: 2 * SPARSE_BLOCK_SIZE,
                    },
                    SparseEntry {
                        offset: 4 * SPARSE_BLOCK_SIZE,
                        num_bytes: 0,
                    },
                ],
            ),
            (
                "| | |####|####|",
                &[
                    SparseEntry {
                        offset: 2 * SPARSE_BLOCK_SIZE,
                        num_bytes: 2 * SPARSE_BLOCK_SIZE,
                    },
                    SparseEntry {
                        offset: 4 * SPARSE_BLOCK_SIZE,
                        num_bytes: 0,
                    },
                ],
            ),
            (
                "|####| |####| |",
                &[
                    SparseEntry {
                        offset: 0,
                        num_bytes: SPARSE_BLOCK_SIZE,
                    },
                    SparseEntry {
                        offset: 2 * SPARSE_BLOCK_SIZE,
                        num_bytes: SPARSE_BLOCK_SIZE,
                    },
                    SparseEntry {
                        offset: 4 * SPARSE_BLOCK_SIZE,
                        num_bytes: 0,
                    },
                ],
            ),
            (
                "|####| | |####|",
                &[
                    SparseEntry {
                        offset: 0,
                        num_bytes: SPARSE_BLOCK_SIZE,
                    },
                    SparseEntry {
                        offset: 3 * SPARSE_BLOCK_SIZE,
                        num_bytes: SPARSE_BLOCK_SIZE,
                    },
                    SparseEntry {
                        offset: 4 * SPARSE_BLOCK_SIZE,
                        num_bytes: 0,
                    },
                ],
            ),
            (
                "| |####|####| |",
                &[
                    SparseEntry {
                        offset: 1 * SPARSE_BLOCK_SIZE,
                        num_bytes: 2 * SPARSE_BLOCK_SIZE,
                    },
                    SparseEntry {
                        offset: 4 * SPARSE_BLOCK_SIZE,
                        num_bytes: 0,
                    },
                ],
            ),
        ];

        // A single temporary file is reused for every case.
        let mut file = tempfile::tempfile().unwrap();

        for &(description, map) in cases {
            // Truncate to zero first so nothing from the previous case
            // survives, then extend to the target size.
            file.set_len(0).unwrap();
            file.set_len(map.last().map_or(0, |e| e.offset + e.num_bytes))
                .unwrap();

            // Fill each data segment with non-zero bytes, one block at a time.
            for e in map {
                file.seek(io::SeekFrom::Start(e.offset)).unwrap();
                for _ in 0..e.num_bytes / SPARSE_BLOCK_SIZE {
                    file.write_all(&[0xFF; SPARSE_BLOCK_SIZE as usize]).unwrap();
                }
            }

            // Empty and fully dense files are expected to be reported as
            // non-sparse (`None`); everything else as the map itself.
            let expected = match map {
                // Empty file.
                &[] => None,

                // 100% dense.
                &[SparseEntry {
                    offset: 0,
                    num_bytes: x1,
                }, SparseEntry {
                    offset: x2,
                    num_bytes: 0,
                }] if x1 == x2 => None,

                // Sparse.
                map => Some(SparseEntries {
                    entries: map.to_vec(),
                    on_disk_size: map.iter().map(|e| e.num_bytes).sum(),
                }),
            };

            let stat = file.metadata().unwrap();
            let reported = find_sparse_entries(&mut file, &stat).unwrap();

            // Loose check: we did not miss any data blocks.
            if let Err(e) = loose_check_sparse_entries(reported.as_ref(), expected.as_ref()) {
                panic!(
                    "Case: {description}\n\
                     Reported: {reported:?}\n\
                     Expected: {expected:?}\n\
                     Error: {e}",
                );
            }

            // On Linux, always do a strict check. Skip on FreeBSD, as on UFS
            // the last block is always dense, even if it's zero-filled.
            #[cfg(any(target_os = "android", target_os = "linux"))]
            assert_eq!(reported, expected, "Case: {description}");
        }
    }

    /// Checks that `reported` is an acceptable answer for a file whose exact
    /// layout is `expected`, without requiring the two to be equal.
    ///
    /// Reporting a file as non-sparse is always accepted. Otherwise every
    /// expected entry must be covered by a reported entry, the last entries
    /// must match, and `reported` must be internally consistent. Returns a
    /// static description of the first violation found.
    fn loose_check_sparse_entries(
        reported: Option<&SparseEntries>,
        expected: Option<&SparseEntries>,
    ) -> Result<(), &'static str> {
        let reported = match reported {
            Some(entries) => entries, // Reported as sparse.
            // It's not an error to report a sparse file as non-sparse.
            None => return Ok(()),
        };
        let expected = match expected {
            Some(entries) => entries,
            None => return Err("Expected dense file, but reported as sparse"),
        };

        // Check that we didn't miss any data blocks. However, reporting some
        // holes as data is not an error during the loose check.
        if expected.entries.iter().any(|e| {
            !reported
                .entries
                .iter()
                .any(|r| e.offset >= r.offset && e.offset + e.num_bytes <= r.offset + r.num_bytes)
        }) {
            return Err("Reported is not a superset of expected");
        }

        if reported.entries.last() != expected.entries.last() {
            return Err("Last zero-length entry is not as expected");
        }

        // Check invariants of SparseEntries: entries are sorted by offset and
        // do not overlap.
        // NOTE(review): the slice below spans the whole vector, so the
        // terminating zero-length entry is part of this check as well.
        let mut prev_end = None;
        for e in &reported.entries[..reported.entries.len()] {
            if prev_end.map_or(false, |p| e.offset < p) {
                return Err("Overlapping or unsorted entries");
            }
            prev_end = Some(e.offset + e.num_bytes);
        }

        // `on_disk_size` must equal the total length of all entries.
        if reported.on_disk_size != reported.entries.iter().map(|e| e.num_bytes).sum() {
            return Err("Incorrect on-disk size");
        }

        Ok(())
    }
}
1318