1use std::fs;
2use std::io;
3use std::io::prelude::*;
4use std::path::Path;
5use std::str;
6
7use crate::header::{path2bytes, HeaderMode};
8use crate::{other, EntryType, Header};
9
10/// A structure for building archives
11///
12/// This structure has methods for building up an archive from scratch into any
13/// arbitrary writer.
14pub struct Builder<W: Write> {
15 mode: HeaderMode,
16 follow: bool,
17 finished: bool,
18 obj: Option<W>,
19}
20
21impl<W: Write> Builder<W> {
22 /// Create a new archive builder with the underlying object as the
23 /// destination of all data written. The builder will use
24 /// `HeaderMode::Complete` by default.
25 pub fn new(obj: W) -> Builder<W> {
26 Builder {
27 mode: HeaderMode::Complete,
28 follow: true,
29 finished: false,
30 obj: Some(obj),
31 }
32 }
33
34 /// Changes the HeaderMode that will be used when reading fs Metadata for
35 /// methods that implicitly read metadata for an input Path. Notably, this
36 /// does _not_ apply to `append(Header)`.
37 pub fn mode(&mut self, mode: HeaderMode) {
38 self.mode = mode;
39 }
40
41 /// Follow symlinks, archiving the contents of the file they point to rather
42 /// than adding a symlink to the archive. Defaults to true.
43 pub fn follow_symlinks(&mut self, follow: bool) {
44 self.follow = follow;
45 }
46
47 /// Gets shared reference to the underlying object.
48 pub fn get_ref(&self) -> &W {
49 self.obj.as_ref().unwrap()
50 }
51
52 /// Gets mutable reference to the underlying object.
53 ///
54 /// Note that care must be taken while writing to the underlying
55 /// object. But, e.g. `get_mut().flush()` is claimed to be safe and
56 /// useful in the situations when one needs to be ensured that
57 /// tar entry was flushed to the disk.
58 pub fn get_mut(&mut self) -> &mut W {
59 self.obj.as_mut().unwrap()
60 }
61
62 /// Unwrap this archive, returning the underlying object.
63 ///
64 /// This function will finish writing the archive if the `finish` function
65 /// hasn't yet been called, returning any I/O error which happens during
66 /// that operation.
67 pub fn into_inner(mut self) -> io::Result<W> {
68 if !self.finished {
69 self.finish()?;
70 }
71 Ok(self.obj.take().unwrap())
72 }
73
74 /// Adds a new entry to this archive.
75 ///
76 /// This function will append the header specified, followed by contents of
77 /// the stream specified by `data`. To produce a valid archive the `size`
78 /// field of `header` must be the same as the length of the stream that's
79 /// being written. Additionally the checksum for the header should have been
80 /// set via the `set_cksum` method.
81 ///
82 /// Note that this will not attempt to seek the archive to a valid position,
83 /// so if the archive is in the middle of a read or some other similar
84 /// operation then this may corrupt the archive.
85 ///
86 /// Also note that after all entries have been written to an archive the
87 /// `finish` function needs to be called to finish writing the archive.
88 ///
89 /// # Errors
90 ///
91 /// This function will return an error for any intermittent I/O error which
92 /// occurs when either reading or writing.
93 ///
94 /// # Examples
95 ///
96 /// ```
97 /// use tar::{Builder, Header};
98 ///
99 /// let mut header = Header::new_gnu();
100 /// header.set_path("foo").unwrap();
101 /// header.set_size(4);
102 /// header.set_cksum();
103 ///
104 /// let mut data: &[u8] = &[1, 2, 3, 4];
105 ///
106 /// let mut ar = Builder::new(Vec::new());
107 /// ar.append(&header, data).unwrap();
108 /// let data = ar.into_inner().unwrap();
109 /// ```
110 pub fn append<R: Read>(&mut self, header: &Header, mut data: R) -> io::Result<()> {
111 append(self.get_mut(), header, &mut data)
112 }
113
114 /// Adds a new entry to this archive with the specified path.
115 ///
116 /// This function will set the specified path in the given header, which may
117 /// require appending a GNU long-name extension entry to the archive first.
118 /// The checksum for the header will be automatically updated via the
119 /// `set_cksum` method after setting the path. No other metadata in the
120 /// header will be modified.
121 ///
122 /// Then it will append the header, followed by contents of the stream
123 /// specified by `data`. To produce a valid archive the `size` field of
124 /// `header` must be the same as the length of the stream that's being
125 /// written.
126 ///
127 /// Note that this will not attempt to seek the archive to a valid position,
128 /// so if the archive is in the middle of a read or some other similar
129 /// operation then this may corrupt the archive.
130 ///
131 /// Also note that after all entries have been written to an archive the
132 /// `finish` function needs to be called to finish writing the archive.
133 ///
134 /// # Errors
135 ///
136 /// This function will return an error for any intermittent I/O error which
137 /// occurs when either reading or writing.
138 ///
139 /// # Examples
140 ///
141 /// ```
142 /// use tar::{Builder, Header};
143 ///
144 /// let mut header = Header::new_gnu();
145 /// header.set_size(4);
146 /// header.set_cksum();
147 ///
148 /// let mut data: &[u8] = &[1, 2, 3, 4];
149 ///
150 /// let mut ar = Builder::new(Vec::new());
151 /// ar.append_data(&mut header, "really/long/path/to/foo", data).unwrap();
152 /// let data = ar.into_inner().unwrap();
153 /// ```
154 pub fn append_data<P: AsRef<Path>, R: Read>(
155 &mut self,
156 header: &mut Header,
157 path: P,
158 data: R,
159 ) -> io::Result<()> {
160 prepare_header_path(self.get_mut(), header, path.as_ref())?;
161 header.set_cksum();
162 self.append(&header, data)
163 }
164
165 /// Adds a new link (symbolic or hard) entry to this archive with the specified path and target.
166 ///
167 /// This function is similar to [`Self::append_data`] which supports long filenames,
168 /// but also supports long link targets using GNU extensions if necessary.
169 /// You must set the entry type to either [`EntryType::Link`] or [`EntryType::Symlink`].
170 /// The `set_cksum` method will be invoked after setting the path. No other metadata in the
171 /// header will be modified.
172 ///
173 /// If you are intending to use GNU extensions, you must use this method over calling
174 /// [`Header::set_link_name`] because that function will fail on long links.
175 ///
176 /// Similar constraints around the position of the archive and completion
177 /// apply as with [`Self::append_data`].
178 ///
179 /// # Errors
180 ///
181 /// This function will return an error for any intermittent I/O error which
182 /// occurs when either reading or writing.
183 ///
184 /// # Examples
185 ///
186 /// ```
187 /// use tar::{Builder, Header, EntryType};
188 ///
189 /// let mut ar = Builder::new(Vec::new());
190 /// let mut header = Header::new_gnu();
191 /// header.set_username("foo");
192 /// header.set_entry_type(EntryType::Symlink);
193 /// header.set_size(0);
194 /// ar.append_link(&mut header, "really/long/path/to/foo", "other/really/long/target").unwrap();
195 /// let data = ar.into_inner().unwrap();
196 /// ```
197 pub fn append_link<P: AsRef<Path>, T: AsRef<Path>>(
198 &mut self,
199 header: &mut Header,
200 path: P,
201 target: T,
202 ) -> io::Result<()> {
203 self._append_link(header, path.as_ref(), target.as_ref())
204 }
205
206 fn _append_link(&mut self, header: &mut Header, path: &Path, target: &Path) -> io::Result<()> {
207 prepare_header_path(self.get_mut(), header, path)?;
208 prepare_header_link(self.get_mut(), header, target)?;
209 header.set_cksum();
210 self.append(&header, std::io::empty())
211 }
212
213 /// Adds a file on the local filesystem to this archive.
214 ///
215 /// This function will open the file specified by `path` and insert the file
216 /// into the archive with the appropriate metadata set, returning any I/O
217 /// error which occurs while writing. The path name for the file inside of
218 /// this archive will be the same as `path`, and it is required that the
219 /// path is a relative path.
220 ///
221 /// Note that this will not attempt to seek the archive to a valid position,
222 /// so if the archive is in the middle of a read or some other similar
223 /// operation then this may corrupt the archive.
224 ///
225 /// Also note that after all files have been written to an archive the
226 /// `finish` function needs to be called to finish writing the archive.
227 ///
228 /// # Examples
229 ///
230 /// ```no_run
231 /// use tar::Builder;
232 ///
233 /// let mut ar = Builder::new(Vec::new());
234 ///
235 /// ar.append_path("foo/bar.txt").unwrap();
236 /// ```
237 pub fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
238 let mode = self.mode.clone();
239 let follow = self.follow;
240 append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow)
241 }
242
243 /// Adds a file on the local filesystem to this archive under another name.
244 ///
245 /// This function will open the file specified by `path` and insert the file
246 /// into the archive as `name` with appropriate metadata set, returning any
247 /// I/O error which occurs while writing. The path name for the file inside
248 /// of this archive will be `name` is required to be a relative path.
249 ///
250 /// Note that this will not attempt to seek the archive to a valid position,
251 /// so if the archive is in the middle of a read or some other similar
252 /// operation then this may corrupt the archive.
253 ///
254 /// Note if the `path` is a directory. This will just add an entry to the archive,
255 /// rather than contents of the directory.
256 ///
257 /// Also note that after all files have been written to an archive the
258 /// `finish` function needs to be called to finish writing the archive.
259 ///
260 /// # Examples
261 ///
262 /// ```no_run
263 /// use tar::Builder;
264 ///
265 /// let mut ar = Builder::new(Vec::new());
266 ///
267 /// // Insert the local file "foo/bar.txt" in the archive but with the name
268 /// // "bar/foo.txt".
269 /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").unwrap();
270 /// ```
271 pub fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
272 &mut self,
273 path: P,
274 name: N,
275 ) -> io::Result<()> {
276 let mode = self.mode.clone();
277 let follow = self.follow;
278 append_path_with_name(
279 self.get_mut(),
280 path.as_ref(),
281 Some(name.as_ref()),
282 mode,
283 follow,
284 )
285 }
286
287 /// Adds a file to this archive with the given path as the name of the file
288 /// in the archive.
289 ///
290 /// This will use the metadata of `file` to populate a `Header`, and it will
291 /// then append the file to the archive with the name `path`.
292 ///
293 /// Note that this will not attempt to seek the archive to a valid position,
294 /// so if the archive is in the middle of a read or some other similar
295 /// operation then this may corrupt the archive.
296 ///
297 /// Also note that after all files have been written to an archive the
298 /// `finish` function needs to be called to finish writing the archive.
299 ///
300 /// # Examples
301 ///
302 /// ```no_run
303 /// use std::fs::File;
304 /// use tar::Builder;
305 ///
306 /// let mut ar = Builder::new(Vec::new());
307 ///
308 /// // Open the file at one location, but insert it into the archive with a
309 /// // different name.
310 /// let mut f = File::open("foo/bar/baz.txt").unwrap();
311 /// ar.append_file("bar/baz.txt", &mut f).unwrap();
312 /// ```
313 pub fn append_file<P: AsRef<Path>>(&mut self, path: P, file: &mut fs::File) -> io::Result<()> {
314 let mode = self.mode.clone();
315 append_file(self.get_mut(), path.as_ref(), file, mode)
316 }
317
318 /// Adds a directory to this archive with the given path as the name of the
319 /// directory in the archive.
320 ///
321 /// This will use `stat` to populate a `Header`, and it will then append the
322 /// directory to the archive with the name `path`.
323 ///
324 /// Note that this will not attempt to seek the archive to a valid position,
325 /// so if the archive is in the middle of a read or some other similar
326 /// operation then this may corrupt the archive.
327 ///
328 /// Note this will not add the contents of the directory to the archive.
329 /// See `append_dir_all` for recusively adding the contents of the directory.
330 ///
331 /// Also note that after all files have been written to an archive the
332 /// `finish` function needs to be called to finish writing the archive.
333 ///
334 /// # Examples
335 ///
336 /// ```
337 /// use std::fs;
338 /// use tar::Builder;
339 ///
340 /// let mut ar = Builder::new(Vec::new());
341 ///
342 /// // Use the directory at one location, but insert it into the archive
343 /// // with a different name.
344 /// ar.append_dir("bardir", ".").unwrap();
345 /// ```
346 pub fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
347 where
348 P: AsRef<Path>,
349 Q: AsRef<Path>,
350 {
351 let mode = self.mode.clone();
352 append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode)
353 }
354
355 /// Adds a directory and all of its contents (recursively) to this archive
356 /// with the given path as the name of the directory in the archive.
357 ///
358 /// Note that this will not attempt to seek the archive to a valid position,
359 /// so if the archive is in the middle of a read or some other similar
360 /// operation then this may corrupt the archive.
361 ///
362 /// Also note that after all files have been written to an archive the
363 /// `finish` function needs to be called to finish writing the archive.
364 ///
365 /// # Examples
366 ///
367 /// ```
368 /// use std::fs;
369 /// use tar::Builder;
370 ///
371 /// let mut ar = Builder::new(Vec::new());
372 ///
373 /// // Use the directory at one location, but insert it into the archive
374 /// // with a different name.
375 /// ar.append_dir_all("bardir", ".").unwrap();
376 /// ```
377 pub fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
378 where
379 P: AsRef<Path>,
380 Q: AsRef<Path>,
381 {
382 let mode = self.mode.clone();
383 let follow = self.follow;
384 append_dir_all(
385 self.get_mut(),
386 path.as_ref(),
387 src_path.as_ref(),
388 mode,
389 follow,
390 )
391 }
392
393 /// Finish writing this archive, emitting the termination sections.
394 ///
395 /// This function should only be called when the archive has been written
396 /// entirely and if an I/O error happens the underlying object still needs
397 /// to be acquired.
398 ///
399 /// In most situations the `into_inner` method should be preferred.
400 pub fn finish(&mut self) -> io::Result<()> {
401 if self.finished {
402 return Ok(());
403 }
404 self.finished = true;
405 self.get_mut().write_all(&[0; 1024])
406 }
407}
408
409fn append(mut dst: &mut dyn Write, header: &Header, mut data: &mut dyn Read) -> io::Result<()> {
410 dst.write_all(buf:header.as_bytes())?;
411 let len: u64 = io::copy(&mut data, &mut dst)?;
412
413 // Pad with zeros if necessary.
414 let buf: [u8; 512] = [0; 512];
415 let remaining: u64 = 512 - (len % 512);
416 if remaining < 512 {
417 dst.write_all(&buf[..remaining as usize])?;
418 }
419
420 Ok(())
421}
422
423fn append_path_with_name(
424 dst: &mut dyn Write,
425 path: &Path,
426 name: Option<&Path>,
427 mode: HeaderMode,
428 follow: bool,
429) -> io::Result<()> {
430 let stat = if follow {
431 fs::metadata(path).map_err(|err| {
432 io::Error::new(
433 err.kind(),
434 format!("{} when getting metadata for {}", err, path.display()),
435 )
436 })?
437 } else {
438 fs::symlink_metadata(path).map_err(|err| {
439 io::Error::new(
440 err.kind(),
441 format!("{} when getting metadata for {}", err, path.display()),
442 )
443 })?
444 };
445 let ar_name = name.unwrap_or(path);
446 if stat.is_file() {
447 append_fs(dst, ar_name, &stat, &mut fs::File::open(path)?, mode, None)
448 } else if stat.is_dir() {
449 append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None)
450 } else if stat.file_type().is_symlink() {
451 let link_name = fs::read_link(path)?;
452 append_fs(
453 dst,
454 ar_name,
455 &stat,
456 &mut io::empty(),
457 mode,
458 Some(&link_name),
459 )
460 } else {
461 #[cfg(unix)]
462 {
463 append_special(dst, path, &stat, mode)
464 }
465 #[cfg(not(unix))]
466 {
467 Err(other(&format!("{} has unknown file type", path.display())))
468 }
469 }
470}
471
472#[cfg(unix)]
473fn append_special(
474 dst: &mut dyn Write,
475 path: &Path,
476 stat: &fs::Metadata,
477 mode: HeaderMode,
478) -> io::Result<()> {
479 use ::std::os::unix::fs::{FileTypeExt, MetadataExt};
480
481 let file_type = stat.file_type();
482 let entry_type;
483 if file_type.is_socket() {
484 // sockets can't be archived
485 return Err(other(&format!(
486 "{}: socket can not be archived",
487 path.display()
488 )));
489 } else if file_type.is_fifo() {
490 entry_type = EntryType::Fifo;
491 } else if file_type.is_char_device() {
492 entry_type = EntryType::Char;
493 } else if file_type.is_block_device() {
494 entry_type = EntryType::Block;
495 } else {
496 return Err(other(&format!("{} has unknown file type", path.display())));
497 }
498
499 let mut header = Header::new_gnu();
500 header.set_metadata_in_mode(stat, mode);
501 prepare_header_path(dst, &mut header, path)?;
502
503 header.set_entry_type(entry_type);
504 let dev_id = stat.rdev();
505 let dev_major = ((dev_id >> 32) & 0xffff_f000) | ((dev_id >> 8) & 0x0000_0fff);
506 let dev_minor = ((dev_id >> 12) & 0xffff_ff00) | ((dev_id) & 0x0000_00ff);
507 header.set_device_major(dev_major as u32)?;
508 header.set_device_minor(dev_minor as u32)?;
509
510 header.set_cksum();
511 dst.write_all(header.as_bytes())?;
512
513 Ok(())
514}
515
516fn append_file(
517 dst: &mut dyn Write,
518 path: &Path,
519 file: &mut fs::File,
520 mode: HeaderMode,
521) -> io::Result<()> {
522 let stat: Metadata = file.metadata()?;
523 append_fs(dst, path, &stat, read:file, mode, link_name:None)
524}
525
526fn append_dir(
527 dst: &mut dyn Write,
528 path: &Path,
529 src_path: &Path,
530 mode: HeaderMode,
531) -> io::Result<()> {
532 let stat: Metadata = fs::metadata(src_path)?;
533 append_fs(dst, path, &stat, &mut io::empty(), mode, link_name:None)
534}
535
536fn prepare_header(size: u64, entry_type: u8) -> Header {
537 let mut header: Header = Header::new_gnu();
538 let name: &[u8; 13] = b"././@LongLink";
539 header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
540 header.set_mode(0o644);
541 header.set_uid(0);
542 header.set_gid(0);
543 header.set_mtime(0);
544 // + 1 to be compliant with GNU tar
545 header.set_size(size + 1);
546 header.set_entry_type(ty:EntryType::new(byte:entry_type));
547 header.set_cksum();
548 header
549}
550
551fn prepare_header_path(dst: &mut dyn Write, header: &mut Header, path: &Path) -> io::Result<()> {
552 // Try to encode the path directly in the header, but if it ends up not
553 // working (probably because it's too long) then try to use the GNU-specific
554 // long name extension by emitting an entry which indicates that it's the
555 // filename.
556 if let Err(e) = header.set_path(path) {
557 let data = path2bytes(&path)?;
558 let max = header.as_old().name.len();
559 // Since `e` isn't specific enough to let us know the path is indeed too
560 // long, verify it first before using the extension.
561 if data.len() < max {
562 return Err(e);
563 }
564 let header2 = prepare_header(data.len() as u64, b'L');
565 // null-terminated string
566 let mut data2 = data.chain(io::repeat(0).take(1));
567 append(dst, &header2, &mut data2)?;
568
569 // Truncate the path to store in the header we're about to emit to
570 // ensure we've got something at least mentioned. Note that we use
571 // `str`-encoding to be compatible with Windows, but in general the
572 // entry in the header itself shouldn't matter too much since extraction
573 // doesn't look at it.
574 let truncated = match str::from_utf8(&data[..max]) {
575 Ok(s) => s,
576 Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(),
577 };
578 header.set_path(truncated)?;
579 }
580 Ok(())
581}
582
583fn prepare_header_link(
584 dst: &mut dyn Write,
585 header: &mut Header,
586 link_name: &Path,
587) -> io::Result<()> {
588 // Same as previous function but for linkname
589 if let Err(e: Error) = header.set_link_name(&link_name) {
590 let data: Cow<'_, [u8]> = path2bytes(&link_name)?;
591 if data.len() < header.as_old().linkname.len() {
592 return Err(e);
593 }
594 let header2: Header = prepare_header(size:data.len() as u64, entry_type:b'K');
595 let mut data2: Chain<&[u8], Take> = data.chain(next:io::repeat(0).take(limit:1));
596 append(dst, &header2, &mut data2)?;
597 }
598 Ok(())
599}
600
601fn append_fs(
602 dst: &mut dyn Write,
603 path: &Path,
604 meta: &fs::Metadata,
605 read: &mut dyn Read,
606 mode: HeaderMode,
607 link_name: Option<&Path>,
608) -> io::Result<()> {
609 let mut header: Header = Header::new_gnu();
610
611 prepare_header_path(dst, &mut header, path)?;
612 header.set_metadata_in_mode(meta, mode);
613 if let Some(link_name: &Path) = link_name {
614 prepare_header_link(dst, &mut header, link_name)?;
615 }
616 header.set_cksum();
617 append(dst, &header, data:read)
618}
619
620fn append_dir_all(
621 dst: &mut dyn Write,
622 path: &Path,
623 src_path: &Path,
624 mode: HeaderMode,
625 follow: bool,
626) -> io::Result<()> {
627 let mut stack = vec![(src_path.to_path_buf(), true, false)];
628 while let Some((src, is_dir, is_symlink)) = stack.pop() {
629 let dest = path.join(src.strip_prefix(&src_path).unwrap());
630 // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
631 if is_dir || (is_symlink && follow && src.is_dir()) {
632 for entry in fs::read_dir(&src)? {
633 let entry = entry?;
634 let file_type = entry.file_type()?;
635 stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
636 }
637 if dest != Path::new("") {
638 append_dir(dst, &dest, &src, mode)?;
639 }
640 } else if !follow && is_symlink {
641 let stat = fs::symlink_metadata(&src)?;
642 let link_name = fs::read_link(&src)?;
643 append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name))?;
644 } else {
645 #[cfg(unix)]
646 {
647 let stat = fs::metadata(&src)?;
648 if !stat.is_file() {
649 append_special(dst, &dest, &stat, mode)?;
650 continue;
651 }
652 }
653 append_file(dst, &dest, &mut fs::File::open(src)?, mode)?;
654 }
655 }
656 Ok(())
657}
658
659impl<W: Write> Drop for Builder<W> {
660 fn drop(&mut self) {
661 let _ = self.finish();
662 }
663}
664