1//! Types for reading ZIP archives
2
3#[cfg(feature = "aes-crypto")]
4use crate::aes::{AesReader, AesReaderValid};
5use crate::compression::CompressionMethod;
6use crate::cp437::FromCp437;
7use crate::crc32::Crc32Reader;
8use crate::result::{InvalidPassword, ZipError, ZipResult};
9use crate::spec;
10use crate::types::{AesMode, AesVendorVersion, AtomicU64, DateTime, System, ZipFileData};
11use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
12use byteorder::{LittleEndian, ReadBytesExt};
13use std::borrow::Cow;
14use std::collections::HashMap;
15use std::io::{self, prelude::*};
16use std::path::Path;
17use std::sync::Arc;
18
19#[cfg(any(
20 feature = "deflate",
21 feature = "deflate-miniz",
22 feature = "deflate-zlib"
23))]
24use flate2::read::DeflateDecoder;
25
26#[cfg(feature = "bzip2")]
27use bzip2::read::BzDecoder;
28
29#[cfg(feature = "zstd")]
30use zstd::stream::read::Decoder as ZstdDecoder;
31
32/// Provides high level API for reading from a stream.
33pub(crate) mod stream;
34
35// Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely
36pub(crate) mod zip_archive {
37 /// Extract immutable data from `ZipArchive` to make it cheap to clone
38 #[derive(Debug)]
39 pub(crate) struct Shared {
40 pub(super) files: Vec<super::ZipFileData>,
41 pub(super) names_map: super::HashMap<String, usize>,
42 pub(super) offset: u64,
43 pub(super) comment: Vec<u8>,
44 }
45
46 /// ZIP archive reader
47 ///
48 /// At the moment, this type is cheap to clone if this is the case for the
49 /// reader it uses. However, this is not guaranteed by this crate and it may
50 /// change in the future.
51 ///
52 /// ```no_run
53 /// use std::io::prelude::*;
54 /// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
55 /// let mut zip = zip::ZipArchive::new(reader)?;
56 ///
57 /// for i in 0..zip.len() {
58 /// let mut file = zip.by_index(i)?;
59 /// println!("Filename: {}", file.name());
60 /// std::io::copy(&mut file, &mut std::io::stdout());
61 /// }
62 ///
63 /// Ok(())
64 /// }
65 /// ```
66 #[derive(Clone, Debug)]
67 pub struct ZipArchive<R> {
68 pub(super) reader: R,
69 pub(super) shared: super::Arc<Shared>,
70 }
71}
72
73pub use zip_archive::ZipArchive;
74#[allow(clippy::large_enum_variant)]
75enum CryptoReader<'a> {
76 Plaintext(io::Take<&'a mut dyn Read>),
77 ZipCrypto(ZipCryptoReaderValid<io::Take<&'a mut dyn Read>>),
78 #[cfg(feature = "aes-crypto")]
79 Aes {
80 reader: AesReaderValid<io::Take<&'a mut dyn Read>>,
81 vendor_version: AesVendorVersion,
82 },
83}
84
85impl<'a> Read for CryptoReader<'a> {
86 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
87 match self {
88 CryptoReader::Plaintext(r: &mut Take<&mut dyn Read>) => r.read(buf),
89 CryptoReader::ZipCrypto(r: &mut ZipCryptoReaderValid<…>) => r.read(buf),
90 #[cfg(feature = "aes-crypto")]
91 CryptoReader::Aes { reader: r, .. } => r.read(buf),
92 }
93 }
94}
95
96impl<'a> CryptoReader<'a> {
97 /// Consumes this decoder, returning the underlying reader.
98 pub fn into_inner(self) -> io::Take<&'a mut dyn Read> {
99 match self {
100 CryptoReader::Plaintext(r) => r,
101 CryptoReader::ZipCrypto(r) => r.into_inner(),
102 #[cfg(feature = "aes-crypto")]
103 CryptoReader::Aes { reader: r, .. } => r.into_inner(),
104 }
105 }
106
107 /// Returns `true` if the data is encrypted using AE2.
108 pub fn is_ae2_encrypted(&self) -> bool {
109 #[cfg(feature = "aes-crypto")]
110 return matches!(
111 self,
112 CryptoReader::Aes {
113 vendor_version: AesVendorVersion::Ae2,
114 ..
115 }
116 );
117 #[cfg(not(feature = "aes-crypto"))]
118 false
119 }
120}
121
122enum ZipFileReader<'a> {
123 NoReader,
124 Raw(io::Take<&'a mut dyn io::Read>),
125 Stored(Crc32Reader<CryptoReader<'a>>),
126 #[cfg(any(
127 feature = "deflate",
128 feature = "deflate-miniz",
129 feature = "deflate-zlib"
130 ))]
131 Deflated(Crc32Reader<flate2::read::DeflateDecoder<CryptoReader<'a>>>),
132 #[cfg(feature = "bzip2")]
133 Bzip2(Crc32Reader<BzDecoder<CryptoReader<'a>>>),
134 #[cfg(feature = "zstd")]
135 Zstd(Crc32Reader<ZstdDecoder<'a, io::BufReader<CryptoReader<'a>>>>),
136}
137
138impl<'a> Read for ZipFileReader<'a> {
139 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
140 match self {
141 ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"),
142 ZipFileReader::Raw(r: &mut Take<&mut dyn Read>) => r.read(buf),
143 ZipFileReader::Stored(r: &mut Crc32Reader>) => r.read(buf),
144 #[cfg(any(
145 feature = "deflate",
146 feature = "deflate-miniz",
147 feature = "deflate-zlib"
148 ))]
149 ZipFileReader::Deflated(r: &mut Crc32Reader>) => r.read(buf),
150 #[cfg(feature = "bzip2")]
151 ZipFileReader::Bzip2(r) => r.read(buf),
152 #[cfg(feature = "zstd")]
153 ZipFileReader::Zstd(r) => r.read(buf),
154 }
155 }
156}
157
158impl<'a> ZipFileReader<'a> {
159 /// Consumes this decoder, returning the underlying reader.
160 pub fn into_inner(self) -> io::Take<&'a mut dyn Read> {
161 match self {
162 ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"),
163 ZipFileReader::Raw(r: Take<&mut dyn Read>) => r,
164 ZipFileReader::Stored(r: Crc32Reader>) => r.into_inner().into_inner(),
165 #[cfg(any(
166 feature = "deflate",
167 feature = "deflate-miniz",
168 feature = "deflate-zlib"
169 ))]
170 ZipFileReader::Deflated(r: Crc32Reader>) => r.into_inner().into_inner().into_inner(),
171 #[cfg(feature = "bzip2")]
172 ZipFileReader::Bzip2(r) => r.into_inner().into_inner().into_inner(),
173 #[cfg(feature = "zstd")]
174 ZipFileReader::Zstd(r) => r.into_inner().finish().into_inner().into_inner(),
175 }
176 }
177}
178
179/// A struct for reading a zip file
180pub struct ZipFile<'a> {
181 data: Cow<'a, ZipFileData>,
182 crypto_reader: Option<CryptoReader<'a>>,
183 reader: ZipFileReader<'a>,
184}
185
186fn find_content<'a>(
187 data: &ZipFileData,
188 reader: &'a mut (impl Read + Seek),
189) -> ZipResult<io::Take<&'a mut dyn Read>> {
190 // Parse local header
191 reader.seek(pos:io::SeekFrom::Start(data.header_start))?;
192 let signature: u32 = reader.read_u32::<LittleEndian>()?;
193 if signature != spec::LOCAL_FILE_HEADER_SIGNATURE {
194 return Err(ZipError::InvalidArchive("Invalid local file header"));
195 }
196
197 reader.seek(pos:io::SeekFrom::Current(22))?;
198 let file_name_length: u64 = reader.read_u16::<LittleEndian>()? as u64;
199 let extra_field_length: u64 = reader.read_u16::<LittleEndian>()? as u64;
200 let magic_and_header: u64 = 4 + 22 + 2 + 2;
201 let data_start: u64 = data.header_start + magic_and_header + file_name_length + extra_field_length;
202 data.data_start.store(val:data_start);
203
204 reader.seek(pos:io::SeekFrom::Start(data_start))?;
205 Ok((reader as &mut dyn Read).take(limit:data.compressed_size))
206}
207
208#[allow(clippy::too_many_arguments)]
209fn make_crypto_reader<'a>(
210 compression_method: crate::compression::CompressionMethod,
211 crc32: u32,
212 last_modified_time: DateTime,
213 using_data_descriptor: bool,
214 reader: io::Take<&'a mut dyn io::Read>,
215 password: Option<&[u8]>,
216 aes_info: Option<(AesMode, AesVendorVersion)>,
217 #[cfg(feature = "aes-crypto")] compressed_size: u64,
218) -> ZipResult<Result<CryptoReader<'a>, InvalidPassword>> {
219 #[allow(deprecated)]
220 {
221 if let CompressionMethod::Unsupported(_) = compression_method {
222 return unsupported_zip_error("Compression method not supported");
223 }
224 }
225
226 let reader = match (password, aes_info) {
227 #[cfg(not(feature = "aes-crypto"))]
228 (Some(_), Some(_)) => {
229 return Err(ZipError::UnsupportedArchive(
230 "AES encrypted files cannot be decrypted without the aes-crypto feature.",
231 ))
232 }
233 #[cfg(feature = "aes-crypto")]
234 (Some(password), Some((aes_mode, vendor_version))) => {
235 match AesReader::new(reader, aes_mode, compressed_size).validate(password)? {
236 None => return Ok(Err(InvalidPassword)),
237 Some(r) => CryptoReader::Aes {
238 reader: r,
239 vendor_version,
240 },
241 }
242 }
243 (Some(password), None) => {
244 let validator = if using_data_descriptor {
245 ZipCryptoValidator::InfoZipMsdosTime(last_modified_time.timepart())
246 } else {
247 ZipCryptoValidator::PkzipCrc32(crc32)
248 };
249 match ZipCryptoReader::new(reader, password).validate(validator)? {
250 None => return Ok(Err(InvalidPassword)),
251 Some(r) => CryptoReader::ZipCrypto(r),
252 }
253 }
254 (None, Some(_)) => return Ok(Err(InvalidPassword)),
255 (None, None) => CryptoReader::Plaintext(reader),
256 };
257 Ok(Ok(reader))
258}
259
260fn make_reader(
261 compression_method: CompressionMethod,
262 crc32: u32,
263 reader: CryptoReader,
264) -> ZipFileReader {
265 let ae2_encrypted = reader.is_ae2_encrypted();
266
267 match compression_method {
268 CompressionMethod::Stored => {
269 ZipFileReader::Stored(Crc32Reader::new(reader, crc32, ae2_encrypted))
270 }
271 #[cfg(any(
272 feature = "deflate",
273 feature = "deflate-miniz",
274 feature = "deflate-zlib"
275 ))]
276 CompressionMethod::Deflated => {
277 let deflate_reader = DeflateDecoder::new(reader);
278 ZipFileReader::Deflated(Crc32Reader::new(deflate_reader, crc32, ae2_encrypted))
279 }
280 #[cfg(feature = "bzip2")]
281 CompressionMethod::Bzip2 => {
282 let bzip2_reader = BzDecoder::new(reader);
283 ZipFileReader::Bzip2(Crc32Reader::new(bzip2_reader, crc32, ae2_encrypted))
284 }
285 #[cfg(feature = "zstd")]
286 CompressionMethod::Zstd => {
287 let zstd_reader = ZstdDecoder::new(reader).unwrap();
288 ZipFileReader::Zstd(Crc32Reader::new(zstd_reader, crc32, ae2_encrypted))
289 }
290 _ => panic!("Compression method not supported"),
291 }
292}
293
294impl<R: Read + io::Seek> ZipArchive<R> {
295 /// Get the directory start offset and number of files. This is done in a
296 /// separate function to ease the control flow design.
297 pub(crate) fn get_directory_counts(
298 reader: &mut R,
299 footer: &spec::CentralDirectoryEnd,
300 cde_start_pos: u64,
301 ) -> ZipResult<(u64, u64, usize)> {
302 // See if there's a ZIP64 footer. The ZIP64 locator if present will
303 // have its signature 20 bytes in front of the standard footer. The
304 // standard footer, in turn, is 22+N bytes large, where N is the
305 // comment length. Therefore:
306 let zip64locator = if reader
307 .seek(io::SeekFrom::End(
308 -(20 + 22 + footer.zip_file_comment.len() as i64),
309 ))
310 .is_ok()
311 {
312 match spec::Zip64CentralDirectoryEndLocator::parse(reader) {
313 Ok(loc) => Some(loc),
314 Err(ZipError::InvalidArchive(_)) => {
315 // No ZIP64 header; that's actually fine. We're done here.
316 None
317 }
318 Err(e) => {
319 // Yikes, a real problem
320 return Err(e);
321 }
322 }
323 } else {
324 // Empty Zip files will have nothing else so this error might be fine. If
325 // not, we'll find out soon.
326 None
327 };
328
329 match zip64locator {
330 None => {
331 // Some zip files have data prepended to them, resulting in the
332 // offsets all being too small. Get the amount of error by comparing
333 // the actual file position we found the CDE at with the offset
334 // recorded in the CDE.
335 let archive_offset = cde_start_pos
336 .checked_sub(footer.central_directory_size as u64)
337 .and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
338 .ok_or(ZipError::InvalidArchive(
339 "Invalid central directory size or offset",
340 ))?;
341
342 let directory_start = footer.central_directory_offset as u64 + archive_offset;
343 let number_of_files = footer.number_of_files_on_this_disk as usize;
344 Ok((archive_offset, directory_start, number_of_files))
345 }
346 Some(locator64) => {
347 // If we got here, this is indeed a ZIP64 file.
348
349 if !footer.record_too_small()
350 && footer.disk_number as u32 != locator64.disk_with_central_directory
351 {
352 return unsupported_zip_error(
353 "Support for multi-disk files is not implemented",
354 );
355 }
356
357 // We need to reassess `archive_offset`. We know where the ZIP64
358 // central-directory-end structure *should* be, but unfortunately we
359 // don't know how to precisely relate that location to our current
360 // actual offset in the file, since there may be junk at its
361 // beginning. Therefore we need to perform another search, as in
362 // read::CentralDirectoryEnd::find_and_parse, except now we search
363 // forward.
364
365 let search_upper_bound = cde_start_pos
366 .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
367 .ok_or(ZipError::InvalidArchive(
368 "File cannot contain ZIP64 central directory end",
369 ))?;
370 let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse(
371 reader,
372 locator64.end_of_central_directory_offset,
373 search_upper_bound,
374 )?;
375
376 if footer.disk_number != footer.disk_with_central_directory {
377 return unsupported_zip_error(
378 "Support for multi-disk files is not implemented",
379 );
380 }
381
382 let directory_start = footer
383 .central_directory_offset
384 .checked_add(archive_offset)
385 .ok_or({
386 ZipError::InvalidArchive("Invalid central directory size or offset")
387 })?;
388
389 Ok((
390 archive_offset,
391 directory_start,
392 footer.number_of_files as usize,
393 ))
394 }
395 }
396 }
397
398 /// Read a ZIP archive, collecting the files it contains
399 ///
400 /// This uses the central directory record of the ZIP file, and ignores local file headers
401 pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
402 let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut reader)?;
403
404 if !footer.record_too_small() && footer.disk_number != footer.disk_with_central_directory {
405 return unsupported_zip_error("Support for multi-disk files is not implemented");
406 }
407
408 let (archive_offset, directory_start, number_of_files) =
409 Self::get_directory_counts(&mut reader, &footer, cde_start_pos)?;
410
411 // If the parsed number of files is greater than the offset then
412 // something fishy is going on and we shouldn't trust number_of_files.
413 let file_capacity = if number_of_files > cde_start_pos as usize {
414 0
415 } else {
416 number_of_files
417 };
418
419 let mut files = Vec::with_capacity(file_capacity);
420 let mut names_map = HashMap::with_capacity(file_capacity);
421
422 if reader.seek(io::SeekFrom::Start(directory_start)).is_err() {
423 return Err(ZipError::InvalidArchive(
424 "Could not seek to start of central directory",
425 ));
426 }
427
428 for _ in 0..number_of_files {
429 let file = central_header_to_zip_file(&mut reader, archive_offset)?;
430 names_map.insert(file.file_name.clone(), files.len());
431 files.push(file);
432 }
433
434 let shared = Arc::new(zip_archive::Shared {
435 files,
436 names_map,
437 offset: archive_offset,
438 comment: footer.zip_file_comment,
439 });
440
441 Ok(ZipArchive { reader, shared })
442 }
443 /// Extract a Zip archive into a directory, overwriting files if they
444 /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
445 ///
446 /// Extraction is not atomic; If an error is encountered, some of the files
447 /// may be left on disk.
448 pub fn extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()> {
449 use std::fs;
450
451 for i in 0..self.len() {
452 let mut file = self.by_index(i)?;
453 let filepath = file
454 .enclosed_name()
455 .ok_or(ZipError::InvalidArchive("Invalid file path"))?;
456
457 let outpath = directory.as_ref().join(filepath);
458
459 if file.name().ends_with('/') {
460 fs::create_dir_all(&outpath)?;
461 } else {
462 if let Some(p) = outpath.parent() {
463 if !p.exists() {
464 fs::create_dir_all(p)?;
465 }
466 }
467 let mut outfile = fs::File::create(&outpath)?;
468 io::copy(&mut file, &mut outfile)?;
469 }
470 // Get and Set permissions
471 #[cfg(unix)]
472 {
473 use std::os::unix::fs::PermissionsExt;
474 if let Some(mode) = file.unix_mode() {
475 fs::set_permissions(&outpath, fs::Permissions::from_mode(mode))?;
476 }
477 }
478 }
479 Ok(())
480 }
481
482 /// Number of files contained in this zip.
483 pub fn len(&self) -> usize {
484 self.shared.files.len()
485 }
486
487 /// Whether this zip archive contains no files
488 pub fn is_empty(&self) -> bool {
489 self.len() == 0
490 }
491
492 /// Get the offset from the beginning of the underlying reader that this zip begins at, in bytes.
493 ///
494 /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
495 /// of that prepended data.
496 pub fn offset(&self) -> u64 {
497 self.shared.offset
498 }
499
500 /// Get the comment of the zip archive.
501 pub fn comment(&self) -> &[u8] {
502 &self.shared.comment
503 }
504
505 /// Returns an iterator over all the file and directory names in this archive.
506 pub fn file_names(&self) -> impl Iterator<Item = &str> {
507 self.shared.names_map.keys().map(|s| s.as_str())
508 }
509
510 /// Search for a file entry by name, decrypt with given password
511 ///
512 /// # Warning
513 ///
514 /// The implementation of the cryptographic algorithms has not
515 /// gone through a correctness review, and you should assume it is insecure:
516 /// passwords used with this API may be compromised.
517 ///
518 /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us
519 /// to check for a 1/256 chance that the password is correct.
520 /// There are many passwords out there that will also pass the validity checks
521 /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
522 /// due to its fairly primitive approach to cryptography.
523 pub fn by_name_decrypt<'a>(
524 &'a mut self,
525 name: &str,
526 password: &[u8],
527 ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
528 self.by_name_with_optional_password(name, Some(password))
529 }
530
531 /// Search for a file entry by name
532 pub fn by_name<'a>(&'a mut self, name: &str) -> ZipResult<ZipFile<'a>> {
533 Ok(self.by_name_with_optional_password(name, None)?.unwrap())
534 }
535
536 fn by_name_with_optional_password<'a>(
537 &'a mut self,
538 name: &str,
539 password: Option<&[u8]>,
540 ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
541 let index = match self.shared.names_map.get(name) {
542 Some(index) => *index,
543 None => {
544 return Err(ZipError::FileNotFound);
545 }
546 };
547 self.by_index_with_optional_password(index, password)
548 }
549
550 /// Get a contained file by index, decrypt with given password
551 ///
552 /// # Warning
553 ///
554 /// The implementation of the cryptographic algorithms has not
555 /// gone through a correctness review, and you should assume it is insecure:
556 /// passwords used with this API may be compromised.
557 ///
558 /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us
559 /// to check for a 1/256 chance that the password is correct.
560 /// There are many passwords out there that will also pass the validity checks
561 /// we are able to perform. This is a weakness of the ZipCrypto algorithm,
562 /// due to its fairly primitive approach to cryptography.
563 pub fn by_index_decrypt<'a>(
564 &'a mut self,
565 file_number: usize,
566 password: &[u8],
567 ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
568 self.by_index_with_optional_password(file_number, Some(password))
569 }
570
571 /// Get a contained file by index
572 pub fn by_index(&mut self, file_number: usize) -> ZipResult<ZipFile<'_>> {
573 Ok(self
574 .by_index_with_optional_password(file_number, None)?
575 .unwrap())
576 }
577
578 /// Get a contained file by index without decompressing it
579 pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult<ZipFile<'_>> {
580 let reader = &mut self.reader;
581 self.shared
582 .files
583 .get(file_number)
584 .ok_or(ZipError::FileNotFound)
585 .and_then(move |data| {
586 Ok(ZipFile {
587 crypto_reader: None,
588 reader: ZipFileReader::Raw(find_content(data, reader)?),
589 data: Cow::Borrowed(data),
590 })
591 })
592 }
593
594 fn by_index_with_optional_password<'a>(
595 &'a mut self,
596 file_number: usize,
597 mut password: Option<&[u8]>,
598 ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
599 let data = self
600 .shared
601 .files
602 .get(file_number)
603 .ok_or(ZipError::FileNotFound)?;
604
605 match (password, data.encrypted) {
606 (None, true) => return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)),
607 (Some(_), false) => password = None, //Password supplied, but none needed! Discard.
608 _ => {}
609 }
610 let limit_reader = find_content(data, &mut self.reader)?;
611
612 match make_crypto_reader(
613 data.compression_method,
614 data.crc32,
615 data.last_modified_time,
616 data.using_data_descriptor,
617 limit_reader,
618 password,
619 data.aes_mode,
620 #[cfg(feature = "aes-crypto")]
621 data.compressed_size,
622 ) {
623 Ok(Ok(crypto_reader)) => Ok(Ok(ZipFile {
624 crypto_reader: Some(crypto_reader),
625 reader: ZipFileReader::NoReader,
626 data: Cow::Borrowed(data),
627 })),
628 Err(e) => Err(e),
629 Ok(Err(e)) => Ok(Err(e)),
630 }
631 }
632
633 /// Unwrap and return the inner reader object
634 ///
635 /// The position of the reader is undefined.
636 pub fn into_inner(self) -> R {
637 self.reader
638 }
639}
640
641fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> {
642 Err(ZipError::UnsupportedArchive(detail))
643}
644
645/// Parse a central directory entry to collect the information for the file.
646pub(crate) fn central_header_to_zip_file<R: Read + io::Seek>(
647 reader: &mut R,
648 archive_offset: u64,
649) -> ZipResult<ZipFileData> {
650 let central_header_start: u64 = reader.stream_position()?;
651
652 // Parse central header
653 let signature: u32 = reader.read_u32::<LittleEndian>()?;
654 if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
655 Err(ZipError::InvalidArchive("Invalid Central Directory header"))
656 } else {
657 central_header_to_zip_file_inner(reader, archive_offset, central_header_start)
658 }
659}
660
661/// Parse a central directory entry to collect the information for the file.
662fn central_header_to_zip_file_inner<R: Read>(
663 reader: &mut R,
664 archive_offset: u64,
665 central_header_start: u64,
666) -> ZipResult<ZipFileData> {
667 let version_made_by = reader.read_u16::<LittleEndian>()?;
668 let _version_to_extract = reader.read_u16::<LittleEndian>()?;
669 let flags = reader.read_u16::<LittleEndian>()?;
670 let encrypted = flags & 1 == 1;
671 let is_utf8 = flags & (1 << 11) != 0;
672 let using_data_descriptor = flags & (1 << 3) != 0;
673 let compression_method = reader.read_u16::<LittleEndian>()?;
674 let last_mod_time = reader.read_u16::<LittleEndian>()?;
675 let last_mod_date = reader.read_u16::<LittleEndian>()?;
676 let crc32 = reader.read_u32::<LittleEndian>()?;
677 let compressed_size = reader.read_u32::<LittleEndian>()?;
678 let uncompressed_size = reader.read_u32::<LittleEndian>()?;
679 let file_name_length = reader.read_u16::<LittleEndian>()? as usize;
680 let extra_field_length = reader.read_u16::<LittleEndian>()? as usize;
681 let file_comment_length = reader.read_u16::<LittleEndian>()? as usize;
682 let _disk_number = reader.read_u16::<LittleEndian>()?;
683 let _internal_file_attributes = reader.read_u16::<LittleEndian>()?;
684 let external_file_attributes = reader.read_u32::<LittleEndian>()?;
685 let offset = reader.read_u32::<LittleEndian>()? as u64;
686 let mut file_name_raw = vec![0; file_name_length];
687 reader.read_exact(&mut file_name_raw)?;
688 let mut extra_field = vec![0; extra_field_length];
689 reader.read_exact(&mut extra_field)?;
690 let mut file_comment_raw = vec![0; file_comment_length];
691 reader.read_exact(&mut file_comment_raw)?;
692
693 let file_name = match is_utf8 {
694 true => String::from_utf8_lossy(&file_name_raw).into_owned(),
695 false => file_name_raw.clone().from_cp437(),
696 };
697 let file_comment = match is_utf8 {
698 true => String::from_utf8_lossy(&file_comment_raw).into_owned(),
699 false => file_comment_raw.from_cp437(),
700 };
701
702 // Construct the result
703 let mut result = ZipFileData {
704 system: System::from_u8((version_made_by >> 8) as u8),
705 version_made_by: version_made_by as u8,
706 encrypted,
707 using_data_descriptor,
708 compression_method: {
709 #[allow(deprecated)]
710 CompressionMethod::from_u16(compression_method)
711 },
712 compression_level: None,
713 last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time),
714 crc32,
715 compressed_size: compressed_size as u64,
716 uncompressed_size: uncompressed_size as u64,
717 file_name,
718 file_name_raw,
719 extra_field,
720 file_comment,
721 header_start: offset,
722 central_header_start,
723 data_start: AtomicU64::new(0),
724 external_attributes: external_file_attributes,
725 large_file: false,
726 aes_mode: None,
727 };
728
729 match parse_extra_field(&mut result) {
730 Ok(..) | Err(ZipError::Io(..)) => {}
731 Err(e) => return Err(e),
732 }
733
734 let aes_enabled = result.compression_method == CompressionMethod::AES;
735 if aes_enabled && result.aes_mode.is_none() {
736 return Err(ZipError::InvalidArchive(
737 "AES encryption without AES extra data field",
738 ));
739 }
740
741 // Account for shifted zip offsets.
742 result.header_start = result
743 .header_start
744 .checked_add(archive_offset)
745 .ok_or(ZipError::InvalidArchive("Archive header is too large"))?;
746
747 Ok(result)
748}
749
750fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> {
751 let mut reader = io::Cursor::new(&file.extra_field);
752
753 while (reader.position() as usize) < file.extra_field.len() {
754 let kind = reader.read_u16::<LittleEndian>()?;
755 let len = reader.read_u16::<LittleEndian>()?;
756 let mut len_left = len as i64;
757 match kind {
758 // Zip64 extended information extra field
759 0x0001 => {
760 if file.uncompressed_size == spec::ZIP64_BYTES_THR {
761 file.large_file = true;
762 file.uncompressed_size = reader.read_u64::<LittleEndian>()?;
763 len_left -= 8;
764 }
765 if file.compressed_size == spec::ZIP64_BYTES_THR {
766 file.large_file = true;
767 file.compressed_size = reader.read_u64::<LittleEndian>()?;
768 len_left -= 8;
769 }
770 if file.header_start == spec::ZIP64_BYTES_THR {
771 file.header_start = reader.read_u64::<LittleEndian>()?;
772 len_left -= 8;
773 }
774 }
775 0x9901 => {
776 // AES
777 if len != 7 {
778 return Err(ZipError::UnsupportedArchive(
779 "AES extra data field has an unsupported length",
780 ));
781 }
782 let vendor_version = reader.read_u16::<LittleEndian>()?;
783 let vendor_id = reader.read_u16::<LittleEndian>()?;
784 let aes_mode = reader.read_u8()?;
785 let compression_method = reader.read_u16::<LittleEndian>()?;
786
787 if vendor_id != 0x4541 {
788 return Err(ZipError::InvalidArchive("Invalid AES vendor"));
789 }
790 let vendor_version = match vendor_version {
791 0x0001 => AesVendorVersion::Ae1,
792 0x0002 => AesVendorVersion::Ae2,
793 _ => return Err(ZipError::InvalidArchive("Invalid AES vendor version")),
794 };
795 match aes_mode {
796 0x01 => file.aes_mode = Some((AesMode::Aes128, vendor_version)),
797 0x02 => file.aes_mode = Some((AesMode::Aes192, vendor_version)),
798 0x03 => file.aes_mode = Some((AesMode::Aes256, vendor_version)),
799 _ => return Err(ZipError::InvalidArchive("Invalid AES encryption strength")),
800 };
801 file.compression_method = {
802 #[allow(deprecated)]
803 CompressionMethod::from_u16(compression_method)
804 };
805 }
806 _ => {
807 // Other fields are ignored
808 }
809 }
810
811 // We could also check for < 0 to check for errors
812 if len_left > 0 {
813 reader.seek(io::SeekFrom::Current(len_left))?;
814 }
815 }
816 Ok(())
817}
818
819/// Methods for retrieving information on zip files
820impl<'a> ZipFile<'a> {
821 fn get_reader(&mut self) -> &mut ZipFileReader<'a> {
822 if let ZipFileReader::NoReader = self.reader {
823 let data = &self.data;
824 let crypto_reader = self.crypto_reader.take().expect("Invalid reader state");
825 self.reader = make_reader(data.compression_method, data.crc32, crypto_reader)
826 }
827 &mut self.reader
828 }
829
830 pub(crate) fn get_raw_reader(&mut self) -> &mut dyn Read {
831 if let ZipFileReader::NoReader = self.reader {
832 let crypto_reader = self.crypto_reader.take().expect("Invalid reader state");
833 self.reader = ZipFileReader::Raw(crypto_reader.into_inner())
834 }
835 &mut self.reader
836 }
837
838 /// Get the version of the file
839 pub fn version_made_by(&self) -> (u8, u8) {
840 (
841 self.data.version_made_by / 10,
842 self.data.version_made_by % 10,
843 )
844 }
845
846 /// Get the name of the file
847 ///
848 /// # Warnings
849 ///
850 /// It is dangerous to use this name directly when extracting an archive.
851 /// It may contain an absolute path (`/etc/shadow`), or break out of the
852 /// current directory (`../runtime`). Carelessly writing to these paths
853 /// allows an attacker to craft a ZIP archive that will overwrite critical
854 /// files.
855 ///
856 /// You can use the [`ZipFile::enclosed_name`] method to validate the name
857 /// as a safe path.
858 pub fn name(&self) -> &str {
859 &self.data.file_name
860 }
861
862 /// Get the name of the file, in the raw (internal) byte representation.
863 ///
864 /// The encoding of this data is currently undefined.
865 pub fn name_raw(&self) -> &[u8] {
866 &self.data.file_name_raw
867 }
868
869 /// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte,
870 /// removes a leading '/' and removes '..' parts.
871 #[deprecated(
872 since = "0.5.7",
873 note = "by stripping `..`s from the path, the meaning of paths can change.
874 `mangled_name` can be used if this behaviour is desirable"
875 )]
876 pub fn sanitized_name(&self) -> ::std::path::PathBuf {
877 self.mangled_name()
878 }
879
880 /// Rewrite the path, ignoring any path components with special meaning.
881 ///
882 /// - Absolute paths are made relative
883 /// - [`ParentDir`]s are ignored
884 /// - Truncates the filename at a NULL byte
885 ///
886 /// This is appropriate if you need to be able to extract *something* from
887 /// any archive, but will easily misrepresent trivial paths like
888 /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
889 /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
890 ///
891 /// [`ParentDir`]: `Component::ParentDir`
892 pub fn mangled_name(&self) -> ::std::path::PathBuf {
893 self.data.file_name_sanitized()
894 }
895
896 /// Ensure the file path is safe to use as a [`Path`].
897 ///
898 /// - It can't contain NULL bytes
899 /// - It can't resolve to a path outside the current directory
900 /// > `foo/../bar` is fine, `foo/../../bar` is not.
901 /// - It can't be an absolute path
902 ///
903 /// This will read well-formed ZIP files correctly, and is resistant
904 /// to path-based exploits. It is recommended over
905 /// [`ZipFile::mangled_name`].
906 pub fn enclosed_name(&self) -> Option<&Path> {
907 self.data.enclosed_name()
908 }
909
910 /// Get the comment of the file
911 pub fn comment(&self) -> &str {
912 &self.data.file_comment
913 }
914
915 /// Get the compression method used to store the file
916 pub fn compression(&self) -> CompressionMethod {
917 self.data.compression_method
918 }
919
920 /// Get the size of the file, in bytes, in the archive
921 pub fn compressed_size(&self) -> u64 {
922 self.data.compressed_size
923 }
924
925 /// Get the size of the file, in bytes, when uncompressed
926 pub fn size(&self) -> u64 {
927 self.data.uncompressed_size
928 }
929
930 /// Get the time the file was last modified
931 pub fn last_modified(&self) -> DateTime {
932 self.data.last_modified_time
933 }
934 /// Returns whether the file is actually a directory
935 pub fn is_dir(&self) -> bool {
936 self.name()
937 .chars()
938 .rev()
939 .next()
940 .map_or(false, |c| c == '/' || c == '\\')
941 }
942
943 /// Returns whether the file is a regular file
944 pub fn is_file(&self) -> bool {
945 !self.is_dir()
946 }
947
948 /// Get unix mode for the file
949 pub fn unix_mode(&self) -> Option<u32> {
950 self.data.unix_mode()
951 }
952
953 /// Get the CRC32 hash of the original file
954 pub fn crc32(&self) -> u32 {
955 self.data.crc32
956 }
957
958 /// Get the extra data of the zip header for this file
959 pub fn extra_data(&self) -> &[u8] {
960 &self.data.extra_field
961 }
962
963 /// Get the starting offset of the data of the compressed file
964 pub fn data_start(&self) -> u64 {
965 self.data.data_start.load()
966 }
967
968 /// Get the starting offset of the zip header for this file
969 pub fn header_start(&self) -> u64 {
970 self.data.header_start
971 }
972 /// Get the starting offset of the zip header in the central directory for this file
973 pub fn central_header_start(&self) -> u64 {
974 self.data.central_header_start
975 }
976}
977
978impl<'a> Read for ZipFile<'a> {
979 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
980 self.get_reader().read(buf)
981 }
982}
983
984impl<'a> Drop for ZipFile<'a> {
985 fn drop(&mut self) {
986 // self.data is Owned, this reader is constructed by a streaming reader.
987 // In this case, we want to exhaust the reader so that the next file is accessible.
988 if let Cow::Owned(_) = self.data {
989 let mut buffer = [0; 1 << 16];
990
991 // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped.
992 let mut reader: std::io::Take<&mut dyn std::io::Read> = match &mut self.reader {
993 ZipFileReader::NoReader => {
994 let innerreader = ::std::mem::replace(&mut self.crypto_reader, None);
995 innerreader.expect("Invalid reader state").into_inner()
996 }
997 reader => {
998 let innerreader = ::std::mem::replace(reader, ZipFileReader::NoReader);
999 innerreader.into_inner()
1000 }
1001 };
1002
1003 loop {
1004 match reader.read(&mut buffer) {
1005 Ok(0) => break,
1006 Ok(_) => (),
1007 Err(e) => {
1008 panic!("Could not consume all of the output of the current ZipFile: {e:?}")
1009 }
1010 }
1011 }
1012 }
1013 }
1014}
1015
1016/// Read ZipFile structures from a non-seekable reader.
1017///
1018/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions
1019/// as some information will be missing when reading this manner.
1020///
1021/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is
1022/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory
1023/// is encountered. No more files should be read after this.
1024///
1025/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after
1026/// the structure is done.
1027///
1028/// Missing fields are:
1029/// * `comment`: set to an empty string
1030/// * `data_start`: set to 0
1031/// * `external_attributes`: `unix_mode()`: will return None
1032pub fn read_zipfile_from_stream<'a, R: io::Read>(
1033 reader: &'a mut R,
1034) -> ZipResult<Option<ZipFile<'_>>> {
1035 let signature = reader.read_u32::<LittleEndian>()?;
1036
1037 match signature {
1038 spec::LOCAL_FILE_HEADER_SIGNATURE => (),
1039 spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None),
1040 _ => return Err(ZipError::InvalidArchive("Invalid local file header")),
1041 }
1042
1043 let version_made_by = reader.read_u16::<LittleEndian>()?;
1044 let flags = reader.read_u16::<LittleEndian>()?;
1045 let encrypted = flags & 1 == 1;
1046 let is_utf8 = flags & (1 << 11) != 0;
1047 let using_data_descriptor = flags & (1 << 3) != 0;
1048 #[allow(deprecated)]
1049 let compression_method = CompressionMethod::from_u16(reader.read_u16::<LittleEndian>()?);
1050 let last_mod_time = reader.read_u16::<LittleEndian>()?;
1051 let last_mod_date = reader.read_u16::<LittleEndian>()?;
1052 let crc32 = reader.read_u32::<LittleEndian>()?;
1053 let compressed_size = reader.read_u32::<LittleEndian>()?;
1054 let uncompressed_size = reader.read_u32::<LittleEndian>()?;
1055 let file_name_length = reader.read_u16::<LittleEndian>()? as usize;
1056 let extra_field_length = reader.read_u16::<LittleEndian>()? as usize;
1057
1058 let mut file_name_raw = vec![0; file_name_length];
1059 reader.read_exact(&mut file_name_raw)?;
1060 let mut extra_field = vec![0; extra_field_length];
1061 reader.read_exact(&mut extra_field)?;
1062
1063 let file_name = match is_utf8 {
1064 true => String::from_utf8_lossy(&file_name_raw).into_owned(),
1065 false => file_name_raw.clone().from_cp437(),
1066 };
1067
1068 let mut result = ZipFileData {
1069 system: System::from_u8((version_made_by >> 8) as u8),
1070 version_made_by: version_made_by as u8,
1071 encrypted,
1072 using_data_descriptor,
1073 compression_method,
1074 compression_level: None,
1075 last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time),
1076 crc32,
1077 compressed_size: compressed_size as u64,
1078 uncompressed_size: uncompressed_size as u64,
1079 file_name,
1080 file_name_raw,
1081 extra_field,
1082 file_comment: String::new(), // file comment is only available in the central directory
1083 // header_start and data start are not available, but also don't matter, since seeking is
1084 // not available.
1085 header_start: 0,
1086 data_start: AtomicU64::new(0),
1087 central_header_start: 0,
1088 // The external_attributes field is only available in the central directory.
1089 // We set this to zero, which should be valid as the docs state 'If input came
1090 // from standard input, this field is set to zero.'
1091 external_attributes: 0,
1092 large_file: false,
1093 aes_mode: None,
1094 };
1095
1096 match parse_extra_field(&mut result) {
1097 Ok(..) | Err(ZipError::Io(..)) => {}
1098 Err(e) => return Err(e),
1099 }
1100
1101 if encrypted {
1102 return unsupported_zip_error("Encrypted files are not supported");
1103 }
1104 if using_data_descriptor {
1105 return unsupported_zip_error("The file length is not available in the local header");
1106 }
1107
1108 let limit_reader = (reader as &'a mut dyn io::Read).take(result.compressed_size);
1109
1110 let result_crc32 = result.crc32;
1111 let result_compression_method = result.compression_method;
1112 let crypto_reader = make_crypto_reader(
1113 result_compression_method,
1114 result_crc32,
1115 result.last_modified_time,
1116 result.using_data_descriptor,
1117 limit_reader,
1118 None,
1119 None,
1120 #[cfg(feature = "aes-crypto")]
1121 result.compressed_size,
1122 )?
1123 .unwrap();
1124
1125 Ok(Some(ZipFile {
1126 data: Cow::Owned(result),
1127 crypto_reader: None,
1128 reader: make_reader(result_compression_method, result_crc32, crypto_reader),
1129 }))
1130}
1131
#[cfg(test)]
mod test {
    use super::{read_zipfile_from_stream, ZipArchive};
    use std::io::{self, Read};

    /// Opening `invalid_offset.zip` must fail.
    #[test]
    fn invalid_offset() {
        let bytes = include_bytes!("../tests/data/invalid_offset.zip").to_vec();
        let archive = ZipArchive::new(io::Cursor::new(bytes));
        assert!(archive.is_err());
    }

    /// Opening `invalid_offset2.zip` must fail.
    #[test]
    fn invalid_offset2() {
        let bytes = include_bytes!("../tests/data/invalid_offset2.zip").to_vec();
        let archive = ZipArchive::new(io::Cursor::new(bytes));
        assert!(archive.is_err());
    }

    /// `zip64_demo.zip` must open and report exactly one entry.
    #[test]
    fn zip64_with_leading_junk() {
        let bytes = include_bytes!("../tests/data/zip64_demo.zip").to_vec();
        let archive = ZipArchive::new(io::Cursor::new(bytes)).unwrap();
        assert_eq!(archive.len(), 1);
    }

    /// `mimetype.zip` has an empty archive comment and its first entry's
    /// central header starts at offset 77.
    #[test]
    fn zip_contents() {
        let bytes = include_bytes!("../tests/data/mimetype.zip").to_vec();
        let mut archive = ZipArchive::new(io::Cursor::new(bytes)).unwrap();
        assert_eq!(archive.comment(), b"");
        assert_eq!(archive.by_index(0).unwrap().central_header_start(), 77);
    }

    /// Streaming through `mimetype.zip` must yield entries without error until
    /// `read_zipfile_from_stream` returns `None`.
    #[test]
    fn zip_read_streaming() {
        let bytes = include_bytes!("../tests/data/mimetype.zip").to_vec();
        let mut stream = io::Cursor::new(bytes);
        while read_zipfile_from_stream(&mut stream).unwrap().is_some() {}
    }

    /// A cloned archive must read independently of the archive it was cloned from.
    #[test]
    fn zip_clone() {
        let bytes = include_bytes!("../tests/data/mimetype.zip").to_vec();
        let mut original = ZipArchive::new(io::Cursor::new(bytes)).unwrap();
        let mut cloned = original.clone();

        let mut file1 = original.by_index(0).unwrap();
        let mut file2 = cloned.by_index(0).unwrap();

        let stamp = file1.last_modified();
        let actual = (
            stamp.year(),
            stamp.month(),
            stamp.day(),
            stamp.hour(),
            stamp.minute(),
            stamp.second(),
        );
        assert_eq!(actual, (1980, 1, 1, 0, 0, 0));

        let mut first_of_1 = [0; 5];
        let mut first_of_2 = [0; 5];
        let mut second_of_1 = [0; 5];
        let mut second_of_2 = [0; 5];

        // Interleave the reads so each handle has to keep its own position.
        file1.read_exact(&mut first_of_1).unwrap();
        file2.read_exact(&mut first_of_2).unwrap();
        file1.read_exact(&mut second_of_1).unwrap();
        file2.read_exact(&mut second_of_2).unwrap();

        assert_eq!(first_of_1, first_of_2);
        assert_eq!(second_of_1, second_of_2);
        assert_ne!(first_of_1, second_of_1);
    }

    /// Entries named `dir*` must be reported as directories and entries named
    /// `file*` as regular files.
    #[test]
    fn file_and_dir_predicates() {
        let bytes = include_bytes!("../tests/data/files_and_dirs.zip").to_vec();
        let mut zip = ZipArchive::new(io::Cursor::new(bytes)).unwrap();

        for index in 0..zip.len() {
            let entry = zip.by_index(index).unwrap();
            let path = entry.enclosed_name().unwrap();
            let leaf = path.file_name().unwrap().to_str().unwrap();
            assert!(
                (leaf.starts_with("dir") && entry.is_dir())
                    || (leaf.starts_with("file") && entry.is_file())
            );
        }
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is more than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_smaller_offset() {
        let bytes = include_bytes!(
            "../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
        )
        .to_vec();
        let archive = ZipArchive::new(io::Cursor::new(bytes));
        assert!(archive.is_err());
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is less than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_greater_offset() {
        let bytes = include_bytes!(
            "../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
        )
        .to_vec();
        let archive = ZipArchive::new(io::Cursor::new(bytes));
        assert!(archive.is_err());
    }
}
1287