read.rs source code [crates/zip/src/read.rs]

1	//! Types for reading ZIP archives
2
3	#[cfg(feature = "aes-crypto")]
4	use crate::aes::{AesReader, AesReaderValid};
5	use crate::compression::CompressionMethod;
6	use crate::cp437::FromCp437;
7	use crate::crc32::Crc32Reader;
8	use crate::result::{InvalidPassword, ZipError, ZipResult};
9	use crate::spec;
10	use crate::types::{AesMode, AesVendorVersion, AtomicU64, DateTime, System, ZipFileData};
11	use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
12	use byteorder::{LittleEndian, ReadBytesExt};
13	use std::borrow::Cow;
14	use std::collections::HashMap;
15	use std::io::{self, prelude::*};
16	use std::path::Path;
17	use std::sync::Arc;
18
19	#[cfg(any(
20	feature = "deflate",
21	feature = "deflate-miniz",
22	feature = "deflate-zlib"
23	))]
24	use flate2::read::DeflateDecoder;
25
26	#[cfg(feature = "bzip2")]
27	use bzip2::read::BzDecoder;
28
29	#[cfg(feature = "zstd")]
30	use zstd::stream::read::Decoder as ZstdDecoder;
31
32	/// Provides high level API for reading from a stream.
33	pub(crate) mod stream;
34
35	// Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely
36	pub(crate) mod zip_archive {
37	/// Extract immutable data from `ZipArchive` to make it cheap to clone
38	#[derive(Debug)]
39	pub(crate) struct Shared {
40	pub(super) files: Vec<super::ZipFileData>,
41	pub(super) names_map: super::HashMap<String, usize>,
42	pub(super) offset: u64,
43	pub(super) comment: Vec<u8>,
44	}
45
46	/// ZIP archive reader
47	///
48	/// At the moment, this type is cheap to clone if this is the case for the
49	/// reader it uses. However, this is not guaranteed by this crate and it may
50	/// change in the future.
51	///
52	/// ```no_run
53	/// use std::io::prelude::*;
54	/// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
55	/// let mut zip = zip::ZipArchive::new(reader)?;
56	///
57	/// for i in `0`..zip.len() {
58	/// let mut file = zip.by_index(i)?;
59	/// println!("Filename: {}", file.name());
60	/// std::io::copy(&mut file, &mut std::io::stdout());
61	/// }
62	///
63	/// Ok(())
64	/// }
65	/// ```
66	#[derive(Clone, Debug)]
67	pub struct ZipArchive<R> {
68	pub(super) reader: R,
69	pub(super) shared: super::Arc<Shared>,
70	}
71	}
72
73	pub use zip_archive::ZipArchive;
74	#[allow(clippy::large_enum_variant)]
75	enum CryptoReader<'a> {
76	Plaintext(io::Take<&'a mut dyn Read>),
77	ZipCrypto(ZipCryptoReaderValid<io::Take<&'a mut dyn Read>>),
78	#[cfg(feature = "aes-crypto")]
79	Aes {
80	reader: AesReaderValid<io::Take<&'a mut dyn Read>>,
81	vendor_version: AesVendorVersion,
82	},
83	}
84
85	impl<'a> Read for CryptoReader<'a> {
86	fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
87	match self {
88	CryptoReader::Plaintext(r: &mut Take<&mut dyn Read>) => r.read(buf),
89	CryptoReader::ZipCrypto(r: &mut ZipCryptoReaderValid<…>) => r.read(buf),
90	#[cfg(feature = "aes-crypto")]
91	CryptoReader::Aes { reader: r, .. } => r.read(buf),
92	}
93	}
94	}
95
96	impl<'a> CryptoReader<'a> {
97	/// Consumes this decoder, returning the underlying reader.
98	pub fn into_inner(self) -> io::Take<&'a mut dyn Read> {
99	match self {
100	CryptoReader::Plaintext(r) => r,
101	CryptoReader::ZipCrypto(r) => r.into_inner(),
102	#[cfg(feature = "aes-crypto")]
103	CryptoReader::Aes { reader: r, .. } => r.into_inner(),
104	}
105	}
106
107	/// Returns `true` if the data is encrypted using AE2.
108	pub fn is_ae2_encrypted(&self) -> bool {
109	#[cfg(feature = "aes-crypto")]
110	return matches!(
111	self,
112	CryptoReader::Aes {
113	vendor_version: AesVendorVersion::Ae2,
114	..
115	}
116	);
117	#[cfg(not(feature = "aes-crypto"))]
118	`false`
119	}
120	}
121
122	enum ZipFileReader<'a> {
123	NoReader,
124	Raw(io::Take<&'a mut dyn io::Read>),
125	Stored(Crc32Reader<CryptoReader<'a>>),
126	#[cfg(any(
127	feature = "deflate",
128	feature = "deflate-miniz",
129	feature = "deflate-zlib"
130	))]
131	Deflated(Crc32Reader<flate2::read::DeflateDecoder<CryptoReader<'a>>>),
132	#[cfg(feature = "bzip2")]
133	Bzip2(Crc32Reader<BzDecoder<CryptoReader<'a>>>),
134	#[cfg(feature = "zstd")]
135	Zstd(Crc32Reader<ZstdDecoder<'a, io::BufReader<CryptoReader<'a>>>>),
136	}
137
138	impl<'a> Read for ZipFileReader<'a> {
139	fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
140	match self {
141	ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"),
142	ZipFileReader::Raw(r: &mut Take<&mut dyn Read>) => r.read(buf),
143	ZipFileReader::Stored(r: &mut Crc32Reader>) => r.read(buf),
144	#[cfg(any(
145	feature = "deflate",
146	feature = "deflate-miniz",
147	feature = "deflate-zlib"
148	))]
149	ZipFileReader::Deflated(r: &mut Crc32Reader>) => r.read(buf),
150	#[cfg(feature = "bzip2")]
151	ZipFileReader::Bzip2(r) => r.read(buf),
152	#[cfg(feature = "zstd")]
153	ZipFileReader::Zstd(r) => r.read(buf),
154	}
155	}
156	}
157
158	impl<'a> ZipFileReader<'a> {
159	/// Consumes this decoder, returning the underlying reader.
160	pub fn into_inner(self) -> io::Take<&'a mut dyn Read> {
161	match self {
162	ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"),
163	ZipFileReader::Raw(r: Take<&mut dyn Read>) => r,
164	ZipFileReader::Stored(r: Crc32Reader>) => r.into_inner().into_inner(),
165	#[cfg(any(
166	feature = "deflate",
167	feature = "deflate-miniz",
168	feature = "deflate-zlib"
169	))]
170	ZipFileReader::Deflated(r: Crc32Reader>) => r.into_inner().into_inner().into_inner(),
171	#[cfg(feature = "bzip2")]
172	ZipFileReader::Bzip2(r) => r.into_inner().into_inner().into_inner(),
173	#[cfg(feature = "zstd")]
174	ZipFileReader::Zstd(r) => r.into_inner().finish().into_inner().into_inner(),
175	}
176	}
177	}
178
179	/// A struct for reading a zip file
180	pub struct ZipFile<'a> {
181	data: Cow<'a, ZipFileData>,
182	crypto_reader: Option<CryptoReader<'a>>,
183	reader: ZipFileReader<'a>,
184	}
185
186	fn find_content<'a>(
187	data: &ZipFileData,
188	reader: &'a mut (impl Read + Seek),
189	) -> ZipResult<io::Take<&'a mut dyn Read>> {
190	// Parse local header
191	reader.seek(pos:io::SeekFrom::Start(data.header_start))?;
192	let signature: u32 = reader.read_u32::<LittleEndian>()?;
193	if signature != spec::LOCAL_FILE_HEADER_SIGNATURE {
194	return Err(ZipError::InvalidArchive("Invalid local file header"));
195	}
196
197	reader.seek(pos:io::SeekFrom::Current(`22`))?;
198	let file_name_length: u64 = reader.read_u16::<LittleEndian>()? as u64;
199	let extra_field_length: u64 = reader.read_u16::<LittleEndian>()? as u64;
200	let magic_and_header: u64 = `4` + `22` + `2` + `2`;
201	let data_start: u64 = data.header_start + magic_and_header + file_name_length + extra_field_length;
202	data.data_start.store(val:data_start);
203
204	reader.seek(pos:io::SeekFrom::Start(data_start))?;
205	Ok((reader as &mut dyn Read).take(limit:data.compressed_size))
206	}
207
208	#[allow(clippy::too_many_arguments)]
209	fn make_crypto_reader<'a>(
210	compression_method: crate::compression::CompressionMethod,
211	crc32: u32,
212	last_modified_time: DateTime,
213	using_data_descriptor: bool,
214	reader: io::Take<&'a mut dyn io::Read>,
215	password: Option<&[u8]>,
216	aes_info: Option<(AesMode, AesVendorVersion)>,
217	#[cfg(feature = "aes-crypto")] compressed_size: u64,
218	) -> ZipResult<Result<CryptoReader<'a>, InvalidPassword>> {
219	#[allow(deprecated)]
220	{
221	if let CompressionMethod::Unsupported(_) = compression_method {
222	return unsupported_zip_error("Compression method not supported");
223	}
224	}
225
226	let reader = match (password, aes_info) {
227	#[cfg(not(feature = "aes-crypto"))]
228	(Some(_), Some(_)) => {
229	return Err(ZipError::UnsupportedArchive(
230	"AES encrypted files cannot be decrypted without the aes-crypto feature.",
231	))
232	}
233	#[cfg(feature = "aes-crypto")]
234	(Some(password), Some((aes_mode, vendor_version))) => {
235	match AesReader::new(reader, aes_mode, compressed_size).validate(password)? {
236	None => return Ok(Err(InvalidPassword)),
237	Some(r) => CryptoReader::Aes {
238	reader: r,
239	vendor_version,
240	},
241	}
242	}
243	(Some(password), None) => {
244	let validator = if using_data_descriptor {
245	ZipCryptoValidator::InfoZipMsdosTime(last_modified_time.timepart())
246	} else {
247	ZipCryptoValidator::PkzipCrc32(crc32)
248	};
249	match ZipCryptoReader::new(reader, password).validate(validator)? {
250	None => return Ok(Err(InvalidPassword)),
251	Some(r) => CryptoReader::ZipCrypto(r),
252	}
253	}
254	(None, Some(_)) => return Ok(Err(InvalidPassword)),
255	(None, None) => CryptoReader::Plaintext(reader),
256	};
257	Ok(Ok(reader))
258	}
259
260	fn make_reader(
261	compression_method: CompressionMethod,
262	crc32: u32,
263	reader: CryptoReader,
264	) -> ZipFileReader {
265	let ae2_encrypted = reader.is_ae2_encrypted();
266
267	match compression_method {
268	CompressionMethod::Stored => {
269	ZipFileReader::Stored(Crc32Reader::new(reader, crc32, ae2_encrypted))
270	}
271	#[cfg(any(
272	feature = "deflate",
273	feature = "deflate-miniz",
274	feature = "deflate-zlib"
275	))]
276	CompressionMethod::Deflated => {
277	let deflate_reader = DeflateDecoder::new(reader);
278	ZipFileReader::Deflated(Crc32Reader::new(deflate_reader, crc32, ae2_encrypted))
279	}
280	#[cfg(feature = "bzip2")]
281	CompressionMethod::Bzip2 => {
282	let bzip2_reader = BzDecoder::new(reader);
283	ZipFileReader::Bzip2(Crc32Reader::new(bzip2_reader, crc32, ae2_encrypted))
284	}
285	#[cfg(feature = "zstd")]
286	CompressionMethod::Zstd => {
287	let zstd_reader = ZstdDecoder::new(reader).unwrap();
288	ZipFileReader::Zstd(Crc32Reader::new(zstd_reader, crc32, ae2_encrypted))
289	}
290	_ => panic!("Compression method not supported"),
291	}
292	}
293
294	impl<R: Read + io::Seek> ZipArchive<R> {
295	/// Get the directory start offset and number of files. This is done in a
296	/// separate function to ease the control flow design.
297	pub(crate) fn get_directory_counts(
298	reader: &mut R,
299	footer: &spec::CentralDirectoryEnd,
300	cde_start_pos: u64,
301	) -> ZipResult<(u64, u64, usize)> {
302	// See if there's a ZIP64 footer. The ZIP64 locator if present will
303	// have its signature 20 bytes in front of the standard footer. The
304	// standard footer, in turn, is 22+N bytes large, where N is the
305	// comment length. Therefore:
306	let zip64locator = if reader
307	.seek(io::SeekFrom::End(
308	-(`20` + `22` + footer.zip_file_comment.len() as i64),
309	))
310	.is_ok()
311	{
312	match spec::Zip64CentralDirectoryEndLocator::parse(reader) {
313	Ok(loc) => Some(loc),
314	Err(ZipError::InvalidArchive(_)) => {
315	// No ZIP64 header; that's actually fine. We're done here.
316	None
317	}
318	Err(e) => {
319	// Yikes, a real problem
320	return Err(e);
321	}
322	}
323	} else {
324	// Empty Zip files will have nothing else so this error might be fine. If
325	// not, we'll find out soon.
326	None
327	};
328
329	match zip64locator {
330	None => {
331	// Some zip files have data prepended to them, resulting in the
332	// offsets all being too small. Get the amount of error by comparing
333	// the actual file position we found the CDE at with the offset
334	// recorded in the CDE.
335	let archive_offset = cde_start_pos
336	.checked_sub(footer.central_directory_size as u64)
337	.and_then(\|x\| x.checked_sub(footer.central_directory_offset as u64))
338	.ok_or(ZipError::InvalidArchive(
339	"Invalid central directory size or offset",
340	))?;
341
342	let directory_start = footer.central_directory_offset as u64 + archive_offset;
343	let number_of_files = footer.number_of_files_on_this_disk as usize;
344	Ok((archive_offset, directory_start, number_of_files))
345	}
346	Some(locator64) => {
347	// If we got here, this is indeed a ZIP64 file.
348
349	if !footer.record_too_small()
350	&& footer.disk_number as u32 != locator64.disk_with_central_directory
351	{
352	return unsupported_zip_error(
353	"Support for multi-disk files is not implemented",
354	);
355	}
356
357	// We need to reassess `archive_offset`. We know where the ZIP64
358	// central-directory-end structure should* be, but unfortunately we*
359	// don't know how to precisely relate that location to our current
360	// actual offset in the file, since there may be junk at its
361	// beginning. Therefore we need to perform another search, as in
362	// read::CentralDirectoryEnd::find_and_parse, except now we search
363	// forward.
364
365	let search_upper_bound = cde_start_pos
366	.checked_sub(`60`) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
367	.ok_or(ZipError::InvalidArchive(
368	"File cannot contain ZIP64 central directory end",
369	))?;
370	let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse(
371	reader,
372	locator64.end_of_central_directory_offset,
373	search_upper_bound,
374	)?;
375
376	if footer.disk_number != footer.disk_with_central_directory {
377	return unsupported_zip_error(
378	"Support for multi-disk files is not implemented",
379	);
380	}
381
382	let directory_start = footer
383	.central_directory_offset
384	.checked_add(archive_offset)
385	.ok_or({
386	ZipError::InvalidArchive("Invalid central directory size or offset")
387	})?;
388
389	Ok((
390	archive_offset,
391	directory_start,
392	footer.number_of_files as usize,
393	))
394	}
395	}
396	}
397
398	/// Read a ZIP archive, collecting the files it contains
399	///
400	/// This uses the central directory record of the ZIP file, and ignores local file headers
401	pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
402	let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut reader)?;
403
404	if !footer.record_too_small() && footer.disk_number != footer.disk_with_central_directory {
405	return unsupported_zip_error("Support for multi-disk files is not implemented");
406	}
407
408	let (archive_offset, directory_start, number_of_files) =
409	Self::get_directory_counts(&mut reader, &footer, cde_start_pos)?;
410
411	// If the parsed number of files is greater than the offset then
412	// something fishy is going on and we shouldn't trust number_of_files.
413	let file_capacity = if number_of_files > cde_start_pos as usize {
414	`0`
415	} else {
416	number_of_files
417	};
418
419	let mut files = Vec::with_capacity(file_capacity);
420	let mut names_map = HashMap::with_capacity(file_capacity);
421
422	if reader.seek(io::SeekFrom::Start(directory_start)).is_err() {
423	return Err(ZipError::InvalidArchive(
424	"Could not seek to start of central directory",
425	));
426	}
427
428	for _ in `0`..number_of_files {
429	let file = central_header_to_zip_file(&mut reader, archive_offset)?;
430	names_map.insert(file.file_name.clone(), files.len());
431	files.push(file);
432	}
433
434	let shared = Arc::new(zip_archive::Shared {
435	files,
436	names_map,
437	offset: archive_offset,
438	comment: footer.zip_file_comment,
439	});
440
441	Ok(ZipArchive { reader, shared })
442	}
443	/// Extract a Zip archive into a directory, overwriting files if they
444	/// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
445	///
446	/// Extraction is not atomic; If an error is encountered, some of the files
447	/// may be left on disk.
448	pub fn extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()> {
449	use std::fs;
450
451	for i in `0`..self.len() {
452	let mut file = self.by_index(i)?;
453	let filepath = file
454	.enclosed_name()
455	.ok_or(ZipError::InvalidArchive("Invalid file path"))?;
456
457	let outpath = directory.as_ref().join(filepath);
458
459	if file.name().ends_with('/') {
460	fs::create_dir_all(&outpath)?;
461	} else {
462	if let Some(p) = outpath.parent() {
463	if !p.exists() {
464	fs::create_dir_all(p)?;
465	}
466	}
467	let mut outfile = fs::File::create(&outpath)?;
468	io::copy(&mut file, &mut outfile)?;
469	}
470	// Get and Set permissions
471	#[cfg(unix)]
472	{
473	use std::os::unix::fs::PermissionsExt;
474	if let Some(mode) = file.unix_mode() {
475	fs::set_permissions(&outpath, fs::Permissions::from_mode(mode))?;
476	}
477	}
478	}
479	Ok(())
480	}
481
482	/// Number of files contained in this zip.
483	pub fn len(&self) -> usize {
484	self.shared.files.len()
485	}
486
487	/// Whether this zip archive contains no files
488	pub fn is_empty(&self) -> bool {
489	self.len() == `0`
490	}
491
492	/// Get the offset from the beginning of the underlying reader that this zip begins at, in bytes.
493	///
494	/// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
495	/// of that prepended data.
496	pub fn offset(&self) -> u64 {
497	self.shared.offset
498	}
499
500	/// Get the comment of the zip archive.
501	pub fn comment(&self) -> &[u8] {
502	&self.shared.comment
503	}
504
505	/// Returns an iterator over all the file and directory names in this archive.
506	pub fn file_names(&self) -> impl Iterator<Item = &str> {
507	self.shared.names_map.keys().map(\|s\| s.as_str())
508	}
509
510	/// Search for a file entry by name, decrypt with given password
511	///
512	/// # Warning
513	///
514	/// The implementation of the cryptographic algorithms has not
515	/// gone through a correctness review, and you should assume it is insecure:
516	/// passwords used with this API may be compromised.
517	///
518	/// This function sometimes accepts wrong password. This is because the ZIP spec only allows us
519	/// to check for a 1/256 chance that the password is correct.
520	/// There are many passwords out there that will also pass the validity checks
521	/// we are able to perform. This is a weakness of the ZipCrypto algorithm,
522	/// due to its fairly primitive approach to cryptography.
523	pub fn by_name_decrypt<'a>(
524	&'a mut self,
525	name: &str,
526	password: &[u8],
527	) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
528	self.by_name_with_optional_password(name, Some(password))
529	}
530
531	/// Search for a file entry by name
532	pub fn by_name<'a>(&'a mut self, name: &str) -> ZipResult<ZipFile<'a>> {
533	Ok(self.by_name_with_optional_password(name, None)?.unwrap())
534	}
535
536	fn by_name_with_optional_password<'a>(
537	&'a mut self,
538	name: &str,
539	password: Option<&[u8]>,
540	) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
541	let index = match self.shared.names_map.get(name) {
542	Some(index) => *index,
543	None => {
544	return Err(ZipError::FileNotFound);
545	}
546	};
547	self.by_index_with_optional_password(index, password)
548	}
549
550	/// Get a contained file by index, decrypt with given password
551	///
552	/// # Warning
553	///
554	/// The implementation of the cryptographic algorithms has not
555	/// gone through a correctness review, and you should assume it is insecure:
556	/// passwords used with this API may be compromised.
557	///
558	/// This function sometimes accepts wrong password. This is because the ZIP spec only allows us
559	/// to check for a 1/256 chance that the password is correct.
560	/// There are many passwords out there that will also pass the validity checks
561	/// we are able to perform. This is a weakness of the ZipCrypto algorithm,
562	/// due to its fairly primitive approach to cryptography.
563	pub fn by_index_decrypt<'a>(
564	&'a mut self,
565	file_number: usize,
566	password: &[u8],
567	) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
568	self.by_index_with_optional_password(file_number, Some(password))
569	}
570
571	/// Get a contained file by index
572	pub fn by_index(&mut self, file_number: usize) -> ZipResult<ZipFile<'_>> {
573	Ok(self
574	.by_index_with_optional_password(file_number, None)?
575	.unwrap())
576	}
577
578	/// Get a contained file by index without decompressing it
579	pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult<ZipFile<'_>> {
580	let reader = &mut self.reader;
581	self.shared
582	.files
583	.get(file_number)
584	.ok_or(ZipError::FileNotFound)
585	.and_then(move \|data\| {
586	Ok(ZipFile {
587	crypto_reader: None,
588	reader: ZipFileReader::Raw(find_content(data, reader)?),
589	data: Cow::Borrowed(data),
590	})
591	})
592	}
593
594	fn by_index_with_optional_password<'a>(
595	&'a mut self,
596	file_number: usize,
597	mut password: Option<&[u8]>,
598	) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
599	let data = self
600	.shared
601	.files
602	.get(file_number)
603	.ok_or(ZipError::FileNotFound)?;
604
605	match (password, data.encrypted) {
606	(None, `true`) => return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)),
607	(Some(_), `false`) => password = None, //Password supplied, but none needed! Discard.
608	_ => {}
609	}
610	let limit_reader = find_content(data, &mut self.reader)?;
611
612	match make_crypto_reader(
613	data.compression_method,
614	data.crc32,
615	data.last_modified_time,
616	data.using_data_descriptor,
617	limit_reader,
618	password,
619	data.aes_mode,
620	#[cfg(feature = "aes-crypto")]
621	data.compressed_size,
622	) {
623	Ok(Ok(crypto_reader)) => Ok(Ok(ZipFile {
624	crypto_reader: Some(crypto_reader),
625	reader: ZipFileReader::NoReader,
626	data: Cow::Borrowed(data),
627	})),
628	Err(e) => Err(e),
629	Ok(Err(e)) => Ok(Err(e)),
630	}
631	}
632
633	/// Unwrap and return the inner reader object
634	///
635	/// The position of the reader is undefined.
636	pub fn into_inner(self) -> R {
637	self.reader
638	}
639	}
640
641	fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> {
642	Err(ZipError::UnsupportedArchive(detail))
643	}
644
645	/// Parse a central directory entry to collect the information for the file.
646	pub(crate) fn central_header_to_zip_file<R: Read + io::Seek>(
647	reader: &mut R,
648	archive_offset: u64,
649	) -> ZipResult<ZipFileData> {
650	let central_header_start: u64 = reader.stream_position()?;
651
652	// Parse central header
653	let signature: u32 = reader.read_u32::<LittleEndian>()?;
654	if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
655	Err(ZipError::InvalidArchive("Invalid Central Directory header"))
656	} else {
657	central_header_to_zip_file_inner(reader, archive_offset, central_header_start)
658	}
659	}
660
661	/// Parse a central directory entry to collect the information for the file.
662	fn central_header_to_zip_file_inner<R: Read>(
663	reader: &mut R,
664	archive_offset: u64,
665	central_header_start: u64,
666	) -> ZipResult<ZipFileData> {
667	let version_made_by = reader.read_u16::<LittleEndian>()?;
668	let _version_to_extract = reader.read_u16::<LittleEndian>()?;
669	let flags = reader.read_u16::<LittleEndian>()?;
670	let encrypted = flags & `1` == `1`;
671	let is_utf8 = flags & (`1` << `11`) != `0`;
672	let using_data_descriptor = flags & (`1` << `3`) != `0`;
673	let compression_method = reader.read_u16::<LittleEndian>()?;
674	let last_mod_time = reader.read_u16::<LittleEndian>()?;
675	let last_mod_date = reader.read_u16::<LittleEndian>()?;
676	let crc32 = reader.read_u32::<LittleEndian>()?;
677	let compressed_size = reader.read_u32::<LittleEndian>()?;
678	let uncompressed_size = reader.read_u32::<LittleEndian>()?;
679	let file_name_length = reader.read_u16::<LittleEndian>()? as usize;
680	let extra_field_length = reader.read_u16::<LittleEndian>()? as usize;
681	let file_comment_length = reader.read_u16::<LittleEndian>()? as usize;
682	let _disk_number = reader.read_u16::<LittleEndian>()?;
683	let _internal_file_attributes = reader.read_u16::<LittleEndian>()?;
684	let external_file_attributes = reader.read_u32::<LittleEndian>()?;
685	let offset = reader.read_u32::<LittleEndian>()? as u64;
686	let mut file_name_raw = vec![`0`; file_name_length];
687	reader.read_exact(&mut file_name_raw)?;
688	let mut extra_field = vec![`0`; extra_field_length];
689	reader.read_exact(&mut extra_field)?;
690	let mut file_comment_raw = vec![`0`; file_comment_length];
691	reader.read_exact(&mut file_comment_raw)?;
692
693	let file_name = match is_utf8 {
694	`true` => String::from_utf8_lossy(&file_name_raw).into_owned(),
695	`false` => file_name_raw.clone().from_cp437(),
696	};
697	let file_comment = match is_utf8 {
698	`true` => String::from_utf8_lossy(&file_comment_raw).into_owned(),
699	`false` => file_comment_raw.from_cp437(),
700	};
701
702	// Construct the result
703	let mut result = ZipFileData {
704	system: System::from_u8((version_made_by >> `8`) as u8),
705	version_made_by: version_made_by as u8,
706	encrypted,
707	using_data_descriptor,
708	compression_method: {
709	#[allow(deprecated)]
710	CompressionMethod::from_u16(compression_method)
711	},
712	compression_level: None,
713	last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time),
714	crc32,
715	compressed_size: compressed_size as u64,
716	uncompressed_size: uncompressed_size as u64,
717	file_name,
718	file_name_raw,
719	extra_field,
720	file_comment,
721	header_start: offset,
722	central_header_start,
723	data_start: AtomicU64::new(`0`),
724	external_attributes: external_file_attributes,
725	large_file: `false`,
726	aes_mode: None,
727	};
728
729	match parse_extra_field(&mut result) {
730	Ok(..) \| Err(ZipError::Io(..)) => {}
731	Err(e) => return Err(e),
732	}
733
734	let aes_enabled = result.compression_method == CompressionMethod::AES;
735	if aes_enabled && result.aes_mode.is_none() {
736	return Err(ZipError::InvalidArchive(
737	"AES encryption without AES extra data field",
738	));
739	}
740
741	// Account for shifted zip offsets.
742	result.header_start = result
743	.header_start
744	.checked_add(archive_offset)
745	.ok_or(ZipError::InvalidArchive("Archive header is too large"))?;
746
747	Ok(result)
748	}
749
750	fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> {
751	let mut reader = io::Cursor::new(&file.extra_field);
752
753	while (reader.position() as usize) < file.extra_field.len() {
754	let kind = reader.read_u16::<LittleEndian>()?;
755	let len = reader.read_u16::<LittleEndian>()?;
756	let mut len_left = len as i64;
757	match kind {
758	// Zip64 extended information extra field
759	`0x0001` => {
760	if file.uncompressed_size == spec::ZIP64_BYTES_THR {
761	file.large_file = `true`;
762	file.uncompressed_size = reader.read_u64::<LittleEndian>()?;
763	len_left -= `8`;
764	}
765	if file.compressed_size == spec::ZIP64_BYTES_THR {
766	file.large_file = `true`;
767	file.compressed_size = reader.read_u64::<LittleEndian>()?;
768	len_left -= `8`;
769	}
770	if file.header_start == spec::ZIP64_BYTES_THR {
771	file.header_start = reader.read_u64::<LittleEndian>()?;
772	len_left -= `8`;
773	}
774	}
775	`0x9901` => {
776	// AES
777	if len != `7` {
778	return Err(ZipError::UnsupportedArchive(
779	"AES extra data field has an unsupported length",
780	));
781	}
782	let vendor_version = reader.read_u16::<LittleEndian>()?;
783	let vendor_id = reader.read_u16::<LittleEndian>()?;
784	let aes_mode = reader.read_u8()?;
785	let compression_method = reader.read_u16::<LittleEndian>()?;
786
787	if vendor_id != `0x4541` {
788	return Err(ZipError::InvalidArchive("Invalid AES vendor"));
789	}
790	let vendor_version = match vendor_version {
791	`0x0001` => AesVendorVersion::Ae1,
792	`0x0002` => AesVendorVersion::Ae2,
793	_ => return Err(ZipError::InvalidArchive("Invalid AES vendor version")),
794	};
795	match aes_mode {
796	`0x01` => file.aes_mode = Some((AesMode::Aes128, vendor_version)),
797	`0x02` => file.aes_mode = Some((AesMode::Aes192, vendor_version)),
798	`0x03` => file.aes_mode = Some((AesMode::Aes256, vendor_version)),
799	_ => return Err(ZipError::InvalidArchive("Invalid AES encryption strength")),
800	};
801	file.compression_method = {
802	#[allow(deprecated)]
803	CompressionMethod::from_u16(compression_method)
804	};
805	}
806	_ => {
807	// Other fields are ignored
808	}
809	}
810
811	// We could also check for < 0 to check for errors
812	if len_left > `0` {
813	reader.seek(io::SeekFrom::Current(len_left))?;
814	}
815	}
816	Ok(())
817	}
818
819	/// Methods for retrieving information on zip files
820	impl<'a> ZipFile<'a> {
821	fn get_reader(&mut self) -> &mut ZipFileReader<'a> {
822	if let ZipFileReader::NoReader = self.reader {
823	let data = &self.data;
824	let crypto_reader = self.crypto_reader.take().expect("Invalid reader state");
825	self.reader = make_reader(data.compression_method, data.crc32, crypto_reader)
826	}
827	&mut self.reader
828	}
829
830	pub(crate) fn get_raw_reader(&mut self) -> &mut dyn Read {
831	if let ZipFileReader::NoReader = self.reader {
832	let crypto_reader = self.crypto_reader.take().expect("Invalid reader state");
833	self.reader = ZipFileReader::Raw(crypto_reader.into_inner())
834	}
835	&mut self.reader
836	}
837
838	/// Get the version of the file
839	pub fn version_made_by(&self) -> (u8, u8) {
840	(
841	self.data.version_made_by / `10`,
842	self.data.version_made_by % `10`,
843	)
844	}
845
846	/// Get the name of the file
847	///
848	/// # Warnings
849	///
850	/// It is dangerous to use this name directly when extracting an archive.
851	/// It may contain an absolute path (`/etc/shadow`), or break out of the
852	/// current directory (`../runtime`). Carelessly writing to these paths
853	/// allows an attacker to craft a ZIP archive that will overwrite critical
854	/// files.
855	///
856	/// You can use the [`ZipFile::enclosed_name`] method to validate the name
857	/// as a safe path.
858	pub fn name(&self) -> &str {
859	&self.data.file_name
860	}
861
862	/// Get the name of the file, in the raw (internal) byte representation.
863	///
864	/// The encoding of this data is currently undefined.
865	pub fn name_raw(&self) -> &[u8] {
866	&self.data.file_name_raw
867	}
868
869	/// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte,
870	/// removes a leading '/' and removes '..' parts.
871	#[deprecated(
872	since = "0.5.7",
873	note = "by stripping `..`s from the path, the meaning of paths can change.
874	`mangled_name` can be used if this behaviour is desirable"
875	)]
876	pub fn sanitized_name(&self) -> ::std::path::PathBuf {
877	self.mangled_name()
878	}
879
880	/// Rewrite the path, ignoring any path components with special meaning.
881	///
882	/// - Absolute paths are made relative
883	/// - [`ParentDir`]s are ignored
884	/// - Truncates the filename at a NULL byte
885	///
886	/// This is appropriate if you need to be able to extract something* from*
887	/// any archive, but will easily misrepresent trivial paths like
888	/// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
889	/// [`ZipFile::enclosed_name`] is the better option in most scenarios.
890	///
891	/// [`ParentDir`]: `Component::ParentDir`
892	pub fn mangled_name(&self) -> ::std::path::PathBuf {
893	self.data.file_name_sanitized()
894	}
895
896	/// Ensure the file path is safe to use as a [`Path`].
897	///
898	/// - It can't contain NULL bytes
899	/// - It can't resolve to a path outside the current directory
900	/// > `foo/../bar` is fine, `foo/../../bar` is not.
901	/// - It can't be an absolute path
902	///
903	/// This will read well-formed ZIP files correctly, and is resistant
904	/// to path-based exploits. It is recommended over
905	/// [`ZipFile::mangled_name`].
906	pub fn enclosed_name(&self) -> Option<&Path> {
907	self.data.enclosed_name()
908	}
909
910	/// Get the comment of the file
911	pub fn comment(&self) -> &str {
912	&self.data.file_comment
913	}
914
915	/// Get the compression method used to store the file
916	pub fn compression(&self) -> CompressionMethod {
917	self.data.compression_method
918	}
919
920	/// Get the size of the file, in bytes, in the archive
921	pub fn compressed_size(&self) -> u64 {
922	self.data.compressed_size
923	}
924
925	/// Get the size of the file, in bytes, when uncompressed
926	pub fn size(&self) -> u64 {
927	self.data.uncompressed_size
928	}
929
930	/// Get the time the file was last modified
931	pub fn last_modified(&self) -> DateTime {
932	self.data.last_modified_time
933	}
934	/// Returns whether the file is actually a directory
935	pub fn is_dir(&self) -> bool {
936	self.name()
937	.chars()
938	.rev()
939	.next()
940	.map_or(`false`, \|c\| c == '/' \|\| c == '`\\`')
941	}
942
943	/// Returns whether the file is a regular file
944	pub fn is_file(&self) -> bool {
945	!self.is_dir()
946	}
947
948	/// Get unix mode for the file
949	pub fn unix_mode(&self) -> Option<u32> {
950	self.data.unix_mode()
951	}
952
953	/// Get the CRC32 hash of the original file
954	pub fn crc32(&self) -> u32 {
955	self.data.crc32
956	}
957
958	/// Get the extra data of the zip header for this file
959	pub fn extra_data(&self) -> &[u8] {
960	&self.data.extra_field
961	}
962
963	/// Get the starting offset of the data of the compressed file
964	pub fn data_start(&self) -> u64 {
965	self.data.data_start.load()
966	}
967
968	/// Get the starting offset of the zip header for this file
969	pub fn header_start(&self) -> u64 {
970	self.data.header_start
971	}
972	/// Get the starting offset of the zip header in the central directory for this file
973	pub fn central_header_start(&self) -> u64 {
974	self.data.central_header_start
975	}
976	}
977
978	impl<'a> Read for ZipFile<'a> {
979	fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
980	self.get_reader().read(buf)
981	}
982	}
983
984	impl<'a> Drop for ZipFile<'a> {
985	fn drop(&mut self) {
986	// self.data is Owned, this reader is constructed by a streaming reader.
987	// In this case, we want to exhaust the reader so that the next file is accessible.
988	if let Cow::Owned(_) = self.data {
989	let mut buffer = [`0`; `1` << `16`];
990
991	// Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped.
992	let mut reader: std::io::Take<&mut dyn std::io::Read> = match &mut self.reader {
993	ZipFileReader::NoReader => {
994	let innerreader = ::std::mem::replace(&mut self.crypto_reader, None);
995	innerreader.expect("Invalid reader state").into_inner()
996	}
997	reader => {
998	let innerreader = ::std::mem::replace(reader, ZipFileReader::NoReader);
999	innerreader.into_inner()
1000	}
1001	};
1002
1003	loop {
1004	match reader.read(&mut buffer) {
1005	Ok(`0`) => break,
1006	Ok(_) => (),
1007	Err(e) => {
1008	panic!("Could not consume all of the output of the current ZipFile: {e:?}")
1009	}
1010	}
1011	}
1012	}
1013	}
1014	}
1015
1016	/// Read ZipFile structures from a non-seekable reader.
1017	///
1018	/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions
1019	/// as some information will be missing when reading this manner.
1020	///
1021	/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is
1022	/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory
1023	/// is encountered. No more files should be read after this.
1024	///
1025	/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after
1026	/// the structure is done.
1027	///
1028	/// Missing fields are:
1029	/// `comment`: set to an empty string*
1030	/// `data_start`: set to 0*
1031	/// `external_attributes`: `unix_mode()`: will return None*
1032	pub fn read_zipfile_from_stream<'a, R: io::Read>(
1033	reader: &'a mut R,
1034	) -> ZipResult<Option<ZipFile<'_>>> {
1035	let signature = reader.read_u32::<LittleEndian>()?;
1036
1037	match signature {
1038	spec::LOCAL_FILE_HEADER_SIGNATURE => (),
1039	spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None),
1040	_ => return Err(ZipError::InvalidArchive("Invalid local file header")),
1041	}
1042
1043	let version_made_by = reader.read_u16::<LittleEndian>()?;
1044	let flags = reader.read_u16::<LittleEndian>()?;
1045	let encrypted = flags & `1` == `1`;
1046	let is_utf8 = flags & (`1` << `11`) != `0`;
1047	let using_data_descriptor = flags & (`1` << `3`) != `0`;
1048	#[allow(deprecated)]
1049	let compression_method = CompressionMethod::from_u16(reader.read_u16::<LittleEndian>()?);
1050	let last_mod_time = reader.read_u16::<LittleEndian>()?;
1051	let last_mod_date = reader.read_u16::<LittleEndian>()?;
1052	let crc32 = reader.read_u32::<LittleEndian>()?;
1053	let compressed_size = reader.read_u32::<LittleEndian>()?;
1054	let uncompressed_size = reader.read_u32::<LittleEndian>()?;
1055	let file_name_length = reader.read_u16::<LittleEndian>()? as usize;
1056	let extra_field_length = reader.read_u16::<LittleEndian>()? as usize;
1057
1058	let mut file_name_raw = vec![`0`; file_name_length];
1059	reader.read_exact(&mut file_name_raw)?;
1060	let mut extra_field = vec![`0`; extra_field_length];
1061	reader.read_exact(&mut extra_field)?;
1062
1063	let file_name = match is_utf8 {
1064	`true` => String::from_utf8_lossy(&file_name_raw).into_owned(),
1065	`false` => file_name_raw.clone().from_cp437(),
1066	};
1067
1068	let mut result = ZipFileData {
1069	system: System::from_u8((version_made_by >> `8`) as u8),
1070	version_made_by: version_made_by as u8,
1071	encrypted,
1072	using_data_descriptor,
1073	compression_method,
1074	compression_level: None,
1075	last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time),
1076	crc32,
1077	compressed_size: compressed_size as u64,
1078	uncompressed_size: uncompressed_size as u64,
1079	file_name,
1080	file_name_raw,
1081	extra_field,
1082	file_comment: String::new(), // file comment is only available in the central directory
1083	// header_start and data start are not available, but also don't matter, since seeking is
1084	// not available.
1085	header_start: `0`,
1086	data_start: AtomicU64::new(`0`),
1087	central_header_start: `0`,
1088	// The external_attributes field is only available in the central directory.
1089	// We set this to zero, which should be valid as the docs state 'If input came
1090	// from standard input, this field is set to zero.'
1091	external_attributes: `0`,
1092	large_file: `false`,
1093	aes_mode: None,
1094	};
1095
1096	match parse_extra_field(&mut result) {
1097	Ok(..) \| Err(ZipError::Io(..)) => {}
1098	Err(e) => return Err(e),
1099	}
1100
1101	if encrypted {
1102	return unsupported_zip_error("Encrypted files are not supported");
1103	}
1104	if using_data_descriptor {
1105	return unsupported_zip_error("The file length is not available in the local header");
1106	}
1107
1108	let limit_reader = (reader as &'a mut dyn io::Read).take(result.compressed_size);
1109
1110	let result_crc32 = result.crc32;
1111	let result_compression_method = result.compression_method;
1112	let crypto_reader = make_crypto_reader(
1113	result_compression_method,
1114	result_crc32,
1115	result.last_modified_time,
1116	result.using_data_descriptor,
1117	limit_reader,
1118	None,
1119	None,
1120	#[cfg(feature = "aes-crypto")]
1121	result.compressed_size,
1122	)?
1123	.unwrap();
1124
1125	Ok(Some(ZipFile {
1126	data: Cow::Owned(result),
1127	crypto_reader: None,
1128	reader: make_reader(result_compression_method, result_crc32, crypto_reader),
1129	}))
1130	}
1131
#[cfg(test)]
mod test {
    use super::{read_zipfile_from_stream, ZipArchive};
    use std::io::{self, Read};

    /// Copies a fixture into an owned, seekable in-memory reader.
    fn cursor(bytes: &[u8]) -> io::Cursor<Vec<u8>> {
        io::Cursor::new(bytes.to_vec())
    }

    #[test]
    fn invalid_offset() {
        let archive = ZipArchive::new(cursor(include_bytes!("../tests/data/invalid_offset.zip")));
        assert!(archive.is_err());
    }

    #[test]
    fn invalid_offset2() {
        let archive = ZipArchive::new(cursor(include_bytes!(
            "../tests/data/invalid_offset2.zip"
        )));
        assert!(archive.is_err());
    }

    #[test]
    fn zip64_with_leading_junk() {
        let archive =
            ZipArchive::new(cursor(include_bytes!("../tests/data/zip64_demo.zip"))).unwrap();
        assert_eq!(archive.len(), 1);
    }

    #[test]
    fn zip_contents() {
        let mut archive =
            ZipArchive::new(cursor(include_bytes!("../tests/data/mimetype.zip"))).unwrap();
        assert_eq!(archive.comment(), b"");
        assert_eq!(archive.by_index(0).unwrap().central_header_start(), 77);
    }

    #[test]
    fn zip_read_streaming() {
        let mut stream = cursor(include_bytes!("../tests/data/mimetype.zip"));
        // Keep pulling entries until the central directory is reached.
        while read_zipfile_from_stream(&mut stream).unwrap().is_some() {}
    }

    #[test]
    fn zip_clone() {
        let mut first =
            ZipArchive::new(cursor(include_bytes!("../tests/data/mimetype.zip"))).unwrap();
        let mut second = first.clone();

        let mut file1 = first.by_index(0).unwrap();
        let mut file2 = second.by_index(0).unwrap();

        let stamp = file1.last_modified();
        let observed = (
            stamp.year(),
            stamp.month(),
            stamp.day(),
            stamp.hour(),
            stamp.minute(),
            stamp.second(),
        );
        assert_eq!(observed, (1980, 1, 1, 0, 0, 0));

        let mut buf1 = [0; 5];
        let mut buf2 = [0; 5];
        let mut buf3 = [0; 5];
        let mut buf4 = [0; 5];

        // Interleave the reads so that the clones must keep independent positions.
        file1.read_exact(&mut buf1).unwrap();
        file2.read_exact(&mut buf2).unwrap();
        file1.read_exact(&mut buf3).unwrap();
        file2.read_exact(&mut buf4).unwrap();

        assert_eq!(buf1, buf2);
        assert_eq!(buf3, buf4);
        assert_ne!(buf1, buf3);
    }

    #[test]
    fn file_and_dir_predicates() {
        let mut zip =
            ZipArchive::new(cursor(include_bytes!("../tests/data/files_and_dirs.zip"))).unwrap();

        for index in 0..zip.len() {
            let entry = zip.by_index(index).unwrap();
            let full_name = entry.enclosed_name().unwrap();
            let file_name = full_name.file_name().unwrap().to_str().unwrap();

            let looks_like_dir = file_name.starts_with("dir") && entry.is_dir();
            let looks_like_file = file_name.starts_with("file") && entry.is_file();
            assert!(looks_like_dir || looks_like_file);
        }
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is more than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_smaller_offset() {
        let archive = ZipArchive::new(cursor(include_bytes!(
            "../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
        )));
        assert!(archive.is_err());
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is less than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_greater_offset() {
        let archive = ZipArchive::new(cursor(include_bytes!(
            "../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
        )));
        assert!(archive.is_err());
    }
}
1287