1use std::fs;
2use std::io::{self, Read};
3use std::path::Path;
4
5use super::{
6 central_header_to_zip_file_inner, read_zipfile_from_stream, spec, ZipError, ZipFile,
7 ZipFileData, ZipResult,
8};
9
10use byteorder::{LittleEndian, ReadBytesExt};
11
/// Streaming decoder for zip archives.
///
/// A thin wrapper around a reader `R`. When `R` implements [`Read`], the
/// archive is decoded sequentially from that reader.
#[derive(Debug)]
pub struct ZipStreamReader<R>(R);
15
16impl<R> ZipStreamReader<R> {
17 /// Create a new ZipStreamReader
18 pub fn new(reader: R) -> Self {
19 Self(reader)
20 }
21}
22
23impl<R: Read> ZipStreamReader<R> {
24 fn parse_central_directory(&mut self) -> ZipResult<Option<ZipStreamFileMetadata>> {
25 // Give archive_offset and central_header_start dummy value 0, since
26 // they are not used in the output.
27 let archive_offset = 0;
28 let central_header_start = 0;
29
30 // Parse central header
31 let signature = self.0.read_u32::<LittleEndian>()?;
32 if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
33 Ok(None)
34 } else {
35 central_header_to_zip_file_inner(&mut self.0, archive_offset, central_header_start)
36 .map(ZipStreamFileMetadata)
37 .map(Some)
38 }
39 }
40
41 /// Iteraate over the stream and extract all file and their
42 /// metadata.
43 pub fn visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()> {
44 while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? {
45 visitor.visit_file(&mut file)?;
46 }
47
48 while let Some(metadata) = self.parse_central_directory()? {
49 visitor.visit_additional_metadata(&metadata)?;
50 }
51
52 Ok(())
53 }
54
55 /// Extract a Zip archive into a directory, overwriting files if they
56 /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
57 ///
58 /// Extraction is not atomic; If an error is encountered, some of the files
59 /// may be left on disk.
60 pub fn extract<P: AsRef<Path>>(self, directory: P) -> ZipResult<()> {
61 struct Extractor<'a>(&'a Path);
62 impl ZipStreamVisitor for Extractor<'_> {
63 fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
64 let filepath = file
65 .enclosed_name()
66 .ok_or(ZipError::InvalidArchive("Invalid file path"))?;
67
68 let outpath = self.0.join(filepath);
69
70 if file.name().ends_with('/') {
71 fs::create_dir_all(&outpath)?;
72 } else {
73 if let Some(p) = outpath.parent() {
74 fs::create_dir_all(p)?;
75 }
76 let mut outfile = fs::File::create(&outpath)?;
77 io::copy(file, &mut outfile)?;
78 }
79
80 Ok(())
81 }
82
83 #[allow(unused)]
84 fn visit_additional_metadata(
85 &mut self,
86 metadata: &ZipStreamFileMetadata,
87 ) -> ZipResult<()> {
88 #[cfg(unix)]
89 {
90 let filepath = metadata
91 .enclosed_name()
92 .ok_or(ZipError::InvalidArchive("Invalid file path"))?;
93
94 let outpath = self.0.join(filepath);
95
96 use std::os::unix::fs::PermissionsExt;
97 if let Some(mode) = metadata.unix_mode() {
98 fs::set_permissions(outpath, fs::Permissions::from_mode(mode))?;
99 }
100 }
101
102 Ok(())
103 }
104 }
105
106 self.visit(&mut Extractor(directory.as_ref()))
107 }
108}
109
110/// Visitor for ZipStreamReader
111pub trait ZipStreamVisitor {
112 /// * `file` - contains the content of the file and most of the metadata,
113 /// except:
114 /// - `comment`: set to an empty string
115 /// - `data_start`: set to 0
116 /// - `external_attributes`: `unix_mode()`: will return None
117 fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>;
118
119 /// This function is guranteed to be called after all `visit_file`s.
120 ///
121 /// * `metadata` - Provides missing metadata in `visit_file`.
122 fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>;
123}
124
125/// Additional metadata for the file.
126#[derive(Debug)]
127pub struct ZipStreamFileMetadata(ZipFileData);
128
129impl ZipStreamFileMetadata {
130 /// Get the name of the file
131 ///
132 /// # Warnings
133 ///
134 /// It is dangerous to use this name directly when extracting an archive.
135 /// It may contain an absolute path (`/etc/shadow`), or break out of the
136 /// current directory (`../runtime`). Carelessly writing to these paths
137 /// allows an attacker to craft a ZIP archive that will overwrite critical
138 /// files.
139 ///
140 /// You can use the [`ZipFile::enclosed_name`] method to validate the name
141 /// as a safe path.
142 pub fn name(&self) -> &str {
143 &self.0.file_name
144 }
145
146 /// Get the name of the file, in the raw (internal) byte representation.
147 ///
148 /// The encoding of this data is currently undefined.
149 pub fn name_raw(&self) -> &[u8] {
150 &self.0.file_name_raw
151 }
152
153 /// Rewrite the path, ignoring any path components with special meaning.
154 ///
155 /// - Absolute paths are made relative
156 /// - [`ParentDir`]s are ignored
157 /// - Truncates the filename at a NULL byte
158 ///
159 /// This is appropriate if you need to be able to extract *something* from
160 /// any archive, but will easily misrepresent trivial paths like
161 /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
162 /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
163 ///
164 /// [`ParentDir`]: `Component::ParentDir`
165 pub fn mangled_name(&self) -> ::std::path::PathBuf {
166 self.0.file_name_sanitized()
167 }
168
169 /// Ensure the file path is safe to use as a [`Path`].
170 ///
171 /// - It can't contain NULL bytes
172 /// - It can't resolve to a path outside the current directory
173 /// > `foo/../bar` is fine, `foo/../../bar` is not.
174 /// - It can't be an absolute path
175 ///
176 /// This will read well-formed ZIP files correctly, and is resistant
177 /// to path-based exploits. It is recommended over
178 /// [`ZipFile::mangled_name`].
179 pub fn enclosed_name(&self) -> Option<&Path> {
180 self.0.enclosed_name()
181 }
182
183 /// Returns whether the file is actually a directory
184 pub fn is_dir(&self) -> bool {
185 self.name()
186 .chars()
187 .rev()
188 .next()
189 .map_or(false, |c| c == '/' || c == '\\')
190 }
191
192 /// Returns whether the file is a regular file
193 pub fn is_file(&self) -> bool {
194 !self.is_dir()
195 }
196
197 /// Get the comment of the file
198 pub fn comment(&self) -> &str {
199 &self.0.file_comment
200 }
201
202 /// Get the starting offset of the data of the compressed file
203 pub fn data_start(&self) -> u64 {
204 self.0.data_start.load()
205 }
206
207 /// Get unix mode for the file
208 pub fn unix_mode(&self) -> Option<u32> {
209 self.0.unix_mode()
210 }
211}
212
#[cfg(test)]
mod test {
    use super::*;
    use std::collections::BTreeSet;
    use std::io;

    /// Visitor that accepts every callback and records nothing.
    struct DummyVisitor;
    impl ZipStreamVisitor for DummyVisitor {
        fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
            Ok(())
        }

        fn visit_additional_metadata(
            &mut self,
            _metadata: &ZipStreamFileMetadata,
        ) -> ZipResult<()> {
            Ok(())
        }
    }

    /// Visitor that counts `visit_file` calls in field 0 and
    /// `visit_additional_metadata` calls in field 1.
    #[derive(Default, Debug, Eq, PartialEq)]
    struct CounterVisitor(u64, u64);
    impl ZipStreamVisitor for CounterVisitor {
        fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
            self.0 += 1;
            Ok(())
        }

        fn visit_additional_metadata(
            &mut self,
            _metadata: &ZipStreamFileMetadata,
        ) -> ZipResult<()> {
            self.1 += 1;
            Ok(())
        }
    }

    /// Visiting the `invalid_offset.zip` fixture must fail.
    #[test]
    fn invalid_offset() {
        let archive = io::Cursor::new(include_bytes!("../../tests/data/invalid_offset.zip"));
        let outcome = ZipStreamReader::new(archive).visit(&mut DummyVisitor);
        outcome.unwrap_err();
    }

    /// Visiting the `invalid_offset2.zip` fixture must fail.
    #[test]
    fn invalid_offset2() {
        let archive = io::Cursor::new(include_bytes!("../../tests/data/invalid_offset2.zip"));
        let outcome = ZipStreamReader::new(archive).visit(&mut DummyVisitor);
        outcome.unwrap_err();
    }

    /// Every non-directory metadata entry that is visited must belong to a
    /// file that `visit_file` has already seen.
    #[test]
    fn zip_read_streaming() {
        #[derive(Default)]
        struct V {
            filenames: BTreeSet<Box<str>>,
        }
        impl ZipStreamVisitor for V {
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
                if file.is_file() {
                    self.filenames.insert(Box::from(file.name()));
                }

                Ok(())
            }
            fn visit_additional_metadata(
                &mut self,
                metadata: &ZipStreamFileMetadata,
            ) -> ZipResult<()> {
                // Directory entries carry no content, so only files are checked.
                if metadata.is_dir() {
                    return Ok(());
                }

                assert!(
                    self.filenames.contains(metadata.name()),
                    "{} is missing its file content",
                    metadata.name()
                );

                Ok(())
            }
        }

        let archive = io::Cursor::new(include_bytes!("../../tests/data/mimetype.zip"));
        let reader = ZipStreamReader::new(archive);

        let mut visitor = V::default();
        reader.visit(&mut visitor).unwrap();
    }

    /// Entries named `dir*` must report `is_dir()` and entries named `file*`
    /// must report `is_file()`.
    #[test]
    fn file_and_dir_predicates() {
        #[derive(Default)]
        struct V {
            filenames: BTreeSet<Box<str>>,
        }
        impl ZipStreamVisitor for V {
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
                let full_name = file.enclosed_name().unwrap();
                let file_name = full_name.file_name().unwrap().to_str().unwrap();
                assert!(
                    (file_name.starts_with("dir") && file.is_dir())
                        || (file_name.starts_with("file") && file.is_file())
                );

                if file.is_file() {
                    self.filenames.insert(Box::from(file.name()));
                }

                Ok(())
            }
            fn visit_additional_metadata(
                &mut self,
                metadata: &ZipStreamFileMetadata,
            ) -> ZipResult<()> {
                // Directory entries carry no content, so only files are checked.
                if metadata.is_dir() {
                    return Ok(());
                }

                assert!(
                    self.filenames.contains(metadata.name()),
                    "{} is missing its file content",
                    metadata.name()
                );

                Ok(())
            }
        }

        let archive = io::Cursor::new(include_bytes!("../../tests/data/files_and_dirs.zip"));
        let reader = ZipStreamReader::new(archive);

        let mut visitor = V::default();
        reader.visit(&mut visitor).unwrap();
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is more than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_smaller_offset() {
        let archive = io::Cursor::new(include_bytes!(
            "../../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
        ));
        let outcome = ZipStreamReader::new(archive).visit(&mut DummyVisitor);
        outcome.unwrap_err();
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is less than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_greater_offset() {
        let archive = io::Cursor::new(include_bytes!(
            "../../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
        ));
        let outcome = ZipStreamReader::new(archive).visit(&mut DummyVisitor);
        outcome.unwrap_err();
    }
}
373