| 1 | //! Helper functions for efficient IO. |
| 2 | |
/// Streams everything from `reader` into `hasher` and returns the number of bytes consumed.
///
/// Input is pulled through a 64 KiB stack buffer, and every chunk that arrives is passed to
/// `hasher.update`. A read of zero bytes is treated as end-of-input.
///
/// # Errors
///
/// Reads that fail with [`std::io::ErrorKind::Interrupted`] are retried. Any other read error is
/// returned to the caller unchanged; bytes that were already fed to `hasher` stay hashed.
#[cfg(feature = "std")]
pub(crate) fn copy_wide(
    mut reader: impl std::io::Read,
    hasher: &mut crate::Hasher,
) -> std::io::Result<u64> {
    let mut buffer = [0u8; 65536];
    let mut total: u64 = 0;
    loop {
        match reader.read(&mut buffer) {
            // A zero-length read means the reader is exhausted.
            Ok(0) => return Ok(total),
            Ok(n) => {
                // Only the first `n` bytes of the buffer were filled by this read.
                hasher.update(&buffer[..n]);
                total += n as u64;
            }
            // see test_update_reader_interrupted
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
}
| 23 | |
// Mmap a file, if it looks like a good idea. Return None in cases where we know mmap will fail, or
// if the file is short enough that mmapping isn't worth it. However, if we do try to mmap and it
// fails, return the error.
//
// SAFETY: Mmaps are fundamentally unsafe, because you can call invariant-checking functions like
// str::from_utf8 on them and then have them change out from under you. Letting a safe caller get
// their hands on an mmap, or even a &[u8] that's backed by an mmap, is unsound. However, because
// this function is crate-private, we can guarantee that all that can ever happen in the event of a
// race condition is that we either hash nonsense bytes or crash with SIGBUS or similar, neither of
// which should risk memory corruption in a safe caller.
//
// PARANOIA: But a data race...is a data race...is a data race...right? Even if we know that no
// platform in the "real world" is ever going to do anything other than compute the "wrong answer"
// if we race on this mmap while we hash it, aren't we still supposed to feel bad about doing this?
// Well, maybe. This is IO, and IO gets special carve-outs in the memory model. Consider a
// memory-mapped register that returns random 32-bit words. (This is actually realistic if you have
// a hardware RNG.) It's probably sound to construct a *const i32 pointing to that register and do
// some raw pointer reads from it. Those reads should be volatile if you don't want the compiler to
// coalesce them, but either way the compiler isn't allowed to just _go nuts_ and insert
// should-never-happen branches to wipe your hard drive if two adjacent reads happen to give
// different values. As far as I'm aware, there's no such thing as a read that's allowed if it's
// volatile but prohibited if it's not (unlike atomics). As mentioned above, it's not ok to
// construct a safe &i32 to the register if you're going to leak that reference to unknown callers.
// But if you "know what you're doing," I don't think *const i32 and &i32 are fundamentally
// different here. Feedback needed.
#[cfg(feature = "mmap")]
pub(crate) fn maybe_mmap_file(file: &std::fs::File) -> std::io::Result<Option<memmap2::Mmap>> {
    let metadata = file.metadata()?;
    let file_size = metadata.len();

    // Each guard below declines to map by returning Ok(None). They are kept as separate early
    // returns (rather than one combined condition) so that every reason stays documented.

    if !metadata.is_file() {
        // Not a real file.
        return Ok(None);
    }
    if file_size > isize::MAX as u64 {
        // Too long to safely map.
        // https://github.com/danburkert/memmap-rs/issues/69
        return Ok(None);
    }
    if file_size == 0 {
        // Mapping an empty file currently fails.
        // https://github.com/danburkert/memmap-rs/issues/72
        // See test_mmap_virtual_file.
        return Ok(None);
    }
    if file_size < 16 * 1024 {
        // Mapping small files is not worth it.
        return Ok(None);
    }

    // Explicitly set the length of the memory map, so that filesystem
    // changes can't race to violate the invariants we just checked.
    //
    // The cast cannot truncate: file_size was checked against isize::MAX above, and every
    // value in 0..=isize::MAX fits in a usize.
    let map_len = file_size as usize;

    // SAFETY: See the SAFETY discussion in the comment above this function. The map never leaves
    // the crate, so a concurrent modification of the file can at worst make us hash nonsense
    // bytes or crash; it cannot be observed through a safe reference held by an outside caller.
    let map = unsafe { memmap2::MmapOptions::new().len(map_len).map(file)? };
    Ok(Some(map))
}
| 80 | |