| 1 | /// Values supported by [`Mmap::advise`][crate::Mmap::advise] and [`MmapMut::advise`][crate::MmapMut::advise] functions. |
| 2 | /// |
| 3 | /// See [madvise()](https://man7.org/linux/man-pages/man2/madvise.2.html) map page. |
| 4 | #[repr (i32)] |
| 5 | #[derive (Clone, Copy, Debug, Eq, PartialEq, Hash)] |
| 6 | pub enum Advice { |
| 7 | /// **MADV_NORMAL** |
| 8 | /// |
| 9 | /// No special treatment. This is the default. |
| 10 | Normal = libc::MADV_NORMAL, |
| 11 | |
| 12 | /// **MADV_RANDOM** |
| 13 | /// |
| 14 | /// Expect page references in random order. (Hence, read |
| 15 | /// ahead may be less useful than normally.) |
| 16 | Random = libc::MADV_RANDOM, |
| 17 | |
| 18 | /// **MADV_SEQUENTIAL** |
| 19 | /// |
| 20 | /// Expect page references in sequential order. (Hence, pages |
| 21 | /// in the given range can be aggressively read ahead, and may |
| 22 | /// be freed soon after they are accessed.) |
| 23 | Sequential = libc::MADV_SEQUENTIAL, |
| 24 | |
| 25 | /// **MADV_WILLNEED** |
| 26 | /// |
| 27 | /// Expect access in the near future. (Hence, it might be a |
| 28 | /// good idea to read some pages ahead.) |
| 29 | WillNeed = libc::MADV_WILLNEED, |
| 30 | |
| 31 | /// **MADV_DONTFORK** - Linux only (since Linux 2.6.16) |
| 32 | /// |
| 33 | /// Do not make the pages in this range available to the child |
| 34 | /// after a fork(2). This is useful to prevent copy-on-write |
| 35 | /// semantics from changing the physical location of a page if |
| 36 | /// the parent writes to it after a fork(2). (Such page |
| 37 | /// relocations cause problems for hardware that DMAs into the |
| 38 | /// page.) |
| 39 | #[cfg (target_os = "linux" )] |
| 40 | DontFork = libc::MADV_DONTFORK, |
| 41 | |
| 42 | /// **MADV_DOFORK** - Linux only (since Linux 2.6.16) |
| 43 | /// |
| 44 | /// Undo the effect of MADV_DONTFORK, restoring the default |
| 45 | /// behavior, whereby a mapping is inherited across fork(2). |
| 46 | #[cfg (target_os = "linux" )] |
| 47 | DoFork = libc::MADV_DOFORK, |
| 48 | |
| 49 | /// **MADV_MERGEABLE** - Linux only (since Linux 2.6.32) |
| 50 | /// |
| 51 | /// Enable Kernel Samepage Merging (KSM) for the pages in the |
| 52 | /// range specified by addr and length. The kernel regularly |
| 53 | /// scans those areas of user memory that have been marked as |
| 54 | /// mergeable, looking for pages with identical content. |
| 55 | /// These are replaced by a single write-protected page (which |
| 56 | /// is automatically copied if a process later wants to update |
| 57 | /// the content of the page). KSM merges only private |
| 58 | /// anonymous pages (see mmap(2)). |
| 59 | /// |
| 60 | /// The KSM feature is intended for applications that generate |
| 61 | /// many instances of the same data (e.g., virtualization |
| 62 | /// systems such as KVM). It can consume a lot of processing |
| 63 | /// power; use with care. See the Linux kernel source file |
| 64 | /// Documentation/admin-guide/mm/ksm.rst for more details. |
| 65 | /// |
| 66 | /// The MADV_MERGEABLE and MADV_UNMERGEABLE operations are |
| 67 | /// available only if the kernel was configured with |
| 68 | /// CONFIG_KSM. |
| 69 | #[cfg (target_os = "linux" )] |
| 70 | Mergeable = libc::MADV_MERGEABLE, |
| 71 | |
| 72 | /// **MADV_UNMERGEABLE** - Linux only (since Linux 2.6.32) |
| 73 | /// |
| 74 | /// Undo the effect of an earlier MADV_MERGEABLE operation on |
| 75 | /// the specified address range; KSM unmerges whatever pages |
| 76 | /// it had merged in the address range specified by addr and |
| 77 | /// length. |
| 78 | #[cfg (target_os = "linux" )] |
| 79 | Unmergeable = libc::MADV_UNMERGEABLE, |
| 80 | |
| 81 | /// **MADV_HUGEPAGE** - Linux only (since Linux 2.6.38) |
| 82 | /// |
| 83 | /// Enable Transparent Huge Pages (THP) for pages in the range |
| 84 | /// specified by addr and length. Currently, Transparent Huge |
| 85 | /// Pages work only with private anonymous pages (see |
| 86 | /// mmap(2)). The kernel will regularly scan the areas marked |
| 87 | /// as huge page candidates to replace them with huge pages. |
| 88 | /// The kernel will also allocate huge pages directly when the |
| 89 | /// region is naturally aligned to the huge page size (see |
| 90 | /// posix_memalign(2)). |
| 91 | /// |
| 92 | /// This feature is primarily aimed at applications that use |
| 93 | /// large mappings of data and access large regions of that |
| 94 | /// memory at a time (e.g., virtualization systems such as |
| 95 | /// QEMU). It can very easily waste memory (e.g., a 2 MB |
| 96 | /// mapping that only ever accesses 1 byte will result in 2 MB |
| 97 | /// of wired memory instead of one 4 KB page). See the Linux |
| 98 | /// kernel source file |
| 99 | /// Documentation/admin-guide/mm/transhuge.rst for more |
| 100 | /// details. |
| 101 | /// |
| 102 | /// Most common kernels configurations provide MADV_HUGEPAGE- |
| 103 | /// style behavior by default, and thus MADV_HUGEPAGE is |
| 104 | /// normally not necessary. It is mostly intended for |
| 105 | /// embedded systems, where MADV_HUGEPAGE-style behavior may |
| 106 | /// not be enabled by default in the kernel. On such systems, |
| 107 | /// this flag can be used in order to selectively enable THP. |
| 108 | /// Whenever MADV_HUGEPAGE is used, it should always be in |
| 109 | /// regions of memory with an access pattern that the |
| 110 | /// developer knows in advance won't risk to increase the |
| 111 | /// memory footprint of the application when transparent |
| 112 | /// hugepages are enabled. |
| 113 | /// |
| 114 | /// The MADV_HUGEPAGE and MADV_NOHUGEPAGE operations are |
| 115 | /// available only if the kernel was configured with |
| 116 | /// CONFIG_TRANSPARENT_HUGEPAGE. |
| 117 | #[cfg (target_os = "linux" )] |
| 118 | HugePage = libc::MADV_HUGEPAGE, |
| 119 | |
| 120 | /// **MADV_NOHUGEPAGE** - Linux only (since Linux 2.6.38) |
| 121 | /// |
| 122 | /// Ensures that memory in the address range specified by addr |
| 123 | /// and length will not be backed by transparent hugepages. |
| 124 | #[cfg (target_os = "linux" )] |
| 125 | NoHugePage = libc::MADV_NOHUGEPAGE, |
| 126 | |
| 127 | /// **MADV_DONTDUMP** - Linux only (since Linux 3.4) |
| 128 | /// |
| 129 | /// Exclude from a core dump those pages in the range |
| 130 | /// specified by addr and length. This is useful in |
| 131 | /// applications that have large areas of memory that are |
| 132 | /// known not to be useful in a core dump. The effect of |
| 133 | /// **MADV_DONTDUMP** takes precedence over the bit mask that is |
| 134 | /// set via the `/proc/[pid]/coredump_filter` file (see |
| 135 | /// core(5)). |
| 136 | #[cfg (target_os = "linux" )] |
| 137 | DontDump = libc::MADV_DONTDUMP, |
| 138 | |
| 139 | /// **MADV_DODUMP** - Linux only (since Linux 3.4) |
| 140 | /// |
| 141 | /// Undo the effect of an earlier MADV_DONTDUMP. |
| 142 | #[cfg (target_os = "linux" )] |
| 143 | DoDump = libc::MADV_DODUMP, |
| 144 | |
| 145 | /// **MADV_HWPOISON** - Linux only (since Linux 2.6.32) |
| 146 | /// |
| 147 | /// Poison the pages in the range specified by addr and length |
| 148 | /// and handle subsequent references to those pages like a |
| 149 | /// hardware memory corruption. This operation is available |
| 150 | /// only for privileged (CAP_SYS_ADMIN) processes. This |
| 151 | /// operation may result in the calling process receiving a |
| 152 | /// SIGBUS and the page being unmapped. |
| 153 | /// |
| 154 | /// This feature is intended for testing of memory error- |
| 155 | /// handling code; it is available only if the kernel was |
| 156 | /// configured with CONFIG_MEMORY_FAILURE. |
| 157 | #[cfg (target_os = "linux" )] |
| 158 | HwPoison = libc::MADV_HWPOISON, |
| 159 | |
| 160 | /// **MADV_POPULATE_READ** - Linux only (since Linux 5.14) |
| 161 | /// |
| 162 | /// Populate (prefault) page tables readable, faulting in all |
| 163 | /// pages in the range just as if manually reading from each |
| 164 | /// page; however, avoid the actual memory access that would have |
| 165 | /// been performed after handling the fault. |
| 166 | /// |
| 167 | /// In contrast to MAP_POPULATE, MADV_POPULATE_READ does not hide |
| 168 | /// errors, can be applied to (parts of) existing mappings and |
| 169 | /// will always populate (prefault) page tables readable. One |
| 170 | /// example use case is prefaulting a file mapping, reading all |
| 171 | /// file content from disk; however, pages won't be dirtied and |
| 172 | /// consequently won't have to be written back to disk when |
| 173 | /// evicting the pages from memory. |
| 174 | /// |
| 175 | /// Depending on the underlying mapping, map the shared zeropage, |
| 176 | /// preallocate memory or read the underlying file; files with |
| 177 | /// holes might or might not preallocate blocks. If populating |
| 178 | /// fails, a SIGBUS signal is not generated; instead, an error is |
| 179 | /// returned. |
| 180 | /// |
| 181 | /// If MADV_POPULATE_READ succeeds, all page tables have been |
| 182 | /// populated (prefaulted) readable once. If MADV_POPULATE_READ |
| 183 | /// fails, some page tables might have been populated. |
| 184 | /// |
| 185 | /// MADV_POPULATE_READ cannot be applied to mappings without read |
| 186 | /// permissions and special mappings, for example, mappings |
| 187 | /// marked with kernel-internal flags such as VM_PFNMAP or VM_IO, |
| 188 | /// or secret memory regions created using memfd_secret(2). |
| 189 | /// |
| 190 | /// Note that with MADV_POPULATE_READ, the process can be killed |
| 191 | /// at any moment when the system runs out of memory. |
| 192 | #[cfg (target_os = "linux" )] |
| 193 | PopulateRead = libc::MADV_POPULATE_READ, |
| 194 | |
| 195 | /// **MADV_POPULATE_WRITE** - Linux only (since Linux 5.14) |
| 196 | /// |
| 197 | /// Populate (prefault) page tables writable, faulting in all |
| 198 | /// pages in the range just as if manually writing to each each |
| 199 | /// page; however, avoid the actual memory access that would have |
| 200 | /// been performed after handling the fault. |
| 201 | /// |
| 202 | /// In contrast to MAP_POPULATE, MADV_POPULATE_WRITE does not |
| 203 | /// hide errors, can be applied to (parts of) existing mappings |
| 204 | /// and will always populate (prefault) page tables writable. |
| 205 | /// One example use case is preallocating memory, breaking any |
| 206 | /// CoW (Copy on Write). |
| 207 | /// |
| 208 | /// Depending on the underlying mapping, preallocate memory or |
| 209 | /// read the underlying file; files with holes will preallocate |
| 210 | /// blocks. If populating fails, a SIGBUS signal is not gener‐ |
| 211 | /// ated; instead, an error is returned. |
| 212 | /// |
| 213 | /// If MADV_POPULATE_WRITE succeeds, all page tables have been |
| 214 | /// populated (prefaulted) writable once. If MADV_POPULATE_WRITE |
| 215 | /// fails, some page tables might have been populated. |
| 216 | /// |
| 217 | /// MADV_POPULATE_WRITE cannot be applied to mappings without |
| 218 | /// write permissions and special mappings, for example, mappings |
| 219 | /// marked with kernel-internal flags such as VM_PFNMAP or VM_IO, |
| 220 | /// or secret memory regions created using memfd_secret(2). |
| 221 | /// |
| 222 | /// Note that with MADV_POPULATE_WRITE, the process can be killed |
| 223 | /// at any moment when the system runs out of memory. |
| 224 | #[cfg (target_os = "linux" )] |
| 225 | PopulateWrite = libc::MADV_POPULATE_WRITE, |
| 226 | |
| 227 | /// **MADV_ZERO_WIRED_PAGES** - Darwin only |
| 228 | /// |
| 229 | /// Indicates that the application would like the wired pages in this address range to be |
| 230 | /// zeroed out if the address range is deallocated without first unwiring the pages (i.e. |
| 231 | /// a munmap(2) without a preceding munlock(2) or the application quits). This is used |
| 232 | /// with madvise() system call. |
| 233 | #[cfg (any(target_os = "macos" , target_os = "ios" ))] |
| 234 | ZeroWiredPages = libc::MADV_ZERO_WIRED_PAGES, |
| 235 | } |
| 236 | |
| 237 | /// Values supported by [`Mmap::unsafe_advise`][crate::Mmap::unsafe_advise] and [`MmapMut::unsafe_advise`][crate::MmapMut::unsafe_advise] functions. |
| 238 | /// |
| 239 | /// These flags can be passed to the [madvise (2)][man_page] system call |
| 240 | /// and effects on the mapped pages which are conceptually writes, |
| 241 | /// i.e. the change the observable contents of these pages which |
| 242 | /// implies undefined behaviour if the mapping is still borrowed. |
| 243 | /// |
| 244 | /// Hence, these potentially unsafe flags must be used with the unsafe |
| 245 | /// methods and the programmer has to justify that the code |
| 246 | /// does not keep any borrows of the mapping active while the mapped pages |
| 247 | /// are updated by the kernel's memory management subsystem. |
| 248 | /// |
| 249 | /// [man_page]: https://man7.org/linux/man-pages/man2/madvise.2.html |
| 250 | #[repr (i32)] |
| 251 | #[derive (Clone, Copy, Debug, Eq, PartialEq, Hash)] |
| 252 | pub enum UncheckedAdvice { |
| 253 | /// **MADV_DONTNEED** |
| 254 | /// |
| 255 | /// Do not expect access in the near future. (For the time |
| 256 | /// being, the application is finished with the given range, |
| 257 | /// so the kernel can free resources associated with it.) |
| 258 | /// |
| 259 | /// After a successful MADV_DONTNEED operation, the semantics |
| 260 | /// of memory access in the specified region are changed: |
| 261 | /// subsequent accesses of pages in the range will succeed, |
| 262 | /// but will result in either repopulating the memory contents |
| 263 | /// from the up-to-date contents of the underlying mapped file |
| 264 | /// (for shared file mappings, shared anonymous mappings, and |
| 265 | /// shmem-based techniques such as System V shared memory |
| 266 | /// segments) or zero-fill-on-demand pages for anonymous |
| 267 | /// private mappings. |
| 268 | /// |
| 269 | /// Note that, when applied to shared mappings, MADV_DONTNEED |
| 270 | /// might not lead to immediate freeing of the pages in the |
| 271 | /// range. The kernel is free to delay freeing the pages |
| 272 | /// until an appropriate moment. The resident set size (RSS) |
| 273 | /// of the calling process will be immediately reduced |
| 274 | /// however. |
| 275 | /// |
| 276 | /// **MADV_DONTNEED** cannot be applied to locked pages, Huge TLB |
| 277 | /// pages, or VM_PFNMAP pages. (Pages marked with the kernel- |
| 278 | /// internal VM_PFNMAP flag are special memory areas that are |
| 279 | /// not managed by the virtual memory subsystem. Such pages |
| 280 | /// are typically created by device drivers that map the pages |
| 281 | /// into user space.) |
| 282 | /// |
| 283 | /// # Safety |
| 284 | /// |
| 285 | /// Using the returned value with conceptually write to the |
| 286 | /// mapped pages, i.e. borrowing the mapping when the pages |
| 287 | /// are freed results in undefined behaviour. |
| 288 | DontNeed = libc::MADV_DONTNEED, |
| 289 | |
| 290 | // |
| 291 | // The rest are Linux-specific |
| 292 | // |
| 293 | /// **MADV_FREE** - Linux (since Linux 4.5) and Darwin |
| 294 | /// |
| 295 | /// The application no longer requires the pages in the range |
| 296 | /// specified by addr and len. The kernel can thus free these |
| 297 | /// pages, but the freeing could be delayed until memory |
| 298 | /// pressure occurs. For each of the pages that has been |
| 299 | /// marked to be freed but has not yet been freed, the free |
| 300 | /// operation will be canceled if the caller writes into the |
| 301 | /// page. After a successful MADV_FREE operation, any stale |
| 302 | /// data (i.e., dirty, unwritten pages) will be lost when the |
| 303 | /// kernel frees the pages. However, subsequent writes to |
| 304 | /// pages in the range will succeed and then kernel cannot |
| 305 | /// free those dirtied pages, so that the caller can always |
| 306 | /// see just written data. If there is no subsequent write, |
| 307 | /// the kernel can free the pages at any time. Once pages in |
| 308 | /// the range have been freed, the caller will see zero-fill- |
| 309 | /// on-demand pages upon subsequent page references. |
| 310 | /// |
| 311 | /// The MADV_FREE operation can be applied only to private |
| 312 | /// anonymous pages (see mmap(2)). In Linux before version |
| 313 | /// 4.12, when freeing pages on a swapless system, the pages |
| 314 | /// in the given range are freed instantly, regardless of |
| 315 | /// memory pressure. |
| 316 | /// |
| 317 | /// # Safety |
| 318 | /// |
| 319 | /// Using the returned value with conceptually write to the |
| 320 | /// mapped pages, i.e. borrowing the mapping while the pages |
| 321 | /// are still being freed results in undefined behaviour. |
| 322 | #[cfg (any(target_os = "linux" , target_os = "macos" , target_os = "ios" ))] |
| 323 | Free = libc::MADV_FREE, |
| 324 | |
| 325 | /// **MADV_REMOVE** - Linux only (since Linux 2.6.16) |
| 326 | /// |
| 327 | /// Free up a given range of pages and its associated backing |
| 328 | /// store. This is equivalent to punching a hole in the |
| 329 | /// corresponding byte range of the backing store (see |
| 330 | /// fallocate(2)). Subsequent accesses in the specified |
| 331 | /// address range will see bytes containing zero. |
| 332 | /// |
| 333 | /// The specified address range must be mapped shared and |
| 334 | /// writable. This flag cannot be applied to locked pages, |
| 335 | /// Huge TLB pages, or VM_PFNMAP pages. |
| 336 | /// |
| 337 | /// In the initial implementation, only tmpfs(5) was supported |
| 338 | /// **MADV_REMOVE**; but since Linux 3.5, any filesystem which |
| 339 | /// supports the fallocate(2) FALLOC_FL_PUNCH_HOLE mode also |
| 340 | /// supports MADV_REMOVE. Hugetlbfs fails with the error |
| 341 | /// EINVAL and other filesystems fail with the error |
| 342 | /// EOPNOTSUPP. |
| 343 | /// |
| 344 | /// # Safety |
| 345 | /// |
| 346 | /// Using the returned value with conceptually write to the |
| 347 | /// mapped pages, i.e. borrowing the mapping when the pages |
| 348 | /// are freed results in undefined behaviour. |
| 349 | #[cfg (target_os = "linux" )] |
| 350 | Remove = libc::MADV_REMOVE, |
| 351 | |
| 352 | /// **MADV_FREE_REUSABLE** - Darwin only |
| 353 | /// |
| 354 | /// Behaves like **MADV_FREE**, but the freed pages are accounted for in the RSS of the process. |
| 355 | /// |
| 356 | /// # Safety |
| 357 | /// |
| 358 | /// Using the returned value with conceptually write to the |
| 359 | /// mapped pages, i.e. borrowing the mapping while the pages |
| 360 | /// are still being freed results in undefined behaviour. |
| 361 | #[cfg (any(target_os = "macos" , target_os = "ios" ))] |
| 362 | FreeReusable = libc::MADV_FREE_REUSABLE, |
| 363 | |
| 364 | /// **MADV_FREE_REUSE** - Darwin only |
| 365 | /// |
| 366 | /// Marks a memory region previously freed by **MADV_FREE_REUSABLE** as non-reusable, accounts |
| 367 | /// for the pages in the RSS of the process. Pages that have been freed will be replaced by |
| 368 | /// zero-filled pages on demand, other pages will be left as is. |
| 369 | /// |
| 370 | /// # Safety |
| 371 | /// |
| 372 | /// Using the returned value with conceptually write to the |
| 373 | /// mapped pages, i.e. borrowing the mapping while the pages |
| 374 | /// are still being freed results in undefined behaviour. |
| 375 | #[cfg (any(target_os = "macos" , target_os = "ios" ))] |
| 376 | FreeReuse = libc::MADV_FREE_REUSE, |
| 377 | } |
| 378 | |
// Future expansion:
// MADV_SOFT_OFFLINE  (since Linux 2.6.33)
// MADV_WIPEONFORK  (since Linux 4.14)
// MADV_KEEPONFORK  (since Linux 4.14)
// MADV_COLD  (since Linux 5.4)
// MADV_PAGEOUT  (since Linux 5.4)

| 386 | #[cfg (target_os = "linux" )] |
| 387 | impl Advice { |
| 388 | /// Performs a runtime check if this advice is supported by the kernel. |
| 389 | /// Only supported on Linux. See the [`madvise(2)`] man page. |
| 390 | /// |
| 391 | /// [`madvise(2)`]: https://man7.org/linux/man-pages/man2/madvise.2.html#VERSIONS |
| 392 | pub fn is_supported(self) -> bool { |
| 393 | (unsafe { libc::madvise(addr:std::ptr::null_mut(), len:0, self as libc::c_int) }) == 0 |
| 394 | } |
| 395 | } |
| 396 | |
| 397 | #[cfg (target_os = "linux" )] |
| 398 | impl UncheckedAdvice { |
| 399 | /// Performs a runtime check if this advice is supported by the kernel. |
| 400 | /// Only supported on Linux. See the [`madvise(2)`] man page. |
| 401 | /// |
| 402 | /// [`madvise(2)`]: https://man7.org/linux/man-pages/man2/madvise.2.html#VERSIONS |
| 403 | pub fn is_supported(self) -> bool { |
| 404 | (unsafe { libc::madvise(addr:std::ptr::null_mut(), len:0, self as libc::c_int) }) == 0 |
| 405 | } |
| 406 | } |
| 407 | |
#[cfg(test)]
mod tests {
    /// Probes the basic advice values and asserts that the running kernel
    /// reports each of them as supported.
    #[cfg(target_os = "linux")]
    #[test]
    fn test_is_supported() {
        use super::*;

        assert!(Advice::Normal.is_supported());
        assert!(Advice::Random.is_supported());
        assert!(Advice::Sequential.is_supported());
        assert!(Advice::WillNeed.is_supported());

        assert!(UncheckedAdvice::DontNeed.is_supported());
    }
}
