//! This module contains the entry points for `slice::sort_unstable`.

use crate::mem::SizedTypeProperties;
#[cfg(not(any(feature = "optimize_for_size", target_pointer_width = "16")))]
use crate::slice::sort::shared::find_existing_run;
#[cfg(not(any(feature = "optimize_for_size", target_pointer_width = "16")))]
use crate::slice::sort::shared::smallsort::insertion_sort_shift_left;
use crate::{cfg_select, intrinsics};
| 9 | |
| 10 | pub(crate) mod heapsort; |
| 11 | pub(crate) mod quicksort; |
| 12 | |
| 13 | /// Unstable sort called ipnsort by Lukas Bergdoll and Orson Peters. |
| 14 | /// Design document: |
| 15 | /// <https://github.com/Voultapher/sort-research-rs/blob/main/writeup/ipnsort_introduction/text.md> |
| 16 | /// |
| 17 | /// Upholds all safety properties outlined here: |
| 18 | /// <https://github.com/Voultapher/sort-research-rs/blob/main/writeup/sort_safety/text.md> |
| 19 | #[inline (always)] |
| 20 | pub fn sort<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) { |
| 21 | // Arrays of zero-sized types are always all-equal, and thus sorted. |
| 22 | if T::IS_ZST { |
| 23 | return; |
| 24 | } |
| 25 | |
| 26 | // Instrumenting the standard library showed that 90+% of the calls to sort |
| 27 | // by rustc are either of size 0 or 1. |
| 28 | let len = v.len(); |
| 29 | if intrinsics::likely(len < 2) { |
| 30 | return; |
| 31 | } |
| 32 | |
| 33 | cfg_select! { |
| 34 | any(feature = "optimize_for_size" , target_pointer_width = "16" ) => { |
| 35 | heapsort::heapsort(v, is_less); |
| 36 | } |
| 37 | _ => { |
| 38 | // More advanced sorting methods than insertion sort are faster if called in |
| 39 | // a hot loop for small inputs, but for general-purpose code the small |
| 40 | // binary size of insertion sort is more important. The instruction cache in |
| 41 | // modern processors is very valuable, and for a single sort call in general |
| 42 | // purpose code any gains from an advanced method are cancelled by i-cache |
| 43 | // misses during the sort, and thrashing the i-cache for surrounding code. |
| 44 | const MAX_LEN_ALWAYS_INSERTION_SORT: usize = 20; |
| 45 | if intrinsics::likely(len <= MAX_LEN_ALWAYS_INSERTION_SORT) { |
| 46 | insertion_sort_shift_left(v, 1, is_less); |
| 47 | return; |
| 48 | } |
| 49 | |
| 50 | ipnsort(v, is_less); |
| 51 | } |
| 52 | } |
| 53 | } |
| 54 | |
| 55 | /// See [`sort`] |
| 56 | /// |
| 57 | /// Deliberately don't inline the main sorting routine entrypoint to ensure the |
| 58 | /// inlined insertion sort i-cache footprint remains minimal. |
| 59 | #[cfg (not(any(feature = "optimize_for_size" , target_pointer_width = "16" )))] |
| 60 | #[inline (never)] |
| 61 | fn ipnsort<T, F>(v: &mut [T], is_less: &mut F) |
| 62 | where |
| 63 | F: FnMut(&T, &T) -> bool, |
| 64 | { |
| 65 | let len: usize = v.len(); |
| 66 | let (run_len: usize, was_reversed: bool) = find_existing_run(v, is_less); |
| 67 | |
| 68 | // SAFETY: find_existing_run promises to return a valid run_len. |
| 69 | unsafe { intrinsics::assume(run_len <= len) }; |
| 70 | |
| 71 | if run_len == len { |
| 72 | if was_reversed { |
| 73 | v.reverse(); |
| 74 | } |
| 75 | |
| 76 | // It would be possible to a do in-place merging here for a long existing streak. But that |
| 77 | // makes the implementation a lot bigger, users can use `slice::sort` for that use-case. |
| 78 | return; |
| 79 | } |
| 80 | |
| 81 | // Limit the number of imbalanced partitions to `2 * floor(log2(len))`. |
| 82 | // The binary OR by one is used to eliminate the zero-check in the logarithm. |
| 83 | let limit: u32 = 2 * (len | 1).ilog2(); |
| 84 | crate::slice::sort::unstable::quicksort::quicksort(v, ancestor_pivot:None, limit, is_less); |
| 85 | } |
| 86 | |