//! This module contains the entry points for `slice::sort_unstable`.
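//!
//! Illustrative of how code reaches this module (the public wrappers live on
//! `slice` itself and forward a strict less-than closure):
//!
//! ```ignore
//! let mut v = [5, 1, 4];
//! v.sort_unstable(); // routes into `sort` below with `|a, b| a < b` as `is_less`
//! assert_eq!(v, [1, 4, 5]);
//! ```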

use crate::mem::SizedTypeProperties;
#[cfg(not(any(feature = "optimize_for_size", target_pointer_width = "16")))]
use crate::slice::sort::shared::find_existing_run;
#[cfg(not(any(feature = "optimize_for_size", target_pointer_width = "16")))]
use crate::slice::sort::shared::smallsort::insertion_sort_shift_left;
use crate::{cfg_match, intrinsics};

pub(crate) mod heapsort;
pub(crate) mod quicksort;

/// Unstable sort called ipnsort by Lukas Bergdoll and Orson Peters.
/// Design document:
/// <https://github.com/Voultapher/sort-research-rs/blob/main/writeup/ipnsort_introduction/text.md>
///
/// Upholds all safety properties outlined here:
/// <https://github.com/Voultapher/sort-research-rs/blob/main/writeup/sort_safety/text.md>
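///
/// A minimal sketch of the expected caller shape (illustrative only; the real
/// entry is via the public `slice::sort_unstable*` wrappers, which pass a
/// strict less-than closure):
///
/// ```ignore
/// let mut v = [3, 1, 2];
/// sort(&mut v, &mut |a: &i32, b: &i32| a < b);
/// assert_eq!(v, [1, 2, 3]);
/// ```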
#[inline(always)]
pub fn sort<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) {
    // Arrays of zero-sized types are always all-equal, and thus sorted.
    if T::IS_ZST {
        return;
    }

    // Instrumenting the standard library showed that 90+% of the calls to sort
    // by rustc are either of size 0 or 1.
    let len = v.len();
    if intrinsics::likely(len < 2) {
        return;
    }

    cfg_match! {
        any(feature = "optimize_for_size", target_pointer_width = "16") => {
            heapsort::heapsort(v, is_less);
        }
        _ => {
            // More advanced sorting methods than insertion sort are faster if called in
            // a hot loop for small inputs, but for general-purpose code the small
            // binary size of insertion sort is more important. The instruction cache in
            // modern processors is very valuable, and for a single sort call in general
            // purpose code any gains from an advanced method are cancelled by i-cache
            // misses during the sort, and thrashing the i-cache for surrounding code.
            const MAX_LEN_ALWAYS_INSERTION_SORT: usize = 20;
            if intrinsics::likely(len <= MAX_LEN_ALWAYS_INSERTION_SORT) {
                insertion_sort_shift_left(v, 1, is_less);
                return;
            }

            ipnsort(v, is_less);
        }
    }
}

/// See [`sort`]
///
/// Deliberately don't inline the main sorting routine entrypoint to ensure the
/// inlined insertion sort i-cache footprint remains minimal.
#[cfg(not(any(feature = "optimize_for_size", target_pointer_width = "16")))]
#[inline(never)]
fn ipnsort<T, F>(v: &mut [T], is_less: &mut F)
where
    F: FnMut(&T, &T) -> bool,
{
    let len = v.len();
    let (run_len, was_reversed) = find_existing_run(v, is_less);
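    // Illustrative: for `[1, 2, 3, 0]` this yields `(3, false)`; for the
    // strictly descending prefix in `[3, 2, 1, 9]` it yields `(3, true)`.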

    // SAFETY: find_existing_run promises to return a valid run_len.
    unsafe { intrinsics::assume(run_len <= len) };

    if run_len == len {
        if was_reversed {
            v.reverse();
        }

        // It would be possible to do in-place merging here for a long existing streak. But
        // that makes the implementation a lot bigger; users can use `slice::sort` for that
        // use-case.
        return;
    }
    // Limit the number of imbalanced partitions to `2 * floor(log2(len))`.
    // The binary OR by one is used to eliminate the zero-check in the logarithm.
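    // E.g. for len == 500: (500 | 1).ilog2() == ilog2(501) == 8, so limit == 16.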
    let limit = 2 * (len | 1).ilog2();
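    // No pivot from an enclosing partition exists at the top-level call, hence
    // `None` for `quicksort`'s `ancestor_pivot` parameter.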
    crate::slice::sort::unstable::quicksort::quicksort(v, None, limit, is_less);
}