//! This module contains the entry points for `slice::sort`.

#[cfg(not(any(feature = "optimize_for_size", target_pointer_width = "16")))]
use crate::cmp;
use crate::mem::{MaybeUninit, SizedTypeProperties};
#[cfg(not(any(feature = "optimize_for_size", target_pointer_width = "16")))]
use crate::slice::sort::shared::smallsort::{
    SMALL_SORT_GENERAL_SCRATCH_LEN, StableSmallSortTypeImpl, insertion_sort_shift_left,
};
use crate::{cfg_select, intrinsics};

pub(crate) mod merge;

#[cfg(not(any(feature = "optimize_for_size", target_pointer_width = "16")))]
pub(crate) mod drift;
#[cfg(not(any(feature = "optimize_for_size", target_pointer_width = "16")))]
pub(crate) mod quicksort;

#[cfg(any(feature = "optimize_for_size", target_pointer_width = "16"))]
pub(crate) mod tiny;

/// Stable sort called driftsort by Orson Peters and Lukas Bergdoll.
/// Design document:
/// <https://github.com/Voultapher/sort-research-rs/blob/main/writeup/driftsort_introduction/text.md>
///
/// Upholds all safety properties outlined here:
/// <https://github.com/Voultapher/sort-research-rs/blob/main/writeup/sort_safety/text.md>
#[inline(always)]
pub fn sort<T, F: FnMut(&T, &T) -> bool, BufT: BufGuard<T>>(v: &mut [T], is_less: &mut F) {
    // Arrays of zero-sized types are always all-equal, and thus sorted.
    if T::IS_ZST {
        return;
    }

    // Instrumenting the standard library showed that 90+% of the calls to sort
    // by rustc are either of size 0 or 1.
    let len = v.len();
    if intrinsics::likely(len < 2) {
        return;
    }

    cfg_select! {
        any(feature = "optimize_for_size", target_pointer_width = "16") => {
            // Unlike driftsort, mergesort only requires len / 2 elements of
            // scratch space, not len - len / 2.
            let alloc_len = len / 2;
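            // E.g. for len = 9 the mergesort scratch is 9 / 2 = 4 elements,
            // while driftsort would size its scratch as len - len / 2 = 5.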

            cfg_select! {
                target_pointer_width = "16" => {
                    let mut heap_buf = BufT::with_capacity(alloc_len);
                    let scratch = heap_buf.as_uninit_slice_mut();
                }
                _ => {
                    // For small inputs 4KiB of stack storage suffices, which allows us to avoid
                    // calling the (de-)allocator. Benchmarks showed this was quite beneficial.
                    let mut stack_buf = AlignedStorage::<T, 4096>::new();
                    let stack_scratch = stack_buf.as_uninit_slice_mut();
                    let mut heap_buf;
                    let scratch = if stack_scratch.len() >= alloc_len {
                        stack_scratch
                    } else {
                        heap_buf = BufT::with_capacity(alloc_len);
                        heap_buf.as_uninit_slice_mut()
                    };
                }
            }

            tiny::mergesort(v, scratch, is_less);
        }
        _ => {
            // More advanced sorting methods than insertion sort are faster if called in
            // a hot loop for small inputs, but for general-purpose code the small
            // binary size of insertion sort is more important. The instruction cache in
            // modern processors is very valuable, and for a single sort call in
            // general-purpose code any gains from an advanced method are cancelled out
            // by i-cache misses during the sort, and by thrashing the i-cache for
            // surrounding code.
            const MAX_LEN_ALWAYS_INSERTION_SORT: usize = 20;
            if intrinsics::likely(len <= MAX_LEN_ALWAYS_INSERTION_SORT) {
                insertion_sort_shift_left(v, 1, is_less);
                return;
            }

            driftsort_main::<T, F, BufT>(v, is_less);
        }
    }
}
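
// How a caller outside `core` might drive this entry point, shown as a sketch
// (the actual wrapper lives in the allocating parts of the standard library;
// `stable_sort` is an illustrative name, not an item in this module):
//
//     fn stable_sort<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], mut is_less: F) {
//         // `Vec<T>` can serve as the `BufGuard` implementation; see the
//         // sketch after the `BufGuard` trait below.
//         sort::<T, F, Vec<T>>(v, &mut is_less);
//     }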

/// See [`sort`].
///
/// Deliberately don't inline the main sorting routine entrypoint to ensure the
/// inlined insertion sort i-cache footprint remains minimal.
#[cfg(not(any(feature = "optimize_for_size", target_pointer_width = "16")))]
#[inline(never)]
fn driftsort_main<T, F: FnMut(&T, &T) -> bool, BufT: BufGuard<T>>(v: &mut [T], is_less: &mut F) {
    // By allocating n elements of memory we can ensure the entire input can
    // be sorted using stable quicksort, which allows better performance on
    // random and low-cardinality distributions. However, we still want to
    // reduce our memory usage to n - n / 2 for large inputs. We do this by scaling
    // our allocation as max(n - n / 2, min(n, 8MB)), ensuring we scale like n for
    // small inputs and n - n / 2 for large inputs, without a sudden drop off. We
    // also need to ensure our alloc >= SMALL_SORT_GENERAL_SCRATCH_LEN, as the
    // small-sort always needs this much memory.
    //
    // driftsort will produce unsorted runs of up to min_good_run_len, which
    // is at most len - len / 2.
    // Unsorted runs need to be processed by quicksort, which requires as much
    // scratch space as the run length; therefore the scratch space must be at
    // least len - len / 2.
    // If min_good_run_len is ever modified, this code must be updated to
    // allocate the correct scratch size for it.
    const MAX_FULL_ALLOC_BYTES: usize = 8_000_000; // 8MB
    let max_full_alloc = MAX_FULL_ALLOC_BYTES / size_of::<T>();
    let len = v.len();
    let alloc_len = cmp::max(
        cmp::max(len - len / 2, cmp::min(len, max_full_alloc)),
        SMALL_SORT_GENERAL_SCRATCH_LEN,
    );
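
    // Worked example of the scaling, assuming size_of::<T>() == 8 (e.g. u64),
    // so max_full_alloc = 8_000_000 / 8 = 1_000_000 (numbers are illustrative):
    //
    // - len = 100:       max(50, min(100, 1_000_000)) = 100                    (scales like n)
    // - len = 1_500_000: max(750_000, min(1_500_000, 1_000_000)) = 1_000_000   (capped, no cliff)
    // - len = 4_000_000: max(2_000_000, min(4_000_000, 1_000_000)) = 2_000_000 (n - n / 2)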

    // For small inputs 4KiB of stack storage suffices, which allows us to avoid
    // calling the (de-)allocator. Benchmarks showed this was quite beneficial.
    let mut stack_buf = AlignedStorage::<T, 4096>::new();
    let stack_scratch = stack_buf.as_uninit_slice_mut();
    let mut heap_buf;
    let scratch = if stack_scratch.len() >= alloc_len {
        stack_scratch
    } else {
        heap_buf = BufT::with_capacity(alloc_len);
        heap_buf.as_uninit_slice_mut()
    };

    // For small inputs using quicksort is not yet beneficial, and a single
    // small-sort or two small-sorts plus a single merge outperforms it, so use
    // eager mode.
    let eager_sort = len <= T::small_sort_threshold() * 2;
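    // E.g. if T::small_sort_threshold() were 32 (an assumed value; the real
    // threshold depends on the type), inputs of up to 64 elements would be
    // handled without quicksort.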
    crate::slice::sort::stable::drift::sort(v, scratch, eager_sort, is_less);
}

#[doc(hidden)]
/// Abstracts an owned memory buffer, so that the sort code can live in `core`,
/// where no allocation is possible. The trait can then be implemented in a
/// place that has access to allocation.
pub trait BufGuard<T> {
    /// Creates a new buffer that holds at least `capacity` elements.
    fn with_capacity(capacity: usize) -> Self;
    /// Returns mutable access to the uninitialized memory owned by the buffer.
    fn as_uninit_slice_mut(&mut self) -> &mut [MaybeUninit<T>];
}
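
// A minimal sketch of how an allocating crate could implement `BufGuard` for
// `Vec<T>` (illustrative; not necessarily the exact upstream impl):
//
//     impl<T> BufGuard<T> for Vec<T> {
//         fn with_capacity(capacity: usize) -> Self {
//             Vec::with_capacity(capacity)
//         }
//
//         fn as_uninit_slice_mut(&mut self) -> &mut [MaybeUninit<T>] {
//             // A `Vec`'s spare capacity is exactly its owned, uninitialized tail.
//             self.spare_capacity_mut()
//         }
//     }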

#[repr(C)]
struct AlignedStorage<T, const N: usize> {
    _align: [T; 0],
    storage: [MaybeUninit<u8>; N],
}

impl<T, const N: usize> AlignedStorage<T, N> {
    fn new() -> Self {
        Self { _align: [], storage: [const { MaybeUninit::uninit() }; N] }
    }

    fn as_uninit_slice_mut(&mut self) -> &mut [MaybeUninit<T>] {
        // `T` is never zero-sized here: `sort` returns early for ZSTs before
        // any scratch buffer is created, so this division is never by zero.
        let len: usize = N / size_of::<T>();

        // SAFETY: `_align` ensures we are correctly aligned.
        unsafe { core::slice::from_raw_parts_mut(self.storage.as_mut_ptr().cast(), len) }
    }
}
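
// Illustrative use of `AlignedStorage` (a sketch): a 4096-byte buffer viewed
// as scratch for `u64` yields 4096 / 8 = 512 slots, and the zero-length
// `[u64; 0]` field forces `u64` alignment on the byte array:
//
//     let mut buf = AlignedStorage::<u64, 4096>::new();
//     let scratch: &mut [MaybeUninit<u64>] = buf.as_uninit_slice_mut();
//     assert_eq!(scratch.len(), 512);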