// quicksort.rs source code [crates/rayon/src/slice/quicksort.rs]

1	//! Parallel quicksort.
2	//!
3	//! This implementation is copied verbatim from `std::slice::sort_unstable` and then parallelized.
4	//! The only difference from the original is that calls to `recurse` are executed in parallel using
5	//! `rayon_core::join`.
6
7	use std::marker::PhantomData;
8	use std::mem::{self, MaybeUninit};
9	use std::ptr;
10
/// Drop guard that copies one `T` from `src` into `dest` when it goes out of scope.
///
/// The sorting routines use it to "plug a hole": an element is read out of the slice into a
/// stack temporary, and this guard writes it back even if a comparison panics.
#[must_use]
struct CopyOnDrop<'a, T> {
    /// Location the element is copied from on drop.
    src: *const T,
    /// Location the element is copied to on drop. Callers may retarget it while the guard lives.
    dest: *mut T,
    /// `src` is often a local pointer here, make sure we have appropriate
    /// PhantomData so that dropck can protect us.
    marker: PhantomData<&'a mut T>,
}

impl<'a, T> CopyOnDrop<'a, T> {
    /// Constructs a guard from a source reference and a destination pointer.
    ///
    /// # Safety
    ///
    /// `dest` must stay valid for a write of one `T` until the guard is dropped, and it must
    /// not overlap `src`. The constructor assumes `dest` lives longer than `src`, since there is
    /// no easy way to copy down lifetime information from another pointer.
    unsafe fn new(src: &'a T, dest: *mut T) -> Self {
        CopyOnDrop {
            src,
            dest,
            marker: PhantomData,
        }
    }
}

impl<T> Drop for CopyOnDrop<'_, T> {
    fn drop(&mut self) {
        // SAFETY: This is a helper class.
        //         Please refer to its usage for correctness.
        //         Namely, one must be sure that `src` and `dest` do not overlap, as required by
        //         `ptr::copy_nonoverlapping`.
        unsafe {
            ptr::copy_nonoverlapping(self.src, self.dest, 1);
        }
    }
}
44
45	/// Shifts the first element to the right until it encounters a greater or equal element.
46	fn shift_head<T, F>(v: &mut [T], is_less: &F)
47	where
48	F: Fn(&T, &T) -> bool,
49	{
50	let len = v.len();
51	// SAFETY: The unsafe operations below involves indexing without a bounds check (by offsetting a
52	// pointer) and copying memory (`ptr::copy_nonoverlapping`).
53	//
54	// a. Indexing:
55	// 1. We checked the size of the array to >=2.
56	// 2. All the indexing that we will do is always between {0 <= index < len} at most.
57	//
58	// b. Memory copying
59	// 1. We are obtaining pointers to references which are guaranteed to be valid.
60	// 2. They cannot overlap because we obtain pointers to difference indices of the slice.
61	// Namely, `i` and `i-1`.
62	// 3. If the slice is properly aligned, the elements are properly aligned.
63	// It is the caller's responsibility to make sure the slice is properly aligned.
64	//
65	// See comments below for further detail.
66	unsafe {
67	// If the first two elements are out-of-order...
68	if len >= `2` && is_less(v.get_unchecked(`1`), v.get_unchecked(`0`)) {
69	// Read the first element into a stack-allocated variable. If a following comparison
70	// operation panics, `hole` will get dropped and automatically write the element back
71	// into the slice.
72	let tmp = mem::ManuallyDrop::new(ptr::read(v.get_unchecked(`0`)));
73	let v = v.as_mut_ptr();
74	let mut hole = CopyOnDrop::new(&*tmp, v.add(`1`));
75	ptr::copy_nonoverlapping(v.add(`1`), v.add(`0`), `1`);
76
77	for i in `2`..len {
78	if !is_less(&v.add(i), &tmp) {
79	break;
80	}
81
82	// Move `i`-th element one place to the left, thus shifting the hole to the right.
83	ptr::copy_nonoverlapping(v.add(i), v.add(i - `1`), `1`);
84	hole.dest = v.add(i);
85	}
86	// `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
87	}
88	}
89	}
90
91	/// Shifts the last element to the left until it encounters a smaller or equal element.
92	fn shift_tail<T, F>(v: &mut [T], is_less: &F)
93	where
94	F: Fn(&T, &T) -> bool,
95	{
96	let len = v.len();
97	// SAFETY: The unsafe operations below involves indexing without a bound check (by offsetting a
98	// pointer) and copying memory (`ptr::copy_nonoverlapping`).
99	//
100	// a. Indexing:
101	// 1. We checked the size of the array to >= 2.
102	// 2. All the indexing that we will do is always between `0 <= index < len-1` at most.
103	//
104	// b. Memory copying
105	// 1. We are obtaining pointers to references which are guaranteed to be valid.
106	// 2. They cannot overlap because we obtain pointers to difference indices of the slice.
107	// Namely, `i` and `i+1`.
108	// 3. If the slice is properly aligned, the elements are properly aligned.
109	// It is the caller's responsibility to make sure the slice is properly aligned.
110	//
111	// See comments below for further detail.
112	unsafe {
113	// If the last two elements are out-of-order...
114	if len >= `2` && is_less(v.get_unchecked(len - `1`), v.get_unchecked(len - `2`)) {
115	// Read the last element into a stack-allocated variable. If a following comparison
116	// operation panics, `hole` will get dropped and automatically write the element back
117	// into the slice.
118	let tmp = mem::ManuallyDrop::new(ptr::read(v.get_unchecked(len - `1`)));
119	let v = v.as_mut_ptr();
120	let mut hole = CopyOnDrop::new(&*tmp, v.add(len - `2`));
121	ptr::copy_nonoverlapping(v.add(len - `2`), v.add(len - `1`), `1`);
122
123	for i in (`0`..len - `2`).rev() {
124	if !is_less(&tmp, &v.add(i)) {
125	break;
126	}
127
128	// Move `i`-th element one place to the right, thus shifting the hole to the left.
129	ptr::copy_nonoverlapping(v.add(i), v.add(i + `1`), `1`);
130	hole.dest = v.add(i);
131	}
132	// `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
133	}
134	}
135	}
136
137	/// Partially sorts a slice by shifting several out-of-order elements around.
138	///
139	/// Returns `true` if the slice is sorted at the end. This function is O(n) worst-case.
140	#[cold]
141	fn partial_insertion_sort<T, F>(v: &mut [T], is_less: &F) -> bool
142	where
143	F: Fn(&T, &T) -> bool,
144	{
145	// Maximum number of adjacent out-of-order pairs that will get shifted.
146	const MAX_STEPS: usize = `5`;
147	// If the slice is shorter than this, don't shift any elements.
148	const SHORTEST_SHIFTING: usize = `50`;
149
150	let len = v.len();
151	let mut i = `1`;
152
153	for _ in `0`..MAX_STEPS {
154	// SAFETY: We already explicitly did the bound checking with `i < len`.
155	// All our subsequent indexing is only in the range `0 <= index < len`
156	unsafe {
157	// Find the next pair of adjacent out-of-order elements.
158	while i < len && !is_less(v.get_unchecked(i), v.get_unchecked(i - `1`)) {
159	i += `1`;
160	}
161	}
162
163	// Are we done?
164	if i == len {
165	return `true`;
166	}
167
168	// Don't shift elements on short arrays, that has a performance cost.
169	if len < SHORTEST_SHIFTING {
170	return `false`;
171	}
172
173	// Swap the found pair of elements. This puts them in correct order.
174	v.swap(i - `1`, i);
175
176	// Shift the smaller element to the left.
177	shift_tail(&mut v[..i], is_less);
178	// Shift the greater element to the right.
179	shift_head(&mut v[i..], is_less);
180	}
181
182	// Didn't manage to sort the slice in the limited number of steps.
183	`false`
184	}
185
186	/// Sorts a slice using insertion sort, which is O(n^2) worst-case.
187	fn insertion_sort<T, F>(v: &mut [T], is_less: &F)
188	where
189	F: Fn(&T, &T) -> bool,
190	{
191	for i: usize in `1`..v.len() {
192	shift_tail(&mut v[..i + `1`], is_less);
193	}
194	}
195
/// Sorts `v` using heapsort, which guarantees *O*(*n* \* log(*n*)) worst-case.
///
/// `is_less(a, b)` must return `true` when `a` should be ordered before `b`. All element access
/// is bounds-checked, so an inconsistent comparator can only produce an unspecified order.
#[cold]
fn heapsort<T, F>(v: &mut [T], is_less: &F)
where
    F: Fn(&T, &T) -> bool,
{
    // This binary heap respects the invariant `parent >= child`.
    let sift_down = |v: &mut [T], mut node: usize| {
        loop {
            // Children of `node`.
            let mut child = 2 * node + 1;
            if child >= v.len() {
                break;
            }

            // Choose the greater child.
            if child + 1 < v.len() && is_less(&v[child], &v[child + 1]) {
                child += 1;
            }

            // Stop if the invariant holds at `node`.
            if !is_less(&v[node], &v[child]) {
                break;
            }

            // Swap `node` with the greater child, move one step down, and continue sifting.
            v.swap(node, child);
            node = child;
        }
    };

    // Build the heap in linear time.
    for i in (0..v.len() / 2).rev() {
        sift_down(v, i);
    }

    // Pop maximal elements from the heap.
    for i in (1..v.len()).rev() {
        v.swap(0, i);
        sift_down(&mut v[..i], 0);
    }
}
238
/// Partitions `v` into elements smaller than `pivot`, followed by elements greater than or equal
/// to `pivot`.
///
/// Returns the number of elements smaller than `pivot`.
///
/// Partitioning is performed block-by-block in order to minimize the cost of branching operations.
/// This idea is presented in the [BlockQuicksort][pdf] paper.
///
/// # Panics
///
/// Panics if `T` is a zero-sized type (see the assertion in the inner `width` helper).
///
/// [pdf]: https://drops.dagstuhl.de/opus/volltexte/2016/6389/pdf/LIPIcs-ESA-2016-38.pdf
fn partition_in_blocks<T, F>(v: &mut [T], pivot: &T, is_less: &F) -> usize
where
    F: Fn(&T, &T) -> bool,
{
    // Number of elements in a typical block.
    const BLOCK: usize = 128;

    // The partitioning algorithm repeats the following steps until completion:
    //
    // 1. Trace a block from the left side to identify elements greater than or equal to the pivot.
    // 2. Trace a block from the right side to identify elements smaller than the pivot.
    // 3. Exchange the identified elements between the left and right side.
    //
    // We keep the following variables for a block of elements:
    //
    // 1. `block` - Number of elements in the block.
    // 2. `start` - Start pointer into the `offsets` array.
    // 3. `end` - End pointer into the `offsets` array.
    // 4. `offsets` - Indices of out-of-order elements within the block.

    // The current block on the left side (from `l` to `l.add(block_l)`).
    let mut l = v.as_mut_ptr();
    let mut block_l = BLOCK;
    let mut start_l = ptr::null_mut();
    let mut end_l = ptr::null_mut();
    let mut offsets_l = [MaybeUninit::<u8>::uninit(); BLOCK];

    // The current block on the right side (from `r.sub(block_r)` to `r`).
    // SAFETY: The documentation for `.add()` specifically mentions that
    // `vec.as_ptr().add(vec.len())` is always safe.
    let mut r = unsafe { l.add(v.len()) };
    let mut block_r = BLOCK;
    let mut start_r = ptr::null_mut();
    let mut end_r = ptr::null_mut();
    let mut offsets_r = [MaybeUninit::<u8>::uninit(); BLOCK];

    // FIXME: When we get VLAs, try creating one array of length `min(v.len(), 2 * BLOCK)` rather
    // than two fixed-size arrays of length `BLOCK`. VLAs might be more cache-efficient.

    // Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive).
    fn width<T>(l: *mut T, r: *mut T) -> usize {
        assert!(mem::size_of::<T>() > 0);
        // FIXME: this should *likely* use `offset_from`, but more
        // investigation is needed (including running tests in miri).
        // TODO unstable: (r.addr() - l.addr()) / mem::size_of::<T>()
        (r as usize - l as usize) / mem::size_of::<T>()
    }

    loop {
        // We are done with partitioning block-by-block when `l` and `r` get very close. Then we do
        // some patch-up work in order to partition the remaining elements in between.
        let is_done = width(l, r) <= 2 * BLOCK;

        if is_done {
            // Number of remaining elements (still not compared to the pivot).
            let mut rem = width(l, r);
            if start_l < end_l || start_r < end_r {
                rem -= BLOCK;
            }

            // Adjust block sizes so that the left and right block don't overlap, but get perfectly
            // aligned to cover the whole remaining gap.
            if start_l < end_l {
                block_r = rem;
            } else if start_r < end_r {
                block_l = rem;
            } else {
                // There were the same number of elements to switch on both blocks during the last
                // iteration, so there are no remaining elements on either block. Cover the
                // remaining items with roughly equally-sized blocks.
                block_l = rem / 2;
                block_r = rem - block_l;
            }
            debug_assert!(block_l <= BLOCK && block_r <= BLOCK);
            debug_assert!(width(l, r) == block_l + block_r);
        }

        if start_l == end_l {
            // Trace `block_l` elements from the left side.
            // TODO unstable: start_l = MaybeUninit::slice_as_mut_ptr(&mut offsets_l);
            start_l = offsets_l.as_mut_ptr() as *mut u8;
            end_l = start_l;
            let mut elem = l;

            for i in 0..block_l {
                // SAFETY: The unsafety operations below involve the usage of the `offset`.
                //         According to the conditions required by the function, we satisfy them
                //         because:
                //         1. `offsets_l` is stack-allocated, and thus considered separate
                //            allocated object.
                //         2. The function `is_less` returns a `bool`.
                //            Casting a `bool` will never overflow `isize`.
                //         3. We have guaranteed that `block_l` will be `<= BLOCK`.
                //            Plus, `end_l` was initially set to the begin pointer of `offsets_l`
                //            which was declared on the stack. Thus, we know that even in the worst
                //            case (all invocations of `is_less` return false) we will only be at
                //            most 1 byte past the end.
                //        Another unsafety operation here is dereferencing `elem`.
                //        However, `elem` was initially the begin pointer to the slice which is
                //        always valid.
                unsafe {
                    // Branchless comparison: always record `i`, but only advance `end_l` (and so
                    // keep the record) when the element is not smaller than the pivot.
                    *end_l = i as u8;
                    end_l = end_l.offset(!is_less(&*elem, pivot) as isize);
                    elem = elem.offset(1);
                }
            }
        }

        if start_r == end_r {
            // Trace `block_r` elements from the right side.
            // TODO unstable: start_r = MaybeUninit::slice_as_mut_ptr(&mut offsets_r);
            start_r = offsets_r.as_mut_ptr() as *mut u8;
            end_r = start_r;
            let mut elem = r;

            for i in 0..block_r {
                // SAFETY: The unsafety operations below involve the usage of the `offset`.
                //         According to the conditions required by the function, we satisfy them
                //         because:
                //         1. `offsets_r` is stack-allocated, and thus considered separate
                //            allocated object.
                //         2. The function `is_less` returns a `bool`.
                //            Casting a `bool` will never overflow `isize`.
                //         3. We have guaranteed that `block_r` will be `<= BLOCK`.
                //            Plus, `end_r` was initially set to the begin pointer of `offsets_r`
                //            which was declared on the stack. Thus, we know that even in the worst
                //            case (all invocations of `is_less` return true) we will only be at
                //            most 1 byte past the end.
                //        Another unsafety operation here is dereferencing `elem`.
                //        However, `elem` was initially `1 * sizeof(T)` past the end and we
                //        decrement it by `1 * sizeof(T)` before accessing it.
                //        Plus, `block_r` was asserted to be at most `BLOCK` and `elem` will
                //        therefore at most be pointing to the beginning of the slice.
                unsafe {
                    // Branchless comparison: always record `i`, but only advance `end_r` (and so
                    // keep the record) when the element is smaller than the pivot.
                    elem = elem.offset(-1);
                    *end_r = i as u8;
                    end_r = end_r.offset(is_less(&*elem, pivot) as isize);
                }
            }
        }

        // Number of out-of-order elements to swap between the left and right side.
        let count = Ord::min(width(start_l, end_l), width(start_r, end_r));

        if count > 0 {
            macro_rules! left {
                () => {
                    l.offset(*start_l as isize)
                };
            }
            macro_rules! right {
                () => {
                    r.offset(-(*start_r as isize) - 1)
                };
            }

            // Instead of swapping one pair at the time, it is more efficient to perform a cyclic
            // permutation. This is not strictly equivalent to swapping, but produces a similar
            // result using fewer memory operations.

            // SAFETY: The use of `ptr::read` is valid because there is at least one element in
            // both `offsets_l` and `offsets_r`, so `left!` is a valid pointer to read from.
            //
            // The uses of `left!` involve calls to `offset` on `l`, which points to the
            // beginning of `v`. All the offsets pointed-to by `start_l` are at most `block_l`, so
            // these `offset` calls are safe as all reads are within the block. The same argument
            // applies for the uses of `right!`.
            //
            // The calls to `start_l.offset` are valid because there are at most `count-1` of them,
            // plus the final one at the end of the unsafe block, where `count` is the minimum
            // number of collected offsets in `offsets_l` and `offsets_r`, so there is no risk of
            // there not being enough elements. The same reasoning applies to the calls to
            // `start_r.offset`.
            //
            // The calls to `copy_nonoverlapping` are safe because `left!` and `right!` are
            // guaranteed not to overlap, and are valid because of the reasoning above.
            unsafe {
                let tmp = ptr::read(left!());
                ptr::copy_nonoverlapping(right!(), left!(), 1);

                for _ in 1..count {
                    start_l = start_l.offset(1);
                    ptr::copy_nonoverlapping(left!(), right!(), 1);
                    start_r = start_r.offset(1);
                    ptr::copy_nonoverlapping(right!(), left!(), 1);
                }

                ptr::copy_nonoverlapping(&tmp, right!(), 1);
                mem::forget(tmp);
                start_l = start_l.offset(1);
                start_r = start_r.offset(1);
            }
        }

        if start_l == end_l {
            // All out-of-order elements in the left block were moved. Move to the next block.

            // block-width-guarantee
            // SAFETY: if `!is_done` then the slice width is guaranteed to be at least `2*BLOCK`
            // wide. There are at most `BLOCK` elements in `offsets_l` because of its size, so the
            // `offset` operation is safe. Otherwise, the debug assertions in the `is_done` case
            // guarantee that `width(l, r) == block_l + block_r`, namely, that the block sizes have
            // been adjusted to account for the smaller number of remaining elements.
            l = unsafe { l.add(block_l) };
        }

        if start_r == end_r {
            // All out-of-order elements in the right block were moved. Move to the previous block.

            // SAFETY: Same argument as [block-width-guarantee]. Either this is a full block
            // `2*BLOCK`-wide, or `block_r` has been adjusted for the last handful of elements.
            r = unsafe { r.offset(-(block_r as isize)) };
        }

        if is_done {
            break;
        }
    }

    // All that remains now is at most one block (either the left or the right) with out-of-order
    // elements that need to be moved. Such remaining elements can be simply shifted to the end
    // within their block.

    if start_l < end_l {
        // The left block remains.
        // Move its remaining out-of-order elements to the far right.
        debug_assert_eq!(width(l, r), block_l);
        while start_l < end_l {
            // remaining-elements-safety
            // SAFETY: while the loop condition holds there are still elements in `offsets_l`, so
            // it is safe to point `end_l` to the previous element.
            //
            // The `ptr::swap` is safe if both its arguments are valid for reads and writes:
            //  - Per the debug assert above, the distance between `l` and `r` is `block_l`
            //    elements, so there can be at most `block_l` remaining offsets between `start_l`
            //    and `end_l`. This means `r` will be moved at most `block_l` steps back, which
            //    makes the `r.offset` calls valid (at that point `l == r`).
            //  - `offsets_l` contains valid offsets into `v` collected during the partitioning of
            //    the last block, so the `l.offset` calls are valid.
            unsafe {
                end_l = end_l.offset(-1);
                ptr::swap(l.offset(*end_l as isize), r.offset(-1));
                r = r.offset(-1);
            }
        }
        width(v.as_mut_ptr(), r)
    } else if start_r < end_r {
        // The right block remains.
        // Move its remaining out-of-order elements to the far left.
        debug_assert_eq!(width(l, r), block_r);
        while start_r < end_r {
            // SAFETY: See the reasoning in [remaining-elements-safety].
            unsafe {
                end_r = end_r.offset(-1);
                ptr::swap(l, r.offset(-(*end_r as isize) - 1));
                l = l.offset(1);
            }
        }
        width(v.as_mut_ptr(), l)
    } else {
        // Nothing else to do, we're done.
        width(v.as_mut_ptr(), l)
    }
}
501
502	/// Partitions `v` into elements smaller than `v[pivot]`, followed by elements greater than or
503	/// equal to `v[pivot]`.
504	///
505	/// Returns a tuple of:
506	///
507	/// 1. Number of elements smaller than `v[pivot]`.
508	/// 2. True if `v` was already partitioned.
509	fn partition<T, F>(v: &mut [T], pivot: usize, is_less: &F) -> (usize, bool)
510	where
511	F: Fn(&T, &T) -> bool,
512	{
513	let (mid, was_partitioned) = {
514	// Place the pivot at the beginning of slice.
515	v.swap(`0`, pivot);
516	let (pivot, v) = v.split_at_mut(`1`);
517	let pivot = &mut pivot[`0`];
518
519	// Read the pivot into a stack-allocated variable for efficiency. If a following comparison
520	// operation panics, the pivot will be automatically written back into the slice.
521
522	// SAFETY: `pivot` is a reference to the first element of `v`, so `ptr::read` is safe.
523	let tmp = mem::ManuallyDrop::new(unsafe { ptr::read(pivot) });
524	let _pivot_guard = unsafe { CopyOnDrop::new(&*tmp, pivot) };
525	let pivot = &*tmp;
526
527	// Find the first pair of out-of-order elements.
528	let mut l = `0`;
529	let mut r = v.len();
530
531	// SAFETY: The unsafety below involves indexing an array.
532	// For the first one: We already do the bounds checking here with `l < r`.
533	// For the second one: We initially have `l == 0` and `r == v.len()` and we checked that `l < r` at every indexing operation.
534	// From here we know that `r` must be at least `r == l` which was shown to be valid from the first one.
535	unsafe {
536	// Find the first element greater than or equal to the pivot.
537	while l < r && is_less(v.get_unchecked(l), pivot) {
538	l += `1`;
539	}
540
541	// Find the last element smaller that the pivot.
542	while l < r && !is_less(v.get_unchecked(r - `1`), pivot) {
543	r -= `1`;
544	}
545	}
546
547	(
548	l + partition_in_blocks(&mut v[l..r], pivot, is_less),
549	l >= r,
550	)
551
552	// `_pivot_guard` goes out of scope and writes the pivot (which is a stack-allocated
553	// variable) back into the slice where it originally was. This step is critical in ensuring
554	// safety!
555	};
556
557	// Place the pivot between the two partitions.
558	v.swap(`0`, mid);
559
560	(mid, was_partitioned)
561	}
562
563	/// Partitions `v` into elements equal to `v[pivot]` followed by elements greater than `v[pivot]`.
564	///
565	/// Returns the number of elements equal to the pivot. It is assumed that `v` does not contain
566	/// elements smaller than the pivot.
567	fn partition_equal<T, F>(v: &mut [T], pivot: usize, is_less: &F) -> usize
568	where
569	F: Fn(&T, &T) -> bool,
570	{
571	// Place the pivot at the beginning of slice.
572	v.swap(`0`, pivot);
573	let (pivot, v) = v.split_at_mut(`1`);
574	let pivot = &mut pivot[`0`];
575
576	// Read the pivot into a stack-allocated variable for efficiency. If a following comparison
577	// operation panics, the pivot will be automatically written back into the slice.
578	// SAFETY: The pointer here is valid because it is obtained from a reference to a slice.
579	let tmp = mem::ManuallyDrop::new(unsafe { ptr::read(pivot) });
580	let _pivot_guard = unsafe { CopyOnDrop::new(&*tmp, pivot) };
581	let pivot = &*tmp;
582
583	// Now partition the slice.
584	let mut l = `0`;
585	let mut r = v.len();
586	loop {
587	// SAFETY: The unsafety below involves indexing an array.
588	// For the first one: We already do the bounds checking here with `l < r`.
589	// For the second one: We initially have `l == 0` and `r == v.len()` and we checked that `l < r` at every indexing operation.
590	// From here we know that `r` must be at least `r == l` which was shown to be valid from the first one.
591	unsafe {
592	// Find the first element greater than the pivot.
593	while l < r && !is_less(pivot, v.get_unchecked(l)) {
594	l += `1`;
595	}
596
597	// Find the last element equal to the pivot.
598	while l < r && is_less(pivot, v.get_unchecked(r - `1`)) {
599	r -= `1`;
600	}
601
602	// Are we done?
603	if l >= r {
604	break;
605	}
606
607	// Swap the found pair of out-of-order elements.
608	r -= `1`;
609	let ptr = v.as_mut_ptr();
610	ptr::swap(ptr.add(l), ptr.add(r));
611	l += `1`;
612	}
613	}
614
615	// We found `l` elements equal to the pivot. Add 1 to account for the pivot itself.
616	l + `1`
617
618	// `_pivot_guard` goes out of scope and writes the pivot (which is a stack-allocated variable)
619	// back into the slice where it originally was. This step is critical in ensuring safety!
620	}
621
/// Scatters some elements around in an attempt to break patterns that might cause imbalanced
/// partitions in quicksort.
///
/// Slices shorter than 8 elements are left untouched. For longer slices, three elements around
/// the middle are swapped with pseudorandomly chosen positions; the generator is seeded from the
/// slice length, so the result is deterministic for a given input.
#[cold]
fn break_patterns<T>(v: &mut [T]) {
    let len = v.len();
    if len >= 8 {
        // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
        let mut random = len as u32;
        let mut gen_u32 = || {
            random ^= random << 13;
            random ^= random >> 17;
            random ^= random << 5;
            random
        };
        let mut gen_usize = || {
            if usize::BITS <= 32 {
                gen_u32() as usize
            } else {
                (((gen_u32() as u64) << 32) | (gen_u32() as u64)) as usize
            }
        };

        // Take random numbers modulo this number.
        // The number fits into `usize` because `len` is not greater than `isize::MAX`.
        let modulus = len.next_power_of_two();

        // Some pivot candidates will be in the nearby of this index. Let's randomize them.
        let pos = len / 4 * 2;

        for i in 0..3 {
            // Generate a random number modulo `len`. However, in order to avoid costly operations
            // we first take it modulo a power of two, and then decrease by `len` until it fits
            // into the range `[0, len - 1]`.
            let mut other = gen_usize() & (modulus - 1);

            // `other` is guaranteed to be less than `2 * len`.
            if other >= len {
                other -= len;
            }

            v.swap(pos - 1 + i, other);
        }
    }
}
666
/// Chooses a pivot in `v` and returns the index and `true` if the slice is likely already sorted.
///
/// Elements in `v` might be reordered in the process: when the maximum number of index swaps is
/// reached, the whole slice is reversed and the returned index is mirrored accordingly.
fn choose_pivot<T, F>(v: &mut [T], is_less: &F) -> (usize, bool)
where
    F: Fn(&T, &T) -> bool,
{
    // Minimum length to choose the median-of-medians method.
    // Shorter slices use the simple median-of-three method.
    const SHORTEST_MEDIAN_OF_MEDIANS: usize = 50;
    // Maximum number of swaps that can be performed in this function.
    const MAX_SWAPS: usize = 4 * 3;

    let len = v.len();

    // Three indices near which we are going to choose a pivot.
    #[allow(clippy::identity_op)]
    let mut a = len / 4 * 1;
    let mut b = len / 4 * 2;
    let mut c = len / 4 * 3;

    // Counts the total number of swaps we are about to perform while sorting indices.
    let mut swaps = 0;

    if len >= 8 {
        // Swaps indices so that `v[a] <= v[b]`.
        // SAFETY: `len >= 8` so there are at least two elements in the neighborhoods of
        // `a`, `b` and `c`. This means the three calls to `sort_adjacent` result in
        // corresponding calls to `sort3` with valid 3-item neighborhoods around each
        // pointer, which in turn means the calls to `sort2` are done with valid
        // references. Thus the `v.get_unchecked` calls are safe, as is the `ptr::swap`
        // call.
        let mut sort2 = |a: &mut usize, b: &mut usize| unsafe {
            // `a` and `b` are references to indices: dereference them to index into `v`.
            if is_less(v.get_unchecked(*b), v.get_unchecked(*a)) {
                ptr::swap(a, b);
                swaps += 1;
            }
        };

        // Swaps indices so that `v[a] <= v[b] <= v[c]`.
        let mut sort3 = |a: &mut usize, b: &mut usize, c: &mut usize| {
            sort2(a, b);
            sort2(b, c);
            sort2(a, b);
        };

        if len >= SHORTEST_MEDIAN_OF_MEDIANS {
            // Finds the median of `v[a - 1], v[a], v[a + 1]` and stores the index into `a`.
            let mut sort_adjacent = |a: &mut usize| {
                let tmp = *a;
                sort3(&mut (tmp - 1), a, &mut (tmp + 1));
            };

            // Find medians in the neighborhoods of `a`, `b`, and `c`.
            sort_adjacent(&mut a);
            sort_adjacent(&mut b);
            sort_adjacent(&mut c);
        }

        // Find the median among `a`, `b`, and `c`.
        sort3(&mut a, &mut b, &mut c);
    }

    if swaps < MAX_SWAPS {
        (b, swaps == 0)
    } else {
        // The maximum number of swaps was performed. Chances are the slice is descending or mostly
        // descending, so reversing will probably help sort it faster.
        v.reverse();
        (len - 1 - b, true)
    }
}
739
740	/// Sorts `v` recursively.
741	///
742	/// If the slice had a predecessor in the original array, it is specified as `pred`.
743	///
744	/// `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero,
745	/// this function will immediately switch to heapsort.
746	fn recurse<'a, T, F>(mut v: &'a mut [T], is_less: &F, mut pred: Option<&'a mut T>, mut limit: u32)
747	where
748	T: Send,
749	F: Fn(&T, &T) -> bool + Sync,
750	{
751	// Slices of up to this length get sorted using insertion sort.
752	const MAX_INSERTION: usize = `20`;
753	// If both partitions are up to this length, we continue sequentially. This number is as small
754	// as possible but so that the overhead of Rayon's task scheduling is still negligible.
755	const MAX_SEQUENTIAL: usize = `2000`;
756
757	// True if the last partitioning was reasonably balanced.
758	let mut was_balanced = `true`;
759	// True if the last partitioning didn't shuffle elements (the slice was already partitioned).
760	let mut was_partitioned = `true`;
761
762	loop {
763	let len = v.len();
764
765	// Very short slices get sorted using insertion sort.
766	if len <= MAX_INSERTION {
767	insertion_sort(v, is_less);
768	return;
769	}
770
771	// If too many bad pivot choices were made, simply fall back to heapsort in order to
772	// guarantee `O(n log(n))` worst-case.*
773	if limit == `0` {
774	heapsort(v, is_less);
775	return;
776	}
777
778	// If the last partitioning was imbalanced, try breaking patterns in the slice by shuffling
779	// some elements around. Hopefully we'll choose a better pivot this time.
780	if !was_balanced {
781	break_patterns(v);
782	limit -= `1`;
783	}
784
785	// Choose a pivot and try guessing whether the slice is already sorted.
786	let (pivot, likely_sorted) = choose_pivot(v, is_less);
787
788	// If the last partitioning was decently balanced and didn't shuffle elements, and if pivot
789	// selection predicts the slice is likely already sorted...
790	if was_balanced && was_partitioned && likely_sorted {
791	// Try identifying several out-of-order elements and shifting them to correct
792	// positions. If the slice ends up being completely sorted, we're done.
793	if partial_insertion_sort(v, is_less) {
794	return;
795	}
796	}
797
798	// If the chosen pivot is equal to the predecessor, then it's the smallest element in the
799	// slice. Partition the slice into elements equal to and elements greater than the pivot.
800	// This case is usually hit when the slice contains many duplicate elements.
801	if let Some(ref p) = pred {
802	if !is_less(p, &v[pivot]) {
803	let mid = partition_equal(v, pivot, is_less);
804
805	// Continue sorting elements greater than the pivot.
806	v = &mut v[mid..];
807	continue;
808	}
809	}
810
811	// Partition the slice.
812	let (mid, was_p) = partition(v, pivot, is_less);
813	was_balanced = Ord::min(mid, len - mid) >= len / `8`;
814	was_partitioned = was_p;
815
816	// Split the slice into `left`, `pivot`, and `right`.
817	let (left, right) = v.split_at_mut(mid);
818	let (pivot, right) = right.split_at_mut(`1`);
819	let pivot = &mut pivot[`0`];
820
821	if Ord::max(left.len(), right.len()) <= MAX_SEQUENTIAL {
822	// Recurse into the shorter side only in order to minimize the total number of recursive
823	// calls and consume less stack space. Then just continue with the longer side (this is
824	// akin to tail recursion).
825	if left.len() < right.len() {
826	recurse(left, is_less, pred, limit);
827	v = right;
828	pred = Some(pivot);
829	} else {
830	recurse(right, is_less, Some(pivot), limit);
831	v = left;
832	}
833	} else {
834	// Sort the left and right half in parallel.
835	rayon_core::join(
836	\|\| recurse(left, is_less, pred, limit),
837	\|\| recurse(right, is_less, Some(pivot), limit),
838	);
839	break;
840	}
841	}
842	}
843
844	/// Sorts `v` using pattern-defeating quicksort in parallel.
845	///
846	/// The algorithm is unstable, in-place, and O(n* \* log(n)) worst-case.*
847	pub(super) fn par_quicksort<T, F>(v: &mut [T], is_less: F)
848	where
849	T: Send,
850	F: Fn(&T, &T) -> bool + Sync,
851	{
852	// Sorting has no meaningful behavior on zero-sized types.
853	if mem::size_of::<T>() == `0` {
854	return;
855	}
856
857	// Limit the number of imbalanced partitions to `floor(log2(len)) + 1`.
858	let limit: u32 = usize::BITS - v.len().leading_zeros();
859
860	recurse(v, &is_less, pred:None, limit);
861	}
862
#[cfg(test)]
mod tests {
    use super::heapsort;
    use rand::distributions::Uniform;
    use rand::{thread_rng, Rng};

    /// Checks `heapsort` on random inputs of assorted lengths and value ranges, in both
    /// directions, and checks that an inconsistent comparator does not cause a panic.
    #[test]
    fn test_heapsort() {
        let rng = &mut thread_rng();

        for len in (0..25).chain(500..501) {
            for &modulus in &[5, 10, 100] {
                let dist = Uniform::new(0, modulus);
                for _ in 0..100 {
                    let v: Vec<i32> = rng.sample_iter(&dist).take(len).collect();

                    // Test heapsort using `<` operator.
                    let mut tmp = v.clone();
                    heapsort(&mut tmp, &|a, b| a < b);
                    assert!(tmp.windows(2).all(|w| w[0] <= w[1]));

                    // Test heapsort using `>` operator.
                    let mut tmp = v.clone();
                    heapsort(&mut tmp, &|a, b| a > b);
                    assert!(tmp.windows(2).all(|w| w[0] >= w[1]));
                }
            }
        }

        // Sort using a completely random comparison function.
        // This will reorder the elements *somehow*, but won't panic.
        let mut v: Vec<_> = (0..100).collect();
        heapsort(&mut v, &|_, _| thread_rng().gen());
        heapsort(&mut v, &|a, b| a < b);

        // After a proper sort every value must be back at its own index.
        for (i, &entry) in v.iter().enumerate() {
            assert_eq!(entry, i);
        }
    }
}
903