//! Parallel merge sort.
//!
//! The sequential portion of this implementation is copied from `std::slice::sort` and then
//! parallelized. The only difference from the original is that the sequential `mergesort`
//! returns `MergesortResult` and leaves descending arrays intact.

use crate::iter::*;
use crate::slice::ParallelSliceMut;
use crate::SendPtr;
use std::mem;
use std::mem::size_of;
use std::ptr;
use std::slice;

/// Returns the old value of `*ptr`, then advances `*ptr` by one element.
unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
    let old: *mut T = *ptr;
    *ptr = ptr.offset(1);
    old
}

/// Moves `*ptr` back by one element, then returns the new value of `*ptr`.
unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
    *ptr = ptr.offset(-1);
    *ptr
}

/// When dropped, copies from `src` into `dest` a sequence of length `len`.
struct CopyOnDrop<T> {
    src: *const T,
    dest: *mut T,
    len: usize,
}

impl<T> Drop for CopyOnDrop<T> {
    fn drop(&mut self) {
        unsafe {
            ptr::copy_nonoverlapping(self.src, self.dest, self.len);
        }
    }
}

/// Inserts `v[0]` into the pre-sorted sequence `v[1..]` so that the whole `v[..]` becomes sorted.
///
/// This is the integral subroutine of insertion sort.
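///
/// A safe sketch of the same operation using only std (illustrative; the real code below
/// does it with raw copies and a panic-safety guard instead):
///
/// ```
/// let mut v = [5, 1, 2, 3, 4, 6];
/// // Find where v[0] belongs in the sorted tail, then rotate it into place.
/// let pos = v[1..].partition_point(|&x| x < v[0]);
/// v[..=pos].rotate_left(1);
/// assert_eq!(v, [1, 2, 3, 4, 5, 6]);
/// ```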
fn insert_head<T, F>(v: &mut [T], is_less: &F)
where
    F: Fn(&T, &T) -> bool,
{
    if v.len() >= 2 && is_less(&v[1], &v[0]) {
        unsafe {
            // There are three ways to implement insertion here:
            //
            // 1. Swap adjacent elements until the first one gets to its final destination.
            //    However, this way we copy data around more than is necessary. If elements are
            //    big structures (costly to copy), this method will be slow.
            //
            // 2. Iterate until the right place for the first element is found. Then shift the
            //    elements succeeding it to make room for it and finally place it into the
            //    remaining hole. This is a good method.
            //
            // 3. Copy the first element into a temporary variable. Iterate until the right place
            //    for it is found. As we go along, copy every traversed element into the slot
            //    preceding it. Finally, copy data from the temporary variable into the remaining
            //    hole. This method is very good. Benchmarks demonstrated slightly better
            //    performance than with the 2nd method.
            //
            // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
            let tmp = mem::ManuallyDrop::new(ptr::read(&v[0]));

            // Intermediate state of the insertion process is always tracked by `hole`, which
            // serves two purposes:
            // 1. Protects integrity of `v` from panics in `is_less`.
            // 2. Fills the remaining hole in `v` in the end.
            //
            // Panic safety:
            //
            // If `is_less` panics at any point during the process, `hole` will get dropped and
            // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
            // initially held exactly once.
            let mut hole = InsertionHole {
                src: &*tmp,
                dest: &mut v[1],
            };
            ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);

            for i in 2..v.len() {
                if !is_less(&v[i], &*tmp) {
                    break;
                }
                ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
                hole.dest = &mut v[i];
            }
            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
        }
    }

    // When dropped, copies from `src` into `dest`.
    struct InsertionHole<T> {
        src: *const T,
        dest: *mut T,
    }

    impl<T> Drop for InsertionHole<T> {
        fn drop(&mut self) {
            unsafe {
                ptr::copy_nonoverlapping(self.src, self.dest, 1);
            }
        }
    }
}

/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
/// stores the result into `v[..]`.
///
/// # Safety
///
/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
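///
/// A safe sketch of the stable two-way merge this performs, writing to an owned output
/// rather than merging in place (illustrative only):
///
/// ```
/// fn merge_sorted(left: &[i32], right: &[i32]) -> Vec<i32> {
///     let (mut i, mut j) = (0, 0);
///     let mut out = Vec::with_capacity(left.len() + right.len());
///     while i < left.len() && j < right.len() {
///         // On ties, take from `left` first so the merge stays stable.
///         if right[j] < left[i] {
///             out.push(right[j]);
///             j += 1;
///         } else {
///             out.push(left[i]);
///             i += 1;
///         }
///     }
///     out.extend_from_slice(&left[i..]);
///     out.extend_from_slice(&right[j..]);
///     out
/// }
/// assert_eq!(merge_sorted(&[1, 3, 5], &[2, 4, 6]), [1, 2, 3, 4, 5, 6]);
/// ```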
unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &F)
where
    F: Fn(&T, &T) -> bool,
{
    let len = v.len();
    let v = v.as_mut_ptr();
    let v_mid = v.add(mid);
    let v_end = v.add(len);

    // The merge process first copies the shorter run into `buf`. Then it traces the newly copied
    // run and the longer run forwards (or backwards), comparing their next unconsumed elements and
    // copying the lesser (or greater) one into `v`.
    //
    // As soon as the shorter run is fully consumed, the process is done. If the longer run gets
    // consumed first, then we must copy whatever is left of the shorter run into the remaining
    // hole in `v`.
    //
    // Intermediate state of the process is always tracked by `hole`, which serves two purposes:
    // 1. Protects integrity of `v` from panics in `is_less`.
    // 2. Fills the remaining hole in `v` if the longer run gets consumed first.
    //
    // Panic safety:
    //
    // If `is_less` panics at any point during the process, `hole` will get dropped and fill the
    // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
    // object it initially held exactly once.
    let mut hole;

    if mid <= len - mid {
        // The left run is shorter.
        ptr::copy_nonoverlapping(v, buf, mid);
        hole = MergeHole {
            start: buf,
            end: buf.add(mid),
            dest: v,
        };

        // Initially, these pointers point to the beginnings of their arrays.
        let left = &mut hole.start;
        let mut right = v_mid;
        let out = &mut hole.dest;

        while *left < hole.end && right < v_end {
            // Consume the lesser side.
            // If equal, prefer the left run to maintain stability.
            let to_copy = if is_less(&*right, &**left) {
                get_and_increment(&mut right)
            } else {
                get_and_increment(left)
            };
            ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
        }
    } else {
        // The right run is shorter.
        ptr::copy_nonoverlapping(v_mid, buf, len - mid);
        hole = MergeHole {
            start: buf,
            end: buf.add(len - mid),
            dest: v_mid,
        };

        // Initially, these pointers point past the ends of their arrays.
        let left = &mut hole.dest;
        let right = &mut hole.end;
        let mut out = v_end;

        while v < *left && buf < *right {
            // Consume the greater side.
            // If equal, prefer the right run to maintain stability.
            let to_copy = if is_less(&*right.offset(-1), &*left.offset(-1)) {
                decrement_and_get(left)
            } else {
                decrement_and_get(right)
            };
            ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
        }
    }
    // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
    // it will now be copied into the hole in `v`.

    // When dropped, copies the range `start..end` into `dest..`.
    struct MergeHole<T> {
        start: *mut T,
        end: *mut T,
        dest: *mut T,
    }

    impl<T> Drop for MergeHole<T> {
        fn drop(&mut self) {
            // `T` is not a zero-sized type, so `offset_from` is well-defined here.
            unsafe {
                let len = self.end.offset_from(self.start) as usize;
                ptr::copy_nonoverlapping(self.start, self.dest, len);
            }
        }
    }
}

/// The result of merge sort.
#[must_use]
#[derive(Clone, Copy, PartialEq, Eq)]
enum MergesortResult {
    /// The slice was already non-descending, so it was left intact.
    NonDescending,
    /// The slice was strictly descending, so it was left intact.
    Descending,
    /// The slice was sorted.
    Sorted,
}

/// A sorted run that starts at index `start` and is of length `len`.
#[derive(Clone, Copy)]
struct Run {
    start: usize,
    len: usize,
}

/// Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
/// if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
/// algorithm should continue building a new run instead, `None` is returned.
///
/// TimSort is infamous for its buggy implementations, as described here:
/// http://envisage-project.eu/timsort-specification-and-verification/
///
/// The gist of the story is: we must enforce the invariants on the top four runs on the stack.
/// Enforcing them on just the top three is not sufficient to ensure that the invariants will
/// still hold for *all* runs in the stack.
///
/// This function correctly checks invariants for the top four runs. Additionally, if the top
/// run starts at index 0, it will always demand a merge operation until the stack is fully
/// collapsed, in order to complete the sort.
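///
/// For example (illustrative), suppose the stack holds runs of lengths `[10, 8, 6]` with the
/// top of the stack last, and the top run does not start at index 0. Invariant 2 below is
/// violated because `10 <= 8 + 6`, so a merge is demanded, and since `10 >= 6` the top two
/// runs (lengths 8 and 6) are the ones merged, i.e. `collapse` returns `Some(n - 2)`:
///
/// ```
/// let lens = [10usize, 8, 6];
/// let n = lens.len();
/// assert!(lens[n - 3] <= lens[n - 2] + lens[n - 1]); // invariant 2 violated => merge
/// assert!(!(lens[n - 3] < lens[n - 1])); // so merge the top two runs: Some(n - 2)
/// ```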
#[inline]
fn collapse(runs: &[Run]) -> Option<usize> {
    let n = runs.len();

    if n >= 2
        && (runs[n - 1].start == 0
            || runs[n - 2].len <= runs[n - 1].len
            || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
            || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
    {
        if n >= 3 && runs[n - 3].len < runs[n - 1].len {
            Some(n - 3)
        } else {
            Some(n - 2)
        }
    } else {
        None
    }
}

/// Sorts a slice using merge sort, unless it is already in descending order.
///
/// This function doesn't modify the slice if it is already non-descending or descending.
/// Otherwise, it sorts the slice into non-descending order.
///
/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
/// [here](https://github.com/python/cpython/blob/main/Objects/listsort.txt).
///
/// The algorithm identifies strictly descending and non-descending subsequences, which are called
/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
/// satisfied:
///
/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
///
/// The invariants ensure that the total running time is *O*(*n* \* log(*n*)) worst-case.
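///
/// A quick sanity check of why the invariants bound the stack depth (illustrative only): the
/// minimal run lengths satisfying invariant 2 grow at least as fast as Fibonacci numbers, so
/// the stack can only hold `O(log n)` runs.
///
/// ```
/// let mut lens = vec![1u64, 2];
/// for _ in 0..10 {
///     let n = lens.len();
///     // The smallest next length that keeps `runs[i - 2].len > runs[i - 1].len + runs[i].len`.
///     lens.push(lens[n - 2] + lens[n - 1] + 1);
/// }
/// assert!(*lens.last().unwrap() > 100); // exponential growth => logarithmic stack depth
/// ```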
///
/// # Safety
///
/// The argument `buf` is used as a temporary buffer and must be at least as long as `v`.
unsafe fn mergesort<T, F>(v: &mut [T], buf: *mut T, is_less: &F) -> MergesortResult
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Very short runs are extended using insertion sort to span at least this many elements.
    const MIN_RUN: usize = 10;

    let len = v.len();

    // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
    // strange decision, but consider the fact that merges more often go in the opposite direction
    // (forwards). According to benchmarks, merging forwards is slightly faster than merging
    // backwards. To conclude, identifying runs by traversing backwards improves performance.
    let mut runs = vec![];
    let mut end = len;
    while end > 0 {
        // Find the next natural run, and reverse it if it's strictly descending.
        let mut start = end - 1;

        if start > 0 {
            start -= 1;

            if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
                while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
                    start -= 1;
                }

                // If this descending run covers the whole slice, return immediately.
                if start == 0 && end == len {
                    return MergesortResult::Descending;
                } else {
                    v[start..end].reverse();
                }
            } else {
                while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
                    start -= 1;
                }

                // If this non-descending run covers the whole slice, return immediately.
                if end - start == len {
                    return MergesortResult::NonDescending;
                }
            }
        }

        // Insert some more elements into the run if it's too short. Insertion sort is faster than
        // merge sort on short sequences, so this significantly improves performance.
        while start > 0 && end - start < MIN_RUN {
            start -= 1;
            insert_head(&mut v[start..end], &is_less);
        }

        // Push this run onto the stack.
        runs.push(Run {
            start,
            len: end - start,
        });
        end = start;

        // Merge some pairs of adjacent runs to satisfy the invariants.
        while let Some(r) = collapse(&runs) {
            let left = runs[r + 1];
            let right = runs[r];
            merge(
                &mut v[left.start..right.start + right.len],
                left.len,
                buf,
                &is_less,
            );

            runs[r] = Run {
                start: left.start,
                len: left.len + right.len,
            };
            runs.remove(r + 1);
        }
    }

    // Finally, exactly one run must remain in the stack.
    debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);

    // The original order of the slice was neither non-descending nor descending.
    MergesortResult::Sorted
}

////////////////////////////////////////////////////////////////////////////
// Everything above this line is copied from `std::slice::sort` (with very minor tweaks).
// Everything below this line is parallelization.
////////////////////////////////////////////////////////////////////////////

/// Splits two sorted slices so that they can be merged in parallel.
///
/// Returns two indices `(a, b)` so that slices `left[..a]` and `right[..b]` come before
/// `left[a..]` and `right[b..]`.
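///
/// A sketch of the left-biased half of this search using std's `partition_point`
/// (illustrative; the real code below also handles the case where `right` is longer, with
/// the opposite tie-breaking so that stability is preserved):
///
/// ```
/// let (left, right) = ([1, 3, 5, 7], [2, 4, 6]);
/// let a = left.len() / 2; // split the longer slice in half
/// // Find the first element in `right` that is >= `left[a]`.
/// let b = right.partition_point(|&x| x < left[a]);
/// assert_eq!((a, b), (2, 2)); // [1, 3] and [2, 4] precede [5, 7] and [6]
/// ```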
fn split_for_merge<T, F>(left: &[T], right: &[T], is_less: &F) -> (usize, usize)
where
    F: Fn(&T, &T) -> bool,
{
    let left_len = left.len();
    let right_len = right.len();

    if left_len >= right_len {
        let left_mid = left_len / 2;

        // Find the first element in `right` that is greater than or equal to `left[left_mid]`.
        let mut a = 0;
        let mut b = right_len;
        while a < b {
            let m = a + (b - a) / 2;
            if is_less(&right[m], &left[left_mid]) {
                a = m + 1;
            } else {
                b = m;
            }
        }

        (left_mid, a)
    } else {
        let right_mid = right_len / 2;

        // Find the first element in `left` that is greater than `right[right_mid]`.
        let mut a = 0;
        let mut b = left_len;
        while a < b {
            let m = a + (b - a) / 2;
            if is_less(&right[right_mid], &left[m]) {
                b = m;
            } else {
                a = m + 1;
            }
        }

        (a, right_mid)
    }
}

/// Merges slices `left` and `right` in parallel and stores the result into `dest`.
///
/// # Safety
///
/// The `dest` pointer must have enough space to store the result.
///
/// Even if `is_less` panics at any point during the merge process, this function will fully copy
/// all elements from `left` and `right` into `dest` (not necessarily in sorted order).
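///
/// A safe sketch of the divide-and-conquer strategy on owned buffers (illustrative;
/// `par_merge_sketch` and `SEQ` are hypothetical names for exposition, and the real code
/// below merges in place through raw pointers instead):
///
/// ```
/// fn par_merge_sketch(left: &[i32], right: &[i32], dest: &mut [i32]) {
///     const SEQ: usize = 4; // sequential cutoff; the real value is MAX_SEQUENTIAL
///     if left.is_empty() || right.is_empty() || left.len() + right.len() <= SEQ {
///         // Sequential merge, preferring `left` on ties for stability.
///         let (mut i, mut j) = (0, 0);
///         for d in dest.iter_mut() {
///             if i < left.len() && (j == right.len() || !(right[j] < left[i])) {
///                 *d = left[i];
///                 i += 1;
///             } else {
///                 *d = right[j];
///                 j += 1;
///             }
///         }
///     } else {
///         // Split both inputs as `split_for_merge` does, then merge the halves in parallel.
///         let (a, b) = if left.len() >= right.len() {
///             let mid = left.len() / 2;
///             (mid, right.partition_point(|&x| x < left[mid]))
///         } else {
///             let mid = right.len() / 2;
///             (left.partition_point(|&x| x <= right[mid]), mid)
///         };
///         let (ll, lr) = left.split_at(a);
///         let (rl, rr) = right.split_at(b);
///         let (dl, dr) = dest.split_at_mut(a + b);
///         rayon::join(|| par_merge_sketch(ll, rl, dl), || par_merge_sketch(lr, rr, dr));
///     }
/// }
///
/// let mut out = [0; 8];
/// par_merge_sketch(&[1, 3, 5, 7], &[2, 4, 6, 8], &mut out);
/// assert_eq!(out, [1, 2, 3, 4, 5, 6, 7, 8]);
/// ```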
unsafe fn par_merge<T, F>(left: &mut [T], right: &mut [T], dest: *mut T, is_less: &F)
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Slices whose lengths sum up to this value are merged sequentially. This number is slightly
    // larger than `CHUNK_LENGTH`, and the reason is that merging is faster than merge sorting, so
    // merging needs a bit coarser granularity in order to hide the overhead of Rayon's task
    // scheduling.
    const MAX_SEQUENTIAL: usize = 5000;

    let left_len = left.len();
    let right_len = right.len();

    // Intermediate state of the merge process, which serves two purposes:
    // 1. Protects integrity of `dest` from panics in `is_less`.
    // 2. Copies the remaining elements as soon as one of the two sides is exhausted.
    //
    // Panic safety:
    //
    // If `is_less` panics at any point during the merge process, `s` will get dropped and copy the
    // remaining parts of `left` and `right` into `dest`.
    let mut s = State {
        left_start: left.as_mut_ptr(),
        left_end: left.as_mut_ptr().add(left_len),
        right_start: right.as_mut_ptr(),
        right_end: right.as_mut_ptr().add(right_len),
        dest,
    };

    if left_len == 0 || right_len == 0 || left_len + right_len < MAX_SEQUENTIAL {
        while s.left_start < s.left_end && s.right_start < s.right_end {
            // Consume the lesser side.
            // If equal, prefer the left run to maintain stability.
            let to_copy = if is_less(&*s.right_start, &*s.left_start) {
                get_and_increment(&mut s.right_start)
            } else {
                get_and_increment(&mut s.left_start)
            };
            ptr::copy_nonoverlapping(to_copy, get_and_increment(&mut s.dest), 1);
        }
    } else {
        // Function `split_for_merge` might panic. If that happens, `s` will get dropped and copy
        // the whole `left` and `right` into `dest`.
        let (left_mid, right_mid) = split_for_merge(left, right, is_less);
        let (left_l, left_r) = left.split_at_mut(left_mid);
        let (right_l, right_r) = right.split_at_mut(right_mid);

        // Prevent the destructor of `s` from running. Rayon will ensure that both calls to
        // `par_merge` happen. If one of the two calls panics, they will ensure that elements
        // still get copied into `dest_l` and `dest_r`.
        mem::forget(s);

        // Wrap pointers in SendPtr so that they can be sent to another thread.
        // See the documentation of SendPtr for a full explanation.
        let dest_l = SendPtr(dest);
        let dest_r = SendPtr(dest.add(left_l.len() + right_l.len()));
        rayon_core::join(
            move || par_merge(left_l, right_l, dest_l.get(), is_less),
            move || par_merge(left_r, right_r, dest_r.get(), is_less),
        );
    }
    // Finally, `s` gets dropped if we used sequential merge, thus copying the remaining elements
    // all at once.

    // When dropped, copies arrays `left_start..left_end` and `right_start..right_end` into `dest`,
    // in that order.
    struct State<T> {
        left_start: *mut T,
        left_end: *mut T,
        right_start: *mut T,
        right_end: *mut T,
        dest: *mut T,
    }

    impl<T> Drop for State<T> {
        fn drop(&mut self) {
            let size = size_of::<T>();
            let left_len = (self.left_end as usize - self.left_start as usize) / size;
            let right_len = (self.right_end as usize - self.right_start as usize) / size;

            // Copy array `left`, followed by `right`.
            unsafe {
                ptr::copy_nonoverlapping(self.left_start, self.dest, left_len);
                self.dest = self.dest.add(left_len);
                ptr::copy_nonoverlapping(self.right_start, self.dest, right_len);
            }
        }
    }
}

/// Recursively merges pre-sorted chunks inside `v`.
///
/// Chunks of `v` are stored in `chunks` as intervals (inclusive left and exclusive right bound).
/// Argument `buf` is an auxiliary buffer that will be used during the procedure.
/// If `into_buf` is true, the result will be stored into `buf`, otherwise it will be in `v`.
///
/// # Safety
///
/// The number of chunks must be positive and they must be adjacent: the right bound of each chunk
/// must equal the left bound of the following chunk.
///
/// The buffer must be at least as long as `v`.
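///
/// For instance (illustrative), a 7-element slice split into chunks of length 3 would be
/// described as:
///
/// ```
/// let chunks = [(0, 3), (3, 6), (6, 7)];
/// // Adjacent: each right bound equals the left bound of the following chunk.
/// assert!(chunks.windows(2).all(|w| w[0].1 == w[1].0));
/// ```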
unsafe fn recurse<T, F>(
    v: *mut T,
    buf: *mut T,
    chunks: &[(usize, usize)],
    into_buf: bool,
    is_less: &F,
) where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    let len = chunks.len();
    debug_assert!(len > 0);

    // Base case of the algorithm.
    // If only one chunk is remaining, there's no more work to split and merge.
    if len == 1 {
        if into_buf {
            // Copy the chunk from `v` into `buf`.
            let (start, end) = chunks[0];
            let src = v.add(start);
            let dest = buf.add(start);
            ptr::copy_nonoverlapping(src, dest, end - start);
        }
        return;
    }

    // Split the chunks into two halves.
    let (start, _) = chunks[0];
    let (mid, _) = chunks[len / 2];
    let (_, end) = chunks[len - 1];
    let (left, right) = chunks.split_at(len / 2);

    // After the recursive calls finish, we'll have to merge chunks `(start, mid)` and `(mid, end)`
    // from `src` into `dest`. If the current invocation has to store the result into `buf`, we'll
    // merge chunks from `v` into `buf`, and vice versa.
    //
    // Recursive calls flip `into_buf` at each level of recursion. More concretely, `par_merge`
    // merges chunks from `buf` into `v` at the first level, from `v` into `buf` at the second
    // level, etc.
    let (src, dest) = if into_buf { (v, buf) } else { (buf, v) };

    // Panic safety:
    //
    // If `is_less` panics at any point during the recursive calls, the destructor of `guard` will
    // be executed, thus copying everything from `src` into `dest`. This way we ensure that all
    // chunks are in fact copied into `dest`, even if the merge process doesn't finish.
    let guard = CopyOnDrop {
        src: src.add(start),
        dest: dest.add(start),
        len: end - start,
    };

    // Wrap pointers in SendPtr so that they can be sent to another thread.
    // See the documentation of SendPtr for a full explanation.
    let v = SendPtr(v);
    let buf = SendPtr(buf);
    rayon_core::join(
        move || recurse(v.get(), buf.get(), left, !into_buf, is_less),
        move || recurse(v.get(), buf.get(), right, !into_buf, is_less),
    );

    // Everything went all right - recursive calls didn't panic.
    // Forget the guard in order to prevent its destructor from running.
    mem::forget(guard);

    // Merge chunks `(start, mid)` and `(mid, end)` from `src` into `dest`.
    let src_left = slice::from_raw_parts_mut(src.add(start), mid - start);
    let src_right = slice::from_raw_parts_mut(src.add(mid), end - mid);
    par_merge(src_left, src_right, dest.add(start), is_less);
}

/// Sorts `v` using merge sort in parallel.
///
/// The algorithm is stable, allocates memory, and runs in `O(n log n)` worst-case time.
/// The allocated temporary buffer has the same length as `v`.
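///
/// This is the sort backing `ParallelSliceMut::par_sort` and its `by`/`by_key` variants, so
/// the usual entry point looks like this:
///
/// ```
/// use rayon::prelude::*;
///
/// let mut v = vec![5, 1, 8, 22, 0, 44];
/// v.par_sort();
/// assert_eq!(v, vec![0, 1, 5, 8, 22, 44]);
/// ```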
pub(super) fn par_mergesort<T, F>(v: &mut [T], is_less: F)
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Slices of up to this length get sorted using insertion sort in order to avoid the cost of
    // buffer allocation.
    const MAX_INSERTION: usize = 20;
    // The length of initial chunks. This number is as small as possible but so that the overhead
    // of Rayon's task scheduling is still negligible.
    const CHUNK_LENGTH: usize = 2000;

    // Sorting has no meaningful behavior on zero-sized types.
    if size_of::<T>() == 0 {
        return;
    }

    let len = v.len();

    // Short slices get sorted in place via insertion sort to avoid allocations.
    if len <= MAX_INSERTION {
        if len >= 2 {
            for i in (0..len - 1).rev() {
                insert_head(&mut v[i..], &is_less);
            }
        }
        return;
    }

    // Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it
    // shallow copies of the contents of `v` without risking the dtors running on copies if
    // `is_less` panics.
    let mut buf = Vec::<T>::with_capacity(len);
    let buf = buf.as_mut_ptr();

    // If the slice is not longer than one chunk would be, do sequential merge sort and return.
    if len <= CHUNK_LENGTH {
        let res = unsafe { mergesort(v, buf, &is_less) };
        if res == MergesortResult::Descending {
            v.reverse();
        }
        return;
    }

    // Split the slice into chunks and merge sort them in parallel.
    // However, descending chunks will not be sorted - they will simply be left intact.
    let mut iter = {
        // Wrap pointer in SendPtr so that it can be sent to another thread.
        // See the documentation of SendPtr for a full explanation.
        let buf = SendPtr(buf);
        let is_less = &is_less;

        v.par_chunks_mut(CHUNK_LENGTH)
            .with_max_len(1)
            .enumerate()
            .map(move |(i, chunk)| {
                let l = CHUNK_LENGTH * i;
                let r = l + chunk.len();
                unsafe {
                    let buf = buf.get().add(l);
                    (l, r, mergesort(chunk, buf, is_less))
                }
            })
            .collect::<Vec<_>>()
            .into_iter()
            .peekable()
    };

    // Now attempt to concatenate adjacent chunks that were left intact.
    let mut chunks = Vec::with_capacity(iter.len());

    while let Some((a, mut b, res)) = iter.next() {
        // If this chunk was not modified by the sort procedure...
        if res != MergesortResult::Sorted {
            while let Some(&(x, y, r)) = iter.peek() {
                // If the following chunk is of the same type and can be concatenated, i.e. the
                // ordering across the chunk boundary matches the direction of the chunks...
                if r == res && (r == MergesortResult::Descending) == is_less(&v[x], &v[x - 1]) {
                    // Concatenate them.
                    b = y;
                    iter.next();
                } else {
                    break;
                }
            }
        }

        // Descending chunks must be reversed.
        if res == MergesortResult::Descending {
            v[a..b].reverse();
        }

        chunks.push((a, b));
    }

    // All chunks are properly sorted.
    // Now we just have to merge them together.
    unsafe {
        recurse(v.as_mut_ptr(), buf, &chunks, false, &is_less);
    }
}

#[cfg(test)]
mod tests {
    use super::split_for_merge;
    use rand::distributions::Uniform;
    use rand::{thread_rng, Rng};

    #[test]
    fn test_split_for_merge() {
        fn check(left: &[u32], right: &[u32]) {
            let (l, r) = split_for_merge(left, right, &|&a, &b| a < b);
            assert!(left[..l]
                .iter()
                .all(|&x| right[r..].iter().all(|&y| x <= y)));
            assert!(right[..r].iter().all(|&x| left[l..].iter().all(|&y| x < y)));
        }

        check(&[1, 2, 2, 2, 2, 3], &[1, 2, 2, 2, 2, 3]);
        check(&[1, 2, 2, 2, 2, 3], &[]);
        check(&[], &[1, 2, 2, 2, 2, 3]);

        let rng = &mut thread_rng();

        for _ in 0..100 {
            let limit: u32 = rng.gen_range(1..21);
            let left_len: usize = rng.gen_range(0..20);
            let right_len: usize = rng.gen_range(0..20);

            let mut left = rng
                .sample_iter(&Uniform::new(0, limit))
                .take(left_len)
                .collect::<Vec<_>>();
            let mut right = rng
                .sample_iter(&Uniform::new(0, limit))
                .take(right_len)
                .collect::<Vec<_>>();

            left.sort();
            right.sort();
            check(&left, &right);
        }
    }
}
| 756 | |