mod.rs source code [crates/rayon-1.9.0/src/slice/mod.rs]

1	//! Parallel iterator types for [slices][std::slice]
2	//!
3	//! You will rarely need to interact with this module directly unless you need
4	//! to name one of the iterator types.
5	//!
6	//! [std::slice]: https://doc.rust-lang.org/stable/std/slice/
7
8	mod chunks;
9	mod mergesort;
10	mod quicksort;
11	mod rchunks;
12
13	mod test;
14
15	use self::mergesort::par_mergesort;
16	use self::quicksort::par_quicksort;
17	use crate::iter::plumbing::*;
18	use crate::iter::*;
19	use crate::split_producer::*;
20	use std::cmp;
21	use std::cmp::Ordering;
22	use std::fmt::{self, Debug};
23	use std::mem;
24
25	pub use self::chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut};
26	pub use self::rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut};
27
28	/// Parallel extensions for slices.
29	pub trait ParallelSlice<T: Sync> {
30	/// Returns a plain slice, which is used to implement the rest of the
31	/// parallel methods.
32	fn as_parallel_slice(&self) -> &[T];
33
34	/// Returns a parallel iterator over subslices separated by elements that
35	/// match the separator.
36	///
37	/// # Examples
38	///
39	/// ```
40	/// use rayon::prelude::*;
41	/// let products: Vec<_> = [`1`, `2`, `3`, `0`, `2`, `4`, `8`, `0`, `3`, `6`, `9`]
42	/// .par_split(\|i\| *i == `0`)
43	/// .map(\|numbers\| numbers.iter().product::<i32>())
44	/// .collect();
45	/// assert_eq!(products, [`6`, `64`, `162`]);
46	/// ```
47	fn par_split<P>(&self, separator: P) -> Split<'_, T, P>
48	where
49	P: Fn(&T) -> bool + Sync + Send,
50	{
51	Split {
52	slice: self.as_parallel_slice(),
53	separator,
54	}
55	}
56
57	/// Returns a parallel iterator over subslices separated by elements that
58	/// match the separator, including the matched part as a terminator.
59	///
60	/// # Examples
61	///
62	/// ```
63	/// use rayon::prelude::*;
64	/// let lengths: Vec<_> = [`1`, `2`, `3`, `0`, `2`, `4`, `8`, `0`, `3`, `6`, `9`]
65	/// .par_split_inclusive(\|i\| *i == `0`)
66	/// .map(\|numbers\| numbers.len())
67	/// .collect();
68	/// assert_eq!(lengths, [`4`, `4`, `3`]);
69	/// ```
70	fn par_split_inclusive<P>(&self, separator: P) -> SplitInclusive<'_, T, P>
71	where
72	P: Fn(&T) -> bool + Sync + Send,
73	{
74	SplitInclusive {
75	slice: self.as_parallel_slice(),
76	separator,
77	}
78	}
79
80	/// Returns a parallel iterator over all contiguous windows of length
81	/// `window_size`. The windows overlap.
82	///
83	/// # Examples
84	///
85	/// ```
86	/// use rayon::prelude::*;
87	/// let windows: Vec<_> = [`1`, `2`, `3`].par_windows(`2`).collect();
88	/// assert_eq!(vec![[`1`, `2`], [`2`, `3`]], windows);
89	/// ```
90	fn par_windows(&self, window_size: usize) -> Windows<'_, T> {
91	Windows {
92	window_size,
93	slice: self.as_parallel_slice(),
94	}
95	}
96
97	/// Returns a parallel iterator over at most `chunk_size` elements of
98	/// `self` at a time. The chunks do not overlap.
99	///
100	/// If the number of elements in the iterator is not divisible by
101	/// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
102	/// other chunks will have that exact length.
103	///
104	/// # Examples
105	///
106	/// ```
107	/// use rayon::prelude::*;
108	/// let chunks: Vec<_> = [`1`, `2`, `3`, `4`, `5`].par_chunks(`2`).collect();
109	/// assert_eq!(chunks, vec![&[`1`, `2`][..], &[`3`, `4`], &[`5`]]);
110	/// ```
111	#[track_caller]
112	fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
113	assert!(chunk_size != `0`, "chunk_size must not be zero");
114	Chunks::new(chunk_size, self.as_parallel_slice())
115	}
116
117	/// Returns a parallel iterator over `chunk_size` elements of
118	/// `self` at a time. The chunks do not overlap.
119	///
120	/// If `chunk_size` does not divide the length of the slice, then the
121	/// last up to `chunk_size-1` elements will be omitted and can be
122	/// retrieved from the remainder function of the iterator.
123	///
124	/// # Examples
125	///
126	/// ```
127	/// use rayon::prelude::*;
128	/// let chunks: Vec<_> = [`1`, `2`, `3`, `4`, `5`].par_chunks_exact(`2`).collect();
129	/// assert_eq!(chunks, vec![&[`1`, `2`][..], &[`3`, `4`]]);
130	/// ```
131	#[track_caller]
132	fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
133	assert!(chunk_size != `0`, "chunk_size must not be zero");
134	ChunksExact::new(chunk_size, self.as_parallel_slice())
135	}
136
137	/// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
138	/// starting at the end. The chunks do not overlap.
139	///
140	/// If the number of elements in the iterator is not divisible by
141	/// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
142	/// other chunks will have that exact length.
143	///
144	/// # Examples
145	///
146	/// ```
147	/// use rayon::prelude::*;
148	/// let chunks: Vec<_> = [`1`, `2`, `3`, `4`, `5`].par_rchunks(`2`).collect();
149	/// assert_eq!(chunks, vec![&[`4`, `5`][..], &[`2`, `3`], &[`1`]]);
150	/// ```
151	#[track_caller]
152	fn par_rchunks(&self, chunk_size: usize) -> RChunks<'_, T> {
153	assert!(chunk_size != `0`, "chunk_size must not be zero");
154	RChunks::new(chunk_size, self.as_parallel_slice())
155	}
156
157	/// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
158	/// starting at the end. The chunks do not overlap.
159	///
160	/// If `chunk_size` does not divide the length of the slice, then the
161	/// last up to `chunk_size-1` elements will be omitted and can be
162	/// retrieved from the remainder function of the iterator.
163	///
164	/// # Examples
165	///
166	/// ```
167	/// use rayon::prelude::*;
168	/// let chunks: Vec<_> = [`1`, `2`, `3`, `4`, `5`].par_rchunks_exact(`2`).collect();
169	/// assert_eq!(chunks, vec![&[`4`, `5`][..], &[`2`, `3`]]);
170	/// ```
171	#[track_caller]
172	fn par_rchunks_exact(&self, chunk_size: usize) -> RChunksExact<'_, T> {
173	assert!(chunk_size != `0`, "chunk_size must not be zero");
174	RChunksExact::new(chunk_size, self.as_parallel_slice())
175	}
176	}
177
178	impl<T: Sync> ParallelSlice<T> for [T] {
179	#[inline]
180	fn as_parallel_slice(&self) -> &[T] {
181	self
182	}
183	}
184
185	/// Parallel extensions for mutable slices.
186	pub trait ParallelSliceMut<T: Send> {
187	/// Returns a plain mutable slice, which is used to implement the rest of
188	/// the parallel methods.
189	fn as_parallel_slice_mut(&mut self) -> &mut [T];
190
191	/// Returns a parallel iterator over mutable subslices separated by
192	/// elements that match the separator.
193	///
194	/// # Examples
195	///
196	/// ```
197	/// use rayon::prelude::*;
198	/// let mut array = [`1`, `2`, `3`, `0`, `2`, `4`, `8`, `0`, `3`, `6`, `9`];
199	/// array.par_split_mut(\|i\| *i == `0`)
200	/// .for_each(\|slice\| slice.reverse());
201	/// assert_eq!(array, [`3`, `2`, `1`, `0`, `8`, `4`, `2`, `0`, `9`, `6`, `3`]);
202	/// ```
203	fn par_split_mut<P>(&mut self, separator: P) -> SplitMut<'_, T, P>
204	where
205	P: Fn(&T) -> bool + Sync + Send,
206	{
207	SplitMut {
208	slice: self.as_parallel_slice_mut(),
209	separator,
210	}
211	}
212
213	/// Returns a parallel iterator over mutable subslices separated by elements
214	/// that match the separator, including the matched part as a terminator.
215	///
216	/// # Examples
217	///
218	/// ```
219	/// use rayon::prelude::*;
220	/// let mut array = [`1`, `2`, `3`, `0`, `2`, `4`, `8`, `0`, `3`, `6`, `9`];
221	/// array.par_split_inclusive_mut(\|i\| *i == `0`)
222	/// .for_each(\|slice\| slice.reverse());
223	/// assert_eq!(array, [`0`, `3`, `2`, `1`, `0`, `8`, `4`, `2`, `9`, `6`, `3`]);
224	/// ```
225	fn par_split_inclusive_mut<P>(&mut self, separator: P) -> SplitInclusiveMut<'_, T, P>
226	where
227	P: Fn(&T) -> bool + Sync + Send,
228	{
229	SplitInclusiveMut {
230	slice: self.as_parallel_slice_mut(),
231	separator,
232	}
233	}
234
235	/// Returns a parallel iterator over at most `chunk_size` elements of
236	/// `self` at a time. The chunks are mutable and do not overlap.
237	///
238	/// If the number of elements in the iterator is not divisible by
239	/// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
240	/// other chunks will have that exact length.
241	///
242	/// # Examples
243	///
244	/// ```
245	/// use rayon::prelude::*;
246	/// let mut array = [`1`, `2`, `3`, `4`, `5`];
247	/// array.par_chunks_mut(`2`)
248	/// .for_each(\|slice\| slice.reverse());
249	/// assert_eq!(array, [`2`, `1`, `4`, `3`, `5`]);
250	/// ```
251	#[track_caller]
252	fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
253	assert!(chunk_size != `0`, "chunk_size must not be zero");
254	ChunksMut::new(chunk_size, self.as_parallel_slice_mut())
255	}
256
257	/// Returns a parallel iterator over `chunk_size` elements of
258	/// `self` at a time. The chunks are mutable and do not overlap.
259	///
260	/// If `chunk_size` does not divide the length of the slice, then the
261	/// last up to `chunk_size-1` elements will be omitted and can be
262	/// retrieved from the remainder function of the iterator.
263	///
264	/// # Examples
265	///
266	/// ```
267	/// use rayon::prelude::*;
268	/// let mut array = [`1`, `2`, `3`, `4`, `5`];
269	/// array.par_chunks_exact_mut(`3`)
270	/// .for_each(\|slice\| slice.reverse());
271	/// assert_eq!(array, [`3`, `2`, `1`, `4`, `5`]);
272	/// ```
273	#[track_caller]
274	fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
275	assert!(chunk_size != `0`, "chunk_size must not be zero");
276	ChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
277	}
278
279	/// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
280	/// starting at the end. The chunks are mutable and do not overlap.
281	///
282	/// If the number of elements in the iterator is not divisible by
283	/// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
284	/// other chunks will have that exact length.
285	///
286	/// # Examples
287	///
288	/// ```
289	/// use rayon::prelude::*;
290	/// let mut array = [`1`, `2`, `3`, `4`, `5`];
291	/// array.par_rchunks_mut(`2`)
292	/// .for_each(\|slice\| slice.reverse());
293	/// assert_eq!(array, [`1`, `3`, `2`, `5`, `4`]);
294	/// ```
295	#[track_caller]
296	fn par_rchunks_mut(&mut self, chunk_size: usize) -> RChunksMut<'_, T> {
297	assert!(chunk_size != `0`, "chunk_size must not be zero");
298	RChunksMut::new(chunk_size, self.as_parallel_slice_mut())
299	}
300
301	/// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
302	/// starting at the end. The chunks are mutable and do not overlap.
303	///
304	/// If `chunk_size` does not divide the length of the slice, then the
305	/// last up to `chunk_size-1` elements will be omitted and can be
306	/// retrieved from the remainder function of the iterator.
307	///
308	/// # Examples
309	///
310	/// ```
311	/// use rayon::prelude::*;
312	/// let mut array = [`1`, `2`, `3`, `4`, `5`];
313	/// array.par_rchunks_exact_mut(`3`)
314	/// .for_each(\|slice\| slice.reverse());
315	/// assert_eq!(array, [`1`, `2`, `5`, `4`, `3`]);
316	/// ```
317	#[track_caller]
318	fn par_rchunks_exact_mut(&mut self, chunk_size: usize) -> RChunksExactMut<'_, T> {
319	assert!(chunk_size != `0`, "chunk_size must not be zero");
320	RChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
321	}
322
323	/// Sorts the slice in parallel.
324	///
325	/// This sort is stable (i.e., does not reorder equal elements) and O(n* \* log(n)) worst-case.*
326	///
327	/// When applicable, unstable sorting is preferred because it is generally faster than stable
328	/// sorting and it doesn't allocate auxiliary memory.
329	/// See [`par_sort_unstable`](#method.par_sort_unstable).
330	///
331	/// # Current implementation
332	///
333	/// The current algorithm is an adaptive merge sort inspired by
334	/// [timsort](https://en.wikipedia.org/wiki/Timsort).
335	/// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
336	/// two or more sorted sequences concatenated one after another.
337	///
338	/// Also, it allocates temporary storage the same size as `self`, but for very short slices a
339	/// non-allocating insertion sort is used instead.
340	///
341	/// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
342	/// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
343	/// or descending runs are concatenated. Finally, the remaining chunks are merged together using
344	/// parallel subdivision of chunks and parallel merge operation.
345	///
346	/// # Examples
347	///
348	/// ```
349	/// use rayon::prelude::*;
350	///
351	/// let mut v = [`-5`, `4`, `1`, `-3`, `2`];
352	///
353	/// v.par_sort();
354	/// assert_eq!(v, [-`5`, -`3`, `1`, `2`, `4`]);
355	/// ```
356	fn par_sort(&mut self)
357	where
358	T: Ord,
359	{
360	par_mergesort(self.as_parallel_slice_mut(), T::lt);
361	}
362
363	/// Sorts the slice in parallel with a comparator function.
364	///
365	/// This sort is stable (i.e., does not reorder equal elements) and O(n* \* log(n)) worst-case.*
366	///
367	/// The comparator function must define a total ordering for the elements in the slice. If
368	/// the ordering is not total, the order of the elements is unspecified. An order is a
369	/// total order if it is (for all `a`, `b` and `c`):
370	///
371	/// total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and*
372	/// transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.*
373	///
374	/// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
375	/// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
376	///
377	/// ```
378	/// use rayon::prelude::*;
379	///
380	/// let mut floats = [`5f64`, `4.0`, `1.0`, `3.0`, `2.0`];
381	/// floats.par_sort_by(\|a, b\| a.partial_cmp(b).unwrap());
382	/// assert_eq!(floats, [`1.0`, `2.0`, `3.0`, `4.0`, `5.0`]);
383	/// ```
384	///
385	/// When applicable, unstable sorting is preferred because it is generally faster than stable
386	/// sorting and it doesn't allocate auxiliary memory.
387	/// See [`par_sort_unstable_by`](#method.par_sort_unstable_by).
388	///
389	/// # Current implementation
390	///
391	/// The current algorithm is an adaptive merge sort inspired by
392	/// [timsort](https://en.wikipedia.org/wiki/Timsort).
393	/// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
394	/// two or more sorted sequences concatenated one after another.
395	///
396	/// Also, it allocates temporary storage the same size as `self`, but for very short slices a
397	/// non-allocating insertion sort is used instead.
398	///
399	/// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
400	/// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
401	/// or descending runs are concatenated. Finally, the remaining chunks are merged together using
402	/// parallel subdivision of chunks and parallel merge operation.
403	///
404	/// # Examples
405	///
406	/// ```
407	/// use rayon::prelude::*;
408	///
409	/// let mut v = [`5`, `4`, `1`, `3`, `2`];
410	/// v.par_sort_by(\|a, b\| a.cmp(b));
411	/// assert_eq!(v, [`1`, `2`, `3`, `4`, `5`]);
412	///
413	/// // reverse sorting
414	/// v.par_sort_by(\|a, b\| b.cmp(a));
415	/// assert_eq!(v, [`5`, `4`, `3`, `2`, `1`]);
416	/// ```
417	fn par_sort_by<F>(&mut self, compare: F)
418	where
419	F: Fn(&T, &T) -> Ordering + Sync,
420	{
421	par_mergesort(self.as_parallel_slice_mut(), \|a, b\| {
422	compare(a, b) == Ordering::Less
423	});
424	}
425
426	/// Sorts the slice in parallel with a key extraction function.
427	///
428	/// This sort is stable (i.e., does not reorder equal elements) and O(m* \* n \* log(n))*
429	/// worst-case, where the key function is O(m).
430	///
431	/// For expensive key functions (e.g. functions that are not simple property accesses or
432	/// basic operations), [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) is likely to
433	/// be significantly faster, as it does not recompute element keys.
434	///
435	/// When applicable, unstable sorting is preferred because it is generally faster than stable
436	/// sorting and it doesn't allocate auxiliary memory.
437	/// See [`par_sort_unstable_by_key`](#method.par_sort_unstable_by_key).
438	///
439	/// # Current implementation
440	///
441	/// The current algorithm is an adaptive merge sort inspired by
442	/// [timsort](https://en.wikipedia.org/wiki/Timsort).
443	/// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
444	/// two or more sorted sequences concatenated one after another.
445	///
446	/// Also, it allocates temporary storage the same size as `self`, but for very short slices a
447	/// non-allocating insertion sort is used instead.
448	///
449	/// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
450	/// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
451	/// or descending runs are concatenated. Finally, the remaining chunks are merged together using
452	/// parallel subdivision of chunks and parallel merge operation.
453	///
454	/// # Examples
455	///
456	/// ```
457	/// use rayon::prelude::*;
458	///
459	/// let mut v = [`-5i32`, `4`, `1`, `-3`, `2`];
460	///
461	/// v.par_sort_by_key(\|k\| k.abs());
462	/// assert_eq!(v, [`1`, `2`, -`3`, `4`, -`5`]);
463	/// ```
464	fn par_sort_by_key<K, F>(&mut self, f: F)
465	where
466	K: Ord,
467	F: Fn(&T) -> K + Sync,
468	{
469	par_mergesort(self.as_parallel_slice_mut(), \|a, b\| f(a).lt(&f(b)));
470	}
471
472	/// Sorts the slice in parallel with a key extraction function.
473	///
474	/// During sorting, the key function is called at most once per element, by using
475	/// temporary storage to remember the results of key evaluation.
476	/// The key function is called in parallel, so the order of calls is completely unspecified.
477	///
478	/// This sort is stable (i.e., does not reorder equal elements) and O(m* \* n + n \* log(n))*
479	/// worst-case, where the key function is O(m).
480	///
481	/// For simple key functions (e.g., functions that are property accesses or
482	/// basic operations), [`par_sort_by_key`](#method.par_sort_by_key) is likely to be
483	/// faster.
484	///
485	/// # Current implementation
486	///
487	/// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
488	/// which combines the fast average case of randomized quicksort with the fast worst case of
489	/// heapsort, while achieving linear time on slices with certain patterns. It uses some
490	/// randomization to avoid degenerate cases, but with a fixed seed to always provide
491	/// deterministic behavior.
492	///
493	/// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the
494	/// length of the slice.
495	///
496	/// All quicksorts work in two stages: partitioning into two halves followed by recursive
497	/// calls. The partitioning phase is sequential, but the two recursive calls are performed in
498	/// parallel. Finally, after sorting the cached keys, the item positions are updated sequentially.
499	///
500	/// [pdqsort]: https://github.com/orlp/pdqsort
501	///
502	/// # Examples
503	///
504	/// ```
505	/// use rayon::prelude::*;
506	///
507	/// let mut v = [`-5i32`, `4`, `32`, `-3`, `2`];
508	///
509	/// v.par_sort_by_cached_key(\|k\| k.to_string());
510	/// assert!(v == [-`3`, -`5`, `2`, `32`, `4`]);
511	/// ```
512	fn par_sort_by_cached_key<K, F>(&mut self, f: F)
513	where
514	F: Fn(&T) -> K + Sync,
515	K: Ord + Send,
516	{
517	let slice = self.as_parallel_slice_mut();
518	let len = slice.len();
519	if len < `2` {
520	return;
521	}
522
523	// Helper macro for indexing our vector by the smallest possible type, to reduce allocation.
524	macro_rules! sort_by_key {
525	($t:ty) => {{
526	let mut indices: Vec<_> = slice
527	.par_iter_mut()
528	.enumerate()
529	.map(\|(i, x)\| (f(&*x), i as $t))
530	.collect();
531	// The elements of `indices` are unique, as they are indexed, so any sort will be
532	// stable with respect to the original slice. We use `sort_unstable` here because
533	// it requires less memory allocation.
534	indices.par_sort_unstable();
535	for i in `0`..len {
536	let mut index = indices[i].`1`;
537	while (index as usize) < i {
538	index = indices[index as usize].`1`;
539	}
540	indices[i].`1` = index;
541	slice.swap(i, index as usize);
542	}
543	}};
544	}
545
546	let sz_u8 = mem::size_of::<(K, u8)>();
547	let sz_u16 = mem::size_of::<(K, u16)>();
548	let sz_u32 = mem::size_of::<(K, u32)>();
549	let sz_usize = mem::size_of::<(K, usize)>();
550
551	if sz_u8 < sz_u16 && len <= (std::u8::MAX as usize) {
552	return sort_by_key!(u8);
553	}
554	if sz_u16 < sz_u32 && len <= (std::u16::MAX as usize) {
555	return sort_by_key!(u16);
556	}
557	if sz_u32 < sz_usize && len <= (std::u32::MAX as usize) {
558	return sort_by_key!(u32);
559	}
560	sort_by_key!(usize)
561	}
562
563	/// Sorts the slice in parallel, but might not preserve the order of equal elements.
564	///
565	/// This sort is unstable (i.e., may reorder equal elements), in-place
566	/// (i.e., does not allocate), and O(n* \* log(n)) worst-case.*
567	///
568	/// # Current implementation
569	///
570	/// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
571	/// which combines the fast average case of randomized quicksort with the fast worst case of
572	/// heapsort, while achieving linear time on slices with certain patterns. It uses some
573	/// randomization to avoid degenerate cases, but with a fixed seed to always provide
574	/// deterministic behavior.
575	///
576	/// It is typically faster than stable sorting, except in a few special cases, e.g., when the
577	/// slice consists of several concatenated sorted sequences.
578	///
579	/// All quicksorts work in two stages: partitioning into two halves followed by recursive
580	/// calls. The partitioning phase is sequential, but the two recursive calls are performed in
581	/// parallel.
582	///
583	/// [pdqsort]: https://github.com/orlp/pdqsort
584	///
585	/// # Examples
586	///
587	/// ```
588	/// use rayon::prelude::*;
589	///
590	/// let mut v = [`-5`, `4`, `1`, `-3`, `2`];
591	///
592	/// v.par_sort_unstable();
593	/// assert_eq!(v, [-`5`, -`3`, `1`, `2`, `4`]);
594	/// ```
595	fn par_sort_unstable(&mut self)
596	where
597	T: Ord,
598	{
599	par_quicksort(self.as_parallel_slice_mut(), T::lt);
600	}
601
602	/// Sorts the slice in parallel with a comparator function, but might not preserve the order of
603	/// equal elements.
604	///
605	/// This sort is unstable (i.e., may reorder equal elements), in-place
606	/// (i.e., does not allocate), and O(n* \* log(n)) worst-case.*
607	///
608	/// The comparator function must define a total ordering for the elements in the slice. If
609	/// the ordering is not total, the order of the elements is unspecified. An order is a
610	/// total order if it is (for all `a`, `b` and `c`):
611	///
612	/// total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and*
613	/// transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.*
614	///
615	/// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
616	/// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
617	///
618	/// ```
619	/// use rayon::prelude::*;
620	///
621	/// let mut floats = [`5f64`, `4.0`, `1.0`, `3.0`, `2.0`];
622	/// floats.par_sort_unstable_by(\|a, b\| a.partial_cmp(b).unwrap());
623	/// assert_eq!(floats, [`1.0`, `2.0`, `3.0`, `4.0`, `5.0`]);
624	/// ```
625	///
626	/// # Current implementation
627	///
628	/// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
629	/// which combines the fast average case of randomized quicksort with the fast worst case of
630	/// heapsort, while achieving linear time on slices with certain patterns. It uses some
631	/// randomization to avoid degenerate cases, but with a fixed seed to always provide
632	/// deterministic behavior.
633	///
634	/// It is typically faster than stable sorting, except in a few special cases, e.g., when the
635	/// slice consists of several concatenated sorted sequences.
636	///
637	/// All quicksorts work in two stages: partitioning into two halves followed by recursive
638	/// calls. The partitioning phase is sequential, but the two recursive calls are performed in
639	/// parallel.
640	///
641	/// [pdqsort]: https://github.com/orlp/pdqsort
642	///
643	/// # Examples
644	///
645	/// ```
646	/// use rayon::prelude::*;
647	///
648	/// let mut v = [`5`, `4`, `1`, `3`, `2`];
649	/// v.par_sort_unstable_by(\|a, b\| a.cmp(b));
650	/// assert_eq!(v, [`1`, `2`, `3`, `4`, `5`]);
651	///
652	/// // reverse sorting
653	/// v.par_sort_unstable_by(\|a, b\| b.cmp(a));
654	/// assert_eq!(v, [`5`, `4`, `3`, `2`, `1`]);
655	/// ```
656	fn par_sort_unstable_by<F>(&mut self, compare: F)
657	where
658	F: Fn(&T, &T) -> Ordering + Sync,
659	{
660	par_quicksort(self.as_parallel_slice_mut(), \|a, b\| {
661	compare(a, b) == Ordering::Less
662	});
663	}
664
665	/// Sorts the slice in parallel with a key extraction function, but might not preserve the order
666	/// of equal elements.
667	///
668	/// This sort is unstable (i.e., may reorder equal elements), in-place
669	/// (i.e., does not allocate), and O(m \ n \* log(n)) worst-case,*
670	/// where the key function is O(m).
671	///
672	/// # Current implementation
673	///
674	/// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
675	/// which combines the fast average case of randomized quicksort with the fast worst case of
676	/// heapsort, while achieving linear time on slices with certain patterns. It uses some
677	/// randomization to avoid degenerate cases, but with a fixed seed to always provide
678	/// deterministic behavior.
679	///
680	/// Due to its key calling strategy, `par_sort_unstable_by_key` is likely to be slower than
681	/// [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) in cases where the key function
682	/// is expensive.
683	///
684	/// All quicksorts work in two stages: partitioning into two halves followed by recursive
685	/// calls. The partitioning phase is sequential, but the two recursive calls are performed in
686	/// parallel.
687	///
688	/// [pdqsort]: https://github.com/orlp/pdqsort
689	///
690	/// # Examples
691	///
692	/// ```
693	/// use rayon::prelude::*;
694	///
695	/// let mut v = [`-5i32`, `4`, `1`, `-3`, `2`];
696	///
697	/// v.par_sort_unstable_by_key(\|k\| k.abs());
698	/// assert_eq!(v, [`1`, `2`, -`3`, `4`, -`5`]);
699	/// ```
700	fn par_sort_unstable_by_key<K, F>(&mut self, f: F)
701	where
702	K: Ord,
703	F: Fn(&T) -> K + Sync,
704	{
705	par_quicksort(self.as_parallel_slice_mut(), \|a, b\| f(a).lt(&f(b)));
706	}
707	}
708
709	impl<T: Send> ParallelSliceMut<T> for [T] {
710	#[inline]
711	fn as_parallel_slice_mut(&mut self) -> &mut [T] {
712	self
713	}
714	}
715
716	impl<'data, T: Sync + 'data> IntoParallelIterator for &'data [T] {
717	type Item = &'data T;
718	type Iter = Iter<'data, T>;
719
720	fn into_par_iter(self) -> Self::Iter {
721	Iter { slice: self }
722	}
723	}
724
725	impl<'data, T: Send + 'data> IntoParallelIterator for &'data mut [T] {
726	type Item = &'data mut T;
727	type Iter = IterMut<'data, T>;
728
729	fn into_par_iter(self) -> Self::Iter {
730	IterMut { slice: self }
731	}
732	}
733
/// Parallel iterator over immutable items in a slice
#[derive(Debug)]
pub struct Iter<'data, T: Sync> {
    // The borrowed elements this iterator will hand out.
    slice: &'data [T],
}

// Implemented by hand so that cloning does not require `T: Clone`;
// only the shared reference is duplicated.
impl<'data, T: Sync> Clone for Iter<'data, T> {
    fn clone(&self) -> Self {
        Self { slice: self.slice }
    }
}
745
746	impl<'data, T: Sync + 'data> ParallelIterator for Iter<'data, T> {
747	type Item = &'data T;
748
749	fn drive_unindexed<C>(self, consumer: C) -> C::Result
750	where
751	C: UnindexedConsumer<Self::Item>,
752	{
753	bridge(self, consumer)
754	}
755
756	fn opt_len(&self) -> Option<usize> {
757	Some(self.len())
758	}
759	}
760
761	impl<'data, T: Sync + 'data> IndexedParallelIterator for Iter<'data, T> {
762	fn drive<C>(self, consumer: C) -> C::Result
763	where
764	C: Consumer<Self::Item>,
765	{
766	bridge(self, consumer)
767	}
768
769	fn len(&self) -> usize {
770	self.slice.len()
771	}
772
773	fn with_producer<CB>(self, callback: CB) -> CB::Output
774	where
775	CB: ProducerCallback<Self::Item>,
776	{
777	callback.callback(producer:IterProducer { slice: self.slice })
778	}
779	}
780
781	struct IterProducer<'data, T: Sync> {
782	slice: &'data [T],
783	}
784
785	impl<'data, T: 'data + Sync> Producer for IterProducer<'data, T> {
786	type Item = &'data T;
787	type IntoIter = ::std::slice::Iter<'data, T>;
788
789	fn into_iter(self) -> Self::IntoIter {
790	self.slice.iter()
791	}
792
793	fn split_at(self, index: usize) -> (Self, Self) {
794	let (left: &[T], right: &[T]) = self.slice.split_at(mid:index);
795	(IterProducer { slice: left }, IterProducer { slice: right })
796	}
797	}
798
/// Parallel iterator over immutable overlapping windows of a slice
#[derive(Debug)]
pub struct Windows<'data, T: Sync> {
    // Length of every window that is yielded.
    window_size: usize,
    // The borrowed elements the windows are taken from.
    slice: &'data [T],
}

// Implemented by hand so that cloning does not require `T: Clone`;
// both fields are plain copies.
impl<'data, T: Sync> Clone for Windows<'data, T> {
    fn clone(&self) -> Self {
        Self {
            window_size: self.window_size,
            slice: self.slice,
        }
    }
}
811
812	impl<'data, T: Sync + 'data> ParallelIterator for Windows<'data, T> {
813	type Item = &'data [T];
814
815	fn drive_unindexed<C>(self, consumer: C) -> C::Result
816	where
817	C: UnindexedConsumer<Self::Item>,
818	{
819	bridge(self, consumer)
820	}
821
822	fn opt_len(&self) -> Option<usize> {
823	Some(self.len())
824	}
825	}
826
827	impl<'data, T: Sync + 'data> IndexedParallelIterator for Windows<'data, T> {
828	fn drive<C>(self, consumer: C) -> C::Result
829	where
830	C: Consumer<Self::Item>,
831	{
832	bridge(self, consumer)
833	}
834
835	fn len(&self) -> usize {
836	assert!(self.window_size >= `1`);
837	self.slice.len().saturating_sub(self.window_size - `1`)
838	}
839
840	fn with_producer<CB>(self, callback: CB) -> CB::Output
841	where
842	CB: ProducerCallback<Self::Item>,
843	{
844	callback.callback(producer:WindowsProducer {
845	window_size: self.window_size,
846	slice: self.slice,
847	})
848	}
849	}
850
851	struct WindowsProducer<'data, T: Sync> {
852	window_size: usize,
853	slice: &'data [T],
854	}
855
856	impl<'data, T: 'data + Sync> Producer for WindowsProducer<'data, T> {
857	type Item = &'data [T];
858	type IntoIter = ::std::slice::Windows<'data, T>;
859
860	fn into_iter(self) -> Self::IntoIter {
861	self.slice.windows(self.window_size)
862	}
863
864	fn split_at(self, index: usize) -> (Self, Self) {
865	let left_index: usize = cmp::min(self.slice.len(), v2:index + (self.window_size - `1`));
866	let left: &[T] = &self.slice[..left_index];
867	let right: &[T] = &self.slice[index..];
868	(
869	WindowsProducer {
870	window_size: self.window_size,
871	slice: left,
872	},
873	WindowsProducer {
874	window_size: self.window_size,
875	slice: right,
876	},
877	)
878	}
879	}
880
881	/// Parallel iterator over mutable items in a slice
882	#[derive(Debug)]
883	pub struct IterMut<'data, T: Send> {
884	slice: &'data mut [T],
885	}
886
887	impl<'data, T: Send + 'data> ParallelIterator for IterMut<'data, T> {
888	type Item = &'data mut T;
889
890	fn drive_unindexed<C>(self, consumer: C) -> C::Result
891	where
892	C: UnindexedConsumer<Self::Item>,
893	{
894	bridge(self, consumer)
895	}
896
897	fn opt_len(&self) -> Option<usize> {
898	Some(self.len())
899	}
900	}
901
902	impl<'data, T: Send + 'data> IndexedParallelIterator for IterMut<'data, T> {
903	fn drive<C>(self, consumer: C) -> C::Result
904	where
905	C: Consumer<Self::Item>,
906	{
907	bridge(self, consumer)
908	}
909
910	fn len(&self) -> usize {
911	self.slice.len()
912	}
913
914	fn with_producer<CB>(self, callback: CB) -> CB::Output
915	where
916	CB: ProducerCallback<Self::Item>,
917	{
918	callback.callback(producer:IterMutProducer { slice: self.slice })
919	}
920	}
921
/// Producer state for `IterMut`: the exclusively borrowed elements still to be yielded.
struct IterMutProducer<'data, T>
where
    T: Send,
{
    slice: &'data mut [T],
}
925
926	impl<'data, T: 'data + Send> Producer for IterMutProducer<'data, T> {
927	type Item = &'data mut T;
928	type IntoIter = ::std::slice::IterMut<'data, T>;
929
930	fn into_iter(self) -> Self::IntoIter {
931	self.slice.iter_mut()
932	}
933
934	fn split_at(self, index: usize) -> (Self, Self) {
935	let (left: &mut [T], right: &mut [T]) = self.slice.split_at_mut(mid:index);
936	(
937	IterMutProducer { slice: left },
938	IterMutProducer { slice: right },
939	)
940	}
941	}
942
/// Parallel iterator over slices separated by a predicate
pub struct Split<'data, T, P> {
    /// The elements being split.
    slice: &'data [T],
    /// Predicate deciding which elements act as separators.
    separator: P,
}
948
949	impl<'data, T, P: Clone> Clone for Split<'data, T, P> {
950	fn clone(&self) -> Self {
951	Split {
952	separator: self.separator.clone(),
953	..*self
954	}
955	}
956	}
957
958	impl<'data, T: Debug, P> Debug for Split<'data, T, P> {
959	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
960	f.debug_struct("Split").field(name:"slice", &self.slice).finish()
961	}
962	}
963
964	impl<'data, T, P> ParallelIterator for Split<'data, T, P>
965	where
966	P: Fn(&T) -> bool + Sync + Send,
967	T: Sync,
968	{
969	type Item = &'data [T];
970
971	fn drive_unindexed<C>(self, consumer: C) -> C::Result
972	where
973	C: UnindexedConsumer<Self::Item>,
974	{
975	let producer: SplitProducer<'_, P, &[T]> = SplitProducer::new(self.slice, &self.separator);
976	bridge_unindexed(producer, consumer)
977	}
978	}
979
/// Parallel iterator over slices separated by a predicate,
/// including the matched part as a terminator.
pub struct SplitInclusive<'data, T, P> {
    /// The elements being split.
    slice: &'data [T],
    /// Predicate deciding which elements terminate a piece.
    separator: P,
}
986
987	impl<'data, T, P: Clone> Clone for SplitInclusive<'data, T, P> {
988	fn clone(&self) -> Self {
989	SplitInclusive {
990	separator: self.separator.clone(),
991	..*self
992	}
993	}
994	}
995
996	impl<'data, T: Debug, P> Debug for SplitInclusive<'data, T, P> {
997	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
998	f&mut DebugStruct<'_, '_>.debug_struct("SplitInclusive")
999	.field(name:"slice", &self.slice)
1000	.finish()
1001	}
1002	}
1003
1004	impl<'data, T, P> ParallelIterator for SplitInclusive<'data, T, P>
1005	where
1006	P: Fn(&T) -> bool + Sync + Send,
1007	T: Sync,
1008	{
1009	type Item = &'data [T];
1010
1011	fn drive_unindexed<C>(self, consumer: C) -> C::Result
1012	where
1013	C: UnindexedConsumer<Self::Item>,
1014	{
1015	let producer: SplitProducer<'_, P, &[T], true> = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
1016	bridge_unindexed(producer, consumer)
1017	}
1018	}
1019
1020	/// Implement support for `SplitProducer`.
1021	impl<'data, T, P> Fissile<P> for &'data [T]
1022	where
1023	P: Fn(&T) -> bool,
1024	{
1025	fn length(&self) -> usize {
1026	self.len()
1027	}
1028
1029	fn midpoint(&self, end: usize) -> usize {
1030	end / `2`
1031	}
1032
1033	fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
1034	self[start..end].iter().position(separator)
1035	}
1036
1037	fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
1038	self[..end].iter().rposition(separator)
1039	}
1040
1041	fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
1042	if INCL {
1043	// include the separator in the left side
1044	self.split_at(index + `1`)
1045	} else {
1046	let (left, right) = self.split_at(index);
1047	(left, &right[`1`..]) // skip the separator
1048	}
1049	}
1050
1051	fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
1052	where
1053	F: Folder<Self>,
1054	Self: Send,
1055	{
1056	if INCL {
1057	debug_assert!(!skip_last);
1058	folder.consume_iter(self.split_inclusive(separator))
1059	} else {
1060	let mut split = self.split(separator);
1061	if skip_last {
1062	split.next_back();
1063	}
1064	folder.consume_iter(split)
1065	}
1066	}
1067	}
1068
/// Parallel iterator over mutable slices separated by a predicate
pub struct SplitMut<'data, T, P> {
    /// Exclusive borrow of the elements being split.
    slice: &'data mut [T],
    /// Predicate deciding which elements act as separators.
    separator: P,
}
1074
1075	impl<'data, T: Debug, P> Debug for SplitMut<'data, T, P> {
1076	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1077	f&mut DebugStruct<'_, '_>.debug_struct("SplitMut")
1078	.field(name:"slice", &self.slice)
1079	.finish()
1080	}
1081	}
1082
1083	impl<'data, T, P> ParallelIterator for SplitMut<'data, T, P>
1084	where
1085	P: Fn(&T) -> bool + Sync + Send,
1086	T: Send,
1087	{
1088	type Item = &'data mut [T];
1089
1090	fn drive_unindexed<C>(self, consumer: C) -> C::Result
1091	where
1092	C: UnindexedConsumer<Self::Item>,
1093	{
1094	let producer: SplitProducer<'_, P, &mut …> = SplitProducer::new(self.slice, &self.separator);
1095	bridge_unindexed(producer, consumer)
1096	}
1097	}
1098
/// Parallel iterator over mutable slices separated by a predicate,
/// including the matched part as a terminator.
pub struct SplitInclusiveMut<'data, T, P> {
    /// Exclusive borrow of the elements being split.
    slice: &'data mut [T],
    /// Predicate deciding which elements terminate a piece.
    separator: P,
}
1105
1106	impl<'data, T: Debug, P> Debug for SplitInclusiveMut<'data, T, P> {
1107	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1108	f&mut DebugStruct<'_, '_>.debug_struct("SplitInclusiveMut")
1109	.field(name:"slice", &self.slice)
1110	.finish()
1111	}
1112	}
1113
1114	impl<'data, T, P> ParallelIterator for SplitInclusiveMut<'data, T, P>
1115	where
1116	P: Fn(&T) -> bool + Sync + Send,
1117	T: Send,
1118	{
1119	type Item = &'data mut [T];
1120
1121	fn drive_unindexed<C>(self, consumer: C) -> C::Result
1122	where
1123	C: UnindexedConsumer<Self::Item>,
1124	{
1125	let producer: SplitProducer<'_, P, &mut …, true> = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
1126	bridge_unindexed(producer, consumer)
1127	}
1128	}
1129
1130	/// Implement support for `SplitProducer`.
1131	impl<'data, T, P> Fissile<P> for &'data mut [T]
1132	where
1133	P: Fn(&T) -> bool,
1134	{
1135	fn length(&self) -> usize {
1136	self.len()
1137	}
1138
1139	fn midpoint(&self, end: usize) -> usize {
1140	end / `2`
1141	}
1142
1143	fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
1144	self[start..end].iter().position(separator)
1145	}
1146
1147	fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
1148	self[..end].iter().rposition(separator)
1149	}
1150
1151	fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
1152	if INCL {
1153	// include the separator in the left side
1154	self.split_at_mut(index + `1`)
1155	} else {
1156	let (left, right) = self.split_at_mut(index);
1157	(left, &mut right[`1`..]) // skip the separator
1158	}
1159	}
1160
1161	fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
1162	where
1163	F: Folder<Self>,
1164	Self: Send,
1165	{
1166	if INCL {
1167	debug_assert!(!skip_last);
1168	folder.consume_iter(self.split_inclusive_mut(separator))
1169	} else {
1170	let mut split = self.split_mut(separator);
1171	if skip_last {
1172	split.next_back();
1173	}
1174	folder.consume_iter(split)
1175	}
1176	}
1177	}
1178