mod.rs source code [crates/rayon/src/slice/mod.rs]

1	//! Parallel iterator types for [slices][std::slice]
2	//!
3	//! You will rarely need to interact with this module directly unless you need
4	//! to name one of the iterator types.
5	//!
6	//! [std::slice]: https://doc.rust-lang.org/stable/std/slice/
7
8	mod chunk_by;
9	mod chunks;
10	mod mergesort;
11	mod quicksort;
12	mod rchunks;
13
14	mod test;
15
16	use self::mergesort::par_mergesort;
17	use self::quicksort::par_quicksort;
18	use crate::iter::plumbing::*;
19	use crate::iter::*;
20	use crate::split_producer::*;
21
22	use std::cmp::Ordering;
23	use std::fmt::{self, Debug};
24	use std::mem;
25
26	pub use self::chunk_by::{ChunkBy, ChunkByMut};
27	pub use self::chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut};
28	pub use self::rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut};
29
30	/// Parallel extensions for slices.
31	pub trait ParallelSlice<T: Sync> {
32	/// Returns a plain slice, which is used to implement the rest of the
33	/// parallel methods.
34	fn as_parallel_slice(&self) -> &[T];
35
36	/// Returns a parallel iterator over subslices separated by elements that
37	/// match the separator.
38	///
39	/// # Examples
40	///
41	/// ```
42	/// use rayon::prelude::*;
43	/// let products: Vec<_> = [`1`, `2`, `3`, `0`, `2`, `4`, `8`, `0`, `3`, `6`, `9`]
44	/// .par_split(\|i\| *i == `0`)
45	/// .map(\|numbers\| numbers.iter().product::<i32>())
46	/// .collect();
47	/// assert_eq!(products, [`6`, `64`, `162`]);
48	/// ```
49	fn par_split<P>(&self, separator: P) -> Split<'_, T, P>
50	where
51	P: Fn(&T) -> bool + Sync + Send,
52	{
53	Split {
54	slice: self.as_parallel_slice(),
55	separator,
56	}
57	}
58
59	/// Returns a parallel iterator over subslices separated by elements that
60	/// match the separator, including the matched part as a terminator.
61	///
62	/// # Examples
63	///
64	/// ```
65	/// use rayon::prelude::*;
66	/// let lengths: Vec<_> = [`1`, `2`, `3`, `0`, `2`, `4`, `8`, `0`, `3`, `6`, `9`]
67	/// .par_split_inclusive(\|i\| *i == `0`)
68	/// .map(\|numbers\| numbers.len())
69	/// .collect();
70	/// assert_eq!(lengths, [`4`, `4`, `3`]);
71	/// ```
72	fn par_split_inclusive<P>(&self, separator: P) -> SplitInclusive<'_, T, P>
73	where
74	P: Fn(&T) -> bool + Sync + Send,
75	{
76	SplitInclusive {
77	slice: self.as_parallel_slice(),
78	separator,
79	}
80	}
81
82	/// Returns a parallel iterator over all contiguous windows of length
83	/// `window_size`. The windows overlap.
84	///
85	/// # Examples
86	///
87	/// ```
88	/// use rayon::prelude::*;
89	/// let windows: Vec<_> = [`1`, `2`, `3`].par_windows(`2`).collect();
90	/// assert_eq!(vec![[`1`, `2`], [`2`, `3`]], windows);
91	/// ```
92	fn par_windows(&self, window_size: usize) -> Windows<'_, T> {
93	Windows {
94	window_size,
95	slice: self.as_parallel_slice(),
96	}
97	}
98
99	/// Returns a parallel iterator over at most `chunk_size` elements of
100	/// `self` at a time. The chunks do not overlap.
101	///
102	/// If the number of elements in the iterator is not divisible by
103	/// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
104	/// other chunks will have that exact length.
105	///
106	/// # Examples
107	///
108	/// ```
109	/// use rayon::prelude::*;
110	/// let chunks: Vec<_> = [`1`, `2`, `3`, `4`, `5`].par_chunks(`2`).collect();
111	/// assert_eq!(chunks, vec![&[`1`, `2`][..], &[`3`, `4`], &[`5`]]);
112	/// ```
113	#[track_caller]
114	fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
115	assert!(chunk_size != `0`, "chunk_size must not be zero");
116	Chunks::new(chunk_size, self.as_parallel_slice())
117	}
118
119	/// Returns a parallel iterator over `chunk_size` elements of
120	/// `self` at a time. The chunks do not overlap.
121	///
122	/// If `chunk_size` does not divide the length of the slice, then the
123	/// last up to `chunk_size-1` elements will be omitted and can be
124	/// retrieved from the remainder function of the iterator.
125	///
126	/// # Examples
127	///
128	/// ```
129	/// use rayon::prelude::*;
130	/// let chunks: Vec<_> = [`1`, `2`, `3`, `4`, `5`].par_chunks_exact(`2`).collect();
131	/// assert_eq!(chunks, vec![&[`1`, `2`][..], &[`3`, `4`]]);
132	/// ```
133	#[track_caller]
134	fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
135	assert!(chunk_size != `0`, "chunk_size must not be zero");
136	ChunksExact::new(chunk_size, self.as_parallel_slice())
137	}
138
139	/// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
140	/// starting at the end. The chunks do not overlap.
141	///
142	/// If the number of elements in the iterator is not divisible by
143	/// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
144	/// other chunks will have that exact length.
145	///
146	/// # Examples
147	///
148	/// ```
149	/// use rayon::prelude::*;
150	/// let chunks: Vec<_> = [`1`, `2`, `3`, `4`, `5`].par_rchunks(`2`).collect();
151	/// assert_eq!(chunks, vec![&[`4`, `5`][..], &[`2`, `3`], &[`1`]]);
152	/// ```
153	#[track_caller]
154	fn par_rchunks(&self, chunk_size: usize) -> RChunks<'_, T> {
155	assert!(chunk_size != `0`, "chunk_size must not be zero");
156	RChunks::new(chunk_size, self.as_parallel_slice())
157	}
158
159	/// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
160	/// starting at the end. The chunks do not overlap.
161	///
162	/// If `chunk_size` does not divide the length of the slice, then the
163	/// last up to `chunk_size-1` elements will be omitted and can be
164	/// retrieved from the remainder function of the iterator.
165	///
166	/// # Examples
167	///
168	/// ```
169	/// use rayon::prelude::*;
170	/// let chunks: Vec<_> = [`1`, `2`, `3`, `4`, `5`].par_rchunks_exact(`2`).collect();
171	/// assert_eq!(chunks, vec![&[`4`, `5`][..], &[`2`, `3`]]);
172	/// ```
173	#[track_caller]
174	fn par_rchunks_exact(&self, chunk_size: usize) -> RChunksExact<'_, T> {
175	assert!(chunk_size != `0`, "chunk_size must not be zero");
176	RChunksExact::new(chunk_size, self.as_parallel_slice())
177	}
178
179	/// Returns a parallel iterator over the slice producing non-overlapping runs
180	/// of elements using the predicate to separate them.
181	///
182	/// The predicate is called on two elements following themselves,
183	/// it means the predicate is called on `slice[0]` and `slice[1]`
184	/// then on `slice[1]` and `slice[2]` and so on.
185	///
186	/// # Examples
187	///
188	/// ```
189	/// use rayon::prelude::*;
190	/// let chunks: Vec<_> = [`1`, `2`, `2`, `3`, `3`, `3`].par_chunk_by(\|&x, &y\| x == y).collect();
191	/// assert_eq!(chunks[`0`], &[`1`]);
192	/// assert_eq!(chunks[`1`], &[`2`, `2`]);
193	/// assert_eq!(chunks[`2`], &[`3`, `3`, `3`]);
194	/// ```
195	fn par_chunk_by<F>(&self, pred: F) -> ChunkBy<'_, T, F>
196	where
197	F: Fn(&T, &T) -> bool + Send + Sync,
198	{
199	ChunkBy::new(self.as_parallel_slice(), pred)
200	}
201	}
202
203	impl<T: Sync> ParallelSlice<T> for [T] {
204	#[inline]
205	fn as_parallel_slice(&self) -> &[T] {
206	self
207	}
208	}
209
210	/// Parallel extensions for mutable slices.
211	pub trait ParallelSliceMut<T: Send> {
212	/// Returns a plain mutable slice, which is used to implement the rest of
213	/// the parallel methods.
214	fn as_parallel_slice_mut(&mut self) -> &mut [T];
215
216	/// Returns a parallel iterator over mutable subslices separated by
217	/// elements that match the separator.
218	///
219	/// # Examples
220	///
221	/// ```
222	/// use rayon::prelude::*;
223	/// let mut array = [`1`, `2`, `3`, `0`, `2`, `4`, `8`, `0`, `3`, `6`, `9`];
224	/// array.par_split_mut(\|i\| *i == `0`)
225	/// .for_each(\|slice\| slice.reverse());
226	/// assert_eq!(array, [`3`, `2`, `1`, `0`, `8`, `4`, `2`, `0`, `9`, `6`, `3`]);
227	/// ```
228	fn par_split_mut<P>(&mut self, separator: P) -> SplitMut<'_, T, P>
229	where
230	P: Fn(&T) -> bool + Sync + Send,
231	{
232	SplitMut {
233	slice: self.as_parallel_slice_mut(),
234	separator,
235	}
236	}
237
238	/// Returns a parallel iterator over mutable subslices separated by elements
239	/// that match the separator, including the matched part as a terminator.
240	///
241	/// # Examples
242	///
243	/// ```
244	/// use rayon::prelude::*;
245	/// let mut array = [`1`, `2`, `3`, `0`, `2`, `4`, `8`, `0`, `3`, `6`, `9`];
246	/// array.par_split_inclusive_mut(\|i\| *i == `0`)
247	/// .for_each(\|slice\| slice.reverse());
248	/// assert_eq!(array, [`0`, `3`, `2`, `1`, `0`, `8`, `4`, `2`, `9`, `6`, `3`]);
249	/// ```
250	fn par_split_inclusive_mut<P>(&mut self, separator: P) -> SplitInclusiveMut<'_, T, P>
251	where
252	P: Fn(&T) -> bool + Sync + Send,
253	{
254	SplitInclusiveMut {
255	slice: self.as_parallel_slice_mut(),
256	separator,
257	}
258	}
259
260	/// Returns a parallel iterator over at most `chunk_size` elements of
261	/// `self` at a time. The chunks are mutable and do not overlap.
262	///
263	/// If the number of elements in the iterator is not divisible by
264	/// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
265	/// other chunks will have that exact length.
266	///
267	/// # Examples
268	///
269	/// ```
270	/// use rayon::prelude::*;
271	/// let mut array = [`1`, `2`, `3`, `4`, `5`];
272	/// array.par_chunks_mut(`2`)
273	/// .for_each(\|slice\| slice.reverse());
274	/// assert_eq!(array, [`2`, `1`, `4`, `3`, `5`]);
275	/// ```
276	#[track_caller]
277	fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
278	assert!(chunk_size != `0`, "chunk_size must not be zero");
279	ChunksMut::new(chunk_size, self.as_parallel_slice_mut())
280	}
281
282	/// Returns a parallel iterator over `chunk_size` elements of
283	/// `self` at a time. The chunks are mutable and do not overlap.
284	///
285	/// If `chunk_size` does not divide the length of the slice, then the
286	/// last up to `chunk_size-1` elements will be omitted and can be
287	/// retrieved from the remainder function of the iterator.
288	///
289	/// # Examples
290	///
291	/// ```
292	/// use rayon::prelude::*;
293	/// let mut array = [`1`, `2`, `3`, `4`, `5`];
294	/// array.par_chunks_exact_mut(`3`)
295	/// .for_each(\|slice\| slice.reverse());
296	/// assert_eq!(array, [`3`, `2`, `1`, `4`, `5`]);
297	/// ```
298	#[track_caller]
299	fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
300	assert!(chunk_size != `0`, "chunk_size must not be zero");
301	ChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
302	}
303
304	/// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
305	/// starting at the end. The chunks are mutable and do not overlap.
306	///
307	/// If the number of elements in the iterator is not divisible by
308	/// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
309	/// other chunks will have that exact length.
310	///
311	/// # Examples
312	///
313	/// ```
314	/// use rayon::prelude::*;
315	/// let mut array = [`1`, `2`, `3`, `4`, `5`];
316	/// array.par_rchunks_mut(`2`)
317	/// .for_each(\|slice\| slice.reverse());
318	/// assert_eq!(array, [`1`, `3`, `2`, `5`, `4`]);
319	/// ```
320	#[track_caller]
321	fn par_rchunks_mut(&mut self, chunk_size: usize) -> RChunksMut<'_, T> {
322	assert!(chunk_size != `0`, "chunk_size must not be zero");
323	RChunksMut::new(chunk_size, self.as_parallel_slice_mut())
324	}
325
326	/// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
327	/// starting at the end. The chunks are mutable and do not overlap.
328	///
329	/// If `chunk_size` does not divide the length of the slice, then the
330	/// last up to `chunk_size-1` elements will be omitted and can be
331	/// retrieved from the remainder function of the iterator.
332	///
333	/// # Examples
334	///
335	/// ```
336	/// use rayon::prelude::*;
337	/// let mut array = [`1`, `2`, `3`, `4`, `5`];
338	/// array.par_rchunks_exact_mut(`3`)
339	/// .for_each(\|slice\| slice.reverse());
340	/// assert_eq!(array, [`1`, `2`, `5`, `4`, `3`]);
341	/// ```
342	#[track_caller]
343	fn par_rchunks_exact_mut(&mut self, chunk_size: usize) -> RChunksExactMut<'_, T> {
344	assert!(chunk_size != `0`, "chunk_size must not be zero");
345	RChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
346	}
347
348	/// Sorts the slice in parallel.
349	///
350	/// This sort is stable (i.e., does not reorder equal elements) and O(n* \* log(n)) worst-case.*
351	///
352	/// When applicable, unstable sorting is preferred because it is generally faster than stable
353	/// sorting and it doesn't allocate auxiliary memory.
354	/// See [`par_sort_unstable`](#method.par_sort_unstable).
355	///
356	/// # Current implementation
357	///
358	/// The current algorithm is an adaptive merge sort inspired by
359	/// [timsort](https://en.wikipedia.org/wiki/Timsort).
360	/// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
361	/// two or more sorted sequences concatenated one after another.
362	///
363	/// Also, it allocates temporary storage the same size as `self`, but for very short slices a
364	/// non-allocating insertion sort is used instead.
365	///
366	/// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
367	/// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
368	/// or descending runs are concatenated. Finally, the remaining chunks are merged together using
369	/// parallel subdivision of chunks and parallel merge operation.
370	///
371	/// # Examples
372	///
373	/// ```
374	/// use rayon::prelude::*;
375	///
376	/// let mut v = [`-5`, `4`, `1`, `-3`, `2`];
377	///
378	/// v.par_sort();
379	/// assert_eq!(v, [-`5`, -`3`, `1`, `2`, `4`]);
380	/// ```
381	fn par_sort(&mut self)
382	where
383	T: Ord,
384	{
385	par_mergesort(self.as_parallel_slice_mut(), T::lt);
386	}
387
388	/// Sorts the slice in parallel with a comparator function.
389	///
390	/// This sort is stable (i.e., does not reorder equal elements) and O(n* \* log(n)) worst-case.*
391	///
392	/// The comparator function must define a total ordering for the elements in the slice. If
393	/// the ordering is not total, the order of the elements is unspecified. An order is a
394	/// total order if it is (for all `a`, `b` and `c`):
395	///
396	/// total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and*
397	/// transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.*
398	///
399	/// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
400	/// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
401	///
402	/// ```
403	/// use rayon::prelude::*;
404	///
405	/// let mut floats = [`5f64`, `4.0`, `1.0`, `3.0`, `2.0`];
406	/// floats.par_sort_by(\|a, b\| a.partial_cmp(b).unwrap());
407	/// assert_eq!(floats, [`1.0`, `2.0`, `3.0`, `4.0`, `5.0`]);
408	/// ```
409	///
410	/// When applicable, unstable sorting is preferred because it is generally faster than stable
411	/// sorting and it doesn't allocate auxiliary memory.
412	/// See [`par_sort_unstable_by`](#method.par_sort_unstable_by).
413	///
414	/// # Current implementation
415	///
416	/// The current algorithm is an adaptive merge sort inspired by
417	/// [timsort](https://en.wikipedia.org/wiki/Timsort).
418	/// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
419	/// two or more sorted sequences concatenated one after another.
420	///
421	/// Also, it allocates temporary storage the same size as `self`, but for very short slices a
422	/// non-allocating insertion sort is used instead.
423	///
424	/// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
425	/// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
426	/// or descending runs are concatenated. Finally, the remaining chunks are merged together using
427	/// parallel subdivision of chunks and parallel merge operation.
428	///
429	/// # Examples
430	///
431	/// ```
432	/// use rayon::prelude::*;
433	///
434	/// let mut v = [`5`, `4`, `1`, `3`, `2`];
435	/// v.par_sort_by(\|a, b\| a.cmp(b));
436	/// assert_eq!(v, [`1`, `2`, `3`, `4`, `5`]);
437	///
438	/// // reverse sorting
439	/// v.par_sort_by(\|a, b\| b.cmp(a));
440	/// assert_eq!(v, [`5`, `4`, `3`, `2`, `1`]);
441	/// ```
442	fn par_sort_by<F>(&mut self, compare: F)
443	where
444	F: Fn(&T, &T) -> Ordering + Sync,
445	{
446	par_mergesort(self.as_parallel_slice_mut(), \|a, b\| {
447	compare(a, b) == Ordering::Less
448	});
449	}
450
451	/// Sorts the slice in parallel with a key extraction function.
452	///
453	/// This sort is stable (i.e., does not reorder equal elements) and O(m* \* n \* log(n))*
454	/// worst-case, where the key function is O(m).
455	///
456	/// For expensive key functions (e.g. functions that are not simple property accesses or
457	/// basic operations), [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) is likely to
458	/// be significantly faster, as it does not recompute element keys.
459	///
460	/// When applicable, unstable sorting is preferred because it is generally faster than stable
461	/// sorting and it doesn't allocate auxiliary memory.
462	/// See [`par_sort_unstable_by_key`](#method.par_sort_unstable_by_key).
463	///
464	/// # Current implementation
465	///
466	/// The current algorithm is an adaptive merge sort inspired by
467	/// [timsort](https://en.wikipedia.org/wiki/Timsort).
468	/// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
469	/// two or more sorted sequences concatenated one after another.
470	///
471	/// Also, it allocates temporary storage the same size as `self`, but for very short slices a
472	/// non-allocating insertion sort is used instead.
473	///
474	/// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
475	/// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
476	/// or descending runs are concatenated. Finally, the remaining chunks are merged together using
477	/// parallel subdivision of chunks and parallel merge operation.
478	///
479	/// # Examples
480	///
481	/// ```
482	/// use rayon::prelude::*;
483	///
484	/// let mut v = [`-5i32`, `4`, `1`, `-3`, `2`];
485	///
486	/// v.par_sort_by_key(\|k\| k.abs());
487	/// assert_eq!(v, [`1`, `2`, -`3`, `4`, -`5`]);
488	/// ```
489	fn par_sort_by_key<K, F>(&mut self, f: F)
490	where
491	K: Ord,
492	F: Fn(&T) -> K + Sync,
493	{
494	par_mergesort(self.as_parallel_slice_mut(), \|a, b\| f(a).lt(&f(b)));
495	}
496
497	/// Sorts the slice in parallel with a key extraction function.
498	///
499	/// During sorting, the key function is called at most once per element, by using
500	/// temporary storage to remember the results of key evaluation.
501	/// The key function is called in parallel, so the order of calls is completely unspecified.
502	///
503	/// This sort is stable (i.e., does not reorder equal elements) and O(m* \* n + n \* log(n))*
504	/// worst-case, where the key function is O(m).
505	///
506	/// For simple key functions (e.g., functions that are property accesses or
507	/// basic operations), [`par_sort_by_key`](#method.par_sort_by_key) is likely to be
508	/// faster.
509	///
510	/// # Current implementation
511	///
512	/// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
513	/// which combines the fast average case of randomized quicksort with the fast worst case of
514	/// heapsort, while achieving linear time on slices with certain patterns. It uses some
515	/// randomization to avoid degenerate cases, but with a fixed seed to always provide
516	/// deterministic behavior.
517	///
518	/// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the
519	/// length of the slice.
520	///
521	/// All quicksorts work in two stages: partitioning into two halves followed by recursive
522	/// calls. The partitioning phase is sequential, but the two recursive calls are performed in
523	/// parallel. Finally, after sorting the cached keys, the item positions are updated sequentially.
524	///
525	/// [pdqsort]: https://github.com/orlp/pdqsort
526	///
527	/// # Examples
528	///
529	/// ```
530	/// use rayon::prelude::*;
531	///
532	/// let mut v = [`-5i32`, `4`, `32`, `-3`, `2`];
533	///
534	/// v.par_sort_by_cached_key(\|k\| k.to_string());
535	/// assert!(v == [-`3`, -`5`, `2`, `32`, `4`]);
536	/// ```
537	fn par_sort_by_cached_key<K, F>(&mut self, f: F)
538	where
539	F: Fn(&T) -> K + Sync,
540	K: Ord + Send,
541	{
542	let slice = self.as_parallel_slice_mut();
543	let len = slice.len();
544	if len < `2` {
545	return;
546	}
547
548	// Helper macro for indexing our vector by the smallest possible type, to reduce allocation.
549	macro_rules! sort_by_key {
550	($t:ty) => {{
551	let mut indices: Vec<_> = slice
552	.par_iter_mut()
553	.enumerate()
554	.map(\|(i, x)\| (f(&*x), i as $t))
555	.collect();
556	// The elements of `indices` are unique, as they are indexed, so any sort will be
557	// stable with respect to the original slice. We use `sort_unstable` here because
558	// it requires less memory allocation.
559	indices.par_sort_unstable();
560	for i in `0`..len {
561	let mut index = indices[i].`1`;
562	while (index as usize) < i {
563	index = indices[index as usize].`1`;
564	}
565	indices[i].`1` = index;
566	slice.swap(i, index as usize);
567	}
568	}};
569	}
570
571	let sz_u8 = mem::size_of::<(K, u8)>();
572	let sz_u16 = mem::size_of::<(K, u16)>();
573	let sz_u32 = mem::size_of::<(K, u32)>();
574	let sz_usize = mem::size_of::<(K, usize)>();
575
576	if sz_u8 < sz_u16 && len <= (std::u8::MAX as usize) {
577	return sort_by_key!(u8);
578	}
579	if sz_u16 < sz_u32 && len <= (std::u16::MAX as usize) {
580	return sort_by_key!(u16);
581	}
582	if sz_u32 < sz_usize && len <= (std::u32::MAX as usize) {
583	return sort_by_key!(u32);
584	}
585	sort_by_key!(usize)
586	}
587
588	/// Sorts the slice in parallel, but might not preserve the order of equal elements.
589	///
590	/// This sort is unstable (i.e., may reorder equal elements), in-place
591	/// (i.e., does not allocate), and O(n* \* log(n)) worst-case.*
592	///
593	/// # Current implementation
594	///
595	/// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
596	/// which combines the fast average case of randomized quicksort with the fast worst case of
597	/// heapsort, while achieving linear time on slices with certain patterns. It uses some
598	/// randomization to avoid degenerate cases, but with a fixed seed to always provide
599	/// deterministic behavior.
600	///
601	/// It is typically faster than stable sorting, except in a few special cases, e.g., when the
602	/// slice consists of several concatenated sorted sequences.
603	///
604	/// All quicksorts work in two stages: partitioning into two halves followed by recursive
605	/// calls. The partitioning phase is sequential, but the two recursive calls are performed in
606	/// parallel.
607	///
608	/// [pdqsort]: https://github.com/orlp/pdqsort
609	///
610	/// # Examples
611	///
612	/// ```
613	/// use rayon::prelude::*;
614	///
615	/// let mut v = [`-5`, `4`, `1`, `-3`, `2`];
616	///
617	/// v.par_sort_unstable();
618	/// assert_eq!(v, [-`5`, -`3`, `1`, `2`, `4`]);
619	/// ```
620	fn par_sort_unstable(&mut self)
621	where
622	T: Ord,
623	{
624	par_quicksort(self.as_parallel_slice_mut(), T::lt);
625	}
626
627	/// Sorts the slice in parallel with a comparator function, but might not preserve the order of
628	/// equal elements.
629	///
630	/// This sort is unstable (i.e., may reorder equal elements), in-place
631	/// (i.e., does not allocate), and O(n* \* log(n)) worst-case.*
632	///
633	/// The comparator function must define a total ordering for the elements in the slice. If
634	/// the ordering is not total, the order of the elements is unspecified. An order is a
635	/// total order if it is (for all `a`, `b` and `c`):
636	///
637	/// total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and*
638	/// transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.*
639	///
640	/// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
641	/// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
642	///
643	/// ```
644	/// use rayon::prelude::*;
645	///
646	/// let mut floats = [`5f64`, `4.0`, `1.0`, `3.0`, `2.0`];
647	/// floats.par_sort_unstable_by(\|a, b\| a.partial_cmp(b).unwrap());
648	/// assert_eq!(floats, [`1.0`, `2.0`, `3.0`, `4.0`, `5.0`]);
649	/// ```
650	///
651	/// # Current implementation
652	///
653	/// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
654	/// which combines the fast average case of randomized quicksort with the fast worst case of
655	/// heapsort, while achieving linear time on slices with certain patterns. It uses some
656	/// randomization to avoid degenerate cases, but with a fixed seed to always provide
657	/// deterministic behavior.
658	///
659	/// It is typically faster than stable sorting, except in a few special cases, e.g., when the
660	/// slice consists of several concatenated sorted sequences.
661	///
662	/// All quicksorts work in two stages: partitioning into two halves followed by recursive
663	/// calls. The partitioning phase is sequential, but the two recursive calls are performed in
664	/// parallel.
665	///
666	/// [pdqsort]: https://github.com/orlp/pdqsort
667	///
668	/// # Examples
669	///
670	/// ```
671	/// use rayon::prelude::*;
672	///
673	/// let mut v = [`5`, `4`, `1`, `3`, `2`];
674	/// v.par_sort_unstable_by(\|a, b\| a.cmp(b));
675	/// assert_eq!(v, [`1`, `2`, `3`, `4`, `5`]);
676	///
677	/// // reverse sorting
678	/// v.par_sort_unstable_by(\|a, b\| b.cmp(a));
679	/// assert_eq!(v, [`5`, `4`, `3`, `2`, `1`]);
680	/// ```
681	fn par_sort_unstable_by<F>(&mut self, compare: F)
682	where
683	F: Fn(&T, &T) -> Ordering + Sync,
684	{
685	par_quicksort(self.as_parallel_slice_mut(), \|a, b\| {
686	compare(a, b) == Ordering::Less
687	});
688	}
689
690	/// Sorts the slice in parallel with a key extraction function, but might not preserve the order
691	/// of equal elements.
692	///
693	/// This sort is unstable (i.e., may reorder equal elements), in-place
694	/// (i.e., does not allocate), and O(m \ n \* log(n)) worst-case,*
695	/// where the key function is O(m).
696	///
697	/// # Current implementation
698	///
699	/// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
700	/// which combines the fast average case of randomized quicksort with the fast worst case of
701	/// heapsort, while achieving linear time on slices with certain patterns. It uses some
702	/// randomization to avoid degenerate cases, but with a fixed seed to always provide
703	/// deterministic behavior.
704	///
705	/// Due to its key calling strategy, `par_sort_unstable_by_key` is likely to be slower than
706	/// [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) in cases where the key function
707	/// is expensive.
708	///
709	/// All quicksorts work in two stages: partitioning into two halves followed by recursive
710	/// calls. The partitioning phase is sequential, but the two recursive calls are performed in
711	/// parallel.
712	///
713	/// [pdqsort]: https://github.com/orlp/pdqsort
714	///
715	/// # Examples
716	///
717	/// ```
718	/// use rayon::prelude::*;
719	///
720	/// let mut v = [`-5i32`, `4`, `1`, `-3`, `2`];
721	///
722	/// v.par_sort_unstable_by_key(\|k\| k.abs());
723	/// assert_eq!(v, [`1`, `2`, -`3`, `4`, -`5`]);
724	/// ```
725	fn par_sort_unstable_by_key<K, F>(&mut self, f: F)
726	where
727	K: Ord,
728	F: Fn(&T) -> K + Sync,
729	{
730	par_quicksort(self.as_parallel_slice_mut(), \|a, b\| f(a).lt(&f(b)));
731	}
732
733	/// Returns a parallel iterator over the slice producing non-overlapping mutable
734	/// runs of elements using the predicate to separate them.
735	///
736	/// The predicate is called on two elements following themselves,
737	/// it means the predicate is called on `slice[0]` and `slice[1]`
738	/// then on `slice[1]` and `slice[2]` and so on.
739	///
740	/// # Examples
741	///
742	/// ```
743	/// use rayon::prelude::*;
744	/// let mut xs = [`1`, `2`, `2`, `3`, `3`, `3`];
745	/// let chunks: Vec<_> = xs.par_chunk_by_mut(\|&x, &y\| x == y).collect();
746	/// assert_eq!(chunks[`0`], &mut [`1`]);
747	/// assert_eq!(chunks[`1`], &mut [`2`, `2`]);
748	/// assert_eq!(chunks[`2`], &mut [`3`, `3`, `3`]);
749	/// ```
750	fn par_chunk_by_mut<F>(&mut self, pred: F) -> ChunkByMut<'_, T, F>
751	where
752	F: Fn(&T, &T) -> bool + Send + Sync,
753	{
754	ChunkByMut::new(self.as_parallel_slice_mut(), pred)
755	}
756	}
757
758	impl<T: Send> ParallelSliceMut<T> for [T] {
759	#[inline]
760	fn as_parallel_slice_mut(&mut self) -> &mut [T] {
761	self
762	}
763	}
764
765	impl<'data, T: Sync + 'data> IntoParallelIterator for &'data [T] {
766	type Item = &'data T;
767	type Iter = Iter<'data, T>;
768
769	fn into_par_iter(self) -> Self::Iter {
770	Iter { slice: self }
771	}
772	}
773
774	impl<'data, T: Send + 'data> IntoParallelIterator for &'data mut [T] {
775	type Item = &'data mut T;
776	type Iter = IterMut<'data, T>;
777
778	fn into_par_iter(self) -> Self::Iter {
779	IterMut { slice: self }
780	}
781	}
782
783	/// Parallel iterator over immutable items in a slice
784	#[derive(Debug)]
785	pub struct Iter<'data, T: Sync> {
786	slice: &'data [T],
787	}
788
789	impl<'data, T: Sync> Clone for Iter<'data, T> {
790	fn clone(&self) -> Self {
791	Iter { ..*self }
792	}
793	}
794
795	impl<'data, T: Sync + 'data> ParallelIterator for Iter<'data, T> {
796	type Item = &'data T;
797
798	fn drive_unindexed<C>(self, consumer: C) -> C::Result
799	where
800	C: UnindexedConsumer<Self::Item>,
801	{
802	bridge(self, consumer)
803	}
804
805	fn opt_len(&self) -> Option<usize> {
806	Some(self.len())
807	}
808	}
809
810	impl<'data, T: Sync + 'data> IndexedParallelIterator for Iter<'data, T> {
811	fn drive<C>(self, consumer: C) -> C::Result
812	where
813	C: Consumer<Self::Item>,
814	{
815	bridge(self, consumer)
816	}
817
818	fn len(&self) -> usize {
819	self.slice.len()
820	}
821
822	fn with_producer<CB>(self, callback: CB) -> CB::Output
823	where
824	CB: ProducerCallback<Self::Item>,
825	{
826	callback.callback(producer:IterProducer { slice: self.slice })
827	}
828	}
829
830	struct IterProducer<'data, T: Sync> {
831	slice: &'data [T],
832	}
833
834	impl<'data, T: 'data + Sync> Producer for IterProducer<'data, T> {
835	type Item = &'data T;
836	type IntoIter = ::std::slice::Iter<'data, T>;
837
838	fn into_iter(self) -> Self::IntoIter {
839	self.slice.iter()
840	}
841
842	fn split_at(self, index: usize) -> (Self, Self) {
843	let (left: &[T], right: &[T]) = self.slice.split_at(mid:index);
844	(IterProducer { slice: left }, IterProducer { slice: right })
845	}
846	}
847
848	/// Parallel iterator over immutable overlapping windows of a slice
849	#[derive(Debug)]
850	pub struct Windows<'data, T: Sync> {
851	window_size: usize,
852	slice: &'data [T],
853	}
854
855	impl<'data, T: Sync> Clone for Windows<'data, T> {
856	fn clone(&self) -> Self {
857	Windows { ..*self }
858	}
859	}
860
861	impl<'data, T: Sync + 'data> ParallelIterator for Windows<'data, T> {
862	type Item = &'data [T];
863
864	fn drive_unindexed<C>(self, consumer: C) -> C::Result
865	where
866	C: UnindexedConsumer<Self::Item>,
867	{
868	bridge(self, consumer)
869	}
870
871	fn opt_len(&self) -> Option<usize> {
872	Some(self.len())
873	}
874	}
875
876	impl<'data, T: Sync + 'data> IndexedParallelIterator for Windows<'data, T> {
877	fn drive<C>(self, consumer: C) -> C::Result
878	where
879	C: Consumer<Self::Item>,
880	{
881	bridge(self, consumer)
882	}
883
884	fn len(&self) -> usize {
885	assert!(self.window_size >= `1`);
886	self.slice.len().saturating_sub(self.window_size - `1`)
887	}
888
889	fn with_producer<CB>(self, callback: CB) -> CB::Output
890	where
891	CB: ProducerCallback<Self::Item>,
892	{
893	callback.callback(producer:WindowsProducer {
894	window_size: self.window_size,
895	slice: self.slice,
896	})
897	}
898	}
899
900	struct WindowsProducer<'data, T: Sync> {
901	window_size: usize,
902	slice: &'data [T],
903	}
904
905	impl<'data, T: 'data + Sync> Producer for WindowsProducer<'data, T> {
906	type Item = &'data [T];
907	type IntoIter = ::std::slice::Windows<'data, T>;
908
909	fn into_iter(self) -> Self::IntoIter {
910	self.slice.windows(self.window_size)
911	}
912
913	fn split_at(self, index: usize) -> (Self, Self) {
914	let left_index: usize = Ord::min(self.slice.len(), other:index + (self.window_size - `1`));
915	let left: &[T] = &self.slice[..left_index];
916	let right: &[T] = &self.slice[index..];
917	(
918	WindowsProducer {
919	window_size: self.window_size,
920	slice: left,
921	},
922	WindowsProducer {
923	window_size: self.window_size,
924	slice: right,
925	},
926	)
927	}
928	}
929
930	/// Parallel iterator over mutable items in a slice
931	#[derive(Debug)]
932	pub struct IterMut<'data, T: Send> {
933	slice: &'data mut [T],
934	}
935
936	impl<'data, T: Send + 'data> ParallelIterator for IterMut<'data, T> {
937	type Item = &'data mut T;
938
939	fn drive_unindexed<C>(self, consumer: C) -> C::Result
940	where
941	C: UnindexedConsumer<Self::Item>,
942	{
943	bridge(self, consumer)
944	}
945
946	fn opt_len(&self) -> Option<usize> {
947	Some(self.len())
948	}
949	}
950
951	impl<'data, T: Send + 'data> IndexedParallelIterator for IterMut<'data, T> {
952	fn drive<C>(self, consumer: C) -> C::Result
953	where
954	C: Consumer<Self::Item>,
955	{
956	bridge(self, consumer)
957	}
958
959	fn len(&self) -> usize {
960	self.slice.len()
961	}
962
963	fn with_producer<CB>(self, callback: CB) -> CB::Output
964	where
965	CB: ProducerCallback<Self::Item>,
966	{
967	callback.callback(producer:IterMutProducer { slice: self.slice })
968	}
969	}
970
971	struct IterMutProducer<'data, T: Send> {
972	slice: &'data mut [T],
973	}
974
975	impl<'data, T: 'data + Send> Producer for IterMutProducer<'data, T> {
976	type Item = &'data mut T;
977	type IntoIter = ::std::slice::IterMut<'data, T>;
978
979	fn into_iter(self) -> Self::IntoIter {
980	self.slice.iter_mut()
981	}
982
983	fn split_at(self, index: usize) -> (Self, Self) {
984	let (left: &mut [T], right: &mut [T]) = self.slice.split_at_mut(mid:index);
985	(
986	IterMutProducer { slice: left },
987	IterMutProducer { slice: right },
988	)
989	}
990	}
991
992	/// Parallel iterator over slices separated by a predicate
993	pub struct Split<'data, T, P> {
994	slice: &'data [T],
995	separator: P,
996	}
997
998	impl<'data, T, P: Clone> Clone for Split<'data, T, P> {
999	fn clone(&self) -> Self {
1000	Split {
1001	separator: self.separator.clone(),
1002	..*self
1003	}
1004	}
1005	}
1006
1007	impl<'data, T: Debug, P> Debug for Split<'data, T, P> {
1008	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1009	f.debug_struct("Split").field(name:"slice", &self.slice).finish()
1010	}
1011	}
1012
1013	impl<'data, T, P> ParallelIterator for Split<'data, T, P>
1014	where
1015	P: Fn(&T) -> bool + Sync + Send,
1016	T: Sync,
1017	{
1018	type Item = &'data [T];
1019
1020	fn drive_unindexed<C>(self, consumer: C) -> C::Result
1021	where
1022	C: UnindexedConsumer<Self::Item>,
1023	{
1024	let producer: SplitProducer<'_, P, &'data …> = SplitProducer::new(self.slice, &self.separator);
1025	bridge_unindexed(producer, consumer)
1026	}
1027	}
1028
1029	/// Parallel iterator over slices separated by a predicate,
1030	/// including the matched part as a terminator.
1031	pub struct SplitInclusive<'data, T, P> {
1032	slice: &'data [T],
1033	separator: P,
1034	}
1035
1036	impl<'data, T, P: Clone> Clone for SplitInclusive<'data, T, P> {
1037	fn clone(&self) -> Self {
1038	SplitInclusive {
1039	separator: self.separator.clone(),
1040	..*self
1041	}
1042	}
1043	}
1044
1045	impl<'data, T: Debug, P> Debug for SplitInclusive<'data, T, P> {
1046	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1047	f&mut DebugStruct<'_, '_>.debug_struct("SplitInclusive")
1048	.field(name:"slice", &self.slice)
1049	.finish()
1050	}
1051	}
1052
1053	impl<'data, T, P> ParallelIterator for SplitInclusive<'data, T, P>
1054	where
1055	P: Fn(&T) -> bool + Sync + Send,
1056	T: Sync,
1057	{
1058	type Item = &'data [T];
1059
1060	fn drive_unindexed<C>(self, consumer: C) -> C::Result
1061	where
1062	C: UnindexedConsumer<Self::Item>,
1063	{
1064	let producer: SplitProducer<'_, P, &'data …, true> = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
1065	bridge_unindexed(producer, consumer)
1066	}
1067	}
1068
1069	/// Implement support for `SplitProducer`.
1070	impl<'data, T, P> Fissile<P> for &'data [T]
1071	where
1072	P: Fn(&T) -> bool,
1073	{
1074	fn length(&self) -> usize {
1075	self.len()
1076	}
1077
1078	fn midpoint(&self, end: usize) -> usize {
1079	end / `2`
1080	}
1081
1082	fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
1083	self[start..end].iter().position(separator)
1084	}
1085
1086	fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
1087	self[..end].iter().rposition(separator)
1088	}
1089
1090	fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
1091	if INCL {
1092	// include the separator in the left side
1093	self.split_at(index + `1`)
1094	} else {
1095	let (left, right) = self.split_at(index);
1096	(left, &right[`1`..]) // skip the separator
1097	}
1098	}
1099
1100	fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
1101	where
1102	F: Folder<Self>,
1103	Self: Send,
1104	{
1105	if INCL {
1106	debug_assert!(!skip_last);
1107	folder.consume_iter(self.split_inclusive(separator))
1108	} else {
1109	let mut split = self.split(separator);
1110	if skip_last {
1111	split.next_back();
1112	}
1113	folder.consume_iter(split)
1114	}
1115	}
1116	}
1117
1118	/// Parallel iterator over mutable slices separated by a predicate
1119	pub struct SplitMut<'data, T, P> {
1120	slice: &'data mut [T],
1121	separator: P,
1122	}
1123
1124	impl<'data, T: Debug, P> Debug for SplitMut<'data, T, P> {
1125	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1126	f&mut DebugStruct<'_, '_>.debug_struct("SplitMut")
1127	.field(name:"slice", &self.slice)
1128	.finish()
1129	}
1130	}
1131
1132	impl<'data, T, P> ParallelIterator for SplitMut<'data, T, P>
1133	where
1134	P: Fn(&T) -> bool + Sync + Send,
1135	T: Send,
1136	{
1137	type Item = &'data mut [T];
1138
1139	fn drive_unindexed<C>(self, consumer: C) -> C::Result
1140	where
1141	C: UnindexedConsumer<Self::Item>,
1142	{
1143	let producer: SplitProducer<'_, P, &'data mut …> = SplitProducer::new(self.slice, &self.separator);
1144	bridge_unindexed(producer, consumer)
1145	}
1146	}
1147
1148	/// Parallel iterator over mutable slices separated by a predicate,
1149	/// including the matched part as a terminator.
1150	pub struct SplitInclusiveMut<'data, T, P> {
1151	slice: &'data mut [T],
1152	separator: P,
1153	}
1154
1155	impl<'data, T: Debug, P> Debug for SplitInclusiveMut<'data, T, P> {
1156	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1157	f&mut DebugStruct<'_, '_>.debug_struct("SplitInclusiveMut")
1158	.field(name:"slice", &self.slice)
1159	.finish()
1160	}
1161	}
1162
1163	impl<'data, T, P> ParallelIterator for SplitInclusiveMut<'data, T, P>
1164	where
1165	P: Fn(&T) -> bool + Sync + Send,
1166	T: Send,
1167	{
1168	type Item = &'data mut [T];
1169
1170	fn drive_unindexed<C>(self, consumer: C) -> C::Result
1171	where
1172	C: UnindexedConsumer<Self::Item>,
1173	{
1174	let producer: SplitProducer<'_, P, &'data mut …, true> = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
1175	bridge_unindexed(producer, consumer)
1176	}
1177	}
1178
1179	/// Implement support for `SplitProducer`.
1180	impl<'data, T, P> Fissile<P> for &'data mut [T]
1181	where
1182	P: Fn(&T) -> bool,
1183	{
1184	fn length(&self) -> usize {
1185	self.len()
1186	}
1187
1188	fn midpoint(&self, end: usize) -> usize {
1189	end / `2`
1190	}
1191
1192	fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
1193	self[start..end].iter().position(separator)
1194	}
1195
1196	fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
1197	self[..end].iter().rposition(separator)
1198	}
1199
1200	fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
1201	if INCL {
1202	// include the separator in the left side
1203	self.split_at_mut(index + `1`)
1204	} else {
1205	let (left, right) = self.split_at_mut(index);
1206	(left, &mut right[`1`..]) // skip the separator
1207	}
1208	}
1209
1210	fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
1211	where
1212	F: Folder<Self>,
1213	Self: Send,
1214	{
1215	if INCL {
1216	debug_assert!(!skip_last);
1217	folder.consume_iter(self.split_inclusive_mut(separator))
1218	} else {
1219	let mut split = self.split_mut(separator);
1220	if skip_last {
1221	split.next_back();
1222	}
1223	folder.consume_iter(split)
1224	}
1225	}
1226	}
1227