// Source: crates/rayon/src/slice/mod.rs

1	//! Parallel iterator types for [slices][std::slice]
2	//!
3	//! You will rarely need to interact with this module directly unless you need
4	//! to name one of the iterator types.
5	//!
6	//! [std::slice]: https://doc.rust-lang.org/stable/std/slice/
7
8	mod chunks;
9	mod mergesort;
10	mod quicksort;
11	mod rchunks;
12
13	mod test;
14
15	use self::mergesort::par_mergesort;
16	use self::quicksort::par_quicksort;
17	use crate::iter::plumbing::*;
18	use crate::iter::*;
19	use crate::split_producer::*;
20	use std::cmp;
21	use std::cmp::Ordering;
22	use std::fmt::{self, Debug};
23	use std::mem;
24
25	pub use self::chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut};
26	pub use self::rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut};
27
28	/// Parallel extensions for slices.
29	pub trait ParallelSlice<T: Sync> {
30	/// Returns a plain slice, which is used to implement the rest of the
31	/// parallel methods.
32	fn as_parallel_slice(&self) -> &[T];
33
34	/// Returns a parallel iterator over subslices separated by elements that
35	/// match the separator.
36	///
37	/// # Examples
38	///
39	/// ```
40	/// use rayon::prelude::*;
41	/// let smallest = [`1`, `2`, `3`, `0`, `2`, `4`, `8`, `0`, `3`, `6`, `9`]
42	/// .par_split(\|i\| *i == `0`)
43	/// .map(\|numbers\| numbers.iter().min().unwrap())
44	/// .min();
45	/// assert_eq!(Some(&`1`), smallest);
46	/// ```
47	fn par_split<P>(&self, separator: P) -> Split<'_, T, P>
48	where
49	P: Fn(&T) -> bool + Sync + Send,
50	{
51	Split {
52	slice: self.as_parallel_slice(),
53	separator,
54	}
55	}
56
57	/// Returns a parallel iterator over all contiguous windows of length
58	/// `window_size`. The windows overlap.
59	///
60	/// # Examples
61	///
62	/// ```
63	/// use rayon::prelude::*;
64	/// let windows: Vec<_> = [`1`, `2`, `3`].par_windows(`2`).collect();
65	/// assert_eq!(vec![[`1`, `2`], [`2`, `3`]], windows);
66	/// ```
67	fn par_windows(&self, window_size: usize) -> Windows<'_, T> {
68	Windows {
69	window_size,
70	slice: self.as_parallel_slice(),
71	}
72	}
73
74	/// Returns a parallel iterator over at most `chunk_size` elements of
75	/// `self` at a time. The chunks do not overlap.
76	///
77	/// If the number of elements in the iterator is not divisible by
78	/// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
79	/// other chunks will have that exact length.
80	///
81	/// # Examples
82	///
83	/// ```
84	/// use rayon::prelude::*;
85	/// let chunks: Vec<_> = [`1`, `2`, `3`, `4`, `5`].par_chunks(`2`).collect();
86	/// assert_eq!(chunks, vec![&[`1`, `2`][..], &[`3`, `4`], &[`5`]]);
87	/// ```
88	#[track_caller]
89	fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
90	assert!(chunk_size != `0`, "chunk_size must not be zero");
91	Chunks::new(chunk_size, self.as_parallel_slice())
92	}
93
94	/// Returns a parallel iterator over `chunk_size` elements of
95	/// `self` at a time. The chunks do not overlap.
96	///
97	/// If `chunk_size` does not divide the length of the slice, then the
98	/// last up to `chunk_size-1` elements will be omitted and can be
99	/// retrieved from the remainder function of the iterator.
100	///
101	/// # Examples
102	///
103	/// ```
104	/// use rayon::prelude::*;
105	/// let chunks: Vec<_> = [`1`, `2`, `3`, `4`, `5`].par_chunks_exact(`2`).collect();
106	/// assert_eq!(chunks, vec![&[`1`, `2`][..], &[`3`, `4`]]);
107	/// ```
108	#[track_caller]
109	fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
110	assert!(chunk_size != `0`, "chunk_size must not be zero");
111	ChunksExact::new(chunk_size, self.as_parallel_slice())
112	}
113
114	/// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
115	/// starting at the end. The chunks do not overlap.
116	///
117	/// If the number of elements in the iterator is not divisible by
118	/// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
119	/// other chunks will have that exact length.
120	///
121	/// # Examples
122	///
123	/// ```
124	/// use rayon::prelude::*;
125	/// let chunks: Vec<_> = [`1`, `2`, `3`, `4`, `5`].par_rchunks(`2`).collect();
126	/// assert_eq!(chunks, vec![&[`4`, `5`][..], &[`2`, `3`], &[`1`]]);
127	/// ```
128	#[track_caller]
129	fn par_rchunks(&self, chunk_size: usize) -> RChunks<'_, T> {
130	assert!(chunk_size != `0`, "chunk_size must not be zero");
131	RChunks::new(chunk_size, self.as_parallel_slice())
132	}
133
134	/// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
135	/// starting at the end. The chunks do not overlap.
136	///
137	/// If `chunk_size` does not divide the length of the slice, then the
138	/// last up to `chunk_size-1` elements will be omitted and can be
139	/// retrieved from the remainder function of the iterator.
140	///
141	/// # Examples
142	///
143	/// ```
144	/// use rayon::prelude::*;
145	/// let chunks: Vec<_> = [`1`, `2`, `3`, `4`, `5`].par_rchunks_exact(`2`).collect();
146	/// assert_eq!(chunks, vec![&[`4`, `5`][..], &[`2`, `3`]]);
147	/// ```
148	#[track_caller]
149	fn par_rchunks_exact(&self, chunk_size: usize) -> RChunksExact<'_, T> {
150	assert!(chunk_size != `0`, "chunk_size must not be zero");
151	RChunksExact::new(chunk_size, self.as_parallel_slice())
152	}
153	}
154
155	impl<T: Sync> ParallelSlice<T> for [T] {
156	#[inline]
157	fn as_parallel_slice(&self) -> &[T] {
158	self
159	}
160	}
161
162	/// Parallel extensions for mutable slices.
163	pub trait ParallelSliceMut<T: Send> {
164	/// Returns a plain mutable slice, which is used to implement the rest of
165	/// the parallel methods.
166	fn as_parallel_slice_mut(&mut self) -> &mut [T];
167
168	/// Returns a parallel iterator over mutable subslices separated by
169	/// elements that match the separator.
170	///
171	/// # Examples
172	///
173	/// ```
174	/// use rayon::prelude::*;
175	/// let mut array = [`1`, `2`, `3`, `0`, `2`, `4`, `8`, `0`, `3`, `6`, `9`];
176	/// array.par_split_mut(\|i\| *i == `0`)
177	/// .for_each(\|slice\| slice.reverse());
178	/// assert_eq!(array, [`3`, `2`, `1`, `0`, `8`, `4`, `2`, `0`, `9`, `6`, `3`]);
179	/// ```
180	fn par_split_mut<P>(&mut self, separator: P) -> SplitMut<'_, T, P>
181	where
182	P: Fn(&T) -> bool + Sync + Send,
183	{
184	SplitMut {
185	slice: self.as_parallel_slice_mut(),
186	separator,
187	}
188	}
189
190	/// Returns a parallel iterator over at most `chunk_size` elements of
191	/// `self` at a time. The chunks are mutable and do not overlap.
192	///
193	/// If the number of elements in the iterator is not divisible by
194	/// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
195	/// other chunks will have that exact length.
196	///
197	/// # Examples
198	///
199	/// ```
200	/// use rayon::prelude::*;
201	/// let mut array = [`1`, `2`, `3`, `4`, `5`];
202	/// array.par_chunks_mut(`2`)
203	/// .for_each(\|slice\| slice.reverse());
204	/// assert_eq!(array, [`2`, `1`, `4`, `3`, `5`]);
205	/// ```
206	#[track_caller]
207	fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
208	assert!(chunk_size != `0`, "chunk_size must not be zero");
209	ChunksMut::new(chunk_size, self.as_parallel_slice_mut())
210	}
211
212	/// Returns a parallel iterator over `chunk_size` elements of
213	/// `self` at a time. The chunks are mutable and do not overlap.
214	///
215	/// If `chunk_size` does not divide the length of the slice, then the
216	/// last up to `chunk_size-1` elements will be omitted and can be
217	/// retrieved from the remainder function of the iterator.
218	///
219	/// # Examples
220	///
221	/// ```
222	/// use rayon::prelude::*;
223	/// let mut array = [`1`, `2`, `3`, `4`, `5`];
224	/// array.par_chunks_exact_mut(`3`)
225	/// .for_each(\|slice\| slice.reverse());
226	/// assert_eq!(array, [`3`, `2`, `1`, `4`, `5`]);
227	/// ```
228	#[track_caller]
229	fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
230	assert!(chunk_size != `0`, "chunk_size must not be zero");
231	ChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
232	}
233
234	/// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
235	/// starting at the end. The chunks are mutable and do not overlap.
236	///
237	/// If the number of elements in the iterator is not divisible by
238	/// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
239	/// other chunks will have that exact length.
240	///
241	/// # Examples
242	///
243	/// ```
244	/// use rayon::prelude::*;
245	/// let mut array = [`1`, `2`, `3`, `4`, `5`];
246	/// array.par_rchunks_mut(`2`)
247	/// .for_each(\|slice\| slice.reverse());
248	/// assert_eq!(array, [`1`, `3`, `2`, `5`, `4`]);
249	/// ```
250	#[track_caller]
251	fn par_rchunks_mut(&mut self, chunk_size: usize) -> RChunksMut<'_, T> {
252	assert!(chunk_size != `0`, "chunk_size must not be zero");
253	RChunksMut::new(chunk_size, self.as_parallel_slice_mut())
254	}
255
256	/// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
257	/// starting at the end. The chunks are mutable and do not overlap.
258	///
259	/// If `chunk_size` does not divide the length of the slice, then the
260	/// last up to `chunk_size-1` elements will be omitted and can be
261	/// retrieved from the remainder function of the iterator.
262	///
263	/// # Examples
264	///
265	/// ```
266	/// use rayon::prelude::*;
267	/// let mut array = [`1`, `2`, `3`, `4`, `5`];
268	/// array.par_rchunks_exact_mut(`3`)
269	/// .for_each(\|slice\| slice.reverse());
270	/// assert_eq!(array, [`1`, `2`, `5`, `4`, `3`]);
271	/// ```
272	#[track_caller]
273	fn par_rchunks_exact_mut(&mut self, chunk_size: usize) -> RChunksExactMut<'_, T> {
274	assert!(chunk_size != `0`, "chunk_size must not be zero");
275	RChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
276	}
277
278	/// Sorts the slice in parallel.
279	///
280	/// This sort is stable (i.e., does not reorder equal elements) and O(n* \* log(n)) worst-case.*
281	///
282	/// When applicable, unstable sorting is preferred because it is generally faster than stable
283	/// sorting and it doesn't allocate auxiliary memory.
284	/// See [`par_sort_unstable`](#method.par_sort_unstable).
285	///
286	/// # Current implementation
287	///
288	/// The current algorithm is an adaptive merge sort inspired by
289	/// [timsort](https://en.wikipedia.org/wiki/Timsort).
290	/// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
291	/// two or more sorted sequences concatenated one after another.
292	///
293	/// Also, it allocates temporary storage the same size as `self`, but for very short slices a
294	/// non-allocating insertion sort is used instead.
295	///
296	/// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
297	/// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
298	/// or descending runs are concatenated. Finally, the remaining chunks are merged together using
299	/// parallel subdivision of chunks and parallel merge operation.
300	///
301	/// # Examples
302	///
303	/// ```
304	/// use rayon::prelude::*;
305	///
306	/// let mut v = [`-5`, `4`, `1`, `-3`, `2`];
307	///
308	/// v.par_sort();
309	/// assert_eq!(v, [-`5`, -`3`, `1`, `2`, `4`]);
310	/// ```
311	fn par_sort(&mut self)
312	where
313	T: Ord,
314	{
315	par_mergesort(self.as_parallel_slice_mut(), T::lt);
316	}
317
318	/// Sorts the slice in parallel with a comparator function.
319	///
320	/// This sort is stable (i.e., does not reorder equal elements) and O(n* \* log(n)) worst-case.*
321	///
322	/// The comparator function must define a total ordering for the elements in the slice. If
323	/// the ordering is not total, the order of the elements is unspecified. An order is a
324	/// total order if it is (for all `a`, `b` and `c`):
325	///
326	/// total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and*
327	/// transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.*
328	///
329	/// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
330	/// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
331	///
332	/// ```
333	/// use rayon::prelude::*;
334	///
335	/// let mut floats = [`5f64`, `4.0`, `1.0`, `3.0`, `2.0`];
336	/// floats.par_sort_by(\|a, b\| a.partial_cmp(b).unwrap());
337	/// assert_eq!(floats, [`1.0`, `2.0`, `3.0`, `4.0`, `5.0`]);
338	/// ```
339	///
340	/// When applicable, unstable sorting is preferred because it is generally faster than stable
341	/// sorting and it doesn't allocate auxiliary memory.
342	/// See [`par_sort_unstable_by`](#method.par_sort_unstable_by).
343	///
344	/// # Current implementation
345	///
346	/// The current algorithm is an adaptive merge sort inspired by
347	/// [timsort](https://en.wikipedia.org/wiki/Timsort).
348	/// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
349	/// two or more sorted sequences concatenated one after another.
350	///
351	/// Also, it allocates temporary storage the same size as `self`, but for very short slices a
352	/// non-allocating insertion sort is used instead.
353	///
354	/// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
355	/// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
356	/// or descending runs are concatenated. Finally, the remaining chunks are merged together using
357	/// parallel subdivision of chunks and parallel merge operation.
358	///
359	/// # Examples
360	///
361	/// ```
362	/// use rayon::prelude::*;
363	///
364	/// let mut v = [`5`, `4`, `1`, `3`, `2`];
365	/// v.par_sort_by(\|a, b\| a.cmp(b));
366	/// assert_eq!(v, [`1`, `2`, `3`, `4`, `5`]);
367	///
368	/// // reverse sorting
369	/// v.par_sort_by(\|a, b\| b.cmp(a));
370	/// assert_eq!(v, [`5`, `4`, `3`, `2`, `1`]);
371	/// ```
372	fn par_sort_by<F>(&mut self, compare: F)
373	where
374	F: Fn(&T, &T) -> Ordering + Sync,
375	{
376	par_mergesort(self.as_parallel_slice_mut(), \|a, b\| {
377	compare(a, b) == Ordering::Less
378	});
379	}
380
381	/// Sorts the slice in parallel with a key extraction function.
382	///
383	/// This sort is stable (i.e., does not reorder equal elements) and O(m* \* n \* log(n))*
384	/// worst-case, where the key function is O(m).
385	///
386	/// For expensive key functions (e.g. functions that are not simple property accesses or
387	/// basic operations), [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) is likely to
388	/// be significantly faster, as it does not recompute element keys.
389	///
390	/// When applicable, unstable sorting is preferred because it is generally faster than stable
391	/// sorting and it doesn't allocate auxiliary memory.
392	/// See [`par_sort_unstable_by_key`](#method.par_sort_unstable_by_key).
393	///
394	/// # Current implementation
395	///
396	/// The current algorithm is an adaptive merge sort inspired by
397	/// [timsort](https://en.wikipedia.org/wiki/Timsort).
398	/// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
399	/// two or more sorted sequences concatenated one after another.
400	///
401	/// Also, it allocates temporary storage the same size as `self`, but for very short slices a
402	/// non-allocating insertion sort is used instead.
403	///
404	/// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
405	/// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
406	/// or descending runs are concatenated. Finally, the remaining chunks are merged together using
407	/// parallel subdivision of chunks and parallel merge operation.
408	///
409	/// # Examples
410	///
411	/// ```
412	/// use rayon::prelude::*;
413	///
414	/// let mut v = [`-5i32`, `4`, `1`, `-3`, `2`];
415	///
416	/// v.par_sort_by_key(\|k\| k.abs());
417	/// assert_eq!(v, [`1`, `2`, -`3`, `4`, -`5`]);
418	/// ```
419	fn par_sort_by_key<K, F>(&mut self, f: F)
420	where
421	K: Ord,
422	F: Fn(&T) -> K + Sync,
423	{
424	par_mergesort(self.as_parallel_slice_mut(), \|a, b\| f(a).lt(&f(b)));
425	}
426
427	/// Sorts the slice in parallel with a key extraction function.
428	///
429	/// During sorting, the key function is called at most once per element, by using
430	/// temporary storage to remember the results of key evaluation.
431	/// The key function is called in parallel, so the order of calls is completely unspecified.
432	///
433	/// This sort is stable (i.e., does not reorder equal elements) and O(m* \* n + n \* log(n))*
434	/// worst-case, where the key function is O(m).
435	///
436	/// For simple key functions (e.g., functions that are property accesses or
437	/// basic operations), [`par_sort_by_key`](#method.par_sort_by_key) is likely to be
438	/// faster.
439	///
440	/// # Current implementation
441	///
442	/// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
443	/// which combines the fast average case of randomized quicksort with the fast worst case of
444	/// heapsort, while achieving linear time on slices with certain patterns. It uses some
445	/// randomization to avoid degenerate cases, but with a fixed seed to always provide
446	/// deterministic behavior.
447	///
448	/// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the
449	/// length of the slice.
450	///
451	/// All quicksorts work in two stages: partitioning into two halves followed by recursive
452	/// calls. The partitioning phase is sequential, but the two recursive calls are performed in
453	/// parallel. Finally, after sorting the cached keys, the item positions are updated sequentially.
454	///
455	/// [pdqsort]: https://github.com/orlp/pdqsort
456	///
457	/// # Examples
458	///
459	/// ```
460	/// use rayon::prelude::*;
461	///
462	/// let mut v = [`-5i32`, `4`, `32`, `-3`, `2`];
463	///
464	/// v.par_sort_by_cached_key(\|k\| k.to_string());
465	/// assert!(v == [-`3`, -`5`, `2`, `32`, `4`]);
466	/// ```
467	fn par_sort_by_cached_key<K, F>(&mut self, f: F)
468	where
469	F: Fn(&T) -> K + Sync,
470	K: Ord + Send,
471	{
472	let slice = self.as_parallel_slice_mut();
473	let len = slice.len();
474	if len < `2` {
475	return;
476	}
477
478	// Helper macro for indexing our vector by the smallest possible type, to reduce allocation.
479	macro_rules! sort_by_key {
480	($t:ty) => {{
481	let mut indices: Vec<_> = slice
482	.par_iter_mut()
483	.enumerate()
484	.map(\|(i, x)\| (f(&*x), i as $t))
485	.collect();
486	// The elements of `indices` are unique, as they are indexed, so any sort will be
487	// stable with respect to the original slice. We use `sort_unstable` here because
488	// it requires less memory allocation.
489	indices.par_sort_unstable();
490	for i in `0`..len {
491	let mut index = indices[i].`1`;
492	while (index as usize) < i {
493	index = indices[index as usize].`1`;
494	}
495	indices[i].`1` = index;
496	slice.swap(i, index as usize);
497	}
498	}};
499	}
500
501	let sz_u8 = mem::size_of::<(K, u8)>();
502	let sz_u16 = mem::size_of::<(K, u16)>();
503	let sz_u32 = mem::size_of::<(K, u32)>();
504	let sz_usize = mem::size_of::<(K, usize)>();
505
506	if sz_u8 < sz_u16 && len <= (std::u8::MAX as usize) {
507	return sort_by_key!(u8);
508	}
509	if sz_u16 < sz_u32 && len <= (std::u16::MAX as usize) {
510	return sort_by_key!(u16);
511	}
512	if sz_u32 < sz_usize && len <= (std::u32::MAX as usize) {
513	return sort_by_key!(u32);
514	}
515	sort_by_key!(usize)
516	}
517
518	/// Sorts the slice in parallel, but might not preserve the order of equal elements.
519	///
520	/// This sort is unstable (i.e., may reorder equal elements), in-place
521	/// (i.e., does not allocate), and O(n* \* log(n)) worst-case.*
522	///
523	/// # Current implementation
524	///
525	/// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
526	/// which combines the fast average case of randomized quicksort with the fast worst case of
527	/// heapsort, while achieving linear time on slices with certain patterns. It uses some
528	/// randomization to avoid degenerate cases, but with a fixed seed to always provide
529	/// deterministic behavior.
530	///
531	/// It is typically faster than stable sorting, except in a few special cases, e.g., when the
532	/// slice consists of several concatenated sorted sequences.
533	///
534	/// All quicksorts work in two stages: partitioning into two halves followed by recursive
535	/// calls. The partitioning phase is sequential, but the two recursive calls are performed in
536	/// parallel.
537	///
538	/// [pdqsort]: https://github.com/orlp/pdqsort
539	///
540	/// # Examples
541	///
542	/// ```
543	/// use rayon::prelude::*;
544	///
545	/// let mut v = [`-5`, `4`, `1`, `-3`, `2`];
546	///
547	/// v.par_sort_unstable();
548	/// assert_eq!(v, [-`5`, -`3`, `1`, `2`, `4`]);
549	/// ```
550	fn par_sort_unstable(&mut self)
551	where
552	T: Ord,
553	{
554	par_quicksort(self.as_parallel_slice_mut(), T::lt);
555	}
556
557	/// Sorts the slice in parallel with a comparator function, but might not preserve the order of
558	/// equal elements.
559	///
560	/// This sort is unstable (i.e., may reorder equal elements), in-place
561	/// (i.e., does not allocate), and O(n* \* log(n)) worst-case.*
562	///
563	/// The comparator function must define a total ordering for the elements in the slice. If
564	/// the ordering is not total, the order of the elements is unspecified. An order is a
565	/// total order if it is (for all `a`, `b` and `c`):
566	///
567	/// total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and*
568	/// transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.*
569	///
570	/// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
571	/// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
572	///
573	/// ```
574	/// use rayon::prelude::*;
575	///
576	/// let mut floats = [`5f64`, `4.0`, `1.0`, `3.0`, `2.0`];
577	/// floats.par_sort_unstable_by(\|a, b\| a.partial_cmp(b).unwrap());
578	/// assert_eq!(floats, [`1.0`, `2.0`, `3.0`, `4.0`, `5.0`]);
579	/// ```
580	///
581	/// # Current implementation
582	///
583	/// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
584	/// which combines the fast average case of randomized quicksort with the fast worst case of
585	/// heapsort, while achieving linear time on slices with certain patterns. It uses some
586	/// randomization to avoid degenerate cases, but with a fixed seed to always provide
587	/// deterministic behavior.
588	///
589	/// It is typically faster than stable sorting, except in a few special cases, e.g., when the
590	/// slice consists of several concatenated sorted sequences.
591	///
592	/// All quicksorts work in two stages: partitioning into two halves followed by recursive
593	/// calls. The partitioning phase is sequential, but the two recursive calls are performed in
594	/// parallel.
595	///
596	/// [pdqsort]: https://github.com/orlp/pdqsort
597	///
598	/// # Examples
599	///
600	/// ```
601	/// use rayon::prelude::*;
602	///
603	/// let mut v = [`5`, `4`, `1`, `3`, `2`];
604	/// v.par_sort_unstable_by(\|a, b\| a.cmp(b));
605	/// assert_eq!(v, [`1`, `2`, `3`, `4`, `5`]);
606	///
607	/// // reverse sorting
608	/// v.par_sort_unstable_by(\|a, b\| b.cmp(a));
609	/// assert_eq!(v, [`5`, `4`, `3`, `2`, `1`]);
610	/// ```
611	fn par_sort_unstable_by<F>(&mut self, compare: F)
612	where
613	F: Fn(&T, &T) -> Ordering + Sync,
614	{
615	par_quicksort(self.as_parallel_slice_mut(), \|a, b\| {
616	compare(a, b) == Ordering::Less
617	});
618	}
619
620	/// Sorts the slice in parallel with a key extraction function, but might not preserve the order
621	/// of equal elements.
622	///
623	/// This sort is unstable (i.e., may reorder equal elements), in-place
624	/// (i.e., does not allocate), and O(m \ n \* log(n)) worst-case,*
625	/// where the key function is O(m).
626	///
627	/// # Current implementation
628	///
629	/// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
630	/// which combines the fast average case of randomized quicksort with the fast worst case of
631	/// heapsort, while achieving linear time on slices with certain patterns. It uses some
632	/// randomization to avoid degenerate cases, but with a fixed seed to always provide
633	/// deterministic behavior.
634	///
635	/// Due to its key calling strategy, `par_sort_unstable_by_key` is likely to be slower than
636	/// [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) in cases where the key function
637	/// is expensive.
638	///
639	/// All quicksorts work in two stages: partitioning into two halves followed by recursive
640	/// calls. The partitioning phase is sequential, but the two recursive calls are performed in
641	/// parallel.
642	///
643	/// [pdqsort]: https://github.com/orlp/pdqsort
644	///
645	/// # Examples
646	///
647	/// ```
648	/// use rayon::prelude::*;
649	///
650	/// let mut v = [`-5i32`, `4`, `1`, `-3`, `2`];
651	///
652	/// v.par_sort_unstable_by_key(\|k\| k.abs());
653	/// assert_eq!(v, [`1`, `2`, -`3`, `4`, -`5`]);
654	/// ```
655	fn par_sort_unstable_by_key<K, F>(&mut self, f: F)
656	where
657	K: Ord,
658	F: Fn(&T) -> K + Sync,
659	{
660	par_quicksort(self.as_parallel_slice_mut(), \|a, b\| f(a).lt(&f(b)));
661	}
662	}
663
664	impl<T: Send> ParallelSliceMut<T> for [T] {
665	#[inline]
666	fn as_parallel_slice_mut(&mut self) -> &mut [T] {
667	self
668	}
669	}
670
671	impl<'data, T: Sync + 'data> IntoParallelIterator for &'data [T] {
672	type Item = &'data T;
673	type Iter = Iter<'data, T>;
674
675	fn into_par_iter(self) -> Self::Iter {
676	Iter { slice: self }
677	}
678	}
679
680	impl<'data, T: Send + 'data> IntoParallelIterator for &'data mut [T] {
681	type Item = &'data mut T;
682	type Iter = IterMut<'data, T>;
683
684	fn into_par_iter(self) -> Self::Iter {
685	IterMut { slice: self }
686	}
687	}
688
/// Parallel iterator over immutable items in a slice
///
/// Holds nothing but the borrowed slice it iterates over.
#[derive(Debug)]
pub struct Iter<'data, T: Sync> {
    /// The elements that will be yielded by reference.
    slice: &'data [T],
}
694
695	impl<'data, T: Sync> Clone for Iter<'data, T> {
696	fn clone(&self) -> Self {
697	Iter { ..*self }
698	}
699	}
700
701	impl<'data, T: Sync + 'data> ParallelIterator for Iter<'data, T> {
702	type Item = &'data T;
703
704	fn drive_unindexed<C>(self, consumer: C) -> C::Result
705	where
706	C: UnindexedConsumer<Self::Item>,
707	{
708	bridge(self, consumer)
709	}
710
711	fn opt_len(&self) -> Option<usize> {
712	Some(self.len())
713	}
714	}
715
716	impl<'data, T: Sync + 'data> IndexedParallelIterator for Iter<'data, T> {
717	fn drive<C>(self, consumer: C) -> C::Result
718	where
719	C: Consumer<Self::Item>,
720	{
721	bridge(self, consumer)
722	}
723
724	fn len(&self) -> usize {
725	self.slice.len()
726	}
727
728	fn with_producer<CB>(self, callback: CB) -> CB::Output
729	where
730	CB: ProducerCallback<Self::Item>,
731	{
732	callback.callback(producer:IterProducer { slice: self.slice })
733	}
734	}
735
/// Producer handed out by `Iter::with_producer`; it owns a shared borrow
/// of the part of the slice it is responsible for.
struct IterProducer<'data, T: Sync> {
    /// The part of the slice this producer yields.
    slice: &'data [T],
}
739
740	impl<'data, T: 'data + Sync> Producer for IterProducer<'data, T> {
741	type Item = &'data T;
742	type IntoIter = ::std::slice::Iter<'data, T>;
743
744	fn into_iter(self) -> Self::IntoIter {
745	self.slice.iter()
746	}
747
748	fn split_at(self, index: usize) -> (Self, Self) {
749	let (left: &[T], right: &[T]) = self.slice.split_at(mid:index);
750	(IterProducer { slice: left }, IterProducer { slice: right })
751	}
752	}
753
/// Parallel iterator over immutable overlapping windows of a slice
#[derive(Debug)]
pub struct Windows<'data, T: Sync> {
    /// Length of every yielded window.
    window_size: usize,
    /// The slice the windows are taken from.
    slice: &'data [T],
}
760
761	impl<'data, T: Sync> Clone for Windows<'data, T> {
762	fn clone(&self) -> Self {
763	Windows { ..*self }
764	}
765	}
766
767	impl<'data, T: Sync + 'data> ParallelIterator for Windows<'data, T> {
768	type Item = &'data [T];
769
770	fn drive_unindexed<C>(self, consumer: C) -> C::Result
771	where
772	C: UnindexedConsumer<Self::Item>,
773	{
774	bridge(self, consumer)
775	}
776
777	fn opt_len(&self) -> Option<usize> {
778	Some(self.len())
779	}
780	}
781
782	impl<'data, T: Sync + 'data> IndexedParallelIterator for Windows<'data, T> {
783	fn drive<C>(self, consumer: C) -> C::Result
784	where
785	C: Consumer<Self::Item>,
786	{
787	bridge(self, consumer)
788	}
789
790	fn len(&self) -> usize {
791	assert!(self.window_size >= `1`);
792	self.slice.len().saturating_sub(self.window_size - `1`)
793	}
794
795	fn with_producer<CB>(self, callback: CB) -> CB::Output
796	where
797	CB: ProducerCallback<Self::Item>,
798	{
799	callback.callback(producer:WindowsProducer {
800	window_size: self.window_size,
801	slice: self.slice,
802	})
803	}
804	}
805
/// Producer handed out by `Windows::with_producer`.
struct WindowsProducer<'data, T: Sync> {
    /// Length of every yielded window.
    window_size: usize,
    /// The part of the slice this producer takes its windows from.
    slice: &'data [T],
}
810
811	impl<'data, T: 'data + Sync> Producer for WindowsProducer<'data, T> {
812	type Item = &'data [T];
813	type IntoIter = ::std::slice::Windows<'data, T>;
814
815	fn into_iter(self) -> Self::IntoIter {
816	self.slice.windows(self.window_size)
817	}
818
819	fn split_at(self, index: usize) -> (Self, Self) {
820	let left_index: usize = cmp::min(self.slice.len(), v2:index + (self.window_size - `1`));
821	let left: &[T] = &self.slice[..left_index];
822	let right: &[T] = &self.slice[index..];
823	(
824	WindowsProducer {
825	window_size: self.window_size,
826	slice: left,
827	},
828	WindowsProducer {
829	window_size: self.window_size,
830	slice: right,
831	},
832	)
833	}
834	}
835
/// Parallel iterator over mutable items in a slice
///
/// Holds nothing but the exclusive borrow of the slice it iterates over.
#[derive(Debug)]
pub struct IterMut<'data, T: Send> {
    /// The elements that will be yielded by mutable reference.
    slice: &'data mut [T],
}
841
842	impl<'data, T: Send + 'data> ParallelIterator for IterMut<'data, T> {
843	type Item = &'data mut T;
844
845	fn drive_unindexed<C>(self, consumer: C) -> C::Result
846	where
847	C: UnindexedConsumer<Self::Item>,
848	{
849	bridge(self, consumer)
850	}
851
852	fn opt_len(&self) -> Option<usize> {
853	Some(self.len())
854	}
855	}
856
857	impl<'data, T: Send + 'data> IndexedParallelIterator for IterMut<'data, T> {
858	fn drive<C>(self, consumer: C) -> C::Result
859	where
860	C: Consumer<Self::Item>,
861	{
862	bridge(self, consumer)
863	}
864
865	fn len(&self) -> usize {
866	self.slice.len()
867	}
868
869	fn with_producer<CB>(self, callback: CB) -> CB::Output
870	where
871	CB: ProducerCallback<Self::Item>,
872	{
873	callback.callback(producer:IterMutProducer { slice: self.slice })
874	}
875	}
876
/// Producer handed out by `IterMut::with_producer`; it owns the exclusive
/// borrow of the part of the slice it is responsible for.
struct IterMutProducer<'data, T: Send> {
    /// The part of the slice this producer yields.
    slice: &'data mut [T],
}
880
881	impl<'data, T: 'data + Send> Producer for IterMutProducer<'data, T> {
882	type Item = &'data mut T;
883	type IntoIter = ::std::slice::IterMut<'data, T>;
884
885	fn into_iter(self) -> Self::IntoIter {
886	self.slice.iter_mut()
887	}
888
889	fn split_at(self, index: usize) -> (Self, Self) {
890	let (left: &mut [T], right: &mut [T]) = self.slice.split_at_mut(mid:index);
891	(
892	IterMutProducer { slice: left },
893	IterMutProducer { slice: right },
894	)
895	}
896	}
897
/// Parallel iterator over slices separated by a predicate
pub struct Split<'data, T, P> {
    /// The slice being split.
    slice: &'data [T],
    /// Predicate deciding whether an element is a separator.
    separator: P,
}
903
904	impl<'data, T, P: Clone> Clone for Split<'data, T, P> {
905	fn clone(&self) -> Self {
906	Split {
907	separator: self.separator.clone(),
908	..*self
909	}
910	}
911	}
912
913	impl<'data, T: Debug, P> Debug for Split<'data, T, P> {
914	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
915	f.debug_struct("Split").field(name:"slice", &self.slice).finish()
916	}
917	}
918
919	impl<'data, T, P> ParallelIterator for Split<'data, T, P>
920	where
921	P: Fn(&T) -> bool + Sync + Send,
922	T: Sync,
923	{
924	type Item = &'data [T];
925
926	fn drive_unindexed<C>(self, consumer: C) -> C::Result
927	where
928	C: UnindexedConsumer<Self::Item>,
929	{
930	let producer: SplitProducer<'_, P, &[T]> = SplitProducer::new(self.slice, &self.separator);
931	bridge_unindexed(producer, consumer)
932	}
933	}
934
935	/// Implement support for `SplitProducer`.
936	impl<'data, T, P> Fissile<P> for &'data [T]
937	where
938	P: Fn(&T) -> bool,
939	{
940	fn length(&self) -> usize {
941	self.len()
942	}
943
944	fn midpoint(&self, end: usize) -> usize {
945	end / `2`
946	}
947
948	fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
949	self[start..end].iter().position(separator)
950	}
951
952	fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
953	self[..end].iter().rposition(separator)
954	}
955
956	fn split_once(self, index: usize) -> (Self, Self) {
957	let (left, right) = self.split_at(index);
958	(left, &right[`1`..]) // skip the separator
959	}
960
961	fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
962	where
963	F: Folder<Self>,
964	Self: Send,
965	{
966	let mut split = self.split(separator);
967	if skip_last {
968	split.next_back();
969	}
970	folder.consume_iter(split)
971	}
972	}
973
/// Parallel iterator over mutable slices separated by a predicate
pub struct SplitMut<'data, T, P> {
    /// The slice being split.
    slice: &'data mut [T],
    /// Predicate deciding whether an element is a separator.
    separator: P,
}
979
980	impl<'data, T: Debug, P> Debug for SplitMut<'data, T, P> {
981	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
982	f&mut DebugStruct<'_, '_>.debug_struct("SplitMut")
983	.field(name:"slice", &self.slice)
984	.finish()
985	}
986	}
987
988	impl<'data, T, P> ParallelIterator for SplitMut<'data, T, P>
989	where
990	P: Fn(&T) -> bool + Sync + Send,
991	T: Send,
992	{
993	type Item = &'data mut [T];
994
995	fn drive_unindexed<C>(self, consumer: C) -> C::Result
996	where
997	C: UnindexedConsumer<Self::Item>,
998	{
999	let producer: SplitProducer<'_, P, &mut …> = SplitProducer::new(self.slice, &self.separator);
1000	bridge_unindexed(producer, consumer)
1001	}
1002	}
1003
1004	/// Implement support for `SplitProducer`.
1005	impl<'data, T, P> Fissile<P> for &'data mut [T]
1006	where
1007	P: Fn(&T) -> bool,
1008	{
1009	fn length(&self) -> usize {
1010	self.len()
1011	}
1012
1013	fn midpoint(&self, end: usize) -> usize {
1014	end / `2`
1015	}
1016
1017	fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
1018	self[start..end].iter().position(separator)
1019	}
1020
1021	fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
1022	self[..end].iter().rposition(separator)
1023	}
1024
1025	fn split_once(self, index: usize) -> (Self, Self) {
1026	let (left, right) = self.split_at_mut(index);
1027	(left, &mut right[`1`..]) // skip the separator
1028	}
1029
1030	fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
1031	where
1032	F: Folder<Self>,
1033	Self: Send,
1034	{
1035	let mut split = self.split_mut(separator);
1036	if skip_last {
1037	split.next_back();
1038	}
1039	folder.consume_iter(split)
1040	}
1041	}
1042