use alloc::vec::Vec;
use core::cmp::Ordering;

/// Consumes a given iterator, returning the `k` smallest elements in **ascending** order.
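///
/// # Example
///
/// A minimal usage sketch (a hypothetical call site; as a `pub(crate)` item
/// this is not compiled as a doctest):
///
/// ```ignore
/// let smallest = k_smallest_general([9, 3, 7, 1, 8].into_iter(), 3, Ord::cmp);
/// assert_eq!(smallest, [1, 3, 7]);
/// ```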
pub(crate) fn k_smallest_general<I, F>(iter: I, k: usize, mut comparator: F) -> Vec<I::Item>
where
    I: Iterator,
    F: FnMut(&I::Item, &I::Item) -> Ordering,
{
    /// Sift the element currently at `origin` away from the root until it is properly ordered.
    ///
    /// This will leave **larger** elements closer to the root of the heap.
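    ///
    /// For instance (a hypothetical trace, not from the original docs): with
    /// `is_less_than` as plain `<`, sifting index 0 of `[1, 5, 4]` swaps `1`
    /// with its larger child `5` to give `[5, 1, 4]`; both children of the
    /// root are then smaller, so the sift stops.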
    fn sift_down<T, F>(heap: &mut [T], is_less_than: &mut F, mut origin: usize)
    where
        F: FnMut(&T, &T) -> bool,
    {
        #[inline]
        fn children_of(n: usize) -> (usize, usize) {
            (2 * n + 1, 2 * n + 2)
        }

        while origin < heap.len() {
            let (left_idx, right_idx) = children_of(origin);
            if left_idx >= heap.len() {
                return;
            }

            // Compare against the larger child so the parent ends up above both.
            let replacement_idx =
                if right_idx < heap.len() && is_less_than(&heap[left_idx], &heap[right_idx]) {
                    right_idx
                } else {
                    left_idx
                };

            if is_less_than(&heap[origin], &heap[replacement_idx]) {
                heap.swap(origin, replacement_idx);
                origin = replacement_idx;
            } else {
                return;
            }
        }
    }

    if k == 0 {
        // Nothing is kept, but the iterator is still consumed for its side effects.
        iter.last();
        return Vec::new();
    }
    if k == 1 {
        return iter.min_by(comparator).into_iter().collect();
    }
    let mut iter = iter.fuse();
    let mut storage: Vec<I::Item> = iter.by_ref().take(k).collect();

    let mut is_less_than = move |a: &_, b: &_| comparator(a, b) == Ordering::Less;

    // Rearrange the storage into a valid heap by reordering from the second-bottom-most layer up to the root.
    // Slightly faster than ordering on each insert, but only by a factor of lg(k).
    // The resulting heap has the **largest** item on top.
    for i in (0..=(storage.len() / 2)).rev() {
        sift_down(&mut storage, &mut is_less_than, i);
    }
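    // As a worked illustration (hypothetical values): an initial `storage` of
    // [3, 1, 4, 1, 5] heapifies to the max-heap [5, 3, 4, 1, 1].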

    iter.for_each(|val| {
        debug_assert_eq!(storage.len(), k);
        if is_less_than(&val, &storage[0]) {
            // Treating this as a push-and-pop saves having to write a sift-up implementation.
            // https://en.wikipedia.org/wiki/Binary_heap#Insert_then_extract
            storage[0] = val;
            // We retain the smallest items we've seen so far, but ordered largest first so we can drop the largest efficiently.
            sift_down(&mut storage, &mut is_less_than, 0);
        }
    });

    // Ultimately the items need to be in least-first, strict order, but the heap is currently largest-first.
    // To achieve this, repeatedly:
    // 1) "pop" the largest item off the heap into the tail slot of the underlying storage,
    // 2) shrink the logical size of the heap by 1,
    // 3) restore the heap property over the remaining items.
    let mut heap = &mut storage[..];
    while heap.len() > 1 {
        let last_idx = heap.len() - 1;
        heap.swap(0, last_idx);
        // Sifting over a truncated slice means that the sifting will not disturb already popped elements.
        heap = &mut heap[..last_idx];
        sift_down(heap, &mut is_less_than, 0);
    }
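    // Continuing the earlier hypothetical: draining the max-heap [5, 3, 4, 1, 1]
    // this way leaves `storage` as [1, 1, 3, 4, 5], i.e. ascending order.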

    storage
}

#[inline]
pub(crate) fn key_to_cmp<T, K, F>(mut key: F) -> impl FnMut(&T, &T) -> Ordering
where
    F: FnMut(&T) -> K,
    K: Ord,
{
    move |a: &T, b: &T| key(a).cmp(&key(b))
}
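
// A minimal smoke-test sketch (an assumption, not part of the original module;
// it presumes a `std` test harness is available under `cfg(test)`).
#[cfg(test)]
mod k_smallest_sketch_tests {
    use super::*;

    #[test]
    fn returns_k_smallest_ascending() {
        let input = [9, 3, 7, 1, 8, 2];
        // k = 3 keeps only the three smallest, in ascending order.
        assert_eq!(k_smallest_general(input.iter().copied(), 3, Ord::cmp), [1, 2, 3]);
        // Asking for more elements than exist returns everything, sorted.
        assert_eq!(
            k_smallest_general(input.iter().copied(), 10, Ord::cmp),
            [1, 2, 3, 7, 8, 9]
        );
        // k = 0 consumes the iterator and yields nothing.
        assert_eq!(
            k_smallest_general(input.iter().copied(), 0, Ord::cmp),
            Vec::<i32>::new()
        );
    }

    #[test]
    fn key_to_cmp_orders_by_derived_key() {
        // Compare integers by absolute value via `key_to_cmp`.
        let mut cmp = key_to_cmp(|x: &i32| x.abs());
        assert_eq!(cmp(&-2, &1), Ordering::Greater);
        assert_eq!(cmp(&-1, &1), Ordering::Equal);
    }
}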