1 | use std::hash::Hash; |
2 | |
3 | mod private { |
4 | use std::collections::HashMap; |
5 | use std::hash::Hash; |
6 | use std::fmt; |
7 | |
8 | #[derive(Clone)] |
9 | #[must_use = "iterator adaptors are lazy and do nothing unless consumed" ] |
10 | pub struct DuplicatesBy<I: Iterator, Key, F> { |
11 | pub(crate) iter: I, |
12 | pub(crate) meta: Meta<Key, F>, |
13 | } |
14 | |
15 | impl<I, V, F> fmt::Debug for DuplicatesBy<I, V, F> |
16 | where |
17 | I: Iterator + fmt::Debug, |
18 | V: fmt::Debug + Hash + Eq, |
19 | { |
20 | debug_fmt_fields!(DuplicatesBy, iter, meta.used); |
21 | } |
22 | |
23 | impl<I: Iterator, Key: Eq + Hash, F> DuplicatesBy<I, Key, F> { |
24 | pub(crate) fn new(iter: I, key_method: F) -> Self { |
25 | DuplicatesBy { |
26 | iter, |
27 | meta: Meta { |
28 | used: HashMap::new(), |
29 | pending: 0, |
30 | key_method, |
31 | }, |
32 | } |
33 | } |
34 | } |
35 | |
36 | #[derive(Clone)] |
37 | pub struct Meta<Key, F> { |
38 | used: HashMap<Key, bool>, |
39 | pending: usize, |
40 | key_method: F, |
41 | } |
42 | |
43 | impl<Key, F> Meta<Key, F> |
44 | where |
45 | Key: Eq + Hash, |
46 | { |
47 | /// Takes an item and returns it back to the caller if it's the second time we see it. |
48 | /// Otherwise the item is consumed and None is returned |
49 | #[inline (always)] |
50 | fn filter<I>(&mut self, item: I) -> Option<I> |
51 | where |
52 | F: KeyMethod<Key, I>, |
53 | { |
54 | let kv = self.key_method.make(item); |
55 | match self.used.get_mut(kv.key_ref()) { |
56 | None => { |
57 | self.used.insert(kv.key(), false); |
58 | self.pending += 1; |
59 | None |
60 | } |
61 | Some(true) => None, |
62 | Some(produced) => { |
63 | *produced = true; |
64 | self.pending -= 1; |
65 | Some(kv.value()) |
66 | } |
67 | } |
68 | } |
69 | } |
70 | |
71 | impl<I, Key, F> Iterator for DuplicatesBy<I, Key, F> |
72 | where |
73 | I: Iterator, |
74 | Key: Eq + Hash, |
75 | F: KeyMethod<Key, I::Item>, |
76 | { |
77 | type Item = I::Item; |
78 | |
79 | fn next(&mut self) -> Option<Self::Item> { |
80 | let DuplicatesBy { iter, meta } = self; |
81 | iter.find_map(|v| meta.filter(v)) |
82 | } |
83 | |
84 | #[inline ] |
85 | fn size_hint(&self) -> (usize, Option<usize>) { |
86 | let (_, hi) = self.iter.size_hint(); |
87 | let hi = hi.map(|hi| { |
88 | if hi <= self.meta.pending { |
89 | // fewer or equally many iter-remaining elements than pending elements |
90 | // => at most, each iter-remaining element is matched |
91 | hi |
92 | } else { |
93 | // fewer pending elements than iter-remaining elements |
94 | // => at most: |
95 | // * each pending element is matched |
96 | // * the other iter-remaining elements come in pairs |
97 | self.meta.pending + (hi - self.meta.pending) / 2 |
98 | } |
99 | }); |
100 | // The lower bound is always 0 since we might only get unique items from now on |
101 | (0, hi) |
102 | } |
103 | } |
104 | |
105 | impl<I, Key, F> DoubleEndedIterator for DuplicatesBy<I, Key, F> |
106 | where |
107 | I: DoubleEndedIterator, |
108 | Key: Eq + Hash, |
109 | F: KeyMethod<Key, I::Item>, |
110 | { |
111 | fn next_back(&mut self) -> Option<Self::Item> { |
112 | let DuplicatesBy { iter, meta } = self; |
113 | iter.rev().find_map(|v| meta.filter(v)) |
114 | } |
115 | } |
116 | |
117 | /// A keying method for use with `DuplicatesBy` |
118 | pub trait KeyMethod<K, V> { |
119 | type Container: KeyXorValue<K, V>; |
120 | |
121 | fn make(&mut self, value: V) -> Self::Container; |
122 | } |
123 | |
124 | /// Apply the identity function to elements before checking them for equality. |
125 | #[derive(Debug)] |
126 | pub struct ById; |
127 | impl<V> KeyMethod<V, V> for ById { |
128 | type Container = JustValue<V>; |
129 | |
130 | fn make(&mut self, v: V) -> Self::Container { |
131 | JustValue(v) |
132 | } |
133 | } |
134 | |
135 | /// Apply a user-supplied function to elements before checking them for equality. |
136 | pub struct ByFn<F>(pub(crate) F); |
137 | impl<F> fmt::Debug for ByFn<F> { |
138 | debug_fmt_fields!(ByFn,); |
139 | } |
140 | impl<K, V, F> KeyMethod<K, V> for ByFn<F> |
141 | where |
142 | F: FnMut(&V) -> K, |
143 | { |
144 | type Container = KeyValue<K, V>; |
145 | |
146 | fn make(&mut self, v: V) -> Self::Container { |
147 | KeyValue((self.0)(&v), v) |
148 | } |
149 | } |
150 | |
151 | // Implementors of this trait can hold onto a key and a value but only give access to one of them |
152 | // at a time. This allows the key and the value to be the same value internally |
153 | pub trait KeyXorValue<K, V> { |
154 | fn key_ref(&self) -> &K; |
155 | fn key(self) -> K; |
156 | fn value(self) -> V; |
157 | } |
158 | |
159 | #[derive(Debug)] |
160 | pub struct KeyValue<K, V>(K, V); |
161 | impl<K, V> KeyXorValue<K, V> for KeyValue<K, V> { |
162 | fn key_ref(&self) -> &K { |
163 | &self.0 |
164 | } |
165 | fn key(self) -> K { |
166 | self.0 |
167 | } |
168 | fn value(self) -> V { |
169 | self.1 |
170 | } |
171 | } |
172 | |
173 | #[derive(Debug)] |
174 | pub struct JustValue<V>(V); |
175 | impl<V> KeyXorValue<V, V> for JustValue<V> { |
176 | fn key_ref(&self) -> &V { |
177 | &self.0 |
178 | } |
179 | fn key(self) -> V { |
180 | self.0 |
181 | } |
182 | fn value(self) -> V { |
183 | self.0 |
184 | } |
185 | } |
186 | } |
187 | |
188 | /// An iterator adapter to filter for duplicate elements. |
189 | /// |
190 | /// See [`.duplicates_by()`](crate::Itertools::duplicates_by) for more information. |
191 | pub type DuplicatesBy<I, V, F> = private::DuplicatesBy<I, V, private::ByFn<F>>; |
192 | |
193 | /// Create a new `DuplicatesBy` iterator. |
194 | pub fn duplicates_by<I, Key, F>(iter: I, f: F) -> DuplicatesBy<I, Key, F> |
195 | where |
196 | Key: Eq + Hash, |
197 | F: FnMut(&I::Item) -> Key, |
198 | I: Iterator, |
199 | { |
200 | DuplicatesBy::new(iter, private::ByFn(f)) |
201 | } |
202 | |
203 | /// An iterator adapter to filter out duplicate elements. |
204 | /// |
205 | /// See [`.duplicates()`](crate::Itertools::duplicates) for more information. |
206 | pub type Duplicates<I> = private::DuplicatesBy<I, <I as Iterator>::Item, private::ById>; |
207 | |
208 | /// Create a new `Duplicates` iterator. |
209 | pub fn duplicates<I>(iter: I) -> Duplicates<I> |
210 | where |
211 | I: Iterator, |
212 | I::Item: Eq + Hash, |
213 | { |
214 | Duplicates::new(iter, private::ById) |
215 | } |
216 | |
217 | |