1 | use std::cell::{Cell, RefCell}; |
2 | use alloc::vec::{self, Vec}; |
3 | |
4 | /// A trait to unify `FnMut` for `GroupBy` with the chunk key in `IntoChunks` |
5 | trait KeyFunction<A> { |
6 | type Key; |
7 | fn call_mut(&mut self, arg: A) -> Self::Key; |
8 | } |
9 | |
10 | impl<A, K, F: ?Sized> KeyFunction<A> for F |
11 | where F: FnMut(A) -> K |
12 | { |
13 | type Key = K; |
14 | #[inline ] |
15 | fn call_mut(&mut self, arg: A) -> Self::Key { |
16 | (*self)(arg) |
17 | } |
18 | } |
19 | |
20 | |
21 | /// `ChunkIndex` acts like the grouping key function for `IntoChunks` |
22 | #[derive(Debug)] |
23 | struct ChunkIndex { |
24 | size: usize, |
25 | index: usize, |
26 | key: usize, |
27 | } |
28 | |
29 | impl ChunkIndex { |
30 | #[inline (always)] |
31 | fn new(size: usize) -> Self { |
32 | ChunkIndex { |
33 | size, |
34 | index: 0, |
35 | key: 0, |
36 | } |
37 | } |
38 | } |
39 | |
40 | impl<A> KeyFunction<A> for ChunkIndex { |
41 | type Key = usize; |
42 | #[inline (always)] |
43 | fn call_mut(&mut self, _arg: A) -> Self::Key { |
44 | if self.index == self.size { |
45 | self.key += 1; |
46 | self.index = 0; |
47 | } |
48 | self.index += 1; |
49 | self.key |
50 | } |
51 | } |
52 | |
53 | |
54 | struct GroupInner<K, I, F> |
55 | where I: Iterator |
56 | { |
57 | key: F, |
58 | iter: I, |
59 | current_key: Option<K>, |
60 | current_elt: Option<I::Item>, |
61 | /// flag set if iterator is exhausted |
62 | done: bool, |
63 | /// Index of group we are currently buffering or visiting |
64 | top_group: usize, |
65 | /// Least index for which we still have elements buffered |
66 | oldest_buffered_group: usize, |
67 | /// Group index for `buffer[0]` -- the slots |
68 | /// bottom_group..oldest_buffered_group are unused and will be erased when |
69 | /// that range is large enough. |
70 | bottom_group: usize, |
71 | /// Buffered groups, from `bottom_group` (index 0) to `top_group`. |
72 | buffer: Vec<vec::IntoIter<I::Item>>, |
73 | /// index of last group iter that was dropped, usize::MAX == none |
74 | dropped_group: usize, |
75 | } |
76 | |
77 | impl<K, I, F> GroupInner<K, I, F> |
78 | where I: Iterator, |
79 | F: for<'a> KeyFunction<&'a I::Item, Key=K>, |
80 | K: PartialEq, |
81 | { |
82 | /// `client`: Index of group that requests next element |
83 | #[inline (always)] |
84 | fn step(&mut self, client: usize) -> Option<I::Item> { |
85 | /* |
86 | println!("client={}, bottom_group={}, oldest_buffered_group={}, top_group={}, buffers=[{}]", |
87 | client, self.bottom_group, self.oldest_buffered_group, |
88 | self.top_group, |
89 | self.buffer.iter().map(|elt| elt.len()).format(", ")); |
90 | */ |
91 | if client < self.oldest_buffered_group { |
92 | None |
93 | } else if client < self.top_group || |
94 | (client == self.top_group && |
95 | self.buffer.len() > self.top_group - self.bottom_group) |
96 | { |
97 | self.lookup_buffer(client) |
98 | } else if self.done { |
99 | None |
100 | } else if self.top_group == client { |
101 | self.step_current() |
102 | } else { |
103 | self.step_buffering(client) |
104 | } |
105 | } |
106 | |
107 | #[inline (never)] |
108 | fn lookup_buffer(&mut self, client: usize) -> Option<I::Item> { |
109 | // if `bufidx` doesn't exist in self.buffer, it might be empty |
110 | let bufidx = client - self.bottom_group; |
111 | if client < self.oldest_buffered_group { |
112 | return None; |
113 | } |
114 | let elt = self.buffer.get_mut(bufidx).and_then(|queue| queue.next()); |
115 | if elt.is_none() && client == self.oldest_buffered_group { |
116 | // FIXME: VecDeque is unfortunately not zero allocation when empty, |
117 | // so we do this job manually. |
118 | // `bottom_group..oldest_buffered_group` is unused, and if it's large enough, erase it. |
119 | self.oldest_buffered_group += 1; |
120 | // skip forward further empty queues too |
121 | while self.buffer.get(self.oldest_buffered_group - self.bottom_group) |
122 | .map_or(false, |buf| buf.len() == 0) |
123 | { |
124 | self.oldest_buffered_group += 1; |
125 | } |
126 | |
127 | let nclear = self.oldest_buffered_group - self.bottom_group; |
128 | if nclear > 0 && nclear >= self.buffer.len() / 2 { |
129 | let mut i = 0; |
130 | self.buffer.retain(|buf| { |
131 | i += 1; |
132 | debug_assert!(buf.len() == 0 || i > nclear); |
133 | i > nclear |
134 | }); |
135 | self.bottom_group = self.oldest_buffered_group; |
136 | } |
137 | } |
138 | elt |
139 | } |
140 | |
141 | /// Take the next element from the iterator, and set the done |
142 | /// flag if exhausted. Must not be called after done. |
143 | #[inline (always)] |
144 | fn next_element(&mut self) -> Option<I::Item> { |
145 | debug_assert!(!self.done); |
146 | match self.iter.next() { |
147 | None => { self.done = true; None } |
148 | otherwise => otherwise, |
149 | } |
150 | } |
151 | |
152 | |
153 | #[inline (never)] |
154 | fn step_buffering(&mut self, client: usize) -> Option<I::Item> { |
155 | // requested a later group -- walk through the current group up to |
156 | // the requested group index, and buffer the elements (unless |
157 | // the group is marked as dropped). |
158 | // Because the `Groups` iterator is always the first to request |
159 | // each group index, client is the next index efter top_group. |
160 | debug_assert!(self.top_group + 1 == client); |
161 | let mut group = Vec::new(); |
162 | |
163 | if let Some(elt) = self.current_elt.take() { |
164 | if self.top_group != self.dropped_group { |
165 | group.push(elt); |
166 | } |
167 | } |
168 | let mut first_elt = None; // first element of the next group |
169 | |
170 | while let Some(elt) = self.next_element() { |
171 | let key = self.key.call_mut(&elt); |
172 | match self.current_key.take() { |
173 | None => {} |
174 | Some(old_key) => if old_key != key { |
175 | self.current_key = Some(key); |
176 | first_elt = Some(elt); |
177 | break; |
178 | }, |
179 | } |
180 | self.current_key = Some(key); |
181 | if self.top_group != self.dropped_group { |
182 | group.push(elt); |
183 | } |
184 | } |
185 | |
186 | if self.top_group != self.dropped_group { |
187 | self.push_next_group(group); |
188 | } |
189 | if first_elt.is_some() { |
190 | self.top_group += 1; |
191 | debug_assert!(self.top_group == client); |
192 | } |
193 | first_elt |
194 | } |
195 | |
196 | fn push_next_group(&mut self, group: Vec<I::Item>) { |
197 | // When we add a new buffered group, fill up slots between oldest_buffered_group and top_group |
198 | while self.top_group - self.bottom_group > self.buffer.len() { |
199 | if self.buffer.is_empty() { |
200 | self.bottom_group += 1; |
201 | self.oldest_buffered_group += 1; |
202 | } else { |
203 | self.buffer.push(Vec::new().into_iter()); |
204 | } |
205 | } |
206 | self.buffer.push(group.into_iter()); |
207 | debug_assert!(self.top_group + 1 - self.bottom_group == self.buffer.len()); |
208 | } |
209 | |
210 | /// This is the immediate case, where we use no buffering |
211 | #[inline ] |
212 | fn step_current(&mut self) -> Option<I::Item> { |
213 | debug_assert!(!self.done); |
214 | if let elt @ Some(..) = self.current_elt.take() { |
215 | return elt; |
216 | } |
217 | match self.next_element() { |
218 | None => None, |
219 | Some(elt) => { |
220 | let key = self.key.call_mut(&elt); |
221 | match self.current_key.take() { |
222 | None => {} |
223 | Some(old_key) => if old_key != key { |
224 | self.current_key = Some(key); |
225 | self.current_elt = Some(elt); |
226 | self.top_group += 1; |
227 | return None; |
228 | }, |
229 | } |
230 | self.current_key = Some(key); |
231 | Some(elt) |
232 | } |
233 | } |
234 | } |
235 | |
236 | /// Request the just started groups' key. |
237 | /// |
238 | /// `client`: Index of group |
239 | /// |
240 | /// **Panics** if no group key is available. |
241 | fn group_key(&mut self, client: usize) -> K { |
242 | // This can only be called after we have just returned the first |
243 | // element of a group. |
244 | // Perform this by simply buffering one more element, grabbing the |
245 | // next key. |
246 | debug_assert!(!self.done); |
247 | debug_assert!(client == self.top_group); |
248 | debug_assert!(self.current_key.is_some()); |
249 | debug_assert!(self.current_elt.is_none()); |
250 | let old_key = self.current_key.take().unwrap(); |
251 | if let Some(elt) = self.next_element() { |
252 | let key = self.key.call_mut(&elt); |
253 | if old_key != key { |
254 | self.top_group += 1; |
255 | } |
256 | self.current_key = Some(key); |
257 | self.current_elt = Some(elt); |
258 | } |
259 | old_key |
260 | } |
261 | } |
262 | |
263 | impl<K, I, F> GroupInner<K, I, F> |
264 | where I: Iterator, |
265 | { |
266 | /// Called when a group is dropped |
267 | fn drop_group(&mut self, client: usize) { |
268 | // It's only useful to track the maximal index |
269 | if self.dropped_group == !0 || client > self.dropped_group { |
270 | self.dropped_group = client; |
271 | } |
272 | } |
273 | } |
274 | |
275 | /// `GroupBy` is the storage for the lazy grouping operation. |
276 | /// |
277 | /// If the groups are consumed in their original order, or if each |
278 | /// group is dropped without keeping it around, then `GroupBy` uses |
279 | /// no allocations. It needs allocations only if several group iterators |
280 | /// are alive at the same time. |
281 | /// |
282 | /// This type implements [`IntoIterator`] (it is **not** an iterator |
283 | /// itself), because the group iterators need to borrow from this |
284 | /// value. It should be stored in a local variable or temporary and |
285 | /// iterated. |
286 | /// |
287 | /// See [`.group_by()`](crate::Itertools::group_by) for more information. |
288 | #[must_use = "iterator adaptors are lazy and do nothing unless consumed" ] |
289 | pub struct GroupBy<K, I, F> |
290 | where I: Iterator, |
291 | { |
292 | inner: RefCell<GroupInner<K, I, F>>, |
293 | // the group iterator's current index. Keep this in the main value |
294 | // so that simultaneous iterators all use the same state. |
295 | index: Cell<usize>, |
296 | } |
297 | |
298 | /// Create a new |
299 | pub fn new<K, J, F>(iter: J, f: F) -> GroupBy<K, J::IntoIter, F> |
300 | where J: IntoIterator, |
301 | F: FnMut(&J::Item) -> K, |
302 | { |
303 | GroupBy { |
304 | inner: RefCell::new(GroupInner { |
305 | key: f, |
306 | iter: iter.into_iter(), |
307 | current_key: None, |
308 | current_elt: None, |
309 | done: false, |
310 | top_group: 0, |
311 | oldest_buffered_group: 0, |
312 | bottom_group: 0, |
313 | buffer: Vec::new(), |
314 | dropped_group: !0, |
315 | }), |
316 | index: Cell::new(0), |
317 | } |
318 | } |
319 | |
320 | impl<K, I, F> GroupBy<K, I, F> |
321 | where I: Iterator, |
322 | { |
323 | /// `client`: Index of group that requests next element |
324 | fn step(&self, client: usize) -> Option<I::Item> |
325 | where F: FnMut(&I::Item) -> K, |
326 | K: PartialEq, |
327 | { |
328 | self.inner.borrow_mut().step(client) |
329 | } |
330 | |
331 | /// `client`: Index of group |
332 | fn drop_group(&self, client: usize) { |
333 | self.inner.borrow_mut().drop_group(client); |
334 | } |
335 | } |
336 | |
337 | impl<'a, K, I, F> IntoIterator for &'a GroupBy<K, I, F> |
338 | where I: Iterator, |
339 | I::Item: 'a, |
340 | F: FnMut(&I::Item) -> K, |
341 | K: PartialEq |
342 | { |
343 | type Item = (K, Group<'a, K, I, F>); |
344 | type IntoIter = Groups<'a, K, I, F>; |
345 | |
346 | fn into_iter(self) -> Self::IntoIter { |
347 | Groups { parent: self } |
348 | } |
349 | } |
350 | |
351 | |
352 | /// An iterator that yields the Group iterators. |
353 | /// |
354 | /// Iterator element type is `(K, Group)`: |
355 | /// the group's key `K` and the group's iterator. |
356 | /// |
357 | /// See [`.group_by()`](crate::Itertools::group_by) for more information. |
358 | #[must_use = "iterator adaptors are lazy and do nothing unless consumed" ] |
359 | pub struct Groups<'a, K: 'a, I: 'a, F: 'a> |
360 | where I: Iterator, |
361 | I::Item: 'a |
362 | { |
363 | parent: &'a GroupBy<K, I, F>, |
364 | } |
365 | |
366 | impl<'a, K, I, F> Iterator for Groups<'a, K, I, F> |
367 | where I: Iterator, |
368 | I::Item: 'a, |
369 | F: FnMut(&I::Item) -> K, |
370 | K: PartialEq |
371 | { |
372 | type Item = (K, Group<'a, K, I, F>); |
373 | |
374 | #[inline ] |
375 | fn next(&mut self) -> Option<Self::Item> { |
376 | let index = self.parent.index.get(); |
377 | self.parent.index.set(index + 1); |
378 | let inner = &mut *self.parent.inner.borrow_mut(); |
379 | inner.step(index).map(|elt| { |
380 | let key = inner.group_key(index); |
381 | (key, Group { |
382 | parent: self.parent, |
383 | index, |
384 | first: Some(elt), |
385 | }) |
386 | }) |
387 | } |
388 | } |
389 | |
390 | /// An iterator for the elements in a single group. |
391 | /// |
392 | /// Iterator element type is `I::Item`. |
393 | pub struct Group<'a, K: 'a, I: 'a, F: 'a> |
394 | where I: Iterator, |
395 | I::Item: 'a, |
396 | { |
397 | parent: &'a GroupBy<K, I, F>, |
398 | index: usize, |
399 | first: Option<I::Item>, |
400 | } |
401 | |
402 | impl<'a, K, I, F> Drop for Group<'a, K, I, F> |
403 | where I: Iterator, |
404 | I::Item: 'a, |
405 | { |
406 | fn drop(&mut self) { |
407 | self.parent.drop_group(self.index); |
408 | } |
409 | } |
410 | |
411 | impl<'a, K, I, F> Iterator for Group<'a, K, I, F> |
412 | where I: Iterator, |
413 | I::Item: 'a, |
414 | F: FnMut(&I::Item) -> K, |
415 | K: PartialEq, |
416 | { |
417 | type Item = I::Item; |
418 | #[inline ] |
419 | fn next(&mut self) -> Option<Self::Item> { |
420 | if let elt @ Some(..) = self.first.take() { |
421 | return elt; |
422 | } |
423 | self.parent.step(self.index) |
424 | } |
425 | } |
426 | |
427 | ///// IntoChunks ///// |
428 | |
429 | /// Create a new |
430 | pub fn new_chunks<J>(iter: J, size: usize) -> IntoChunks<J::IntoIter> |
431 | where J: IntoIterator, |
432 | { |
433 | IntoChunks { |
434 | inner: RefCell::new(GroupInner { |
435 | key: ChunkIndex::new(size), |
436 | iter: iter.into_iter(), |
437 | current_key: None, |
438 | current_elt: None, |
439 | done: false, |
440 | top_group: 0, |
441 | oldest_buffered_group: 0, |
442 | bottom_group: 0, |
443 | buffer: Vec::new(), |
444 | dropped_group: !0, |
445 | }), |
446 | index: Cell::new(0), |
447 | } |
448 | } |
449 | |
450 | |
451 | /// `ChunkLazy` is the storage for a lazy chunking operation. |
452 | /// |
453 | /// `IntoChunks` behaves just like `GroupBy`: it is iterable, and |
454 | /// it only buffers if several chunk iterators are alive at the same time. |
455 | /// |
456 | /// This type implements [`IntoIterator`] (it is **not** an iterator |
457 | /// itself), because the chunk iterators need to borrow from this |
458 | /// value. It should be stored in a local variable or temporary and |
459 | /// iterated. |
460 | /// |
461 | /// Iterator element type is `Chunk`, each chunk's iterator. |
462 | /// |
463 | /// See [`.chunks()`](crate::Itertools::chunks) for more information. |
464 | #[must_use = "iterator adaptors are lazy and do nothing unless consumed" ] |
465 | pub struct IntoChunks<I> |
466 | where I: Iterator, |
467 | { |
468 | inner: RefCell<GroupInner<usize, I, ChunkIndex>>, |
469 | // the chunk iterator's current index. Keep this in the main value |
470 | // so that simultaneous iterators all use the same state. |
471 | index: Cell<usize>, |
472 | } |
473 | |
474 | |
475 | impl<I> IntoChunks<I> |
476 | where I: Iterator, |
477 | { |
478 | /// `client`: Index of chunk that requests next element |
479 | fn step(&self, client: usize) -> Option<I::Item> { |
480 | self.inner.borrow_mut().step(client) |
481 | } |
482 | |
483 | /// `client`: Index of chunk |
484 | fn drop_group(&self, client: usize) { |
485 | self.inner.borrow_mut().drop_group(client); |
486 | } |
487 | } |
488 | |
489 | impl<'a, I> IntoIterator for &'a IntoChunks<I> |
490 | where I: Iterator, |
491 | I::Item: 'a, |
492 | { |
493 | type Item = Chunk<'a, I>; |
494 | type IntoIter = Chunks<'a, I>; |
495 | |
496 | fn into_iter(self) -> Self::IntoIter { |
497 | Chunks { |
498 | parent: self, |
499 | } |
500 | } |
501 | } |
502 | |
503 | |
504 | /// An iterator that yields the Chunk iterators. |
505 | /// |
506 | /// Iterator element type is `Chunk`. |
507 | /// |
508 | /// See [`.chunks()`](crate::Itertools::chunks) for more information. |
509 | #[must_use = "iterator adaptors are lazy and do nothing unless consumed" ] |
510 | pub struct Chunks<'a, I: 'a> |
511 | where I: Iterator, |
512 | I::Item: 'a, |
513 | { |
514 | parent: &'a IntoChunks<I>, |
515 | } |
516 | |
517 | impl<'a, I> Iterator for Chunks<'a, I> |
518 | where I: Iterator, |
519 | I::Item: 'a, |
520 | { |
521 | type Item = Chunk<'a, I>; |
522 | |
523 | #[inline ] |
524 | fn next(&mut self) -> Option<Self::Item> { |
525 | let index = self.parent.index.get(); |
526 | self.parent.index.set(index + 1); |
527 | let inner = &mut *self.parent.inner.borrow_mut(); |
528 | inner.step(index).map(|elt| { |
529 | Chunk { |
530 | parent: self.parent, |
531 | index, |
532 | first: Some(elt), |
533 | } |
534 | }) |
535 | } |
536 | } |
537 | |
538 | /// An iterator for the elements in a single chunk. |
539 | /// |
540 | /// Iterator element type is `I::Item`. |
541 | pub struct Chunk<'a, I: 'a> |
542 | where I: Iterator, |
543 | I::Item: 'a, |
544 | { |
545 | parent: &'a IntoChunks<I>, |
546 | index: usize, |
547 | first: Option<I::Item>, |
548 | } |
549 | |
550 | impl<'a, I> Drop for Chunk<'a, I> |
551 | where I: Iterator, |
552 | I::Item: 'a, |
553 | { |
554 | fn drop(&mut self) { |
555 | self.parent.drop_group(self.index); |
556 | } |
557 | } |
558 | |
559 | impl<'a, I> Iterator for Chunk<'a, I> |
560 | where I: Iterator, |
561 | I::Item: 'a, |
562 | { |
563 | type Item = I::Item; |
564 | #[inline ] |
565 | fn next(&mut self) -> Option<Self::Item> { |
566 | if let elt @ Some(..) = self.first.take() { |
567 | return elt; |
568 | } |
569 | self.parent.step(self.index) |
570 | } |
571 | } |
572 | |