1 | use alloc::vec::{self, Vec}; |
2 | use std::cell::{Cell, RefCell}; |
3 | |
4 | /// A trait to unify `FnMut` for `GroupBy` with the chunk key in `IntoChunks` |
5 | trait KeyFunction<A> { |
6 | type Key; |
7 | fn call_mut(&mut self, arg: A) -> Self::Key; |
8 | } |
9 | |
10 | impl<A, K, F> KeyFunction<A> for F |
11 | where |
12 | F: FnMut(A) -> K + ?Sized, |
13 | { |
14 | type Key = K; |
15 | #[inline ] |
16 | fn call_mut(&mut self, arg: A) -> Self::Key { |
17 | (*self)(arg) |
18 | } |
19 | } |
20 | |
21 | /// `ChunkIndex` acts like the grouping key function for `IntoChunks` |
22 | #[derive (Debug, Clone)] |
23 | struct ChunkIndex { |
24 | size: usize, |
25 | index: usize, |
26 | key: usize, |
27 | } |
28 | |
29 | impl ChunkIndex { |
30 | #[inline (always)] |
31 | fn new(size: usize) -> Self { |
32 | Self { |
33 | size, |
34 | index: 0, |
35 | key: 0, |
36 | } |
37 | } |
38 | } |
39 | |
40 | impl<A> KeyFunction<A> for ChunkIndex { |
41 | type Key = usize; |
42 | #[inline (always)] |
43 | fn call_mut(&mut self, _arg: A) -> Self::Key { |
44 | if self.index == self.size { |
45 | self.key += 1; |
46 | self.index = 0; |
47 | } |
48 | self.index += 1; |
49 | self.key |
50 | } |
51 | } |
52 | |
53 | #[derive (Clone)] |
54 | struct GroupInner<K, I, F> |
55 | where |
56 | I: Iterator, |
57 | { |
58 | key: F, |
59 | iter: I, |
60 | current_key: Option<K>, |
61 | current_elt: Option<I::Item>, |
62 | /// flag set if iterator is exhausted |
63 | done: bool, |
64 | /// Index of group we are currently buffering or visiting |
65 | top_group: usize, |
66 | /// Least index for which we still have elements buffered |
67 | oldest_buffered_group: usize, |
68 | /// Group index for `buffer[0]` -- the slots |
69 | /// bottom_group..oldest_buffered_group are unused and will be erased when |
70 | /// that range is large enough. |
71 | bottom_group: usize, |
72 | /// Buffered groups, from `bottom_group` (index 0) to `top_group`. |
73 | buffer: Vec<vec::IntoIter<I::Item>>, |
74 | /// index of last group iter that was dropped, usize::MAX == none |
75 | dropped_group: usize, |
76 | } |
77 | |
78 | impl<K, I, F> GroupInner<K, I, F> |
79 | where |
80 | I: Iterator, |
81 | F: for<'a> KeyFunction<&'a I::Item, Key = K>, |
82 | K: PartialEq, |
83 | { |
84 | /// `client`: Index of group that requests next element |
85 | #[inline (always)] |
86 | fn step(&mut self, client: usize) -> Option<I::Item> { |
87 | /* |
88 | println!("client={}, bottom_group={}, oldest_buffered_group={}, top_group={}, buffers=[{}]", |
89 | client, self.bottom_group, self.oldest_buffered_group, |
90 | self.top_group, |
91 | self.buffer.iter().map(|elt| elt.len()).format(", ")); |
92 | */ |
93 | if client < self.oldest_buffered_group { |
94 | None |
95 | } else if client < self.top_group |
96 | || (client == self.top_group && self.buffer.len() > self.top_group - self.bottom_group) |
97 | { |
98 | self.lookup_buffer(client) |
99 | } else if self.done { |
100 | None |
101 | } else if self.top_group == client { |
102 | self.step_current() |
103 | } else { |
104 | self.step_buffering(client) |
105 | } |
106 | } |
107 | |
108 | #[inline (never)] |
109 | fn lookup_buffer(&mut self, client: usize) -> Option<I::Item> { |
110 | // if `bufidx` doesn't exist in self.buffer, it might be empty |
111 | let bufidx = client - self.bottom_group; |
112 | if client < self.oldest_buffered_group { |
113 | return None; |
114 | } |
115 | let elt = self.buffer.get_mut(bufidx).and_then(|queue| queue.next()); |
116 | if elt.is_none() && client == self.oldest_buffered_group { |
117 | // FIXME: VecDeque is unfortunately not zero allocation when empty, |
118 | // so we do this job manually. |
119 | // `bottom_group..oldest_buffered_group` is unused, and if it's large enough, erase it. |
120 | self.oldest_buffered_group += 1; |
121 | // skip forward further empty queues too |
122 | while self |
123 | .buffer |
124 | .get(self.oldest_buffered_group - self.bottom_group) |
125 | .map_or(false, |buf| buf.len() == 0) |
126 | { |
127 | self.oldest_buffered_group += 1; |
128 | } |
129 | |
130 | let nclear = self.oldest_buffered_group - self.bottom_group; |
131 | if nclear > 0 && nclear >= self.buffer.len() / 2 { |
132 | let mut i = 0; |
133 | self.buffer.retain(|buf| { |
134 | i += 1; |
135 | debug_assert!(buf.len() == 0 || i > nclear); |
136 | i > nclear |
137 | }); |
138 | self.bottom_group = self.oldest_buffered_group; |
139 | } |
140 | } |
141 | elt |
142 | } |
143 | |
144 | /// Take the next element from the iterator, and set the done |
145 | /// flag if exhausted. Must not be called after done. |
146 | #[inline (always)] |
147 | fn next_element(&mut self) -> Option<I::Item> { |
148 | debug_assert!(!self.done); |
149 | match self.iter.next() { |
150 | None => { |
151 | self.done = true; |
152 | None |
153 | } |
154 | otherwise => otherwise, |
155 | } |
156 | } |
157 | |
158 | #[inline (never)] |
159 | fn step_buffering(&mut self, client: usize) -> Option<I::Item> { |
160 | // requested a later group -- walk through the current group up to |
161 | // the requested group index, and buffer the elements (unless |
162 | // the group is marked as dropped). |
163 | // Because the `Groups` iterator is always the first to request |
164 | // each group index, client is the next index efter top_group. |
165 | debug_assert!(self.top_group + 1 == client); |
166 | let mut group = Vec::new(); |
167 | |
168 | if let Some(elt) = self.current_elt.take() { |
169 | if self.top_group != self.dropped_group { |
170 | group.push(elt); |
171 | } |
172 | } |
173 | let mut first_elt = None; // first element of the next group |
174 | |
175 | while let Some(elt) = self.next_element() { |
176 | let key = self.key.call_mut(&elt); |
177 | match self.current_key.take() { |
178 | None => {} |
179 | Some(old_key) => { |
180 | if old_key != key { |
181 | self.current_key = Some(key); |
182 | first_elt = Some(elt); |
183 | break; |
184 | } |
185 | } |
186 | } |
187 | self.current_key = Some(key); |
188 | if self.top_group != self.dropped_group { |
189 | group.push(elt); |
190 | } |
191 | } |
192 | |
193 | if self.top_group != self.dropped_group { |
194 | self.push_next_group(group); |
195 | } |
196 | if first_elt.is_some() { |
197 | self.top_group += 1; |
198 | debug_assert!(self.top_group == client); |
199 | } |
200 | first_elt |
201 | } |
202 | |
203 | fn push_next_group(&mut self, group: Vec<I::Item>) { |
204 | // When we add a new buffered group, fill up slots between oldest_buffered_group and top_group |
205 | while self.top_group - self.bottom_group > self.buffer.len() { |
206 | if self.buffer.is_empty() { |
207 | self.bottom_group += 1; |
208 | self.oldest_buffered_group += 1; |
209 | } else { |
210 | self.buffer.push(Vec::new().into_iter()); |
211 | } |
212 | } |
213 | self.buffer.push(group.into_iter()); |
214 | debug_assert!(self.top_group + 1 - self.bottom_group == self.buffer.len()); |
215 | } |
216 | |
217 | /// This is the immediate case, where we use no buffering |
218 | #[inline ] |
219 | fn step_current(&mut self) -> Option<I::Item> { |
220 | debug_assert!(!self.done); |
221 | if let elt @ Some(..) = self.current_elt.take() { |
222 | return elt; |
223 | } |
224 | match self.next_element() { |
225 | None => None, |
226 | Some(elt) => { |
227 | let key = self.key.call_mut(&elt); |
228 | match self.current_key.take() { |
229 | None => {} |
230 | Some(old_key) => { |
231 | if old_key != key { |
232 | self.current_key = Some(key); |
233 | self.current_elt = Some(elt); |
234 | self.top_group += 1; |
235 | return None; |
236 | } |
237 | } |
238 | } |
239 | self.current_key = Some(key); |
240 | Some(elt) |
241 | } |
242 | } |
243 | } |
244 | |
245 | /// Request the just started groups' key. |
246 | /// |
247 | /// `client`: Index of group |
248 | /// |
249 | /// **Panics** if no group key is available. |
250 | fn group_key(&mut self, client: usize) -> K { |
251 | // This can only be called after we have just returned the first |
252 | // element of a group. |
253 | // Perform this by simply buffering one more element, grabbing the |
254 | // next key. |
255 | debug_assert!(!self.done); |
256 | debug_assert!(client == self.top_group); |
257 | debug_assert!(self.current_key.is_some()); |
258 | debug_assert!(self.current_elt.is_none()); |
259 | let old_key = self.current_key.take().unwrap(); |
260 | if let Some(elt) = self.next_element() { |
261 | let key = self.key.call_mut(&elt); |
262 | if old_key != key { |
263 | self.top_group += 1; |
264 | } |
265 | self.current_key = Some(key); |
266 | self.current_elt = Some(elt); |
267 | } |
268 | old_key |
269 | } |
270 | } |
271 | |
272 | impl<K, I, F> GroupInner<K, I, F> |
273 | where |
274 | I: Iterator, |
275 | { |
276 | /// Called when a group is dropped |
277 | fn drop_group(&mut self, client: usize) { |
278 | // It's only useful to track the maximal index |
279 | if self.dropped_group == !0 || client > self.dropped_group { |
280 | self.dropped_group = client; |
281 | } |
282 | } |
283 | } |
284 | |
285 | /// `GroupBy` is the storage for the lazy grouping operation. |
286 | /// |
287 | /// If the groups are consumed in their original order, or if each |
288 | /// group is dropped without keeping it around, then `GroupBy` uses |
289 | /// no allocations. It needs allocations only if several group iterators |
290 | /// are alive at the same time. |
291 | /// |
292 | /// This type implements [`IntoIterator`] (it is **not** an iterator |
293 | /// itself), because the group iterators need to borrow from this |
294 | /// value. It should be stored in a local variable or temporary and |
295 | /// iterated. |
296 | /// |
297 | /// See [`.group_by()`](crate::Itertools::group_by) for more information. |
298 | #[must_use = "iterator adaptors are lazy and do nothing unless consumed" ] |
299 | pub struct GroupBy<K, I, F> |
300 | where |
301 | I: Iterator, |
302 | { |
303 | inner: RefCell<GroupInner<K, I, F>>, |
304 | // the group iterator's current index. Keep this in the main value |
305 | // so that simultaneous iterators all use the same state. |
306 | index: Cell<usize>, |
307 | } |
308 | |
309 | /// Create a new |
310 | pub fn new<K, J, F>(iter: J, f: F) -> GroupBy<K, J::IntoIter, F> |
311 | where |
312 | J: IntoIterator, |
313 | F: FnMut(&J::Item) -> K, |
314 | { |
315 | GroupBy { |
316 | inner: RefCell::new(GroupInner { |
317 | key: f, |
318 | iter: iter.into_iter(), |
319 | current_key: None, |
320 | current_elt: None, |
321 | done: false, |
322 | top_group: 0, |
323 | oldest_buffered_group: 0, |
324 | bottom_group: 0, |
325 | buffer: Vec::new(), |
326 | dropped_group: !0, |
327 | }), |
328 | index: Cell::new(0), |
329 | } |
330 | } |
331 | |
332 | impl<K, I, F> GroupBy<K, I, F> |
333 | where |
334 | I: Iterator, |
335 | { |
336 | /// `client`: Index of group that requests next element |
337 | fn step(&self, client: usize) -> Option<I::Item> |
338 | where |
339 | F: FnMut(&I::Item) -> K, |
340 | K: PartialEq, |
341 | { |
342 | self.inner.borrow_mut().step(client) |
343 | } |
344 | |
345 | /// `client`: Index of group |
346 | fn drop_group(&self, client: usize) { |
347 | self.inner.borrow_mut().drop_group(client); |
348 | } |
349 | } |
350 | |
351 | impl<'a, K, I, F> IntoIterator for &'a GroupBy<K, I, F> |
352 | where |
353 | I: Iterator, |
354 | I::Item: 'a, |
355 | F: FnMut(&I::Item) -> K, |
356 | K: PartialEq, |
357 | { |
358 | type Item = (K, Group<'a, K, I, F>); |
359 | type IntoIter = Groups<'a, K, I, F>; |
360 | |
361 | fn into_iter(self) -> Self::IntoIter { |
362 | Groups { parent: self } |
363 | } |
364 | } |
365 | |
366 | /// An iterator that yields the Group iterators. |
367 | /// |
368 | /// Iterator element type is `(K, Group)`: |
369 | /// the group's key `K` and the group's iterator. |
370 | /// |
371 | /// See [`.group_by()`](crate::Itertools::group_by) for more information. |
372 | #[must_use = "iterator adaptors are lazy and do nothing unless consumed" ] |
373 | pub struct Groups<'a, K, I, F> |
374 | where |
375 | I: Iterator + 'a, |
376 | I::Item: 'a, |
377 | K: 'a, |
378 | F: 'a, |
379 | { |
380 | parent: &'a GroupBy<K, I, F>, |
381 | } |
382 | |
383 | impl<'a, K, I, F> Iterator for Groups<'a, K, I, F> |
384 | where |
385 | I: Iterator, |
386 | I::Item: 'a, |
387 | F: FnMut(&I::Item) -> K, |
388 | K: PartialEq, |
389 | { |
390 | type Item = (K, Group<'a, K, I, F>); |
391 | |
392 | #[inline ] |
393 | fn next(&mut self) -> Option<Self::Item> { |
394 | let index: usize = self.parent.index.get(); |
395 | self.parent.index.set(val:index + 1); |
396 | let inner: &mut GroupInner = &mut *self.parent.inner.borrow_mut(); |
397 | inner.step(client:index).map(|elt: ::Item| { |
398 | let key: K = inner.group_key(client:index); |
399 | ( |
400 | key, |
401 | Group { |
402 | parent: self.parent, |
403 | index, |
404 | first: Some(elt), |
405 | }, |
406 | ) |
407 | }) |
408 | } |
409 | } |
410 | |
411 | /// An iterator for the elements in a single group. |
412 | /// |
413 | /// Iterator element type is `I::Item`. |
414 | pub struct Group<'a, K, I, F> |
415 | where |
416 | I: Iterator + 'a, |
417 | I::Item: 'a, |
418 | K: 'a, |
419 | F: 'a, |
420 | { |
421 | parent: &'a GroupBy<K, I, F>, |
422 | index: usize, |
423 | first: Option<I::Item>, |
424 | } |
425 | |
426 | impl<'a, K, I, F> Drop for Group<'a, K, I, F> |
427 | where |
428 | I: Iterator, |
429 | I::Item: 'a, |
430 | { |
431 | fn drop(&mut self) { |
432 | self.parent.drop_group(self.index); |
433 | } |
434 | } |
435 | |
436 | impl<'a, K, I, F> Iterator for Group<'a, K, I, F> |
437 | where |
438 | I: Iterator, |
439 | I::Item: 'a, |
440 | F: FnMut(&I::Item) -> K, |
441 | K: PartialEq, |
442 | { |
443 | type Item = I::Item; |
444 | #[inline ] |
445 | fn next(&mut self) -> Option<Self::Item> { |
446 | if let elt: Option<::Item> @ Some(..) = self.first.take() { |
447 | return elt; |
448 | } |
449 | self.parent.step(self.index) |
450 | } |
451 | } |
452 | |
453 | ///// IntoChunks ///// |
454 | |
455 | /// Create a new |
456 | pub fn new_chunks<J>(iter: J, size: usize) -> IntoChunks<J::IntoIter> |
457 | where |
458 | J: IntoIterator, |
459 | { |
460 | IntoChunks { |
461 | inner: RefCell::new(GroupInner { |
462 | key: ChunkIndex::new(size), |
463 | iter: iter.into_iter(), |
464 | current_key: None, |
465 | current_elt: None, |
466 | done: false, |
467 | top_group: 0, |
468 | oldest_buffered_group: 0, |
469 | bottom_group: 0, |
470 | buffer: Vec::new(), |
471 | dropped_group: !0, |
472 | }), |
473 | index: Cell::new(0), |
474 | } |
475 | } |
476 | |
477 | /// `ChunkLazy` is the storage for a lazy chunking operation. |
478 | /// |
479 | /// `IntoChunks` behaves just like `GroupBy`: it is iterable, and |
480 | /// it only buffers if several chunk iterators are alive at the same time. |
481 | /// |
482 | /// This type implements [`IntoIterator`] (it is **not** an iterator |
483 | /// itself), because the chunk iterators need to borrow from this |
484 | /// value. It should be stored in a local variable or temporary and |
485 | /// iterated. |
486 | /// |
487 | /// Iterator element type is `Chunk`, each chunk's iterator. |
488 | /// |
489 | /// See [`.chunks()`](crate::Itertools::chunks) for more information. |
490 | #[must_use = "iterator adaptors are lazy and do nothing unless consumed" ] |
491 | pub struct IntoChunks<I> |
492 | where |
493 | I: Iterator, |
494 | { |
495 | inner: RefCell<GroupInner<usize, I, ChunkIndex>>, |
496 | // the chunk iterator's current index. Keep this in the main value |
497 | // so that simultaneous iterators all use the same state. |
498 | index: Cell<usize>, |
499 | } |
500 | |
501 | impl<I> Clone for IntoChunks<I> |
502 | where |
503 | I: Clone + Iterator, |
504 | I::Item: Clone, |
505 | { |
506 | clone_fields!(inner, index); |
507 | } |
508 | |
509 | impl<I> IntoChunks<I> |
510 | where |
511 | I: Iterator, |
512 | { |
513 | /// `client`: Index of chunk that requests next element |
514 | fn step(&self, client: usize) -> Option<I::Item> { |
515 | self.inner.borrow_mut().step(client) |
516 | } |
517 | |
518 | /// `client`: Index of chunk |
519 | fn drop_group(&self, client: usize) { |
520 | self.inner.borrow_mut().drop_group(client); |
521 | } |
522 | } |
523 | |
524 | impl<'a, I> IntoIterator for &'a IntoChunks<I> |
525 | where |
526 | I: Iterator, |
527 | I::Item: 'a, |
528 | { |
529 | type Item = Chunk<'a, I>; |
530 | type IntoIter = Chunks<'a, I>; |
531 | |
532 | fn into_iter(self) -> Self::IntoIter { |
533 | Chunks { parent: self } |
534 | } |
535 | } |
536 | |
537 | /// An iterator that yields the Chunk iterators. |
538 | /// |
539 | /// Iterator element type is `Chunk`. |
540 | /// |
541 | /// See [`.chunks()`](crate::Itertools::chunks) for more information. |
542 | #[must_use = "iterator adaptors are lazy and do nothing unless consumed" ] |
543 | #[derive (Clone)] |
544 | pub struct Chunks<'a, I> |
545 | where |
546 | I: Iterator + 'a, |
547 | I::Item: 'a, |
548 | { |
549 | parent: &'a IntoChunks<I>, |
550 | } |
551 | |
552 | impl<'a, I> Iterator for Chunks<'a, I> |
553 | where |
554 | I: Iterator, |
555 | I::Item: 'a, |
556 | { |
557 | type Item = Chunk<'a, I>; |
558 | |
559 | #[inline ] |
560 | fn next(&mut self) -> Option<Self::Item> { |
561 | let index: usize = self.parent.index.get(); |
562 | self.parent.index.set(val:index + 1); |
563 | let inner: &mut GroupInner = &mut *self.parent.inner.borrow_mut(); |
564 | inner.step(client:index).map(|elt: ::Item| Chunk { |
565 | parent: self.parent, |
566 | index, |
567 | first: Some(elt), |
568 | }) |
569 | } |
570 | } |
571 | |
572 | /// An iterator for the elements in a single chunk. |
573 | /// |
574 | /// Iterator element type is `I::Item`. |
575 | pub struct Chunk<'a, I> |
576 | where |
577 | I: Iterator + 'a, |
578 | I::Item: 'a, |
579 | { |
580 | parent: &'a IntoChunks<I>, |
581 | index: usize, |
582 | first: Option<I::Item>, |
583 | } |
584 | |
585 | impl<'a, I> Drop for Chunk<'a, I> |
586 | where |
587 | I: Iterator, |
588 | I::Item: 'a, |
589 | { |
590 | fn drop(&mut self) { |
591 | self.parent.drop_group(self.index); |
592 | } |
593 | } |
594 | |
595 | impl<'a, I> Iterator for Chunk<'a, I> |
596 | where |
597 | I: Iterator, |
598 | I::Item: 'a, |
599 | { |
600 | type Item = I::Item; |
601 | #[inline ] |
602 | fn next(&mut self) -> Option<Self::Item> { |
603 | if let elt: Option<::Item> @ Some(..) = self.first.take() { |
604 | return elt; |
605 | } |
606 | self.parent.step(self.index) |
607 | } |
608 | } |
609 | |