1use criterion::{black_box, criterion_group, criterion_main, Criterion};
2use itertools::Itertools;
3use itertools::free::cloned;
4use itertools::iproduct;
5
6use std::iter::repeat;
7use std::cmp;
8use std::ops::{Add, Range};
9
10mod extra;
11
12use crate::extra::ZipSlices;
13
14fn slice_iter(c: &mut Criterion) {
15 let xs: Vec<_> = repeat(1i32).take(20).collect();
16
17 c.bench_function("slice iter", move |b| {
18 b.iter(|| for elt in xs.iter() {
19 black_box(elt);
20 })
21 });
22}
23
24fn slice_iter_rev(c: &mut Criterion) {
25 let xs: Vec<_> = repeat(1i32).take(20).collect();
26
27 c.bench_function("slice iter rev", move |b| {
28 b.iter(|| for elt in xs.iter().rev() {
29 black_box(elt);
30 })
31 });
32}
33
34fn zip_default_zip(c: &mut Criterion) {
35 let xs = vec![0; 1024];
36 let ys = vec![0; 768];
37 let xs = black_box(xs);
38 let ys = black_box(ys);
39
40 c.bench_function("zip default zip", move |b| {
41 b.iter(|| {
42 for (&x, &y) in xs.iter().zip(&ys) {
43 black_box(x);
44 black_box(y);
45 }
46 })
47 });
48}
49
50fn zipdot_i32_default_zip(c: &mut Criterion) {
51 let xs = vec![2; 1024];
52 let ys = vec![2; 768];
53 let xs = black_box(xs);
54 let ys = black_box(ys);
55
56 c.bench_function("zipdot i32 default zip", move |b| {
57 b.iter(|| {
58 let mut s = 0;
59 for (&x, &y) in xs.iter().zip(&ys) {
60 s += x * y;
61 }
62 s
63 })
64 });
65}
66
67fn zipdot_f32_default_zip(c: &mut Criterion) {
68 let xs = vec![2f32; 1024];
69 let ys = vec![2f32; 768];
70 let xs = black_box(xs);
71 let ys = black_box(ys);
72
73 c.bench_function("zipdot f32 default zip", move |b| {
74 b.iter(|| {
75 let mut s = 0.;
76 for (&x, &y) in xs.iter().zip(&ys) {
77 s += x * y;
78 }
79 s
80 })
81 });
82}
83
84fn zip_default_zip3(c: &mut Criterion) {
85 let xs = vec![0; 1024];
86 let ys = vec![0; 768];
87 let zs = vec![0; 766];
88 let xs = black_box(xs);
89 let ys = black_box(ys);
90 let zs = black_box(zs);
91
92 c.bench_function("zip default zip3", move |b| {
93 b.iter(|| {
94 for ((&x, &y), &z) in xs.iter().zip(&ys).zip(&zs) {
95 black_box(x);
96 black_box(y);
97 black_box(z);
98 }
99 })
100 });
101}
102
103fn zip_slices_ziptuple(c: &mut Criterion) {
104 let xs = vec![0; 1024];
105 let ys = vec![0; 768];
106
107 c.bench_function("zip slices ziptuple", move |b| {
108 b.iter(|| {
109 let xs = black_box(&xs);
110 let ys = black_box(&ys);
111 for (&x, &y) in itertools::multizip((xs, ys)) {
112 black_box(x);
113 black_box(y);
114 }
115 })
116 });
117}
118
119fn zipslices(c: &mut Criterion) {
120 let xs = vec![0; 1024];
121 let ys = vec![0; 768];
122 let xs = black_box(xs);
123 let ys = black_box(ys);
124
125 c.bench_function("zipslices", move |b| {
126 b.iter(|| {
127 for (&x, &y) in ZipSlices::new(&xs, &ys) {
128 black_box(x);
129 black_box(y);
130 }
131 })
132 });
133}
134
135fn zipslices_mut(c: &mut Criterion) {
136 let xs = vec![0; 1024];
137 let ys = vec![0; 768];
138 let xs = black_box(xs);
139 let mut ys = black_box(ys);
140
141 c.bench_function("zipslices mut", move |b| {
142 b.iter(|| {
143 for (&x, &mut y) in ZipSlices::from_slices(&xs[..], &mut ys[..]) {
144 black_box(x);
145 black_box(y);
146 }
147 })
148 });
149}
150
151fn zipdot_i32_zipslices(c: &mut Criterion) {
152 let xs = vec![2; 1024];
153 let ys = vec![2; 768];
154 let xs = black_box(xs);
155 let ys = black_box(ys);
156
157 c.bench_function("zipdot i32 zipslices", move |b| {
158 b.iter(|| {
159 let mut s = 0i32;
160 for (&x, &y) in ZipSlices::new(&xs, &ys) {
161 s += x * y;
162 }
163 s
164 })
165 });
166}
167
168fn zipdot_f32_zipslices(c: &mut Criterion) {
169 let xs = vec![2f32; 1024];
170 let ys = vec![2f32; 768];
171 let xs = black_box(xs);
172 let ys = black_box(ys);
173
174 c.bench_function("zipdot f32 zipslices", move |b| {
175 b.iter(|| {
176 let mut s = 0.;
177 for (&x, &y) in ZipSlices::new(&xs, &ys) {
178 s += x * y;
179 }
180 s
181 })
182 });
183}
184
185fn zip_checked_counted_loop(c: &mut Criterion) {
186 let xs = vec![0; 1024];
187 let ys = vec![0; 768];
188 let xs = black_box(xs);
189 let ys = black_box(ys);
190
191 c.bench_function("zip checked counted loop", move |b| {
192 b.iter(|| {
193 // Must slice to equal lengths, and then bounds checks are eliminated!
194 let len = cmp::min(xs.len(), ys.len());
195 let xs = &xs[..len];
196 let ys = &ys[..len];
197
198 for i in 0..len {
199 let x = xs[i];
200 let y = ys[i];
201 black_box(x);
202 black_box(y);
203 }
204 })
205 });
206}
207
208fn zipdot_i32_checked_counted_loop(c: &mut Criterion) {
209 let xs = vec![2; 1024];
210 let ys = vec![2; 768];
211 let xs = black_box(xs);
212 let ys = black_box(ys);
213
214 c.bench_function("zipdot i32 checked counted loop", move |b| {
215 b.iter(|| {
216 // Must slice to equal lengths, and then bounds checks are eliminated!
217 let len = cmp::min(xs.len(), ys.len());
218 let xs = &xs[..len];
219 let ys = &ys[..len];
220
221 let mut s = 0i32;
222
223 for i in 0..len {
224 s += xs[i] * ys[i];
225 }
226 s
227 })
228 });
229}
230
231fn zipdot_f32_checked_counted_loop(c: &mut Criterion) {
232 let xs = vec![2f32; 1024];
233 let ys = vec![2f32; 768];
234 let xs = black_box(xs);
235 let ys = black_box(ys);
236
237 c.bench_function("zipdot f32 checked counted loop", move |b| {
238 b.iter(|| {
239 // Must slice to equal lengths, and then bounds checks are eliminated!
240 let len = cmp::min(xs.len(), ys.len());
241 let xs = &xs[..len];
242 let ys = &ys[..len];
243
244 let mut s = 0.;
245
246 for i in 0..len {
247 s += xs[i] * ys[i];
248 }
249 s
250 })
251 });
252}
253
254fn zipdot_f32_checked_counted_unrolled_loop(c: &mut Criterion) {
255 let xs = vec![2f32; 1024];
256 let ys = vec![2f32; 768];
257 let xs = black_box(xs);
258 let ys = black_box(ys);
259
260 c.bench_function("zipdot f32 checked counted unrolled loop", move |b| {
261 b.iter(|| {
262 // Must slice to equal lengths, and then bounds checks are eliminated!
263 let len = cmp::min(xs.len(), ys.len());
264 let mut xs = &xs[..len];
265 let mut ys = &ys[..len];
266
267 let mut s = 0.;
268 let (mut p0, mut p1, mut p2, mut p3, mut p4, mut p5, mut p6, mut p7) =
269 (0., 0., 0., 0., 0., 0., 0., 0.);
270
271 // how to unroll and have bounds checks eliminated (by cristicbz)
272 // split sum into eight parts to enable vectorization (by bluss)
273 while xs.len() >= 8 {
274 p0 += xs[0] * ys[0];
275 p1 += xs[1] * ys[1];
276 p2 += xs[2] * ys[2];
277 p3 += xs[3] * ys[3];
278 p4 += xs[4] * ys[4];
279 p5 += xs[5] * ys[5];
280 p6 += xs[6] * ys[6];
281 p7 += xs[7] * ys[7];
282
283 xs = &xs[8..];
284 ys = &ys[8..];
285 }
286 s += p0 + p4;
287 s += p1 + p5;
288 s += p2 + p6;
289 s += p3 + p7;
290
291 for i in 0..xs.len() {
292 s += xs[i] * ys[i];
293 }
294 s
295 })
296 });
297}
298
299fn zip_unchecked_counted_loop(c: &mut Criterion) {
300 let xs = vec![0; 1024];
301 let ys = vec![0; 768];
302 let xs = black_box(xs);
303 let ys = black_box(ys);
304
305 c.bench_function("zip unchecked counted loop", move |b| {
306 b.iter(|| {
307 let len = cmp::min(xs.len(), ys.len());
308 for i in 0..len {
309 unsafe {
310 let x = *xs.get_unchecked(i);
311 let y = *ys.get_unchecked(i);
312 black_box(x);
313 black_box(y);
314 }
315 }
316 })
317 });
318}
319
320fn zipdot_i32_unchecked_counted_loop(c: &mut Criterion) {
321 let xs = vec![2; 1024];
322 let ys = vec![2; 768];
323 let xs = black_box(xs);
324 let ys = black_box(ys);
325
326 c.bench_function("zipdot i32 unchecked counted loop", move |b| {
327 b.iter(|| {
328 let len = cmp::min(xs.len(), ys.len());
329 let mut s = 0i32;
330 for i in 0..len {
331 unsafe {
332 let x = *xs.get_unchecked(i);
333 let y = *ys.get_unchecked(i);
334 s += x * y;
335 }
336 }
337 s
338 })
339 });
340}
341
342fn zipdot_f32_unchecked_counted_loop(c: &mut Criterion) {
343 let xs = vec![2.; 1024];
344 let ys = vec![2.; 768];
345 let xs = black_box(xs);
346 let ys = black_box(ys);
347
348 c.bench_function("zipdot f32 unchecked counted loop", move |b| {
349 b.iter(|| {
350 let len = cmp::min(xs.len(), ys.len());
351 let mut s = 0f32;
352 for i in 0..len {
353 unsafe {
354 let x = *xs.get_unchecked(i);
355 let y = *ys.get_unchecked(i);
356 s += x * y;
357 }
358 }
359 s
360 })
361 });
362}
363
364fn zip_unchecked_counted_loop3(c: &mut Criterion) {
365 let xs = vec![0; 1024];
366 let ys = vec![0; 768];
367 let zs = vec![0; 766];
368 let xs = black_box(xs);
369 let ys = black_box(ys);
370 let zs = black_box(zs);
371
372 c.bench_function("zip unchecked counted loop3", move |b| {
373 b.iter(|| {
374 let len = cmp::min(xs.len(), cmp::min(ys.len(), zs.len()));
375 for i in 0..len {
376 unsafe {
377 let x = *xs.get_unchecked(i);
378 let y = *ys.get_unchecked(i);
379 let z = *zs.get_unchecked(i);
380 black_box(x);
381 black_box(y);
382 black_box(z);
383 }
384 }
385 })
386 });
387}
388
389fn group_by_lazy_1(c: &mut Criterion) {
390 let mut data = vec![0; 1024];
391 for (index, elt) in data.iter_mut().enumerate() {
392 *elt = index / 10;
393 }
394
395 let data = black_box(data);
396
397 c.bench_function("group by lazy 1", move |b| {
398 b.iter(|| {
399 for (_key, group) in &data.iter().group_by(|elt| **elt) {
400 for elt in group {
401 black_box(elt);
402 }
403 }
404 })
405 });
406}
407
408fn group_by_lazy_2(c: &mut Criterion) {
409 let mut data = vec![0; 1024];
410 for (index, elt) in data.iter_mut().enumerate() {
411 *elt = index / 2;
412 }
413
414 let data = black_box(data);
415
416 c.bench_function("group by lazy 2", move |b| {
417 b.iter(|| {
418 for (_key, group) in &data.iter().group_by(|elt| **elt) {
419 for elt in group {
420 black_box(elt);
421 }
422 }
423 })
424 });
425}
426
427fn slice_chunks(c: &mut Criterion) {
428 let data = vec![0; 1024];
429
430 let data = black_box(data);
431 let sz = black_box(10);
432
433 c.bench_function("slice chunks", move |b| {
434 b.iter(|| {
435 for group in data.chunks(sz) {
436 for elt in group {
437 black_box(elt);
438 }
439 }
440 })
441 });
442}
443
444fn chunks_lazy_1(c: &mut Criterion) {
445 let data = vec![0; 1024];
446
447 let data = black_box(data);
448 let sz = black_box(10);
449
450 c.bench_function("chunks lazy 1", move |b| {
451 b.iter(|| {
452 for group in &data.iter().chunks(sz) {
453 for elt in group {
454 black_box(elt);
455 }
456 }
457 })
458 });
459}
460
461fn equal(c: &mut Criterion) {
462 let data = vec![7; 1024];
463 let l = data.len();
464 let alpha = black_box(&data[1..]);
465 let beta = black_box(&data[..l - 1]);
466
467 c.bench_function("equal", move |b| {
468 b.iter(|| {
469 itertools::equal(alpha, beta)
470 })
471 });
472}
473
474fn merge_default(c: &mut Criterion) {
475 let mut data1 = vec![0; 1024];
476 let mut data2 = vec![0; 800];
477 let mut x = 0;
478 for (_, elt) in data1.iter_mut().enumerate() {
479 *elt = x;
480 x += 1;
481 }
482
483 let mut y = 0;
484 for (i, elt) in data2.iter_mut().enumerate() {
485 *elt += y;
486 if i % 3 == 0 {
487 y += 3;
488 } else {
489 y += 0;
490 }
491 }
492 let data1 = black_box(data1);
493 let data2 = black_box(data2);
494
495 c.bench_function("merge default", move |b| {
496 b.iter(|| {
497 data1.iter().merge(&data2).count()
498 })
499 });
500}
501
502fn merge_by_cmp(c: &mut Criterion) {
503 let mut data1 = vec![0; 1024];
504 let mut data2 = vec![0; 800];
505 let mut x = 0;
506 for (_, elt) in data1.iter_mut().enumerate() {
507 *elt = x;
508 x += 1;
509 }
510
511 let mut y = 0;
512 for (i, elt) in data2.iter_mut().enumerate() {
513 *elt += y;
514 if i % 3 == 0 {
515 y += 3;
516 } else {
517 y += 0;
518 }
519 }
520 let data1 = black_box(data1);
521 let data2 = black_box(data2);
522
523 c.bench_function("merge by cmp", move |b| {
524 b.iter(|| {
525 data1.iter().merge_by(&data2, PartialOrd::le).count()
526 })
527 });
528}
529
530fn merge_by_lt(c: &mut Criterion) {
531 let mut data1 = vec![0; 1024];
532 let mut data2 = vec![0; 800];
533 let mut x = 0;
534 for (_, elt) in data1.iter_mut().enumerate() {
535 *elt = x;
536 x += 1;
537 }
538
539 let mut y = 0;
540 for (i, elt) in data2.iter_mut().enumerate() {
541 *elt += y;
542 if i % 3 == 0 {
543 y += 3;
544 } else {
545 y += 0;
546 }
547 }
548 let data1 = black_box(data1);
549 let data2 = black_box(data2);
550
551 c.bench_function("merge by lt", move |b| {
552 b.iter(|| {
553 data1.iter().merge_by(&data2, |a, b| a <= b).count()
554 })
555 });
556}
557
558fn kmerge_default(c: &mut Criterion) {
559 let mut data1 = vec![0; 1024];
560 let mut data2 = vec![0; 800];
561 let mut x = 0;
562 for (_, elt) in data1.iter_mut().enumerate() {
563 *elt = x;
564 x += 1;
565 }
566
567 let mut y = 0;
568 for (i, elt) in data2.iter_mut().enumerate() {
569 *elt += y;
570 if i % 3 == 0 {
571 y += 3;
572 } else {
573 y += 0;
574 }
575 }
576 let data1 = black_box(data1);
577 let data2 = black_box(data2);
578 let its = &[data1.iter(), data2.iter()];
579
580 c.bench_function("kmerge default", move |b| {
581 b.iter(|| {
582 its.iter().cloned().kmerge().count()
583 })
584 });
585}
586
587fn kmerge_tenway(c: &mut Criterion) {
588 let mut data = vec![0; 10240];
589
590 let mut state = 1729u16;
591 fn rng(state: &mut u16) -> u16 {
592 let new = state.wrapping_mul(31421) + 6927;
593 *state = new;
594 new
595 }
596
597 for elt in &mut data {
598 *elt = rng(&mut state);
599 }
600
601 let mut chunks = Vec::new();
602 let mut rest = &mut data[..];
603 while rest.len() > 0 {
604 let chunk_len = 1 + rng(&mut state) % 512;
605 let chunk_len = cmp::min(rest.len(), chunk_len as usize);
606 let (fst, tail) = {rest}.split_at_mut(chunk_len);
607 fst.sort();
608 chunks.push(fst.iter().cloned());
609 rest = tail;
610 }
611
612 // println!("Chunk lengths: {}", chunks.iter().format_with(", ", |elt, f| f(&elt.len())));
613
614 c.bench_function("kmerge tenway", move |b| {
615 b.iter(|| {
616 chunks.iter().cloned().kmerge().count()
617 })
618 });
619}
620
621fn fast_integer_sum<I>(iter: I) -> I::Item
622 where I: IntoIterator,
623 I::Item: Default + Add<Output=I::Item>
624{
625 iter.into_iter().fold(<_>::default(), |x, y| x + y)
626}
627
628fn step_vec_2(c: &mut Criterion) {
629 let v = vec![0; 1024];
630
631 c.bench_function("step vec 2", move |b| {
632 b.iter(|| {
633 fast_integer_sum(cloned(v.iter().step_by(2)))
634 })
635 });
636}
637
638fn step_vec_10(c: &mut Criterion) {
639 let v = vec![0; 1024];
640
641 c.bench_function("step vec 10", move |b| {
642 b.iter(|| {
643 fast_integer_sum(cloned(v.iter().step_by(10)))
644 })
645 });
646}
647
648fn step_range_2(c: &mut Criterion) {
649 let v = black_box(0..1024);
650
651 c.bench_function("step range 2", move |b| {
652 b.iter(|| {
653 fast_integer_sum(v.clone().step_by(2))
654 })
655 });
656}
657
658fn step_range_10(c: &mut Criterion) {
659 let v = black_box(0..1024);
660
661 c.bench_function("step range 10", move |b| {
662 b.iter(|| {
663 fast_integer_sum(v.clone().step_by(10))
664 })
665 });
666}
667
668fn cartesian_product_iterator(c: &mut Criterion) {
669 let xs = vec![0; 16];
670
671 c.bench_function("cartesian product iterator", move |b| {
672 b.iter(|| {
673 let mut sum = 0;
674 for (&x, &y, &z) in iproduct!(&xs, &xs, &xs) {
675 sum += x;
676 sum += y;
677 sum += z;
678 }
679 sum
680 })
681 });
682}
683
684fn cartesian_product_fold(c: &mut Criterion) {
685 let xs = vec![0; 16];
686
687 c.bench_function("cartesian product fold", move |b| {
688 b.iter(|| {
689 let mut sum = 0;
690 iproduct!(&xs, &xs, &xs).fold((), |(), (&x, &y, &z)| {
691 sum += x;
692 sum += y;
693 sum += z;
694 });
695 sum
696 })
697 });
698}
699
700fn multi_cartesian_product_iterator(c: &mut Criterion) {
701 let xs = [vec![0; 16], vec![0; 16], vec![0; 16]];
702
703 c.bench_function("multi cartesian product iterator", move |b| {
704 b.iter(|| {
705 let mut sum = 0;
706 for x in xs.iter().multi_cartesian_product() {
707 sum += x[0];
708 sum += x[1];
709 sum += x[2];
710 }
711 sum
712 })
713 });
714}
715
716fn multi_cartesian_product_fold(c: &mut Criterion) {
717 let xs = [vec![0; 16], vec![0; 16], vec![0; 16]];
718
719 c.bench_function("multi cartesian product fold", move |b| {
720 b.iter(|| {
721 let mut sum = 0;
722 xs.iter().multi_cartesian_product().fold((), |(), x| {
723 sum += x[0];
724 sum += x[1];
725 sum += x[2];
726 });
727 sum
728 })
729 });
730}
731
732fn cartesian_product_nested_for(c: &mut Criterion) {
733 let xs = vec![0; 16];
734
735 c.bench_function("cartesian product nested for", move |b| {
736 b.iter(|| {
737 let mut sum = 0;
738 for &x in &xs {
739 for &y in &xs {
740 for &z in &xs {
741 sum += x;
742 sum += y;
743 sum += z;
744 }
745 }
746 }
747 sum
748 })
749 });
750}
751
752fn all_equal(c: &mut Criterion) {
753 let mut xs = vec![0; 5_000_000];
754 xs.extend(vec![1; 5_000_000]);
755
756 c.bench_function("all equal", move |b| {
757 b.iter(|| xs.iter().all_equal())
758 });
759}
760
761fn all_equal_for(c: &mut Criterion) {
762 let mut xs = vec![0; 5_000_000];
763 xs.extend(vec![1; 5_000_000]);
764
765 c.bench_function("all equal for", move |b| {
766 b.iter(|| {
767 for &x in &xs {
768 if x != xs[0] {
769 return false;
770 }
771 }
772 true
773 })
774 });
775}
776
777fn all_equal_default(c: &mut Criterion) {
778 let mut xs = vec![0; 5_000_000];
779 xs.extend(vec![1; 5_000_000]);
780
781 c.bench_function("all equal default", move |b| {
782 b.iter(|| xs.iter().dedup().nth(1).is_none())
783 });
784}
785
786const PERM_COUNT: usize = 6;
787
788fn permutations_iter(c: &mut Criterion) {
789 struct NewIterator(Range<usize>);
790
791 impl Iterator for NewIterator {
792 type Item = usize;
793
794 fn next(&mut self) -> Option<Self::Item> {
795 self.0.next()
796 }
797 }
798
799 c.bench_function("permutations iter", move |b| {
800 b.iter(|| {
801 for _ in NewIterator(0..PERM_COUNT).permutations(PERM_COUNT) {
802
803 }
804 })
805 });
806}
807
808fn permutations_range(c: &mut Criterion) {
809 c.bench_function("permutations range", move |b| {
810 b.iter(|| {
811 for _ in (0..PERM_COUNT).permutations(PERM_COUNT) {
812
813 }
814 })
815 });
816}
817
818fn permutations_slice(c: &mut Criterion) {
819 let v = (0..PERM_COUNT).collect_vec();
820
821 c.bench_function("permutations slice", move |b| {
822 b.iter(|| {
823 for _ in v.as_slice().iter().permutations(PERM_COUNT) {
824
825 }
826 })
827 });
828}
829
830criterion_group!(
831 benches,
832 slice_iter,
833 slice_iter_rev,
834 zip_default_zip,
835 zipdot_i32_default_zip,
836 zipdot_f32_default_zip,
837 zip_default_zip3,
838 zip_slices_ziptuple,
839 zipslices,
840 zipslices_mut,
841 zipdot_i32_zipslices,
842 zipdot_f32_zipslices,
843 zip_checked_counted_loop,
844 zipdot_i32_checked_counted_loop,
845 zipdot_f32_checked_counted_loop,
846 zipdot_f32_checked_counted_unrolled_loop,
847 zip_unchecked_counted_loop,
848 zipdot_i32_unchecked_counted_loop,
849 zipdot_f32_unchecked_counted_loop,
850 zip_unchecked_counted_loop3,
851 group_by_lazy_1,
852 group_by_lazy_2,
853 slice_chunks,
854 chunks_lazy_1,
855 equal,
856 merge_default,
857 merge_by_cmp,
858 merge_by_lt,
859 kmerge_default,
860 kmerge_tenway,
861 step_vec_2,
862 step_vec_10,
863 step_range_2,
864 step_range_10,
865 cartesian_product_iterator,
866 cartesian_product_fold,
867 multi_cartesian_product_iterator,
868 multi_cartesian_product_fold,
869 cartesian_product_nested_for,
870 all_equal,
871 all_equal_for,
872 all_equal_default,
873 permutations_iter,
874 permutations_range,
875 permutations_slice,
876);
877criterion_main!(benches);
878