1 | use std::iter::IntoIterator;
|
2 | use std::time::Duration;
|
3 | use std::time::Instant;
|
4 |
|
5 | use crate::black_box;
|
6 | use crate::measurement::{Measurement, WallTime};
|
7 | use crate::BatchSize;
|
8 |
|
9 | #[cfg (feature = "async" )]
|
10 | use std::future::Future;
|
11 |
|
12 | #[cfg (feature = "async" )]
|
13 | use crate::async_executor::AsyncExecutor;
|
14 |
|
15 | // ================================== MAINTENANCE NOTE =============================================
|
16 | // Any changes made to either Bencher or AsyncBencher will have to be replicated to the other!
|
17 | // ================================== MAINTENANCE NOTE =============================================
|
18 |
|
/// Timer struct used to iterate a benchmarked function and measure the runtime.
///
/// This struct provides different timing loops as methods. Each timing loop provides a different
/// way to time a routine and each has advantages and disadvantages.
///
/// * If you want to do the iteration and measurement yourself (e.g. passing the iteration count
///  to a separate process), use `iter_custom`.
/// * If your routine requires no per-iteration setup and returns a value with an expensive `drop`
///   method, use `iter_with_large_drop`.
/// * If your routine requires some per-iteration setup that shouldn't be timed, use `iter_batched`
///   or `iter_batched_ref`. See [`BatchSize`](enum.BatchSize.html) for a discussion of batch sizes.
///   If the setup value implements `Drop` and you don't want to include the `drop` time in the
///   measurement, use `iter_batched_ref`, otherwise use `iter_batched`. These methods are also
///   suitable for benchmarking routines which return a value with an expensive `drop` method,
///   but are more complex than `iter_with_large_drop`.
/// * Otherwise, use `iter`.
pub struct Bencher<'a, M: Measurement = WallTime> {
    pub(crate) iterated: bool,         // Have we iterated this benchmark?
    pub(crate) iters: u64,             // Number of times to iterate this benchmark
    pub(crate) value: M::Value,        // The measured value accumulated by the timing loops
    pub(crate) measurement: &'a M,     // Reference to the measurement object
    pub(crate) elapsed_time: Duration, // How much time did it take to perform the iteration? Used for the warmup period.
}
|
impl<'a, M: Measurement> Bencher<'a, M> {
    /// Times a `routine` by executing it many times and timing the total elapsed time.
    ///
    /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor.
    ///
    /// # Timing model
    ///
    /// Note that the `Bencher` also times the time required to destroy the output of `routine()`.
    /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared
    /// to the runtime of the `routine`.
    ///
    /// ```text
    /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next)
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    ///
    /// // The function to benchmark
    /// fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.iter(|| foo())
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline (never)]
    pub fn iter<O, R>(&mut self, mut routine: R)
    where
        R: FnMut() -> O,
    {
        self.iterated = true;
        let time_start = Instant::now();
        let start = self.measurement.start();
        for _ in 0..self.iters {
            // black_box the output so the optimizer cannot elide the routine's work.
            black_box(routine());
        }
        self.value = self.measurement.end(start);
        // Wall-clock duration of the whole timing loop; consumed by the warmup logic.
        self.elapsed_time = time_start.elapsed();
    }

    /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time.
    ///
    /// Prefer this timing loop in cases where `routine` has to do its own measurements to
    /// get accurate timing information (for example in multi-threaded scenarios where you spawn
    /// and coordinate with multiple threads).
    ///
    /// # Timing model
    /// Custom, the timing model is whatever is returned as the Duration from `routine`.
    ///
    /// # Example
    /// ```rust
    /// #[macro_use] extern crate criterion;
    /// use criterion::*;
    /// use criterion::black_box;
    /// use std::time::Instant;
    ///
    /// fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.iter_custom(|iters| {
    ///             let start = Instant::now();
    ///             for _i in 0..iters {
    ///                 black_box(foo());
    ///             }
    ///             start.elapsed()
    ///         })
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline (never)]
    pub fn iter_custom<R>(&mut self, mut routine: R)
    where
        R: FnMut(u64) -> M::Value,
    {
        self.iterated = true;
        let time_start = Instant::now();
        // The routine is fully responsible for producing the measured value
        // for the requested number of iterations.
        self.value = routine(self.iters);
        self.elapsed_time = time_start.elapsed();
    }

    // Deprecated-style alias kept for compatibility: equivalent to
    // iter_batched with BatchSize::PerIteration.
    #[doc (hidden)]
    pub fn iter_with_setup<I, O, S, R>(&mut self, setup: S, routine: R)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> O,
    {
        self.iter_batched(setup, routine, BatchSize::PerIteration);
    }

    /// Times a `routine` by collecting its output on each iteration. This avoids timing the
    /// destructor of the value returned by `routine`.
    ///
    /// WARNING: This requires `O(iters * mem::size_of::<O>())` of memory, and `iters` is not under the
    /// control of the caller. If this causes out-of-memory errors, use `iter_batched` instead.
    ///
    /// # Timing model
    ///
    /// ``` text
    /// elapsed = Instant::now + iters * (routine) + Iterator::collect::<Vec<_>>
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    ///
    /// fn create_vector() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("with_drop", move |b| {
    ///         // This will avoid timing the Vec::drop.
    ///         b.iter_with_large_drop(|| create_vector())
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    pub fn iter_with_large_drop<O, R>(&mut self, mut routine: R)
    where
        R: FnMut() -> O,
    {
        // No real setup is needed; batching alone keeps the outputs alive until
        // after the timed region, so their drop is not measured.
        self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput);
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
    /// details on choosing the batch size. Use this when the routine must consume its input.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the clone call.
    ///         b.iter_batched(|| data.clone(), |mut data| sort(&mut data), BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline (never)]
    pub fn iter_batched<I, O, S, R>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> O,
    {
        self.iterated = true;
        let batch_size = size.iters_per_batch(self.iters);
        assert!(batch_size != 0, "Batch size must not be zero." );
        let time_start = Instant::now();
        self.value = self.measurement.zero();

        if batch_size == 1 {
            // Special case: time each call individually so neither setup nor the
            // drop of the output ever lands inside the timed region.
            for _ in 0..self.iters {
                let input = black_box(setup());

                let start = self.measurement.start();
                let output = routine(input);
                let end = self.measurement.end(start);
                self.value = self.measurement.add(&self.value, &end);

                // Drop the output outside the timed region.
                drop(black_box(output));
            }
        } else {
            let mut iteration_counter = 0;

            while iteration_counter < self.iters {
                // The final batch may be smaller than the configured batch size.
                let batch_size = ::std::cmp::min(batch_size, self.iters - iteration_counter);

                // Generate all inputs for this batch before starting the timer.
                let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                let mut outputs = Vec::with_capacity(batch_size as usize);

                let start = self.measurement.start();
                outputs.extend(inputs.into_iter().map(&mut routine));
                let end = self.measurement.end(start);
                self.value = self.measurement.add(&self.value, &end);

                // Keep (and then drop) the outputs outside the timed region.
                black_box(outputs);

                iteration_counter += batch_size;
            }
        }

        self.elapsed_time = time_start.elapsed();
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
    /// details on choosing the batch size. Use this when the routine should accept the input by
    /// mutable reference.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the clone call.
    ///         b.iter_batched_ref(|| data.clone(), |data| sort(data), BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline (never)]
    pub fn iter_batched_ref<I, O, S, R>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(&mut I) -> O,
    {
        self.iterated = true;
        let batch_size = size.iters_per_batch(self.iters);
        assert!(batch_size != 0, "Batch size must not be zero." );
        let time_start = Instant::now();
        self.value = self.measurement.zero();

        if batch_size == 1 {
            // Special case: time each call individually so that setup, output drop
            // and input drop all happen outside the timed region.
            for _ in 0..self.iters {
                let mut input = black_box(setup());

                let start = self.measurement.start();
                let output = routine(&mut input);
                let end = self.measurement.end(start);
                self.value = self.measurement.add(&self.value, &end);

                // Drop both the output and the input outside the timed region.
                drop(black_box(output));
                drop(black_box(input));
            }
        } else {
            let mut iteration_counter = 0;

            while iteration_counter < self.iters {
                // The final batch may be smaller than the configured batch size.
                let batch_size = ::std::cmp::min(batch_size, self.iters - iteration_counter);

                // Generate all inputs for this batch before starting the timer.
                let mut inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                let mut outputs = Vec::with_capacity(batch_size as usize);

                let start = self.measurement.start();
                // The routine only borrows each input, so the inputs (and their
                // eventual drop) stay outside the timed region.
                outputs.extend(inputs.iter_mut().map(&mut routine));
                let end = self.measurement.end(start);
                self.value = self.measurement.add(&self.value, &end);

                // Keep (and then drop) the outputs outside the timed region.
                black_box(outputs);

                iteration_counter += batch_size;
            }
        }
        self.elapsed_time = time_start.elapsed();
    }

    // Benchmarks must actually call one of the iter methods. This causes benchmarks to fail loudly
    // if they don't.
    pub(crate) fn assert_iterated(&mut self) {
        assert!(
            self.iterated,
            "Benchmark function must call Bencher::iter or related method."
        );
        // Reset the flag so the next run of the benchmark is checked independently.
        self.iterated = false;
    }

    /// Convert this bencher into an AsyncBencher, which enables async/await support.
    #[cfg (feature = "async" )]
    pub fn to_async<'b, A: AsyncExecutor>(&'b mut self, runner: A) -> AsyncBencher<'a, 'b, A, M> {
        AsyncBencher { b: self, runner }
    }
}
|
387 |
|
/// Async/await variant of the Bencher struct.
#[cfg (feature = "async" )]
pub struct AsyncBencher<'a, 'b, A: AsyncExecutor, M: Measurement = WallTime> {
    b: &'b mut Bencher<'a, M>, // Underlying synchronous bencher that records the measurements
    runner: A,                 // Executor used to block on the benchmarked futures
}
|
#[cfg (feature = "async" )]
impl<'a, 'b, A: AsyncExecutor, M: Measurement> AsyncBencher<'a, 'b, A, M> {
    /// Times a `routine` by executing it many times and timing the total elapsed time.
    ///
    /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor.
    ///
    /// # Timing model
    ///
    /// Note that the `AsyncBencher` also times the time required to destroy the output of `routine()`.
    /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared
    /// to the runtime of the `routine`.
    ///
    /// ```text
    /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next)
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// // The function to benchmark
    /// async fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.to_async(FuturesExecutor).iter(|| async { foo().await } )
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline (never)]
    pub fn iter<O, R, F>(&mut self, mut routine: R)
    where
        R: FnMut() -> F,
        F: Future<Output = O>,
    {
        let AsyncBencher { b, runner } = self;
        // The whole timing loop runs inside a single block_on call so executor
        // startup cost is not part of the measured region.
        runner.block_on(async {
            b.iterated = true;
            let time_start = Instant::now();
            let start = b.measurement.start();
            for _ in 0..b.iters {
                // black_box the output so the optimizer cannot elide the routine's work.
                black_box(routine().await);
            }
            b.value = b.measurement.end(start);
            // Wall-clock duration of the whole timing loop; consumed by the warmup logic.
            b.elapsed_time = time_start.elapsed();
        });
    }

    /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time.
    ///
    /// Prefer this timing loop in cases where `routine` has to do its own measurements to
    /// get accurate timing information (for example in multi-threaded scenarios where you spawn
    /// and coordinate with multiple threads).
    ///
    /// # Timing model
    /// Custom, the timing model is whatever is returned as the Duration from `routine`.
    ///
    /// # Example
    /// ```rust
    /// #[macro_use] extern crate criterion;
    /// use criterion::*;
    /// use criterion::black_box;
    /// use criterion::async_executor::FuturesExecutor;
    /// use std::time::Instant;
    ///
    /// async fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.to_async(FuturesExecutor).iter_custom(|iters| {
    ///             async move {
    ///                 let start = Instant::now();
    ///                 for _i in 0..iters {
    ///                     black_box(foo().await);
    ///                 }
    ///                 start.elapsed()
    ///             }
    ///         })
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline (never)]
    pub fn iter_custom<R, F>(&mut self, mut routine: R)
    where
        R: FnMut(u64) -> F,
        F: Future<Output = M::Value>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let time_start = Instant::now();
            // The routine's future is fully responsible for producing the measured
            // value for the requested number of iterations.
            b.value = routine(b.iters).await;
            b.elapsed_time = time_start.elapsed();
        })
    }

    // Deprecated-style alias kept for compatibility: equivalent to
    // iter_batched with BatchSize::PerIteration.
    #[doc (hidden)]
    pub fn iter_with_setup<I, O, S, R, F>(&mut self, setup: S, routine: R)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        self.iter_batched(setup, routine, BatchSize::PerIteration);
    }

    /// Times a `routine` by collecting its output on each iteration. This avoids timing the
    /// destructor of the value returned by `routine`.
    ///
    /// WARNING: This requires `O(iters * mem::size_of::<O>())` of memory, and `iters` is not under the
    /// control of the caller. If this causes out-of-memory errors, use `iter_batched` instead.
    ///
    /// # Timing model
    ///
    /// ``` text
    /// elapsed = Instant::now + iters * (routine) + Iterator::collect::<Vec<_>>
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// async fn create_vector() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("with_drop", move |b| {
    ///         // This will avoid timing the Vec::drop.
    ///         b.to_async(FuturesExecutor).iter_with_large_drop(|| async { create_vector().await })
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    pub fn iter_with_large_drop<O, R, F>(&mut self, mut routine: R)
    where
        R: FnMut() -> F,
        F: Future<Output = O>,
    {
        // No real setup is needed; batching alone keeps the outputs alive until
        // after the timed region, so their drop is not measured.
        self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput);
    }

    // Deprecated-style alias kept for compatibility: equivalent to
    // iter_batched with BatchSize::NumBatches(1).
    #[doc (hidden)]
    pub fn iter_with_large_setup<I, O, S, R, F>(&mut self, setup: S, routine: R)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        self.iter_batched(setup, routine, BatchSize::NumBatches(1));
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
    /// details on choosing the batch size. Use this when the routine must consume its input.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// async fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the clone call.
    ///         b.to_async(FuturesExecutor).iter_batched(|| data.clone(), |mut data| async move { sort(&mut data).await }, BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline (never)]
    pub fn iter_batched<I, O, S, R, F>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let batch_size = size.iters_per_batch(b.iters);
            assert!(batch_size != 0, "Batch size must not be zero." );
            let time_start = Instant::now();
            b.value = b.measurement.zero();

            if batch_size == 1 {
                // Special case: time each call individually so neither setup nor the
                // drop of the output ever lands inside the timed region.
                for _ in 0..b.iters {
                    let input = black_box(setup());

                    let start = b.measurement.start();
                    let output = routine(input).await;
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    // Drop the output outside the timed region.
                    drop(black_box(output));
                }
            } else {
                let mut iteration_counter = 0;

                while iteration_counter < b.iters {
                    // The final batch may be smaller than the configured batch size.
                    let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter);

                    // Generate all inputs for this batch before starting the timer.
                    let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                    let mut outputs = Vec::with_capacity(batch_size as usize);

                    let start = b.measurement.start();
                    // Can't use .extend here like the sync version does
                    for input in inputs {
                        outputs.push(routine(input).await);
                    }
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    // Keep (and then drop) the outputs outside the timed region.
                    black_box(outputs);

                    iteration_counter += batch_size;
                }
            }

            b.elapsed_time = time_start.elapsed();
        })
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
    /// details on choosing the batch size. Use this when the routine should accept the input by
    /// mutable reference.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// async fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the clone call.
    ///         b.to_async(FuturesExecutor).iter_batched_ref(|| data.clone(), |mut data| async move { sort(&mut data).await }, BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline (never)]
    pub fn iter_batched_ref<I, O, S, R, F>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(&mut I) -> F,
        F: Future<Output = O>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let batch_size = size.iters_per_batch(b.iters);
            assert!(batch_size != 0, "Batch size must not be zero." );
            let time_start = Instant::now();
            b.value = b.measurement.zero();

            if batch_size == 1 {
                // Special case: time each call individually so that setup, output drop
                // and input drop all happen outside the timed region.
                for _ in 0..b.iters {
                    let mut input = black_box(setup());

                    let start = b.measurement.start();
                    let output = routine(&mut input).await;
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    // Drop both the output and the input outside the timed region.
                    drop(black_box(output));
                    drop(black_box(input));
                }
            } else {
                let mut iteration_counter = 0;

                while iteration_counter < b.iters {
                    // The final batch may be smaller than the configured batch size.
                    let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter);

                    // Generate all inputs for this batch before starting the timer.
                    let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                    let mut outputs = Vec::with_capacity(batch_size as usize);

                    let start = b.measurement.start();
                    // Can't use .extend here like the sync version does
                    for mut input in inputs {
                        outputs.push(routine(&mut input).await);
                    }
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    // Keep (and then drop) the outputs outside the timed region.
                    black_box(outputs);

                    iteration_counter += batch_size;
                }
            }
            b.elapsed_time = time_start.elapsed();
        });
    }
}
|
765 | |