1  use std::iter::IntoIterator;

2  use std::time::Duration;

3  use std::time::Instant;

4 

5  use crate::black_box;

6  use crate::measurement::{Measurement, WallTime};

7  use crate::BatchSize;

8 

9  #[cfg (feature = "async" )]

10  use std::future::Future;

11 

12  #[cfg (feature = "async" )]

13  use crate::async_executor::AsyncExecutor;

14 

15  // ================================== MAINTENANCE NOTE =============================================

16  // Any changes made to either Bencher or AsyncBencher will have to be replicated to the other!

17  // ================================== MAINTENANCE NOTE =============================================

18 

19  /// Timer struct used to iterate a benchmarked function and measure the runtime.

20  ///

21  /// This struct provides different timing loops as methods. Each timing loop provides a different

22  /// way to time a routine and each has advantages and disadvantages.

23  ///

24  /// * If you want to do the iteration and measurement yourself (eg. passing the iteration count

25  /// to a separate process), use `iter_custom`.

26  /// * If your routine requires no periteration setup and returns a value with an expensive `drop`

27  /// method, use `iter_with_large_drop`.

28  /// * If your routine requires some periteration setup that shouldn't be timed, use `iter_batched`

29  /// or `iter_batched_ref`. See [`BatchSize`](enum.BatchSize.html) for a discussion of batch sizes.

30  /// If the setup value implements `Drop` and you don't want to include the `drop` time in the

31  /// measurement, use `iter_batched_ref`, otherwise use `iter_batched`. These methods are also

32  /// suitable for benchmarking routines which return a value with an expensive `drop` method,

33  /// but are more complex than `iter_with_large_drop`.

34  /// * Otherwise, use `iter`.

35  pub struct Bencher<'a, M: Measurement = WallTime> {

36  pub(crate) iterated: bool, // Have we iterated this benchmark?

37  pub(crate) iters: u64, // Number of times to iterate this benchmark

38  pub(crate) value: M::Value, // The measured value

39  pub(crate) measurement: &'a M, // Reference to the measurement object

40  pub(crate) elapsed_time: Duration, // How much time did it take to perform the iteration? Used for the warmup period.

41  }

42  impl<'a, M: Measurement> Bencher<'a, M> {

43  /// Times a `routine` by executing it many times and timing the total elapsed time.

44  ///

45  /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor.

46  ///

47  /// # Timing model

48  ///

49  /// Note that the `Bencher` also times the time required to destroy the output of `routine()`.

50  /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared

51  /// to the runtime of the `routine`.

52  ///

53  /// ```text

54  /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next)

55  /// ```

56  ///

57  /// # Example

58  ///

59  /// ```rust

60  /// #[macro_use] extern crate criterion;

61  ///

62  /// use criterion::*;

63  ///

64  /// // The function to benchmark

65  /// fn foo() {

66  /// // ...

67  /// }

68  ///

69  /// fn bench(c: &mut Criterion) {

70  /// c.bench_function("iter" , move b {

71  /// b.iter( foo())

72  /// });

73  /// }

74  ///

75  /// criterion_group!(benches, bench);

76  /// criterion_main!(benches);

77  /// ```

78  ///

79  #[inline (never)]

80  pub fn iter<O, R>(&mut self, mut routine: R)

81  where

82  R: FnMut() > O,

83  {

84  self.iterated = true;

85  let time_start = Instant::now();

86  let start = self.measurement.start();

87  for _ in 0..self.iters {

88  black_box(routine());

89  }

90  self.value = self.measurement.end(start);

91  self.elapsed_time = time_start.elapsed();

92  }

93 

94  /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time.

95  ///

96  /// Prefer this timing loop in cases where `routine` has to do its own measurements to

97  /// get accurate timing information (for example in multithreaded scenarios where you spawn

98  /// and coordinate with multiple threads).

99  ///

100  /// # Timing model

101  /// Custom, the timing model is whatever is returned as the Duration from `routine`.

102  ///

103  /// # Example

104  /// ```rust

105  /// #[macro_use] extern crate criterion;

106  /// use criterion::*;

107  /// use criterion::black_box;

108  /// use std::time::Instant;

109  ///

110  /// fn foo() {

111  /// // ...

112  /// }

113  ///

114  /// fn bench(c: &mut Criterion) {

115  /// c.bench_function("iter" , move b {

116  /// b.iter_custom(iters {

117  /// let start = Instant::now();

118  /// for _i in 0..iters {

119  /// black_box(foo());

120  /// }

121  /// start.elapsed()

122  /// })

123  /// });

124  /// }

125  ///

126  /// criterion_group!(benches, bench);

127  /// criterion_main!(benches);

128  /// ```

129  ///

130  #[inline (never)]

131  pub fn iter_custom<R>(&mut self, mut routine: R)

132  where

133  R: FnMut(u64) > M::Value,

134  {

135  self.iterated = true;

136  let time_start = Instant::now();

137  self.value = routine(self.iters);

138  self.elapsed_time = time_start.elapsed();

139  }

140 

141  #[doc (hidden)]

142  pub fn iter_with_setup<I, O, S, R>(&mut self, setup: S, routine: R)

143  where

144  S: FnMut() > I,

145  R: FnMut(I) > O,

146  {

147  self.iter_batched(setup, routine, BatchSize::PerIteration);

148  }

149 

150  /// Times a `routine` by collecting its output on each iteration. This avoids timing the

151  /// destructor of the value returned by `routine`.

152  ///

153  /// WARNING: This requires `O(iters * mem::size_of::<O>())` of memory, and `iters` is not under the

154  /// control of the caller. If this causes outofmemory errors, use `iter_batched` instead.

155  ///

156  /// # Timing model

157  ///

158  /// ``` text

159  /// elapsed = Instant::now + iters * (routine) + Iterator::collect::<Vec<_>>

160  /// ```

161  ///

162  /// # Example

163  ///

164  /// ```rust

165  /// #[macro_use] extern crate criterion;

166  ///

167  /// use criterion::*;

168  ///

169  /// fn create_vector() > Vec<u64> {

170  /// # vec![]

171  /// // ...

172  /// }

173  ///

174  /// fn bench(c: &mut Criterion) {

175  /// c.bench_function("with_drop" , move b {

176  /// // This will avoid timing the Vec::drop.

177  /// b.iter_with_large_drop( create_vector())

178  /// });

179  /// }

180  ///

181  /// criterion_group!(benches, bench);

182  /// criterion_main!(benches);

183  /// ```

184  ///

185  pub fn iter_with_large_drop<O, R>(&mut self, mut routine: R)

186  where

187  R: FnMut() > O,

188  {

189  self.iter_batched( (), _ routine(), BatchSize::SmallInput);

190  }

191 

192  /// Times a `routine` that requires some input by generating a batch of input, then timing the

193  /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for

194  /// details on choosing the batch size. Use this when the routine must consume its input.

195  ///

196  /// For example, use this loop to benchmark sorting algorithms, because they require unsorted

197  /// data on each iteration.

198  ///

199  /// # Timing model

200  ///

201  /// ```text

202  /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend

203  /// ```

204  ///

205  /// # Example

206  ///

207  /// ```rust

208  /// #[macro_use] extern crate criterion;

209  ///

210  /// use criterion::*;

211  ///

212  /// fn create_scrambled_data() > Vec<u64> {

213  /// # vec![]

214  /// // ...

215  /// }

216  ///

217  /// // The sorting algorithm to test

218  /// fn sort(data: &mut [u64]) {

219  /// // ...

220  /// }

221  ///

222  /// fn bench(c: &mut Criterion) {

223  /// let data = create_scrambled_data();

224  ///

225  /// c.bench_function("with_setup" , move b {

226  /// // This will avoid timing the to_vec call.

227  /// b.iter_batched( data.clone(), mut data sort(&mut data), BatchSize::SmallInput)

228  /// });

229  /// }

230  ///

231  /// criterion_group!(benches, bench);

232  /// criterion_main!(benches);

233  /// ```

234  ///

235  #[inline (never)]

236  pub fn iter_batched<I, O, S, R>(&mut self, mut setup: S, mut routine: R, size: BatchSize)

237  where

238  S: FnMut() > I,

239  R: FnMut(I) > O,

240  {

241  self.iterated = true;

242  let batch_size = size.iters_per_batch(self.iters);

243  assert!(batch_size != 0, "Batch size must not be zero." );

244  let time_start = Instant::now();

245  self.value = self.measurement.zero();

246 

247  if batch_size == 1 {

248  for _ in 0..self.iters {

249  let input = black_box(setup());

250 

251  let start = self.measurement.start();

252  let output = routine(input);

253  let end = self.measurement.end(start);

254  self.value = self.measurement.add(&self.value, &end);

255 

256  drop(black_box(output));

257  }

258  } else {

259  let mut iteration_counter = 0;

260 

261  while iteration_counter < self.iters {

262  let batch_size = ::std::cmp::min(batch_size, self.iters  iteration_counter);

263 

264  let inputs = black_box((0..batch_size).map(_ setup()).collect::<Vec<_>>());

265  let mut outputs = Vec::with_capacity(batch_size as usize);

266 

267  let start = self.measurement.start();

268  outputs.extend(inputs.into_iter().map(&mut routine));

269  let end = self.measurement.end(start);

270  self.value = self.measurement.add(&self.value, &end);

271 

272  black_box(outputs);

273 

274  iteration_counter += batch_size;

275  }

276  }

277 

278  self.elapsed_time = time_start.elapsed();

279  }

280 

281  /// Times a `routine` that requires some input by generating a batch of input, then timing the

282  /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for

283  /// details on choosing the batch size. Use this when the routine should accept the input by

284  /// mutable reference.

285  ///

286  /// For example, use this loop to benchmark sorting algorithms, because they require unsorted

287  /// data on each iteration.

288  ///

289  /// # Timing model

290  ///

291  /// ```text

292  /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend

293  /// ```

294  ///

295  /// # Example

296  ///

297  /// ```rust

298  /// #[macro_use] extern crate criterion;

299  ///

300  /// use criterion::*;

301  ///

302  /// fn create_scrambled_data() > Vec<u64> {

303  /// # vec![]

304  /// // ...

305  /// }

306  ///

307  /// // The sorting algorithm to test

308  /// fn sort(data: &mut [u64]) {

309  /// // ...

310  /// }

311  ///

312  /// fn bench(c: &mut Criterion) {

313  /// let data = create_scrambled_data();

314  ///

315  /// c.bench_function("with_setup" , move b {

316  /// // This will avoid timing the to_vec call.

317  /// b.iter_batched( data.clone(), mut data sort(&mut data), BatchSize::SmallInput)

318  /// });

319  /// }

320  ///

321  /// criterion_group!(benches, bench);

322  /// criterion_main!(benches);

323  /// ```

324  ///

325  #[inline (never)]

326  pub fn iter_batched_ref<I, O, S, R>(&mut self, mut setup: S, mut routine: R, size: BatchSize)

327  where

328  S: FnMut() > I,

329  R: FnMut(&mut I) > O,

330  {

331  self.iterated = true;

332  let batch_size = size.iters_per_batch(self.iters);

333  assert!(batch_size != 0, "Batch size must not be zero." );

334  let time_start = Instant::now();

335  self.value = self.measurement.zero();

336 

337  if batch_size == 1 {

338  for _ in 0..self.iters {

339  let mut input = black_box(setup());

340 

341  let start = self.measurement.start();

342  let output = routine(&mut input);

343  let end = self.measurement.end(start);

344  self.value = self.measurement.add(&self.value, &end);

345 

346  drop(black_box(output));

347  drop(black_box(input));

348  }

349  } else {

350  let mut iteration_counter = 0;

351 

352  while iteration_counter < self.iters {

353  let batch_size = ::std::cmp::min(batch_size, self.iters  iteration_counter);

354 

355  let mut inputs = black_box((0..batch_size).map(_ setup()).collect::<Vec<_>>());

356  let mut outputs = Vec::with_capacity(batch_size as usize);

357 

358  let start = self.measurement.start();

359  outputs.extend(inputs.iter_mut().map(&mut routine));

360  let end = self.measurement.end(start);

361  self.value = self.measurement.add(&self.value, &end);

362 

363  black_box(outputs);

364 

365  iteration_counter += batch_size;

366  }

367  }

368  self.elapsed_time = time_start.elapsed();

369  }

370 

371  // Benchmarks must actually call one of the iter methods. This causes benchmarks to fail loudly

372  // if they don't.

373  pub(crate) fn assert_iterated(&mut self) {

374  assert!(

375  self.iterated,

376  "Benchmark function must call Bencher::iter or related method."

377  );

378  self.iterated = false;

379  }

380 

381  /// Convert this bencher into an AsyncBencher, which enables async/await support.

382  #[cfg (feature = "async" )]

383  pub fn to_async<'b, A: AsyncExecutor>(&'b mut self, runner: A) > AsyncBencher<'a, 'b, A, M> {

384  AsyncBencher { b: self, runner }

385  }

386  }

387 

/// Async/await variant of the Bencher struct.
#[cfg(feature = "async")]
pub struct AsyncBencher<'a, 'b, A: AsyncExecutor, M: Measurement = WallTime> {
    b: &'b mut Bencher<'a, M>,  // The underlying synchronous bencher that records the measurements
    runner: A,                   // Executor used to block on the benchmarked futures
}

#[cfg(feature = "async")]
impl<'a, 'b, A: AsyncExecutor, M: Measurement> AsyncBencher<'a, 'b, A, M> {
    /// Times a `routine` by executing it many times and timing the total elapsed time.
    ///
    /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor.
    ///
    /// # Timing model
    ///
    /// Note that the `AsyncBencher` also times the time required to destroy the output of `routine()`.
    /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared
    /// to the runtime of the `routine`.
    ///
    /// ```text
    /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next)
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// // The function to benchmark
    /// async fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.to_async(FuturesExecutor).iter(|| async { foo().await } )
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter<O, R, F>(&mut self, mut routine: R)
    where
        R: FnMut() -> F,
        F: Future<Output = O>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let time_start = Instant::now();
            let start = b.measurement.start();
            for _ in 0..b.iters {
                black_box(routine().await);
            }
            b.value = b.measurement.end(start);
            b.elapsed_time = time_start.elapsed();
        });
    }

    /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time.
    ///
    /// Prefer this timing loop in cases where `routine` has to do its own measurements to
    /// get accurate timing information (for example in multi-threaded scenarios where you spawn
    /// and coordinate with multiple threads).
    ///
    /// # Timing model
    /// Custom, the timing model is whatever is returned as the Duration from `routine`.
    ///
    /// # Example
    /// ```rust
    /// #[macro_use] extern crate criterion;
    /// use criterion::*;
    /// use criterion::black_box;
    /// use criterion::async_executor::FuturesExecutor;
    /// use std::time::Instant;
    ///
    /// async fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.to_async(FuturesExecutor).iter_custom(|iters| {
    ///             async move {
    ///                 let start = Instant::now();
    ///                 for _i in 0..iters {
    ///                     black_box(foo().await);
    ///                 }
    ///                 start.elapsed()
    ///             }
    ///         })
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_custom<R, F>(&mut self, mut routine: R)
    where
        R: FnMut(u64) -> F,
        F: Future<Output = M::Value>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let time_start = Instant::now();
            b.value = routine(b.iters).await;
            b.elapsed_time = time_start.elapsed();
        })
    }

    #[doc(hidden)]
    pub fn iter_with_setup<I, O, S, R, F>(&mut self, setup: S, routine: R)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        self.iter_batched(setup, routine, BatchSize::PerIteration);
    }

    /// Times a `routine` by collecting its output on each iteration. This avoids timing the
    /// destructor of the value returned by `routine`.
    ///
    /// WARNING: This requires `O(iters * mem::size_of::<O>())` of memory, and `iters` is not under the
    /// control of the caller. If this causes out-of-memory errors, use `iter_batched` instead.
    ///
    /// # Timing model
    ///
    /// ``` text
    /// elapsed = Instant::now + iters * (routine) + Iterator::collect::<Vec<_>>
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// async fn create_vector() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("with_drop", move |b| {
    ///         // This will avoid timing the Vec::drop.
    ///         b.to_async(FuturesExecutor).iter_with_large_drop(|| async { create_vector().await })
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    pub fn iter_with_large_drop<O, R, F>(&mut self, mut routine: R)
    where
        R: FnMut() -> F,
        F: Future<Output = O>,
    {
        self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput);
    }

    #[doc(hidden)]
    pub fn iter_with_large_setup<I, O, S, R, F>(&mut self, setup: S, routine: R)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        self.iter_batched(setup, routine, BatchSize::NumBatches(1));
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
    /// details on choosing the batch size. Use this when the routine must consume its input.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// async fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the to_vec call.
    ///         b.iter_batched(|| data.clone(), |mut data| async move { sort(&mut data).await }, BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_batched<I, O, S, R, F>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let batch_size = size.iters_per_batch(b.iters);
            assert!(batch_size != 0, "Batch size must not be zero.");
            let time_start = Instant::now();
            b.value = b.measurement.zero();

            if batch_size == 1 {
                for _ in 0..b.iters {
                    let input = black_box(setup());

                    let start = b.measurement.start();
                    let output = routine(input).await;
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    drop(black_box(output));
                }
            } else {
                let mut iteration_counter = 0;

                while iteration_counter < b.iters {
                    let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter);

                    let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                    let mut outputs = Vec::with_capacity(batch_size as usize);

                    let start = b.measurement.start();
                    // Can't use .extend here like the sync version does
                    for input in inputs {
                        outputs.push(routine(input).await);
                    }
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    black_box(outputs);

                    iteration_counter += batch_size;
                }
            }

            b.elapsed_time = time_start.elapsed();
        })
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
    /// details on choosing the batch size. Use this when the routine should accept the input by
    /// mutable reference.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// async fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the to_vec call.
    ///         b.iter_batched(|| data.clone(), |mut data| async move { sort(&mut data).await }, BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_batched_ref<I, O, S, R, F>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(&mut I) -> F,
        F: Future<Output = O>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let batch_size = size.iters_per_batch(b.iters);
            assert!(batch_size != 0, "Batch size must not be zero.");
            let time_start = Instant::now();
            b.value = b.measurement.zero();

            if batch_size == 1 {
                for _ in 0..b.iters {
                    let mut input = black_box(setup());

                    let start = b.measurement.start();
                    let output = routine(&mut input).await;
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    drop(black_box(output));
                    drop(black_box(input));
                }
            } else {
                let mut iteration_counter = 0;

                while iteration_counter < b.iters {
                    let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter);

                    let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                    let mut outputs = Vec::with_capacity(batch_size as usize);

                    let start = b.measurement.start();
                    // Can't use .extend here like the sync version does
                    for mut input in inputs {
                        outputs.push(routine(&mut input).await);
                    }
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    black_box(outputs);

                    iteration_counter += batch_size;
                }
            }
            b.elapsed_time = time_start.elapsed();
        });
    }
}

765  