| 1 | //===----------------------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | // UNSUPPORTED: c++03, c++11, c++14, c++17 |
| 10 | |
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <stop_token>
#include <thread>
#include <vector>
| 15 | |
| 16 | #include "benchmark/benchmark.h" |
| 17 | #include "make_test_thread.h" |
| 18 | |
| 19 | using namespace std::chrono_literals; |
| 20 | |
| 21 | // We have a single thread created by std::jthread consuming the stop_token: |
| 22 | // polling for stop_requested. |
| 23 | void BM_stop_token_single_thread_polling_stop_requested(benchmark::State& state) { |
| 24 | auto thread_func = [&](std::stop_token st, std::atomic<std::uint64_t>* loop_count) { |
| 25 | while (!st.stop_requested()) { |
| 26 | // doing some work |
| 27 | loop_count->fetch_add(1, std::memory_order_relaxed); |
| 28 | } |
| 29 | }; |
| 30 | |
| 31 | std::atomic<std::uint64_t> loop_count(0); |
| 32 | std::uint64_t total_loop_test_param = state.range(0); |
| 33 | |
| 34 | auto thread = support::make_test_jthread(thread_func, &loop_count); |
| 35 | |
| 36 | for (auto _ : state) { |
| 37 | auto start_total = loop_count.load(std::memory_order_relaxed); |
| 38 | |
| 39 | while (loop_count.load(std::memory_order_relaxed) - start_total < total_loop_test_param) { |
| 40 | std::this_thread::yield(); |
| 41 | } |
| 42 | } |
| 43 | } |
| 44 | |
| 45 | BENCHMARK(BM_stop_token_single_thread_polling_stop_requested)->RangeMultiplier(2)->Range(1 << 10, 1 << 24); |
| 46 | |
| 47 | // We have multiple threads polling for stop_requested of the same stop_token. |
| 48 | void BM_stop_token_multi_thread_polling_stop_requested(benchmark::State& state) { |
| 49 | std::atomic<bool> start{false}; |
| 50 | |
| 51 | auto thread_func = [&start](std::atomic<std::uint64_t>* loop_count, std::stop_token st) { |
| 52 | start.wait(false); |
| 53 | while (!st.stop_requested()) { |
| 54 | // doing some work |
| 55 | loop_count->fetch_add(1, std::memory_order_relaxed); |
| 56 | } |
| 57 | }; |
| 58 | |
| 59 | constexpr size_t thread_count = 20; |
| 60 | |
| 61 | std::uint64_t total_loop_test_param = state.range(0); |
| 62 | |
| 63 | std::vector<std::atomic<std::uint64_t>> loop_counts(thread_count); |
| 64 | std::stop_source ss; |
| 65 | std::vector<std::jthread> threads; |
| 66 | threads.reserve(thread_count); |
| 67 | |
| 68 | for (size_t i = 0; i < thread_count; ++i) { |
| 69 | threads.emplace_back(support::make_test_jthread(thread_func, &loop_counts[i], ss.get_token())); |
| 70 | } |
| 71 | |
| 72 | auto get_total_loop = [&loop_counts] { |
| 73 | std::uint64_t total = 0; |
| 74 | for (const auto& loop_count : loop_counts) { |
| 75 | total += loop_count.load(std::memory_order_relaxed); |
| 76 | } |
| 77 | return total; |
| 78 | }; |
| 79 | |
| 80 | start = true; |
| 81 | start.notify_all(); |
| 82 | |
| 83 | for (auto _ : state) { |
| 84 | auto start_total = get_total_loop(); |
| 85 | |
| 86 | while (get_total_loop() - start_total < total_loop_test_param) { |
| 87 | std::this_thread::yield(); |
| 88 | } |
| 89 | } |
| 90 | |
| 91 | ss.request_stop(); |
| 92 | } |
| 93 | |
| 94 | BENCHMARK(BM_stop_token_multi_thread_polling_stop_requested)->RangeMultiplier(2)->Range(1 << 10, 1 << 24); |
| 95 | |
| 96 | // We have a single thread created by std::jthread consuming the stop_token: |
| 97 | // registering/deregistering callbacks, one at a time. |
| 98 | void BM_stop_token_single_thread_reg_unreg_callback(benchmark::State& state) { |
| 99 | auto thread_func = [&](std::stop_token st, std::atomic<std::uint64_t>* reg_count) { |
| 100 | while (!st.stop_requested()) { |
| 101 | std::stop_callback cb{st, [&]() noexcept {}}; |
| 102 | benchmark::DoNotOptimize(cb); |
| 103 | reg_count->fetch_add(1, std::memory_order_relaxed); |
| 104 | } |
| 105 | }; |
| 106 | |
| 107 | std::atomic<std::uint64_t> reg_count(0); |
| 108 | std::uint64_t total_reg_test_param = state.range(0); |
| 109 | |
| 110 | auto thread = support::make_test_jthread(thread_func, ®_count); |
| 111 | |
| 112 | for (auto _ : state) { |
| 113 | auto start_total = reg_count.load(std::memory_order_relaxed); |
| 114 | |
| 115 | while (reg_count.load(std::memory_order_relaxed) - start_total < total_reg_test_param) { |
| 116 | std::this_thread::yield(); |
| 117 | } |
| 118 | } |
| 119 | } |
| 120 | BENCHMARK(BM_stop_token_single_thread_reg_unreg_callback)->RangeMultiplier(2)->Range(1 << 10, 1 << 24); |
| 121 | |
| 122 | // At startup, it creates a single stop_source which it will then pass an associated stop_token to every |
| 123 | // request. |
| 124 | // |
| 125 | // Assume a thread-pool handles these requests and for each request it polls for stop_requested(), then attaches a |
| 126 | // stop-callback, does some work, then detaches the stop-callback some time later. The lifetime of requests/callbacks |
| 127 | // would overlap with other requests/callback from the same thread. |
| 128 | // |
| 129 | // Say something like each thread keeping a circular buffer of N stop-callbacks and destroying the stop-callbacks in |
| 130 | // FIFO order |
| 131 | void BM_stop_token_async_reg_unreg_callback(benchmark::State& state) { |
| 132 | struct dummy_stop_callback { |
| 133 | void operator()() const noexcept {} |
| 134 | }; |
| 135 | |
| 136 | constexpr size_t thread_count = 20; |
| 137 | constexpr size_t concurrent_request_count = 1000; |
| 138 | std::atomic<bool> start{false}; |
| 139 | |
| 140 | std::uint64_t total_reg_test_param = state.range(0); |
| 141 | std::vector<std::atomic<std::uint64_t>> reg_counts(thread_count); |
| 142 | |
| 143 | std::stop_source ss; |
| 144 | std::vector<std::jthread> threads; |
| 145 | threads.reserve(thread_count); |
| 146 | |
| 147 | auto thread_func = [&start](std::atomic<std::uint64_t>* count, std::stop_token st) { |
| 148 | std::vector<std::optional<std::stop_callback<dummy_stop_callback>>> cbs(concurrent_request_count); |
| 149 | |
| 150 | start.wait(false); |
| 151 | |
| 152 | std::uint32_t index = 0; |
| 153 | while (!st.stop_requested()) { |
| 154 | cbs[index].emplace(st, dummy_stop_callback{}); |
| 155 | index = (index + 1) % concurrent_request_count; |
| 156 | count->fetch_add(1, std::memory_order_relaxed); |
| 157 | } |
| 158 | }; |
| 159 | |
| 160 | for (size_t i = 0; i < thread_count; ++i) { |
| 161 | threads.emplace_back(support::make_test_jthread(thread_func, ®_counts[i], ss.get_token())); |
| 162 | } |
| 163 | |
| 164 | auto get_total_reg = [&] { |
| 165 | std::uint64_t total = 0; |
| 166 | for (const auto& reg_count : reg_counts) { |
| 167 | total += reg_count.load(std::memory_order_relaxed); |
| 168 | } |
| 169 | return total; |
| 170 | }; |
| 171 | |
| 172 | start = true; |
| 173 | start.notify_all(); |
| 174 | |
| 175 | for (auto _ : state) { |
| 176 | auto start_total = get_total_reg(); |
| 177 | |
| 178 | while (get_total_reg() - start_total < total_reg_test_param) { |
| 179 | std::this_thread::yield(); |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | ss.request_stop(); |
| 184 | } |
| 185 | BENCHMARK(BM_stop_token_async_reg_unreg_callback)->RangeMultiplier(2)->Range(1 << 10, 1 << 24); |
| 186 | |
| 187 | BENCHMARK_MAIN(); |
| 188 | |