1 | //===----------------------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | // UNSUPPORTED: c++03, c++11, c++14, c++17 |
10 | |
#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <stop_token>
#include <thread>
#include <vector>

#include "benchmark/benchmark.h"
#include "make_test_thread.h"
18 | |
19 | using namespace std::chrono_literals; |
20 | |
21 | // We have a single thread created by std::jthread consuming the stop_token: |
22 | // polling for stop_requested. |
23 | void BM_stop_token_single_thread_polling_stop_requested(benchmark::State& state) { |
24 | auto thread_func = [&](std::stop_token st, std::atomic<std::uint64_t>* loop_count) { |
25 | while (!st.stop_requested()) { |
26 | // doing some work |
27 | loop_count->fetch_add(1, std::memory_order_relaxed); |
28 | } |
29 | }; |
30 | |
31 | std::atomic<std::uint64_t> loop_count(0); |
32 | std::uint64_t total_loop_test_param = state.range(0); |
33 | |
34 | auto thread = support::make_test_jthread(thread_func, &loop_count); |
35 | |
36 | for (auto _ : state) { |
37 | auto start_total = loop_count.load(std::memory_order_relaxed); |
38 | |
39 | while (loop_count.load(std::memory_order_relaxed) - start_total < total_loop_test_param) { |
40 | std::this_thread::yield(); |
41 | } |
42 | } |
43 | } |
44 | |
45 | BENCHMARK(BM_stop_token_single_thread_polling_stop_requested)->RangeMultiplier(2)->Range(1 << 10, 1 << 24); |
46 | |
47 | // We have multiple threads polling for stop_requested of the same stop_token. |
48 | void BM_stop_token_multi_thread_polling_stop_requested(benchmark::State& state) { |
49 | std::atomic<bool> start{false}; |
50 | |
51 | auto thread_func = [&start](std::atomic<std::uint64_t>* loop_count, std::stop_token st) { |
52 | start.wait(false); |
53 | while (!st.stop_requested()) { |
54 | // doing some work |
55 | loop_count->fetch_add(1, std::memory_order_relaxed); |
56 | } |
57 | }; |
58 | |
59 | constexpr size_t thread_count = 20; |
60 | |
61 | std::uint64_t total_loop_test_param = state.range(0); |
62 | |
63 | std::vector<std::atomic<std::uint64_t>> loop_counts(thread_count); |
64 | std::stop_source ss; |
65 | std::vector<std::jthread> threads; |
66 | threads.reserve(thread_count); |
67 | |
68 | for (size_t i = 0; i < thread_count; ++i) { |
69 | threads.emplace_back(support::make_test_jthread(thread_func, &loop_counts[i], ss.get_token())); |
70 | } |
71 | |
72 | auto get_total_loop = [&loop_counts] { |
73 | std::uint64_t total = 0; |
74 | for (const auto& loop_count : loop_counts) { |
75 | total += loop_count.load(std::memory_order_relaxed); |
76 | } |
77 | return total; |
78 | }; |
79 | |
80 | start = true; |
81 | start.notify_all(); |
82 | |
83 | for (auto _ : state) { |
84 | auto start_total = get_total_loop(); |
85 | |
86 | while (get_total_loop() - start_total < total_loop_test_param) { |
87 | std::this_thread::yield(); |
88 | } |
89 | } |
90 | |
91 | ss.request_stop(); |
92 | } |
93 | |
94 | BENCHMARK(BM_stop_token_multi_thread_polling_stop_requested)->RangeMultiplier(2)->Range(1 << 10, 1 << 24); |
95 | |
96 | // We have a single thread created by std::jthread consuming the stop_token: |
97 | // registering/deregistering callbacks, one at a time. |
98 | void BM_stop_token_single_thread_reg_unreg_callback(benchmark::State& state) { |
99 | auto thread_func = [&](std::stop_token st, std::atomic<std::uint64_t>* reg_count) { |
100 | while (!st.stop_requested()) { |
101 | std::stop_callback cb{st, [&]() noexcept {}}; |
102 | benchmark::DoNotOptimize(cb); |
103 | reg_count->fetch_add(1, std::memory_order_relaxed); |
104 | } |
105 | }; |
106 | |
107 | std::atomic<std::uint64_t> reg_count(0); |
108 | std::uint64_t total_reg_test_param = state.range(0); |
109 | |
110 | auto thread = support::make_test_jthread(thread_func, ®_count); |
111 | |
112 | for (auto _ : state) { |
113 | auto start_total = reg_count.load(std::memory_order_relaxed); |
114 | |
115 | while (reg_count.load(std::memory_order_relaxed) - start_total < total_reg_test_param) { |
116 | std::this_thread::yield(); |
117 | } |
118 | } |
119 | } |
120 | BENCHMARK(BM_stop_token_single_thread_reg_unreg_callback)->RangeMultiplier(2)->Range(1 << 10, 1 << 24); |
121 | |
122 | // At startup, it creates a single stop_source which it will then pass an associated stop_token to every |
123 | // request. |
124 | // |
125 | // Assume a thread-pool handles these requests and for each request it polls for stop_requested(), then attaches a |
126 | // stop-callback, does some work, then detaches the stop-callback some time later. The lifetime of requests/callbacks |
127 | // would overlap with other requests/callback from the same thread. |
128 | // |
129 | // Say something like each thread keeping a circular buffer of N stop-callbacks and destroying the stop-callbacks in |
130 | // FIFO order |
131 | void BM_stop_token_async_reg_unreg_callback(benchmark::State& state) { |
132 | struct dummy_stop_callback { |
133 | void operator()() const noexcept {} |
134 | }; |
135 | |
136 | constexpr size_t thread_count = 20; |
137 | constexpr size_t concurrent_request_count = 1000; |
138 | std::atomic<bool> start{false}; |
139 | |
140 | std::uint64_t total_reg_test_param = state.range(0); |
141 | std::vector<std::atomic<std::uint64_t>> reg_counts(thread_count); |
142 | |
143 | std::stop_source ss; |
144 | std::vector<std::jthread> threads; |
145 | threads.reserve(thread_count); |
146 | |
147 | auto thread_func = [&start](std::atomic<std::uint64_t>* count, std::stop_token st) { |
148 | std::vector<std::optional<std::stop_callback<dummy_stop_callback>>> cbs(concurrent_request_count); |
149 | |
150 | start.wait(false); |
151 | |
152 | std::uint32_t index = 0; |
153 | while (!st.stop_requested()) { |
154 | cbs[index].emplace(st, dummy_stop_callback{}); |
155 | index = (index + 1) % concurrent_request_count; |
156 | count->fetch_add(1, std::memory_order_relaxed); |
157 | } |
158 | }; |
159 | |
160 | for (size_t i = 0; i < thread_count; ++i) { |
161 | threads.emplace_back(support::make_test_jthread(thread_func, ®_counts[i], ss.get_token())); |
162 | } |
163 | |
164 | auto get_total_reg = [&] { |
165 | std::uint64_t total = 0; |
166 | for (const auto& reg_count : reg_counts) { |
167 | total += reg_count.load(std::memory_order_relaxed); |
168 | } |
169 | return total; |
170 | }; |
171 | |
172 | start = true; |
173 | start.notify_all(); |
174 | |
175 | for (auto _ : state) { |
176 | auto start_total = get_total_reg(); |
177 | |
178 | while (get_total_reg() - start_total < total_reg_test_param) { |
179 | std::this_thread::yield(); |
180 | } |
181 | } |
182 | |
183 | ss.request_stop(); |
184 | } |
185 | BENCHMARK(BM_stop_token_async_reg_unreg_callback)->RangeMultiplier(2)->Range(1 << 10, 1 << 24); |
186 | |
// Google Benchmark macro that supplies the program's main(), which runs the benchmarks registered above.
BENCHMARK_MAIN();
188 | |