1 | //===----------------------------------------------------------------------===// |
2 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
3 | // See https://llvm.org/LICENSE.txt for license information. |
4 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
5 | // |
6 | //===----------------------------------------------------------------------===// |
7 | |
8 | #include <numeric> |
9 | #include <stop_token> |
10 | #include <thread> |
11 | |
12 | #include "benchmark/benchmark.h" |
13 | #include "make_test_thread.h" |
14 | |
15 | using namespace std::chrono_literals; |
16 | |
17 | // We have a single thread created by std::jthread consuming the stop_token: |
18 | // polling for stop_requested. |
19 | void BM_stop_token_single_thread_polling_stop_requested(benchmark::State& state) { |
20 | auto thread_func = [&](std::stop_token st, std::atomic<std::uint64_t>* loop_count) { |
21 | while (!st.stop_requested()) { |
22 | // doing some work |
23 | loop_count->fetch_add(i: 1, m: std::memory_order_relaxed); |
24 | } |
25 | }; |
26 | |
27 | std::atomic<std::uint64_t> loop_count(0); |
28 | std::uint64_t total_loop_test_param = state.range(pos: 0); |
29 | |
30 | auto thread = support::make_test_jthread(thread_func, &loop_count); |
31 | |
32 | for (auto _ : state) { |
33 | auto start_total = loop_count.load(m: std::memory_order_relaxed); |
34 | |
35 | while (loop_count.load(m: std::memory_order_relaxed) - start_total < total_loop_test_param) { |
36 | std::this_thread::yield(); |
37 | } |
38 | } |
39 | } |
40 | |
41 | BENCHMARK(BM_stop_token_single_thread_polling_stop_requested)->RangeMultiplier(multiplier: 2)->Range(start: 1 << 10, limit: 1 << 24); |
42 | |
43 | // We have multiple threads polling for stop_requested of the same stop_token. |
44 | void BM_stop_token_multi_thread_polling_stop_requested(benchmark::State& state) { |
45 | std::atomic<bool> start{false}; |
46 | |
47 | auto thread_func = [&start](std::atomic<std::uint64_t>* loop_count, std::stop_token st) { |
48 | start.wait(false); |
49 | while (!st.stop_requested()) { |
50 | // doing some work |
51 | loop_count->fetch_add(i: 1, m: std::memory_order_relaxed); |
52 | } |
53 | }; |
54 | |
55 | constexpr size_t thread_count = 20; |
56 | |
57 | std::uint64_t total_loop_test_param = state.range(pos: 0); |
58 | |
59 | std::vector<std::atomic<std::uint64_t>> loop_counts(thread_count); |
60 | std::stop_source ss; |
61 | std::vector<std::jthread> threads; |
62 | threads.reserve(thread_count); |
63 | |
64 | for (size_t i = 0; i < thread_count; ++i) { |
65 | threads.emplace_back(support::make_test_jthread(thread_func, &loop_counts[i], ss.get_token())); |
66 | } |
67 | |
68 | auto get_total_loop = [&loop_counts] { |
69 | std::uint64_t total = 0; |
70 | for (const auto& loop_count : loop_counts) { |
71 | total += loop_count.load(m: std::memory_order_relaxed); |
72 | } |
73 | return total; |
74 | }; |
75 | |
76 | start = true; |
77 | start.notify_all(); |
78 | |
79 | for (auto _ : state) { |
80 | auto start_total = get_total_loop(); |
81 | |
82 | while (get_total_loop() - start_total < total_loop_test_param) { |
83 | std::this_thread::yield(); |
84 | } |
85 | } |
86 | |
87 | ss.request_stop(); |
88 | } |
89 | |
90 | BENCHMARK(BM_stop_token_multi_thread_polling_stop_requested)->RangeMultiplier(multiplier: 2)->Range(start: 1 << 10, limit: 1 << 24); |
91 | |
92 | // We have a single thread created by std::jthread consuming the stop_token: |
93 | // registering/deregistering callbacks, one at a time. |
94 | void BM_stop_token_single_thread_reg_unreg_callback(benchmark::State& state) { |
95 | auto thread_func = [&](std::stop_token st, std::atomic<std::uint64_t>* reg_count) { |
96 | while (!st.stop_requested()) { |
97 | std::stop_callback cb{st, [&]() noexcept {}}; |
98 | benchmark::DoNotOptimize(cb); |
99 | reg_count->fetch_add(i: 1, m: std::memory_order_relaxed); |
100 | } |
101 | }; |
102 | |
103 | std::atomic<std::uint64_t> reg_count(0); |
104 | std::uint64_t total_reg_test_param = state.range(pos: 0); |
105 | |
106 | auto thread = support::make_test_jthread(thread_func, ®_count); |
107 | |
108 | for (auto _ : state) { |
109 | auto start_total = reg_count.load(m: std::memory_order_relaxed); |
110 | |
111 | while (reg_count.load(m: std::memory_order_relaxed) - start_total < total_reg_test_param) { |
112 | std::this_thread::yield(); |
113 | } |
114 | } |
115 | } |
116 | BENCHMARK(BM_stop_token_single_thread_reg_unreg_callback)->RangeMultiplier(multiplier: 2)->Range(start: 1 << 10, limit: 1 << 24); |
117 | |
118 | // At startup, it creates a single stop_source which it will then pass an associated stop_token to every |
119 | // request. |
120 | // |
121 | // Assume a thread-pool handles these requests and for each request it polls for stop_requested(), then attaches a |
122 | // stop-callback, does some work, then detaches the stop-callback some time later. The lifetime of requests/callbacks |
123 | // would overlap with other requests/callback from the same thread. |
124 | // |
125 | // Say something like each thread keeping a circular buffer of N stop-callbacks and destroying the stop-callbacks in |
126 | // FIFO order |
127 | void BM_stop_token_async_reg_unreg_callback(benchmark::State& state) { |
128 | struct dummy_stop_callback { |
129 | void operator()() const noexcept {} |
130 | }; |
131 | |
132 | constexpr size_t thread_count = 20; |
133 | constexpr size_t concurrent_request_count = 1000; |
134 | std::atomic<bool> start{false}; |
135 | |
136 | std::uint64_t total_reg_test_param = state.range(pos: 0); |
137 | std::vector<std::atomic<std::uint64_t>> reg_counts(thread_count); |
138 | |
139 | std::stop_source ss; |
140 | std::vector<std::jthread> threads; |
141 | threads.reserve(thread_count); |
142 | |
143 | auto thread_func = [&start](std::atomic<std::uint64_t>* count, std::stop_token st) { |
144 | std::vector<std::optional<std::stop_callback<dummy_stop_callback>>> cbs(concurrent_request_count); |
145 | |
146 | start.wait(false); |
147 | |
148 | std::uint32_t index = 0; |
149 | while (!st.stop_requested()) { |
150 | cbs[index].emplace(st, dummy_stop_callback{}); |
151 | index = (index + 1) % concurrent_request_count; |
152 | count->fetch_add(i: 1, m: std::memory_order_relaxed); |
153 | } |
154 | }; |
155 | |
156 | for (size_t i = 0; i < thread_count; ++i) { |
157 | threads.emplace_back(support::make_test_jthread(thread_func, ®_counts[i], ss.get_token())); |
158 | } |
159 | |
160 | auto get_total_reg = [&] { |
161 | std::uint64_t total = 0; |
162 | for (const auto& reg_count : reg_counts) { |
163 | total += reg_count.load(m: std::memory_order_relaxed); |
164 | } |
165 | return total; |
166 | }; |
167 | |
168 | start = true; |
169 | start.notify_all(); |
170 | |
171 | for (auto _ : state) { |
172 | auto start_total = get_total_reg(); |
173 | |
174 | while (get_total_reg() - start_total < total_reg_test_param) { |
175 | std::this_thread::yield(); |
176 | } |
177 | } |
178 | |
179 | ss.request_stop(); |
180 | } |
181 | BENCHMARK(BM_stop_token_async_reg_unreg_callback)->RangeMultiplier(multiplier: 2)->Range(start: 1 << 10, limit: 1 << 24); |
182 | |
183 | BENCHMARK_MAIN(); |
184 | |