// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

#include "../kselftest.h"
#include "rseq.h"

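/*
 * Per-cpu data is indexed either by the memory map concurrency ID
 * (mm_cid) or by the raw cpu number, selected at build time with
 * BUILDOPT_RSEQ_PERCPU_MM_CID.
 */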
#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
# define RSEQ_PERCPU	RSEQ_PERCPU_MM_CID
static
int get_current_cpu_id(void)
{
	return rseq_current_mm_cid();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_mm_cid_available();
}
static
bool rseq_use_cpu_index(void)
{
	return false;	/* Use mm_cid */
}
#else
# define RSEQ_PERCPU	RSEQ_PERCPU_CPU_ID
static
int get_current_cpu_id(void)
{
	return rseq_cpu_start();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_current_cpu_raw() >= 0;
}
static
bool rseq_use_cpu_index(void)
{
	return true;	/* Use cpu_id as index. */
}
#endif

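/*
 * The per-cpu entries below are aligned to 128 bytes so that entries
 * belonging to different cpus never share a cache line, avoiding false
 * sharing between cpus.
 */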
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
	int reps;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Returns the cpu on which the lock was acquired. */
int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = get_current_cpu_id();
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 &lock->c[cpu].v, 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}
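
/*
 * Usage sketch: pair rseq_this_cpu_lock() with rseq_percpu_unlock()
 * around a per-cpu critical section, as test_percpu_spinlock_thread()
 * does below:
 *
 *	cpu = rseq_this_cpu_lock(&data->lock);
 *	data->c[cpu].count++;	(any update to the cpu's shard)
 *	rseq_percpu_unlock(&data->lock, cpu);
 */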

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_test_data *data = arg;
	int i, cpu;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	for (i = 0; i < data->reps; i++) {
		cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
	}
	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = 200;
	int i;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;

	memset(&data, 0, sizeof(data));
	data.reps = 5000;

	for (i = 0; i < num_threads; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_spinlock_thread, &data);

	for (i = 0; i < num_threads; i++)
		pthread_join(test_threads[i], NULL);

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)data.reps * num_threads);
}

void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = get_current_cpu_id();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		long offset;
		int ret, cpu;

		cpu = get_current_cpu_id();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						 targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			if (_cpu)
				*_cpu = cpu;
			return head;
		}
		if (ret > 0)
			return NULL;
		/* Retry if rseq aborts. */
	}
}
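
/*
 * Usage sketch: pop a node from the current cpu's shard and push it
 * back, as the list test threads do below:
 *
 *	node = this_cpu_list_pop(&list, NULL);
 *	if (node)
 *		this_cpu_list_push(&list, node, NULL);
 */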

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	int i;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	for (i = 0; i < 100000; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	const int num_threads = 200;
	int i, j;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_list_thread, &list);

	for (i = 0; i < num_threads; i++)
		pthread_join(test_threads[i], NULL);

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

int main(int argc, char **argv)
{
	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto error;
	}
	if (!rseq_validate_cpu_id()) {
		fprintf(stderr, "Error: cpu id getter unavailable\n");
		goto error;
	}
	printf("spinlock\n");
	test_percpu_spinlock();
	printf("percpu_list\n");
	test_percpu_list();
	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto error;
	}
	return 0;

error:
	return -1;
}