// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>
#include <stdbool.h>

/* Direct syscall wrapper: gettid() only gained a glibc wrapper in glibc 2.30. */
static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}

#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's';

static long long opt_reps = 5000;

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)			\
	do {						\
		if (verbose)				\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)

#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	"	cbz	" INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	"	cbnz	" INJECT_ASM_REG ", 222b\n" \
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#elif defined(__riscv)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"t1"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif
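
/*
 * For reference: on x86-64, RSEQ_INJECT_ASM(1) above expands to the
 * following busy-wait loop, which spins loop_cnt[1] times (main()
 * mirrors loop_cnt[1] into the asm-visible symbol asm_loop_cnt_1):
 *
 *	lea asm_loop_cnt_1(%rip), %rax
 *	mov (%rax), %eax
 *	test %eax, %eax
 *	jz 333f
 *	222:
 *	dec %eax
 *	jnz 222b
 *	333:
 *
 * rseq.h pastes these expansions at numbered injection points inside
 * its inline-asm critical sections, so the configured delays land
 * between the individual steps of each rseq critical section.
 */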

#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"

static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;

#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
#define TEST_MEMBARRIER

static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
#endif

#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
# define RSEQ_PERCPU	RSEQ_PERCPU_MM_CID
static
int get_current_cpu_id(void)
{
	return rseq_current_mm_cid();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_mm_cid_available();
}
static
bool rseq_use_cpu_index(void)
{
	return false;	/* Use mm_cid */
}
# ifdef TEST_MEMBARRIER
/*
 * Membarrier does not currently support targeting a mm_cid, so
 * issue the barrier on all cpus.
 */
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      0, 0);
}
# endif /* TEST_MEMBARRIER */
#else
# define RSEQ_PERCPU	RSEQ_PERCPU_CPU_ID
static
int get_current_cpu_id(void)
{
	return rseq_cpu_start();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_current_cpu_raw() >= 0;
}
static
bool rseq_use_cpu_index(void)
{
	return true;	/* Use cpu_id as index. */
}
# ifdef TEST_MEMBARRIER
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      MEMBARRIER_CMD_FLAG_CPU, cpu);
}
# endif /* TEST_MEMBARRIER */
#endif

struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = get_current_cpu_id();
		if (cpu < 0) {
			fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
				getpid(), (int) rseq_gettid(),
				rseq_current_cpu_raw(), cpu);
			abort();
		}
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 &lock->c[cpu].v, 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}
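
/*
 * Illustrative sketch (not used by the tests below): the intended
 * usage pattern for the per-cpu spinlock. The cpu index returned by
 * rseq_this_cpu_lock() identifies which per-cpu lock was taken, so the
 * same index must be handed back to rseq_percpu_unlock() even if the
 * thread migrates while holding the lock.
 */
static __attribute__((unused))
void example_percpu_lock_usage(struct percpu_lock *lock,
			       struct test_data_entry *counters)
{
	int cpu = rseq_this_cpu_lock(lock);

	counters[cpu].count++;	/* Critical section on the locked cpu's data. */
	rseq_percpu_unlock(lock, cpu);
}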

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = get_current_cpu_id();
			ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = get_current_cpu_id();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		long offset;
		int ret;

		cpu = get_current_cpu_id();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						 targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}
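
/*
 * Illustrative sketch (not used by the tests below): moving one node
 * through the per-cpu list API. Pop and push each look up the current
 * cpu themselves, so a node popped on one cpu may legitimately be
 * pushed back onto another cpu's list if the thread migrates in
 * between.
 */
static __attribute__((unused))
void example_percpu_list_usage(struct percpu_list *list)
{
	struct percpu_list_node *node;
	int pop_cpu, push_cpu;

	node = this_cpu_list_pop(list, &pop_cpu);
	if (node)
		this_cpu_list_push(list, node, &push_cpu);
}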

void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU,
			targetptr_final, offset, targetptr_spec,
			newval_spec, newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}
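
/*
 * Illustrative sketch (not used by the tests below): unlike the list
 * push, a buffer push fails (returns false) once the current cpu's
 * buffer is full, so callers must check its return value.
 */
static __attribute__((unused))
bool example_percpu_buffer_usage(struct percpu_buffer *buffer,
				 struct percpu_buffer_node *node)
{
	int cpu;

	if (!this_cpu_buffer_push(buffer, node, &cpu))
		return false;	/* Buffer full on this cpu. */
	return true;
}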

void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate buffer entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item landing on the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(
			opt_mo, RSEQ_PERCPU,
			targetptr_final, offset,
			destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr_final, offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int cpu)
{
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return false;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
	return true;
}
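
/*
 * Illustrative sketch (not used by the tests below): the memcpy buffer
 * stores items by value. Pop copies the item into caller-provided
 * storage rather than returning a pointer, and push copies the item
 * back into the buffer.
 */
static __attribute__((unused))
bool example_percpu_memcpy_buffer_usage(struct percpu_memcpy_buffer *buffer)
{
	struct percpu_memcpy_buffer_node item;

	if (!this_cpu_memcpy_buffer_pop(buffer, &item, NULL))
		return false;	/* Current cpu's buffer was empty. */
	return this_cpu_memcpy_buffer_push(buffer, item, NULL);
}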

void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate buffer entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item landing on the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			/* Each item contributes data1 + data2 = j + (j + 1). */
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}

static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}

/* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
#ifdef TEST_MEMBARRIER
struct test_membarrier_thread_args {
	int stop;
	intptr_t percpu_list_ptr;
};

/* Worker threads modify data in their "active" percpu lists. */
void *test_membarrier_worker_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	const int iters = opt_reps;
	int i;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Wait for initialization. */
	while (!__atomic_load_n(&args->percpu_list_ptr, __ATOMIC_ACQUIRE)) {}

	for (i = 0; i < iters; ++i) {
		int ret;

		do {
			int cpu = get_current_cpu_id();

			ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
				&args->percpu_list_ptr,
				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
		} while (rseq_unlikely(ret));
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}
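
/*
 * Note on the pairing above: the worker dereferences
 * args->percpu_list_ptr inside the rseq critical section itself
 * (rseq_offset_deref_addv). When the manager swaps the pointer and
 * then issues an expedited rseq membarrier, any critical section still
 * running against the old pointer on the targeted cpu is restarted and
 * re-reads the new pointer. This is what lets the manager below treat
 * the inactive list as quiescent.
 */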

void test_membarrier_init_percpu_list(struct percpu_list *list)
{
	int i;

	memset(list, 0, sizeof(*list));
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		node = malloc(sizeof(*node));
		assert(node);
		node->data = 0;
		node->next = NULL;
		list->c[i].head = node;
	}
}

void test_membarrier_free_percpu_list(struct percpu_list *list)
{
	int i;

	for (i = 0; i < CPU_SETSIZE; i++)
		free(list->c[i].head);
}

/*
 * The manager thread swaps per-cpu lists that worker threads see,
 * and validates that there are no unexpected modifications.
 */
void *test_membarrier_manager_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	struct percpu_list list_a, list_b;
	intptr_t expect_a = 0, expect_b = 0;
	int cpu_a = 0, cpu_b = 0;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Init lists. */
	test_membarrier_init_percpu_list(&list_a);
	test_membarrier_init_percpu_list(&list_b);

	__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);

	while (!__atomic_load_n(&args->stop, __ATOMIC_ACQUIRE)) {
		/* list_a is "active". */
		cpu_a = rand() % CPU_SETSIZE;
		/*
		 * As list_b is "inactive", we should never see changes
		 * to list_b.
		 */
		if (expect_b != __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_b "active". */
		__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_b, __ATOMIC_RELEASE);
		if (rseq_membarrier_expedited(cpu_a) &&
		    errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/*
		 * Cpu A should now only modify list_b, so the values
		 * in list_a should be stable.
		 */
		expect_a = __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE);

		cpu_b = rand() % CPU_SETSIZE;
		/*
		 * As list_a is "inactive", we should never see changes
		 * to list_a.
		 */
		if (expect_a != __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_a "active". */
		__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
		if (rseq_membarrier_expedited(cpu_b) &&
		    errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/* Remember a value from list_b. */
		expect_b = __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE);
	}

	test_membarrier_free_percpu_list(&list_a);
	test_membarrier_free_percpu_list(&list_b);

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

void test_membarrier(void)
{
	const int num_threads = opt_threads;
	struct test_membarrier_thread_args thread_args;
	pthread_t worker_threads[num_threads];
	pthread_t manager_thread;
	int i, ret;

	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
		perror("sys_membarrier");
		abort();
	}

	thread_args.stop = 0;
	thread_args.percpu_list_ptr = 0;
	ret = pthread_create(&manager_thread, NULL,
			     test_membarrier_manager_thread, &thread_args);
	if (ret) {
		errno = ret;
		perror("pthread_create");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&worker_threads[i], NULL,
				     test_membarrier_worker_thread, &thread_args);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(worker_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	__atomic_store_n(&thread_args.stop, 1, __ATOMIC_RELEASE);
	ret = pthread_join(manager_thread, NULL);
	if (ret) {
		errno = ret;
		perror("pthread_join");
		abort();
	}
}
#else /* TEST_MEMBARRIER */
void test_membarrier(void)
{
	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
		"Skipping membarrier test.\n");
}
#endif

static void show_usage(int argc, char **argv)
{
	printf("Usage : %s <OPTIONS>\n", argv[0]);
	printf("OPTIONS:\n");
	printf("	[-1 loops] Number of loops for delay injection 1\n");
	printf("	[-2 loops] Number of loops for delay injection 2\n");
	printf("	[-3 loops] Number of loops for delay injection 3\n");
	printf("	[-4 loops] Number of loops for delay injection 4\n");
	printf("	[-5 loops] Number of loops for delay injection 5\n");
	printf("	[-6 loops] Number of loops for delay injection 6\n");
	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf("	[-y] Yield\n");
	printf("	[-k] Kill thread with signal\n");
	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf("	[-t N] Number of threads (default 200)\n");
	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
	printf("	[-d] Disable rseq system call (no initialization)\n");
	printf("	[-D M] Disable rseq for each M threads\n");
	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf("	[-v] Verbose output.\n");
	printf("	[-h] Show this help.\n");
	printf("\n");
}

int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
			case 'r':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mo = RSEQ_MO_RELEASE;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}

	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
		fprintf(stderr, "Error: cpu id getter unavailable\n");
		goto error;
	}
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	case 'r':
		printf_verbose("membarrier\n");
		test_membarrier();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}