1 | // Mini-benchmark for creating a lot of threads. |
2 | // |
3 | // Some facts: |
4 | // a) clang -O1 takes <15ms to start N=500 threads, |
5 | // consuming ~4MB more RAM than N=1. |
6 | // b) clang -O1 -ftsan takes ~26s to start N=500 threads, |
7 | // eats 5GB more RAM than N=1 (which is somewhat expected but still a lot) |
8 | // but then it consumes ~4GB of extra memory when the threads shut down! |
9 | // (definitely not in the barrier_wait interceptor) |
10 | // Also, it takes 26s to run with N=500 vs just 1.1s to run with N=1. |
#include <assert.h>
#include <limits.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
16 | |
// Barrier on which main() and every worker thread rendezvous. main()
// initializes it for n_threads + 1 participants, so no one gets past it until
// all workers have started and main() itself has arrived.
pthread_barrier_t all_threads_ready;

// Thread entry point: blocks on all_threads_ready, then finishes.
// The argument is ignored and the thread's result is always a null pointer.
void *Thread(void *unused) {
  (void)unused;  // Present only to match the pthread start-routine signature.
  pthread_barrier_wait(&all_threads_ready);
  return NULL;
}
23 | |
24 | int main(int argc, char **argv) { |
25 | int n_threads; |
26 | if (argc == 1) { |
27 | n_threads = 100; |
28 | } else if (argc == 2) { |
29 | n_threads = atoi(nptr: argv[1]); |
30 | } else { |
31 | printf(format: "Usage: %s n_threads\n" , argv[0]); |
32 | return 1; |
33 | } |
34 | printf(format: "%s: n_threads=%d\n" , __FILE__, n_threads); |
35 | |
36 | pthread_barrier_init(barrier: &all_threads_ready, NULL, count: n_threads + 1); |
37 | |
38 | pthread_t *t = new pthread_t[n_threads]; |
39 | for (int i = 0; i < n_threads; i++) { |
40 | int status = pthread_create(newthread: &t[i], attr: 0, start_routine: Thread, arg: (void*)i); |
41 | assert(status == 0); |
42 | } |
43 | // sleep(5); // FIXME: simplify measuring the memory usage. |
44 | pthread_barrier_wait(barrier: &all_threads_ready); |
45 | for (int i = 0; i < n_threads; i++) { |
46 | pthread_join(th: t[i], thread_return: 0); |
47 | } |
48 | // sleep(5); // FIXME: simplify measuring the memory usage. |
49 | delete [] t; |
50 | |
51 | return 0; |
52 | } |
53 | |