1 | #include <stdio.h> |
2 | #include <vector> |
3 | #include <pthread.h> |
4 | #include <malloc.h> |
5 | #include <algorithm> |
6 | |
7 | using namespace std; |
8 | |
// Number of worker threads started by the stress test.
static const size_t kNumThreds = 16;
// Upper bound (exclusive) of the per-thread iteration counter: 2^23.
static const size_t kNumIters = static_cast<size_t>(1) << 23;
11 | |
// Compiler-level barrier: an empty inline-asm statement that takes `arg` as a
// register input and declares a "memory" clobber. No machine instructions are
// emitted by the asm template itself (it is the empty string).
inline void break_optimization(void *arg) {
  asm volatile(""
               : /* no outputs */
               : "r"(arg)
               : "memory");
}
15 | |
16 | __attribute__((noinline)) |
17 | static void *MallocThread(void *t) { |
18 | size_t total_malloced = 0, total_freed = 0; |
19 | size_t max_in_use = 0; |
20 | size_t tid = reinterpret_cast<size_t>(t); |
21 | vector<pair<char *, size_t> > allocated; |
22 | allocated.reserve(kNumIters); |
23 | for (size_t i = 1; i < kNumIters; i++) { |
24 | if ((i % (kNumIters / 4)) == 0 && tid == 0) |
25 | fprintf(stderr, format: " T[%ld] iter %ld\n" , tid, i); |
26 | bool allocate = (i % 5) <= 2; // 60% malloc, 40% free |
27 | if (i > kNumIters / 4) |
28 | allocate = i % 2; // then switch to 50% malloc, 50% free |
29 | if (allocate) { |
30 | size_t size = 1 + (i % 200); |
31 | if ((i % 10001) == 0) |
32 | size *= 4096; |
33 | total_malloced += size; |
34 | char *x = new char[size]; |
35 | x[0] = x[size - 1] = x[size / 2] = 0; |
36 | allocated.push_back(make_pair(x, size)); |
37 | max_in_use = max(max_in_use, total_malloced - total_freed); |
38 | } else { |
39 | if (allocated.empty()) continue; |
40 | size_t slot = i % allocated.size(); |
41 | char *p = allocated[slot].first; |
42 | p[0] = 0; // emulate last user touch of the block |
43 | size_t size = allocated[slot].second; |
44 | total_freed += size; |
45 | swap(allocated[slot], allocated.back()); |
46 | allocated.pop_back(); |
47 | delete [] p; |
48 | } |
49 | } |
50 | if (tid == 0) |
51 | fprintf(stderr, format: " T[%ld] total_malloced: %ldM in use %ldM max %ldM\n" , |
52 | tid, total_malloced >> 20, (total_malloced - total_freed) >> 20, |
53 | max_in_use >> 20); |
54 | for (size_t i = 0; i < allocated.size(); i++) |
55 | delete [] allocated[i].first; |
56 | return 0; |
57 | } |
58 | |
59 | template <int depth> |
60 | struct DeepStack { |
61 | __attribute__((noinline)) |
62 | static void *run(void *t) { |
63 | break_optimization(arg: 0); |
64 | DeepStack<depth - 1>::run(t); |
65 | break_optimization(arg: 0); |
66 | return 0; |
67 | } |
68 | }; |
69 | |
70 | template<> |
71 | struct DeepStack<0> { |
72 | static void *run(void *t) { |
73 | MallocThread(t); |
74 | return 0; |
75 | } |
76 | }; |
77 | |
78 | // Build with -Dstandalone_malloc_test=main to make it a separate program. |
79 | int standalone_malloc_test() { |
80 | pthread_t t[kNumThreds]; |
81 | for (size_t i = 0; i < kNumThreds; i++) |
82 | pthread_create(newthread: &t[i], attr: 0, start_routine: DeepStack<200>::run, arg: reinterpret_cast<void *>(i)); |
83 | for (size_t i = 0; i < kNumThreds; i++) |
84 | pthread_join(th: t[i], thread_return: 0); |
85 | malloc_stats(); |
86 | return 0; |
87 | } |
88 | |