| 1 | #include <stdio.h> |
| 2 | #include <vector> |
| 3 | #include <pthread.h> |
| 4 | #include <malloc.h> |
| 5 | #include <algorithm> |
| 6 | |
| 7 | using namespace std; |
| 8 | |
// Number of worker threads started by standalone_malloc_test().
const size_t kNumThreds = 16;
// Upper bound (exclusive) of the per-thread iteration counter in
// MallocThread(); also used there to pre-size the bookkeeping vector.
const size_t kNumIters = static_cast<size_t>(1) << 23;
| 11 | |
// Compiler barrier: an empty inline-asm statement that takes `arg` as a
// register input and declares a "memory" clobber. It emits no instructions
// of its own; the pointer is never dereferenced.
inline void break_optimization(void *arg) {
  __asm__ __volatile__(""
                       :            /* no outputs */
                       : "r"(arg)   /* input: keep `arg` live */
                       : "memory"); /* clobber: memory */
}
| 15 | |
| 16 | __attribute__((noinline)) |
| 17 | static void *MallocThread(void *t) { |
| 18 | size_t total_malloced = 0, total_freed = 0; |
| 19 | size_t max_in_use = 0; |
| 20 | size_t tid = reinterpret_cast<size_t>(t); |
| 21 | vector<pair<char *, size_t> > allocated; |
| 22 | allocated.reserve(kNumIters); |
| 23 | for (size_t i = 1; i < kNumIters; i++) { |
| 24 | if ((i % (kNumIters / 4)) == 0 && tid == 0) |
| 25 | fprintf(stderr, format: " T[%ld] iter %ld\n" , tid, i); |
| 26 | bool allocate = (i % 5) <= 2; // 60% malloc, 40% free |
| 27 | if (i > kNumIters / 4) |
| 28 | allocate = i % 2; // then switch to 50% malloc, 50% free |
| 29 | if (allocate) { |
| 30 | size_t size = 1 + (i % 200); |
| 31 | if ((i % 10001) == 0) |
| 32 | size *= 4096; |
| 33 | total_malloced += size; |
| 34 | char *x = new char[size]; |
| 35 | x[0] = x[size - 1] = x[size / 2] = 0; |
| 36 | allocated.push_back(make_pair(x, size)); |
| 37 | max_in_use = max(max_in_use, total_malloced - total_freed); |
| 38 | } else { |
| 39 | if (allocated.empty()) continue; |
| 40 | size_t slot = i % allocated.size(); |
| 41 | char *p = allocated[slot].first; |
| 42 | p[0] = 0; // emulate last user touch of the block |
| 43 | size_t size = allocated[slot].second; |
| 44 | total_freed += size; |
| 45 | swap(allocated[slot], allocated.back()); |
| 46 | allocated.pop_back(); |
| 47 | delete [] p; |
| 48 | } |
| 49 | } |
| 50 | if (tid == 0) |
| 51 | fprintf(stderr, format: " T[%ld] total_malloced: %ldM in use %ldM max %ldM\n" , |
| 52 | tid, total_malloced >> 20, (total_malloced - total_freed) >> 20, |
| 53 | max_in_use >> 20); |
| 54 | for (size_t i = 0; i < allocated.size(); i++) |
| 55 | delete [] allocated[i].first; |
| 56 | return 0; |
| 57 | } |
| 58 | |
| 59 | template <int depth> |
| 60 | struct DeepStack { |
| 61 | __attribute__((noinline)) |
| 62 | static void *run(void *t) { |
| 63 | break_optimization(arg: 0); |
| 64 | DeepStack<depth - 1>::run(t); |
| 65 | break_optimization(arg: 0); |
| 66 | return 0; |
| 67 | } |
| 68 | }; |
| 69 | |
| 70 | template<> |
| 71 | struct DeepStack<0> { |
| 72 | static void *run(void *t) { |
| 73 | MallocThread(t); |
| 74 | return 0; |
| 75 | } |
| 76 | }; |
| 77 | |
| 78 | // Build with -Dstandalone_malloc_test=main to make it a separate program. |
| 79 | int standalone_malloc_test() { |
| 80 | pthread_t t[kNumThreds]; |
| 81 | for (size_t i = 0; i < kNumThreds; i++) |
| 82 | pthread_create(newthread: &t[i], attr: 0, start_routine: DeepStack<200>::run, arg: reinterpret_cast<void *>(i)); |
| 83 | for (size_t i = 0; i < kNumThreds; i++) |
| 84 | pthread_join(th: t[i], thread_return: 0); |
| 85 | malloc_stats(); |
| 86 | return 0; |
| 87 | } |
| 88 | |