//===-- memprof_allocator.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of MemProfiler, a memory profiler.
//
// Implementation of MemProf's memory allocator, which uses the allocator
// from sanitizer_common.
//
//===----------------------------------------------------------------------===//

#include "memprof_allocator.h"
#include "memprof_mapping.h"
#include "memprof_mibmap.h"
#include "memprof_rawprofile.h"
#include "memprof_stack.h"
#include "memprof_thread.h"
#include "profile/MemProfData.inc"
#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
#include "sanitizer_common/sanitizer_allocator_report.h"
#include "sanitizer_common/sanitizer_array_ref.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_errno.h"
#include "sanitizer_common/sanitizer_file.h"
#include "sanitizer_common/sanitizer_flags.h"
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_stackdepot.h"

#include <sched.h>
#include <time.h>

namespace __memprof {
namespace {
using ::llvm::memprof::MemInfoBlock;

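// Prints the given MemInfoBlock for a stack id, either as a single terse
// "MIB:" line of '/'-separated fields or in a multi-line readable form.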
void Print(const MemInfoBlock &M, const u64 id, bool print_terse) {
  u64 p;

  if (print_terse) {
    p = M.TotalSize * 100 / M.AllocCount;
    Printf("MIB:%llu/%u/%llu.%02llu/%u/%u/", id, M.AllocCount, p / 100,
           p % 100, M.MinSize, M.MaxSize);
    p = M.TotalAccessCount * 100 / M.AllocCount;
    Printf("%llu.%02llu/%llu/%llu/", p / 100, p % 100, M.MinAccessCount,
           M.MaxAccessCount);
    p = M.TotalLifetime * 100 / M.AllocCount;
    Printf("%llu.%02llu/%u/%u/", p / 100, p % 100, M.MinLifetime,
           M.MaxLifetime);
    Printf("%u/%u/%u/%u\n", M.NumMigratedCpu, M.NumLifetimeOverlaps,
           M.NumSameAllocCpu, M.NumSameDeallocCpu);
  } else {
    p = M.TotalSize * 100 / M.AllocCount;
    Printf("Memory allocation stack id = %llu\n", id);
    Printf("\talloc_count %u, size (ave/min/max) %llu.%02llu / %u / %u\n",
           M.AllocCount, p / 100, p % 100, M.MinSize, M.MaxSize);
    p = M.TotalAccessCount * 100 / M.AllocCount;
    Printf("\taccess_count (ave/min/max): %llu.%02llu / %llu / %llu\n", p / 100,
           p % 100, M.MinAccessCount, M.MaxAccessCount);
    p = M.TotalLifetime * 100 / M.AllocCount;
    Printf("\tlifetime (ave/min/max): %llu.%02llu / %u / %u\n", p / 100,
           p % 100, M.MinLifetime, M.MaxLifetime);
    Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc "
           "cpu: %u, num same dealloc_cpu: %u\n",
           M.NumMigratedCpu, M.NumLifetimeOverlaps, M.NumSameAllocCpu,
           M.NumSameDeallocCpu);
  }
}
} // namespace

static int GetCpuId(void) {
  // _memprof_preinit is called via the preinit_array, which subsequently calls
  // malloc. Since this is before _dl_init calls VDSO_SETUP, sched_getcpu
  // will seg fault as the address of __vdso_getcpu will be null.
  if (!memprof_inited)
    return -1;
  return sched_getcpu();
}

// Compute the timestamp in ms.
static int GetTimestamp(void) {
  // timespec_get will segfault if called from dl_init
  if (!memprof_timestamp_inited) {
    // By returning 0, this will be effectively treated as being
    // timestamped at memprof init time (when memprof_init_timestamp_s
    // is initialized).
    return 0;
  }
  timespec ts;
  clock_gettime(CLOCK_REALTIME, &ts);
  return (ts.tv_sec - memprof_init_timestamp_s) * 1000 + ts.tv_nsec / 1000000;
}

static MemprofAllocator &get_allocator();

// The memory chunk allocated from the underlying allocator looks like this:
// H H U U U U U U
//   H -- ChunkHeader (32 bytes)
//   U -- user memory.

// If there is left padding before the ChunkHeader (due to use of memalign),
// we store a magic value in the first uptr word of the memory block and
// store the address of ChunkHeader in the next uptr.
// M B L L L L L L L L L  H H U U U U U U
//   |                    ^
//   ---------------------|
//   M -- magic value kAllocBegMagic
//   B -- address of ChunkHeader pointing to the first 'H'

constexpr uptr kMaxAllowedMallocBits = 40;

// Should be no more than 32 bytes
struct ChunkHeader {
  // 1-st 4 bytes.
  u32 alloc_context_id;
  // 2-nd 4 bytes
  u32 cpu_id;
  // 3-rd 4 bytes
  u32 timestamp_ms;
  // 4-th 4 bytes
  // Note only 1 bit is needed for this flag if we need space in the future for
  // more fields.
  u32 from_memalign;
  // 5-th and 6-th 4 bytes
  // The max size of an allocation is 2^40 (kMaxAllowedMallocSize), so this
  // could be shrunk to kMaxAllowedMallocBits if we need space in the future for
  // more fields.
  atomic_uint64_t user_requested_size;
  // 23 bits available
  // 7-th and 8-th 4 bytes
  u64 data_type_id; // TODO: hash of type name
};

static const uptr kChunkHeaderSize = sizeof(ChunkHeader);
COMPILER_CHECK(kChunkHeaderSize == 32);

struct MemprofChunk : ChunkHeader {
  uptr Beg() { return reinterpret_cast<uptr>(this) + kChunkHeaderSize; }
  uptr UsedSize() {
    return atomic_load(&user_requested_size, memory_order_relaxed);
  }
  void *AllocBeg() {
    if (from_memalign)
      return get_allocator().GetBlockBegin(reinterpret_cast<void *>(this));
    return reinterpret_cast<void *>(this);
  }
};

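// Placed at the beginning of the underlying block when the user chunk had to
// be shifted for alignment (the "M B" words in the layout diagram above):
// stores kAllocBegMagic plus a back-pointer to the real ChunkHeader.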
class LargeChunkHeader {
  static constexpr uptr kAllocBegMagic =
      FIRST_32_SECOND_64(0xCC6E96B9, 0xCC6E96B9CC6E96B9ULL);
  atomic_uintptr_t magic;
  MemprofChunk *chunk_header;

public:
  MemprofChunk *Get() const {
    return atomic_load(&magic, memory_order_acquire) == kAllocBegMagic
               ? chunk_header
               : nullptr;
  }

  void Set(MemprofChunk *p) {
    if (p) {
      chunk_header = p;
      atomic_store(&magic, kAllocBegMagic, memory_order_release);
      return;
    }

    uptr old = kAllocBegMagic;
    if (!atomic_compare_exchange_strong(&magic, &old, 0,
                                        memory_order_release)) {
      CHECK_EQ(old, kAllocBegMagic);
    }
  }
};

void FlushUnneededMemProfShadowMemory(uptr p, uptr size) {
  // Since memprof's mapping is compacting, the shadow chunk may be
  // not page-aligned, so we only flush the page-aligned portion.
  ReleaseMemoryPagesToOS(MemToShadow(p), MemToShadow(p + size));
}

void MemprofMapUnmapCallback::OnMap(uptr p, uptr size) const {
  // Statistics.
  MemprofStats &thread_stats = GetCurrentThreadStats();
  thread_stats.mmaps++;
  thread_stats.mmaped += size;
}

void MemprofMapUnmapCallback::OnUnmap(uptr p, uptr size) const {
  // We are about to unmap a chunk of user memory.
  // Mark the corresponding shadow memory as not needed.
  FlushUnneededMemProfShadowMemory(p, size);
  // Statistics.
  MemprofStats &thread_stats = GetCurrentThreadStats();
  thread_stats.munmaps++;
  thread_stats.munmaped += size;
}

AllocatorCache *GetAllocatorCache(MemprofThreadLocalMallocStorage *ms) {
  CHECK(ms);
  return &ms->allocator_cache;
}

// Accumulates the access count from the shadow for the given pointer and size.
u64 GetShadowCount(uptr p, u32 size) {
  u64 *shadow = (u64 *)MEM_TO_SHADOW(p);
  u64 *shadow_end = (u64 *)MEM_TO_SHADOW(p + size);
  u64 count = 0;
  for (; shadow <= shadow_end; shadow++)
    count += *shadow;
  return count;
}

// Clears the shadow counters (when memory is allocated).
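// Small ranges are zeroed with memset; for large ranges only the unaligned
// edges are memset and the page-aligned interior is remapped via
// ReserveShadowMemoryRange so it is backed by fresh zero pages.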
void ClearShadow(uptr addr, uptr size) {
  CHECK(AddrIsAlignedByGranularity(addr));
  CHECK(AddrIsInMem(addr));
  CHECK(AddrIsAlignedByGranularity(addr + size));
  CHECK(AddrIsInMem(addr + size - SHADOW_GRANULARITY));
  CHECK(REAL(memset));
  uptr shadow_beg = MEM_TO_SHADOW(addr);
  uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
  if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
    REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
  } else {
    uptr page_size = GetPageSizeCached();
    uptr page_beg = RoundUpTo(shadow_beg, page_size);
    uptr page_end = RoundDownTo(shadow_end, page_size);

    if (page_beg >= page_end) {
      REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
    } else {
      if (page_beg != shadow_beg) {
        REAL(memset)((void *)shadow_beg, 0, page_beg - shadow_beg);
      }
      if (page_end != shadow_end) {
        REAL(memset)((void *)page_end, 0, shadow_end - page_end);
      }
      ReserveShadowMemoryRange(page_beg, page_end - 1, nullptr);
    }
  }
}

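// Global allocator state: wraps the sanitizer_common allocator and owns the
// map from allocation stack ids to MemInfoBlocks, which is printed as text or
// serialized to a raw profile when the process finishes.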
struct Allocator {
  static const uptr kMaxAllowedMallocSize = 1ULL << kMaxAllowedMallocBits;

  MemprofAllocator allocator;
  StaticSpinMutex fallback_mutex;
  AllocatorCache fallback_allocator_cache;

  uptr max_user_defined_malloc_size;

  // Holds the mapping of stack ids to MemInfoBlocks.
  MIBMapTy MIBMap;

  atomic_uint8_t destructing;
  atomic_uint8_t constructed;
  bool print_text;

  // ------------------- Initialization ------------------------
  explicit Allocator(LinkerInitialized) : print_text(flags()->print_text) {
    atomic_store_relaxed(&destructing, 0);
    atomic_store_relaxed(&constructed, 1);
  }

  ~Allocator() {
    atomic_store_relaxed(&destructing, 1);
    FinishAndWrite();
  }

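  // Callback for MIBMap.ForEach: prints one MemInfoBlock; Arg carries the
  // print_terse flag.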
  static void PrintCallback(const uptr Key, LockedMemInfoBlock *const &Value,
                            void *Arg) {
    SpinMutexLock l(&Value->mutex);
    Print(Value->mib, Key, bool(Arg));
  }

  void FinishAndWrite() {
    if (print_text && common_flags()->print_module_map)
      DumpProcessMap();

    allocator.ForceLock();

    InsertLiveBlocks();
    if (print_text) {
      if (!flags()->print_terse)
        Printf("Recorded MIBs (incl. live on exit):\n");
      MIBMap.ForEach(PrintCallback,
                     reinterpret_cast<void *>(flags()->print_terse));
      StackDepotPrintAll();
    } else {
      // Serialize the contents to a raw profile. Format documented in
      // memprof_rawprofile.h.
      char *Buffer = nullptr;

      __sanitizer::ListOfModules List;
      List.init();
      ArrayRef<LoadedModule> Modules(List.begin(), List.end());
      u64 BytesSerialized = SerializeToRawProfile(MIBMap, Modules, Buffer);
      CHECK(Buffer && BytesSerialized && "could not serialize to buffer");
      report_file.Write(Buffer, BytesSerialized);
    }

    allocator.ForceUnlock();
  }

  // Inserts any blocks which have been allocated but not yet deallocated.
  void InsertLiveBlocks() {
    allocator.ForEachChunk(
        [](uptr chunk, void *alloc) {
          u64 user_requested_size;
          Allocator *A = (Allocator *)alloc;
          MemprofChunk *m =
              A->GetMemprofChunk((void *)chunk, user_requested_size);
          if (!m)
            return;
          uptr user_beg = ((uptr)m) + kChunkHeaderSize;
          u64 c = GetShadowCount(user_beg, user_requested_size);
          long curtime = GetTimestamp();
          MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
                              m->cpu_id, GetCpuId());
          InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap);
        },
        this);
  }

  void InitLinkerInitialized() {
    SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
    allocator.InitLinkerInitialized(
        common_flags()->allocator_release_to_os_interval_ms);
    max_user_defined_malloc_size = common_flags()->max_allocation_size_mb
                                       ? common_flags()->max_allocation_size_mb
                                             << 20
                                       : kMaxAllowedMallocSize;
  }

  // -------------------- Allocation/Deallocation routines ---------------
  void *Allocate(uptr size, uptr alignment, BufferedStackTrace *stack,
                 AllocType alloc_type) {
    if (UNLIKELY(!memprof_inited))
      MemprofInitFromRtl();
    if (UNLIKELY(IsRssLimitExceeded())) {
      if (AllocatorMayReturnNull())
        return nullptr;
      ReportRssLimitExceeded(stack);
    }
    CHECK(stack);
    const uptr min_alignment = MEMPROF_ALIGNMENT;
    if (alignment < min_alignment)
      alignment = min_alignment;
    if (size == 0) {
      // We'd be happy to avoid allocating memory for zero-size requests, but
      // some programs/tests depend on this behavior and assume that malloc
      // would not return NULL even for zero-size allocations. Moreover, it
      // looks like operator new should never return NULL, and results of
      // consecutive "new" calls must be different even if the allocated size
      // is zero.
      size = 1;
    }
    CHECK(IsPowerOfTwo(alignment));
    uptr rounded_size = RoundUpTo(size, alignment);
    uptr needed_size = rounded_size + kChunkHeaderSize;
    if (alignment > min_alignment)
      needed_size += alignment;
    CHECK(IsAligned(needed_size, min_alignment));
    if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize ||
        size > max_user_defined_malloc_size) {
      if (AllocatorMayReturnNull()) {
        Report("WARNING: MemProfiler failed to allocate 0x%zx bytes\n", size);
        return nullptr;
      }
      uptr malloc_limit =
          Min(kMaxAllowedMallocSize, max_user_defined_malloc_size);
      ReportAllocationSizeTooBig(size, malloc_limit, stack);
    }

    MemprofThread *t = GetCurrentThread();
    void *allocated;
    if (t) {
      AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
      allocated = allocator.Allocate(cache, needed_size, 8);
    } else {
      SpinMutexLock l(&fallback_mutex);
      AllocatorCache *cache = &fallback_allocator_cache;
      allocated = allocator.Allocate(cache, needed_size, 8);
    }
    if (UNLIKELY(!allocated)) {
      SetAllocatorOutOfMemory();
      if (AllocatorMayReturnNull())
        return nullptr;
      ReportOutOfMemory(size, stack);
    }

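    // Carve the chunk header and user region out of the block returned by the
    // underlying allocator. If extra alignment was requested, user_beg may be
    // pushed past alloc_beg + kChunkHeaderSize, and the gap at alloc_beg is
    // then used for the LargeChunkHeader below.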
    uptr alloc_beg = reinterpret_cast<uptr>(allocated);
    uptr alloc_end = alloc_beg + needed_size;
    uptr beg_plus_header = alloc_beg + kChunkHeaderSize;
    uptr user_beg = beg_plus_header;
    if (!IsAligned(user_beg, alignment))
      user_beg = RoundUpTo(user_beg, alignment);
    uptr user_end = user_beg + size;
    CHECK_LE(user_end, alloc_end);
    uptr chunk_beg = user_beg - kChunkHeaderSize;
    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);
    m->from_memalign = alloc_beg != chunk_beg;
    CHECK(size);

    m->cpu_id = GetCpuId();
    m->timestamp_ms = GetTimestamp();
    m->alloc_context_id = StackDepotPut(*stack);

    uptr size_rounded_down_to_granularity =
        RoundDownTo(size, SHADOW_GRANULARITY);
    if (size_rounded_down_to_granularity)
      ClearShadow(user_beg, size_rounded_down_to_granularity);

    MemprofStats &thread_stats = GetCurrentThreadStats();
    thread_stats.mallocs++;
    thread_stats.malloced += size;
    thread_stats.malloced_overhead += needed_size - size;
    if (needed_size > SizeClassMap::kMaxSize)
      thread_stats.malloc_large++;
    else
      thread_stats.malloced_by_size[SizeClassMap::ClassID(needed_size)]++;

    void *res = reinterpret_cast<void *>(user_beg);
    atomic_store(&m->user_requested_size, size, memory_order_release);
    if (alloc_beg != chunk_beg) {
      CHECK_LE(alloc_beg + sizeof(LargeChunkHeader), chunk_beg);
      reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(m);
    }
    RunMallocHooks(res, size);
    return res;
  }

  void Deallocate(void *ptr, uptr delete_size, uptr delete_alignment,
                  BufferedStackTrace *stack, AllocType alloc_type) {
    uptr p = reinterpret_cast<uptr>(ptr);
    if (p == 0)
      return;

    RunFreeHooks(ptr);

    uptr chunk_beg = p - kChunkHeaderSize;
    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);

    u64 user_requested_size =
        atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
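    // Record a MemInfoBlock for this allocation, but only while the runtime is
    // fully initialized and the global Allocator is neither under construction
    // nor being destroyed.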
    if (memprof_inited && atomic_load_relaxed(&constructed) &&
        !atomic_load_relaxed(&destructing)) {
      u64 c = GetShadowCount(p, user_requested_size);
      long curtime = GetTimestamp();

      MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
                          m->cpu_id, GetCpuId());
      InsertOrMerge(m->alloc_context_id, newMIB, MIBMap);
    }

    MemprofStats &thread_stats = GetCurrentThreadStats();
    thread_stats.frees++;
    thread_stats.freed += user_requested_size;

    void *alloc_beg = m->AllocBeg();
    if (alloc_beg != m) {
      // Clear the magic value, as allocator internals may overwrite the
      // contents of deallocated chunk, confusing GetMemprofChunk lookup.
      reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(nullptr);
    }

    MemprofThread *t = GetCurrentThread();
    if (t) {
      AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
      allocator.Deallocate(cache, alloc_beg);
    } else {
      SpinMutexLock l(&fallback_mutex);
      AllocatorCache *cache = &fallback_allocator_cache;
      allocator.Deallocate(cache, alloc_beg);
    }
  }

  void *Reallocate(void *old_ptr, uptr new_size, BufferedStackTrace *stack) {
    CHECK(old_ptr && new_size);
    uptr p = reinterpret_cast<uptr>(old_ptr);
    uptr chunk_beg = p - kChunkHeaderSize;
    MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);

    MemprofStats &thread_stats = GetCurrentThreadStats();
    thread_stats.reallocs++;
    thread_stats.realloced += new_size;

    void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC);
    if (new_ptr) {
      CHECK_NE(REAL(memcpy), nullptr);
      uptr memcpy_size = Min(new_size, m->UsedSize());
      REAL(memcpy)(new_ptr, old_ptr, memcpy_size);
      Deallocate(old_ptr, 0, 0, stack, FROM_MALLOC);
    }
    return new_ptr;
  }

  void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
    if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
      if (AllocatorMayReturnNull())
        return nullptr;
      ReportCallocOverflow(nmemb, size, stack);
    }
    void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC);
    // If the memory comes from the secondary allocator no need to clear it
    // as it comes directly from mmap.
    if (ptr && allocator.FromPrimary(ptr))
      REAL(memset)(ptr, 0, nmemb * size);
    return ptr;
  }

  void CommitBack(MemprofThreadLocalMallocStorage *ms) {
    AllocatorCache *ac = GetAllocatorCache(ms);
    allocator.SwallowCache(ac);
  }

  // -------------------------- Chunk lookup ----------------------

  // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg).
  MemprofChunk *GetMemprofChunk(void *alloc_beg, u64 &user_requested_size) {
    if (!alloc_beg)
      return nullptr;
    MemprofChunk *p = reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Get();
    if (!p) {
      if (!allocator.FromPrimary(alloc_beg))
        return nullptr;
      p = reinterpret_cast<MemprofChunk *>(alloc_beg);
    }
    // The size is reset to 0 on deallocation (and a min of 1 on
    // allocation).
    user_requested_size =
        atomic_load(&p->user_requested_size, memory_order_acquire);
    if (user_requested_size)
      return p;
    return nullptr;
  }

  MemprofChunk *GetMemprofChunkByAddr(uptr p, u64 &user_requested_size) {
    void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast<void *>(p));
    return GetMemprofChunk(alloc_beg, user_requested_size);
  }

  uptr AllocationSize(uptr p) {
    u64 user_requested_size;
    MemprofChunk *m = GetMemprofChunkByAddr(p, user_requested_size);
    if (!m)
      return 0;
    if (m->Beg() != p)
      return 0;
    return user_requested_size;
  }

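  // Fast path used by __sanitizer_get_allocated_size_fast: assumes p is the
  // start of a live allocation, which the caller is expected to have checked.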
  uptr AllocationSizeFast(uptr p) {
    return reinterpret_cast<MemprofChunk *>(p - kChunkHeaderSize)->UsedSize();
  }

  void Purge() { allocator.ForceReleaseToOS(); }

  void PrintStats() { allocator.PrintStats(); }

  void ForceLock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
    allocator.ForceLock();
    fallback_mutex.Lock();
  }

  void ForceUnlock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
    fallback_mutex.Unlock();
    allocator.ForceUnlock();
  }
};

static Allocator instance(LINKER_INITIALIZED);

static MemprofAllocator &get_allocator() { return instance.allocator; }

void InitializeAllocator() { instance.InitLinkerInitialized(); }

void MemprofThreadLocalMallocStorage::CommitBack() {
  instance.CommitBack(this);
}

void PrintInternalAllocatorStats() { instance.PrintStats(); }

void memprof_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type) {
  instance.Deallocate(ptr, 0, 0, stack, alloc_type);
}

void memprof_delete(void *ptr, uptr size, uptr alignment,
                    BufferedStackTrace *stack, AllocType alloc_type) {
  instance.Deallocate(ptr, size, alignment, stack, alloc_type);
}

void *memprof_malloc(uptr size, BufferedStackTrace *stack) {
  return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC));
}

void *memprof_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
  return SetErrnoOnNull(instance.Calloc(nmemb, size, stack));
}

void *memprof_reallocarray(void *p, uptr nmemb, uptr size,
                           BufferedStackTrace *stack) {
  if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
    errno = errno_ENOMEM;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportReallocArrayOverflow(nmemb, size, stack);
  }
  return memprof_realloc(p, nmemb * size, stack);
}

void *memprof_realloc(void *p, uptr size, BufferedStackTrace *stack) {
  if (!p)
    return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC));
  if (size == 0) {
    if (flags()->allocator_frees_and_returns_null_on_realloc_zero) {
      instance.Deallocate(p, 0, 0, stack, FROM_MALLOC);
      return nullptr;
    }
    // Allocate a size of 1 if we shouldn't free() on Realloc to 0
    size = 1;
  }
  return SetErrnoOnNull(instance.Reallocate(p, size, stack));
}

void *memprof_valloc(uptr size, BufferedStackTrace *stack) {
  return SetErrnoOnNull(
      instance.Allocate(size, GetPageSizeCached(), stack, FROM_MALLOC));
}

void *memprof_pvalloc(uptr size, BufferedStackTrace *stack) {
  uptr PageSize = GetPageSizeCached();
  if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) {
    errno = errno_ENOMEM;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportPvallocOverflow(size, stack);
  }
  // pvalloc(0) should allocate one page.
  size = size ? RoundUpTo(size, PageSize) : PageSize;
  return SetErrnoOnNull(instance.Allocate(size, PageSize, stack, FROM_MALLOC));
}

void *memprof_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
                       AllocType alloc_type) {
  if (UNLIKELY(!IsPowerOfTwo(alignment))) {
    errno = errno_EINVAL;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportInvalidAllocationAlignment(alignment, stack);
  }
  return SetErrnoOnNull(instance.Allocate(size, alignment, stack, alloc_type));
}

void *memprof_aligned_alloc(uptr alignment, uptr size,
                            BufferedStackTrace *stack) {
  if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) {
    errno = errno_EINVAL;
    if (AllocatorMayReturnNull())
      return nullptr;
    ReportInvalidAlignedAllocAlignment(size, alignment, stack);
  }
  return SetErrnoOnNull(instance.Allocate(size, alignment, stack, FROM_MALLOC));
}

int memprof_posix_memalign(void **memptr, uptr alignment, uptr size,
                           BufferedStackTrace *stack) {
  if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
    if (AllocatorMayReturnNull())
      return errno_EINVAL;
    ReportInvalidPosixMemalignAlignment(alignment, stack);
  }
  void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC);
  if (UNLIKELY(!ptr))
    // OOM error is already taken care of by Allocate.
    return errno_ENOMEM;
  CHECK(IsAligned((uptr)ptr, alignment));
  *memptr = ptr;
  return 0;
}

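// Returns the start of the allocation containing p, or null if p does not
// belong to a live heap chunk.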
static const void *memprof_malloc_begin(const void *p) {
  u64 user_requested_size;
  MemprofChunk *m =
      instance.GetMemprofChunkByAddr((uptr)p, user_requested_size);
  if (!m)
    return nullptr;
  if (user_requested_size == 0)
    return nullptr;

  return (const void *)m->Beg();
}

uptr memprof_malloc_usable_size(const void *ptr) {
  if (!ptr)
    return 0;
  uptr usable_size = instance.AllocationSize(reinterpret_cast<uptr>(ptr));
  return usable_size;
}

} // namespace __memprof

// ---------------------- Interface ---------------- {{{1
using namespace __memprof;

uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; }

int __sanitizer_get_ownership(const void *p) {
  return memprof_malloc_usable_size(p) != 0;
}

const void *__sanitizer_get_allocated_begin(const void *p) {
  return memprof_malloc_begin(p);
}

uptr __sanitizer_get_allocated_size(const void *p) {
  return memprof_malloc_usable_size(p);
}

uptr __sanitizer_get_allocated_size_fast(const void *p) {
  DCHECK_EQ(p, __sanitizer_get_allocated_begin(p));
  uptr ret = instance.AllocationSizeFast(reinterpret_cast<uptr>(p));
  DCHECK_EQ(ret, __sanitizer_get_allocated_size(p));
  return ret;
}

void __sanitizer_purge_allocator() { instance.Purge(); }

int __memprof_profile_dump() {
  instance.FinishAndWrite();
  // In the future we may want to return non-zero if there are any errors
  // detected during the dumping process.
  return 0;
}

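// Closes the current report file; the next write to the profile will reopen
// it, so a process can dump more than one raw profile.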
void __memprof_profile_reset() {
  if (report_file.fd != kInvalidFd && report_file.fd != kStdoutFd &&
      report_file.fd != kStderrFd) {
    CloseFile(report_file.fd);
    // Setting the file descriptor to kInvalidFd ensures that we will reopen
    // the file when invoking Write again.
    report_file.fd = kInvalidFd;
  }
}