//===- CtxInstrProfiling.cpp - contextual instrumented PGO ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "CtxInstrProfiling.h"
#include "RootAutoDetector.h"
#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_atomic_clang.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_dense_map.h"
#include "sanitizer_common/sanitizer_libc.h"
#include "sanitizer_common/sanitizer_mutex.h"
#include "sanitizer_common/sanitizer_placement_new.h"
#include "sanitizer_common/sanitizer_thread_safety.h"
#include "sanitizer_common/sanitizer_vector.h"

#include <assert.h>

using namespace __ctx_profile;

namespace {
// Keep track of all the context roots we actually saw, so we can then traverse
// them when the user asks for the profile in __llvm_ctx_profile_fetch
__sanitizer::SpinMutex AllContextsMutex;
SANITIZER_GUARDED_BY(AllContextsMutex)
__sanitizer::Vector<ContextRoot *> AllContextRoots;

__sanitizer::atomic_uintptr_t AllFunctionsData = {};

// Keep all the functions for which we collect a flat profile in a linked list.
__sanitizer::SpinMutex FlatCtxArenaMutex;
SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
Arena *FlatCtxArenaHead = nullptr;
SANITIZER_GUARDED_BY(FlatCtxArenaMutex)
Arena *FlatCtxArena = nullptr;

// Set to true when we enter a root, and false when we exit - regardless of
// whether this thread collects a contextual profile for that root.
__thread bool IsUnderContext = false;
__sanitizer::atomic_uint8_t ProfilingStarted = {};

__sanitizer::atomic_uintptr_t RootDetector = {};
RootAutoDetector *getRootDetector() {
  return reinterpret_cast<RootAutoDetector *>(
      __sanitizer::atomic_load_relaxed(&RootDetector));
}

// Utility to taint a pointer by setting the LSB. There is an assumption
// throughout that the addresses of contexts are even (really, they should be
// align(8), but "even"-ness is the minimum assumption).
// "Scratch contexts" are buffers that we return in certain cases - they are
// large enough to allow for memory-safe counter access, but they don't link
// subcontexts below them (the runtime recognizes them and enforces that).
ContextNode *markAsScratch(const ContextNode *Ctx) {
  return reinterpret_cast<ContextNode *>(reinterpret_cast<uint64_t>(Ctx) | 1);
}
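// The inverse check, isScratch(), is declared in the header and used below. A
// minimal sketch of the idea (illustrative, assuming only bit 0 carries the
// taint): isScratch(P) is essentially (reinterpret_cast<uint64_t>(P) & 1).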

// Used when getting the data from TLS. We don't *really* need to reset, but
// it's a simpler system if we do.
template <typename T> inline T consume(T &V) {
  auto R = V;
  V = {0};
  return R;
}

// We allocate at least kBuffSize Arena pages. The scratch buffer is also that
// large.
constexpr size_t kPower = 20;
constexpr size_t kBuffSize = 1 << kPower;

// Highly unlikely we need more than kBuffSize for a context.
size_t getArenaAllocSize(size_t Needed) {
  if (Needed >= kBuffSize)
    return 2 * Needed;
  return kBuffSize;
}
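// Worked example (illustrative): with kPower == 20, kBuffSize is 1 MiB. A
// request of a few hundred bytes gets the default 1 MiB page, while a
// hypothetical 3 MiB request (>= kBuffSize) gets a 6 MiB page (2 * Needed).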

// verify the structural integrity of the context
bool validate(const ContextRoot *Root) {
  // all contexts should be laid out in some arena page. Go over each arena
  // allocated for this Root, and jump over contained contexts based on
  // self-reported sizes.
  __sanitizer::DenseMap<uint64_t, bool> ContextStartAddrs;
  for (const auto *Mem = Root->FirstMemBlock; Mem; Mem = Mem->next()) {
    const auto *Pos = Mem->start();
    while (Pos < Mem->pos()) {
      const auto *Ctx = reinterpret_cast<const ContextNode *>(Pos);
      if (!ContextStartAddrs.insert({reinterpret_cast<uint64_t>(Ctx), true})
               .second)
        return false;
      Pos += Ctx->size();
    }
  }

  // Now traverse the contexts again the same way, but validate that all
  // non-null subcontext addresses appear in the set computed above.
  for (const auto *Mem = Root->FirstMemBlock; Mem; Mem = Mem->next()) {
    const auto *Pos = Mem->start();
    while (Pos < Mem->pos()) {
      const auto *Ctx = reinterpret_cast<const ContextNode *>(Pos);
      for (uint32_t I = 0; I < Ctx->callsites_size(); ++I)
        for (auto *Sub = Ctx->subContexts()[I]; Sub; Sub = Sub->next())
          if (!ContextStartAddrs.find(reinterpret_cast<uint64_t>(Sub)))
            return false;

      Pos += Ctx->size();
    }
  }
  return true;
}

inline ContextNode *allocContextNode(char *Place, GUID Guid,
                                     uint32_t NumCounters,
                                     uint32_t NumCallsites,
                                     ContextNode *Next = nullptr) {
  assert(reinterpret_cast<uint64_t>(Place) % ExpectedAlignment == 0);
  return new (Place) ContextNode(Guid, NumCounters, NumCallsites, Next);
}

void resetContextNode(ContextNode &Node) {
  // FIXME(mtrofin): this is std::memset, which we can probably use if we
  // drop/reduce the dependency on sanitizer_common.
  for (uint32_t I = 0; I < Node.counters_size(); ++I)
    Node.counters()[I] = 0;
  for (uint32_t I = 0; I < Node.callsites_size(); ++I)
    for (auto *Next = Node.subContexts()[I]; Next; Next = Next->next())
      resetContextNode(*Next);
}

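// Entering a context bumps counter slot 0, which this runtime treats as the
// context's entry count.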
ContextNode *onContextEnter(ContextNode &Node) {
  ++Node.counters()[0];
  return &Node;
}

} // namespace

// the scratch buffer - what we give when we can't produce a real context (the
// scratch isn't "real" in that it's expected to be clobbered carelessly - we
// don't read it). The other important thing is that the callees from a scratch
// context also get a scratch context.
// Eventually this can be replaced with per-function buffers, a la the typical
// (flat) instrumented FDO buffers. The clobbering aspect won't apply there, but
// the part about determining the nature of the subcontexts does.
__thread char __Buffer[kBuffSize] = {0};

#define TheScratchContext                                                      \
  markAsScratch(reinterpret_cast<ContextNode *>(__Buffer))

// init the TLSes
__thread void *volatile __llvm_ctx_profile_expected_callee[2] = {nullptr,
                                                                  nullptr};
__thread ContextNode **volatile __llvm_ctx_profile_callsite[2] = {0, 0};
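// Index 0 of these two-element arrays serves regular calls; index 1 is used by
// call graphs rooted in signal handlers (see the discussion in
// __llvm_ctx_profile_get_context below), limiting interference between a
// handler's instrumentation and the interrupted call sequence.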

__thread ContextRoot *volatile __llvm_ctx_profile_current_context_root =
    nullptr;

Arena::Arena(uint32_t Size) : Size(Size) {
  __sanitizer::internal_memset(start(), 0, Size);
}

// FIXME(mtrofin): use malloc / mmap instead of sanitizer common APIs to reduce
// the dependency on the latter.
Arena *Arena::allocateNewArena(size_t Size, Arena *Prev) {
  assert(!Prev || Prev->Next == nullptr);
  Arena *NewArena = new (__sanitizer::InternalAlloc(
      Size + sizeof(Arena), /*cache=*/nullptr, /*alignment=*/ExpectedAlignment))
      Arena(Size);
  if (Prev)
    Prev->Next = NewArena;
  return NewArena;
}

void Arena::freeArenaList(Arena *&A) {
  assert(A);
  for (auto *I = A; I != nullptr;) {
    auto *Current = I;
    I = I->Next;
    __sanitizer::InternalFree(Current);
  }
  A = nullptr;
}

// If this is the first time we hit a callsite with this (Guid) particular
// callee, we need to allocate.
ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
                             uint32_t NumCounters, uint32_t NumCallsites) {
  auto AllocSize = ContextNode::getAllocSize(NumCounters, NumCallsites);
  auto *Mem = __llvm_ctx_profile_current_context_root->CurrentMem;
  char *AllocPlace = Mem->tryBumpAllocate(AllocSize);
  if (!AllocPlace) {
    // if we failed to allocate on the current arena, allocate a new arena,
    // and place it on __llvm_ctx_profile_current_context_root->CurrentMem so we
    // find it from now on for other cases when we need to getCallsiteSlow.
    // Note that allocateNewArena will link the allocated memory in the list of
    // Arenas.
    __llvm_ctx_profile_current_context_root->CurrentMem = Mem =
        Mem->allocateNewArena(getArenaAllocSize(AllocSize), Mem);
    AllocPlace = Mem->tryBumpAllocate(AllocSize);
  }
  auto *Ret = allocContextNode(AllocPlace, Guid, NumCounters, NumCallsites,
                               *InsertionPoint);
  *InsertionPoint = Ret;
  return Ret;
}

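// Lazily allocate, at most once per function, the counters for a flat
// (context-less) profile. The ContextNode is bump-allocated from the shared
// FlatCtxArena chain, cached in Data.FlatCtx, and the FunctionData is pushed
// onto the global AllFunctionsData list so __llvm_ctx_profile_fetch can find
// it later.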
ContextNode *getFlatProfile(FunctionData &Data, void *Callee, GUID Guid,
                            uint32_t NumCounters) {
  if (ContextNode *Existing = Data.FlatCtx)
    return Existing;
  {
    // We could instead try to take the lock and, if that fails, return
    // TheScratchContext. But that could leave message pump loops more sparsely
    // profiled than everything else. Maybe that doesn't matter, and we can
    // optimize this later.
    __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Data.Mutex);
    if (ContextNode *Existing = Data.FlatCtx)
      return Existing;

    auto NeededSize = ContextNode::getAllocSize(NumCounters, 0);
    char *AllocBuff = nullptr;
    {
      __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> FL(
          &FlatCtxArenaMutex);
      if (FlatCtxArena)
        AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
      if (!AllocBuff) {
        FlatCtxArena = Arena::allocateNewArena(getArenaAllocSize(NeededSize),
                                               FlatCtxArena);
        AllocBuff = FlatCtxArena->tryBumpAllocate(NeededSize);
      }
      if (!FlatCtxArenaHead)
        FlatCtxArenaHead = FlatCtxArena;
    }
    auto *Ret = allocContextNode(AllocBuff, Guid, NumCounters, 0);
    Data.FlatCtx = Ret;

    Data.EntryAddress = Callee;
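    // Publish this FunctionData on the global lock-free list: read the current
    // head into Data.Next, then CAS the head to &Data. On failure, the CAS
    // refreshes Data.Next with the observed head, so the loop simply retries.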
    Data.Next = reinterpret_cast<FunctionData *>(
        __sanitizer::atomic_load_relaxed(&AllFunctionsData));
    while (!__sanitizer::atomic_compare_exchange_strong(
        &AllFunctionsData, reinterpret_cast<uintptr_t *>(&Data.Next),
        reinterpret_cast<uintptr_t>(&Data),
        __sanitizer::memory_order_release)) {
    }
  }

  return Data.FlatCtx;
}

// This should be called once for a Root. Allocate the first arena, set up the
// first context.
void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
                  uint32_t NumCallsites) {
  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
      &AllContextsMutex);
  // Re-check - we got here without having taken a lock.
  if (Root->FirstMemBlock)
    return;
  const auto Needed = ContextNode::getAllocSize(NumCounters, NumCallsites);
  auto *M = Arena::allocateNewArena(getArenaAllocSize(Needed));
  Root->FirstMemBlock = M;
  Root->CurrentMem = M;
  Root->FirstNode = allocContextNode(M->tryBumpAllocate(Needed), Guid,
                                     NumCounters, NumCallsites);
  AllContextRoots.PushBack(Root);
}

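// Double-checked allocation of this function's ContextRoot: an unsynchronized
// read first, then a re-read under Mutex before allocating, so exactly one
// thread creates the root. If canBeRoot() (declared in the header) rules this
// function out as a root, whatever CtxRoot currently holds is returned as-is.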
ContextRoot *FunctionData::getOrAllocateContextRoot() {
  auto *Root = CtxRoot;
  if (!canBeRoot(Root))
    return Root;
  if (Root)
    return Root;
  __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Mutex);
  Root = CtxRoot;
  if (!Root) {
    Root = new (__sanitizer::InternalAlloc(sizeof(ContextRoot))) ContextRoot();
    CtxRoot = Root;
  }

  assert(Root);
  return Root;
}

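// Attempt to start collecting a contextual profile under Root. The entry is
// always counted in Root->TotalEntries; the arena and first ContextNode are
// set up lazily on first entry. Only the thread that wins Root->Taken collects
// into the real context tree - other threads get the scratch context.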
ContextNode *tryStartContextGivenRoot(ContextRoot *Root, GUID Guid,
                                      uint32_t Counters, uint32_t Callsites)
    SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  IsUnderContext = true;
  __sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
                                __sanitizer::memory_order_relaxed);
  if (!Root->FirstMemBlock) {
    setupContext(Root, Guid, Counters, Callsites);
  }
  if (Root->Taken.TryLock()) {
    __llvm_ctx_profile_current_context_root = Root;
    onContextEnter(*Root->FirstNode);
    return Root->FirstNode;
  }
  // If this thread couldn't take the lock, return scratch context.
  __llvm_ctx_profile_current_context_root = nullptr;
  return TheScratchContext;
}

ContextNode *getUnhandledContext(FunctionData &Data, void *Callee, GUID Guid,
                                 uint32_t NumCounters, uint32_t NumCallsites,
                                 ContextRoot *CtxRoot) {

  // 1) if we are currently collecting a contextual profile, fetch a ContextNode
  // in the `Unhandled` set. We want to do this regardless of `ProfilingStarted`
  // to (hopefully) offset the penalty of creating these contexts to before
  // profiling.
  //
  // 2) if we are under a root (regardless of whether this thread collects a
  // contextual profile for that root), do not collect a flat profile. We want
  // to keep flat profiles only for activations that can't happen under a root,
  // to avoid confusing profiles. We can, for example, combine flattened and
  // flat profiles meaningfully, as we wouldn't double-count anything.
  //
  // 3) to avoid lengthy startup, don't bother with flat profiles until
  // profiling has started. We would reset them anyway when profiling starts.
  // HOWEVER. This does lose profiling for message pumps: those functions are
  // entered once and never exit. They should be assumed to be entered before
  // profiling starts - because profiling should start after the server is up
  // and running (which is equivalent to "message pumps are set up").
  if (!CtxRoot) {
    if (auto *RAD = getRootDetector())
      RAD->sample();
    else if (auto *CR = Data.CtxRoot) {
      if (canBeRoot(CR))
        return tryStartContextGivenRoot(CR, Guid, NumCounters, NumCallsites);
    }
    if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
      return TheScratchContext;
    else
      return markAsScratch(
          onContextEnter(*getFlatProfile(Data, Callee, Guid, NumCounters)));
  }
  auto [Iter, Ins] = CtxRoot->Unhandled.insert({Guid, nullptr});
  if (Ins)
    Iter->second = getCallsiteSlow(Guid, &CtxRoot->FirstUnhandledCalleeNode,
                                   NumCounters, 0);
  return markAsScratch(onContextEnter(*Iter->second));
}

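// Roughly, the contract with the instrumentation (a sketch, not the exact IR
// the lowering pass emits; names like FData and ThisFn are illustrative): on
// entry, an instrumented function calls
//   __llvm_ctx_profile_get_context(&FData, &ThisFn, Guid, NumCounters,
//                                  NumCallsites)
// to obtain the ContextNode whose counters it bumps. Right before callsite I,
// it announces the call by storing the callee's address into
// __llvm_ctx_profile_expected_callee[0] and the address of
// Ctx->subContexts()[I] into __llvm_ctx_profile_callsite[0]; the callee's own
// get_context call then consume()s both below.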
ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
                                            GUID Guid, uint32_t NumCounters,
                                            uint32_t NumCallsites) {
  auto *CtxRoot = __llvm_ctx_profile_current_context_root;
  // fast "out" if we're not even doing contextual collection.
  if (!CtxRoot)
    return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
                               nullptr);

  // also fast "out" if the caller is scratch. We can see if it's scratch by
  // looking at the interior pointer into the subcontexts vector that the caller
  // provided: if the context is scratch, so is that interior pointer (because
  // all the address calculations use even - more precisely, 8-aligned -
  // values).
  auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]);
  if (!CallsiteContext || isScratch(CallsiteContext))
    return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
                               CtxRoot);

  // if the callee isn't the expected one, return scratch.
  // Signal handler(s) could have been invoked at any point in the execution.
  // Should that have happened, and had it (the handler) been built with
  // instrumentation, its __llvm_ctx_profile_get_context would have failed here.
  // Its sub call graph would have then populated
  // __llvm_ctx_profile_{expected_callee | callsite} at index 1.
  // The normal call graph may be impacted in that, if the signal handler
  // happened somewhere before we read the TLS here, we'd see the TLS reset and
  // we'd also fail here. That would just mean we would lose counter values for
  // the normal subgraph, this time around. That should be very unlikely, but if
  // it happens too frequently, we should be able to detect discrepancies in
  // entry counts (caller-callee). At the moment, the design goes on the
  // assumption that this is so infrequent that it's not worth doing more for
  // that case.
  auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]);
  if (ExpectedCallee != Callee)
    return getUnhandledContext(*Data, Callee, Guid, NumCounters, NumCallsites,
                               CtxRoot);

  auto *Callsite = *CallsiteContext;
  // in the case of indirect calls, we will have all seen targets forming a
  // linked list here. Find the one corresponding to this callee.
  while (Callsite && Callsite->guid() != Guid) {
    Callsite = Callsite->next();
  }
  auto *Ret = Callsite ? Callsite
                       : getCallsiteSlow(Guid, CallsiteContext, NumCounters,
                                         NumCallsites);
  if (Ret->callsites_size() != NumCallsites ||
      Ret->counters_size() != NumCounters)
    __sanitizer::Printf("[ctxprof] Returned ctx differs from what's asked: "
                        "Context: %p, Asked: %lu %u %u, Got: %lu %u %u \n",
                        reinterpret_cast<void *>(Ret), Guid, NumCallsites,
                        NumCounters, Ret->guid(), Ret->callsites_size(),
                        Ret->counters_size());
  onContextEnter(*Ret);
  return Ret;
}

ContextNode *__llvm_ctx_profile_start_context(FunctionData *FData, GUID Guid,
                                              uint32_t Counters,
                                              uint32_t Callsites) {
  auto *Root = FData->getOrAllocateContextRoot();
  assert(canBeRoot(Root));
  return tryStartContextGivenRoot(Root, Guid, Counters, Callsites);
}

void __llvm_ctx_profile_release_context(FunctionData *FData)
    SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  const auto *CurrentRoot = __llvm_ctx_profile_current_context_root;
  auto *CR = FData->CtxRoot;
  if (!CurrentRoot || CR != CurrentRoot)
    return;
  IsUnderContext = false;
  assert(CR && canBeRoot(CR));
  __llvm_ctx_profile_current_context_root = nullptr;
  CR->Taken.Unlock();
}

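// Start (or restart) collection: under each root's Taken lock, zero the
// accumulated contextual counters and entry counts; optionally kick off the
// root auto-detector (when AutodetectDuration is nonzero); then flip
// ProfilingStarted so flat profiles start accumulating as well.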
void __llvm_ctx_profile_start_collection(unsigned AutodetectDuration) {
  size_t NumMemUnits = 0;
  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
      &AllContextsMutex);
  for (uint32_t I = 0; I < AllContextRoots.Size(); ++I) {
    auto *Root = AllContextRoots[I];
    __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> Lock(
        &Root->Taken);
    for (auto *Mem = Root->FirstMemBlock; Mem; Mem = Mem->next())
      ++NumMemUnits;

    resetContextNode(*Root->FirstNode);
    if (Root->FirstUnhandledCalleeNode)
      resetContextNode(*Root->FirstUnhandledCalleeNode);
    __sanitizer::atomic_store_relaxed(&Root->TotalEntries, 0);
  }
  if (AutodetectDuration) {
    // We leak RD intentionally. Knowing when to free it is tricky - there's a
    // race condition with functions observing `RootDetector` as non-null.
    // This can be addressed, but the alternatives have some added complexity
    // and it's not (yet) worth it.
    auto *RD = new (__sanitizer::InternalAlloc(sizeof(RootAutoDetector)))
        RootAutoDetector(AllFunctionsData, RootDetector, AutodetectDuration);
    RD->start();
  } else {
    __sanitizer::Printf("[ctxprof] Initial NumMemUnits: %zu \n", NumMemUnits);
  }
  __sanitizer::atomic_store_relaxed(&ProfilingStarted, true);
}

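// __llvm_ctx_profile_fetch hands the collected data to a caller-provided
// ProfileWriter (declared in the header). A minimal consumer sketch,
// illustrative only: implement startContextSection / writeContextual /
// endContextSection for the contextual trees, and startFlatSection /
// writeFlat / endFlatSection for the flat counters, serializing from those
// callbacks; the production writer is expected to live outside this runtime.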
bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
  __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
  if (auto *RD = getRootDetector()) {
    __sanitizer::Printf("[ctxprof] Expected the root autodetector to have "
                        "finished well before attempting to fetch a context");
    RD->join();
  }

  __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
      &AllContextsMutex);

  Writer.startContextSection();
  for (int I = 0, E = AllContextRoots.Size(); I < E; ++I) {
    auto *Root = AllContextRoots[I];
    __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> TakenLock(
        &Root->Taken);
    if (!validate(Root)) {
      __sanitizer::Printf("[ctxprof] Contextual Profile is %s\n", "invalid");
      return false;
    }
    Writer.writeContextual(
        *Root->FirstNode, Root->FirstUnhandledCalleeNode,
        __sanitizer::atomic_load_relaxed(&Root->TotalEntries));
  }
  Writer.endContextSection();
  Writer.startFlatSection();
  // The list progresses behind the head, so taking this snapshot allows the
  // list to grow concurrently without causing a race condition with our
  // traversing it.
  const auto *Pos = reinterpret_cast<const FunctionData *>(
      __sanitizer::atomic_load_relaxed(&AllFunctionsData));
  for (; Pos; Pos = Pos->Next) {
    const auto *CR = Pos->CtxRoot;
    if (!CR && canBeRoot(CR)) {
      const auto *FP = Pos->FlatCtx;
      Writer.writeFlat(FP->guid(), FP->counters(), FP->counters_size());
    }
  }
  Writer.endFlatSection();
  return true;
}

void __llvm_ctx_profile_free() {
  __sanitizer::atomic_store_relaxed(&ProfilingStarted, false);
  {
    __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
        &AllContextsMutex);
    for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
      for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
        auto *C = A;
        A = A->next();
        __sanitizer::InternalFree(C);
      }
    AllContextRoots.Reset();
  }
  __sanitizer::atomic_store_relaxed(&AllFunctionsData, 0U);
  {
    __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
        &FlatCtxArenaMutex);
    FlatCtxArena = nullptr;
    for (auto *A = FlatCtxArenaHead; A;) {
      auto *C = A;
      A = C->next();
      __sanitizer::InternalFree(C);
    }

    FlatCtxArenaHead = nullptr;
  }
}