//===-- xray_profiling.cpp --------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This is the implementation of a profiling handler.
//
//===----------------------------------------------------------------------===//
#include <memory>
#include <time.h>

#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_flags.h"
#include "xray/xray_interface.h"
#include "xray/xray_log_interface.h"
#include "xray_buffer_queue.h"
#include "xray_flags.h"
#include "xray_profile_collector.h"
#include "xray_profiling_flags.h"
#include "xray_recursion_guard.h"
#include "xray_tsc.h"
#include "xray_utils.h"
#include <pthread.h>

namespace __xray {

namespace {

static atomic_sint32_t ProfilerLogFlushStatus = {
    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};

static atomic_sint32_t ProfilerLogStatus = {
    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};

static SpinMutex ProfilerOptionsMutex;

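// Per-thread profiling state, published to the pthread key so the thread-exit
// destructor can reach it. Each slot is an atomic holding one of three
// values: 0 (uninitialized), 1 (initialization in progress), or a pointer to
// the live object.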
struct ProfilingData {
  atomic_uintptr_t Allocators;
  atomic_uintptr_t FCT;
};

static pthread_key_t ProfilingKey;

// We use a global buffer queue, which gets initialized once at initialization
// time, and gets reset when profiling is "done".
static std::aligned_storage<sizeof(BufferQueue), alignof(BufferQueue)>::type
    BufferQueueStorage;
static BufferQueue *BQ = nullptr;

thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
                                  alignof(FunctionCallTrie::Allocators)>::type
    AllocatorsStorage;
thread_local std::aligned_storage<sizeof(FunctionCallTrie),
                                  alignof(FunctionCallTrie)>::type
    FunctionCallTrieStorage;
thread_local ProfilingData TLD{{0}, {0}};
thread_local atomic_uint8_t ReentranceGuard{0};

// We use a separate guard to ensure that if this thread is already cleaning
// up, any signal handlers running on it don't attempt to clean up or
// initialize state.
thread_local atomic_uint8_t TLDInitGuard{0};

// We also use a separate latch to signal that the thread is exiting, and
// non-essential work should be ignored (things like recording events, etc.).
thread_local atomic_uint8_t ThreadExitingLatch{0};

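// Lazily sets up this thread's FunctionCallTrie and its allocators, returning
// nullptr if the thread is exiting, if we re-entered through a signal
// handler, or if buffer acquisition fails. The 0 -> 1 -> pointer transitions
// on the TLD slots guarantee that only one frame on this thread performs the
// initialization.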
static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
  thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
    pthread_setspecific(ProfilingKey, &TLD);
    return false;
  }();
  (void)ThreadOnce;

  RecursionGuard TLDInit(TLDInitGuard);
  if (!TLDInit)
    return nullptr;

  if (atomic_load_relaxed(&ThreadExitingLatch))
    return nullptr;

  uptr Allocators = 0;
  if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
                                     memory_order_acq_rel)) {
    bool Success = false;
    auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        atomic_store(&TLD.Allocators, 0, memory_order_release);
    });

    // Acquire a set of buffers for this thread.
    if (BQ == nullptr)
      return nullptr;

    if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.NodeBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.RootsBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=
        BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=
        BufferQueue::ErrorCode::Ok)
      return nullptr;

    Success = true;
    new (&AllocatorsStorage) FunctionCallTrie::Allocators(
        FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));
    Allocators = reinterpret_cast<uptr>(
        reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
    atomic_store(&TLD.Allocators, Allocators, memory_order_release);
  }

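  // A value of 1 here means another frame on this thread (e.g. a signal
  // handler interrupting the CAS winner above) is still mid-initialization;
  // treat the thread-local data as unavailable rather than waiting on it.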
  if (Allocators == 1)
    return nullptr;

  uptr FCT = 0;
  if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) {
    new (&FunctionCallTrieStorage)
        FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(
            atomic_load_relaxed(&TLD.Allocators)));
    FCT = reinterpret_cast<uptr>(
        reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
    atomic_store(&TLD.FCT, FCT, memory_order_release);
  }

  if (FCT == 1)
    return nullptr;

  return &TLD;
}

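// Tears down the thread-local trie and allocators, in that order. The
// pointer-equality checks ensure we only run destructors when a slot holds a
// real pointer into the thread-local storage, not one of the 0/1 latch
// values.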
static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
  auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
  if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
                 &FunctionCallTrieStorage)))
    reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();

  auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
  if (Allocators ==
      reinterpret_cast<uptr>(
          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
    reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
}

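// Hands this thread's trie, allocators, and buffers over to the profile
// collector. The atomic exchanges also clear the TLD slots, so the data can
// be posted at most once per initialization.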
static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
  RecursionGuard TLDInit(TLDInitGuard);
  if (!TLDInit)
    return;

  uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);
  if (P != reinterpret_cast<uptr>(
               reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
    return;

  auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
  DCHECK_NE(FCT, nullptr);

  uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);
  if (A !=
      reinterpret_cast<uptr>(
          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
    return;

  auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
  DCHECK_NE(Allocators, nullptr);

  // Always move the data into the profile collector.
  profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
                                std::move(ThreadBuffers), GetTid());

  // Re-initialize the ThreadBuffers object to a known "default" state.
  ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
}

} // namespace

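// Returns the profiling defaults baked in at build time, if any. As a sketch
// of how this is expected to be wired up (an assumption about the build, not
// something this file enforces), the runtime would be compiled with e.g.
//   -DXRAY_PROFILER_DEFAULT_OPTIONS=no_flush=true:buffers_max=256
// which SANITIZER_STRINGIFY turns into the string parsed at init time.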
const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
#ifdef XRAY_PROFILER_DEFAULT_OPTIONS
  return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS);
#else
  return "";
#endif
}

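// Flushes serialized profile data to a file, unless the no_flush flag is set.
// Buffers are pulled from the profile collector using its iterator protocol:
// seeding nextBuffer() with a null XRayBuffer yields the first buffer, and
// passing each buffer back in yields the next, until Data comes back null.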
XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
  if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
      XRayLogInitStatus::XRAY_LOG_FINALIZED) {
    if (Verbosity())
      Report("Not flushing profiles, profiling has not been finalized.\n");
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  RecursionGuard SignalGuard(ReentranceGuard);
  if (!SignalGuard) {
    if (Verbosity())
      Report("Cannot finalize properly inside a signal handler!\n");
    atomic_store(&ProfilerLogFlushStatus,
                 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
                 memory_order_release);
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
                                 XRayLogFlushStatus::XRAY_LOG_FLUSHING,
                                 memory_order_acq_rel);
  if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
    if (Verbosity())
      Report("Not flushing profiles, implementation still flushing.\n");
    return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
  }

  // At this point, we'll create the file that will contain the profile, but
  // only if the options say so.
  if (!profilingFlags()->no_flush) {
    // First check whether we have data in the profile collector service
    // before we try and write anything down.
    XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0});
    if (B.Data == nullptr) {
      if (Verbosity())
        Report("profiling: No data to flush.\n");
    } else {
      LogWriter *LW = LogWriter::Open();
      if (LW == nullptr) {
        if (Verbosity())
          Report("profiling: Failed to flush to file, dropping data.\n");
      } else {
        // Now for each of the buffers, write out the profile data as we would
        // see it in memory, verbatim.
        while (B.Data != nullptr && B.Size != 0) {
          LW->WriteAll(reinterpret_cast<const char *>(B.Data),
                       reinterpret_cast<const char *>(B.Data) + B.Size);
          B = profileCollectorService::nextBuffer(B);
        }
        LogWriter::Close(LW);
      }
    }
  }

  profileCollectorService::reset();

  atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
               memory_order_release);
  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
               memory_order_release);

  return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
}

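// The arg0 entry/exit handler. The TSC and CPU are read before anything else
// so the recorded timestamp reflects the instrumented function rather than
// the handler's own bookkeeping.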
void profilingHandleArg0(int32_t FuncId,
                         XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
  unsigned char CPU;
  auto TSC = readTSC(CPU);
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return;

  auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
               Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
    return;

  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
               Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
    postCurrentThreadFCT(TLD);
    return;
  }

  auto T = getThreadLocalData();
  if (T == nullptr)
    return;

  auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
  switch (Entry) {
  case XRayEntryType::ENTRY:
  case XRayEntryType::LOG_ARGS_ENTRY:
    FCT->enterFunction(FuncId, TSC, CPU);
    break;
  case XRayEntryType::EXIT:
  case XRayEntryType::TAIL:
    FCT->exitFunction(FuncId, TSC, CPU);
    break;
  default:
    // FIXME: Handle bugs.
    break;
  }
}

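// The profiling mode records call latencies only, so the logged argument is
// deliberately dropped and the arg0 handler is reused.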
void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry,
                         uint64_t) XRAY_NEVER_INSTRUMENT {
  return profilingHandleArg0(FuncId, Entry);
}

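// Moves the implementation from INITIALIZED through FINALIZING to FINALIZED,
// posting this thread's data and forcing serialization in the collector along
// the way. Threads still running handlers observe FINALIZING (or a finalized
// buffer queue) and post their own data from profilingHandleArg0.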
XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_FINALIZING,
                                      memory_order_release)) {
    if (Verbosity())
      Report("Cannot finalize profile, profiling is not initialized.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  // Mark then finalize the current generation of buffers. This lets threads
  // currently holding buffers keep using them, while the last reference
  // performs the memory cleanup.
  DCHECK_NE(BQ, nullptr);
  BQ->finalize();

  // Wait a grace period to allow threads to see that we're finalizing.
  SleepForMillis(profilingFlags()->grace_period_ms);

  // If we for some reason are entering this function from an instrumented
  // handler, we bail out.
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return static_cast<XRayLogInitStatus>(CurrentStatus);

  // Post the current thread's data if we have any.
  postCurrentThreadFCT(TLD);

  // Then we force serialize the log data.
  profileCollectorService::serialize();

  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
               memory_order_release);
  return XRayLogInitStatus::XRAY_LOG_FINALIZED;
}

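// Initializes the profiling implementation. Flags are parsed in increasing
// order of precedence: compiler-defined defaults first, then the
// XRAY_PROFILING_OPTIONS environment variable, then the Options string passed
// in. A minimal usage sketch, assuming an XRay-instrumented binary and the
// standard mode-selection flow (this function is normally reached through
// __xray_log_select_mode / __xray_log_init_mode, not called directly):
//   XRAY_OPTIONS="patch_premain=true xray_mode=xray-profiling" \
//   XRAY_PROFILING_OPTIONS="no_flush=false" ./instrumented-binary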
XRayLogInitStatus
profilingLoggingInit(size_t, size_t, void *Options,
                     size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;

  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_INITIALIZING,
                                      memory_order_acq_rel)) {
    if (Verbosity())
      Report("Cannot initialize an already initialized profiling "
             "implementation.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  {
    SpinMutexLock Lock(&ProfilerOptionsMutex);
    FlagParser ConfigParser;
    ProfilerFlags Flags;
    Flags.setDefaults();
    registerProfilerFlags(&ConfigParser, &Flags);
    ConfigParser.ParseString(profilingCompilerDefinedFlags());
    const char *Env = GetEnv("XRAY_PROFILING_OPTIONS");
    if (Env == nullptr)
      Env = "";
    ConfigParser.ParseString(Env);

    // Then parse the configuration string provided.
    ConfigParser.ParseString(static_cast<const char *>(Options));
    if (Verbosity())
      ReportUnrecognizedFlags();
    *profilingFlags() = Flags;
  }


  // We need to reset the profile data collection implementation now.
  profileCollectorService::reset();

  // Then also reset the buffer queue implementation.
  if (BQ == nullptr) {
    bool Success = false;
    new (&BufferQueueStorage)
        BufferQueue(profilingFlags()->per_thread_allocator_max,
                    profilingFlags()->buffers_max, Success);
    if (!Success) {
      if (Verbosity())
        Report("Failed to initialize preallocated memory buffers!\n");
      atomic_store(&ProfilerLogStatus,
                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                   memory_order_release);
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }

    // If we've succeeded, set the global pointer to the initialized storage.
    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
  } else {
    BQ->finalize();
    auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
                               profilingFlags()->buffers_max);

    if (InitStatus != BufferQueue::ErrorCode::Ok) {
      if (Verbosity())
        Report("Failed to initialize preallocated memory buffers; error: %s\n",
               BufferQueue::getErrorString(InitStatus));
      atomic_store(&ProfilerLogStatus,
                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                   memory_order_release);
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }

    DCHECK(!BQ->finalizing());
  }

  // We need to set up the exit handlers.
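  // A pthread key destructor is the portable per-thread teardown hook: since
  // getThreadLocalData() published &TLD via pthread_setspecific, the
  // destructor below receives each exiting thread's ProfilingData and can
  // post it before the thread goes away. The Atexit handler covers process
  // exit, where thread destructors may not run for the main thread.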
  static pthread_once_t Once = PTHREAD_ONCE_INIT;
  pthread_once(
      &Once, +[] {
        pthread_key_create(
            &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
              if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
                return;

              if (P == nullptr)
                return;

              auto T = reinterpret_cast<ProfilingData *>(P);
              if (atomic_load_relaxed(&T->Allocators) == 0)
                return;

              {
                // If we're somehow executing this while inside a
                // non-reentrant-friendly context, we skip attempting to post
                // the current thread's data.
                RecursionGuard G(ReentranceGuard);
                if (!G)
                  return;

                postCurrentThreadFCT(*T);
              }
            });

        // We also need to set up an exit handler, so that we can get the
        // profile information at exit time. We use the C API to do this, to
        // not rely on C++ ABI functions for registering exit handlers.
        Atexit(+[]() XRAY_NEVER_INSTRUMENT {
          if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
            return;

          auto Cleanup =
              at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });

          // Finalize and flush.
          if (profilingFinalize() != XRAY_LOG_FINALIZED ||
              profilingFlush() != XRAY_LOG_FLUSHED)
            return;

          if (Verbosity())
            Report("XRay Profile flushed at exit.\n");
        });
      });

  __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
  __xray_set_handler(profilingHandleArg0);
  __xray_set_handler_arg1(profilingHandleArg1);

  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
               memory_order_release);
  if (Verbosity())
    Report("XRay Profiling init successful.\n");

  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}

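// Registers the profiling implementation as the "xray-profiling" mode with
// the XRay logging framework, and selects it eagerly when the xray_mode flag
// asks for it. This runs at static-initialization time via the file-scope
// Unused variable below.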
bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
  // Set up the flag defaults from the static defaults and the
  // compiler-provided defaults.
  {
    SpinMutexLock Lock(&ProfilerOptionsMutex);
    auto *F = profilingFlags();
    F->setDefaults();
    FlagParser ProfilingParser;
    registerProfilerFlags(&ProfilingParser, F);
    ProfilingParser.ParseString(profilingCompilerDefinedFlags());
  }

  XRayLogImpl Impl{
      profilingLoggingInit,
      profilingFinalize,
      profilingHandleArg0,
      profilingFlush,
  };
  auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
    if (Verbosity())
      Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
             "%d\n",
             RegistrationResult);
    return false;
  }

  // Note: the mode name must match the one registered above.
  if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
    __xray_log_select_mode("xray-profiling");
  return true;
}

} // namespace __xray

static auto UNUSED Unused = __xray::profilingDynamicInitializer();
