1 | //===-- xray_fdr_logging.cpp -----------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file is a part of XRay, a dynamic runtime instrumentation system. |
10 | // |
11 | // Here we implement the Flight Data Recorder mode for XRay, where we use |
12 | // compact structures to store records in memory as well as when writing out the |
13 | // data to files. |
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | #include "xray_fdr_logging.h" |
17 | #include <cassert> |
18 | #include <cstddef> |
19 | #include <errno.h> |
20 | #include <limits> |
21 | #include <memory> |
22 | #include <pthread.h> |
23 | #include <sys/time.h> |
24 | #include <time.h> |
25 | #include <unistd.h> |
26 | |
27 | #include "sanitizer_common/sanitizer_allocator_internal.h" |
28 | #include "sanitizer_common/sanitizer_atomic.h" |
29 | #include "sanitizer_common/sanitizer_common.h" |
30 | #include "xray/xray_interface.h" |
31 | #include "xray/xray_records.h" |
32 | #include "xray_allocator.h" |
33 | #include "xray_buffer_queue.h" |
34 | #include "xray_defs.h" |
35 | #include "xray_fdr_controller.h" |
36 | #include "xray_fdr_flags.h" |
37 | #include "xray_fdr_log_writer.h" |
38 | #include "xray_flags.h" |
39 | #include "xray_recursion_guard.h" |
40 | #include "xray_tsc.h" |
41 | #include "xray_utils.h" |
42 | |
43 | namespace __xray { |
44 | |
45 | static atomic_sint32_t LoggingStatus = { |
46 | .val_dont_use: XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; |
47 | |
48 | namespace { |
49 | |
50 | // Group together thread-local-data in a struct, then hide it behind a function |
51 | // call so that it can be initialized on first use instead of as a global. We |
52 | // force the alignment to 64-bytes for x86 cache line alignment, as this |
53 | // structure is used in the hot path of implementation. |
54 | struct XRAY_TLS_ALIGNAS(64) ThreadLocalData { |
55 | BufferQueue::Buffer Buffer{}; |
56 | BufferQueue *BQ = nullptr; |
57 | |
58 | using LogWriterStorage = |
59 | typename std::aligned_storage<sizeof(FDRLogWriter), |
60 | alignof(FDRLogWriter)>::type; |
61 | |
62 | LogWriterStorage LWStorage; |
63 | FDRLogWriter *Writer = nullptr; |
64 | |
65 | using ControllerStorage = |
66 | typename std::aligned_storage<sizeof(FDRController<>), |
67 | alignof(FDRController<>)>::type; |
68 | ControllerStorage CStorage; |
69 | FDRController<> *Controller = nullptr; |
70 | }; |
71 | |
72 | } // namespace |
73 | |
74 | static_assert(std::is_trivially_destructible<ThreadLocalData>::value, |
75 | "ThreadLocalData must be trivially destructible" ); |
76 | |
77 | // Use a global pthread key to identify thread-local data for logging. |
78 | static pthread_key_t Key; |
79 | |
80 | // Global BufferQueue. |
81 | static std::aligned_storage<sizeof(BufferQueue)>::type BufferQueueStorage; |
82 | static BufferQueue *BQ = nullptr; |
83 | |
84 | // Global thresholds for function durations. |
85 | static atomic_uint64_t ThresholdTicks{.val_dont_use: 0}; |
86 | |
87 | // Global for ticks per second. |
88 | static atomic_uint64_t TicksPerSec{.val_dont_use: 0}; |
89 | |
90 | static atomic_sint32_t LogFlushStatus = { |
91 | .val_dont_use: XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; |
92 | |
93 | // This function will initialize the thread-local data structure used by the FDR |
94 | // logging implementation and return a reference to it. The implementation |
95 | // details require a bit of care to maintain. |
96 | // |
97 | // First, some requirements on the implementation in general: |
98 | // |
99 | // - XRay handlers should not call any memory allocation routines that may |
100 | // delegate to an instrumented implementation. This means functions like |
101 | // malloc() and free() should not be called while instrumenting. |
102 | // |
103 | // - We would like to use some thread-local data initialized on first-use of |
104 | // the XRay instrumentation. These allow us to implement unsynchronized |
105 | // routines that access resources associated with the thread. |
106 | // |
107 | // The implementation here uses a few mechanisms that allow us to provide both |
108 | // the requirements listed above. We do this by: |
109 | // |
110 | // 1. Using a thread-local aligned storage buffer for representing the |
111 | // ThreadLocalData struct. This data will be uninitialized memory by |
112 | // design. |
113 | // |
114 | // 2. Not requiring a thread exit handler/implementation, keeping the |
115 | // thread-local as purely a collection of references/data that do not |
116 | // require cleanup. |
117 | // |
118 | // We're doing this to avoid using a `thread_local` object that has a |
119 | // non-trivial destructor, because the C++ runtime might call std::malloc(...) |
120 | // to register calls to destructors. Deadlocks may arise when, for example, an |
121 | // externally provided malloc implementation is XRay instrumented, and |
122 | // initializing the thread-locals involves calling into malloc. A malloc |
123 | // implementation that does global synchronization might be holding a lock for a |
124 | // critical section, calling a function that might be XRay instrumented (and |
125 | // thus in turn calling into malloc by virtue of registration of the |
126 | // thread_local's destructor). |
127 | #if XRAY_HAS_TLS_ALIGNAS |
128 | static_assert(alignof(ThreadLocalData) >= 64, |
129 | "ThreadLocalData must be cache line aligned." ); |
130 | #endif |
131 | static ThreadLocalData &getThreadLocalData() { |
132 | thread_local typename std::aligned_storage< |
133 | sizeof(ThreadLocalData), alignof(ThreadLocalData)>::type TLDStorage{}; |
134 | |
135 | if (pthread_getspecific(key: Key) == NULL) { |
136 | new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{}; |
137 | pthread_setspecific(key: Key, pointer: &TLDStorage); |
138 | } |
139 | |
140 | return *reinterpret_cast<ThreadLocalData *>(&TLDStorage); |
141 | } |
142 | |
143 | static XRayFileHeader &() { |
144 | alignas(XRayFileHeader) static std::byte HStorage[sizeof(XRayFileHeader)]; |
145 | static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; |
146 | static bool TSCSupported = true; |
147 | static uint64_t CycleFrequency = NanosecondsPerSecond; |
148 | pthread_once( |
149 | once_control: &OnceInit, init_routine: +[] { |
150 | XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage); |
151 | // Version 2 of the log writes the extents of the buffer, instead of |
152 | // relying on an end-of-buffer record. |
153 | // Version 3 includes PID metadata record. |
154 | // Version 4 includes CPU data in the custom event records. |
155 | // Version 5 uses relative deltas for custom and typed event records, |
156 | // and removes the CPU data in custom event records (similar to how |
157 | // function records use deltas instead of full TSCs and rely on other |
158 | // metadata records for TSC wraparound and CPU migration). |
159 | H.Version = 5; |
160 | H.Type = FileTypes::FDR_LOG; |
161 | |
162 | // Test for required CPU features and cache the cycle frequency |
163 | TSCSupported = probeRequiredCPUFeatures(); |
164 | if (TSCSupported) |
165 | CycleFrequency = getTSCFrequency(); |
166 | H.CycleFrequency = CycleFrequency; |
167 | |
168 | // FIXME: Actually check whether we have 'constant_tsc' and |
169 | // 'nonstop_tsc' before setting the values in the header. |
170 | H.ConstantTSC = 1; |
171 | H.NonstopTSC = 1; |
172 | }); |
173 | return reinterpret_cast<XRayFileHeader &>(HStorage); |
174 | } |
175 | |
176 | // This is the iterator implementation, which knows how to handle FDR-mode |
177 | // specific buffers. This is used as an implementation of the iterator function |
178 | // needed by __xray_set_buffer_iterator(...). It maintains a global state of the |
179 | // buffer iteration for the currently installed FDR mode buffers. In particular: |
180 | // |
181 | // - If the argument represents the initial state of XRayBuffer ({nullptr, 0}) |
182 | // then the iterator returns the header information. |
183 | // - If the argument represents the header information ({address of header |
184 | // info, size of the header info}) then it returns the first FDR buffer's |
185 | // address and extents. |
186 | // - It will keep returning the next buffer and extents as there are more |
187 | // buffers to process. When the input represents the last buffer, it will |
188 | // return the initial state to signal completion ({nullptr, 0}). |
189 | // |
190 | // See xray/xray_log_interface.h for more details on the requirements for the |
191 | // implementations of __xray_set_buffer_iterator(...) and |
192 | // __xray_log_process_buffers(...). |
193 | XRayBuffer fdrIterator(const XRayBuffer B) { |
194 | DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr" ) == 0); |
195 | DCHECK(BQ->finalizing()); |
196 | |
197 | if (BQ == nullptr || !BQ->finalizing()) { |
198 | if (Verbosity()) |
199 | Report( |
200 | format: "XRay FDR: Failed global buffer queue is null or not finalizing!\n" ); |
201 | return {.Data: nullptr, .Size: 0}; |
202 | } |
203 | |
204 | // We use a global scratch-pad for the header information, which only gets |
205 | // initialized the first time this function is called. We'll update one part |
206 | // of this information with some relevant data (in particular the number of |
207 | // buffers to expect). |
208 | alignas( |
209 | XRayFileHeader) static std::byte [sizeof(XRayFileHeader)]; |
210 | static pthread_once_t = PTHREAD_ONCE_INIT; |
211 | pthread_once( |
212 | once_control: &HeaderOnce, init_routine: +[] { |
213 | reinterpret_cast<XRayFileHeader &>(HeaderStorage) = |
214 | fdrCommonHeaderInfo(); |
215 | }); |
216 | |
217 | // We use a convenience alias for code referring to Header from here on out. |
218 | auto & = reinterpret_cast<XRayFileHeader &>(HeaderStorage); |
219 | if (B.Data == nullptr && B.Size == 0) { |
220 | Header.FdrData = FdrAdditionalHeaderData{.ThreadBufferSize: BQ->ConfiguredBufferSize()}; |
221 | return XRayBuffer{.Data: static_cast<void *>(&Header), .Size: sizeof(Header)}; |
222 | } |
223 | |
224 | static BufferQueue::const_iterator It{}; |
225 | static BufferQueue::const_iterator End{}; |
226 | static uint8_t *CurrentBuffer{nullptr}; |
227 | static size_t SerializedBufferSize = 0; |
228 | if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) { |
229 | // From this point on, we provide raw access to the raw buffer we're getting |
230 | // from the BufferQueue. We're relying on the iterators from the current |
231 | // Buffer queue. |
232 | It = BQ->cbegin(); |
233 | End = BQ->cend(); |
234 | } |
235 | |
236 | if (CurrentBuffer != nullptr) { |
237 | deallocateBuffer(B: CurrentBuffer, S: SerializedBufferSize); |
238 | CurrentBuffer = nullptr; |
239 | } |
240 | |
241 | if (It == End) |
242 | return {.Data: nullptr, .Size: 0}; |
243 | |
244 | // Set up the current buffer to contain the extents like we would when writing |
245 | // out to disk. The difference here would be that we still write "empty" |
246 | // buffers, or at least go through the iterators faithfully to let the |
247 | // handlers see the empty buffers in the queue. |
248 | // |
249 | // We need this atomic fence here to ensure that writes happening to the |
250 | // buffer have been committed before we load the extents atomically. Because |
251 | // the buffer is not explicitly synchronised across threads, we rely on the |
252 | // fence ordering to ensure that writes we expect to have been completed |
253 | // before the fence are fully committed before we read the extents. |
254 | atomic_thread_fence(mo: memory_order_acquire); |
255 | auto BufferSize = atomic_load(a: It->Extents, mo: memory_order_acquire); |
256 | SerializedBufferSize = BufferSize + sizeof(MetadataRecord); |
257 | CurrentBuffer = allocateBuffer(S: SerializedBufferSize); |
258 | if (CurrentBuffer == nullptr) |
259 | return {.Data: nullptr, .Size: 0}; |
260 | |
261 | // Write out the extents as a Metadata Record into the CurrentBuffer. |
262 | MetadataRecord ExtentsRecord; |
263 | ExtentsRecord.Type = uint8_t(RecordType::Metadata); |
264 | ExtentsRecord.RecordKind = |
265 | uint8_t(MetadataRecord::RecordKinds::BufferExtents); |
266 | internal_memcpy(dest: ExtentsRecord.Data, src: &BufferSize, n: sizeof(BufferSize)); |
267 | auto AfterExtents = |
268 | static_cast<char *>(internal_memcpy(dest: CurrentBuffer, src: &ExtentsRecord, |
269 | n: sizeof(MetadataRecord))) + |
270 | sizeof(MetadataRecord); |
271 | internal_memcpy(dest: AfterExtents, src: It->Data, n: BufferSize); |
272 | |
273 | XRayBuffer Result; |
274 | Result.Data = CurrentBuffer; |
275 | Result.Size = SerializedBufferSize; |
276 | ++It; |
277 | return Result; |
278 | } |
279 | |
280 | // Must finalize before flushing. |
281 | XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { |
282 | if (atomic_load(a: &LoggingStatus, mo: memory_order_acquire) != |
283 | XRayLogInitStatus::XRAY_LOG_FINALIZED) { |
284 | if (Verbosity()) |
285 | Report(format: "Not flushing log, implementation is not finalized.\n" ); |
286 | return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; |
287 | } |
288 | |
289 | if (atomic_exchange(a: &LogFlushStatus, v: XRayLogFlushStatus::XRAY_LOG_FLUSHING, |
290 | mo: memory_order_release) == |
291 | XRayLogFlushStatus::XRAY_LOG_FLUSHING) { |
292 | if (Verbosity()) |
293 | Report(format: "Not flushing log, implementation is still flushing.\n" ); |
294 | return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; |
295 | } |
296 | |
297 | if (BQ == nullptr) { |
298 | if (Verbosity()) |
299 | Report(format: "Cannot flush when global buffer queue is null.\n" ); |
300 | return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; |
301 | } |
302 | |
303 | // We wait a number of milliseconds to allow threads to see that we've |
304 | // finalised before attempting to flush the log. |
305 | SleepForMillis(millis: fdrFlags()->grace_period_ms); |
306 | |
307 | // At this point, we're going to uninstall the iterator implementation, before |
308 | // we decide to do anything further with the global buffer queue. |
309 | __xray_log_remove_buffer_iterator(); |
310 | |
311 | // Once flushed, we should set the global status of the logging implementation |
312 | // to "uninitialized" to allow for FDR-logging multiple runs. |
313 | auto ResetToUnitialized = at_scope_exit(fn: [] { |
314 | atomic_store(a: &LoggingStatus, v: XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, |
315 | mo: memory_order_release); |
316 | }); |
317 | |
318 | auto CleanupBuffers = at_scope_exit(fn: [] { |
319 | auto &TLD = getThreadLocalData(); |
320 | if (TLD.Controller != nullptr) |
321 | TLD.Controller->flush(); |
322 | }); |
323 | |
324 | if (fdrFlags()->no_file_flush) { |
325 | if (Verbosity()) |
326 | Report(format: "XRay FDR: Not flushing to file, 'no_file_flush=true'.\n" ); |
327 | |
328 | atomic_store(a: &LogFlushStatus, v: XRayLogFlushStatus::XRAY_LOG_FLUSHED, |
329 | mo: memory_order_release); |
330 | return XRayLogFlushStatus::XRAY_LOG_FLUSHED; |
331 | } |
332 | |
333 | // We write out the file in the following format: |
334 | // |
335 | // 1) We write down the XRay file header with version 1, type FDR_LOG. |
336 | // 2) Then we use the 'apply' member of the BufferQueue that's live, to |
337 | // ensure that at this point in time we write down the buffers that have |
338 | // been released (and marked "used") -- we dump the full buffer for now |
339 | // (fixed-sized) and let the tools reading the buffers deal with the data |
340 | // afterwards. |
341 | // |
342 | LogWriter *LW = LogWriter::Open(); |
343 | if (LW == nullptr) { |
344 | auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; |
345 | atomic_store(a: &LogFlushStatus, v: Result, mo: memory_order_release); |
346 | return Result; |
347 | } |
348 | |
349 | XRayFileHeader = fdrCommonHeaderInfo(); |
350 | Header.FdrData = FdrAdditionalHeaderData{.ThreadBufferSize: BQ->ConfiguredBufferSize()}; |
351 | LW->WriteAll(Begin: reinterpret_cast<char *>(&Header), |
352 | End: reinterpret_cast<char *>(&Header) + sizeof(Header)); |
353 | |
354 | // Release the current thread's buffer before we attempt to write out all the |
355 | // buffers. This ensures that in case we had only a single thread going, that |
356 | // we are able to capture the data nonetheless. |
357 | auto &TLD = getThreadLocalData(); |
358 | if (TLD.Controller != nullptr) |
359 | TLD.Controller->flush(); |
360 | |
361 | BQ->apply(Fn: [&](const BufferQueue::Buffer &B) { |
362 | // Starting at version 2 of the FDR logging implementation, we only write |
363 | // the records identified by the extents of the buffer. We use the Extents |
364 | // from the Buffer and write that out as the first record in the buffer. We |
365 | // still use a Metadata record, but fill in the extents instead for the |
366 | // data. |
367 | MetadataRecord ExtentsRecord; |
368 | auto BufferExtents = atomic_load(a: B.Extents, mo: memory_order_acquire); |
369 | DCHECK(BufferExtents <= B.Size); |
370 | ExtentsRecord.Type = uint8_t(RecordType::Metadata); |
371 | ExtentsRecord.RecordKind = |
372 | uint8_t(MetadataRecord::RecordKinds::BufferExtents); |
373 | internal_memcpy(dest: ExtentsRecord.Data, src: &BufferExtents, n: sizeof(BufferExtents)); |
374 | if (BufferExtents > 0) { |
375 | LW->WriteAll(Begin: reinterpret_cast<char *>(&ExtentsRecord), |
376 | End: reinterpret_cast<char *>(&ExtentsRecord) + |
377 | sizeof(MetadataRecord)); |
378 | LW->WriteAll(Begin: reinterpret_cast<char *>(B.Data), |
379 | End: reinterpret_cast<char *>(B.Data) + BufferExtents); |
380 | } |
381 | }); |
382 | |
383 | atomic_store(a: &LogFlushStatus, v: XRayLogFlushStatus::XRAY_LOG_FLUSHED, |
384 | mo: memory_order_release); |
385 | return XRayLogFlushStatus::XRAY_LOG_FLUSHED; |
386 | } |
387 | |
388 | XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT { |
389 | s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED; |
390 | if (!atomic_compare_exchange_strong(a: &LoggingStatus, cmp: &CurrentStatus, |
391 | xchg: XRayLogInitStatus::XRAY_LOG_FINALIZING, |
392 | mo: memory_order_release)) { |
393 | if (Verbosity()) |
394 | Report(format: "Cannot finalize log, implementation not initialized.\n" ); |
395 | return static_cast<XRayLogInitStatus>(CurrentStatus); |
396 | } |
397 | |
398 | // Do special things to make the log finalize itself, and not allow any more |
399 | // operations to be performed until re-initialized. |
400 | if (BQ == nullptr) { |
401 | if (Verbosity()) |
402 | Report(format: "Attempting to finalize an uninitialized global buffer!\n" ); |
403 | } else { |
404 | BQ->finalize(); |
405 | } |
406 | |
407 | atomic_store(a: &LoggingStatus, v: XRayLogInitStatus::XRAY_LOG_FINALIZED, |
408 | mo: memory_order_release); |
409 | return XRayLogInitStatus::XRAY_LOG_FINALIZED; |
410 | } |
411 | |
412 | struct TSCAndCPU { |
413 | uint64_t TSC = 0; |
414 | unsigned char CPU = 0; |
415 | }; |
416 | |
417 | static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT { |
418 | // We want to get the TSC as early as possible, so that we can check whether |
419 | // we've seen this CPU before. We also do it before we load anything else, |
420 | // to allow for forward progress with the scheduling. |
421 | TSCAndCPU Result; |
422 | |
423 | // Test once for required CPU features |
424 | static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT; |
425 | static bool TSCSupported = true; |
426 | pthread_once( |
427 | once_control: &OnceProbe, init_routine: +[] { TSCSupported = probeRequiredCPUFeatures(); }); |
428 | |
429 | if (TSCSupported) { |
430 | Result.TSC = __xray::readTSC(CPU&: Result.CPU); |
431 | } else { |
432 | // FIXME: This code needs refactoring as it appears in multiple locations |
433 | timespec TS; |
434 | int result = clock_gettime(CLOCK_REALTIME, tp: &TS); |
435 | if (result != 0) { |
436 | Report(format: "clock_gettime(2) return %d, errno=%d" , result, int(errno)); |
437 | TS = {.tv_sec: 0, .tv_nsec: 0}; |
438 | } |
439 | Result.CPU = 0; |
440 | Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; |
441 | } |
442 | return Result; |
443 | } |
444 | |
445 | thread_local atomic_uint8_t Running{.val_dont_use: 0}; |
446 | |
447 | static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT { |
448 | // Check if we're finalizing, before proceeding. |
449 | { |
450 | auto Status = atomic_load(a: &LoggingStatus, mo: memory_order_acquire); |
451 | if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING || |
452 | Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) { |
453 | if (TLD.Controller != nullptr) { |
454 | TLD.Controller->flush(); |
455 | TLD.Controller = nullptr; |
456 | } |
457 | return false; |
458 | } |
459 | } |
460 | |
461 | if (UNLIKELY(TLD.Controller == nullptr)) { |
462 | // Set up the TLD buffer queue. |
463 | if (UNLIKELY(BQ == nullptr)) |
464 | return false; |
465 | TLD.BQ = BQ; |
466 | |
467 | // Check that we have a valid buffer. |
468 | if (TLD.Buffer.Generation != BQ->generation() && |
469 | TLD.BQ->releaseBuffer(Buf&: TLD.Buffer) != BufferQueue::ErrorCode::Ok) |
470 | return false; |
471 | |
472 | // Set up a buffer, before setting up the log writer. Bail out on failure. |
473 | if (TLD.BQ->getBuffer(Buf&: TLD.Buffer) != BufferQueue::ErrorCode::Ok) |
474 | return false; |
475 | |
476 | // Set up the Log Writer for this thread. |
477 | if (UNLIKELY(TLD.Writer == nullptr)) { |
478 | auto *LWStorage = reinterpret_cast<FDRLogWriter *>(&TLD.LWStorage); |
479 | new (LWStorage) FDRLogWriter(TLD.Buffer); |
480 | TLD.Writer = LWStorage; |
481 | } else { |
482 | TLD.Writer->resetRecord(); |
483 | } |
484 | |
485 | auto *CStorage = reinterpret_cast<FDRController<> *>(&TLD.CStorage); |
486 | new (CStorage) |
487 | FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime, |
488 | atomic_load_relaxed(a: &ThresholdTicks)); |
489 | TLD.Controller = CStorage; |
490 | } |
491 | |
492 | DCHECK_NE(TLD.Controller, nullptr); |
493 | return true; |
494 | } |
495 | |
496 | void fdrLoggingHandleArg0(int32_t FuncId, |
497 | XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { |
498 | auto TC = getTimestamp(); |
499 | auto &TSC = TC.TSC; |
500 | auto &CPU = TC.CPU; |
501 | RecursionGuard Guard{Running}; |
502 | if (!Guard) |
503 | return; |
504 | |
505 | auto &TLD = getThreadLocalData(); |
506 | if (!setupTLD(TLD)) |
507 | return; |
508 | |
509 | switch (Entry) { |
510 | case XRayEntryType::ENTRY: |
511 | case XRayEntryType::LOG_ARGS_ENTRY: |
512 | TLD.Controller->functionEnter(FuncId, TSC, CPU); |
513 | return; |
514 | case XRayEntryType::EXIT: |
515 | TLD.Controller->functionExit(FuncId, TSC, CPU); |
516 | return; |
517 | case XRayEntryType::TAIL: |
518 | TLD.Controller->functionTailExit(FuncId, TSC, CPU); |
519 | return; |
520 | case XRayEntryType::CUSTOM_EVENT: |
521 | case XRayEntryType::TYPED_EVENT: |
522 | break; |
523 | } |
524 | } |
525 | |
526 | void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, |
527 | uint64_t Arg) XRAY_NEVER_INSTRUMENT { |
528 | auto TC = getTimestamp(); |
529 | auto &TSC = TC.TSC; |
530 | auto &CPU = TC.CPU; |
531 | RecursionGuard Guard{Running}; |
532 | if (!Guard) |
533 | return; |
534 | |
535 | auto &TLD = getThreadLocalData(); |
536 | if (!setupTLD(TLD)) |
537 | return; |
538 | |
539 | switch (Entry) { |
540 | case XRayEntryType::ENTRY: |
541 | case XRayEntryType::LOG_ARGS_ENTRY: |
542 | TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg); |
543 | return; |
544 | case XRayEntryType::EXIT: |
545 | TLD.Controller->functionExit(FuncId, TSC, CPU); |
546 | return; |
547 | case XRayEntryType::TAIL: |
548 | TLD.Controller->functionTailExit(FuncId, TSC, CPU); |
549 | return; |
550 | case XRayEntryType::CUSTOM_EVENT: |
551 | case XRayEntryType::TYPED_EVENT: |
552 | break; |
553 | } |
554 | } |
555 | |
556 | void fdrLoggingHandleCustomEvent(void *Event, |
557 | std::size_t EventSize) XRAY_NEVER_INSTRUMENT { |
558 | auto TC = getTimestamp(); |
559 | auto &TSC = TC.TSC; |
560 | auto &CPU = TC.CPU; |
561 | RecursionGuard Guard{Running}; |
562 | if (!Guard) |
563 | return; |
564 | |
565 | // Complain when we ever get at least one custom event that's larger than what |
566 | // we can possibly support. |
567 | if (EventSize > |
568 | static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) { |
569 | static pthread_once_t Once = PTHREAD_ONCE_INIT; |
570 | pthread_once( |
571 | once_control: &Once, init_routine: +[] { |
572 | Report(format: "Custom event size too large; truncating to %d.\n" , |
573 | std::numeric_limits<int32_t>::max()); |
574 | }); |
575 | } |
576 | |
577 | auto &TLD = getThreadLocalData(); |
578 | if (!setupTLD(TLD)) |
579 | return; |
580 | |
581 | int32_t ReducedEventSize = static_cast<int32_t>(EventSize); |
582 | TLD.Controller->customEvent(TSC, CPU, Event, EventSize: ReducedEventSize); |
583 | } |
584 | |
585 | void fdrLoggingHandleTypedEvent(size_t EventType, const void *Event, |
586 | size_t EventSize) noexcept |
587 | XRAY_NEVER_INSTRUMENT { |
588 | auto TC = getTimestamp(); |
589 | auto &TSC = TC.TSC; |
590 | auto &CPU = TC.CPU; |
591 | RecursionGuard Guard{Running}; |
592 | if (!Guard) |
593 | return; |
594 | |
595 | // Complain when we ever get at least one typed event that's larger than what |
596 | // we can possibly support. |
597 | if (EventSize > |
598 | static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) { |
599 | static pthread_once_t Once = PTHREAD_ONCE_INIT; |
600 | pthread_once( |
601 | once_control: &Once, init_routine: +[] { |
602 | Report(format: "Typed event size too large; truncating to %d.\n" , |
603 | std::numeric_limits<int32_t>::max()); |
604 | }); |
605 | } |
606 | |
607 | auto &TLD = getThreadLocalData(); |
608 | if (!setupTLD(TLD)) |
609 | return; |
610 | |
611 | int32_t ReducedEventSize = static_cast<int32_t>(EventSize); |
612 | TLD.Controller->typedEvent(TSC, CPU, EventType: static_cast<uint16_t>(EventType), Event, |
613 | EventSize: ReducedEventSize); |
614 | } |
615 | |
616 | XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options, |
617 | size_t OptionsSize) XRAY_NEVER_INSTRUMENT { |
618 | if (Options == nullptr) |
619 | return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; |
620 | |
621 | s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; |
622 | if (!atomic_compare_exchange_strong(a: &LoggingStatus, cmp: &CurrentStatus, |
623 | xchg: XRayLogInitStatus::XRAY_LOG_INITIALIZING, |
624 | mo: memory_order_release)) { |
625 | if (Verbosity()) |
626 | Report(format: "Cannot initialize already initialized implementation.\n" ); |
627 | return static_cast<XRayLogInitStatus>(CurrentStatus); |
628 | } |
629 | |
630 | if (Verbosity()) |
631 | Report(format: "Initializing FDR mode with options: %s\n" , |
632 | static_cast<const char *>(Options)); |
633 | |
634 | // TODO: Factor out the flags specific to the FDR mode implementation. For |
635 | // now, use the global/single definition of the flags, since the FDR mode |
636 | // flags are already defined there. |
637 | FlagParser FDRParser; |
638 | FDRFlags FDRFlags; |
639 | registerXRayFDRFlags(P: &FDRParser, F: &FDRFlags); |
640 | FDRFlags.setDefaults(); |
641 | |
642 | // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided |
643 | // options until we migrate everyone to use the XRAY_FDR_OPTIONS |
644 | // compiler-provided options. |
645 | FDRParser.ParseString(s: useCompilerDefinedFlags()); |
646 | FDRParser.ParseString(s: useCompilerDefinedFDRFlags()); |
647 | auto *EnvOpts = GetEnv(name: "XRAY_FDR_OPTIONS" ); |
648 | if (EnvOpts == nullptr) |
649 | EnvOpts = "" ; |
650 | FDRParser.ParseString(s: EnvOpts); |
651 | |
652 | // FIXME: Remove this when we fully remove the deprecated flags. |
653 | if (internal_strlen(s: EnvOpts) == 0) { |
654 | FDRFlags.func_duration_threshold_us = |
655 | flags()->xray_fdr_log_func_duration_threshold_us; |
656 | FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms; |
657 | } |
658 | |
659 | // The provided options should always override the compiler-provided and |
660 | // environment-variable defined options. |
661 | FDRParser.ParseString(s: static_cast<const char *>(Options)); |
662 | *fdrFlags() = FDRFlags; |
663 | auto BufferSize = FDRFlags.buffer_size; |
664 | auto BufferMax = FDRFlags.buffer_max; |
665 | |
666 | if (BQ == nullptr) { |
667 | bool Success = false; |
668 | BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage); |
669 | new (BQ) BufferQueue(BufferSize, BufferMax, Success); |
670 | if (!Success) { |
671 | Report(format: "BufferQueue init failed.\n" ); |
672 | return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; |
673 | } |
674 | } else { |
675 | if (BQ->init(BS: BufferSize, BC: BufferMax) != BufferQueue::ErrorCode::Ok) { |
676 | if (Verbosity()) |
677 | Report(format: "Failed to re-initialize global buffer queue. Init failed.\n" ); |
678 | return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; |
679 | } |
680 | } |
681 | |
682 | static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; |
683 | pthread_once( |
684 | once_control: &OnceInit, init_routine: +[] { |
685 | atomic_store(a: &TicksPerSec, |
686 | v: probeRequiredCPUFeatures() ? getTSCFrequency() |
687 | : __xray::NanosecondsPerSecond, |
688 | mo: memory_order_release); |
689 | pthread_key_create( |
690 | key: &Key, destr_function: +[](void *TLDPtr) { |
691 | if (TLDPtr == nullptr) |
692 | return; |
693 | auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr); |
694 | if (TLD.BQ == nullptr) |
695 | return; |
696 | if (TLD.Buffer.Data == nullptr) |
697 | return; |
698 | auto EC = TLD.BQ->releaseBuffer(Buf&: TLD.Buffer); |
699 | if (EC != BufferQueue::ErrorCode::Ok) |
700 | Report(format: "At thread exit, failed to release buffer at %p; " |
701 | "error=%s\n" , |
702 | TLD.Buffer.Data, BufferQueue::getErrorString(E: EC)); |
703 | }); |
704 | }); |
705 | |
706 | atomic_store(a: &ThresholdTicks, |
707 | v: atomic_load_relaxed(a: &TicksPerSec) * |
708 | fdrFlags()->func_duration_threshold_us / 1000000, |
709 | mo: memory_order_release); |
710 | // Arg1 handler should go in first to avoid concurrent code accidentally |
711 | // falling back to arg0 when it should have ran arg1. |
712 | __xray_set_handler_arg1(entry: fdrLoggingHandleArg1); |
713 | // Install the actual handleArg0 handler after initialising the buffers. |
714 | __xray_set_handler(entry: fdrLoggingHandleArg0); |
715 | __xray_set_customevent_handler(entry: fdrLoggingHandleCustomEvent); |
716 | __xray_set_typedevent_handler(entry: fdrLoggingHandleTypedEvent); |
717 | |
718 | // Install the buffer iterator implementation. |
719 | __xray_log_set_buffer_iterator(Iterator: fdrIterator); |
720 | |
721 | atomic_store(a: &LoggingStatus, v: XRayLogInitStatus::XRAY_LOG_INITIALIZED, |
722 | mo: memory_order_release); |
723 | |
724 | if (Verbosity()) |
725 | Report(format: "XRay FDR init successful.\n" ); |
726 | return XRayLogInitStatus::XRAY_LOG_INITIALIZED; |
727 | } |
728 | |
729 | bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { |
730 | XRayLogImpl Impl{ |
731 | .log_init: fdrLoggingInit, |
732 | .log_finalize: fdrLoggingFinalize, |
733 | .handle_arg0: fdrLoggingHandleArg0, |
734 | .flush_log: fdrLoggingFlush, |
735 | }; |
736 | auto RegistrationResult = __xray_log_register_mode(Mode: "xray-fdr" , Impl); |
737 | if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && |
738 | Verbosity()) { |
739 | Report(format: "Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n" , |
740 | RegistrationResult); |
741 | return false; |
742 | } |
743 | |
744 | if (flags()->xray_fdr_log || |
745 | !internal_strcmp(s1: flags()->xray_mode, s2: "xray-fdr" )) { |
746 | auto SelectResult = __xray_log_select_mode(Mode: "xray-fdr" ); |
747 | if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && |
748 | Verbosity()) { |
749 | Report(format: "Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n" , |
750 | SelectResult); |
751 | return false; |
752 | } |
753 | } |
754 | return true; |
755 | } |
756 | |
757 | } // namespace __xray |
758 | |
759 | static auto UNUSED Unused = __xray::fdrLogDynamicInitializer(); |
760 | |