//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <limits>
#include <memory> // For placement new.
#include <utility>

namespace __xray {

/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function
/// and the path from the root to the node represents its stack trace. Each
/// node in the trie will contain some useful values, including:
///
///   * The cumulative amount of time spent in this particular node/stack.
///   * The number of times this stack has appeared.
///   * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented
/// using an Array<NodeIdPair> -- each NodeIdPair instance will contain the
/// function ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ ... ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///
/// If, for instance, the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ j' ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///   j' -> [ ... ] [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach because computing the hash every time we need
/// to update a function's call information, while handling the entry and exit
/// events, proved too expensive.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact and
/// quick to update) to enable us to keep the overheads of this implementation
/// to a minimum.
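///
/// A rough usage sketch, to make the flow concrete (illustrative only; the
/// profiling runtime owns the allocators and drives these calls from its
/// function entry/exit handlers, and the function IDs, TSC and CPU values
/// below are made up):
///
///   auto A = FunctionCallTrie::InitAllocators();
///   FunctionCallTrie Trie(A);
///
///   Trie.enterFunction(/*FId=*/1, /*TSC=*/100, /*CPU=*/0); // enter f
///   Trie.enterFunction(/*FId=*/2, /*TSC=*/120, /*CPU=*/0); // f calls g
///   Trie.exitFunction(/*FId=*/2, /*TSC=*/170, /*CPU=*/0);  // g returns
///   Trie.exitFunction(/*FId=*/1, /*TSC=*/200, /*CPU=*/0);  // f returns
///
/// The trie now has a root node for FId 1 with a callee node for FId 2, each
/// with CallCount 1; the callee records 50 ticks and the root records 100
/// ticks (cumulative time includes callees, per the Node documentation below).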
class FunctionCallTrie {
public:
  struct Node;

  // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
  // standard library types in this header.
  struct NodeIdPair {
    Node *NodePtr;
    int32_t FId;
  };

  using NodeIdPairArray = Array<NodeIdPair>;
  using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;

  // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
  // number of times this node actually appeared, the cumulative amount of time
  // for this particular node including its children call times, and just the
  // local time spent on this node. Each Node will have the ID of the XRay
  // instrumented function that it is associated with.
  struct Node {
    Node *Parent;
    NodeIdPairArray Callees;
    uint64_t CallCount;
    uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
    int32_t FId;

    // TODO: Include the compact histogram.
  };

private:
  struct ShadowStackEntry {
    uint64_t EntryTSC;
    Node *NodePtr;
    uint16_t EntryCPU;
  };

  using NodeArray = Array<Node>;
  using RootArray = Array<Node *>;
  using ShadowStackArray = Array<ShadowStackEntry>;

public:
  // We collate the allocators we need into a single struct, as a convenience
  // to allow us to initialize these as a group.
  struct Allocators {
    using NodeAllocatorType = NodeArray::AllocatorType;
    using RootAllocatorType = RootArray::AllocatorType;
    using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;

    // Use hosted aligned storage members to allow for trivial move and init.
    // This also allows us to sidestep the potential-failing allocation issue.
    typename std::aligned_storage<sizeof(NodeAllocatorType),
                                  alignof(NodeAllocatorType)>::type
        NodeAllocatorStorage;
    typename std::aligned_storage<sizeof(RootAllocatorType),
                                  alignof(RootAllocatorType)>::type
        RootAllocatorStorage;
    typename std::aligned_storage<sizeof(ShadowStackAllocatorType),
                                  alignof(ShadowStackAllocatorType)>::type
        ShadowStackAllocatorStorage;
    typename std::aligned_storage<sizeof(NodeIdPairAllocatorType),
                                  alignof(NodeIdPairAllocatorType)>::type
        NodeIdPairAllocatorStorage;

    NodeAllocatorType *NodeAllocator = nullptr;
    RootAllocatorType *RootAllocator = nullptr;
    ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
    NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

    Allocators() = default;
    Allocators(const Allocators &) = delete;
    Allocators &operator=(const Allocators &) = delete;

    struct Buffers {
      BufferQueue::Buffer NodeBuffer;
      BufferQueue::Buffer RootsBuffer;
      BufferQueue::Buffer ShadowStackBuffer;
      BufferQueue::Buffer NodeIdPairBuffer;
    };
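
    // A sketch of how a Buffers instance might be populated before being
    // handed to the constructor below (illustrative only; it assumes the
    // BufferQueue::getBuffer(Buffer &) interface and elides error handling
    // for all but the first buffer):
    //
    //   FunctionCallTrie::Allocators::Buffers Bufs;
    //   if (BQ->getBuffer(Bufs.NodeBuffer) != BufferQueue::ErrorCode::Ok)
    //     return;
    //   BQ->getBuffer(Bufs.RootsBuffer);
    //   BQ->getBuffer(Bufs.ShadowStackBuffer);
    //   BQ->getBuffer(Bufs.NodeIdPairBuffer);
    //   auto A = FunctionCallTrie::InitAllocatorsFromBuffers(Bufs);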

    explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage)
          NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage)
          RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(
          B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(
          B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage) NodeAllocatorType(Max);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage) RootAllocatorType(Max);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // Here we rely on the safety of memcpy'ing contents of the storage
      // members, and then pointing the source pointers to nullptr.
      internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage,
                      sizeof(NodeAllocatorType));
      internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage,
                      sizeof(RootAllocatorType));
      internal_memcpy(&ShadowStackAllocatorStorage,
                      &O.ShadowStackAllocatorStorage,
                      sizeof(ShadowStackAllocatorType));
      internal_memcpy(&NodeIdPairAllocatorStorage,
                      &O.NodeIdPairAllocatorStorage,
                      sizeof(NodeIdPairAllocatorType));

      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);

      O.NodeAllocator = nullptr;
      O.RootAllocator = nullptr;
      O.ShadowStackAllocator = nullptr;
      O.NodeIdPairAllocator = nullptr;
    }

    Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // When moving into an existing instance, we ensure that we clean up the
      // current allocators.
      if (NodeAllocator)
        NodeAllocator->~NodeAllocatorType();
      if (O.NodeAllocator) {
        new (&NodeAllocatorStorage)
            NodeAllocatorType(std::move(*O.NodeAllocator));
        NodeAllocator =
            reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
        O.NodeAllocator = nullptr;
      } else {
        NodeAllocator = nullptr;
      }

      if (RootAllocator)
        RootAllocator->~RootAllocatorType();
      if (O.RootAllocator) {
        new (&RootAllocatorStorage)
            RootAllocatorType(std::move(*O.RootAllocator));
        RootAllocator =
            reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
        O.RootAllocator = nullptr;
      } else {
        RootAllocator = nullptr;
      }

      if (ShadowStackAllocator)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (O.ShadowStackAllocator) {
        new (&ShadowStackAllocatorStorage)
            ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator));
        ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
            &ShadowStackAllocatorStorage);
        O.ShadowStackAllocator = nullptr;
      } else {
        ShadowStackAllocator = nullptr;
      }

      if (NodeIdPairAllocator)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
      if (O.NodeIdPairAllocator) {
        new (&NodeIdPairAllocatorStorage)
            NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator));
        NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
            &NodeIdPairAllocatorStorage);
        O.NodeIdPairAllocator = nullptr;
      } else {
        NodeIdPairAllocator = nullptr;
      }

      return *this;
    }

    ~Allocators() XRAY_NEVER_INSTRUMENT {
      if (NodeAllocator != nullptr)
        NodeAllocator->~NodeAllocatorType();
      if (RootAllocator != nullptr)
        RootAllocator->~RootAllocatorType();
      if (ShadowStackAllocator != nullptr)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (NodeIdPairAllocator != nullptr)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
    }
  };

  static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
    return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
  }

  static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
    Allocators A(Max);
    return A;
  }

  static Allocators
  InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT {
    Allocators A(Bufs);
    return A;
  }
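
  // The three factories above differ only in where the backing memory comes
  // from. A sketch of the buffer-backed path (illustrative; assumes Bufs was
  // populated as described near the Buffers definition):
  //
  //   auto A = FunctionCallTrie::InitAllocatorsFromBuffers(Bufs);
  //   FunctionCallTrie Trie(A);
  //
  // InitAllocators() and InitAllocatorsCustom(Max) instead create allocators
  // backed by memory the runtime obtains itself, capped by the
  // per_thread_allocator_max flag or by the explicit Max argument.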

private:
  NodeArray Nodes;
  RootArray Roots;
  ShadowStackArray ShadowStack;
  NodeIdPairAllocatorType *NodeIdPairAllocator;
  uint32_t OverflowedFunctions;

public:
  explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
      : Nodes(*A.NodeAllocator),
        Roots(*A.RootAllocator),
        ShadowStack(*A.ShadowStackAllocator),
        NodeIdPairAllocator(A.NodeIdPairAllocator),
        OverflowedFunctions(0) {}

  FunctionCallTrie() = delete;
  FunctionCallTrie(const FunctionCallTrie &) = delete;
  FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;

  FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
      : Nodes(std::move(O.Nodes)),
        Roots(std::move(O.Roots)),
        ShadowStack(std::move(O.ShadowStack)),
        NodeIdPairAllocator(O.NodeIdPairAllocator),
        OverflowedFunctions(O.OverflowedFunctions) {}

  FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
    Nodes = std::move(O.Nodes);
    Roots = std::move(O.Roots);
    ShadowStack = std::move(O.ShadowStack);
    NodeIdPairAllocator = O.NodeIdPairAllocator;
    OverflowedFunctions = O.OverflowedFunctions;
    return *this;
  }

  ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}

  void enterFunction(const int32_t FId, uint64_t TSC,
                     uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    DCHECK_NE(FId, 0);

    // If we've already overflowed the function call stack, do not bother
    // attempting to record any more function entries.
    if (UNLIKELY(OverflowedFunctions)) {
      ++OverflowedFunctions;
      return;
    }

    // If this is the first function we've encountered, we want to set up the
    // node(s) and treat it as a root.
    if (UNLIKELY(ShadowStack.empty())) {
      auto *NewRoot = Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
      if (UNLIKELY(NewRoot == nullptr))
        return;
      if (Roots.AppendEmplace(NewRoot) == nullptr) {
        Nodes.trim(1);
        return;
      }
      if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
        Nodes.trim(1);
        Roots.trim(1);
        ++OverflowedFunctions;
        return;
      }
      return;
    }

    // From this point on, we require that the stack is not empty.
    DCHECK(!ShadowStack.empty());
    auto TopNode = ShadowStack.back().NodePtr;
    DCHECK_NE(TopNode, nullptr);

    // If we've seen this callee before, we access that node and place it on
    // top of the stack.
    auto *Callee = TopNode->Callees.find_element(
        [FId](const NodeIdPair &NR) { return NR.FId == FId; });
    if (Callee != nullptr) {
      CHECK_NE(Callee->NodePtr, nullptr);
      if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr)
        ++OverflowedFunctions;
      return;
    }

    // This means we've never seen this stack before; create a new node here.
    auto *NewNode = Nodes.AppendEmplace(
        TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
    if (UNLIKELY(NewNode == nullptr))
      return;
    DCHECK_NE(NewNode, nullptr);
    TopNode->Callees.AppendEmplace(NewNode, FId);
    if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
      ++OverflowedFunctions;
    return;
  }
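
  // To make the bookkeeping above concrete, a hypothetical sequence (function
  // IDs and TSCs are made up), starting from an empty trie:
  //
  //   enterFunction(1, 100, 0); // creates a root node, shadow stack: [1]
  //   enterFunction(2, 120, 0); // creates a callee node, shadow stack: [1, 2]
  //   exitFunction(2, 150, 0);  // pops 2, updates its counters (see below)
  //   enterFunction(2, 160, 0); // reuses the existing callee node: [1, 2]
  //
  // Node counters are only updated on exit; entry only grows the shadow stack
  // and, when needed, the node and callee arrays.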

  void exitFunction(int32_t FId, uint64_t TSC,
                    uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    // If we're exiting functions that have "overflowed", i.e. did not fit into
    // the stack due to allocator constraints, we decrement that count first.
    if (OverflowedFunctions) {
      --OverflowedFunctions;
      return;
    }

    // When we exit a function, we look up the ShadowStack to see whether we've
    // entered this function before. We do as little processing here as we can,
    // since most of the hard work would have already been done at function
    // entry.
    uint64_t CumulativeTreeTime = 0;

    while (!ShadowStack.empty()) {
      const auto &Top = ShadowStack.back();
      auto TopNode = Top.NodePtr;
      DCHECK_NE(TopNode, nullptr);

      // We may encounter overflow on the TSC we're provided, which may end up
      // being less than the TSC when we first entered the function.
      //
      // To get an accurate measurement of cycles, we check whether we've
      // overflowed (TSC < Top.EntryTSC) and, if so, account for the difference
      // between the entry TSC and the maximum value of the TSC counter (max of
      // uint64_t), then add the value of TSC. We can prove that the maximum
      // delta we will get is at most the 64-bit unsigned value, since even the
      // extreme case of a Top.EntryTSC of 1 and a TSC of 0 computes to
      // (numeric_limits<uint64_t>::max() - 1) + 0.
      //
      // NOTE: This assumes that TSCs are synchronised across CPUs.
      // TODO: Count the number of times we've seen CPU migrations.
      uint64_t LocalTime =
          Top.EntryTSC > TSC
              ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC
              : TSC - Top.EntryTSC;
      TopNode->CallCount++;
      TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
      CumulativeTreeTime += LocalTime;
      ShadowStack.trim(1);

      // TODO: Update the histogram for the node.
      if (TopNode->FId == FId)
        break;
    }
  }
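
  // A worked example of the accounting above (hypothetical values): suppose a
  // single exit event has to unwind two frames, with the inner frame entered
  // at TSC 120, the outer frame entered at TSC 100, and the exit TSC at 200.
  //
  //   Inner frame: LocalTime = 200 - 120 = 80; it receives 80 - 0 = 80, and
  //                CumulativeTreeTime becomes 80.
  //   Outer frame: LocalTime = 200 - 100 = 100; it receives 100 - 80 = 20, so
  //                the 80 ticks already attributed to the inner frame are not
  //                double-counted within this one unwind.
  //
  // When each frame gets its own exit event instead, CumulativeTreeTime starts
  // at 0 for each call, and each node accumulates its full inclusive time.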

  const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; }

  // The deepCopyInto operation will update the provided FunctionCallTrie by
  // re-creating the contents of this particular FunctionCallTrie in the other
  // FunctionCallTrie. It will do this using a depth-first traversal from the
  // roots, re-creating each node it visits in the provided FunctionCallTrie.
  //
  // This operation will *not* destroy the state in `O`, and thus may cause
  // some duplicate entries in `O` if it is not empty.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  //
  // This function must *not* be called with a non-empty FunctionCallTrie |O|.
  void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    DCHECK(O.getRoots().empty());

    // We push each root onto a stack, to use as the parent marker for new
    // nodes we create as we traverse depth-first down the call tree.
    struct NodeAndParent {
      FunctionCallTrie::Node *Node;
      FunctionCallTrie::Node *NewNode;
    };
    using Stack = Array<NodeAndParent>;

    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      // Add a node in O for this root.
      auto NewRoot = O.Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount,
          Root->CumulativeLocalTime, Root->FId);

      // Because we cannot allocate more memory, we should bail out right away.
      if (UNLIKELY(NewRoot == nullptr))
        return;

      if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr))
        return;

      // TODO: Figure out what to do if we fail to allocate any more stack
      // space. Maybe warn or report once?
      if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr)
        return;
      while (!DFSStack.empty()) {
        NodeAndParent NP = DFSStack.back();
        DCHECK_NE(NP.Node, nullptr);
        DCHECK_NE(NP.NewNode, nullptr);
        DFSStack.trim(1);
        for (const auto Callee : NP.Node->Callees) {
          auto NewNode = O.Nodes.AppendEmplace(
              NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator),
              Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
              Callee.FId);
          if (UNLIKELY(NewNode == nullptr))
            return;
          if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) ==
                       nullptr))
            return;
          if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) ==
                       nullptr))
            return;
        }
      }
    }
  }
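
  // A sketch of the intended use (illustrative; the profiling runtime is the
  // real caller, and ThreadTrie/FlushTrie are hypothetical names): copying a
  // thread's trie into separately-allocated storage, so the original can keep
  // being updated or be torn down independently.
  //
  //   auto FlushAllocators = FunctionCallTrie::InitAllocators();
  //   FunctionCallTrie FlushTrie(FlushAllocators);
  //   ThreadTrie.deepCopyInto(FlushTrie); // FlushTrie must start out empty.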

  // The mergeInto operation will update the provided FunctionCallTrie by
  // traversing the current trie's roots and updating (i.e. merging) the data
  // in the nodes with the data in the target's nodes. If the node doesn't
  // exist in the provided trie, we add a new one in the right position, and
  // inherit the data from the original (current) trie, along with all its
  // callees.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    struct NodeAndTarget {
      FunctionCallTrie::Node *OrigNode;
      FunctionCallTrie::Node *TargetNode;
    };
    using Stack = Array<NodeAndTarget>;
    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      Node *TargetRoot = nullptr;
      auto R = O.Roots.find_element(
          [&](const Node *Node) { return Node->FId == Root->FId; });
      if (R == nullptr) {
        TargetRoot = O.Nodes.AppendEmplace(
            nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
            Root->FId);
        if (UNLIKELY(TargetRoot == nullptr))
          return;

        O.Roots.Append(TargetRoot);
      } else {
        TargetRoot = *R;
      }

      DFSStack.AppendEmplace(Root, TargetRoot);
      while (!DFSStack.empty()) {
        NodeAndTarget NT = DFSStack.back();
        DCHECK_NE(NT.OrigNode, nullptr);
        DCHECK_NE(NT.TargetNode, nullptr);
        DFSStack.trim(1);
        // TODO: Update the histogram as well when we have it ready.
        NT.TargetNode->CallCount += NT.OrigNode->CallCount;
        NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
        for (const auto Callee : NT.OrigNode->Callees) {
          auto TargetCallee = NT.TargetNode->Callees.find_element(
              [&](const FunctionCallTrie::NodeIdPair &C) {
                return C.FId == Callee.FId;
              });
          if (TargetCallee == nullptr) {
            auto NewTargetNode = O.Nodes.AppendEmplace(
                NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
                Callee.FId);

            if (UNLIKELY(NewTargetNode == nullptr))
              return;

            TargetCallee =
                NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
          }
          DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
        }
      }
    }
  }
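
  // A sketch of how mergeInto might be used to aggregate per-thread tries into
  // a single profile (illustrative; ThreadTries and Aggregate are hypothetical
  // names):
  //
  //   auto AggAllocators = FunctionCallTrie::InitAllocators();
  //   FunctionCallTrie Aggregate(AggAllocators);
  //   for (auto &T : ThreadTries)
  //     T.mergeInto(Aggregate);
  //
  // Unlike deepCopyInto, mergeInto accumulates counts and times into nodes
  // that already exist in the target, so the target need not be empty.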
};

} // namespace __xray

#endif // XRAY_FUNCTION_CALL_TRIE_H