//===-- function_call_trie_test.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a function call tracing system.
//
//===----------------------------------------------------------------------===//
#include "xray_function_call_trie.h"
#include "gtest/gtest.h"
#include <cstdint>
#include <limits>
#include <new>
#include <type_traits>

namespace __xray {

namespace {

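// Smoke test: constructing a trie against freshly-initialised allocators must
// not crash, and tearing both down must be clean.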
TEST(FunctionCallTrieTest, ConstructWithTLSAllocators) {
  profilingFlags()->setDefaults();
  FunctionCallTrie::Allocators Allocators = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(Allocators);
}

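// A single enter/exit pair should yield exactly one root with a call count of
// 1 and a local time equal to the TSC delta between exit and entry (2 - 1).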
TEST(FunctionCallTrieTest, EnterAndExitFunction) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  uint64_t TSC = 1;
  uint16_t CPU = 0;
  Trie.enterFunction(1, TSC++, CPU++);
  Trie.exitFunction(1, TSC++, CPU++);
  const auto &R = Trie.getRoots();

  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime, 1u);
}

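// Entering at the maximum TSC value and exiting after the counter wraps should
// still account a sensible delta. The expected value of 1 is consistent with
// computing the local time as (UINT64_MAX - EntryTSC) + ExitTSC whenever the
// exit TSC is smaller than the entry TSC.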
TEST(FunctionCallTrieTest, HandleTSCOverflow) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(1, std::numeric_limits<uint64_t>::max(), 0);
  Trie.exitFunction(1, 1, 0);
  const auto &R = Trie.getRoots();

  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime, 1u);
}

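// The same wrap-around accounting bounds the largest local time a single
// enter/exit pair can record: entering at TSC 1 and exiting at TSC 0 yields
// UINT64_MAX - 1.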
TEST(FunctionCallTrieTest, MaximalCumulativeTime) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(1, 1, 0);
  Trie.exitFunction(1, 0, 0);
  const auto &R = Trie.getRoots();

  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime,
            std::numeric_limits<uint64_t>::max() - 1);
}

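// An exit event for a function that was never entered should be ignored,
// leaving the trie empty.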
TEST(FunctionCallTrieTest, MissingFunctionEntry) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.exitFunction(1, 1, 0);
  const auto &R = Trie.getRoots();

  ASSERT_TRUE(R.empty());
}

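// An exit for a function that has no matching entry on the shadow stack should
// not corrupt the trie: the enters that did happen still leave exactly one
// root behind.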
TEST(FunctionCallTrieTest, NoMatchingEntersForExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.enterFunction(2, 1, 0);
  Trie.enterFunction(3, 3, 0);
  Trie.exitFunction(1, 5, 0);
  const auto &R = Trie.getRoots();

  ASSERT_FALSE(R.empty());
  EXPECT_EQ(R.size(), size_t{1});
}

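// Conversely, an entry with no exit at all should still materialise a root
// node for the entered function.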
TEST(FunctionCallTrieTest, MissingFunctionExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.enterFunction(1, 1, 0);
  const auto &R = Trie.getRoots();

  ASSERT_FALSE(R.empty());
  EXPECT_EQ(R.size(), size_t{1});
}

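// Two disjoint top-level calls should each become their own root.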
TEST(FunctionCallTrieTest, MultipleRoots) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  // Enter and exit FId = 1.
  Trie.enterFunction(1, 1, 0);
  Trie.exitFunction(1, 2, 0);

  // Enter and exit FId = 2.
  Trie.enterFunction(2, 3, 0);
  Trie.exitFunction(2, 4, 0);

  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  ASSERT_EQ(R.size(), 2u);

  // Make sure the roots have different IDs.
  const auto R0 = R[0];
  const auto R1 = R[1];
  ASSERT_NE(R0->FId, R1->FId);

  // Inspect the roots and make sure they carry the right data.
  ASSERT_NE(R0, nullptr);
  EXPECT_EQ(R0->CallCount, 1u);
  EXPECT_EQ(R0->CumulativeLocalTime, 1u);

  ASSERT_NE(R1, nullptr);
  EXPECT_EQ(R1->CallCount, 1u);
  EXPECT_EQ(R1->CumulativeLocalTime, 1u);
}

// While missing an intermediary exit may be rare in practice, we still enforce
// that we can handle the case where we've missed the exit event somehow, in
// between call entry/exits. To illustrate, imagine the following shadow call
// stack:
//
//   f0@t0 -> f1@t1 -> f2@t2
//
// If for whatever reason we see an exit for `f2` @ t3, followed by an exit for
// `f0` @ t4 (i.e. no `f1` exit in between) then we need to handle the case of
// accounting local time to `f2` as d = (t3 - t2); then, once the `f0` exit
// arrives, local time to `f1` as d' = (t4 - t1), and local time to `f0` as
// d'' = (t4 - t0) - d'.
TEST(FunctionCallTrieTest, MissingIntermediaryExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(1, 0, 0);
  Trie.enterFunction(2, 100, 0);
  Trie.enterFunction(3, 200, 0);
  Trie.exitFunction(3, 300, 0);
  Trie.exitFunction(1, 400, 0);

  // What we should see at this point is all the functions in the trie in a
  // specific order (1 -> 2 -> 3) with the appropriate count(s) and local
  // latencies.
  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  ASSERT_EQ(R.size(), 1u);

  const auto &F1 = *R[0];
  ASSERT_EQ(F1.FId, 1);
  ASSERT_FALSE(F1.Callees.empty());

  const auto &F2 = *F1.Callees[0].NodePtr;
  ASSERT_EQ(F2.FId, 2);
  ASSERT_FALSE(F2.Callees.empty());

  const auto &F3 = *F2.Callees[0].NodePtr;
  ASSERT_EQ(F3.FId, 3);
  ASSERT_TRUE(F3.Callees.empty());

  // Now that we've established the preconditions, we check for specific
  // aspects of the nodes.
  EXPECT_EQ(F3.CallCount, 1u);
  EXPECT_EQ(F2.CallCount, 1u);
  EXPECT_EQ(F1.CallCount, 1u);
  EXPECT_EQ(F3.CumulativeLocalTime, 100u);
  EXPECT_EQ(F2.CumulativeLocalTime, 300u);
  EXPECT_EQ(F1.CumulativeLocalTime, 100u);
}

TEST(FunctionCallTrieTest, DeepCallStack) {
  // Simulate a relatively deep call stack (32 levels) and ensure that we can
  // properly pop all the way up the stack.
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  for (int i = 0; i < 32; ++i)
    Trie.enterFunction(i + 1, i, 0);
  Trie.exitFunction(1, 33, 0);

  // Here, validate that we have a 32-level deep function call path from the
  // root (FId 1) down to the leaf (FId 32).
  const auto &R = Trie.getRoots();
  ASSERT_EQ(R.size(), 1u);
  auto F = R[0];
  for (int i = 0; i < 32; ++i) {
    EXPECT_EQ(F->FId, i + 1);
    EXPECT_EQ(F->CallCount, 1u);
    if (F->Callees.empty() && i != 31)
      FAIL() << "Empty callees for FId " << F->FId;
    if (i != 31)
      F = F->Callees[0].NodePtr;
  }
}

// TODO: Test that we can handle cross-CPU migrations, where TSCs are not
// guaranteed to be synchronised.
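// deepCopyInto should reproduce the whole structure, including parent links,
// in a trie backed by a different set of allocators.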
TEST(FunctionCallTrieTest, DeepCopy) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(1, 0, 0);
  Trie.enterFunction(2, 1, 0);
  Trie.exitFunction(2, 2, 0);
  Trie.enterFunction(3, 3, 0);
  Trie.exitFunction(3, 4, 0);
  Trie.exitFunction(1, 5, 0);

  // We want to make a deep copy and compare notes.
  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Copy(B);
  Trie.deepCopyInto(Copy);

  ASSERT_NE(Trie.getRoots().size(), 0u);
  ASSERT_EQ(Trie.getRoots().size(), Copy.getRoots().size());
  const auto &R0Orig = *Trie.getRoots()[0];
  const auto &R0Copy = *Copy.getRoots()[0];
  EXPECT_EQ(R0Orig.FId, 1);
  EXPECT_EQ(R0Orig.FId, R0Copy.FId);

  ASSERT_EQ(R0Orig.Callees.size(), 2u);
  ASSERT_EQ(R0Copy.Callees.size(), 2u);

  const auto &F1Orig =
      *R0Orig.Callees
           .find_element(
               [](const FunctionCallTrie::NodeIdPair &R) { return R.FId == 2; })
           ->NodePtr;
  const auto &F1Copy =
      *R0Copy.Callees
           .find_element(
               [](const FunctionCallTrie::NodeIdPair &R) { return R.FId == 2; })
           ->NodePtr;
  EXPECT_EQ(&R0Orig, F1Orig.Parent);
  EXPECT_EQ(&R0Copy, F1Copy.Parent);
}

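// Merging two tries of identical shape should double the call counts and sum
// the cumulative local times at every node, with the destination trie backed
// by a different set of allocators.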
TEST(FunctionCallTrieTest, MergeInto) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie T0(A);
  FunctionCallTrie T1(A);

  // 1 -> 2 -> 3
  T0.enterFunction(1, 0, 0);
  T0.enterFunction(2, 1, 0);
  T0.enterFunction(3, 2, 0);
  T0.exitFunction(3, 3, 0);
  T0.exitFunction(2, 4, 0);
  T0.exitFunction(1, 5, 0);

  // 1 -> 2 -> 3
  T1.enterFunction(1, 0, 0);
  T1.enterFunction(2, 1, 0);
  T1.enterFunction(3, 2, 0);
  T1.exitFunction(3, 3, 0);
  T1.exitFunction(2, 4, 0);
  T1.exitFunction(1, 5, 0);

  // We use a different allocator here to make sure that we're able to transfer
  // data into a FunctionCallTrie which uses a different allocator. This
  // reflects the intended usage scenario for when we're collecting profiles
  // that aggregate across threads.
  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Merged(B);

  T0.mergeInto(Merged);
  T1.mergeInto(Merged);

  ASSERT_EQ(Merged.getRoots().size(), 1u);
  const auto &R0 = *Merged.getRoots()[0];
  EXPECT_EQ(R0.FId, 1);
  EXPECT_EQ(R0.CallCount, 2u);
  EXPECT_EQ(R0.CumulativeLocalTime, 10u);
  EXPECT_EQ(R0.Callees.size(), 1u);

  const auto &F1 = *R0.Callees[0].NodePtr;
  EXPECT_EQ(F1.FId, 2);
  EXPECT_EQ(F1.CallCount, 2u);
  EXPECT_EQ(F1.CumulativeLocalTime, 6u);
  EXPECT_EQ(F1.Callees.size(), 1u);

  const auto &F2 = *F1.Callees[0].NodePtr;
  EXPECT_EQ(F2.FId, 3);
  EXPECT_EQ(F2.CallCount, 2u);
  EXPECT_EQ(F2.CumulativeLocalTime, 2u);
  EXPECT_EQ(F2.Callees.size(), 0u);
}

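// Exercise the object lifecycle the profiling runtime depends on: construct,
// destroy, and re-construct tries and their allocators in raw aligned storage
// via placement new.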
TEST(FunctionCallTrieTest, PlacementNewOnAlignedStorage) {
  profilingFlags()->setDefaults();
  typename std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
                                alignof(FunctionCallTrie::Allocators)>::type
      AllocatorsStorage;
  new (&AllocatorsStorage)
      FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
  auto *A =
      reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage);

  typename std::aligned_storage<sizeof(FunctionCallTrie),
                                alignof(FunctionCallTrie)>::type FCTStorage;
  new (&FCTStorage) FunctionCallTrie(*A);
  auto *T = reinterpret_cast<FunctionCallTrie *>(&FCTStorage);

  // Put some data into it.
  T->enterFunction(1, 0, 0);
  T->exitFunction(1, 1, 0);

  // Re-initialize the objects in storage.
  T->~FunctionCallTrie();
  A->~Allocators();
  new (A) FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
  new (T) FunctionCallTrie(*A);

  // Then put some data into it again.
  T->enterFunction(1, 0, 0);
  T->exitFunction(1, 1, 0);
}

} // namespace

} // namespace __xray