//===-- function_call_trie_test.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a function call tracing system.
//
//===----------------------------------------------------------------------===//
#include "xray_function_call_trie.h"
#include "gtest/gtest.h"
#include <cstdint>
#include <limits>
#include <new>
#include <type_traits>

namespace __xray {

namespace {

TEST(FunctionCallTrieTest, ConstructWithTLSAllocators) {
  profilingFlags()->setDefaults();
  FunctionCallTrie::Allocators Allocators = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(Allocators);
}

TEST(FunctionCallTrieTest, EnterAndExitFunction) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  uint64_t TSC = 1;
  uint16_t CPU = 0;
  Trie.enterFunction(/*FId=*/1, TSC++, CPU++);
  Trie.exitFunction(/*FId=*/1, TSC++, CPU++);
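  // A single enter/exit pair: entering at TSC 1 and exiting at TSC 2 should
  // record exactly one call with a cumulative local time of one tick.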
  const auto &R = Trie.getRoots();

  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime, 1u);
}

TEST(FunctionCallTrieTest, HandleTSCOverflow) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(/*FId=*/1, /*TSC=*/std::numeric_limits<uint64_t>::max(),
                     /*CPU=*/0);
  Trie.exitFunction(/*FId=*/1, /*TSC=*/1, /*CPU=*/0);
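  // The exit TSC (1) is smaller than the entry TSC (UINT64_MAX), simulating a
  // wrapped counter. The trie should account the wrapped delta (presumably
  // computed as (UINT64_MAX - EntryTSC) + ExitTSC), i.e. one tick here.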
  const auto &R = Trie.getRoots();

  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime, 1u);
}

TEST(FunctionCallTrieTest, MaximalCumulativeTime) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(/*FId=*/1, /*TSC=*/1, /*CPU=*/0);
  Trie.exitFunction(/*FId=*/1, /*TSC=*/0, /*CPU=*/0);
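  // Entering at TSC 1 and exiting at TSC 0 also reads as a wrap-around; the
  // same accounting yields (UINT64_MAX - 1) + 0 ticks, the largest local time
  // a single call can record.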
  const auto &R = Trie.getRoots();

  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime,
            std::numeric_limits<uint64_t>::max() - 1);
}

TEST(FunctionCallTrieTest, MissingFunctionEntry) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.exitFunction(/*FId=*/1, /*TSC=*/1, /*CPU=*/0);
  const auto &R = Trie.getRoots();

  ASSERT_TRUE(R.empty());
}

TEST(FunctionCallTrieTest, NoMatchingEntersForExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.enterFunction(/*FId=*/2, /*TSC=*/1, /*CPU=*/0);
  Trie.enterFunction(/*FId=*/3, /*TSC=*/3, /*CPU=*/0);
  Trie.exitFunction(/*FId=*/1, /*TSC=*/5, /*CPU=*/0);
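  // Exiting FId 1, which was never entered, should unwind the shadow stack
  // without crashing, leaving a single root (for FId 2, the first entry).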
  const auto &R = Trie.getRoots();

  ASSERT_FALSE(R.empty());
  EXPECT_EQ(R.size(), size_t{1});
}

TEST(FunctionCallTrieTest, MissingFunctionExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.enterFunction(/*FId=*/1, /*TSC=*/1, /*CPU=*/0);
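  // An entry with no matching exit should still materialize a root node for
  // FId 1, even though no time has been accounted to it yet.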
  const auto &R = Trie.getRoots();

  ASSERT_FALSE(R.empty());
  EXPECT_EQ(R.size(), size_t{1});
}

TEST(FunctionCallTrieTest, MultipleRoots) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  // Enter and exit FId = 1.
  Trie.enterFunction(/*FId=*/1, /*TSC=*/1, /*CPU=*/0);
  Trie.exitFunction(/*FId=*/1, /*TSC=*/2, /*CPU=*/0);

  // Enter and exit FId = 2.
  Trie.enterFunction(/*FId=*/2, /*TSC=*/3, /*CPU=*/0);
  Trie.exitFunction(/*FId=*/2, /*TSC=*/4, /*CPU=*/0);

  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  ASSERT_EQ(R.size(), 2u);

  // Make sure the roots have different IDs.
  const auto R0 = R[0];
  const auto R1 = R[1];
  ASSERT_NE(R0->FId, R1->FId);
  // Inspect the roots to make sure they have the right data.
  ASSERT_NE(R0, nullptr);
  EXPECT_EQ(R0->CallCount, 1u);
  EXPECT_EQ(R0->CumulativeLocalTime, 1u);

  ASSERT_NE(R1, nullptr);
  EXPECT_EQ(R1->CallCount, 1u);
  EXPECT_EQ(R1->CumulativeLocalTime, 1u);
}

// While missing an intermediary exit may be rare in practice, we still
// enforce that we can handle the case where we've missed an exit event
// somehow, in between call entries/exits. To illustrate, imagine the
// following shadow call stack:
//
//   f0@t0 -> f1@t1 -> f2@t2
//
// If for whatever reason we see an exit for `f2` @ t3, followed by an exit for
// `f0` @ t4 (i.e. no `f1` exit in between), then we need to handle the case of
// accounting local time to `f2` as d = (t3 - t2) when it exits, and then, when
// `f0` exits, local time to `f1` as d' = (t4 - t1) and local time to `f0` as
// d'' = (t4 - t0) - d'.
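//
// With the concrete TSCs used below (t0 = 0, t1 = 100, t2 = 200, t3 = 300,
// t4 = 400) this works out to d = 100 for FId 3, d' = 300 for FId 2, and
// d'' = 100 for FId 1, which is what the assertions at the end of this test
// check.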
TEST(FunctionCallTrieTest, MissingIntermediaryExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(/*FId=*/1, /*TSC=*/0, /*CPU=*/0);
  Trie.enterFunction(/*FId=*/2, /*TSC=*/100, /*CPU=*/0);
  Trie.enterFunction(/*FId=*/3, /*TSC=*/200, /*CPU=*/0);
  Trie.exitFunction(/*FId=*/3, /*TSC=*/300, /*CPU=*/0);
  Trie.exitFunction(/*FId=*/1, /*TSC=*/400, /*CPU=*/0);

  // What we should see at this point is all the functions in the trie in a
  // specific order (1 -> 2 -> 3) with the appropriate count(s) and local
  // latencies.
  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  ASSERT_EQ(R.size(), 1u);

  const auto &F1 = *R[0];
  ASSERT_EQ(F1.FId, 1);
  ASSERT_FALSE(F1.Callees.empty());

  const auto &F2 = *F1.Callees[0].NodePtr;
  ASSERT_EQ(F2.FId, 2);
  ASSERT_FALSE(F2.Callees.empty());

  const auto &F3 = *F2.Callees[0].NodePtr;
  ASSERT_EQ(F3.FId, 3);
  ASSERT_TRUE(F3.Callees.empty());

  // Now that we've established the preconditions, we check for specific
  // aspects of the nodes.
  EXPECT_EQ(F3.CallCount, 1u);
  EXPECT_EQ(F2.CallCount, 1u);
  EXPECT_EQ(F1.CallCount, 1u);
  EXPECT_EQ(F3.CumulativeLocalTime, 100u);
  EXPECT_EQ(F2.CumulativeLocalTime, 300u);
  EXPECT_EQ(F1.CumulativeLocalTime, 100u);
}

TEST(FunctionCallTrieTest, DeepCallStack) {
  // Simulate a relatively deep call stack (32 levels) and ensure that we can
  // properly pop all the way up the stack.
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  for (int i = 0; i < 32; ++i)
    Trie.enterFunction(/*FId=*/i + 1, /*TSC=*/i, /*CPU=*/0);
  Trie.exitFunction(/*FId=*/1, /*TSC=*/33, /*CPU=*/0);

  // Here, validate that we have a 32-level deep function call path from the
  // root (FId 1) down to the leaf (FId 32).
  const auto &R = Trie.getRoots();
  ASSERT_EQ(R.size(), 1u);
  auto F = R[0];
  for (int i = 0; i < 32; ++i) {
    EXPECT_EQ(F->FId, i + 1);
    EXPECT_EQ(F->CallCount, 1u);
    if (F->Callees.empty() && i != 31)
      FAIL() << "Empty callees for FId " << F->FId;
    if (i != 31)
      F = F->Callees[0].NodePtr;
  }
}

// TODO: Test that we can handle cross-CPU migrations, where TSCs are not
// guaranteed to be synchronised.
TEST(FunctionCallTrieTest, DeepCopy) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(/*FId=*/1, /*TSC=*/0, /*CPU=*/0);
  Trie.enterFunction(/*FId=*/2, /*TSC=*/1, /*CPU=*/0);
  Trie.exitFunction(/*FId=*/2, /*TSC=*/2, /*CPU=*/0);
  Trie.enterFunction(/*FId=*/3, /*TSC=*/3, /*CPU=*/0);
  Trie.exitFunction(/*FId=*/3, /*TSC=*/4, /*CPU=*/0);
  Trie.exitFunction(/*FId=*/1, /*TSC=*/5, /*CPU=*/0);

  // We want to make a deep copy and compare notes.
  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Copy(B);
  Trie.deepCopyInto(Copy);

  ASSERT_NE(Trie.getRoots().size(), 0u);
  ASSERT_EQ(Trie.getRoots().size(), Copy.getRoots().size());
  const auto &R0Orig = *Trie.getRoots()[0];
  const auto &R0Copy = *Copy.getRoots()[0];
  EXPECT_EQ(R0Orig.FId, 1);
  EXPECT_EQ(R0Orig.FId, R0Copy.FId);

  ASSERT_EQ(R0Orig.Callees.size(), 2u);
  ASSERT_EQ(R0Copy.Callees.size(), 2u);
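
  // The interesting property to check below is that the copy is a fully
  // independent tree: each copied child's Parent pointer must refer to the
  // corresponding node in the copy, not to a node in the original trie.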
  const auto &F1Orig =
      *R0Orig.Callees
           .find_element(
               [](const FunctionCallTrie::NodeIdPair &R) { return R.FId == 2; })
           ->NodePtr;
  const auto &F1Copy =
      *R0Copy.Callees
           .find_element(
               [](const FunctionCallTrie::NodeIdPair &R) { return R.FId == 2; })
           ->NodePtr;
  EXPECT_EQ(&R0Orig, F1Orig.Parent);
  EXPECT_EQ(&R0Copy, F1Copy.Parent);
}

TEST(FunctionCallTrieTest, MergeInto) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie T0(A);
  FunctionCallTrie T1(A);

  // 1 -> 2 -> 3
  T0.enterFunction(/*FId=*/1, /*TSC=*/0, /*CPU=*/0);
  T0.enterFunction(/*FId=*/2, /*TSC=*/1, /*CPU=*/0);
  T0.enterFunction(/*FId=*/3, /*TSC=*/2, /*CPU=*/0);
  T0.exitFunction(/*FId=*/3, /*TSC=*/3, /*CPU=*/0);
  T0.exitFunction(/*FId=*/2, /*TSC=*/4, /*CPU=*/0);
  T0.exitFunction(/*FId=*/1, /*TSC=*/5, /*CPU=*/0);

  // 1 -> 2 -> 3
  T1.enterFunction(/*FId=*/1, /*TSC=*/0, /*CPU=*/0);
  T1.enterFunction(/*FId=*/2, /*TSC=*/1, /*CPU=*/0);
  T1.enterFunction(/*FId=*/3, /*TSC=*/2, /*CPU=*/0);
  T1.exitFunction(/*FId=*/3, /*TSC=*/3, /*CPU=*/0);
  T1.exitFunction(/*FId=*/2, /*TSC=*/4, /*CPU=*/0);
  T1.exitFunction(/*FId=*/1, /*TSC=*/5, /*CPU=*/0);

  // We use a different allocator here to make sure that we're able to transfer
  // data into a FunctionCallTrie which uses a different allocator. This
  // reflects the intended usage scenario for when we're collecting profiles
  // that aggregate across threads.
  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Merged(B);

  T0.mergeInto(Merged);
  T1.mergeInto(Merged);
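
  // Each input trie records cumulative local times of 5, 3, and 1 ticks for
  // FIds 1, 2, and 3 respectively (exit TSC minus entry TSC); merging two
  // identical tries should double both the call counts and the local times.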
  ASSERT_EQ(Merged.getRoots().size(), 1u);
  const auto &R0 = *Merged.getRoots()[0];
  EXPECT_EQ(R0.FId, 1);
  EXPECT_EQ(R0.CallCount, 2u);
  EXPECT_EQ(R0.CumulativeLocalTime, 10u);
  EXPECT_EQ(R0.Callees.size(), 1u);

  const auto &F1 = *R0.Callees[0].NodePtr;
  EXPECT_EQ(F1.FId, 2);
  EXPECT_EQ(F1.CallCount, 2u);
  EXPECT_EQ(F1.CumulativeLocalTime, 6u);
  EXPECT_EQ(F1.Callees.size(), 1u);

  const auto &F2 = *F1.Callees[0].NodePtr;
  EXPECT_EQ(F2.FId, 3);
  EXPECT_EQ(F2.CallCount, 2u);
  EXPECT_EQ(F2.CumulativeLocalTime, 2u);
  EXPECT_EQ(F2.Callees.size(), 0u);
}

TEST(FunctionCallTrieTest, PlacementNewOnAlignedStorage) {
  profilingFlags()->setDefaults();
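  // This pattern is meant to mirror how the profiling runtime keeps tries and
  // their allocators in uninitialized, aligned thread-local storage,
  // constructing and destroying them explicitly via placement new and manual
  // destructor calls.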
  typename std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
                                alignof(FunctionCallTrie::Allocators)>::type
      AllocatorsStorage;
  new (&AllocatorsStorage)
      FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
  auto *A =
      reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage);

  typename std::aligned_storage<sizeof(FunctionCallTrie),
                                alignof(FunctionCallTrie)>::type FCTStorage;
  new (&FCTStorage) FunctionCallTrie(*A);
  auto *T = reinterpret_cast<FunctionCallTrie *>(&FCTStorage);

  // Put some data into it.
  T->enterFunction(/*FId=*/1, /*TSC=*/0, /*CPU=*/0);
  T->exitFunction(/*FId=*/1, /*TSC=*/1, /*CPU=*/0);

  // Re-initialize the objects in storage.
  T->~FunctionCallTrie();
  A->~Allocators();
  new (A) FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
  new (T) FunctionCallTrie(*A);

  // Then put some data into it again.
  T->enterFunction(/*FId=*/1, /*TSC=*/0, /*CPU=*/0);
  T->exitFunction(/*FId=*/1, /*TSC=*/1, /*CPU=*/0);
}

} // namespace

} // namespace __xray