//===-- function_call_trie_test.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a function call tracing system.
//
//===----------------------------------------------------------------------===//
#include "xray_function_call_trie.h"
#include "gtest/gtest.h"
#include <cstddef>
#include <cstdint>
#include <limits>

namespace __xray {

namespace {

TEST(FunctionCallTrieTest, ConstructWithTLSAllocators) {
  profilingFlags()->setDefaults();
  FunctionCallTrie::Allocators Allocators = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(Allocators);
}

TEST(FunctionCallTrieTest, EnterAndExitFunction) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

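  // A single enter/exit pair at consecutive TSC values should record exactly
  // one call with one cycle of cumulative local time, per the assertions
  // below.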
  uint64_t TSC = 1;
  uint16_t CPU = 0;
  Trie.enterFunction(1, TSC++, CPU++);
  Trie.exitFunction(1, TSC++, CPU++);
  const auto &R = Trie.getRoots();

  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime, 1u);
}

TEST(FunctionCallTrieTest, HandleTSCOverflow) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

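  // Entering at the maximum TSC value and exiting at 1 exercises counter
  // wraparound: assuming the trie accounts for a single overflow of the
  // counter, the local time comes out to (max - max) + 1 = 1 cycle, as
  // asserted below.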
  Trie.enterFunction(1, std::numeric_limits<uint64_t>::max(), 0);
  Trie.exitFunction(1, 1, 0);
  const auto &R = Trie.getRoots();

  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime, 1u);
}

TEST(FunctionCallTrieTest, MaximalCumulativeTime) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

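  // An exit TSC lower than the entry TSC is treated as wraparound, so
  // entering at 1 and exiting at 0 should yield the near-maximal local time
  // (max - 1) + 0 = max - 1, as asserted below.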
  Trie.enterFunction(1, 1, 0);
  Trie.exitFunction(1, 0, 0);
  const auto &R = Trie.getRoots();

  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime,
            std::numeric_limits<uint64_t>::max() - 1);
}

TEST(FunctionCallTrieTest, MissingFunctionEntry) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.exitFunction(1, 1, 0);
  const auto &R = Trie.getRoots();

  ASSERT_TRUE(R.empty());
}

TEST(FunctionCallTrieTest, NoMatchingEntersForExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
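  // Enter functions 2 and 3, then see an exit for 1, which was never entered.
  // The trie should tolerate the unmatched exit without adding a root for
  // FId 1; the assertions below only require that a single root remains.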
  Trie.enterFunction(2, 1, 0);
  Trie.enterFunction(3, 3, 0);
  Trie.exitFunction(1, 5, 0);
  const auto &R = Trie.getRoots();

  ASSERT_FALSE(R.empty());
  EXPECT_EQ(R.size(), size_t{1});
}

TEST(FunctionCallTrieTest, MissingFunctionExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
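  // An enter with no matching exit should still leave a root node for FId 1
  // in the trie.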
  Trie.enterFunction(1, 1, 0);
  const auto &R = Trie.getRoots();

  ASSERT_FALSE(R.empty());
  EXPECT_EQ(R.size(), size_t{1});
}

TEST(FunctionCallTrieTest, MultipleRoots) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  // Enter and exit FId = 1.
  Trie.enterFunction(1, 1, 0);
  Trie.exitFunction(1, 2, 0);

  // Enter and exit FId = 2.
  Trie.enterFunction(2, 3, 0);
  Trie.exitFunction(2, 4, 0);

  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  ASSERT_EQ(R.size(), 2u);

  // Make sure the roots have different IDs.
  const auto R0 = R[0];
  const auto R1 = R[1];
  ASSERT_NE(R0->FId, R1->FId);

  // Check that the roots carry the right data.
  ASSERT_NE(R0, nullptr);
  EXPECT_EQ(R0->CallCount, 1u);
  EXPECT_EQ(R0->CumulativeLocalTime, 1u);

  ASSERT_NE(R1, nullptr);
  EXPECT_EQ(R1->CallCount, 1u);
  EXPECT_EQ(R1->CumulativeLocalTime, 1u);
}

// While missing an intermediary exit may be rare in practice, we still enforce
// that we can handle the case where we've missed the exit event somehow, in
// between call entry/exits. To illustrate, imagine the following shadow call
// stack:
//
//   f0@t0 -> f1@t1 -> f2@t2
//
// If for whatever reason we see an exit for `f2` @ t3, followed by an exit for
// `f0` @ t4 (i.e. no `f1` exit in between) then we need to handle the case of
// accounting local time to `f2` as d = (t3 - t2), then local time to `f1` as
// d' = (t4 - t1), and then local time to `f0` as d'' = (t4 - t0) - d'.
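// Concretely, with the timestamps used in this test (t0 = 0, t1 = 100,
// t2 = 200, t3 = 300, t4 = 400), that accounting gives d = 100 for FId 3,
// d' = 300 for FId 2, and d'' = 100 for FId 1, matching the values asserted
// below.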
TEST(FunctionCallTrieTest, MissingIntermediaryExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(1, 0, 0);
  Trie.enterFunction(2, 100, 0);
  Trie.enterFunction(3, 200, 0);
  Trie.exitFunction(3, 300, 0);
  Trie.exitFunction(1, 400, 0);

  // What we should see at this point is all the functions in the trie in a
  // specific order (1 -> 2 -> 3) with the appropriate count(s) and local
  // latencies.
  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  ASSERT_EQ(R.size(), 1u);

  const auto &F1 = *R[0];
  ASSERT_EQ(F1.FId, 1);
  ASSERT_FALSE(F1.Callees.empty());

  const auto &F2 = *F1.Callees[0].NodePtr;
  ASSERT_EQ(F2.FId, 2);
  ASSERT_FALSE(F2.Callees.empty());

  const auto &F3 = *F2.Callees[0].NodePtr;
  ASSERT_EQ(F3.FId, 3);
  ASSERT_TRUE(F3.Callees.empty());

  // Now that we've established the preconditions, we check for specific
  // aspects of the nodes.
  EXPECT_EQ(F3.CallCount, 1u);
  EXPECT_EQ(F2.CallCount, 1u);
  EXPECT_EQ(F1.CallCount, 1u);
  EXPECT_EQ(F3.CumulativeLocalTime, 100u);
  EXPECT_EQ(F2.CumulativeLocalTime, 300u);
  EXPECT_EQ(F1.CumulativeLocalTime, 100u);
}

TEST(FunctionCallTrieTest, DeepCallStack) {
  // Simulate a relatively deep call stack (32 levels) and ensure that we can
  // properly pop all the way up the stack.
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  for (int i = 0; i < 32; ++i)
    Trie.enterFunction(i + 1, i, 0);
  Trie.exitFunction(1, 33, 0);

  // Here, validate that we have a 32-level deep function call path from the
  // root (1) down to the leaf (32).
  const auto &R = Trie.getRoots();
  ASSERT_EQ(R.size(), 1u);
  auto F = R[0];
  for (int i = 0; i < 32; ++i) {
    EXPECT_EQ(F->FId, i + 1);
    EXPECT_EQ(F->CallCount, 1u);
    if (F->Callees.empty() && i != 31)
      FAIL() << "Empty callees for FId " << F->FId;
    if (i != 31)
      F = F->Callees[0].NodePtr;
  }
}

// TODO: Test that we can handle cross-CPU migrations, where TSCs are not
// guaranteed to be synchronised.
TEST(FunctionCallTrieTest, DeepCopy) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(1, 0, 0);
  Trie.enterFunction(2, 1, 0);
  Trie.exitFunction(2, 2, 0);
  Trie.enterFunction(3, 3, 0);
  Trie.exitFunction(3, 4, 0);
  Trie.exitFunction(1, 5, 0);

  // We want to make a deep copy and compare notes.
  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Copy(B);
  Trie.deepCopyInto(Copy);

  ASSERT_NE(Trie.getRoots().size(), 0u);
  ASSERT_EQ(Trie.getRoots().size(), Copy.getRoots().size());
  const auto &R0Orig = *Trie.getRoots()[0];
  const auto &R0Copy = *Copy.getRoots()[0];
  EXPECT_EQ(R0Orig.FId, 1);
  EXPECT_EQ(R0Orig.FId, R0Copy.FId);

  ASSERT_EQ(R0Orig.Callees.size(), 2u);
  ASSERT_EQ(R0Copy.Callees.size(), 2u);

  const auto &F1Orig =
      *R0Orig.Callees
           .find_element(
               [](const FunctionCallTrie::NodeIdPair &R) { return R.FId == 2; })
           ->NodePtr;
  const auto &F1Copy =
      *R0Copy.Callees
           .find_element(
               [](const FunctionCallTrie::NodeIdPair &R) { return R.FId == 2; })
           ->NodePtr;
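  // The deep copy must rebuild the Parent pointers so that they point into
  // the copied nodes rather than back into the original trie.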
  EXPECT_EQ(&R0Orig, F1Orig.Parent);
  EXPECT_EQ(&R0Copy, F1Copy.Parent);
}

TEST(FunctionCallTrieTest, MergeInto) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie T0(A);
  FunctionCallTrie T1(A);

  // 1 -> 2 -> 3
  T0.enterFunction(1, 0, 0);
  T0.enterFunction(2, 1, 0);
  T0.enterFunction(3, 2, 0);
  T0.exitFunction(3, 3, 0);
  T0.exitFunction(2, 4, 0);
  T0.exitFunction(1, 5, 0);

  // 1 -> 2 -> 3
  T1.enterFunction(1, 0, 0);
  T1.enterFunction(2, 1, 0);
  T1.enterFunction(3, 2, 0);
  T1.exitFunction(3, 3, 0);
  T1.exitFunction(2, 4, 0);
  T1.exitFunction(1, 5, 0);

  // We use a different allocator here to make sure that we're able to transfer
  // data into a FunctionCallTrie which uses a different allocator. This
  // reflects the intended usage scenario for when we're collecting profiles
  // that aggregate across threads.
  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Merged(B);

  T0.mergeInto(Merged);
  T1.mergeInto(Merged);

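  // Each source trie accounts 5 cycles of local time to FId 1 (TSC 5 - 0),
  // 3 cycles to FId 2 (4 - 1), and 1 cycle to FId 3 (3 - 2); merging the two
  // identical tries should double both the call counts and the cumulative
  // local times.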
  ASSERT_EQ(Merged.getRoots().size(), 1u);
  const auto &R0 = *Merged.getRoots()[0];
  EXPECT_EQ(R0.FId, 1);
  EXPECT_EQ(R0.CallCount, 2u);
  EXPECT_EQ(R0.CumulativeLocalTime, 10u);
  EXPECT_EQ(R0.Callees.size(), 1u);

  const auto &F1 = *R0.Callees[0].NodePtr;
  EXPECT_EQ(F1.FId, 2);
  EXPECT_EQ(F1.CallCount, 2u);
  EXPECT_EQ(F1.CumulativeLocalTime, 6u);
  EXPECT_EQ(F1.Callees.size(), 1u);

  const auto &F2 = *F1.Callees[0].NodePtr;
  EXPECT_EQ(F2.FId, 3);
  EXPECT_EQ(F2.CallCount, 2u);
  EXPECT_EQ(F2.CumulativeLocalTime, 2u);
  EXPECT_EQ(F2.Callees.size(), 0u);
}

TEST(FunctionCallTrieTest, PlacementNewOnAlignedStorage) {
  profilingFlags()->setDefaults();
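  // Construct the allocators and the trie in raw, suitably aligned buffers so
  // that we can destroy and re-create them in place, in the way a runtime
  // might manage such objects in thread-local storage.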
  alignas(FunctionCallTrie::Allocators)
      std::byte AllocatorsStorage[sizeof(FunctionCallTrie::Allocators)];
  new (&AllocatorsStorage)
      FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
  auto *A =
      reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage);

  alignas(FunctionCallTrie) std::byte FCTStorage[sizeof(FunctionCallTrie)];
  new (&FCTStorage) FunctionCallTrie(*A);
  auto *T = reinterpret_cast<FunctionCallTrie *>(&FCTStorage);

  // Put some data into it.
  T->enterFunction(1, 0, 0);
  T->exitFunction(1, 1, 0);

  // Re-initialize the objects in storage.
  T->~FunctionCallTrie();
  A->~Allocators();
  new (A) FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
  new (T) FunctionCallTrie(*A);

  // Then put some data into it again.
  T->enterFunction(1, 0, 0);
  T->exitFunction(1, 1, 0);
}

} // namespace

} // namespace __xray