| 1 | //===- llvm/Analysis/ProfileSummaryInfo.h - profile summary ---*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file contains a pass that provides access to profile summary |
| 10 | // information. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #ifndef LLVM_ANALYSIS_PROFILESUMMARYINFO_H |
| 15 | #define LLVM_ANALYSIS_PROFILESUMMARYINFO_H |
| 16 | |
| 17 | #include "llvm/ADT/DenseMap.h" |
| 18 | #include "llvm/IR/Function.h" |
| 19 | #include "llvm/IR/Instructions.h" |
| 20 | #include "llvm/IR/PassManager.h" |
| 21 | #include "llvm/IR/ProfileSummary.h" |
| 22 | #include "llvm/Pass.h" |
| 23 | #include "llvm/Support/BlockFrequency.h" |
| 24 | #include "llvm/Support/Compiler.h" |
| 25 | #include <memory> |
| 26 | #include <optional> |
| 27 | |
| 28 | namespace llvm { |
| 29 | class BlockFrequencyInfo; |
| 30 | class MachineFunction; |
| 31 | |
| 32 | /// Analysis providing profile information. |
| 33 | /// |
/// This is an immutable analysis pass that provides the ability to query global
/// (program-level) profile information. The main APIs are isHotCount and
/// isColdCount, which tell whether a given profile count is considered hot/cold
/// based on the profile summary. This also provides convenience methods to
/// check whether a function is hot or cold.
| 39 | |
| 40 | // FIXME: Provide convenience methods to determine hotness/coldness of other IR |
| 41 | // units. This would require making this depend on BFI. |
| 42 | class ProfileSummaryInfo { |
| 43 | private: |
| 44 | const Module *M; |
| 45 | std::unique_ptr<ProfileSummary> Summary; |
| 46 | void computeThresholds(); |
| 47 | // Count thresholds to answer isHotCount and isColdCount queries. |
| 48 | std::optional<uint64_t> HotCountThreshold, ColdCountThreshold; |
| 49 | // True if the working set size of the code is considered huge, |
| 50 | // because the number of profile counts required to reach the hot |
| 51 | // percentile is above a huge threshold. |
| 52 | std::optional<bool> HasHugeWorkingSetSize; |
| 53 | // True if the working set size of the code is considered large, |
| 54 | // because the number of profile counts required to reach the hot |
| 55 | // percentile is above a large threshold. |
| 56 | std::optional<bool> HasLargeWorkingSetSize; |
| 57 | // Compute the threshold for a given cutoff. |
| 58 | std::optional<uint64_t> computeThreshold(int PercentileCutoff) const; |
| 59 | // The map that caches the threshold values. The keys are the percentile |
| 60 | // cutoff values and the values are the corresponding threshold values. |
| 61 | mutable DenseMap<int, uint64_t> ThresholdCache; |
| 62 | |
| 63 | public: |
| 64 | ProfileSummaryInfo(const Module &M) : M(&M) { refresh(); } |
| 65 | ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default; |
| 66 | |
| 67 | /// If a summary is provided as argument, use that. Otherwise, |
| 68 | /// if the `Summary` member is null, attempt to refresh. |
| 69 | LLVM_ABI void refresh(std::unique_ptr<ProfileSummary> &&Other = nullptr); |
| 70 | |
| 71 | /// Returns true if profile summary is available. |
| 72 | bool hasProfileSummary() const { return Summary != nullptr; } |
| 73 | |
| 74 | /// Returns true if module \c M has sample profile. |
| 75 | bool hasSampleProfile() const { |
| 76 | return hasProfileSummary() && |
| 77 | Summary->getKind() == ProfileSummary::PSK_Sample; |
| 78 | } |
| 79 | |
| 80 | /// Returns true if module \c M has instrumentation profile. |
| 81 | bool hasInstrumentationProfile() const { |
| 82 | return hasProfileSummary() && |
| 83 | Summary->getKind() == ProfileSummary::PSK_Instr; |
| 84 | } |
| 85 | |
| 86 | /// Returns true if module \c M has context sensitive instrumentation profile. |
| 87 | bool hasCSInstrumentationProfile() const { |
| 88 | return hasProfileSummary() && |
| 89 | Summary->getKind() == ProfileSummary::PSK_CSInstr; |
| 90 | } |
| 91 | |
| 92 | /// Handle the invalidation of this information. |
| 93 | /// |
| 94 | /// When used as a result of \c ProfileSummaryAnalysis this method will be |
| 95 | /// called when the module this was computed for changes. Since profile |
| 96 | /// summary is immutable after it is annotated on the module, we return false |
| 97 | /// here. |
| 98 | bool invalidate(Module &, const PreservedAnalyses &, |
| 99 | ModuleAnalysisManager::Invalidator &) { |
| 100 | return false; |
| 101 | } |
| 102 | |
| 103 | /// Returns the profile count for \p CallInst. |
| 104 | LLVM_ABI std::optional<uint64_t> |
| 105 | getProfileCount(const CallBase &CallInst, BlockFrequencyInfo *BFI, |
| 106 | bool AllowSynthetic = false) const; |
| 107 | /// Returns true if module \c M has partial-profile sample profile. |
| 108 | LLVM_ABI bool hasPartialSampleProfile() const; |
| 109 | /// Returns true if the working set size of the code is considered huge. |
| 110 | LLVM_ABI bool hasHugeWorkingSetSize() const; |
| 111 | /// Returns true if the working set size of the code is considered large. |
| 112 | LLVM_ABI bool hasLargeWorkingSetSize() const; |
| 113 | /// Returns true if \p F has hot function entry. If it returns false, it |
| 114 | /// either means it is not hot or it is unknown whether it is hot or not (for |
| 115 | /// example, no profile data is available). |
| 116 | template <typename FuncT> bool isFunctionEntryHot(const FuncT *F) const { |
| 117 | if (!F || !hasProfileSummary()) |
| 118 | return false; |
| 119 | std::optional<Function::ProfileCount> FunctionCount = getEntryCount(F); |
| 120 | // FIXME: The heuristic used below for determining hotness is based on |
| 121 | // preliminary SPEC tuning for inliner. This will eventually be a |
| 122 | // convenience method that calls isHotCount. |
| 123 | return FunctionCount && isHotCount(C: FunctionCount->getCount()); |
| 124 | } |
| 125 | |
| 126 | /// Returns true if \p F contains hot code. |
| 127 | template <typename FuncT, typename BFIT> |
| 128 | bool isFunctionHotInCallGraph(const FuncT *F, BFIT &BFI) const { |
| 129 | if (!F || !hasProfileSummary()) |
| 130 | return false; |
| 131 | if (auto FunctionCount = getEntryCount(F)) |
| 132 | if (isHotCount(C: FunctionCount->getCount())) |
| 133 | return true; |
| 134 | |
| 135 | if (auto TotalCallCount = getTotalCallCount(F)) |
| 136 | if (isHotCount(C: *TotalCallCount)) |
| 137 | return true; |
| 138 | |
| 139 | for (const auto &BB : *F) |
| 140 | if (isHotBlock(&BB, &BFI)) |
| 141 | return true; |
| 142 | return false; |
| 143 | } |
| 144 | /// Returns true if \p F has cold function entry. |
| 145 | LLVM_ABI bool isFunctionEntryCold(const Function *F) const; |
| 146 | /// Returns true if \p F contains only cold code. |
| 147 | template <typename FuncT, typename BFIT> |
| 148 | bool isFunctionColdInCallGraph(const FuncT *F, BFIT &BFI) const { |
| 149 | if (!F || !hasProfileSummary()) |
| 150 | return false; |
| 151 | if (auto FunctionCount = getEntryCount(F)) |
| 152 | if (!isColdCount(C: FunctionCount->getCount())) |
| 153 | return false; |
| 154 | |
| 155 | if (auto TotalCallCount = getTotalCallCount(F)) |
| 156 | if (!isColdCount(C: *TotalCallCount)) |
| 157 | return false; |
| 158 | |
| 159 | for (const auto &BB : *F) |
| 160 | if (!isColdBlock(&BB, &BFI)) |
| 161 | return false; |
| 162 | return true; |
| 163 | } |
| 164 | /// Returns true if the hotness of \p F is unknown. |
| 165 | LLVM_ABI bool isFunctionHotnessUnknown(const Function &F) const; |
| 166 | /// Returns true if \p F contains hot code with regard to a given hot |
| 167 | /// percentile cutoff value. |
| 168 | template <typename FuncT, typename BFIT> |
| 169 | bool isFunctionHotInCallGraphNthPercentile(int PercentileCutoff, |
| 170 | const FuncT *F, BFIT &BFI) const { |
| 171 | return isFunctionHotOrColdInCallGraphNthPercentile<true, FuncT, BFIT>( |
| 172 | PercentileCutoff, F, BFI); |
| 173 | } |
| 174 | /// Returns true if \p F contains cold code with regard to a given cold |
| 175 | /// percentile cutoff value. |
| 176 | template <typename FuncT, typename BFIT> |
| 177 | bool isFunctionColdInCallGraphNthPercentile(int PercentileCutoff, |
| 178 | const FuncT *F, BFIT &BFI) const { |
| 179 | return isFunctionHotOrColdInCallGraphNthPercentile<false, FuncT, BFIT>( |
| 180 | PercentileCutoff, F, BFI); |
| 181 | } |
| 182 | /// Returns true if count \p C is considered hot. |
| 183 | LLVM_ABI bool isHotCount(uint64_t C) const; |
| 184 | /// Returns true if count \p C is considered cold. |
| 185 | LLVM_ABI bool isColdCount(uint64_t C) const; |
| 186 | /// Returns true if count \p C is considered hot with regard to a given |
| 187 | /// hot percentile cutoff value. |
| 188 | /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where |
| 189 | /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile. |
| 190 | LLVM_ABI bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const; |
| 191 | /// Returns true if count \p C is considered cold with regard to a given |
| 192 | /// cold percentile cutoff value. |
| 193 | /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where |
| 194 | /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile. |
| 195 | LLVM_ABI bool isColdCountNthPercentile(int PercentileCutoff, |
| 196 | uint64_t C) const; |
| 197 | |
| 198 | /// Returns true if BasicBlock \p BB is considered hot. |
| 199 | template <typename BBType, typename BFIT> |
| 200 | bool isHotBlock(const BBType *BB, BFIT *BFI) const { |
| 201 | auto Count = BFI->getBlockProfileCount(BB); |
| 202 | return Count && isHotCount(C: *Count); |
| 203 | } |
| 204 | |
| 205 | /// Returns true if BasicBlock \p BB is considered cold. |
| 206 | template <typename BBType, typename BFIT> |
| 207 | bool isColdBlock(const BBType *BB, BFIT *BFI) const { |
| 208 | auto Count = BFI->getBlockProfileCount(BB); |
| 209 | return Count && isColdCount(C: *Count); |
| 210 | } |
| 211 | |
| 212 | template <typename BFIT> |
| 213 | bool isColdBlock(BlockFrequency BlockFreq, const BFIT *BFI) const { |
| 214 | auto Count = BFI->getProfileCountFromFreq(BlockFreq); |
| 215 | return Count && isColdCount(C: *Count); |
| 216 | } |
| 217 | |
| 218 | template <typename BBType, typename BFIT> |
| 219 | bool isHotBlockNthPercentile(int PercentileCutoff, const BBType *BB, |
| 220 | BFIT *BFI) const { |
| 221 | return isHotOrColdBlockNthPercentile<true, BBType, BFIT>(PercentileCutoff, |
| 222 | BB, BFI); |
| 223 | } |
| 224 | |
| 225 | template <typename BFIT> |
| 226 | bool isHotBlockNthPercentile(int PercentileCutoff, BlockFrequency BlockFreq, |
| 227 | BFIT *BFI) const { |
| 228 | return isHotOrColdBlockNthPercentile<true, BFIT>(PercentileCutoff, |
| 229 | BlockFreq, BFI); |
| 230 | } |
| 231 | |
| 232 | /// Returns true if BasicBlock \p BB is considered cold with regard to a given |
| 233 | /// cold percentile cutoff value. |
| 234 | /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where |
| 235 | /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile. |
| 236 | template <typename BBType, typename BFIT> |
| 237 | bool isColdBlockNthPercentile(int PercentileCutoff, const BBType *BB, |
| 238 | BFIT *BFI) const { |
| 239 | return isHotOrColdBlockNthPercentile<false, BBType, BFIT>(PercentileCutoff, |
| 240 | BB, BFI); |
| 241 | } |
| 242 | template <typename BFIT> |
| 243 | bool isColdBlockNthPercentile(int PercentileCutoff, BlockFrequency BlockFreq, |
| 244 | BFIT *BFI) const { |
| 245 | return isHotOrColdBlockNthPercentile<false, BFIT>(PercentileCutoff, |
| 246 | BlockFreq, BFI); |
| 247 | } |
| 248 | /// Returns true if the call site \p CB is considered hot. |
| 249 | LLVM_ABI bool isHotCallSite(const CallBase &CB, |
| 250 | BlockFrequencyInfo *BFI) const; |
| 251 | /// Returns true if call site \p CB is considered cold. |
| 252 | LLVM_ABI bool isColdCallSite(const CallBase &CB, |
| 253 | BlockFrequencyInfo *BFI) const; |
| 254 | /// Returns HotCountThreshold if set. Recompute HotCountThreshold |
| 255 | /// if not set. |
| 256 | LLVM_ABI uint64_t getOrCompHotCountThreshold() const; |
| 257 | /// Returns ColdCountThreshold if set. Recompute HotCountThreshold |
| 258 | /// if not set. |
| 259 | LLVM_ABI uint64_t getOrCompColdCountThreshold() const; |
| 260 | /// Returns HotCountThreshold if set. |
| 261 | uint64_t getHotCountThreshold() const { |
| 262 | return HotCountThreshold.value_or(u: 0); |
| 263 | } |
| 264 | /// Returns ColdCountThreshold if set. |
| 265 | uint64_t getColdCountThreshold() const { |
| 266 | return ColdCountThreshold.value_or(u: 0); |
| 267 | } |
| 268 | |
| 269 | private: |
| 270 | template <typename FuncT> |
| 271 | std::optional<uint64_t> getTotalCallCount(const FuncT *F) const { |
| 272 | return std::nullopt; |
| 273 | } |
| 274 | |
| 275 | template <bool isHot, typename FuncT, typename BFIT> |
| 276 | bool isFunctionHotOrColdInCallGraphNthPercentile(int PercentileCutoff, |
| 277 | const FuncT *F, |
| 278 | BFIT &FI) const { |
| 279 | if (!F || !hasProfileSummary()) |
| 280 | return false; |
| 281 | if (auto FunctionCount = getEntryCount(F)) { |
| 282 | if (isHot && |
| 283 | isHotCountNthPercentile(PercentileCutoff, C: FunctionCount->getCount())) |
| 284 | return true; |
| 285 | if (!isHot && !isColdCountNthPercentile(PercentileCutoff, |
| 286 | C: FunctionCount->getCount())) |
| 287 | return false; |
| 288 | } |
| 289 | if (auto TotalCallCount = getTotalCallCount(F)) { |
| 290 | if (isHot && isHotCountNthPercentile(PercentileCutoff, C: *TotalCallCount)) |
| 291 | return true; |
| 292 | if (!isHot && |
| 293 | !isColdCountNthPercentile(PercentileCutoff, C: *TotalCallCount)) |
| 294 | return false; |
| 295 | } |
| 296 | for (const auto &BB : *F) { |
| 297 | if (isHot && isHotBlockNthPercentile(PercentileCutoff, &BB, &FI)) |
| 298 | return true; |
| 299 | if (!isHot && !isColdBlockNthPercentile(PercentileCutoff, &BB, &FI)) |
| 300 | return false; |
| 301 | } |
| 302 | return !isHot; |
| 303 | } |
| 304 | |
| 305 | template <bool isHot> |
| 306 | bool isHotOrColdCountNthPercentile(int PercentileCutoff, uint64_t C) const; |
| 307 | |
| 308 | template <bool isHot, typename BBType, typename BFIT> |
| 309 | bool isHotOrColdBlockNthPercentile(int PercentileCutoff, const BBType *BB, |
| 310 | BFIT *BFI) const { |
| 311 | auto Count = BFI->getBlockProfileCount(BB); |
| 312 | if (isHot) |
| 313 | return Count && isHotCountNthPercentile(PercentileCutoff, C: *Count); |
| 314 | else |
| 315 | return Count && isColdCountNthPercentile(PercentileCutoff, C: *Count); |
| 316 | } |
| 317 | |
| 318 | template <bool isHot, typename BFIT> |
| 319 | bool isHotOrColdBlockNthPercentile(int PercentileCutoff, |
| 320 | BlockFrequency BlockFreq, |
| 321 | BFIT *BFI) const { |
| 322 | auto Count = BFI->getProfileCountFromFreq(BlockFreq); |
| 323 | if (isHot) |
| 324 | return Count && isHotCountNthPercentile(PercentileCutoff, C: *Count); |
| 325 | else |
| 326 | return Count && isColdCountNthPercentile(PercentileCutoff, C: *Count); |
| 327 | } |
| 328 | |
| 329 | template <typename FuncT> |
| 330 | std::optional<Function::ProfileCount> getEntryCount(const FuncT *F) const { |
| 331 | return F->getEntryCount(); |
| 332 | } |
| 333 | }; |
| 334 | |
| 335 | template <> |
| 336 | inline std::optional<uint64_t> |
| 337 | ProfileSummaryInfo::getTotalCallCount<Function>(const Function *F) const { |
| 338 | if (!hasSampleProfile()) |
| 339 | return std::nullopt; |
| 340 | uint64_t TotalCallCount = 0; |
| 341 | for (const auto &BB : *F) |
| 342 | for (const auto &I : BB) |
| 343 | if (isa<CallInst>(Val: I) || isa<InvokeInst>(Val: I)) |
| 344 | if (auto CallCount = getProfileCount(CallInst: cast<CallBase>(Val: I), BFI: nullptr)) |
| 345 | TotalCallCount += *CallCount; |
| 346 | return TotalCallCount; |
| 347 | } |
| 348 | |
| 349 | // Declare template specialization for llvm::MachineFunction. Do not implement |
| 350 | // here, because we cannot include MachineFunction header here, that would break |
| 351 | // dependency rules. |
| 352 | template <> |
| 353 | std::optional<Function::ProfileCount> |
| 354 | ProfileSummaryInfo::getEntryCount<MachineFunction>( |
| 355 | const MachineFunction *F) const; |
| 356 | |
| 357 | /// An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo. |
| 358 | class LLVM_ABI ProfileSummaryInfoWrapperPass : public ImmutablePass { |
| 359 | std::unique_ptr<ProfileSummaryInfo> PSI; |
| 360 | |
| 361 | public: |
| 362 | static char ID; |
| 363 | ProfileSummaryInfoWrapperPass(); |
| 364 | |
| 365 | ProfileSummaryInfo &getPSI() { return *PSI; } |
| 366 | const ProfileSummaryInfo &getPSI() const { return *PSI; } |
| 367 | |
| 368 | bool doInitialization(Module &M) override; |
| 369 | bool doFinalization(Module &M) override; |
| 370 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 371 | AU.setPreservesAll(); |
| 372 | } |
| 373 | }; |
| 374 | |
| 375 | /// An analysis pass based on the new PM to deliver ProfileSummaryInfo. |
| 376 | class ProfileSummaryAnalysis |
| 377 | : public AnalysisInfoMixin<ProfileSummaryAnalysis> { |
| 378 | public: |
| 379 | typedef ProfileSummaryInfo Result; |
| 380 | |
| 381 | LLVM_ABI Result run(Module &M, ModuleAnalysisManager &); |
| 382 | |
| 383 | private: |
| 384 | friend AnalysisInfoMixin<ProfileSummaryAnalysis>; |
| 385 | LLVM_ABI static AnalysisKey Key; |
| 386 | }; |
| 387 | |
| 388 | /// Printer pass that uses \c ProfileSummaryAnalysis. |
| 389 | class ProfileSummaryPrinterPass |
| 390 | : public PassInfoMixin<ProfileSummaryPrinterPass> { |
| 391 | raw_ostream &OS; |
| 392 | |
| 393 | public: |
| 394 | explicit ProfileSummaryPrinterPass(raw_ostream &OS) : OS(OS) {} |
| 395 | LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); |
| 396 | static bool isRequired() { return true; } |
| 397 | }; |
| 398 | |
| 399 | } // end namespace llvm |
| 400 | |
| 401 | #endif |
| 402 | |