1 | //===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This contains code to emit warnings for potentially incorrect usage of the |
10 | // llvm.expect intrinsic. This utility extracts the threshold values from |
11 | // metadata associated with the instrumented Branch or Switch instruction. The |
12 | // threshold values are then used to determine if a warning should be emmited. |
13 | // |
14 | // MisExpect's implementation relies on two assumptions about how branch weights |
15 | // are managed in LLVM. |
16 | // |
17 | // 1) Frontend profiling weights are always in place before llvm.expect is |
18 | // lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore |
19 | // needs to extract the branch weights and then compare them to the weights |
20 | // being added by the llvm.expect intrinsic lowering. |
21 | // |
22 | // 2) Sampling and IR based profiles will *only* have branch weight metadata |
23 | // before profiling data is consulted if they are from a lowered llvm.expect |
24 | // intrinsic. These profiles thus always extract the expected weights and then |
25 | // compare them to the weights collected during profiling to determine if a |
26 | // diagnostic message is warranted. |
27 | // |
28 | //===----------------------------------------------------------------------===// |
29 | |
30 | #include "llvm/Transforms/Utils/MisExpect.h" |
31 | #include "llvm/ADT/Twine.h" |
32 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
33 | #include "llvm/IR/Constants.h" |
34 | #include "llvm/IR/DiagnosticInfo.h" |
35 | #include "llvm/IR/Instruction.h" |
36 | #include "llvm/IR/Instructions.h" |
37 | #include "llvm/IR/LLVMContext.h" |
38 | #include "llvm/IR/ProfDataUtils.h" |
39 | #include "llvm/Support/BranchProbability.h" |
40 | #include "llvm/Support/CommandLine.h" |
41 | #include "llvm/Support/Debug.h" |
42 | #include "llvm/Support/FormatVariadic.h" |
43 | #include <algorithm> |
44 | #include <cstdint> |
45 | #include <functional> |
46 | #include <numeric> |
47 | |
48 | #define DEBUG_TYPE "misexpect" |
49 | |
50 | using namespace llvm; |
51 | using namespace misexpect; |
52 | |
53 | namespace llvm { |
54 | |
55 | // Command line option to enable/disable the warning when profile data suggests |
56 | // a mismatch with the use of the llvm.expect intrinsic |
57 | static cl::opt<bool> PGOWarnMisExpect( |
58 | "pgo-warn-misexpect" , cl::init(Val: false), cl::Hidden, |
59 | cl::desc("Use this option to turn on/off " |
60 | "warnings about incorrect usage of llvm.expect intrinsics." )); |
61 | |
62 | static cl::opt<uint32_t> MisExpectTolerance( |
63 | "misexpect-tolerance" , cl::init(Val: 0), |
64 | cl::desc("Prevents emiting diagnostics when profile counts are " |
65 | "within N% of the threshold.." )); |
66 | |
67 | } // namespace llvm |
68 | |
69 | namespace { |
70 | |
71 | bool isMisExpectDiagEnabled(LLVMContext &Ctx) { |
72 | return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested(); |
73 | } |
74 | |
75 | uint32_t getMisExpectTolerance(LLVMContext &Ctx) { |
76 | return std::max(a: static_cast<uint32_t>(MisExpectTolerance), |
77 | b: Ctx.getDiagnosticsMisExpectTolerance()); |
78 | } |
79 | |
80 | Instruction *getInstCondition(Instruction *I) { |
81 | assert(I != nullptr && "MisExpect target Instruction cannot be nullptr" ); |
82 | Instruction *Ret = nullptr; |
83 | if (auto *B = dyn_cast<BranchInst>(Val: I)) { |
84 | Ret = dyn_cast<Instruction>(Val: B->getCondition()); |
85 | } |
86 | // TODO: Find a way to resolve condition location for switches |
87 | // Using the condition of the switch seems to often resolve to an earlier |
88 | // point in the program, i.e. the calculation of the switch condition, rather |
89 | // than the switch's location in the source code. Thus, we should use the |
90 | // instruction to get source code locations rather than the condition to |
91 | // improve diagnostic output, such as the caret. If the same problem exists |
92 | // for branch instructions, then we should remove this function and directly |
93 | // use the instruction |
94 | // |
95 | else if (auto *S = dyn_cast<SwitchInst>(Val: I)) { |
96 | Ret = dyn_cast<Instruction>(Val: S->getCondition()); |
97 | } |
98 | return Ret ? Ret : I; |
99 | } |
100 | |
101 | void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx, |
102 | uint64_t ProfCount, uint64_t TotalCount) { |
103 | double PercentageCorrect = (double)ProfCount / TotalCount; |
104 | auto PerString = |
105 | formatv(Fmt: "{0:P} ({1} / {2})" , Vals&: PercentageCorrect, Vals&: ProfCount, Vals&: TotalCount); |
106 | auto RemStr = formatv( |
107 | Fmt: "Potential performance regression from use of the llvm.expect intrinsic: " |
108 | "Annotation was correct on {0} of profiled executions." , |
109 | Vals&: PerString); |
110 | Twine Msg(PerString); |
111 | Instruction *Cond = getInstCondition(I); |
112 | if (isMisExpectDiagEnabled(Ctx)) |
113 | Ctx.diagnose(DI: DiagnosticInfoMisExpect(Cond, Msg)); |
114 | OptimizationRemarkEmitter ORE(I->getParent()->getParent()); |
115 | ORE.emit(OptDiag&: OptimizationRemark(DEBUG_TYPE, "misexpect" , Cond) << RemStr.str()); |
116 | } |
117 | |
118 | } // namespace |
119 | |
120 | namespace llvm { |
121 | namespace misexpect { |
122 | |
123 | void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights, |
124 | ArrayRef<uint32_t> ExpectedWeights) { |
125 | // To determine if we emit a diagnostic, we need to compare the branch weights |
126 | // from the profile to those added by the llvm.expect intrinsic. |
127 | // So first, we extract the "likely" and "unlikely" weights from |
128 | // ExpectedWeights And determine the correct weight in the profile to compare |
129 | // against. |
130 | uint64_t LikelyBranchWeight = 0, |
131 | UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max(); |
132 | size_t MaxIndex = 0; |
133 | for (size_t Idx = 0, End = ExpectedWeights.size(); Idx < End; Idx++) { |
134 | uint32_t V = ExpectedWeights[Idx]; |
135 | if (LikelyBranchWeight < V) { |
136 | LikelyBranchWeight = V; |
137 | MaxIndex = Idx; |
138 | } |
139 | if (UnlikelyBranchWeight > V) { |
140 | UnlikelyBranchWeight = V; |
141 | } |
142 | } |
143 | |
144 | const uint64_t ProfiledWeight = RealWeights[MaxIndex]; |
145 | const uint64_t RealWeightsTotal = |
146 | std::accumulate(first: RealWeights.begin(), last: RealWeights.end(), init: (uint64_t)0, |
147 | binary_op: std::plus<uint64_t>()); |
148 | const uint64_t NumUnlikelyTargets = RealWeights.size() - 1; |
149 | |
150 | uint64_t TotalBranchWeight = |
151 | LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets); |
152 | |
153 | // FIXME: When we've addressed sample profiling, restore the assertion |
154 | // |
155 | // We cannot calculate branch probability if either of these invariants aren't |
156 | // met. However, MisExpect diagnostics should not prevent code from compiling, |
157 | // so we simply forgo emitting diagnostics here, and return early. |
158 | // assert((TotalBranchWeight >= LikelyBranchWeight) && (TotalBranchWeight > 0) |
159 | // && "TotalBranchWeight is less than the Likely branch weight"); |
160 | if ((TotalBranchWeight == 0) || (TotalBranchWeight <= LikelyBranchWeight)) |
161 | return; |
162 | |
163 | // To determine our threshold value we need to obtain the branch probability |
164 | // for the weights added by llvm.expect and use that proportion to calculate |
165 | // our threshold based on the collected profile data. |
166 | auto LikelyProbablilty = BranchProbability::getBranchProbability( |
167 | Numerator: LikelyBranchWeight, Denominator: TotalBranchWeight); |
168 | |
169 | uint64_t ScaledThreshold = LikelyProbablilty.scale(Num: RealWeightsTotal); |
170 | |
171 | // clamp tolerance range to [0, 100) |
172 | auto Tolerance = getMisExpectTolerance(Ctx&: I.getContext()); |
173 | Tolerance = std::clamp(val: Tolerance, lo: 0u, hi: 99u); |
174 | |
175 | // Allow users to relax checking by N% i.e., if they use a 5% tolerance, |
176 | // then we check against 0.95*ScaledThreshold |
177 | if (Tolerance > 0) |
178 | ScaledThreshold *= (1.0 - Tolerance / 100.0); |
179 | |
180 | // When the profile weight is below the threshold, we emit the diagnostic |
181 | if (ProfiledWeight < ScaledThreshold) |
182 | emitMisexpectDiagnostic(I: &I, Ctx&: I.getContext(), ProfCount: ProfiledWeight, |
183 | TotalCount: RealWeightsTotal); |
184 | } |
185 | |
186 | void checkBackendInstrumentation(Instruction &I, |
187 | const ArrayRef<uint32_t> RealWeights) { |
188 | SmallVector<uint32_t> ExpectedWeights; |
189 | if (!extractBranchWeights(I, Weights&: ExpectedWeights)) |
190 | return; |
191 | verifyMisExpect(I, RealWeights, ExpectedWeights); |
192 | } |
193 | |
194 | void checkFrontendInstrumentation(Instruction &I, |
195 | const ArrayRef<uint32_t> ExpectedWeights) { |
196 | SmallVector<uint32_t> RealWeights; |
197 | if (!extractBranchWeights(I, Weights&: RealWeights)) |
198 | return; |
199 | verifyMisExpect(I, RealWeights, ExpectedWeights); |
200 | } |
201 | |
202 | void checkExpectAnnotations(Instruction &I, |
203 | const ArrayRef<uint32_t> ExistingWeights, |
204 | bool IsFrontend) { |
205 | if (IsFrontend) { |
206 | checkFrontendInstrumentation(I, ExpectedWeights: ExistingWeights); |
207 | } else { |
208 | checkBackendInstrumentation(I, RealWeights: ExistingWeights); |
209 | } |
210 | } |
211 | |
212 | } // namespace misexpect |
213 | } // namespace llvm |
214 | #undef DEBUG_TYPE |
215 | |