1 | //===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the transformation that promotes indirect calls to |
10 | // conditional direct calls when the indirect-call value profile metadata is |
11 | // available. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "llvm/ADT/ArrayRef.h" |
16 | #include "llvm/ADT/Statistic.h" |
17 | #include "llvm/ADT/StringRef.h" |
18 | #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" |
19 | #include "llvm/Analysis/IndirectCallVisitor.h" |
20 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
21 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
22 | #include "llvm/IR/DiagnosticInfo.h" |
23 | #include "llvm/IR/Function.h" |
24 | #include "llvm/IR/InstrTypes.h" |
25 | #include "llvm/IR/Instructions.h" |
26 | #include "llvm/IR/LLVMContext.h" |
27 | #include "llvm/IR/MDBuilder.h" |
28 | #include "llvm/IR/PassManager.h" |
29 | #include "llvm/IR/ProfDataUtils.h" |
30 | #include "llvm/IR/Value.h" |
31 | #include "llvm/ProfileData/InstrProf.h" |
32 | #include "llvm/Support/Casting.h" |
33 | #include "llvm/Support/CommandLine.h" |
34 | #include "llvm/Support/Debug.h" |
35 | #include "llvm/Support/Error.h" |
36 | #include "llvm/Support/raw_ostream.h" |
37 | #include "llvm/Transforms/Instrumentation.h" |
38 | #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" |
39 | #include "llvm/Transforms/Utils/CallPromotionUtils.h" |
40 | #include <cassert> |
41 | #include <cstdint> |
42 | #include <memory> |
43 | #include <string> |
44 | #include <utility> |
45 | #include <vector> |
46 | |
47 | using namespace llvm; |
48 | |
49 | #define DEBUG_TYPE "pgo-icall-prom" |
50 | |
51 | STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions." ); |
52 | STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites." ); |
53 | |
54 | // Command line option to disable indirect-call promotion with the default as |
55 | // false. This is for debug purpose. |
56 | static cl::opt<bool> DisableICP("disable-icp" , cl::init(Val: false), cl::Hidden, |
57 | cl::desc("Disable indirect call promotion" )); |
58 | |
59 | // Set the cutoff value for the promotion. If the value is other than 0, we |
60 | // stop the transformation once the total number of promotions equals the cutoff |
61 | // value. |
62 | // For debug use only. |
63 | static cl::opt<unsigned> |
64 | ICPCutOff("icp-cutoff" , cl::init(Val: 0), cl::Hidden, |
65 | cl::desc("Max number of promotions for this compilation" )); |
66 | |
67 | // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped. |
68 | // For debug use only. |
69 | static cl::opt<unsigned> |
70 | ICPCSSkip("icp-csskip" , cl::init(Val: 0), cl::Hidden, |
71 | cl::desc("Skip Callsite up to this number for this compilation" )); |
72 | |
73 | // Set if the pass is called in LTO optimization. The difference for LTO mode |
74 | // is the pass won't prefix the source module name to the internal linkage |
75 | // symbols. |
76 | static cl::opt<bool> ICPLTOMode("icp-lto" , cl::init(Val: false), cl::Hidden, |
77 | cl::desc("Run indirect-call promotion in LTO " |
78 | "mode" )); |
79 | |
80 | // Set if the pass is called in SamplePGO mode. The difference for SamplePGO |
81 | // mode is it will add prof metadatato the created direct call. |
82 | static cl::opt<bool> |
83 | ICPSamplePGOMode("icp-samplepgo" , cl::init(Val: false), cl::Hidden, |
84 | cl::desc("Run indirect-call promotion in SamplePGO mode" )); |
85 | |
86 | // If the option is set to true, only call instructions will be considered for |
87 | // transformation -- invoke instructions will be ignored. |
88 | static cl::opt<bool> |
89 | ICPCallOnly("icp-call-only" , cl::init(Val: false), cl::Hidden, |
90 | cl::desc("Run indirect-call promotion for call instructions " |
91 | "only" )); |
92 | |
93 | // If the option is set to true, only invoke instructions will be considered for |
94 | // transformation -- call instructions will be ignored. |
95 | static cl::opt<bool> ICPInvokeOnly("icp-invoke-only" , cl::init(Val: false), |
96 | cl::Hidden, |
97 | cl::desc("Run indirect-call promotion for " |
98 | "invoke instruction only" )); |
99 | |
100 | // Dump the function level IR if the transformation happened in this |
101 | // function. For debug use only. |
102 | static cl::opt<bool> |
103 | ICPDUMPAFTER("icp-dumpafter" , cl::init(Val: false), cl::Hidden, |
104 | cl::desc("Dump IR after transformation happens" )); |
105 | |
106 | namespace { |
107 | |
108 | // Promote indirect calls to conditional direct calls, keeping track of |
109 | // thresholds. |
110 | class IndirectCallPromoter { |
111 | private: |
112 | Function &F; |
113 | |
114 | // Symtab that maps indirect call profile values to function names and |
115 | // defines. |
116 | InstrProfSymtab *const Symtab; |
117 | |
118 | const bool SamplePGO; |
119 | |
120 | OptimizationRemarkEmitter &ORE; |
121 | |
122 | // A struct that records the direct target and it's call count. |
123 | struct PromotionCandidate { |
124 | Function *const TargetFunction; |
125 | const uint64_t Count; |
126 | |
127 | PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {} |
128 | }; |
129 | |
130 | // Check if the indirect-call call site should be promoted. Return the number |
131 | // of promotions. Inst is the candidate indirect call, ValueDataRef |
132 | // contains the array of value profile data for profiled targets, |
133 | // TotalCount is the total profiled count of call executions, and |
134 | // NumCandidates is the number of candidate entries in ValueDataRef. |
135 | std::vector<PromotionCandidate> getPromotionCandidatesForCallSite( |
136 | const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef, |
137 | uint64_t TotalCount, uint32_t NumCandidates); |
138 | |
139 | // Promote a list of targets for one indirect-call callsite by comparing |
140 | // indirect callee with functions. Returns true if there are IR |
141 | // transformations and false otherwise. |
142 | bool tryToPromoteWithFuncCmp( |
143 | CallBase &CB, const std::vector<PromotionCandidate> &Candidates, |
144 | uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef, |
145 | uint32_t NumCandidates); |
146 | |
147 | public: |
148 | (Function &Func, InstrProfSymtab *Symtab, bool SamplePGO, |
149 | OptimizationRemarkEmitter &ORE) |
150 | : F(Func), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {} |
151 | IndirectCallPromoter(const IndirectCallPromoter &) = delete; |
152 | IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete; |
153 | |
154 | bool processFunction(ProfileSummaryInfo *PSI); |
155 | }; |
156 | |
157 | } // end anonymous namespace |
158 | |
159 | // Indirect-call promotion heuristic. The direct targets are sorted based on |
160 | // the count. Stop at the first target that is not promoted. |
161 | std::vector<IndirectCallPromoter::PromotionCandidate> |
162 | IndirectCallPromoter::getPromotionCandidatesForCallSite( |
163 | const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef, |
164 | uint64_t TotalCount, uint32_t NumCandidates) { |
165 | std::vector<PromotionCandidate> Ret; |
166 | |
167 | LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB |
168 | << " Num_targets: " << ValueDataRef.size() |
169 | << " Num_candidates: " << NumCandidates << "\n" ); |
170 | NumOfPGOICallsites++; |
171 | if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) { |
172 | LLVM_DEBUG(dbgs() << " Skip: User options.\n" ); |
173 | return Ret; |
174 | } |
175 | |
176 | for (uint32_t I = 0; I < NumCandidates; I++) { |
177 | uint64_t Count = ValueDataRef[I].Count; |
178 | assert(Count <= TotalCount); |
179 | (void)TotalCount; |
180 | uint64_t Target = ValueDataRef[I].Value; |
181 | LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count |
182 | << " Target_func: " << Target << "\n" ); |
183 | |
184 | if (ICPInvokeOnly && isa<CallInst>(Val: CB)) { |
185 | LLVM_DEBUG(dbgs() << " Not promote: User options.\n" ); |
186 | ORE.emit(RemarkBuilder: [&]() { |
187 | return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions" , &CB) |
188 | << " Not promote: User options" ; |
189 | }); |
190 | break; |
191 | } |
192 | if (ICPCallOnly && isa<InvokeInst>(Val: CB)) { |
193 | LLVM_DEBUG(dbgs() << " Not promote: User option.\n" ); |
194 | ORE.emit(RemarkBuilder: [&]() { |
195 | return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions" , &CB) |
196 | << " Not promote: User options" ; |
197 | }); |
198 | break; |
199 | } |
200 | if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { |
201 | LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n" ); |
202 | ORE.emit(RemarkBuilder: [&]() { |
203 | return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached" , &CB) |
204 | << " Not promote: Cutoff reached" ; |
205 | }); |
206 | break; |
207 | } |
208 | |
209 | // Don't promote if the symbol is not defined in the module. This avoids |
210 | // creating a reference to a symbol that doesn't exist in the module |
211 | // This can happen when we compile with a sample profile collected from |
212 | // one binary but used for another, which may have profiled targets that |
213 | // aren't used in the new binary. We might have a declaration initially in |
214 | // the case where the symbol is globally dead in the binary and removed by |
215 | // ThinLTO. |
216 | Function *TargetFunction = Symtab->getFunction(FuncMD5Hash: Target); |
217 | if (TargetFunction == nullptr || TargetFunction->isDeclaration()) { |
218 | LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n" ); |
219 | ORE.emit(RemarkBuilder: [&]() { |
220 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget" , &CB) |
221 | << "Cannot promote indirect call: target with md5sum " |
222 | << ore::NV("target md5sum" , Target) << " not found" ; |
223 | }); |
224 | break; |
225 | } |
226 | |
227 | const char *Reason = nullptr; |
228 | if (!isLegalToPromote(CB, Callee: TargetFunction, FailureReason: &Reason)) { |
229 | using namespace ore; |
230 | |
231 | ORE.emit(RemarkBuilder: [&]() { |
232 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote" , &CB) |
233 | << "Cannot promote indirect call to " |
234 | << NV("TargetFunction" , TargetFunction) << " with count of " |
235 | << NV("Count" , Count) << ": " << Reason; |
236 | }); |
237 | break; |
238 | } |
239 | |
240 | Ret.push_back(x: PromotionCandidate(TargetFunction, Count)); |
241 | TotalCount -= Count; |
242 | } |
243 | return Ret; |
244 | } |
245 | |
246 | CallBase &llvm::pgo::(CallBase &CB, Function *DirectCallee, |
247 | uint64_t Count, uint64_t TotalCount, |
248 | bool AttachProfToDirectCall, |
249 | OptimizationRemarkEmitter *ORE) { |
250 | |
251 | uint64_t ElseCount = TotalCount - Count; |
252 | uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount); |
253 | uint64_t Scale = calculateCountScale(MaxCount); |
254 | MDBuilder MDB(CB.getContext()); |
255 | MDNode *BranchWeights = MDB.createBranchWeights( |
256 | TrueWeight: scaleBranchCount(Count, Scale), FalseWeight: scaleBranchCount(Count: ElseCount, Scale)); |
257 | |
258 | CallBase &NewInst = |
259 | promoteCallWithIfThenElse(CB, Callee: DirectCallee, BranchWeights); |
260 | |
261 | if (AttachProfToDirectCall) { |
262 | setBranchWeights(I&: NewInst, Weights: {static_cast<uint32_t>(Count)}); |
263 | } |
264 | |
265 | using namespace ore; |
266 | |
267 | if (ORE) |
268 | ORE->emit(RemarkBuilder: [&]() { |
269 | return OptimizationRemark(DEBUG_TYPE, "Promoted" , &CB) |
270 | << "Promote indirect call to " << NV("DirectCallee" , DirectCallee) |
271 | << " with count " << NV("Count" , Count) << " out of " |
272 | << NV("TotalCount" , TotalCount); |
273 | }); |
274 | return NewInst; |
275 | } |
276 | |
277 | // Promote indirect-call to conditional direct-call for one callsite. |
278 | bool IndirectCallPromoter::tryToPromoteWithFuncCmp( |
279 | CallBase &CB, const std::vector<PromotionCandidate> &Candidates, |
280 | uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef, |
281 | uint32_t NumCandidates) { |
282 | uint32_t NumPromoted = 0; |
283 | |
284 | for (const auto &C : Candidates) { |
285 | uint64_t Count = C.Count; |
286 | pgo::promoteIndirectCall(CB, DirectCallee: C.TargetFunction, Count, TotalCount, AttachProfToDirectCall: SamplePGO, |
287 | ORE: &ORE); |
288 | assert(TotalCount >= Count); |
289 | TotalCount -= Count; |
290 | NumOfPGOICallPromotion++; |
291 | NumPromoted++; |
292 | } |
293 | |
294 | if (NumPromoted == 0) |
295 | return false; |
296 | |
297 | // Adjust the MD.prof metadata. First delete the old one. |
298 | CB.setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr); |
299 | |
300 | assert(NumPromoted <= ICallProfDataRef.size() && |
301 | "Number of promoted functions should not be greater than the number " |
302 | "of values in profile metadata" ); |
303 | // Annotate the remaining value profiles if counter is not zero. |
304 | if (TotalCount != 0) |
305 | annotateValueSite(M&: *F.getParent(), Inst&: CB, VDs: ICallProfDataRef.slice(N: NumPromoted), |
306 | Sum: TotalCount, ValueKind: IPVK_IndirectCallTarget, MaxMDCount: NumCandidates); |
307 | |
308 | return true; |
309 | } |
310 | |
311 | // Traverse all the indirect-call callsite and get the value profile |
312 | // annotation to perform indirect-call promotion. |
313 | bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) { |
314 | bool Changed = false; |
315 | ICallPromotionAnalysis ICallAnalysis; |
316 | for (auto *CB : findIndirectCalls(F)) { |
317 | uint32_t NumVals, NumCandidates; |
318 | uint64_t TotalCount; |
319 | auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction( |
320 | I: CB, NumVals, TotalCount, NumCandidates); |
321 | if (!NumCandidates || |
322 | (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(C: TotalCount))) |
323 | continue; |
324 | auto PromotionCandidates = getPromotionCandidatesForCallSite( |
325 | CB: *CB, ValueDataRef: ICallProfDataRef, TotalCount, NumCandidates); |
326 | Changed |= tryToPromoteWithFuncCmp(CB&: *CB, Candidates: PromotionCandidates, TotalCount, |
327 | ICallProfDataRef, NumCandidates); |
328 | } |
329 | return Changed; |
330 | } |
331 | |
332 | // A wrapper function that does the actual work. |
333 | static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, |
334 | bool SamplePGO, ModuleAnalysisManager &MAM) { |
335 | if (DisableICP) |
336 | return false; |
337 | InstrProfSymtab Symtab; |
338 | if (Error E = Symtab.create(M, InLTO)) { |
339 | std::string SymtabFailure = toString(E: std::move(E)); |
340 | M.getContext().emitError(ErrorStr: "Failed to create symtab: " + SymtabFailure); |
341 | return false; |
342 | } |
343 | bool Changed = false; |
344 | for (auto &F : M) { |
345 | if (F.isDeclaration() || F.hasOptNone()) |
346 | continue; |
347 | |
348 | auto &FAM = |
349 | MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
350 | auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F); |
351 | |
352 | IndirectCallPromoter CallPromoter(F, &Symtab, SamplePGO, ORE); |
353 | bool FuncChanged = CallPromoter.processFunction(PSI); |
354 | if (ICPDUMPAFTER && FuncChanged) { |
355 | LLVM_DEBUG(dbgs() << "\n== IR Dump After ==" ; F.print(dbgs())); |
356 | LLVM_DEBUG(dbgs() << "\n" ); |
357 | } |
358 | Changed |= FuncChanged; |
359 | if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { |
360 | LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n" ); |
361 | break; |
362 | } |
363 | } |
364 | return Changed; |
365 | } |
366 | |
367 | PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, |
368 | ModuleAnalysisManager &MAM) { |
369 | ProfileSummaryInfo *PSI = &MAM.getResult<ProfileSummaryAnalysis>(IR&: M); |
370 | |
371 | if (!promoteIndirectCalls(M, PSI, InLTO: InLTO | ICPLTOMode, |
372 | SamplePGO: SamplePGO | ICPSamplePGOMode, MAM)) |
373 | return PreservedAnalyses::all(); |
374 | |
375 | return PreservedAnalyses::none(); |
376 | } |
377 | |