1 | //===- bolt/Passes/IndirectCallPromotion.cpp ------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the IndirectCallPromotion class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Passes/IndirectCallPromotion.h" |
14 | #include "bolt/Passes/BinaryFunctionCallGraph.h" |
15 | #include "bolt/Passes/DataflowInfoManager.h" |
16 | #include "bolt/Passes/Inliner.h" |
17 | #include "llvm/ADT/STLExtras.h" |
18 | #include "llvm/Support/CommandLine.h" |
19 | #include <iterator> |
20 | |
21 | #define DEBUG_TYPE "ICP" |
22 | #define DEBUG_VERBOSE(Level, X) \ |
23 | if (opts::Verbosity >= (Level)) { \ |
24 | X; \ |
25 | } |
26 | |
27 | using namespace llvm; |
28 | using namespace bolt; |
29 | |
30 | namespace opts { |
31 | |
32 | extern cl::OptionCategory BoltOptCategory; |
33 | |
34 | extern cl::opt<IndirectCallPromotionType> ICP; |
35 | extern cl::opt<unsigned> Verbosity; |
36 | extern cl::opt<unsigned> ExecutionCountThreshold; |
37 | |
38 | static cl::opt<unsigned> ICPJTRemainingPercentThreshold( |
39 | "icp-jt-remaining-percent-threshold" , |
40 | cl::desc("The percentage threshold against remaining unpromoted indirect " |
41 | "call count for the promotion for jump tables" ), |
42 | cl::init(Val: 30), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); |
43 | |
44 | static cl::opt<unsigned> ICPJTTotalPercentThreshold( |
45 | "icp-jt-total-percent-threshold" , |
46 | cl::desc( |
47 | "The percentage threshold against total count for the promotion for " |
48 | "jump tables" ), |
49 | cl::init(Val: 5), cl::Hidden, cl::cat(BoltOptCategory)); |
50 | |
51 | static cl::opt<unsigned> ICPCallsRemainingPercentThreshold( |
52 | "icp-calls-remaining-percent-threshold" , |
53 | cl::desc("The percentage threshold against remaining unpromoted indirect " |
54 | "call count for the promotion for calls" ), |
55 | cl::init(Val: 50), cl::Hidden, cl::cat(BoltOptCategory)); |
56 | |
57 | static cl::opt<unsigned> ICPCallsTotalPercentThreshold( |
58 | "icp-calls-total-percent-threshold" , |
59 | cl::desc( |
60 | "The percentage threshold against total count for the promotion for " |
61 | "calls" ), |
62 | cl::init(Val: 30), cl::Hidden, cl::cat(BoltOptCategory)); |
63 | |
64 | static cl::opt<unsigned> ICPMispredictThreshold( |
65 | "indirect-call-promotion-mispredict-threshold" , |
66 | cl::desc("misprediction threshold for skipping ICP on an " |
67 | "indirect call" ), |
68 | cl::init(Val: 0), cl::cat(BoltOptCategory)); |
69 | |
70 | static cl::alias ICPMispredictThresholdAlias( |
71 | "icp-mp-threshold" , |
72 | cl::desc("alias for --indirect-call-promotion-mispredict-threshold" ), |
73 | cl::aliasopt(ICPMispredictThreshold)); |
74 | |
75 | static cl::opt<bool> ICPUseMispredicts( |
76 | "indirect-call-promotion-use-mispredicts" , |
77 | cl::desc("use misprediction frequency for determining whether or not ICP " |
78 | "should be applied at a callsite. The " |
79 | "-indirect-call-promotion-mispredict-threshold value will be used " |
80 | "by this heuristic" ), |
81 | cl::cat(BoltOptCategory)); |
82 | |
83 | static cl::alias ICPUseMispredictsAlias( |
84 | "icp-use-mp" , |
85 | cl::desc("alias for --indirect-call-promotion-use-mispredicts" ), |
86 | cl::aliasopt(ICPUseMispredicts)); |
87 | |
88 | static cl::opt<unsigned> |
89 | ICPTopN("indirect-call-promotion-topn" , |
90 | cl::desc("limit number of targets to consider when doing indirect " |
91 | "call promotion. 0 = no limit" ), |
92 | cl::init(Val: 3), cl::cat(BoltOptCategory)); |
93 | |
94 | static cl::alias |
95 | ICPTopNAlias("icp-topn" , |
96 | cl::desc("alias for --indirect-call-promotion-topn" ), |
97 | cl::aliasopt(ICPTopN)); |
98 | |
99 | static cl::opt<unsigned> ICPCallsTopN( |
100 | "indirect-call-promotion-calls-topn" , |
101 | cl::desc("limit number of targets to consider when doing indirect " |
102 | "call promotion on calls. 0 = no limit" ), |
103 | cl::init(Val: 0), cl::cat(BoltOptCategory)); |
104 | |
105 | static cl::alias ICPCallsTopNAlias( |
106 | "icp-calls-topn" , |
107 | cl::desc("alias for --indirect-call-promotion-calls-topn" ), |
108 | cl::aliasopt(ICPCallsTopN)); |
109 | |
110 | static cl::opt<unsigned> ICPJumpTablesTopN( |
111 | "indirect-call-promotion-jump-tables-topn" , |
112 | cl::desc("limit number of targets to consider when doing indirect " |
113 | "call promotion on jump tables. 0 = no limit" ), |
114 | cl::init(Val: 0), cl::cat(BoltOptCategory)); |
115 | |
116 | static cl::alias ICPJumpTablesTopNAlias( |
117 | "icp-jt-topn" , |
118 | cl::desc("alias for --indirect-call-promotion-jump-tables-topn" ), |
119 | cl::aliasopt(ICPJumpTablesTopN)); |
120 | |
121 | static cl::opt<bool> EliminateLoads( |
122 | "icp-eliminate-loads" , |
123 | cl::desc("enable load elimination using memory profiling data when " |
124 | "performing ICP" ), |
125 | cl::init(Val: true), cl::cat(BoltOptCategory)); |
126 | |
127 | static cl::opt<unsigned> ICPTopCallsites( |
128 | "icp-top-callsites" , |
129 | cl::desc("optimize hottest calls until at least this percentage of all " |
130 | "indirect calls frequency is covered. 0 = all callsites" ), |
131 | cl::init(Val: 99), cl::Hidden, cl::cat(BoltOptCategory)); |
132 | |
133 | static cl::list<std::string> |
134 | ICPFuncsList("icp-funcs" , cl::CommaSeparated, |
135 | cl::desc("list of functions to enable ICP for" ), |
136 | cl::value_desc("func1,func2,func3,..." ), cl::Hidden, |
137 | cl::cat(BoltOptCategory)); |
138 | |
139 | static cl::opt<bool> |
140 | ICPOldCodeSequence("icp-old-code-sequence" , |
141 | cl::desc("use old code sequence for promoted calls" ), |
142 | cl::Hidden, cl::cat(BoltOptCategory)); |
143 | |
144 | static cl::opt<bool> ICPJumpTablesByTarget( |
145 | "icp-jump-tables-targets" , |
146 | cl::desc( |
147 | "for jump tables, optimize indirect jmp targets instead of indices" ), |
148 | cl::Hidden, cl::cat(BoltOptCategory)); |
149 | |
150 | static cl::alias |
151 | ICPJumpTablesByTargetAlias("icp-jt-targets" , |
152 | cl::desc("alias for --icp-jump-tables-targets" ), |
153 | cl::aliasopt(ICPJumpTablesByTarget)); |
154 | |
155 | static cl::opt<bool> ICPPeelForInline( |
156 | "icp-inline" , cl::desc("only promote call targets eligible for inlining" ), |
157 | cl::Hidden, cl::cat(BoltOptCategory)); |
158 | |
159 | } // namespace opts |
160 | |
161 | #ifndef NDEBUG |
162 | static bool verifyProfile(std::map<uint64_t, BinaryFunction> &BFs) { |
163 | bool IsValid = true; |
164 | for (auto &BFI : BFs) { |
165 | BinaryFunction &BF = BFI.second; |
166 | if (!BF.isSimple()) |
167 | continue; |
168 | for (const BinaryBasicBlock &BB : BF) { |
169 | auto BI = BB.branch_info_begin(); |
170 | for (BinaryBasicBlock *SuccBB : BB.successors()) { |
171 | if (BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE && BI->Count > 0) { |
172 | if (BB.getKnownExecutionCount() == 0 || |
173 | SuccBB->getKnownExecutionCount() == 0) { |
174 | BF.getBinaryContext().errs() |
175 | << "BOLT-WARNING: profile verification failed after ICP for " |
176 | "function " |
177 | << BF << '\n'; |
178 | IsValid = false; |
179 | } |
180 | } |
181 | ++BI; |
182 | } |
183 | } |
184 | } |
185 | return IsValid; |
186 | } |
187 | #endif |
188 | |
189 | namespace llvm { |
190 | namespace bolt { |
191 | |
192 | IndirectCallPromotion::Callsite::Callsite(BinaryFunction &BF, |
193 | const IndirectCallProfile &ICP) |
194 | : From(BF.getSymbol()), To(ICP.Offset), Mispreds(ICP.Mispreds), |
195 | Branches(ICP.Count) { |
196 | if (ICP.Symbol) { |
197 | To.Sym = ICP.Symbol; |
198 | To.Addr = 0; |
199 | } |
200 | } |
201 | |
202 | void IndirectCallPromotion::printDecision( |
203 | llvm::raw_ostream &OS, |
204 | std::vector<IndirectCallPromotion::Callsite> &Targets, unsigned N) const { |
205 | uint64_t TotalCount = 0; |
206 | uint64_t TotalMispreds = 0; |
207 | for (const Callsite &S : Targets) { |
208 | TotalCount += S.Branches; |
209 | TotalMispreds += S.Mispreds; |
210 | } |
211 | if (!TotalCount) |
212 | TotalCount = 1; |
213 | if (!TotalMispreds) |
214 | TotalMispreds = 1; |
215 | |
216 | OS << "BOLT-INFO: ICP decision for call site with " << Targets.size() |
217 | << " targets, Count = " << TotalCount << ", Mispreds = " << TotalMispreds |
218 | << "\n" ; |
219 | |
220 | size_t I = 0; |
221 | for (const Callsite &S : Targets) { |
222 | OS << "Count = " << S.Branches << ", " |
223 | << format(Fmt: "%.1f" , Vals: (100.0 * S.Branches) / TotalCount) << ", " |
224 | << "Mispreds = " << S.Mispreds << ", " |
225 | << format(Fmt: "%.1f" , Vals: (100.0 * S.Mispreds) / TotalMispreds); |
226 | if (I < N) |
227 | OS << " * to be optimized *" ; |
228 | if (!S.JTIndices.empty()) { |
229 | OS << " Indices:" ; |
230 | for (const uint64_t Idx : S.JTIndices) |
231 | OS << " " << Idx; |
232 | } |
233 | OS << "\n" ; |
234 | I += S.JTIndices.empty() ? 1 : S.JTIndices.size(); |
235 | } |
236 | } |
237 | |
238 | // Get list of targets for a given call sorted by most frequently |
239 | // called first. |
240 | std::vector<IndirectCallPromotion::Callsite> |
241 | IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB, |
242 | const MCInst &Inst) const { |
243 | BinaryFunction &BF = *BB.getFunction(); |
244 | const BinaryContext &BC = BF.getBinaryContext(); |
245 | std::vector<Callsite> Targets; |
246 | |
247 | if (const JumpTable *JT = BF.getJumpTable(Inst)) { |
248 | // Don't support PIC jump tables for now |
249 | if (!opts::ICPJumpTablesByTarget && JT->Type == JumpTable::JTT_PIC) |
250 | return Targets; |
251 | const Location From(BF.getSymbol()); |
252 | const std::pair<size_t, size_t> Range = |
253 | JT->getEntriesForAddress(Addr: BC.MIB->getJumpTable(Inst)); |
254 | assert(JT->Counts.empty() || JT->Counts.size() >= Range.second); |
255 | JumpTable::JumpInfo DefaultJI; |
256 | const JumpTable::JumpInfo *JI = |
257 | JT->Counts.empty() ? &DefaultJI : &JT->Counts[Range.first]; |
258 | const size_t JIAdj = JT->Counts.empty() ? 0 : 1; |
259 | assert(JT->Type == JumpTable::JTT_PIC || |
260 | JT->EntrySize == BC.AsmInfo->getCodePointerSize()); |
261 | for (size_t I = Range.first; I < Range.second; ++I, JI += JIAdj) { |
262 | MCSymbol *Entry = JT->Entries[I]; |
263 | const BinaryBasicBlock *ToBB = BF.getBasicBlockForLabel(Label: Entry); |
264 | assert(ToBB || Entry == BF.getFunctionEndLabel() || |
265 | Entry == BF.getFunctionEndLabel(FragmentNum::cold())); |
266 | if (Entry == BF.getFunctionEndLabel() || |
267 | Entry == BF.getFunctionEndLabel(Fragment: FragmentNum::cold())) |
268 | continue; |
269 | const Location To(Entry); |
270 | const BinaryBasicBlock::BinaryBranchInfo &BI = BB.getBranchInfo(Succ: *ToBB); |
271 | Targets.emplace_back(args: From, args: To, args: BI.MispredictedCount, args: BI.Count, |
272 | args: I - Range.first); |
273 | } |
274 | |
275 | // Sort by symbol then addr. |
276 | llvm::sort(C&: Targets, Comp: [](const Callsite &A, const Callsite &B) { |
277 | if (A.To.Sym && B.To.Sym) |
278 | return A.To.Sym < B.To.Sym; |
279 | else if (A.To.Sym && !B.To.Sym) |
280 | return true; |
281 | else if (!A.To.Sym && B.To.Sym) |
282 | return false; |
283 | else |
284 | return A.To.Addr < B.To.Addr; |
285 | }); |
286 | |
287 | // Targets may contain multiple entries to the same target, but using |
288 | // different indices. Their profile will report the same number of branches |
289 | // for different indices if the target is the same. That's because we don't |
290 | // profile the index value, but only the target via LBR. |
291 | auto First = Targets.begin(); |
292 | auto Last = Targets.end(); |
293 | auto Result = First; |
294 | while (++First != Last) { |
295 | Callsite &A = *Result; |
296 | const Callsite &B = *First; |
297 | if (A.To.Sym && B.To.Sym && A.To.Sym == B.To.Sym) |
298 | A.JTIndices.insert(position: A.JTIndices.end(), first: B.JTIndices.begin(), |
299 | last: B.JTIndices.end()); |
300 | else |
301 | *(++Result) = *First; |
302 | } |
303 | ++Result; |
304 | |
305 | LLVM_DEBUG(if (Targets.end() - Result > 0) { |
306 | dbgs() << "BOLT-INFO: ICP: " << (Targets.end() - Result) |
307 | << " duplicate targets removed\n" ; |
308 | }); |
309 | |
310 | Targets.erase(first: Result, last: Targets.end()); |
311 | } else { |
312 | // Don't try to optimize PC relative indirect calls. |
313 | if (Inst.getOperand(i: 0).isReg() && |
314 | Inst.getOperand(i: 0).getReg() == BC.MRI->getProgramCounter()) |
315 | return Targets; |
316 | |
317 | const auto ICSP = BC.MIB->tryGetAnnotationAs<IndirectCallSiteProfile>( |
318 | Inst, Name: "CallProfile" ); |
319 | if (ICSP) { |
320 | for (const IndirectCallProfile &CSP : ICSP.get()) { |
321 | Callsite Site(BF, CSP); |
322 | if (Site.isValid()) |
323 | Targets.emplace_back(args: std::move(Site)); |
324 | } |
325 | } |
326 | } |
327 | |
328 | // Sort by target count, number of indices in case of jump table, and |
329 | // mispredicts. We prioritize targets with high count, small number of indices |
330 | // and high mispredicts. Break ties by selecting targets with lower addresses. |
331 | llvm::stable_sort(Range&: Targets, C: [](const Callsite &A, const Callsite &B) { |
332 | if (A.Branches != B.Branches) |
333 | return A.Branches > B.Branches; |
334 | if (A.JTIndices.size() != B.JTIndices.size()) |
335 | return A.JTIndices.size() < B.JTIndices.size(); |
336 | if (A.Mispreds != B.Mispreds) |
337 | return A.Mispreds > B.Mispreds; |
338 | return A.To.Addr < B.To.Addr; |
339 | }); |
340 | |
341 | // Remove non-symbol targets |
342 | llvm::erase_if(C&: Targets, P: [](const Callsite &CS) { return !CS.To.Sym; }); |
343 | |
344 | LLVM_DEBUG(if (BF.getJumpTable(Inst)) { |
345 | uint64_t TotalCount = 0; |
346 | uint64_t TotalMispreds = 0; |
347 | for (const Callsite &S : Targets) { |
348 | TotalCount += S.Branches; |
349 | TotalMispreds += S.Mispreds; |
350 | } |
351 | if (!TotalCount) |
352 | TotalCount = 1; |
353 | if (!TotalMispreds) |
354 | TotalMispreds = 1; |
355 | |
356 | dbgs() << "BOLT-INFO: ICP: jump table size = " << Targets.size() |
357 | << ", Count = " << TotalCount << ", Mispreds = " << TotalMispreds |
358 | << "\n" ; |
359 | |
360 | size_t I = 0; |
361 | for (const Callsite &S : Targets) { |
362 | dbgs() << "Count[" << I << "] = " << S.Branches << ", " |
363 | << format("%.1f" , (100.0 * S.Branches) / TotalCount) << ", " |
364 | << "Mispreds[" << I << "] = " << S.Mispreds << ", " |
365 | << format("%.1f" , (100.0 * S.Mispreds) / TotalMispreds) << "\n" ; |
366 | ++I; |
367 | } |
368 | }); |
369 | |
370 | return Targets; |
371 | } |
372 | |
373 | IndirectCallPromotion::JumpTableInfoType |
374 | IndirectCallPromotion::maybeGetHotJumpTableTargets(BinaryBasicBlock &BB, |
375 | MCInst &CallInst, |
376 | MCInst *&TargetFetchInst, |
377 | const JumpTable *JT) const { |
378 | assert(JT && "Can't get jump table addrs for non-jump tables." ); |
379 | |
380 | BinaryFunction &Function = *BB.getFunction(); |
381 | BinaryContext &BC = Function.getBinaryContext(); |
382 | |
383 | if (!Function.hasMemoryProfile() || !opts::EliminateLoads) |
384 | return JumpTableInfoType(); |
385 | |
386 | JumpTableInfoType HotTargets; |
387 | MCInst *MemLocInstr; |
388 | MCInst *PCRelBaseOut; |
389 | unsigned BaseReg, IndexReg; |
390 | int64_t DispValue; |
391 | const MCExpr *DispExpr; |
392 | MutableArrayRef<MCInst> Insts(&BB.front(), &CallInst); |
393 | const IndirectBranchType Type = BC.MIB->analyzeIndirectBranch( |
394 | Instruction&: CallInst, Begin: Insts.begin(), End: Insts.end(), PtrSize: BC.AsmInfo->getCodePointerSize(), |
395 | MemLocInstr, BaseRegNum&: BaseReg, IndexRegNum&: IndexReg, DispValue, DispExpr, PCRelBaseOut); |
396 | |
397 | assert(MemLocInstr && "There should always be a load for jump tables" ); |
398 | if (!MemLocInstr) |
399 | return JumpTableInfoType(); |
400 | |
401 | LLVM_DEBUG({ |
402 | dbgs() << "BOLT-INFO: ICP attempting to find memory profiling data for " |
403 | << "jump table in " << Function << " at @ " |
404 | << (&CallInst - &BB.front()) << "\n" |
405 | << "BOLT-INFO: ICP target fetch instructions:\n" ; |
406 | BC.printInstruction(dbgs(), *MemLocInstr, 0, &Function); |
407 | if (MemLocInstr != &CallInst) |
408 | BC.printInstruction(dbgs(), CallInst, 0, &Function); |
409 | }); |
410 | |
411 | DEBUG_VERBOSE(1, { |
412 | dbgs() << "Jmp info: Type = " << (unsigned)Type << ", " |
413 | << "BaseReg = " << BC.MRI->getName(BaseReg) << ", " |
414 | << "IndexReg = " << BC.MRI->getName(IndexReg) << ", " |
415 | << "DispValue = " << Twine::utohexstr(DispValue) << ", " |
416 | << "DispExpr = " << DispExpr << ", " |
417 | << "MemLocInstr = " ; |
418 | BC.printInstruction(dbgs(), *MemLocInstr, 0, &Function); |
419 | dbgs() << "\n" ; |
420 | }); |
421 | |
422 | ++TotalIndexBasedCandidates; |
423 | |
424 | auto ErrorOrMemAccessProfile = |
425 | BC.MIB->tryGetAnnotationAs<MemoryAccessProfile>(Inst&: *MemLocInstr, |
426 | Name: "MemoryAccessProfile" ); |
427 | if (!ErrorOrMemAccessProfile) { |
428 | DEBUG_VERBOSE(1, dbgs() |
429 | << "BOLT-INFO: ICP no memory profiling data found\n" ); |
430 | return JumpTableInfoType(); |
431 | } |
432 | MemoryAccessProfile &MemAccessProfile = ErrorOrMemAccessProfile.get(); |
433 | |
434 | uint64_t ArrayStart; |
435 | if (DispExpr) { |
436 | ErrorOr<uint64_t> DispValueOrError = |
437 | BC.getSymbolValue(Symbol: *BC.MIB->getTargetSymbol(Expr: DispExpr)); |
438 | assert(DispValueOrError && "global symbol needs a value" ); |
439 | ArrayStart = *DispValueOrError; |
440 | } else { |
441 | ArrayStart = static_cast<uint64_t>(DispValue); |
442 | } |
443 | |
444 | if (BaseReg == BC.MRI->getProgramCounter()) |
445 | ArrayStart += Function.getAddress() + MemAccessProfile.NextInstrOffset; |
446 | |
447 | // This is a map of [symbol] -> [count, index] and is used to combine indices |
448 | // into the jump table since there may be multiple addresses that all have the |
449 | // same entry. |
450 | std::map<MCSymbol *, std::pair<uint64_t, uint64_t>> HotTargetMap; |
451 | const std::pair<size_t, size_t> Range = JT->getEntriesForAddress(Addr: ArrayStart); |
452 | |
453 | for (const AddressAccess &AccessInfo : MemAccessProfile.AddressAccessInfo) { |
454 | size_t Index; |
455 | // Mem data occasionally includes nullprs, ignore them. |
456 | if (!AccessInfo.MemoryObject && !AccessInfo.Offset) |
457 | continue; |
458 | |
459 | if (AccessInfo.Offset % JT->EntrySize != 0) // ignore bogus data |
460 | return JumpTableInfoType(); |
461 | |
462 | if (AccessInfo.MemoryObject) { |
463 | // Deal with bad/stale data |
464 | if (!AccessInfo.MemoryObject->getName().starts_with( |
465 | Prefix: "JUMP_TABLE/" + Function.getOneName().str())) |
466 | return JumpTableInfoType(); |
467 | Index = |
468 | (AccessInfo.Offset - (ArrayStart - JT->getAddress())) / JT->EntrySize; |
469 | } else { |
470 | Index = (AccessInfo.Offset - ArrayStart) / JT->EntrySize; |
471 | } |
472 | |
473 | // If Index is out of range it probably means the memory profiling data is |
474 | // wrong for this instruction, bail out. |
475 | if (Index >= Range.second) { |
476 | LLVM_DEBUG(dbgs() << "BOLT-INFO: Index out of range of " << Range.first |
477 | << ", " << Range.second << "\n" ); |
478 | return JumpTableInfoType(); |
479 | } |
480 | |
481 | // Make sure the hot index points at a legal label corresponding to a BB, |
482 | // e.g. not the end of function (unreachable) label. |
483 | if (!Function.getBasicBlockForLabel(Label: JT->Entries[Index + Range.first])) { |
484 | LLVM_DEBUG({ |
485 | dbgs() << "BOLT-INFO: hot index " << Index << " pointing at bogus " |
486 | << "label " << JT->Entries[Index + Range.first]->getName() |
487 | << " in jump table:\n" ; |
488 | JT->print(dbgs()); |
489 | dbgs() << "HotTargetMap:\n" ; |
490 | for (std::pair<MCSymbol *const, std::pair<uint64_t, uint64_t>> &HT : |
491 | HotTargetMap) |
492 | dbgs() << "BOLT-INFO: " << HT.first->getName() |
493 | << " = (count=" << HT.second.first |
494 | << ", index=" << HT.second.second << ")\n" ; |
495 | }); |
496 | return JumpTableInfoType(); |
497 | } |
498 | |
499 | std::pair<uint64_t, uint64_t> &HotTarget = |
500 | HotTargetMap[JT->Entries[Index + Range.first]]; |
501 | HotTarget.first += AccessInfo.Count; |
502 | HotTarget.second = Index; |
503 | } |
504 | |
505 | llvm::copy(Range: llvm::make_second_range(c&: HotTargetMap), |
506 | Out: std::back_inserter(x&: HotTargets)); |
507 | |
508 | // Sort with highest counts first. |
509 | llvm::sort(C: reverse(C&: HotTargets)); |
510 | |
511 | LLVM_DEBUG({ |
512 | dbgs() << "BOLT-INFO: ICP jump table hot targets:\n" ; |
513 | for (const std::pair<uint64_t, uint64_t> &Target : HotTargets) |
514 | dbgs() << "BOLT-INFO: Idx = " << Target.second << ", " |
515 | << "Count = " << Target.first << "\n" ; |
516 | }); |
517 | |
518 | BC.MIB->getOrCreateAnnotationAs<uint16_t>(Inst&: CallInst, Name: "JTIndexReg" ) = IndexReg; |
519 | |
520 | TargetFetchInst = MemLocInstr; |
521 | |
522 | return HotTargets; |
523 | } |
524 | |
525 | IndirectCallPromotion::SymTargetsType |
526 | IndirectCallPromotion::findCallTargetSymbols(std::vector<Callsite> &Targets, |
527 | size_t &N, BinaryBasicBlock &BB, |
528 | MCInst &CallInst, |
529 | MCInst *&TargetFetchInst) const { |
530 | const BinaryContext &BC = BB.getFunction()->getBinaryContext(); |
531 | const JumpTable *JT = BB.getFunction()->getJumpTable(Inst: CallInst); |
532 | SymTargetsType SymTargets; |
533 | |
534 | if (!JT) { |
535 | for (size_t I = 0; I < N; ++I) { |
536 | assert(Targets[I].To.Sym && "All ICP targets must be to known symbols" ); |
537 | assert(Targets[I].JTIndices.empty() && |
538 | "Can't have jump table indices for non-jump tables" ); |
539 | SymTargets.emplace_back(args&: Targets[I].To.Sym, args: 0); |
540 | } |
541 | return SymTargets; |
542 | } |
543 | |
544 | // Use memory profile to select hot targets. |
545 | JumpTableInfoType HotTargets = |
546 | maybeGetHotJumpTableTargets(BB, CallInst, TargetFetchInst, JT); |
547 | |
548 | auto findTargetsIndex = [&](uint64_t JTIndex) { |
549 | for (size_t I = 0; I < Targets.size(); ++I) |
550 | if (llvm::is_contained(Range&: Targets[I].JTIndices, Element: JTIndex)) |
551 | return I; |
552 | LLVM_DEBUG(dbgs() << "BOLT-ERROR: Unable to find target index for hot jump " |
553 | << " table entry in " << *BB.getFunction() << "\n" ); |
554 | llvm_unreachable("Hot indices must be referred to by at least one " |
555 | "callsite" ); |
556 | }; |
557 | |
558 | if (!HotTargets.empty()) { |
559 | if (opts::Verbosity >= 1) |
560 | for (size_t I = 0; I < HotTargets.size(); ++I) |
561 | BC.outs() << "BOLT-INFO: HotTarget[" << I << "] = (" |
562 | << HotTargets[I].first << ", " << HotTargets[I].second |
563 | << ")\n" ; |
564 | |
565 | // Recompute hottest targets, now discriminating which index is hot |
566 | // NOTE: This is a tradeoff. On one hand, we get index information. On the |
567 | // other hand, info coming from the memory profile is much less accurate |
568 | // than LBRs. So we may actually end up working with more coarse |
569 | // profile granularity in exchange for information about indices. |
570 | std::vector<Callsite> NewTargets; |
571 | std::map<const MCSymbol *, uint32_t> IndicesPerTarget; |
572 | uint64_t TotalMemAccesses = 0; |
573 | for (size_t I = 0; I < HotTargets.size(); ++I) { |
574 | const uint64_t TargetIndex = findTargetsIndex(HotTargets[I].second); |
575 | ++IndicesPerTarget[Targets[TargetIndex].To.Sym]; |
576 | TotalMemAccesses += HotTargets[I].first; |
577 | } |
578 | uint64_t RemainingMemAccesses = TotalMemAccesses; |
579 | const size_t TopN = |
580 | opts::ICPJumpTablesTopN ? opts::ICPJumpTablesTopN : opts::ICPTopN; |
581 | size_t I = 0; |
582 | for (; I < HotTargets.size(); ++I) { |
583 | const uint64_t MemAccesses = HotTargets[I].first; |
584 | if (100 * MemAccesses < |
585 | TotalMemAccesses * opts::ICPJTTotalPercentThreshold) |
586 | break; |
587 | if (100 * MemAccesses < |
588 | RemainingMemAccesses * opts::ICPJTRemainingPercentThreshold) |
589 | break; |
590 | if (TopN && I >= TopN) |
591 | break; |
592 | RemainingMemAccesses -= MemAccesses; |
593 | |
594 | const uint64_t JTIndex = HotTargets[I].second; |
595 | Callsite &Target = Targets[findTargetsIndex(JTIndex)]; |
596 | |
597 | NewTargets.push_back(x: Target); |
598 | std::vector<uint64_t>({JTIndex}).swap(x&: NewTargets.back().JTIndices); |
599 | llvm::erase(C&: Target.JTIndices, V: JTIndex); |
600 | |
601 | // Keep fixCFG counts sane if more indices use this same target later |
602 | assert(IndicesPerTarget[Target.To.Sym] > 0 && "wrong map" ); |
603 | NewTargets.back().Branches = |
604 | Target.Branches / IndicesPerTarget[Target.To.Sym]; |
605 | NewTargets.back().Mispreds = |
606 | Target.Mispreds / IndicesPerTarget[Target.To.Sym]; |
607 | assert(Target.Branches >= NewTargets.back().Branches); |
608 | assert(Target.Mispreds >= NewTargets.back().Mispreds); |
609 | Target.Branches -= NewTargets.back().Branches; |
610 | Target.Mispreds -= NewTargets.back().Mispreds; |
611 | } |
612 | llvm::copy(Range&: Targets, Out: std::back_inserter(x&: NewTargets)); |
613 | std::swap(x&: NewTargets, y&: Targets); |
614 | N = I; |
615 | |
616 | if (N == 0 && opts::Verbosity >= 1) { |
617 | BC.outs() << "BOLT-INFO: ICP failed in " << *BB.getFunction() << " in " |
618 | << BB.getName() << ": failed to meet thresholds after memory " |
619 | << "profile data was loaded.\n" ; |
620 | return SymTargets; |
621 | } |
622 | } |
623 | |
624 | for (size_t I = 0, TgtIdx = 0; I < N; ++TgtIdx) { |
625 | Callsite &Target = Targets[TgtIdx]; |
626 | assert(Target.To.Sym && "All ICP targets must be to known symbols" ); |
627 | assert(!Target.JTIndices.empty() && "Jump tables must have indices" ); |
628 | for (uint64_t Idx : Target.JTIndices) { |
629 | SymTargets.emplace_back(args&: Target.To.Sym, args&: Idx); |
630 | ++I; |
631 | } |
632 | } |
633 | |
634 | return SymTargets; |
635 | } |
636 | |
637 | IndirectCallPromotion::MethodInfoType IndirectCallPromotion::maybeGetVtableSyms( |
638 | BinaryBasicBlock &BB, MCInst &Inst, |
639 | const SymTargetsType &SymTargets) const { |
640 | BinaryFunction &Function = *BB.getFunction(); |
641 | BinaryContext &BC = Function.getBinaryContext(); |
642 | std::vector<std::pair<MCSymbol *, uint64_t>> VtableSyms; |
643 | std::vector<MCInst *> MethodFetchInsns; |
644 | unsigned VtableReg, MethodReg; |
645 | uint64_t MethodOffset; |
646 | |
647 | assert(!Function.getJumpTable(Inst) && |
648 | "Can't get vtable addrs for jump tables." ); |
649 | |
650 | if (!Function.hasMemoryProfile() || !opts::EliminateLoads) |
651 | return MethodInfoType(); |
652 | |
653 | MutableArrayRef<MCInst> Insts(&BB.front(), &Inst + 1); |
654 | if (!BC.MIB->analyzeVirtualMethodCall(Begin: Insts.begin(), End: Insts.end(), |
655 | MethodFetchInsns, VtableRegNum&: VtableReg, BaseRegNum&: MethodReg, |
656 | MethodOffset)) { |
657 | DEBUG_VERBOSE( |
658 | 1, dbgs() << "BOLT-INFO: ICP unable to analyze method call in " |
659 | << Function << " at @ " << (&Inst - &BB.front()) << "\n" ); |
660 | return MethodInfoType(); |
661 | } |
662 | |
663 | ++TotalMethodLoadEliminationCandidates; |
664 | |
665 | DEBUG_VERBOSE(1, { |
666 | dbgs() << "BOLT-INFO: ICP found virtual method call in " << Function |
667 | << " at @ " << (&Inst - &BB.front()) << "\n" ; |
668 | dbgs() << "BOLT-INFO: ICP method fetch instructions:\n" ; |
669 | for (MCInst *Inst : MethodFetchInsns) |
670 | BC.printInstruction(dbgs(), *Inst, 0, &Function); |
671 | |
672 | if (MethodFetchInsns.back() != &Inst) |
673 | BC.printInstruction(dbgs(), Inst, 0, &Function); |
674 | }); |
675 | |
676 | // Try to get value profiling data for the method load instruction. |
677 | auto ErrorOrMemAccessProfile = |
678 | BC.MIB->tryGetAnnotationAs<MemoryAccessProfile>(Inst&: *MethodFetchInsns.back(), |
679 | Name: "MemoryAccessProfile" ); |
680 | if (!ErrorOrMemAccessProfile) { |
681 | DEBUG_VERBOSE(1, dbgs() |
682 | << "BOLT-INFO: ICP no memory profiling data found\n" ); |
683 | return MethodInfoType(); |
684 | } |
685 | MemoryAccessProfile &MemAccessProfile = ErrorOrMemAccessProfile.get(); |
686 | |
687 | // Find the vtable that each method belongs to. |
688 | std::map<const MCSymbol *, uint64_t> MethodToVtable; |
689 | |
690 | for (const AddressAccess &AccessInfo : MemAccessProfile.AddressAccessInfo) { |
691 | uint64_t Address = AccessInfo.Offset; |
692 | if (AccessInfo.MemoryObject) |
693 | Address += AccessInfo.MemoryObject->getAddress(); |
694 | |
695 | // Ignore bogus data. |
696 | if (!Address) |
697 | continue; |
698 | |
699 | const uint64_t VtableBase = Address - MethodOffset; |
700 | |
701 | DEBUG_VERBOSE(1, dbgs() << "BOLT-INFO: ICP vtable = " |
702 | << Twine::utohexstr(VtableBase) << "+" |
703 | << MethodOffset << "/" << AccessInfo.Count << "\n" ); |
704 | |
705 | if (ErrorOr<uint64_t> MethodAddr = BC.getPointerAtAddress(Address)) { |
706 | BinaryData *MethodBD = BC.getBinaryDataAtAddress(Address: MethodAddr.get()); |
707 | if (!MethodBD) // skip unknown methods |
708 | continue; |
709 | MCSymbol *MethodSym = MethodBD->getSymbol(); |
710 | MethodToVtable[MethodSym] = VtableBase; |
711 | DEBUG_VERBOSE(1, { |
712 | const BinaryFunction *Method = BC.getFunctionForSymbol(MethodSym); |
713 | dbgs() << "BOLT-INFO: ICP found method = " |
714 | << Twine::utohexstr(MethodAddr.get()) << "/" |
715 | << (Method ? Method->getPrintName() : "" ) << "\n" ; |
716 | }); |
717 | } |
718 | } |
719 | |
720 | // Find the vtable for each target symbol. |
721 | for (size_t I = 0; I < SymTargets.size(); ++I) { |
722 | auto Itr = MethodToVtable.find(x: SymTargets[I].first); |
723 | if (Itr != MethodToVtable.end()) { |
724 | if (BinaryData *BD = BC.getBinaryDataContainingAddress(Address: Itr->second)) { |
725 | const uint64_t Addend = Itr->second - BD->getAddress(); |
726 | VtableSyms.emplace_back(args: BD->getSymbol(), args: Addend); |
727 | continue; |
728 | } |
729 | } |
730 | // Give up if we can't find the vtable for a method. |
731 | DEBUG_VERBOSE(1, dbgs() << "BOLT-INFO: ICP can't find vtable for " |
732 | << SymTargets[I].first->getName() << "\n" ); |
733 | return MethodInfoType(); |
734 | } |
735 | |
736 | // Make sure the vtable reg is not clobbered by the argument passing code |
737 | if (VtableReg != MethodReg) { |
738 | for (MCInst *CurInst = MethodFetchInsns.front(); CurInst < &Inst; |
739 | ++CurInst) { |
740 | const MCInstrDesc &InstrInfo = BC.MII->get(Opcode: CurInst->getOpcode()); |
741 | if (InstrInfo.hasDefOfPhysReg(MI: *CurInst, Reg: VtableReg, RI: *BC.MRI)) |
742 | return MethodInfoType(); |
743 | } |
744 | } |
745 | |
746 | return MethodInfoType(VtableSyms, MethodFetchInsns); |
747 | } |
748 | |
749 | std::vector<std::unique_ptr<BinaryBasicBlock>> |
750 | IndirectCallPromotion::rewriteCall( |
751 | BinaryBasicBlock &IndCallBlock, const MCInst &CallInst, |
752 | MCPlusBuilder::BlocksVectorTy &&ICPcode, |
753 | const std::vector<MCInst *> &MethodFetchInsns) const { |
754 | BinaryFunction &Function = *IndCallBlock.getFunction(); |
755 | MCPlusBuilder *MIB = Function.getBinaryContext().MIB.get(); |
756 | |
757 | // Create new basic blocks with correct code in each one first. |
758 | std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs; |
759 | const bool IsTailCallOrJT = |
760 | (MIB->isTailCall(Inst: CallInst) || Function.getJumpTable(Inst: CallInst)); |
761 | |
762 | // If we are tracking the indirect call/jump address, propagate the address to |
763 | // the ICP code. |
764 | const std::optional<uint32_t> IndirectInstrOffset = MIB->getOffset(Inst: CallInst); |
765 | if (IndirectInstrOffset) { |
766 | for (auto &[Symbol, Instructions] : ICPcode) |
767 | for (MCInst &Inst : Instructions) |
768 | MIB->setOffset(Inst, Offset: *IndirectInstrOffset); |
769 | } |
770 | |
771 | // Move instructions from the tail of the original call block |
772 | // to the merge block. |
773 | |
774 | // Remember any pseudo instructions following a tail call. These |
775 | // must be preserved and moved to the original block. |
776 | InstructionListType TailInsts; |
777 | const MCInst *TailInst = &CallInst; |
778 | if (IsTailCallOrJT) |
779 | while (TailInst + 1 < &(*IndCallBlock.end()) && |
780 | MIB->isPseudo(Inst: *(TailInst + 1))) |
781 | TailInsts.push_back(x: *++TailInst); |
782 | |
783 | InstructionListType MovedInst = IndCallBlock.splitInstructions(Inst: &CallInst); |
784 | // Link new BBs to the original input offset of the indirect call site or its |
785 | // containing BB, so we can map samples recorded in new BBs back to the |
786 | // original BB seen in the input binary (if using BAT). |
787 | const uint32_t OrigOffset = IndirectInstrOffset |
788 | ? *IndirectInstrOffset |
789 | : IndCallBlock.getInputOffset(); |
790 | |
791 | IndCallBlock.eraseInstructions(Begin: MethodFetchInsns.begin(), |
792 | End: MethodFetchInsns.end()); |
793 | if (IndCallBlock.empty() || |
794 | (!MethodFetchInsns.empty() && MethodFetchInsns.back() == &CallInst)) |
795 | IndCallBlock.addInstructions(Begin: ICPcode.front().second.begin(), |
796 | End: ICPcode.front().second.end()); |
797 | else |
798 | IndCallBlock.replaceInstruction(II: std::prev(x: IndCallBlock.end()), |
799 | Replacement: ICPcode.front().second); |
800 | IndCallBlock.addInstructions(Begin: TailInsts.begin(), End: TailInsts.end()); |
801 | |
802 | for (auto Itr = ICPcode.begin() + 1; Itr != ICPcode.end(); ++Itr) { |
803 | MCSymbol *&Sym = Itr->first; |
804 | InstructionListType &Insts = Itr->second; |
805 | assert(Sym); |
806 | std::unique_ptr<BinaryBasicBlock> TBB = Function.createBasicBlock(Label: Sym); |
807 | TBB->setOffset(OrigOffset); |
808 | for (MCInst &Inst : Insts) // sanitize new instructions. |
809 | if (MIB->isCall(Inst)) |
810 | MIB->removeAnnotation(Inst, Name: "CallProfile" ); |
811 | TBB->addInstructions(Begin: Insts.begin(), End: Insts.end()); |
812 | NewBBs.emplace_back(args: std::move(TBB)); |
813 | } |
814 | |
815 | // Move tail of instructions from after the original call to |
816 | // the merge block. |
817 | if (!IsTailCallOrJT) |
818 | NewBBs.back()->addInstructions(Begin: MovedInst.begin(), End: MovedInst.end()); |
819 | |
820 | return NewBBs; |
821 | } |
822 | |
823 | BinaryBasicBlock * |
824 | IndirectCallPromotion::fixCFG(BinaryBasicBlock &IndCallBlock, |
825 | const bool IsTailCall, const bool IsJumpTable, |
826 | IndirectCallPromotion::BasicBlocksVector &&NewBBs, |
827 | const std::vector<Callsite> &Targets) const { |
828 | BinaryFunction &Function = *IndCallBlock.getFunction(); |
829 | using BinaryBranchInfo = BinaryBasicBlock::BinaryBranchInfo; |
830 | BinaryBasicBlock *MergeBlock = nullptr; |
831 | |
832 | // Scale indirect call counts to the execution count of the original |
833 | // basic block containing the indirect call. |
834 | uint64_t TotalCount = IndCallBlock.getKnownExecutionCount(); |
835 | uint64_t TotalIndirectBranches = 0; |
836 | for (const Callsite &Target : Targets) |
837 | TotalIndirectBranches += Target.Branches; |
838 | if (TotalIndirectBranches == 0) |
839 | TotalIndirectBranches = 1; |
840 | BinaryBasicBlock::BranchInfoType BBI; |
841 | BinaryBasicBlock::BranchInfoType ScaledBBI; |
842 | for (const Callsite &Target : Targets) { |
843 | const size_t NumEntries = |
844 | std::max(a: static_cast<std::size_t>(1UL), b: Target.JTIndices.size()); |
845 | for (size_t I = 0; I < NumEntries; ++I) { |
846 | BBI.push_back( |
847 | Elt: BinaryBranchInfo{.Count: (Target.Branches + NumEntries - 1) / NumEntries, |
848 | .MispredictedCount: (Target.Mispreds + NumEntries - 1) / NumEntries}); |
849 | ScaledBBI.push_back( |
850 | Elt: BinaryBranchInfo{.Count: uint64_t(TotalCount * Target.Branches / |
851 | (NumEntries * TotalIndirectBranches)), |
852 | .MispredictedCount: uint64_t(TotalCount * Target.Mispreds / |
853 | (NumEntries * TotalIndirectBranches))}); |
854 | } |
855 | } |
856 | |
857 | if (IsJumpTable) { |
858 | BinaryBasicBlock *NewIndCallBlock = NewBBs.back().get(); |
859 | IndCallBlock.moveAllSuccessorsTo(New: NewIndCallBlock); |
860 | |
861 | std::vector<MCSymbol *> SymTargets; |
862 | for (const Callsite &Target : Targets) { |
863 | const size_t NumEntries = |
864 | std::max(a: static_cast<std::size_t>(1UL), b: Target.JTIndices.size()); |
865 | for (size_t I = 0; I < NumEntries; ++I) |
866 | SymTargets.push_back(x: Target.To.Sym); |
867 | } |
868 | assert(SymTargets.size() > NewBBs.size() - 1 && |
869 | "There must be a target symbol associated with each new BB." ); |
870 | |
871 | for (uint64_t I = 0; I < NewBBs.size(); ++I) { |
872 | BinaryBasicBlock *SourceBB = I ? NewBBs[I - 1].get() : &IndCallBlock; |
873 | SourceBB->setExecutionCount(TotalCount); |
874 | |
875 | BinaryBasicBlock *TargetBB = |
876 | Function.getBasicBlockForLabel(Label: SymTargets[I]); |
877 | SourceBB->addSuccessor(Succ: TargetBB, BI: ScaledBBI[I]); // taken |
878 | |
879 | TotalCount -= ScaledBBI[I].Count; |
880 | SourceBB->addSuccessor(Succ: NewBBs[I].get(), Count: TotalCount); // fall-through |
881 | |
882 | // Update branch info for the indirect jump. |
883 | BinaryBasicBlock::BinaryBranchInfo &BranchInfo = |
884 | NewIndCallBlock->getBranchInfo(Succ: *TargetBB); |
885 | if (BranchInfo.Count > BBI[I].Count) |
886 | BranchInfo.Count -= BBI[I].Count; |
887 | else |
888 | BranchInfo.Count = 0; |
889 | |
890 | if (BranchInfo.MispredictedCount > BBI[I].MispredictedCount) |
891 | BranchInfo.MispredictedCount -= BBI[I].MispredictedCount; |
892 | else |
893 | BranchInfo.MispredictedCount = 0; |
894 | } |
895 | } else { |
896 | assert(NewBBs.size() >= 2); |
897 | assert(NewBBs.size() % 2 == 1 || IndCallBlock.succ_empty()); |
898 | assert(NewBBs.size() % 2 == 1 || IsTailCall); |
899 | |
900 | auto ScaledBI = ScaledBBI.begin(); |
901 | auto updateCurrentBranchInfo = [&] { |
902 | assert(ScaledBI != ScaledBBI.end()); |
903 | TotalCount -= ScaledBI->Count; |
904 | ++ScaledBI; |
905 | }; |
906 | |
907 | if (!IsTailCall) { |
908 | MergeBlock = NewBBs.back().get(); |
909 | IndCallBlock.moveAllSuccessorsTo(New: MergeBlock); |
910 | } |
911 | |
912 | // Fix up successors and execution counts. |
913 | updateCurrentBranchInfo(); |
914 | IndCallBlock.addSuccessor(Succ: NewBBs[1].get(), Count: TotalCount); |
915 | IndCallBlock.addSuccessor(Succ: NewBBs[0].get(), BI: ScaledBBI[0]); |
916 | |
917 | const size_t Adj = IsTailCall ? 1 : 2; |
918 | for (size_t I = 0; I < NewBBs.size() - Adj; ++I) { |
919 | assert(TotalCount <= IndCallBlock.getExecutionCount() || |
920 | TotalCount <= uint64_t(TotalIndirectBranches)); |
921 | uint64_t ExecCount = ScaledBBI[(I + 1) / 2].Count; |
922 | if (I % 2 == 0) { |
923 | if (MergeBlock) |
924 | NewBBs[I]->addSuccessor(Succ: MergeBlock, Count: ScaledBBI[(I + 1) / 2].Count); |
925 | } else { |
926 | assert(I + 2 < NewBBs.size()); |
927 | updateCurrentBranchInfo(); |
928 | NewBBs[I]->addSuccessor(Succ: NewBBs[I + 2].get(), Count: TotalCount); |
929 | NewBBs[I]->addSuccessor(Succ: NewBBs[I + 1].get(), BI: ScaledBBI[(I + 1) / 2]); |
930 | ExecCount += TotalCount; |
931 | } |
932 | NewBBs[I]->setExecutionCount(ExecCount); |
933 | } |
934 | |
935 | if (MergeBlock) { |
936 | // Arrange for the MergeBlock to be the fallthrough for the first |
937 | // promoted call block. |
938 | std::unique_ptr<BinaryBasicBlock> MBPtr; |
939 | std::swap(x&: MBPtr, y&: NewBBs.back()); |
940 | NewBBs.pop_back(); |
941 | NewBBs.emplace(position: NewBBs.begin() + 1, args: std::move(MBPtr)); |
942 | // TODO: is COUNT_FALLTHROUGH_EDGE the right thing here? |
943 | NewBBs.back()->addSuccessor(Succ: MergeBlock, Count: TotalCount); // uncond branch |
944 | } |
945 | } |
946 | |
947 | // Update the execution count. |
948 | NewBBs.back()->setExecutionCount(TotalCount); |
949 | |
950 | // Update BB and BB layout. |
951 | Function.insertBasicBlocks(Start: &IndCallBlock, NewBBs: std::move(NewBBs)); |
952 | assert(Function.validateCFG()); |
953 | |
954 | return MergeBlock; |
955 | } |
956 | |
957 | size_t IndirectCallPromotion::canPromoteCallsite( |
958 | const BinaryBasicBlock &BB, const MCInst &Inst, |
959 | const std::vector<Callsite> &Targets, uint64_t NumCalls) { |
960 | BinaryFunction *BF = BB.getFunction(); |
961 | const BinaryContext &BC = BF->getBinaryContext(); |
962 | |
963 | if (BB.getKnownExecutionCount() < opts::ExecutionCountThreshold) |
964 | return 0; |
965 | |
966 | const bool IsJumpTable = BF->getJumpTable(Inst); |
967 | |
968 | auto computeStats = [&](size_t N) { |
969 | for (size_t I = 0; I < N; ++I) |
970 | if (IsJumpTable) |
971 | TotalNumFrequentJmps += Targets[I].Branches; |
972 | else |
973 | TotalNumFrequentCalls += Targets[I].Branches; |
974 | }; |
975 | |
976 | // If we have no targets (or no calls), skip this callsite. |
977 | if (Targets.empty() || !NumCalls) { |
978 | if (opts::Verbosity >= 1) { |
979 | const ptrdiff_t InstIdx = &Inst - &(*BB.begin()); |
980 | BC.outs() << "BOLT-INFO: ICP failed in " << *BF << " @ " << InstIdx |
981 | << " in " << BB.getName() << ", calls = " << NumCalls |
982 | << ", targets empty or NumCalls == 0.\n" ; |
983 | } |
984 | return 0; |
985 | } |
986 | |
987 | size_t TopN = opts::ICPTopN; |
988 | if (IsJumpTable) |
989 | TopN = opts::ICPJumpTablesTopN ? opts::ICPJumpTablesTopN : TopN; |
990 | else |
991 | TopN = opts::ICPCallsTopN ? opts::ICPCallsTopN : TopN; |
992 | |
993 | const size_t TrialN = TopN ? std::min(a: TopN, b: Targets.size()) : Targets.size(); |
994 | |
995 | if (opts::ICPTopCallsites && !BC.MIB->hasAnnotation(Inst, Name: "DoICP" )) |
996 | return 0; |
997 | |
998 | // Pick the top N targets. |
999 | uint64_t TotalMispredictsTopN = 0; |
1000 | size_t N = 0; |
1001 | |
1002 | if (opts::ICPUseMispredicts && |
1003 | (!IsJumpTable || opts::ICPJumpTablesByTarget)) { |
1004 | // Count total number of mispredictions for (at most) the top N targets. |
1005 | // We may choose a smaller N (TrialN vs. N) if the frequency threshold |
1006 | // is exceeded by fewer targets. |
1007 | double Threshold = double(opts::ICPMispredictThreshold); |
1008 | for (size_t I = 0; I < TrialN && Threshold > 0; ++I, ++N) { |
1009 | Threshold -= (100.0 * Targets[I].Mispreds) / NumCalls; |
1010 | TotalMispredictsTopN += Targets[I].Mispreds; |
1011 | } |
1012 | computeStats(N); |
1013 | |
1014 | // Compute the misprediction frequency of the top N call targets. If this |
1015 | // frequency is greater than the threshold, we should try ICP on this |
1016 | // callsite. |
1017 | const double TopNFrequency = (100.0 * TotalMispredictsTopN) / NumCalls; |
1018 | if (TopNFrequency == 0 || TopNFrequency < opts::ICPMispredictThreshold) { |
1019 | if (opts::Verbosity >= 1) { |
1020 | const ptrdiff_t InstIdx = &Inst - &(*BB.begin()); |
1021 | BC.outs() << "BOLT-INFO: ICP failed in " << *BF << " @ " << InstIdx |
1022 | << " in " << BB.getName() << ", calls = " << NumCalls |
1023 | << ", top N mis. frequency " << format(Fmt: "%.1f" , Vals: TopNFrequency) |
1024 | << "% < " << opts::ICPMispredictThreshold << "%\n" ; |
1025 | } |
1026 | return 0; |
1027 | } |
1028 | } else { |
1029 | size_t MaxTargets = 0; |
1030 | |
1031 | // Count total number of calls for (at most) the top N targets. |
1032 | // We may choose a smaller N (TrialN vs. N) if the frequency threshold |
1033 | // is exceeded by fewer targets. |
1034 | const unsigned TotalThreshold = IsJumpTable |
1035 | ? opts::ICPJTTotalPercentThreshold |
1036 | : opts::ICPCallsTotalPercentThreshold; |
1037 | const unsigned RemainingThreshold = |
1038 | IsJumpTable ? opts::ICPJTRemainingPercentThreshold |
1039 | : opts::ICPCallsRemainingPercentThreshold; |
1040 | uint64_t NumRemainingCalls = NumCalls; |
1041 | for (size_t I = 0; I < TrialN; ++I, ++MaxTargets) { |
1042 | if (100 * Targets[I].Branches < NumCalls * TotalThreshold) |
1043 | break; |
1044 | if (100 * Targets[I].Branches < NumRemainingCalls * RemainingThreshold) |
1045 | break; |
1046 | if (N + (Targets[I].JTIndices.empty() ? 1 : Targets[I].JTIndices.size()) > |
1047 | TrialN) |
1048 | break; |
1049 | TotalMispredictsTopN += Targets[I].Mispreds; |
1050 | NumRemainingCalls -= Targets[I].Branches; |
1051 | N += Targets[I].JTIndices.empty() ? 1 : Targets[I].JTIndices.size(); |
1052 | } |
1053 | computeStats(MaxTargets); |
1054 | |
1055 | // Don't check misprediction frequency for jump tables -- we don't really |
1056 | // care as long as we are saving loads from the jump table. |
1057 | if (!IsJumpTable || opts::ICPJumpTablesByTarget) { |
1058 | // Compute the misprediction frequency of the top N call targets. If |
1059 | // this frequency is less than the threshold, we should skip ICP at |
1060 | // this callsite. |
1061 | const double TopNMispredictFrequency = |
1062 | (100.0 * TotalMispredictsTopN) / NumCalls; |
1063 | |
1064 | if (TopNMispredictFrequency < opts::ICPMispredictThreshold) { |
1065 | if (opts::Verbosity >= 1) { |
1066 | const ptrdiff_t InstIdx = &Inst - &(*BB.begin()); |
1067 | BC.outs() << "BOLT-INFO: ICP failed in " << *BF << " @ " << InstIdx |
1068 | << " in " << BB.getName() << ", calls = " << NumCalls |
1069 | << ", top N mispredict frequency " |
1070 | << format(Fmt: "%.1f" , Vals: TopNMispredictFrequency) << "% < " |
1071 | << opts::ICPMispredictThreshold << "%\n" ; |
1072 | } |
1073 | return 0; |
1074 | } |
1075 | } |
1076 | } |
1077 | |
1078 | // Filter by inline-ability of target functions, stop at first target that |
1079 | // can't be inlined. |
1080 | if (!IsJumpTable && opts::ICPPeelForInline) { |
1081 | for (size_t I = 0; I < N; ++I) { |
1082 | const MCSymbol *TargetSym = Targets[I].To.Sym; |
1083 | const BinaryFunction *TargetBF = BC.getFunctionForSymbol(Symbol: TargetSym); |
1084 | if (!TargetBF || !BinaryFunctionPass::shouldOptimize(BF: *TargetBF) || |
1085 | getInliningInfo(BF: *TargetBF).Type == InliningType::INL_NONE) { |
1086 | N = I; |
1087 | break; |
1088 | } |
1089 | } |
1090 | } |
1091 | |
1092 | // Filter functions that can have ICP applied (for debugging) |
1093 | if (!opts::ICPFuncsList.empty()) { |
1094 | for (std::string &Name : opts::ICPFuncsList) |
1095 | if (BF->hasName(FunctionName: Name)) |
1096 | return N; |
1097 | return 0; |
1098 | } |
1099 | |
1100 | return N; |
1101 | } |
1102 | |
1103 | void IndirectCallPromotion::printCallsiteInfo( |
1104 | const BinaryBasicBlock &BB, const MCInst &Inst, |
1105 | const std::vector<Callsite> &Targets, const size_t N, |
1106 | uint64_t NumCalls) const { |
1107 | BinaryContext &BC = BB.getFunction()->getBinaryContext(); |
1108 | const bool IsTailCall = BC.MIB->isTailCall(Inst); |
1109 | const bool IsJumpTable = BB.getFunction()->getJumpTable(Inst); |
1110 | const ptrdiff_t InstIdx = &Inst - &(*BB.begin()); |
1111 | |
1112 | BC.outs() << "BOLT-INFO: ICP candidate branch info: " << *BB.getFunction() |
1113 | << " @ " << InstIdx << " in " << BB.getName() |
1114 | << " -> calls = " << NumCalls |
1115 | << (IsTailCall ? " (tail)" : (IsJumpTable ? " (jump table)" : "" )) |
1116 | << "\n" ; |
1117 | for (size_t I = 0; I < N; I++) { |
1118 | const double Frequency = 100.0 * Targets[I].Branches / NumCalls; |
1119 | const double MisFrequency = 100.0 * Targets[I].Mispreds / NumCalls; |
1120 | BC.outs() << "BOLT-INFO: " ; |
1121 | if (Targets[I].To.Sym) |
1122 | BC.outs() << Targets[I].To.Sym->getName(); |
1123 | else |
1124 | BC.outs() << Targets[I].To.Addr; |
1125 | BC.outs() << ", calls = " << Targets[I].Branches |
1126 | << ", mispreds = " << Targets[I].Mispreds |
1127 | << ", taken freq = " << format(Fmt: "%.1f" , Vals: Frequency) << "%" |
1128 | << ", mis. freq = " << format(Fmt: "%.1f" , Vals: MisFrequency) << "%" ; |
1129 | bool First = true; |
1130 | for (uint64_t JTIndex : Targets[I].JTIndices) { |
1131 | BC.outs() << (First ? ", indices = " : ", " ) << JTIndex; |
1132 | First = false; |
1133 | } |
1134 | BC.outs() << "\n" ; |
1135 | } |
1136 | |
1137 | LLVM_DEBUG({ |
1138 | dbgs() << "BOLT-INFO: ICP original call instruction:" ; |
1139 | BC.printInstruction(dbgs(), Inst, Targets[0].From.Addr, nullptr, true); |
1140 | }); |
1141 | } |
1142 | |
1143 | Error IndirectCallPromotion::runOnFunctions(BinaryContext &BC) { |
1144 | if (opts::ICP == ICP_NONE) |
1145 | return Error::success(); |
1146 | |
1147 | auto &BFs = BC.getBinaryFunctions(); |
1148 | |
1149 | const bool OptimizeCalls = (opts::ICP == ICP_CALLS || opts::ICP == ICP_ALL); |
1150 | const bool OptimizeJumpTables = |
1151 | (opts::ICP == ICP_JUMP_TABLES || opts::ICP == ICP_ALL); |
1152 | |
1153 | std::unique_ptr<RegAnalysis> RA; |
1154 | std::unique_ptr<BinaryFunctionCallGraph> CG; |
1155 | if (OptimizeJumpTables) { |
1156 | CG.reset(p: new BinaryFunctionCallGraph(buildCallGraph(BC))); |
1157 | RA.reset(p: new RegAnalysis(BC, &BFs, &*CG)); |
1158 | } |
1159 | |
1160 | // If icp-top-callsites is enabled, compute the total number of indirect |
1161 | // calls and then optimize the hottest callsites that contribute to that |
1162 | // total. |
1163 | SetVector<BinaryFunction *> Functions; |
1164 | if (opts::ICPTopCallsites == 0) { |
1165 | for (auto &KV : BFs) |
1166 | Functions.insert(X: &KV.second); |
1167 | } else { |
1168 | using IndirectCallsite = std::tuple<uint64_t, MCInst *, BinaryFunction *>; |
1169 | std::vector<IndirectCallsite> IndirectCalls; |
1170 | size_t TotalIndirectCalls = 0; |
1171 | |
1172 | // Find all the indirect callsites. |
1173 | for (auto &BFIt : BFs) { |
1174 | BinaryFunction &Function = BFIt.second; |
1175 | |
1176 | if (!shouldOptimize(BF: Function)) |
1177 | continue; |
1178 | |
1179 | const bool HasLayout = !Function.getLayout().block_empty(); |
1180 | |
1181 | for (BinaryBasicBlock &BB : Function) { |
1182 | if (HasLayout && Function.isSplit() && BB.isCold()) |
1183 | continue; |
1184 | |
1185 | for (MCInst &Inst : BB) { |
1186 | const bool IsJumpTable = Function.getJumpTable(Inst); |
1187 | const bool HasIndirectCallProfile = |
1188 | BC.MIB->hasAnnotation(Inst, Name: "CallProfile" ); |
1189 | const bool IsDirectCall = |
1190 | (BC.MIB->isCall(Inst) && BC.MIB->getTargetSymbol(Inst, OpNum: 0)); |
1191 | |
1192 | if (!IsDirectCall && |
1193 | ((HasIndirectCallProfile && !IsJumpTable && OptimizeCalls) || |
1194 | (IsJumpTable && OptimizeJumpTables))) { |
1195 | uint64_t NumCalls = 0; |
1196 | for (const Callsite &BInfo : getCallTargets(BB, Inst)) |
1197 | NumCalls += BInfo.Branches; |
1198 | IndirectCalls.push_back( |
1199 | x: std::make_tuple(args&: NumCalls, args: &Inst, args: &Function)); |
1200 | TotalIndirectCalls += NumCalls; |
1201 | } |
1202 | } |
1203 | } |
1204 | } |
1205 | |
1206 | // Sort callsites by execution count. |
1207 | llvm::sort(C: reverse(C&: IndirectCalls)); |
1208 | |
1209 | // Find callsites that contribute to the top "opts::ICPTopCallsites"% |
1210 | // number of calls. |
1211 | const float TopPerc = opts::ICPTopCallsites / 100.0f; |
1212 | int64_t MaxCalls = TotalIndirectCalls * TopPerc; |
1213 | uint64_t LastFreq = std::numeric_limits<uint64_t>::max(); |
1214 | size_t Num = 0; |
1215 | for (const IndirectCallsite &IC : IndirectCalls) { |
1216 | const uint64_t CurFreq = std::get<0>(t: IC); |
1217 | // Once we decide to stop, include at least all branches that share the |
1218 | // same frequency of the last one to avoid non-deterministic behavior |
1219 | // (e.g. turning on/off ICP depending on the order of functions) |
1220 | if (MaxCalls <= 0 && CurFreq != LastFreq) |
1221 | break; |
1222 | MaxCalls -= CurFreq; |
1223 | LastFreq = CurFreq; |
1224 | BC.MIB->addAnnotation(Inst&: *std::get<1>(t: IC), Name: "DoICP" , Val: true); |
1225 | Functions.insert(X: std::get<2>(t: IC)); |
1226 | ++Num; |
1227 | } |
1228 | BC.outs() << "BOLT-INFO: ICP Total indirect calls = " << TotalIndirectCalls |
1229 | << ", " << Num << " callsites cover " << opts::ICPTopCallsites |
1230 | << "% of all indirect calls\n" ; |
1231 | } |
1232 | |
1233 | for (BinaryFunction *FuncPtr : Functions) { |
1234 | BinaryFunction &Function = *FuncPtr; |
1235 | |
1236 | if (!shouldOptimize(BF: Function)) |
1237 | continue; |
1238 | |
1239 | const bool HasLayout = !Function.getLayout().block_empty(); |
1240 | |
1241 | // Total number of indirect calls issued from the current Function. |
1242 | // (a fraction of TotalIndirectCalls) |
1243 | uint64_t FuncTotalIndirectCalls = 0; |
1244 | uint64_t FuncTotalIndirectJmps = 0; |
1245 | |
1246 | std::vector<BinaryBasicBlock *> BBs; |
1247 | for (BinaryBasicBlock &BB : Function) { |
1248 | // Skip indirect calls in cold blocks. |
1249 | if (!HasLayout || !Function.isSplit() || !BB.isCold()) |
1250 | BBs.push_back(x: &BB); |
1251 | } |
1252 | if (BBs.empty()) |
1253 | continue; |
1254 | |
1255 | DataflowInfoManager Info(Function, RA.get(), nullptr); |
1256 | while (!BBs.empty()) { |
1257 | BinaryBasicBlock *BB = BBs.back(); |
1258 | BBs.pop_back(); |
1259 | |
1260 | for (unsigned Idx = 0; Idx < BB->size(); ++Idx) { |
1261 | MCInst &Inst = BB->getInstructionAtIndex(Index: Idx); |
1262 | const ptrdiff_t InstIdx = &Inst - &(*BB->begin()); |
1263 | const bool IsTailCall = BC.MIB->isTailCall(Inst); |
1264 | const bool HasIndirectCallProfile = |
1265 | BC.MIB->hasAnnotation(Inst, Name: "CallProfile" ); |
1266 | const bool IsJumpTable = Function.getJumpTable(Inst); |
1267 | |
1268 | if (BC.MIB->isCall(Inst)) |
1269 | TotalCalls += BB->getKnownExecutionCount(); |
1270 | |
1271 | if (IsJumpTable && !OptimizeJumpTables) |
1272 | continue; |
1273 | |
1274 | if (!IsJumpTable && (!HasIndirectCallProfile || !OptimizeCalls)) |
1275 | continue; |
1276 | |
1277 | // Ignore direct calls. |
1278 | if (BC.MIB->isCall(Inst) && BC.MIB->getTargetSymbol(Inst, OpNum: 0)) |
1279 | continue; |
1280 | |
1281 | assert((BC.MIB->isCall(Inst) || BC.MIB->isIndirectBranch(Inst)) && |
1282 | "expected a call or an indirect jump instruction" ); |
1283 | |
1284 | if (IsJumpTable) |
1285 | ++TotalJumpTableCallsites; |
1286 | else |
1287 | ++TotalIndirectCallsites; |
1288 | |
1289 | std::vector<Callsite> Targets = getCallTargets(BB&: *BB, Inst); |
1290 | |
1291 | // Compute the total number of calls from this particular callsite. |
1292 | uint64_t NumCalls = 0; |
1293 | for (const Callsite &BInfo : Targets) |
1294 | NumCalls += BInfo.Branches; |
1295 | if (!IsJumpTable) |
1296 | FuncTotalIndirectCalls += NumCalls; |
1297 | else |
1298 | FuncTotalIndirectJmps += NumCalls; |
1299 | |
1300 | // If FLAGS regs is alive after this jmp site, do not try |
1301 | // promoting because we will clobber FLAGS. |
1302 | if (IsJumpTable) { |
1303 | ErrorOr<const BitVector &> State = |
1304 | Info.getLivenessAnalysis().getStateBefore(Point: Inst); |
1305 | if (!State || (State && (*State)[BC.MIB->getFlagsReg()])) { |
1306 | if (opts::Verbosity >= 1) |
1307 | BC.outs() << "BOLT-INFO: ICP failed in " << Function << " @ " |
1308 | << InstIdx << " in " << BB->getName() |
1309 | << ", calls = " << NumCalls |
1310 | << (State ? ", cannot clobber flags reg.\n" |
1311 | : ", no liveness data available.\n" ); |
1312 | continue; |
1313 | } |
1314 | } |
1315 | |
1316 | // Should this callsite be optimized? Return the number of targets |
1317 | // to use when promoting this call. A value of zero means to skip |
1318 | // this callsite. |
1319 | size_t N = canPromoteCallsite(BB: *BB, Inst, Targets, NumCalls); |
1320 | |
1321 | // If it is a jump table and it failed to meet our initial threshold, |
1322 | // proceed to findCallTargetSymbols -- it may reevaluate N if |
1323 | // memory profile is present |
1324 | if (!N && !IsJumpTable) |
1325 | continue; |
1326 | |
1327 | if (opts::Verbosity >= 1) |
1328 | printCallsiteInfo(BB: *BB, Inst, Targets, N, NumCalls); |
1329 | |
1330 | // Find MCSymbols or absolute addresses for each call target. |
1331 | MCInst *TargetFetchInst = nullptr; |
1332 | const SymTargetsType SymTargets = |
1333 | findCallTargetSymbols(Targets, N, BB&: *BB, CallInst&: Inst, TargetFetchInst); |
1334 | |
1335 | // findCallTargetSymbols may have changed N if mem profile is available |
1336 | // for jump tables |
1337 | if (!N) |
1338 | continue; |
1339 | |
1340 | LLVM_DEBUG(printDecision(dbgs(), Targets, N)); |
1341 | |
1342 | // If we can't resolve any of the target symbols, punt on this callsite. |
1343 | // TODO: can this ever happen? |
1344 | if (SymTargets.size() < N) { |
1345 | const size_t LastTarget = SymTargets.size(); |
1346 | if (opts::Verbosity >= 1) |
1347 | BC.outs() << "BOLT-INFO: ICP failed in " << Function << " @ " |
1348 | << InstIdx << " in " << BB->getName() |
1349 | << ", calls = " << NumCalls |
1350 | << ", ICP failed to find target symbol for " |
1351 | << Targets[LastTarget].To.Sym->getName() << "\n" ; |
1352 | continue; |
1353 | } |
1354 | |
1355 | MethodInfoType MethodInfo; |
1356 | |
1357 | if (!IsJumpTable) { |
1358 | MethodInfo = maybeGetVtableSyms(BB&: *BB, Inst, SymTargets); |
1359 | TotalMethodLoadsEliminated += MethodInfo.first.empty() ? 0 : 1; |
1360 | LLVM_DEBUG(dbgs() |
1361 | << "BOLT-INFO: ICP " |
1362 | << (!MethodInfo.first.empty() ? "found" : "did not find" ) |
1363 | << " vtables for all methods.\n" ); |
1364 | } else if (TargetFetchInst) { |
1365 | ++TotalIndexBasedJumps; |
1366 | MethodInfo.second.push_back(x: TargetFetchInst); |
1367 | } |
1368 | |
1369 | // Generate new promoted call code for this callsite. |
1370 | MCPlusBuilder::BlocksVectorTy ICPcode = |
1371 | (IsJumpTable && !opts::ICPJumpTablesByTarget) |
1372 | ? BC.MIB->jumpTablePromotion(IJmpInst: Inst, Targets: SymTargets, |
1373 | TargetFetchInsns: MethodInfo.second, Ctx: BC.Ctx.get()) |
1374 | : BC.MIB->indirectCallPromotion( |
1375 | CallInst: Inst, Targets: SymTargets, VtableSyms: MethodInfo.first, MethodFetchInsns: MethodInfo.second, |
1376 | MinimizeCodeSize: opts::ICPOldCodeSequence, Ctx: BC.Ctx.get()); |
1377 | |
1378 | if (ICPcode.empty()) { |
1379 | if (opts::Verbosity >= 1) |
1380 | BC.outs() << "BOLT-INFO: ICP failed in " << Function << " @ " |
1381 | << InstIdx << " in " << BB->getName() |
1382 | << ", calls = " << NumCalls |
1383 | << ", unable to generate promoted call code.\n" ; |
1384 | continue; |
1385 | } |
1386 | |
1387 | LLVM_DEBUG({ |
1388 | uint64_t Offset = Targets[0].From.Addr; |
1389 | dbgs() << "BOLT-INFO: ICP indirect call code:\n" ; |
1390 | for (const auto &entry : ICPcode) { |
1391 | const MCSymbol *const &Sym = entry.first; |
1392 | const InstructionListType &Insts = entry.second; |
1393 | if (Sym) |
1394 | dbgs() << Sym->getName() << ":\n" ; |
1395 | Offset = BC.printInstructions(dbgs(), Insts.begin(), Insts.end(), |
1396 | Offset); |
1397 | } |
1398 | dbgs() << "---------------------------------------------------\n" ; |
1399 | }); |
1400 | |
1401 | // Rewrite the CFG with the newly generated ICP code. |
1402 | std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs = |
1403 | rewriteCall(IndCallBlock&: *BB, CallInst: Inst, ICPcode: std::move(ICPcode), MethodFetchInsns: MethodInfo.second); |
1404 | |
1405 | // Fix the CFG after inserting the new basic blocks. |
1406 | BinaryBasicBlock *MergeBlock = |
1407 | fixCFG(IndCallBlock&: *BB, IsTailCall, IsJumpTable, NewBBs: std::move(NewBBs), Targets); |
1408 | |
1409 | // Since the tail of the original block was split off and it may contain |
1410 | // additional indirect calls, we must add the merge block to the set of |
1411 | // blocks to process. |
1412 | if (MergeBlock) |
1413 | BBs.push_back(x: MergeBlock); |
1414 | |
1415 | if (opts::Verbosity >= 1) |
1416 | BC.outs() << "BOLT-INFO: ICP succeeded in " << Function << " @ " |
1417 | << InstIdx << " in " << BB->getName() |
1418 | << " -> calls = " << NumCalls << "\n" ; |
1419 | |
1420 | if (IsJumpTable) |
1421 | ++TotalOptimizedJumpTableCallsites; |
1422 | else |
1423 | ++TotalOptimizedIndirectCallsites; |
1424 | |
1425 | Modified.insert(x: &Function); |
1426 | } |
1427 | } |
1428 | TotalIndirectCalls += FuncTotalIndirectCalls; |
1429 | TotalIndirectJmps += FuncTotalIndirectJmps; |
1430 | } |
1431 | |
1432 | BC.outs() |
1433 | << "BOLT-INFO: ICP total indirect callsites with profile = " |
1434 | << TotalIndirectCallsites << "\n" |
1435 | << "BOLT-INFO: ICP total jump table callsites = " |
1436 | << TotalJumpTableCallsites << "\n" |
1437 | << "BOLT-INFO: ICP total number of calls = " << TotalCalls << "\n" |
1438 | << "BOLT-INFO: ICP percentage of calls that are indirect = " |
1439 | << format(Fmt: "%.1f" , Vals: (100.0 * TotalIndirectCalls) / TotalCalls) << "%\n" |
1440 | << "BOLT-INFO: ICP percentage of indirect calls that can be " |
1441 | "optimized = " |
1442 | << format(Fmt: "%.1f" , Vals: (100.0 * TotalNumFrequentCalls) / |
1443 | std::max<size_t>(a: TotalIndirectCalls, b: 1)) |
1444 | << "%\n" |
1445 | << "BOLT-INFO: ICP percentage of indirect callsites that are " |
1446 | "optimized = " |
1447 | << format(Fmt: "%.1f" , Vals: (100.0 * TotalOptimizedIndirectCallsites) / |
1448 | std::max<uint64_t>(a: TotalIndirectCallsites, b: 1)) |
1449 | << "%\n" |
1450 | << "BOLT-INFO: ICP number of method load elimination candidates = " |
1451 | << TotalMethodLoadEliminationCandidates << "\n" |
1452 | << "BOLT-INFO: ICP percentage of method calls candidates that have " |
1453 | "loads eliminated = " |
1454 | << format(Fmt: "%.1f" , |
1455 | Vals: (100.0 * TotalMethodLoadsEliminated) / |
1456 | std::max<uint64_t>(a: TotalMethodLoadEliminationCandidates, b: 1)) |
1457 | << "%\n" |
1458 | << "BOLT-INFO: ICP percentage of indirect branches that are " |
1459 | "optimized = " |
1460 | << format(Fmt: "%.1f" , Vals: (100.0 * TotalNumFrequentJmps) / |
1461 | std::max<uint64_t>(a: TotalIndirectJmps, b: 1)) |
1462 | << "%\n" |
1463 | << "BOLT-INFO: ICP percentage of jump table callsites that are " |
1464 | << "optimized = " |
1465 | << format(Fmt: "%.1f" , Vals: (100.0 * TotalOptimizedJumpTableCallsites) / |
1466 | std::max<uint64_t>(a: TotalJumpTableCallsites, b: 1)) |
1467 | << "%\n" |
1468 | << "BOLT-INFO: ICP number of jump table callsites that can use hot " |
1469 | << "indices = " << TotalIndexBasedCandidates << "\n" |
1470 | << "BOLT-INFO: ICP percentage of jump table callsites that use hot " |
1471 | "indices = " |
1472 | << format(Fmt: "%.1f" , Vals: (100.0 * TotalIndexBasedJumps) / |
1473 | std::max<uint64_t>(a: TotalIndexBasedCandidates, b: 1)) |
1474 | << "%\n" ; |
1475 | |
1476 | #ifndef NDEBUG |
1477 | verifyProfile(BFs); |
1478 | #endif |
1479 | return Error::success(); |
1480 | } |
1481 | |
1482 | } // namespace bolt |
1483 | } // namespace llvm |
1484 | |