//===- bolt/Profile/StaleProfileMatching.cpp - Profile data matching ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// BOLT often has to deal with profiles collected on binaries built from several
// revisions behind release. As a result, a certain percentage of functions is
// considered stale and not optimized. This file implements the ability to match
// a profile to functions that are not 100% binary identical, thus increasing
// the optimization coverage and boosting the performance of applications.
//
// The algorithm consists of two phases: matching and inference:
// - At the matching phase, we try to "guess" as many block and jump counts from
// the stale profile as possible. To this end, the content of each basic block
// is hashed and stored in the (yaml) profile. When BOLT optimizes a binary,
// it computes block hashes and identifies the corresponding entries in the
// stale profile. It yields a partial profile for every CFG in the binary.
// - At the inference phase, we employ a network flow-based algorithm (profi) to
// reconstruct "realistic" block and jump counts from the partial profile
// generated at the first stage. In practice, we don't always produce proper
// profile data but the majority (e.g., >90%) of CFGs get the correct counts.
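// In this file, the matching phase is implemented by matchWeightsByHashes(),
// and the inference phase is driven by applyInference(), which delegates to
// the shared sample-profile inference utility (applyFlowInference).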
//
//===----------------------------------------------------------------------===//

#include "bolt/Core/HashUtilities.h"
#include "bolt/Profile/YAMLProfileReader.h"
#include "llvm/ADT/Bitfields.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/xxhash.h"
#include "llvm/Transforms/Utils/SampleProfileInference.h"

#include <queue>

using namespace llvm;

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"

namespace opts {

extern cl::OptionCategory BoltOptCategory;

cl::opt<bool>
    InferStaleProfile("infer-stale-profile",
                      cl::desc("Infer counts from stale profile data."),
                      cl::init(false), cl::Hidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingMaxFuncSize(
    "stale-matching-max-func-size",
    cl::desc("The maximum size of a function to consider for inference."),
    cl::init(10000), cl::Hidden, cl::cat(BoltOptCategory));

// Parameters of the profile inference algorithm. The default values are tuned
// on several benchmarks.
cl::opt<bool> StaleMatchingEvenFlowDistribution(
    "stale-matching-even-flow-distribution",
    cl::desc("Try to evenly distribute flow when there are multiple equally "
             "likely options."),
    cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<bool> StaleMatchingRebalanceUnknown(
    "stale-matching-rebalance-unknown",
    cl::desc("Evenly re-distribute flow among unknown subgraphs."),
    cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<bool> StaleMatchingJoinIslands(
    "stale-matching-join-islands",
    cl::desc("Join isolated components having positive flow."), cl::init(true),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostBlockInc(
    "stale-matching-cost-block-inc",
    cl::desc("The cost of increasing a block count by one."), cl::init(150),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostBlockDec(
    "stale-matching-cost-block-dec",
    cl::desc("The cost of decreasing a block count by one."), cl::init(150),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostJumpInc(
    "stale-matching-cost-jump-inc",
    cl::desc("The cost of increasing a jump count by one."), cl::init(150),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostJumpDec(
    "stale-matching-cost-jump-dec",
    cl::desc("The cost of decreasing a jump count by one."), cl::init(150),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostBlockUnknownInc(
    "stale-matching-cost-block-unknown-inc",
    cl::desc("The cost of increasing an unknown block count by one."),
    cl::init(1), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostJumpUnknownInc(
    "stale-matching-cost-jump-unknown-inc",
    cl::desc("The cost of increasing an unknown jump count by one."),
    cl::init(140), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
    "stale-matching-cost-jump-unknown-ft-inc",
    cl::desc(
        "The cost of increasing an unknown fall-through jump count by one."),
    cl::init(3), cl::ReallyHidden, cl::cat(BoltOptCategory));

} // namespace opts

namespace llvm {
namespace bolt {

/// An object wrapping several components of a basic block hash. The combined
/// (blended) hash is represented and stored as one uint64_t, while individual
/// components are of smaller size (e.g., uint16_t or uint8_t).
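/// Layout of the combined 64-bit value (from the least-significant bits up):
/// bits 0-15 hold Offset, bits 16-31 hold OpcodeHash, bits 32-47 hold
/// InstrHash, bits 48-55 hold PredHash, and bits 56-63 hold SuccHash.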
struct BlendedBlockHash {
private:
  using ValueOffset = Bitfield::Element<uint16_t, 0, 16>;
  using ValueOpcode = Bitfield::Element<uint16_t, 16, 16>;
  using ValueInstr = Bitfield::Element<uint16_t, 32, 16>;
  using ValuePred = Bitfield::Element<uint8_t, 48, 8>;
  using ValueSucc = Bitfield::Element<uint8_t, 56, 8>;

public:
  explicit BlendedBlockHash() {}

  explicit BlendedBlockHash(uint64_t Hash) {
    Offset = Bitfield::get<ValueOffset>(Hash);
    OpcodeHash = Bitfield::get<ValueOpcode>(Hash);
    InstrHash = Bitfield::get<ValueInstr>(Hash);
    PredHash = Bitfield::get<ValuePred>(Hash);
    SuccHash = Bitfield::get<ValueSucc>(Hash);
  }

  /// Combine the blended hash into uint64_t.
  uint64_t combine() const {
    uint64_t Hash = 0;
    Bitfield::set<ValueOffset>(Hash, Offset);
    Bitfield::set<ValueOpcode>(Hash, OpcodeHash);
    Bitfield::set<ValueInstr>(Hash, InstrHash);
    Bitfield::set<ValuePred>(Hash, PredHash);
    Bitfield::set<ValueSucc>(Hash, SuccHash);
    return Hash;
  }

  /// Compute a distance between two given blended hashes. The smaller the
  /// distance, the more similar two blocks are. For identical basic blocks,
  /// the distance is zero.
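  /// The comparison is lexicographic: a mismatch in the successor/predecessor
  /// hashes dominates a mismatch in the instruction hash, which in turn
  /// dominates the difference between the block offsets.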
  uint64_t distance(const BlendedBlockHash &BBH) const {
    assert(OpcodeHash == BBH.OpcodeHash &&
           "incorrect blended hash distance computation");
    uint64_t Dist = 0;
    // Account for NeighborHash
    Dist += SuccHash == BBH.SuccHash ? 0 : 1;
    Dist += PredHash == BBH.PredHash ? 0 : 1;
    Dist <<= 16;
    // Account for InstrHash
    Dist += InstrHash == BBH.InstrHash ? 0 : 1;
    Dist <<= 16;
    // Account for Offset
    Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset);
    return Dist;
  }

  /// The offset of the basic block from the function start.
  uint16_t Offset{0};
  /// (Loose) Hash of the basic block instructions, excluding operands.
  uint16_t OpcodeHash{0};
  /// (Strong) Hash of the basic block instructions, including opcodes and
  /// operands.
  uint16_t InstrHash{0};
  /// (Loose) Hashes of the predecessors of the basic block.
  uint8_t PredHash{0};
  /// (Loose) Hashes of the successors of the basic block.
  uint8_t SuccHash{0};
};

/// The object is used to identify and match basic blocks in a BinaryFunction
/// given their hashes computed on a binary built from several revisions behind
/// release.
class StaleMatcher {
public:
  /// Initialize stale matcher.
  void init(const std::vector<FlowBlock *> &Blocks,
            const std::vector<BlendedBlockHash> &Hashes) {
    assert(Blocks.size() == Hashes.size() &&
           "incorrect matcher initialization");
    for (size_t I = 0; I < Blocks.size(); I++) {
      FlowBlock *Block = Blocks[I];
      uint16_t OpHash = Hashes[I].OpcodeHash;
      OpHashToBlocks[OpHash].push_back(std::make_pair(Hashes[I], Block));
    }
  }

  /// Find the most similar block for a given hash.
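  /// Candidate blocks are bucketed by their 16-bit OpcodeHash; within the
  /// bucket matching the given hash, the block with the smallest blended-hash
  /// distance is returned (or nullptr if no bucket exists).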
  const FlowBlock *matchBlock(BlendedBlockHash BlendedHash) const {
    auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash);
    if (BlockIt == OpHashToBlocks.end())
      return nullptr;
    FlowBlock *BestBlock = nullptr;
    uint64_t BestDist = std::numeric_limits<uint64_t>::max();
    for (const auto &[Hash, Block] : BlockIt->second) {
      uint64_t Dist = Hash.distance(BlendedHash);
      if (BestBlock == nullptr || Dist < BestDist) {
        BestDist = Dist;
        BestBlock = Block;
      }
    }
    return BestBlock;
  }

  /// Returns true if the two basic blocks (in the binary and in the profile)
  /// corresponding to the given hashes are matched to each other with a high
  /// confidence.
  static bool isHighConfidenceMatch(BlendedBlockHash Hash1,
                                    BlendedBlockHash Hash2) {
    return Hash1.InstrHash == Hash2.InstrHash;
  }

private:
  using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
  std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
};

void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const {
  if (size() == 0)
    return;

  assert(hasCFG() && "the function is expected to have CFG");

  std::vector<BlendedBlockHash> BlendedHashes(BasicBlocks.size());
  std::vector<uint64_t> OpcodeHashes(BasicBlocks.size());
  // Initialize hash components.
  for (size_t I = 0; I < BasicBlocks.size(); I++) {
    const BinaryBasicBlock *BB = BasicBlocks[I];
    assert(BB->getIndex() == I && "incorrect block index");
    BlendedHashes[I].Offset = BB->getOffset();
    // Hashing complete instructions.
    std::string InstrHashStr = hashBlock(
        BC, *BB, [&](const MCOperand &Op) { return hashInstOperand(BC, Op); });
    if (HashFunction == HashFunction::StdHash) {
      uint64_t InstrHash = std::hash<std::string>{}(InstrHashStr);
      BlendedHashes[I].InstrHash = (uint16_t)hash_value(InstrHash);
    } else if (HashFunction == HashFunction::XXH3) {
      uint64_t InstrHash = llvm::xxh3_64bits(InstrHashStr);
      BlendedHashes[I].InstrHash = (uint16_t)InstrHash;
    } else {
      llvm_unreachable("Unhandled HashFunction");
    }
    // Hashing opcodes.
    std::string OpcodeHashStr = hashBlockLoose(BC, *BB);
    if (HashFunction == HashFunction::StdHash) {
      OpcodeHashes[I] = std::hash<std::string>{}(OpcodeHashStr);
      BlendedHashes[I].OpcodeHash = (uint16_t)hash_value(OpcodeHashes[I]);
    } else if (HashFunction == HashFunction::XXH3) {
      OpcodeHashes[I] = llvm::xxh3_64bits(OpcodeHashStr);
      BlendedHashes[I].OpcodeHash = (uint16_t)OpcodeHashes[I];
    } else {
      llvm_unreachable("Unhandled HashFunction");
    }
  }

  // Initialize neighbor hash.
  for (size_t I = 0; I < BasicBlocks.size(); I++) {
    const BinaryBasicBlock *BB = BasicBlocks[I];
    // Append hashes of successors.
    uint64_t Hash = 0;
    for (BinaryBasicBlock *SuccBB : BB->successors()) {
      uint64_t SuccHash = OpcodeHashes[SuccBB->getIndex()];
      Hash = hashing::detail::hash_16_bytes(Hash, SuccHash);
    }
    if (HashFunction == HashFunction::StdHash) {
      // Compatibility with old behavior.
      BlendedHashes[I].SuccHash = (uint8_t)hash_value(Hash);
    } else {
      BlendedHashes[I].SuccHash = (uint8_t)Hash;
    }

    // Append hashes of predecessors.
    Hash = 0;
    for (BinaryBasicBlock *PredBB : BB->predecessors()) {
      uint64_t PredHash = OpcodeHashes[PredBB->getIndex()];
      Hash = hashing::detail::hash_16_bytes(Hash, PredHash);
    }
    if (HashFunction == HashFunction::StdHash) {
      // Compatibility with old behavior.
      BlendedHashes[I].PredHash = (uint8_t)hash_value(Hash);
    } else {
      BlendedHashes[I].PredHash = (uint8_t)Hash;
    }
  }

  // Assign hashes.
  for (size_t I = 0; I < BasicBlocks.size(); I++) {
    const BinaryBasicBlock *BB = BasicBlocks[I];
    BB->setHash(BlendedHashes[I].combine());
  }
}

/// Create a wrapper flow function to use with the profile inference algorithm,
/// and initialize its jumps and metadata.
FlowFunction
createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
  FlowFunction Func;

  // Add a special "dummy" source so that there is always a unique entry point.
  // Because of the extra source, for all other blocks in FlowFunction it holds
  // that Block.Index == BB->getIndex() + 1
  FlowBlock EntryBlock;
  EntryBlock.Index = 0;
  Func.Blocks.push_back(EntryBlock);

  // Create FlowBlock for every basic block in the binary function
  for (const BinaryBasicBlock *BB : BlockOrder) {
    Func.Blocks.emplace_back();
    FlowBlock &Block = Func.Blocks.back();
    Block.Index = Func.Blocks.size() - 1;
    (void)BB;
    assert(Block.Index == BB->getIndex() + 1 &&
           "incorrectly assigned basic block index");
  }

  // Create FlowJump for each jump between basic blocks in the binary function
  std::vector<uint64_t> InDegree(Func.Blocks.size(), 0);
  for (const BinaryBasicBlock *SrcBB : BlockOrder) {
    std::unordered_set<const BinaryBasicBlock *> UniqueSuccs;
    // Collect regular jumps
    for (const BinaryBasicBlock *DstBB : SrcBB->successors()) {
      // Ignoring parallel edges
      if (UniqueSuccs.find(DstBB) != UniqueSuccs.end())
        continue;

      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = SrcBB->getIndex() + 1;
      Jump.Target = DstBB->getIndex() + 1;
      InDegree[Jump.Target]++;
      UniqueSuccs.insert(DstBB);
    }
    // Collect jumps to landing pads
    for (const BinaryBasicBlock *DstBB : SrcBB->landing_pads()) {
      // Ignoring parallel edges
      if (UniqueSuccs.find(DstBB) != UniqueSuccs.end())
        continue;

      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = SrcBB->getIndex() + 1;
      Jump.Target = DstBB->getIndex() + 1;
      InDegree[Jump.Target]++;
      UniqueSuccs.insert(DstBB);
    }
  }

  // Add dummy edges to the extra sources. If there are multiple entry blocks,
  // add an unlikely edge from 0 to the subsequent ones
  assert(InDegree[0] == 0 && "dummy entry blocks shouldn't have predecessors");
  for (uint64_t I = 1; I < Func.Blocks.size(); I++) {
    const BinaryBasicBlock *BB = BlockOrder[I - 1];
    if (BB->isEntryPoint() || InDegree[I] == 0) {
      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = 0;
      Jump.Target = I;
      if (!BB->isEntryPoint())
        Jump.IsUnlikely = true;
    }
  }

  // Create necessary metadata for the flow function
  for (FlowJump &Jump : Func.Jumps) {
    Func.Blocks.at(Jump.Source).SuccJumps.push_back(&Jump);
    Func.Blocks.at(Jump.Target).PredJumps.push_back(&Jump);
  }
  return Func;
}

/// Assign initial block/jump weights based on the stale profile data. The goal
/// is to extract as much information from the stale profile as possible. Here
/// we assume that each basic block is specified via a hash value computed from
/// its content, and that the hashes of unchanged basic blocks stay the same
/// across different revisions of the binary.
/// Whenever there is a count in the profile with the hash corresponding to one
/// of the basic blocks in the binary, the count is "matched" to the block.
/// Similarly, if both the source and the target blocks of a jump count in the
/// profile are matched to a pair of blocks connected by a jump in the binary,
/// the count is assigned to that jump in the CFG.
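/// Blocks and jumps that cannot be matched keep their HasUnknownWeight flag
/// set; their counts are left for the subsequent inference phase to recover.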
void matchWeightsByHashes(BinaryContext &BC,
                          const BinaryFunction::BasicBlockOrderType &BlockOrder,
                          const yaml::bolt::BinaryFunctionProfile &YamlBF,
                          FlowFunction &Func) {
  assert(Func.Blocks.size() == BlockOrder.size() + 1);

  std::vector<FlowBlock *> Blocks;
  std::vector<BlendedBlockHash> BlendedHashes;
  for (uint64_t I = 0; I < BlockOrder.size(); I++) {
    const BinaryBasicBlock *BB = BlockOrder[I];
    assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");
    Blocks.push_back(&Func.Blocks[I + 1]);
    BlendedBlockHash BlendedHash(BB->getHash());
    BlendedHashes.push_back(BlendedHash);
    LLVM_DEBUG(dbgs() << "BB with index " << I << " has hash = "
                      << Twine::utohexstr(BB->getHash()) << "\n");
  }
  StaleMatcher Matcher;
  Matcher.init(Blocks, BlendedHashes);

  // Index in yaml profile => corresponding (matched) block
  DenseMap<uint64_t, const FlowBlock *> MatchedBlocks;
  // Match blocks from the profile to the blocks in CFG
  for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
    assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
    BlendedBlockHash YamlHash(YamlBB.Hash);
    const FlowBlock *MatchedBlock = Matcher.matchBlock(YamlHash);
    // Always match the entry block.
    if (MatchedBlock == nullptr && YamlBB.Index == 0)
      MatchedBlock = Blocks[0];
    if (MatchedBlock != nullptr) {
      const BinaryBasicBlock *BB = BlockOrder[MatchedBlock->Index - 1];
      MatchedBlocks[YamlBB.Index] = MatchedBlock;
      BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1];
      LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBB.Index << ")"
                        << " with hash " << Twine::utohexstr(YamlBB.Hash)
                        << " to BB (index = " << MatchedBlock->Index - 1 << ")"
                        << " with hash " << Twine::utohexstr(BinHash.combine())
                        << "\n");
      // Update matching stats accounting for the matched block.
      if (Matcher.isHighConfidenceMatch(BinHash, YamlHash)) {
        ++BC.Stats.NumMatchedBlocks;
        BC.Stats.MatchedSampleCount += YamlBB.ExecCount;
        LLVM_DEBUG(dbgs() << " exact match\n");
      } else {
        LLVM_DEBUG(dbgs() << " loose match\n");
      }
      if (YamlBB.NumInstructions == BB->size())
        ++BC.Stats.NumStaleBlocksWithEqualIcount;
    } else {
      LLVM_DEBUG(
          dbgs() << "Couldn't match yaml block (bid = " << YamlBB.Index << ")"
                 << " with hash " << Twine::utohexstr(YamlBB.Hash) << "\n");
    }

    // Update matching stats.
    ++BC.Stats.NumStaleBlocks;
    BC.Stats.StaleSampleCount += YamlBB.ExecCount;
  }

  // Match jumps from the profile to the jumps from CFG
  std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0);
  std::vector<uint64_t> InWeight(Func.Blocks.size(), 0);
  for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
    for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) {
      if (YamlSI.Count == 0)
        continue;

      // Try to find the jump for a given (src, dst) pair from the profile and
      // assign the jump weight based on the profile count
      const uint64_t SrcIndex = YamlBB.Index;
      const uint64_t DstIndex = YamlSI.Index;

      const FlowBlock *MatchedSrcBlock = MatchedBlocks.lookup(SrcIndex);
      const FlowBlock *MatchedDstBlock = MatchedBlocks.lookup(DstIndex);

      if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) {
        // Find a jump between the two blocks
        FlowJump *Jump = nullptr;
        for (FlowJump *SuccJump : MatchedSrcBlock->SuccJumps) {
          if (SuccJump->Target == MatchedDstBlock->Index) {
            Jump = SuccJump;
            break;
          }
        }
        // Assign the weight, if the corresponding jump is found
        if (Jump != nullptr) {
          Jump->Weight = YamlSI.Count;
          Jump->HasUnknownWeight = false;
        }
      }
      // Assign the weight for the src block, if it is found
      if (MatchedSrcBlock != nullptr)
        OutWeight[MatchedSrcBlock->Index] += YamlSI.Count;
      // Assign the weight for the dst block, if it is found
      if (MatchedDstBlock != nullptr)
        InWeight[MatchedDstBlock->Index] += YamlSI.Count;
    }
  }

  // Assign block counts based on in-/out- jumps
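  // The two sums may disagree when some of a block's incident jumps are
  // unmatched; take the larger one as the (lower-bound) estimate of the
  // block's execution count.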
  for (FlowBlock &Block : Func.Blocks) {
    if (OutWeight[Block.Index] == 0 && InWeight[Block.Index] == 0) {
      assert(Block.HasUnknownWeight && "unmatched block with a positive count");
      continue;
    }
    Block.HasUnknownWeight = false;
    Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]);
  }
}

/// The function finds all blocks that are (i) unreachable from the entry block
/// or (ii) do not have a path to an exit of the function, and marks all such
/// blocks 'cold' so that profi does not send any flow to them.
void preprocessUnreachableBlocks(FlowFunction &Func) {
  const uint64_t NumBlocks = Func.Blocks.size();

  // Start BFS from the source
  std::queue<uint64_t> Queue;
  std::vector<bool> VisitedEntry(NumBlocks, false);
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.isEntry()) {
      Queue.push(I);
      VisitedEntry[I] = true;
      break;
    }
  }
  while (!Queue.empty()) {
    const uint64_t Src = Queue.front();
    Queue.pop();
    for (FlowJump *Jump : Func.Blocks[Src].SuccJumps) {
      const uint64_t Dst = Jump->Target;
      if (!VisitedEntry[Dst]) {
        Queue.push(Dst);
        VisitedEntry[Dst] = true;
      }
    }
  }

  // Start BFS from all sinks
  std::vector<bool> VisitedExit(NumBlocks, false);
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.isExit() && VisitedEntry[I]) {
      Queue.push(I);
      VisitedExit[I] = true;
    }
  }
  while (!Queue.empty()) {
    const uint64_t Src = Queue.front();
    Queue.pop();
    for (FlowJump *Jump : Func.Blocks[Src].PredJumps) {
      const uint64_t Dst = Jump->Source;
      if (!VisitedExit[Dst]) {
        Queue.push(Dst);
        VisitedExit[Dst] = true;
      }
    }
  }

  // Zero out the weight of blocks that do not lie on an entry-to-exit path so
  // that no flow is sent to them
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.Weight == 0)
      continue;
    if (!VisitedEntry[I] || !VisitedExit[I]) {
      Block.Weight = 0;
      Block.HasUnknownWeight = true;
      Block.IsUnlikely = true;
      for (FlowJump *Jump : Block.SuccJumps) {
        if (Jump->Source == Block.Index && Jump->Target == Block.Index) {
          Jump->Weight = 0;
          Jump->HasUnknownWeight = true;
          Jump->IsUnlikely = true;
        }
      }
    }
  }
}

/// Decide if stale profile matching can be applied for a given function.
/// Currently we skip inference for (very) large instances and for instances
/// having "unexpected" control flow (e.g., having no sink basic blocks).
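/// The size threshold is controlled by the -stale-matching-max-func-size
/// command-line option defined above.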
bool canApplyInference(const FlowFunction &Func) {
  if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
    return false;

  bool HasExitBlocks = llvm::any_of(
      Func.Blocks, [&](const FlowBlock &Block) { return Block.isExit(); });
  if (!HasExitBlocks)
    return false;

  return true;
}

/// Apply the profile inference algorithm for a given flow function.
void applyInference(FlowFunction &Func) {
  ProfiParams Params;
  // Set the params from the command-line flags.
  Params.EvenFlowDistribution = opts::StaleMatchingEvenFlowDistribution;
  Params.RebalanceUnknown = opts::StaleMatchingRebalanceUnknown;
  Params.JoinIslands = opts::StaleMatchingJoinIslands;

  Params.CostBlockInc = opts::StaleMatchingCostBlockInc;
  Params.CostBlockEntryInc = opts::StaleMatchingCostBlockInc;
  Params.CostBlockDec = opts::StaleMatchingCostBlockDec;
  Params.CostBlockEntryDec = opts::StaleMatchingCostBlockDec;
  Params.CostBlockUnknownInc = opts::StaleMatchingCostBlockUnknownInc;

  Params.CostJumpInc = opts::StaleMatchingCostJumpInc;
  Params.CostJumpFTInc = opts::StaleMatchingCostJumpInc;
  Params.CostJumpDec = opts::StaleMatchingCostJumpDec;
  Params.CostJumpFTDec = opts::StaleMatchingCostJumpDec;
  Params.CostJumpUnknownInc = opts::StaleMatchingCostJumpUnknownInc;
  Params.CostJumpUnknownFTInc = opts::StaleMatchingCostJumpUnknownFTInc;

  applyFlowInference(Params, Func);
}

/// Collect inferred counts from the flow function and update annotations in
/// the binary function.
void assignProfile(BinaryFunction &BF,
                   const BinaryFunction::BasicBlockOrderType &BlockOrder,
                   FlowFunction &Func) {
  BinaryContext &BC = BF.getBinaryContext();

  assert(Func.Blocks.size() == BlockOrder.size() + 1);
  for (uint64_t I = 0; I < BlockOrder.size(); I++) {
    FlowBlock &Block = Func.Blocks[I + 1];
    BinaryBasicBlock *BB = BlockOrder[I];

    // Update block's count
    BB->setExecutionCount(Block.Flow);

    // Update jump counts: (i) clean existing counts and then (ii) set new ones
    auto BI = BB->branch_info_begin();
    for (const BinaryBasicBlock *DstBB : BB->successors()) {
      (void)DstBB;
      BI->Count = 0;
      BI->MispredictedCount = 0;
      ++BI;
    }
    for (FlowJump *Jump : Block.SuccJumps) {
      if (Jump->IsUnlikely)
        continue;
      if (Jump->Flow == 0)
        continue;

      BinaryBasicBlock &SuccBB = *BlockOrder[Jump->Target - 1];
      // Check if the edge corresponds to a regular jump or a landing pad
      if (BB->getSuccessor(SuccBB.getLabel())) {
        BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(SuccBB);
        BI.Count += Jump->Flow;
      } else {
        BinaryBasicBlock *LP = BB->getLandingPad(SuccBB.getLabel());
        if (LP && LP->getKnownExecutionCount() < Jump->Flow)
          LP->setExecutionCount(Jump->Flow);
      }
    }

    // Update call-site annotations
    auto setOrUpdateAnnotation = [&](MCInst &Instr, StringRef Name,
                                     uint64_t Count) {
      if (BC.MIB->hasAnnotation(Instr, Name))
        BC.MIB->removeAnnotation(Instr, Name);
      // Do not add zero-count annotations
      if (Count == 0)
        return;
      BC.MIB->addAnnotation(Instr, Name, Count);
    };

    for (MCInst &Instr : *BB) {
      // Ignore pseudo instructions
      if (BC.MIB->isPseudo(Instr))
        continue;
      // Ignore jump tables
      const MCInst *LastInstr = BB->getLastNonPseudoInstr();
      if (BC.MIB->getJumpTable(*LastInstr) && LastInstr == &Instr)
        continue;

      if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) {
        auto &ICSP = BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
            Instr, "CallProfile");
        if (!ICSP.empty()) {
          // Try to evenly distribute the counts among the call sites
          const uint64_t TotalCount = Block.Flow;
          const uint64_t NumSites = ICSP.size();
          for (uint64_t Idx = 0; Idx < ICSP.size(); Idx++) {
            IndirectCallProfile &CSP = ICSP[Idx];
            uint64_t CountPerSite = TotalCount / NumSites;
            // When counts cannot be exactly distributed, increase by 1 the
            // counts of the first (TotalCount % NumSites) call sites
            if (Idx < TotalCount % NumSites)
              CountPerSite++;
            CSP.Count = CountPerSite;
          }
        } else {
          ICSP.emplace_back(nullptr, Block.Flow, 0);
        }
      } else if (BC.MIB->getConditionalTailCall(Instr)) {
        // We don't know exactly how many times the conditional tail call is
        // executed; conservatively, set it to the count of the block.
        setOrUpdateAnnotation(Instr, "CTCTakenCount", Block.Flow);
        BC.MIB->removeAnnotation(Instr, "CTCMispredCount");
      } else if (BC.MIB->isCall(Instr)) {
        setOrUpdateAnnotation(Instr, "Count", Block.Flow);
      }
    }
  }

  // Update function's execution count and mark the function inferred.
  BF.setExecutionCount(Func.Blocks[0].Flow);
  BF.setHasInferredProfile(true);
}

bool YAMLProfileReader::inferStaleProfile(
    BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) {
  if (!BF.hasCFG())
    return false;

  LLVM_DEBUG(dbgs() << "BOLT-INFO: applying profile inference for "
                    << "\"" << BF.getPrintName() << "\"\n");

  // Make sure that block hashes are up to date.
  BF.computeBlockHashes(YamlBP.Header.HashFunction);

  const BinaryFunction::BasicBlockOrderType BlockOrder(
      BF.getLayout().block_begin(), BF.getLayout().block_end());

  // Create a wrapper flow function to use with the profile inference algorithm.
  FlowFunction Func = createFlowFunction(BlockOrder);

  // Match as many block/jump counts from the stale profile as possible
  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);

  // Adjust the flow function by marking unreachable blocks Unlikely so that
  // they don't get any counts assigned.
  preprocessUnreachableBlocks(Func);

  // Check if profile inference can be applied for the instance.
  if (!canApplyInference(Func))
    return false;

  // Apply the profile inference algorithm.
  applyInference(Func);

  // Collect inferred counts and update function annotations.
  assignProfile(BF, BlockOrder, Func);

  // As of now, we always mark the binary function as having a "correct"
  // profile. In the future, we may discard the results for instances with poor
  // inference metrics and keep such functions un-optimized.
  return true;
}

} // end namespace bolt
} // end namespace llvm
