//===- bolt/Profile/StaleProfileMatching.cpp - Profile data matching ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// BOLT often has to deal with profiles collected from binaries built several
// revisions behind the release. As a result, a certain percentage of functions
// is considered stale and not optimized. This file implements the ability to
// match profile data to functions that are not 100% binary identical, thus
// increasing optimization coverage and boosting application performance.
//
// The algorithm consists of two phases: matching and inference:
// - In the matching phase, we try to "guess" as many block and jump counts
//   from the stale profile as possible. To this end, the content of each basic
//   block is hashed and stored in the (YAML) profile. When BOLT optimizes a
//   binary, it computes block hashes and identifies the corresponding entries
//   in the stale profile. This yields a partial profile for every CFG in the
//   binary.
// - In the inference phase, we employ a network flow-based algorithm (profi)
//   to reconstruct "realistic" block and jump counts from the partial profile
//   generated in the first stage. In practice, we don't always reconstruct the
//   profile perfectly, but the majority (e.g., >90%) of CFGs get correct
//   counts.
//
//===----------------------------------------------------------------------===//

#include "bolt/Core/HashUtilities.h"
#include "bolt/Profile/YAMLProfileReader.h"
#include "llvm/ADT/Bitfields.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/xxhash.h"
#include "llvm/Transforms/Utils/SampleProfileInference.h"

#include <queue>

using namespace llvm;

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"

namespace opts {

extern cl::OptionCategory BoltOptCategory;

cl::opt<bool>
    InferStaleProfile("infer-stale-profile",
                      cl::desc("Infer counts from stale profile data."),
                      cl::init(false), cl::Hidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingMaxFuncSize(
    "stale-matching-max-func-size",
    cl::desc("The maximum size of a function to consider for inference."),
    cl::init(10000), cl::Hidden, cl::cat(BoltOptCategory));

// Parameters of the profile inference algorithm. The default values are tuned
// on several benchmarks.
cl::opt<bool> StaleMatchingEvenFlowDistribution(
    "stale-matching-even-flow-distribution",
    cl::desc("Try to evenly distribute flow when there are multiple equally "
             "likely options."),
    cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<bool> StaleMatchingRebalanceUnknown(
    "stale-matching-rebalance-unknown",
    cl::desc("Evenly re-distribute flow among unknown subgraphs."),
    cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<bool> StaleMatchingJoinIslands(
    "stale-matching-join-islands",
    cl::desc("Join isolated components having positive flow."), cl::init(true),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostBlockInc(
    "stale-matching-cost-block-inc",
    cl::desc("The cost of increasing a block count by one."), cl::init(150),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostBlockDec(
    "stale-matching-cost-block-dec",
    cl::desc("The cost of decreasing a block count by one."), cl::init(150),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostJumpInc(
    "stale-matching-cost-jump-inc",
    cl::desc("The cost of increasing a jump count by one."), cl::init(150),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostJumpDec(
    "stale-matching-cost-jump-dec",
    cl::desc("The cost of decreasing a jump count by one."), cl::init(150),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostBlockUnknownInc(
    "stale-matching-cost-block-unknown-inc",
    cl::desc("The cost of increasing an unknown block count by one."),
    cl::init(1), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostJumpUnknownInc(
    "stale-matching-cost-jump-unknown-inc",
    cl::desc("The cost of increasing an unknown jump count by one."),
    cl::init(140), cl::ReallyHidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
    "stale-matching-cost-jump-unknown-ft-inc",
    cl::desc(
        "The cost of increasing an unknown fall-through jump count by one."),
    cl::init(3), cl::ReallyHidden, cl::cat(BoltOptCategory));

} // namespace opts

namespace llvm {
namespace bolt {

/// An object wrapping several components of a basic block hash. The combined
/// (blended) hash is represented and stored as one uint64_t, while individual
/// components are of smaller size (e.g., uint16_t or uint8_t).
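///
/// As an illustration, the packed 64-bit value mirrors the Bitfield elements
/// declared below and is laid out as follows, from the least significant bits
/// up:
///
/// \code
///   bits  0..15  Offset
///   bits 16..31  OpcodeHash
///   bits 32..47  InstrHash
///   bits 48..55  PredHash
///   bits 56..63  SuccHash
/// \endcode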
struct BlendedBlockHash {
private:
  using ValueOffset = Bitfield::Element<uint16_t, 0, 16>;
  using ValueOpcode = Bitfield::Element<uint16_t, 16, 16>;
  using ValueInstr = Bitfield::Element<uint16_t, 32, 16>;
  using ValuePred = Bitfield::Element<uint8_t, 48, 8>;
  using ValueSucc = Bitfield::Element<uint8_t, 56, 8>;

public:
  explicit BlendedBlockHash() {}

  explicit BlendedBlockHash(uint64_t Hash) {
    Offset = Bitfield::get<ValueOffset>(Hash);
    OpcodeHash = Bitfield::get<ValueOpcode>(Hash);
    InstrHash = Bitfield::get<ValueInstr>(Hash);
    PredHash = Bitfield::get<ValuePred>(Hash);
    SuccHash = Bitfield::get<ValueSucc>(Hash);
  }

  /// Combine the blended hash into uint64_t.
  uint64_t combine() const {
    uint64_t Hash = 0;
    Bitfield::set<ValueOffset>(Hash, Offset);
    Bitfield::set<ValueOpcode>(Hash, OpcodeHash);
    Bitfield::set<ValueInstr>(Hash, InstrHash);
    Bitfield::set<ValuePred>(Hash, PredHash);
    Bitfield::set<ValueSucc>(Hash, SuccHash);
    return Hash;
  }

  /// Compute a distance between two given blended hashes. The smaller the
  /// distance, the more similar two blocks are. For identical basic blocks,
  /// the distance is zero.
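  ///
  /// For example, following the weighting in the implementation below, two
  /// hashes that differ only in SuccHash are at distance 1 << 32, while two
  /// hashes that differ only in Offset by Delta bytes are at distance Delta,
  /// so neighbor and instruction mismatches dominate offset drift.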
  uint64_t distance(const BlendedBlockHash &BBH) const {
    assert(OpcodeHash == BBH.OpcodeHash &&
           "incorrect blended hash distance computation");
    uint64_t Dist = 0;
    // Account for NeighborHash.
    Dist += SuccHash == BBH.SuccHash ? 0 : 1;
    Dist += PredHash == BBH.PredHash ? 0 : 1;
    Dist <<= 16;
    // Account for InstrHash.
    Dist += InstrHash == BBH.InstrHash ? 0 : 1;
    Dist <<= 16;
    // Account for Offset.
    Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset);
    return Dist;
  }

  /// The offset of the basic block from the function start.
  uint16_t Offset{0};
  /// (Loose) Hash of the basic block instructions, excluding operands.
  uint16_t OpcodeHash{0};
  /// (Strong) Hash of the basic block instructions, including opcodes and
  /// operands.
  uint16_t InstrHash{0};
  /// (Loose) Hashes of the predecessors of the basic block.
  uint8_t PredHash{0};
  /// (Loose) Hashes of the successors of the basic block.
  uint8_t SuccHash{0};
};

/// The object is used to identify and match basic blocks in a BinaryFunction
/// given their hashes computed on a binary built from several revisions behind
/// release.
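///
/// A minimal usage sketch, mirroring how the matcher is driven in
/// matchWeightsByHashes() below (Blocks, BlendedHashes, and YamlBB are the
/// local names used there):
///
/// \code
///   StaleMatcher Matcher;
///   Matcher.init(Blocks, BlendedHashes);
///   const FlowBlock *Matched =
///       Matcher.matchBlock(BlendedBlockHash(YamlBB.Hash));
/// \endcode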
class StaleMatcher {
public:
  /// Initialize stale matcher.
  void init(const std::vector<FlowBlock *> &Blocks,
            const std::vector<BlendedBlockHash> &Hashes) {
    assert(Blocks.size() == Hashes.size() &&
           "incorrect matcher initialization");
    for (size_t I = 0; I < Blocks.size(); I++) {
      FlowBlock *Block = Blocks[I];
      uint16_t OpHash = Hashes[I].OpcodeHash;
      OpHashToBlocks[OpHash].push_back(std::make_pair(Hashes[I], Block));
    }
  }

  /// Find the most similar block for a given hash.
  const FlowBlock *matchBlock(BlendedBlockHash BlendedHash) const {
    auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash);
    if (BlockIt == OpHashToBlocks.end())
      return nullptr;
    FlowBlock *BestBlock = nullptr;
    uint64_t BestDist = std::numeric_limits<uint64_t>::max();
    for (const auto &[Hash, Block] : BlockIt->second) {
      uint64_t Dist = Hash.distance(BlendedHash);
      if (BestBlock == nullptr || Dist < BestDist) {
        BestDist = Dist;
        BestBlock = Block;
      }
    }
    return BestBlock;
  }

  /// Returns true if the two basic blocks (in the binary and in the profile)
  /// corresponding to the given hashes are matched to each other with a high
  /// confidence.
  static bool isHighConfidenceMatch(BlendedBlockHash Hash1,
                                    BlendedBlockHash Hash2) {
    return Hash1.InstrHash == Hash2.InstrHash;
  }

private:
  using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
  std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
};

void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const {
  if (size() == 0)
    return;

  assert(hasCFG() && "the function is expected to have CFG");

  std::vector<BlendedBlockHash> BlendedHashes(BasicBlocks.size());
  std::vector<uint64_t> OpcodeHashes(BasicBlocks.size());
  // Initialize hash components.
  for (size_t I = 0; I < BasicBlocks.size(); I++) {
    const BinaryBasicBlock *BB = BasicBlocks[I];
    assert(BB->getIndex() == I && "incorrect block index");
    BlendedHashes[I].Offset = BB->getOffset();
    // Hashing complete instructions.
    std::string InstrHashStr = hashBlock(
        BC, *BB, [&](const MCOperand &Op) { return hashInstOperand(BC, Op); });
    if (HashFunction == HashFunction::StdHash) {
      uint64_t InstrHash = std::hash<std::string>{}(InstrHashStr);
      BlendedHashes[I].InstrHash = (uint16_t)hash_value(InstrHash);
    } else if (HashFunction == HashFunction::XXH3) {
      uint64_t InstrHash = llvm::xxh3_64bits(InstrHashStr);
      BlendedHashes[I].InstrHash = (uint16_t)InstrHash;
    } else {
      llvm_unreachable("Unhandled HashFunction");
    }
    // Hashing opcodes.
    std::string OpcodeHashStr = hashBlockLoose(BC, *BB);
    if (HashFunction == HashFunction::StdHash) {
      OpcodeHashes[I] = std::hash<std::string>{}(OpcodeHashStr);
      BlendedHashes[I].OpcodeHash = (uint16_t)hash_value(OpcodeHashes[I]);
    } else if (HashFunction == HashFunction::XXH3) {
      OpcodeHashes[I] = llvm::xxh3_64bits(OpcodeHashStr);
      BlendedHashes[I].OpcodeHash = (uint16_t)OpcodeHashes[I];
    } else {
      llvm_unreachable("Unhandled HashFunction");
    }
  }

  // Initialize neighbor hash.
  for (size_t I = 0; I < BasicBlocks.size(); I++) {
    const BinaryBasicBlock *BB = BasicBlocks[I];
    // Append hashes of successors.
    uint64_t Hash = 0;
    for (BinaryBasicBlock *SuccBB : BB->successors()) {
      uint64_t SuccHash = OpcodeHashes[SuccBB->getIndex()];
      Hash = hashing::detail::hash_16_bytes(Hash, SuccHash);
    }
    if (HashFunction == HashFunction::StdHash) {
      // Compatibility with old behavior.
      BlendedHashes[I].SuccHash = (uint8_t)hash_value(Hash);
    } else {
      BlendedHashes[I].SuccHash = (uint8_t)Hash;
    }

    // Append hashes of predecessors.
    Hash = 0;
    for (BinaryBasicBlock *PredBB : BB->predecessors()) {
      uint64_t PredHash = OpcodeHashes[PredBB->getIndex()];
      Hash = hashing::detail::hash_16_bytes(Hash, PredHash);
    }
    if (HashFunction == HashFunction::StdHash) {
      // Compatibility with old behavior.
      BlendedHashes[I].PredHash = (uint8_t)hash_value(Hash);
    } else {
      BlendedHashes[I].PredHash = (uint8_t)Hash;
    }
  }

  // Assign hashes.
  for (size_t I = 0; I < BasicBlocks.size(); I++) {
    const BinaryBasicBlock *BB = BasicBlocks[I];
    BB->setHash(BlendedHashes[I].combine());
  }
}

/// Create a wrapper flow function to use with the profile inference algorithm,
/// and initialize its jumps and metadata.
FlowFunction
createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
  FlowFunction Func;

  // Add a special "dummy" source so that there is always a unique entry point.
  // Because of the extra source, for all other blocks in FlowFunction it holds
  // that Block.Index == BB->getIndex() + 1.
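  // For instance, a binary function with blocks BB0, BB1, BB2 (in layout
  // order) maps to FlowFunction blocks with indices 0 (dummy source), 1, 2, 3.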
  FlowBlock EntryBlock;
  EntryBlock.Index = 0;
  Func.Blocks.push_back(EntryBlock);

  // Create FlowBlock for every basic block in the binary function.
  for (const BinaryBasicBlock *BB : BlockOrder) {
    Func.Blocks.emplace_back();
    FlowBlock &Block = Func.Blocks.back();
    Block.Index = Func.Blocks.size() - 1;
    (void)BB;
    assert(Block.Index == BB->getIndex() + 1 &&
           "incorrectly assigned basic block index");
  }

  // Create FlowJump for each jump between basic blocks in the binary function.
  std::vector<uint64_t> InDegree(Func.Blocks.size(), 0);
  for (const BinaryBasicBlock *SrcBB : BlockOrder) {
    std::unordered_set<const BinaryBasicBlock *> UniqueSuccs;
    // Collect regular jumps.
    for (const BinaryBasicBlock *DstBB : SrcBB->successors()) {
      // Ignore parallel edges.
      if (UniqueSuccs.find(DstBB) != UniqueSuccs.end())
        continue;

      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = SrcBB->getIndex() + 1;
      Jump.Target = DstBB->getIndex() + 1;
      InDegree[Jump.Target]++;
      UniqueSuccs.insert(DstBB);
    }
    // Collect jumps to landing pads.
    for (const BinaryBasicBlock *DstBB : SrcBB->landing_pads()) {
      // Ignore parallel edges.
      if (UniqueSuccs.find(DstBB) != UniqueSuccs.end())
        continue;

      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = SrcBB->getIndex() + 1;
      Jump.Target = DstBB->getIndex() + 1;
      InDegree[Jump.Target]++;
      UniqueSuccs.insert(DstBB);
    }
  }

  // Add dummy edges to the extra sources. If there are multiple entry blocks,
  // add an unlikely edge from 0 to the subsequent ones.
  assert(InDegree[0] == 0 && "dummy entry blocks shouldn't have predecessors");
  for (uint64_t I = 1; I < Func.Blocks.size(); I++) {
    const BinaryBasicBlock *BB = BlockOrder[I - 1];
    if (BB->isEntryPoint() || InDegree[I] == 0) {
      Func.Jumps.emplace_back();
      FlowJump &Jump = Func.Jumps.back();
      Jump.Source = 0;
      Jump.Target = I;
      if (!BB->isEntryPoint())
        Jump.IsUnlikely = true;
    }
  }

  // Create necessary metadata for the flow function.
  for (FlowJump &Jump : Func.Jumps) {
    Func.Blocks.at(Jump.Source).SuccJumps.push_back(&Jump);
    Func.Blocks.at(Jump.Target).PredJumps.push_back(&Jump);
  }
  return Func;
}

/// Assign initial block/jump weights based on the stale profile data. The goal
/// is to extract as much information from the stale profile as possible. Here
/// we assume that each basic block is specified via a hash value computed from
/// its content, and that the hashes of unchanged basic blocks stay the same
/// across different revisions of the binary.
/// Whenever there is a count in the profile with a hash corresponding to one
/// of the basic blocks in the binary, the count is "matched" to the block.
/// Similarly, if both the source and the target of a count in the profile are
/// matched to a jump in the binary, the count is recorded in the CFG.
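///
/// As an illustration, if profile block b2 matches binary block 3 and profile
/// block b5 matches binary block 4, then a profile edge b2 -> b5 with count
/// 100 sets the weight of the CFG jump 3 -> 4 to 100 and contributes 100 to
/// the out-weight of block 3 and the in-weight of block 4.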
void matchWeightsByHashes(BinaryContext &BC,
                          const BinaryFunction::BasicBlockOrderType &BlockOrder,
                          const yaml::bolt::BinaryFunctionProfile &YamlBF,
                          FlowFunction &Func) {
  assert(Func.Blocks.size() == BlockOrder.size() + 1);

  std::vector<FlowBlock *> Blocks;
  std::vector<BlendedBlockHash> BlendedHashes;
  for (uint64_t I = 0; I < BlockOrder.size(); I++) {
    const BinaryBasicBlock *BB = BlockOrder[I];
    assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");
    Blocks.push_back(&Func.Blocks[I + 1]);
    BlendedBlockHash BlendedHash(BB->getHash());
    BlendedHashes.push_back(BlendedHash);
    LLVM_DEBUG(dbgs() << "BB with index " << I << " has hash = "
                      << Twine::utohexstr(BB->getHash()) << "\n");
  }
  StaleMatcher Matcher;
  Matcher.init(Blocks, BlendedHashes);

  // Index in yaml profile => corresponding (matched) block.
  DenseMap<uint64_t, const FlowBlock *> MatchedBlocks;
  // Match blocks from the profile to the blocks in the CFG.
  for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
    assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
    BlendedBlockHash YamlHash(YamlBB.Hash);
    const FlowBlock *MatchedBlock = Matcher.matchBlock(YamlHash);
    // Always match the entry block.
    if (MatchedBlock == nullptr && YamlBB.Index == 0)
      MatchedBlock = Blocks[0];
    if (MatchedBlock != nullptr) {
      const BinaryBasicBlock *BB = BlockOrder[MatchedBlock->Index - 1];
      MatchedBlocks[YamlBB.Index] = MatchedBlock;
      BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1];
      LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBB.Index << ")"
                        << " with hash " << Twine::utohexstr(YamlBB.Hash)
                        << " to BB (index = " << MatchedBlock->Index - 1 << ")"
                        << " with hash " << Twine::utohexstr(BinHash.combine())
                        << "\n");
      // Update matching stats accounting for the matched block.
      if (Matcher.isHighConfidenceMatch(BinHash, YamlHash)) {
        ++BC.Stats.NumMatchedBlocks;
        BC.Stats.MatchedSampleCount += YamlBB.ExecCount;
        LLVM_DEBUG(dbgs() << "  exact match\n");
      } else {
        LLVM_DEBUG(dbgs() << "  loose match\n");
      }
      if (YamlBB.NumInstructions == BB->size())
        ++BC.Stats.NumStaleBlocksWithEqualIcount;
    } else {
      LLVM_DEBUG(
          dbgs() << "Couldn't match yaml block (bid = " << YamlBB.Index << ")"
                 << " with hash " << Twine::utohexstr(YamlBB.Hash) << "\n");
    }

    // Update matching stats.
    ++BC.Stats.NumStaleBlocks;
    BC.Stats.StaleSampleCount += YamlBB.ExecCount;
  }

  // Match jumps from the profile to the jumps from the CFG.
  std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0);
  std::vector<uint64_t> InWeight(Func.Blocks.size(), 0);
  for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
    for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) {
      if (YamlSI.Count == 0)
        continue;

      // Try to find the jump for a given (src, dst) pair from the profile and
      // assign the jump weight based on the profile count.
      const uint64_t SrcIndex = YamlBB.Index;
      const uint64_t DstIndex = YamlSI.Index;

      const FlowBlock *MatchedSrcBlock = MatchedBlocks.lookup(SrcIndex);
      const FlowBlock *MatchedDstBlock = MatchedBlocks.lookup(DstIndex);

      if (MatchedSrcBlock != nullptr && MatchedDstBlock != nullptr) {
        // Find a jump between the two blocks.
        FlowJump *Jump = nullptr;
        for (FlowJump *SuccJump : MatchedSrcBlock->SuccJumps) {
          if (SuccJump->Target == MatchedDstBlock->Index) {
            Jump = SuccJump;
            break;
          }
        }
        // Assign the weight if the corresponding jump is found.
        if (Jump != nullptr) {
          Jump->Weight = YamlSI.Count;
          Jump->HasUnknownWeight = false;
        }
      }
      // Assign the weight for the src block, if it is found.
      if (MatchedSrcBlock != nullptr)
        OutWeight[MatchedSrcBlock->Index] += YamlSI.Count;
      // Assign the weight for the dst block, if it is found.
      if (MatchedDstBlock != nullptr)
        InWeight[MatchedDstBlock->Index] += YamlSI.Count;
    }
  }

  // Assign block counts based on in-/out-jumps.
  for (FlowBlock &Block : Func.Blocks) {
    if (OutWeight[Block.Index] == 0 && InWeight[Block.Index] == 0) {
      assert(Block.HasUnknownWeight && "unmatched block with a positive count");
      continue;
    }
    Block.HasUnknownWeight = false;
    Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]);
  }
}

/// The function finds all blocks that are (i) reachable from the Entry block
/// and (ii) do not have a path to an exit, and marks all such blocks 'cold'
/// so that profi does not send any flow to such blocks.
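///
/// For example, a block that was matched with a positive count but is only
/// reachable from the entry without any path to an exit has its weight reset
/// to zero and is marked IsUnlikely, together with its self-loop jumps.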
void preprocessUnreachableBlocks(FlowFunction &Func) {
  const uint64_t NumBlocks = Func.Blocks.size();

  // Start BFS from the source.
  std::queue<uint64_t> Queue;
  std::vector<bool> VisitedEntry(NumBlocks, false);
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.isEntry()) {
      Queue.push(I);
      VisitedEntry[I] = true;
      break;
    }
  }
  while (!Queue.empty()) {
    const uint64_t Src = Queue.front();
    Queue.pop();
    for (FlowJump *Jump : Func.Blocks[Src].SuccJumps) {
      const uint64_t Dst = Jump->Target;
      if (!VisitedEntry[Dst]) {
        Queue.push(Dst);
        VisitedEntry[Dst] = true;
      }
    }
  }

  // Start BFS from all sinks.
  std::vector<bool> VisitedExit(NumBlocks, false);
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.isExit() && VisitedEntry[I]) {
      Queue.push(I);
      VisitedExit[I] = true;
    }
  }
  while (!Queue.empty()) {
    const uint64_t Src = Queue.front();
    Queue.pop();
    for (FlowJump *Jump : Func.Blocks[Src].PredJumps) {
      const uint64_t Dst = Jump->Source;
      if (!VisitedExit[Dst]) {
        Queue.push(Dst);
        VisitedExit[Dst] = true;
      }
    }
  }

  // Make all such blocks have zero weight so that flow is not sent to them.
  for (uint64_t I = 0; I < NumBlocks; I++) {
    FlowBlock &Block = Func.Blocks[I];
    if (Block.Weight == 0)
      continue;
    if (!VisitedEntry[I] || !VisitedExit[I]) {
      Block.Weight = 0;
      Block.HasUnknownWeight = true;
      Block.IsUnlikely = true;
      for (FlowJump *Jump : Block.SuccJumps) {
        if (Jump->Source == Block.Index && Jump->Target == Block.Index) {
          Jump->Weight = 0;
          Jump->HasUnknownWeight = true;
          Jump->IsUnlikely = true;
        }
      }
    }
  }
}

/// Decide if stale profile matching can be applied for a given function.
/// Currently we skip inference for (very) large instances and for instances
/// having "unexpected" control flow (e.g., having no sink basic blocks).
bool canApplyInference(const FlowFunction &Func) {
  if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
    return false;

  bool HasExitBlocks = llvm::any_of(
      Func.Blocks, [&](const FlowBlock &Block) { return Block.isExit(); });
  if (!HasExitBlocks)
    return false;

  return true;
}

/// Apply the profile inference algorithm for a given flow function.
void applyInference(FlowFunction &Func) {
  ProfiParams Params;
  // Set the params from the command-line flags.
  Params.EvenFlowDistribution = opts::StaleMatchingEvenFlowDistribution;
  Params.RebalanceUnknown = opts::StaleMatchingRebalanceUnknown;
  Params.JoinIslands = opts::StaleMatchingJoinIslands;

  Params.CostBlockInc = opts::StaleMatchingCostBlockInc;
  Params.CostBlockEntryInc = opts::StaleMatchingCostBlockInc;
  Params.CostBlockDec = opts::StaleMatchingCostBlockDec;
  Params.CostBlockEntryDec = opts::StaleMatchingCostBlockDec;
  Params.CostBlockUnknownInc = opts::StaleMatchingCostBlockUnknownInc;

  Params.CostJumpInc = opts::StaleMatchingCostJumpInc;
  Params.CostJumpFTInc = opts::StaleMatchingCostJumpInc;
  Params.CostJumpDec = opts::StaleMatchingCostJumpDec;
  Params.CostJumpFTDec = opts::StaleMatchingCostJumpDec;
  Params.CostJumpUnknownInc = opts::StaleMatchingCostJumpUnknownInc;
  Params.CostJumpUnknownFTInc = opts::StaleMatchingCostJumpUnknownFTInc;

  applyFlowInference(Params, Func);
}

/// Collect inferred counts from the flow function and update annotations in
/// the binary function.
void assignProfile(BinaryFunction &BF,
                   const BinaryFunction::BasicBlockOrderType &BlockOrder,
                   FlowFunction &Func) {
  BinaryContext &BC = BF.getBinaryContext();

  assert(Func.Blocks.size() == BlockOrder.size() + 1);
  for (uint64_t I = 0; I < BlockOrder.size(); I++) {
    FlowBlock &Block = Func.Blocks[I + 1];
    BinaryBasicBlock *BB = BlockOrder[I];

    // Update the block's count.
    BB->setExecutionCount(Block.Flow);

    // Update jump counts: (i) clean existing counts, then (ii) set new ones.
    auto BI = BB->branch_info_begin();
    for (const BinaryBasicBlock *DstBB : BB->successors()) {
      (void)DstBB;
      BI->Count = 0;
      BI->MispredictedCount = 0;
      ++BI;
    }
    for (FlowJump *Jump : Block.SuccJumps) {
      if (Jump->IsUnlikely)
        continue;
      if (Jump->Flow == 0)
        continue;

      BinaryBasicBlock &SuccBB = *BlockOrder[Jump->Target - 1];
      // Check if the edge corresponds to a regular jump or a landing pad.
      if (BB->getSuccessor(SuccBB.getLabel())) {
        BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(SuccBB);
        BI.Count += Jump->Flow;
      } else {
        BinaryBasicBlock *LP = BB->getLandingPad(SuccBB.getLabel());
        if (LP && LP->getKnownExecutionCount() < Jump->Flow)
          LP->setExecutionCount(Jump->Flow);
      }
    }

    // Update call-site annotations.
    auto setOrUpdateAnnotation = [&](MCInst &Instr, StringRef Name,
                                     uint64_t Count) {
      if (BC.MIB->hasAnnotation(Instr, Name))
        BC.MIB->removeAnnotation(Instr, Name);
      // Do not add zero-count annotations.
      if (Count == 0)
        return;
      BC.MIB->addAnnotation(Instr, Name, Count);
    };

    for (MCInst &Instr : *BB) {
      // Ignore pseudo instructions.
      if (BC.MIB->isPseudo(Instr))
        continue;
      // Ignore jump tables.
      const MCInst *LastInstr = BB->getLastNonPseudoInstr();
      if (BC.MIB->getJumpTable(*LastInstr) && LastInstr == &Instr)
        continue;

      if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) {
        auto &ICSP = BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>(
            Instr, "CallProfile");
        if (!ICSP.empty()) {
          // Try to evenly distribute the counts among the call sites.
          const uint64_t TotalCount = Block.Flow;
          const uint64_t NumSites = ICSP.size();
          for (uint64_t Idx = 0; Idx < ICSP.size(); Idx++) {
            IndirectCallProfile &CSP = ICSP[Idx];
            uint64_t CountPerSite = TotalCount / NumSites;
            // When counts cannot be exactly distributed, increase by 1 the
            // counts of the first (TotalCount % NumSites) call sites.
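            // For example, with TotalCount = 10 and NumSites = 3, the call
            // sites receive counts 4, 3, and 3, respectively.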
            if (Idx < TotalCount % NumSites)
              CountPerSite++;
            CSP.Count = CountPerSite;
          }
        } else {
          ICSP.emplace_back(nullptr, Block.Flow, 0);
        }
      } else if (BC.MIB->getConditionalTailCall(Instr)) {
        // We don't know exactly how many times the conditional tail call is
        // executed; conservatively, set it to the count of the block.
        setOrUpdateAnnotation(Instr, "CTCTakenCount", Block.Flow);
        BC.MIB->removeAnnotation(Instr, "CTCMispredCount");
      } else if (BC.MIB->isCall(Instr)) {
        setOrUpdateAnnotation(Instr, "Count", Block.Flow);
      }
    }
  }

  // Update the function's execution count and mark the function inferred.
  BF.setExecutionCount(Func.Blocks[0].Flow);
  BF.setHasInferredProfile(true);
}

bool YAMLProfileReader::inferStaleProfile(
    BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) {
  if (!BF.hasCFG())
    return false;

  LLVM_DEBUG(dbgs() << "BOLT-INFO: applying profile inference for "
                    << "\"" << BF.getPrintName() << "\"\n");

  // Make sure that block hashes are up to date.
  BF.computeBlockHashes(YamlBP.Header.HashFunction);

  const BinaryFunction::BasicBlockOrderType BlockOrder(
      BF.getLayout().block_begin(), BF.getLayout().block_end());

  // Create a wrapper flow function to use with the profile inference
  // algorithm.
  FlowFunction Func = createFlowFunction(BlockOrder);

  // Match as many block/jump counts from the stale profile as possible.
  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);

  // Adjust the flow function by marking unreachable blocks Unlikely so that
  // they don't get any counts assigned.
  preprocessUnreachableBlocks(Func);

  // Check if profile inference can be applied for the instance.
  if (!canApplyInference(Func))
    return false;

  // Apply the profile inference algorithm.
  applyInference(Func);

  // Collect inferred counts and update function annotations.
  assignProfile(BF, BlockOrder, Func);

  // As of now, we always mark the binary function as having a "correct"
  // profile. In the future, we may discard the results for instances with
  // poor inference metrics and keep such functions un-optimized.
  return true;
}

} // end namespace bolt
} // end namespace llvm