//===- bolt/Profile/YAMLProfileWriter.cpp - YAML profile serializer ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "bolt/Profile/YAMLProfileWriter.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Profile/BoltAddressTranslation.h"
#include "bolt/Profile/DataAggregator.h"
#include "bolt/Profile/ProfileReaderBase.h"
#include "bolt/Rewrite/RewriteInstance.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"

namespace opts {
using namespace llvm;
extern cl::opt<bool> ProfileUseDFS;
cl::opt<bool> ProfileWritePseudoProbes(
    "profile-write-pseudo-probes",
    cl::desc("Use pseudo probes in profile generation"), cl::Hidden,
    cl::cat(BoltOptCategory));
} // namespace opts

namespace llvm {
namespace bolt {

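// Fill in the destination fields of a call-site entry (DestId and
// EntryDiscriminator) for a call to Symbol, translating the callee through
// BAT when the callee's layout was modified. Returns the callee
// BinaryFunction, or nullptr if the destination is unknown.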
const BinaryFunction *YAMLProfileWriter::setCSIDestination(
    const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI,
    const MCSymbol *Symbol, const BoltAddressTranslation *BAT,
    uint32_t Offset) {
  CSI.DestId = 0; // designated for unknown functions
  CSI.EntryDiscriminator = 0;

  if (Symbol) {
    uint64_t EntryID = 0;
    if (const BinaryFunction *Callee =
            BC.getFunctionForSymbol(Symbol, &EntryID)) {
      if (BAT && BAT->isBATFunction(Callee->getAddress()))
        std::tie(Callee, EntryID) = BAT->translateSymbol(BC, *Symbol, Offset);
      else if (const BinaryBasicBlock *BB =
                   Callee->getBasicBlockContainingOffset(Offset))
        BC.getFunctionForSymbol(Callee->getSecondaryEntryPointSymbol(*BB),
                                &EntryID);
      CSI.DestId = Callee->getFunctionNumber();
      CSI.EntryDiscriminator = EntryID;
      return Callee;
    }
  }
  return nullptr;
}

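// Flatten the decoded pseudo-probe inline tree rooted at Root into a vector
// in breadth-first order, so that every parent precedes its children. Each
// entry records the node's GUID, function hash, parent index, and inline
// call-site index.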
std::vector<YAMLProfileWriter::InlineTreeNode>
YAMLProfileWriter::collectInlineTree(
    const MCPseudoProbeDecoder &Decoder,
    const MCDecodedPseudoProbeInlineTree &Root) {
  auto getHash = [&](const MCDecodedPseudoProbeInlineTree &Node) {
    return Decoder.getFuncDescForGUID(Node.Guid)->FuncHash;
  };
  std::vector<InlineTreeNode> InlineTree(
      {InlineTreeNode{&Root, Root.Guid, getHash(Root), 0, 0}});
  uint32_t ParentId = 0;
  while (ParentId != InlineTree.size()) {
    const MCDecodedPseudoProbeInlineTree *Cur = InlineTree[ParentId].InlineTree;
    for (const MCDecodedPseudoProbeInlineTree &Child : Cur->getChildren())
      InlineTree.emplace_back(
          InlineTreeNode{&Child, Child.Guid, getHash(Child), ParentId,
                         std::get<1>(Child.getInlineSite())});
    ++ParentId;
  }

  return InlineTree;
}

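// Build the profile-wide pseudo-probe descriptor: collect all function GUIDs
// and hashes known to the decoder and assign them dense indices, ordered by
// decreasing use count so that frequently referenced entries get small
// indices and thus a more compact YAML encoding.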
std::tuple<yaml::bolt::ProfilePseudoProbeDesc,
           YAMLProfileWriter::InlineTreeDesc>
YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
  yaml::bolt::ProfilePseudoProbeDesc Desc;
  InlineTreeDesc InlineTree;

  for (const MCDecodedPseudoProbeInlineTree &TopLev :
       Decoder.getDummyInlineRoot().getChildren())
    InlineTree.TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;

  for (const auto &FuncDesc : Decoder.getGUID2FuncDescMap())
    ++InlineTree.HashIdxMap[FuncDesc.FuncHash];

  InlineTree.GUIDIdxMap.reserve(Decoder.getGUID2FuncDescMap().size());
  for (const auto &Node : Decoder.getInlineTreeVec())
    ++InlineTree.GUIDIdxMap[Node.Guid];

  std::vector<std::pair<uint32_t, uint64_t>> GUIDFreqVec;
  GUIDFreqVec.reserve(InlineTree.GUIDIdxMap.size());
  for (const auto [GUID, Cnt] : InlineTree.GUIDIdxMap)
    GUIDFreqVec.emplace_back(Cnt, GUID);
  llvm::sort(GUIDFreqVec);

  std::vector<std::pair<uint32_t, uint64_t>> HashFreqVec;
  HashFreqVec.reserve(InlineTree.HashIdxMap.size());
  for (const auto [Hash, Cnt] : InlineTree.HashIdxMap)
    HashFreqVec.emplace_back(Cnt, Hash);
  llvm::sort(HashFreqVec);

  uint32_t Index = 0;
  Desc.Hash.reserve(HashFreqVec.size());
  for (uint64_t Hash : llvm::make_second_range(llvm::reverse(HashFreqVec))) {
    Desc.Hash.emplace_back(Hash);
    InlineTree.HashIdxMap[Hash] = Index++;
  }

  Index = 0;
  Desc.GUID.reserve(GUIDFreqVec.size());
  for (uint64_t GUID : llvm::make_second_range(llvm::reverse(GUIDFreqVec))) {
    Desc.GUID.emplace_back(GUID);
    InlineTree.GUIDIdxMap[GUID] = Index++;
    uint64_t Hash = Decoder.getFuncDescForGUID(GUID)->FuncHash;
    Desc.GUIDHashIdx.emplace_back(InlineTree.HashIdxMap[Hash]);
  }

  return {Desc, InlineTree};
}

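// Convert per-node probe sets into YAML PseudoProbeInfo records. Inline tree
// nodes with identical probe sets are merged into a single record, and block
// probe ids not exceeding 64 are packed into a bitmask to keep the output
// compact.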
std::vector<yaml::bolt::PseudoProbeInfo>
YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) {
  struct BlockProbeInfoHasher {
    size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const {
      return llvm::hash_combine(llvm::hash_combine_range(BPI.BlockProbes),
                                llvm::hash_combine_range(BPI.CallProbes),
                                llvm::hash_combine_range(BPI.IndCallProbes));
    }
  };

  // Deduplicate identical PseudoProbeInfo structs, collecting the node ids
  // that share each probe set.
  std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>,
                     BlockProbeInfoHasher>
      BPIToNodes;
  for (auto &[NodeId, Probes] : NodeProbes) {
    yaml::bolt::PseudoProbeInfo BPI;
    BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end());
    BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end());
    BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end());
    BPIToNodes[BPI].push_back(NodeId);
  }

  // Pack probe ids 1..64 into a 64-bit mask (bit Id-1); larger ids are
  // emitted individually.
  auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) {
    for (auto Id : Ids)
      if (Id > 64)
        Vec.emplace_back(Id);
      else
        Mask |= 1ull << (Id - 1);
  };

  // Add to YAML with merged nodes/block mask optimizations.
  std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes;
  YamlProbes.reserve(BPIToNodes.size());
  for (const auto &[BPI, Nodes] : BPIToNodes) {
    auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo());
    YamlBPI.CallProbes = BPI.CallProbes;
    YamlBPI.IndCallProbes = BPI.IndCallProbes;
    if (Nodes.size() == 1)
      YamlBPI.InlineTreeIndex = Nodes.front();
    else
      YamlBPI.InlineTreeNodes = Nodes;
    handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask);
  }
  return YamlProbes;
}

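// Convert the decoded inline tree of the function identified by GUID into
// its YAML form. Parent indices are delta-encoded against the previous node,
// and a repeated GUID index is elided (marked with UINT32_MAX) to reduce
// output size. Returns the YAML nodes together with a map from decoded tree
// nodes to their index in the emitted vector.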
std::tuple<std::vector<yaml::bolt::InlineTreeNode>,
           YAMLProfileWriter::InlineTreeMapTy>
YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
                                       const InlineTreeDesc &InlineTree,
                                       uint64_t GUID) {
  DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
  std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree;
  auto It = InlineTree.TopLevelGUIDToInlineTree.find(GUID);
  if (It == InlineTree.TopLevelGUIDToInlineTree.end())
    return {YamlInlineTree, InlineTreeNodeId};
  const MCDecodedPseudoProbeInlineTree *Root = It->second;
  assert(Root && "Malformed TopLevelGUIDToInlineTree");
  uint32_t Index = 0;
  uint32_t PrevParent = 0;
  uint32_t PrevGUIDIdx = 0;
  for (const auto &Node : collectInlineTree(Decoder, *Root)) {
    InlineTreeNodeId[Node.InlineTree] = Index++;
    auto GUIDIdxIt = InlineTree.GUIDIdxMap.find(Node.GUID);
    assert(GUIDIdxIt != InlineTree.GUIDIdxMap.end() && "Malformed GUIDIdxMap");
    uint32_t GUIDIdx = GUIDIdxIt->second;
    if (GUIDIdx == PrevGUIDIdx)
      GUIDIdx = UINT32_MAX;
    else
      PrevGUIDIdx = GUIDIdx;
    YamlInlineTree.emplace_back(yaml::bolt::InlineTreeNode{
        Node.ParentId - PrevParent, Node.InlineSite, GUIDIdx, 0, 0});
    PrevParent = Node.ParentId;
  }
  return {YamlInlineTree, InlineTreeNodeId};
}

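// Convert a single BinaryFunction and its profile into the YAML
// representation: basic block execution counts, call sites, successor edges,
// and, when pseudo-probe writing is enabled, per-block probe info. When BAT
// is supplied, locations are translated back to the input binary.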
yaml::bolt::BinaryFunctionProfile
YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
                           const InlineTreeDesc &InlineTree,
                           const BoltAddressTranslation *BAT) {
  yaml::bolt::BinaryFunctionProfile YamlBF;
  const BinaryContext &BC = BF.getBinaryContext();
  const MCPseudoProbeDecoder *PseudoProbeDecoder =
      opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;

  const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_BRANCH;

  // Prepare function and block hashes.
  BF.computeHash(UseDFS);
  BF.computeBlockHashes();

  YamlBF.Name = DataAggregator::getLocationName(BF, BAT);
  YamlBF.Id = BF.getFunctionNumber();
  YamlBF.Hash = BF.getHash();
  YamlBF.NumBasicBlocks = BF.size();
  YamlBF.ExecCount = BF.getKnownExecutionCount();
  YamlBF.ExternEntryCount = BF.getExternEntryCount();
  DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
  if (PseudoProbeDecoder && BF.getGUID()) {
    std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
        convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF.getGUID());
  }

  BinaryFunction::BasicBlockOrderType Order;
  llvm::copy(UseDFS ? BF.dfs() : BF.getLayout().blocks(),
             std::back_inserter(Order));

  const FunctionLayout Layout = BF.getLayout();
  Layout.updateLayoutIndices(Order);

  for (const BinaryBasicBlock *BB : Order) {
    yaml::bolt::BinaryBasicBlockProfile YamlBB;
    YamlBB.Index = BB->getLayoutIndex();
    YamlBB.NumInstructions = BB->getNumNonPseudos();
    YamlBB.Hash = BB->getHash();

    if (!LBRProfile) {
      YamlBB.EventCount = BB->getKnownExecutionCount();
      if (YamlBB.EventCount)
        YamlBF.Blocks.emplace_back(YamlBB);
      continue;
    }

    YamlBB.ExecCount = BB->getKnownExecutionCount();

    for (const MCInst &Instr : *BB) {
      if (!BC.MIB->isCall(Instr) && !BC.MIB->isIndirectBranch(Instr))
        continue;

      SmallVector<std::pair<StringRef, yaml::bolt::CallSiteInfo>> CSTargets;
      yaml::bolt::CallSiteInfo CSI;
      std::optional<uint32_t> Offset = BC.MIB->getOffset(Instr);
      if (!Offset || *Offset < BB->getInputOffset())
        continue;
      CSI.Offset = *Offset - BB->getInputOffset();

      if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) {
        const auto ICSP = BC.MIB->tryGetAnnotationAs<IndirectCallSiteProfile>(
            Instr, "CallProfile");
        if (!ICSP)
          continue;
        for (const IndirectCallProfile &CSP : ICSP.get()) {
          StringRef TargetName = "";
          const BinaryFunction *Callee =
              setCSIDestination(BC, CSI, CSP.Symbol, BAT);
          if (Callee)
            TargetName = Callee->getOneName();
          CSI.Count = CSP.Count;
          CSI.Mispreds = CSP.Mispreds;
          CSTargets.emplace_back(TargetName, CSI);
        }
      } else { // direct call or a tail call
        StringRef TargetName = "";
        const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(Instr);
        const BinaryFunction *const Callee =
            setCSIDestination(BC, CSI, CalleeSymbol, BAT);
        if (Callee)
          TargetName = Callee->getOneName();

        auto getAnnotationWithDefault = [&](const MCInst &Inst, StringRef Ann) {
          return BC.MIB->getAnnotationWithDefault(Inst, Ann, 0ull);
        };
        if (BC.MIB->getConditionalTailCall(Instr)) {
          CSI.Count = getAnnotationWithDefault(Instr, "CTCTakenCount");
          CSI.Mispreds = getAnnotationWithDefault(Instr, "CTCMispredCount");
        } else {
          CSI.Count = getAnnotationWithDefault(Instr, "Count");
        }

        if (CSI.Count)
          CSTargets.emplace_back(TargetName, CSI);
      }
      // Sort targets in a similar way to getBranchData, see Location::operator<.
      llvm::sort(CSTargets, [](const auto &RHS, const auto &LHS) {
        return std::tie(RHS.first, RHS.second.Offset) <
               std::tie(LHS.first, LHS.second.Offset);
      });
      for (auto &KV : CSTargets)
        YamlBB.CallSites.push_back(KV.second);
    }

    // Skip printing if there's no profile data for a non-entry basic block.
    // Include landing pads with a non-zero execution count.
    if (YamlBB.CallSites.empty() && !BB->isEntryPoint() &&
        !(BB->isLandingPad() && BB->getKnownExecutionCount() != 0)) {
      // Include blocks having successors or predecessors with positive counts.
      uint64_t SuccessorExecCount = 0;
      for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
           BB->branch_info())
        SuccessorExecCount += BranchInfo.Count;
      uint64_t PredecessorExecCount = 0;
      for (auto Pred : BB->predecessors())
        PredecessorExecCount += Pred->getBranchInfo(*BB).Count;
      if (!SuccessorExecCount && !PredecessorExecCount)
        continue;
    }

    auto BranchInfo = BB->branch_info_begin();
    for (const BinaryBasicBlock *Successor : BB->successors()) {
      yaml::bolt::SuccessorInfo YamlSI;
      YamlSI.Index = Successor->getLayoutIndex();
      YamlSI.Count = BranchInfo->Count;
      YamlSI.Mispreds = BranchInfo->MispredictedCount;

      YamlBB.Successors.emplace_back(YamlSI);

      ++BranchInfo;
    }

    if (PseudoProbeDecoder) {
      const AddressProbesMap &ProbeMap =
          PseudoProbeDecoder->getAddress2ProbesMap();
      const uint64_t FuncAddr = BF.getAddress();
      const std::pair<uint64_t, uint64_t> &BlockRange =
          BB->getInputAddressRange();
      const std::pair<uint64_t, uint64_t> BlockAddrRange = {
          FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
      auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second);
      YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId);
    }

    YamlBF.Blocks.emplace_back(YamlBB);
  }
  return YamlBF;
}

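// Serialize the profile of every function in the binary to the YAML output
// file: emit the header (version, binary name and build-id, flags, origin,
// event names), the optional pseudo-probe descriptor, and then one record
// per profiled function.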
std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
  const BinaryContext &BC = RI.getBinaryContext();
  const auto &Functions = BC.getBinaryFunctions();

  std::error_code EC;
  OS = std::make_unique<raw_fd_ostream>(Filename, EC, sys::fs::OF_None);
  if (EC) {
    errs() << "BOLT-WARNING: " << EC.message() << " : unable to open "
           << Filename << " for output.\n";
    return EC;
  }

  yaml::bolt::BinaryProfile BP;

  // Fill out the header info.
  BP.Header.Version = 1;
  BP.Header.FileName = std::string(BC.getFilename());
  std::optional<StringRef> BuildID = BC.getFileBuildID();
  BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>";
  BP.Header.Origin = std::string(RI.getProfileReader()->getReaderName());
  BP.Header.IsDFSOrder = opts::ProfileUseDFS;
  BP.Header.HashFunction = HashFunction::Default;

  StringSet<> EventNames = RI.getProfileReader()->getEventNames();
  if (!EventNames.empty()) {
    std::string Sep;
    for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames) {
      BP.Header.EventNames += Sep + EventEntry.first().str();
      Sep = ",";
    }
  }

  // Make sure the profile is consistent across all functions.
  uint16_t ProfileFlags = BinaryFunction::PF_NONE;
  for (const auto &BFI : Functions) {
    const BinaryFunction &BF = BFI.second;
    if (BF.hasProfile() && !BF.empty()) {
      assert(BF.getProfileFlags() != BinaryFunction::PF_NONE);
      if (ProfileFlags == BinaryFunction::PF_NONE)
        ProfileFlags = BF.getProfileFlags();

      assert(BF.getProfileFlags() == ProfileFlags &&
             "expected consistent profile flags across all functions");
    }
  }
  BP.Header.Flags = ProfileFlags;

  // Add probe inline tree nodes.
  InlineTreeDesc InlineTree;
  if (const MCPseudoProbeDecoder *Decoder =
          opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr)
    std::tie(BP.PseudoProbeDesc, InlineTree) = convertPseudoProbeDesc(*Decoder);

  // Add all function objects.
  for (const auto &BFI : Functions) {
    const BinaryFunction &BF = BFI.second;
    if (BF.hasProfile()) {
      if (!BF.hasValidProfile() && !RI.getProfileReader()->isTrustedSource())
        continue;

      BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS, InlineTree));
    }
  }

  // Write the profile.
  yaml::Output Out(*OS, nullptr, 0);
  Out << BP;

  return std::error_code();
}
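
// A minimal usage sketch (illustrative only; it assumes the
// YAMLProfileWriter constructor takes the output filename, per its
// declaration in YAMLProfileWriter.h):
//
//   YAMLProfileWriter Writer("profile.yaml");
//   if (std::error_code EC = Writer.writeProfile(RI))
//     errs() << "failed to write profile: " << EC.message() << '\n';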

} // namespace bolt
} // namespace llvm