//===- bolt/Profile/YAMLProfileWriter.cpp - YAML profile serializer ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "bolt/Profile/YAMLProfileWriter.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Profile/BoltAddressTranslation.h"
#include "bolt/Profile/DataAggregator.h"
#include "bolt/Profile/ProfileReaderBase.h"
#include "bolt/Rewrite/RewriteInstance.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"

namespace opts {
using namespace llvm;
extern cl::opt<bool> ProfileUseDFS;
cl::opt<bool> ProfileWritePseudoProbes(
    "profile-write-pseudo-probes",
    cl::desc("Use pseudo probes in profile generation"), cl::Hidden,
    cl::cat(BoltOptCategory));
} // namespace opts

namespace llvm {
namespace bolt {

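// Set the destination function id and entry discriminator of a call site
// from the callee symbol. If the callee was rewritten, translate the symbol
// through BAT first. Returns the resolved callee, or nullptr for an unknown
// function (CSI.DestId stays 0 in that case).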
const BinaryFunction *YAMLProfileWriter::setCSIDestination(
    const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI,
    const MCSymbol *Symbol, const BoltAddressTranslation *BAT,
    uint32_t Offset) {
  CSI.DestId = 0; // designated for unknown functions
  CSI.EntryDiscriminator = 0;

  if (Symbol) {
    uint64_t EntryID = 0;
    if (const BinaryFunction *Callee =
            BC.getFunctionForSymbol(Symbol, &EntryID)) {
      if (BAT && BAT->isBATFunction(Callee->getAddress()))
        std::tie(Callee, EntryID) = BAT->translateSymbol(BC, *Symbol, Offset);
      else if (const BinaryBasicBlock *BB =
                   Callee->getBasicBlockContainingOffset(Offset))
        BC.getFunctionForSymbol(Callee->getSecondaryEntryPointSymbol(*BB),
                                &EntryID);
      CSI.DestId = Callee->getFunctionNumber();
      CSI.EntryDiscriminator = EntryID;
      return Callee;
    }
  }
  return nullptr;
}

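// Flatten the decoded pseudo probe inline tree rooted at Root into a vector
// in BFS order, so that each node appears after its parent and carries its
// parent's index and the call site it was inlined at.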
std::vector<YAMLProfileWriter::InlineTreeNode>
YAMLProfileWriter::collectInlineTree(
    const MCPseudoProbeDecoder &Decoder,
    const MCDecodedPseudoProbeInlineTree &Root) {
  auto getHash = [&](const MCDecodedPseudoProbeInlineTree &Node) {
    return Decoder.getFuncDescForGUID(Node.Guid)->FuncHash;
  };
  std::vector<InlineTreeNode> InlineTree(
      {InlineTreeNode{&Root, Root.Guid, getHash(Root), 0, 0}});
  uint32_t ParentId = 0;
  while (ParentId != InlineTree.size()) {
    const MCDecodedPseudoProbeInlineTree *Cur = InlineTree[ParentId].InlineTree;
    for (const MCDecodedPseudoProbeInlineTree &Child : Cur->getChildren())
      InlineTree.emplace_back(
          InlineTreeNode{&Child, Child.Guid, getHash(Child), ParentId,
                         std::get<1>(Child.getInlineSite())});
    ++ParentId;
  }

  return InlineTree;
}

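// Build the profile-wide pseudo probe descriptor. GUIDs and function hashes
// are ordered by decreasing use count so that frequently referenced entries
// receive small indices in the emitted YAML.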
std::tuple<yaml::bolt::ProfilePseudoProbeDesc,
           YAMLProfileWriter::InlineTreeDesc>
YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) {
  yaml::bolt::ProfilePseudoProbeDesc Desc;
  InlineTreeDesc InlineTree;

  for (const MCDecodedPseudoProbeInlineTree &TopLev :
       Decoder.getDummyInlineRoot().getChildren())
    InlineTree.TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;

  for (const auto &FuncDesc : Decoder.getGUID2FuncDescMap())
    ++InlineTree.HashIdxMap[FuncDesc.FuncHash];

  InlineTree.GUIDIdxMap.reserve(Decoder.getGUID2FuncDescMap().size());
  for (const auto &Node : Decoder.getInlineTreeVec())
    ++InlineTree.GUIDIdxMap[Node.Guid];

  std::vector<std::pair<uint32_t, uint64_t>> GUIDFreqVec;
  GUIDFreqVec.reserve(InlineTree.GUIDIdxMap.size());
  for (const auto [GUID, Cnt] : InlineTree.GUIDIdxMap)
    GUIDFreqVec.emplace_back(Cnt, GUID);
  llvm::sort(GUIDFreqVec);

  std::vector<std::pair<uint32_t, uint64_t>> HashFreqVec;
  HashFreqVec.reserve(InlineTree.HashIdxMap.size());
  for (const auto [Hash, Cnt] : InlineTree.HashIdxMap)
    HashFreqVec.emplace_back(Cnt, Hash);
  llvm::sort(HashFreqVec);

  uint32_t Index = 0;
  Desc.Hash.reserve(HashFreqVec.size());
  for (uint64_t Hash : llvm::make_second_range(llvm::reverse(HashFreqVec))) {
    Desc.Hash.emplace_back(Hash);
    InlineTree.HashIdxMap[Hash] = Index++;
  }

  Index = 0;
  Desc.GUID.reserve(GUIDFreqVec.size());
  for (uint64_t GUID : llvm::make_second_range(llvm::reverse(GUIDFreqVec))) {
    Desc.GUID.emplace_back(GUID);
    InlineTree.GUIDIdxMap[GUID] = Index++;
    uint64_t Hash = Decoder.getFuncDescForGUID(GUID)->FuncHash;
    Desc.GUIDHashIdx.emplace_back(InlineTree.HashIdxMap[Hash]);
  }

  return {Desc, InlineTree};
}

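// Convert per-node probe sets into YAML form. Nodes with identical probe
// sets are merged into a single PseudoProbeInfo entry, and small block probe
// ids are packed into a bitmask (see handleMask below).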
std::vector<yaml::bolt::PseudoProbeInfo>
YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) {
  struct BlockProbeInfoHasher {
    size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const {
      return llvm::hash_combine(llvm::hash_combine_range(BPI.BlockProbes),
                                llvm::hash_combine_range(BPI.CallProbes),
                                llvm::hash_combine_range(BPI.IndCallProbes));
    }
  };

  // Check identical BlockProbeInfo structs and merge them.
  std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>,
                     BlockProbeInfoHasher>
      BPIToNodes;
  for (auto &[NodeId, Probes] : NodeProbes) {
    yaml::bolt::PseudoProbeInfo BPI;
    BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end());
    BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end());
    BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end());
    BPIToNodes[BPI].push_back(NodeId);
  }

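  // Pack block probe ids 1-64 into a 64-bit mask (bit Id - 1); keep larger
  // ids as an explicit list.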
  auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) {
    for (auto Id : Ids)
      if (Id > 64)
        Vec.emplace_back(Id);
      else
        Mask |= 1ull << (Id - 1);
  };

  // Add to YAML with merged nodes/block mask optimizations.
  std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes;
  YamlProbes.reserve(BPIToNodes.size());
  for (const auto &[BPI, Nodes] : BPIToNodes) {
    auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo());
    YamlBPI.CallProbes = BPI.CallProbes;
    YamlBPI.IndCallProbes = BPI.IndCallProbes;
    if (Nodes.size() == 1)
      YamlBPI.InlineTreeIndex = Nodes.front();
    else
      YamlBPI.InlineTreeNodes = Nodes;
    handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask);
  }
  return YamlProbes;
}

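// Emit the inline tree of a single function in YAML form, delta-encoding
// parent indices and omitting repeated GUID indices to keep the profile
// compact. Also return a map from decoded inline tree nodes to their YAML
// indices, used later to attach block probes.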
std::tuple<std::vector<yaml::bolt::InlineTreeNode>,
           YAMLProfileWriter::InlineTreeMapTy>
YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
                                       const InlineTreeDesc &InlineTree,
                                       uint64_t GUID) {
  DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
  std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree;
  auto It = InlineTree.TopLevelGUIDToInlineTree.find(GUID);
  if (It == InlineTree.TopLevelGUIDToInlineTree.end())
    return {YamlInlineTree, InlineTreeNodeId};
  const MCDecodedPseudoProbeInlineTree *Root = It->second;
  assert(Root && "Malformed TopLevelGUIDToInlineTree");
  uint32_t Index = 0;
  uint32_t PrevParent = 0;
  uint32_t PrevGUIDIdx = 0;
  for (const auto &Node : collectInlineTree(Decoder, *Root)) {
    InlineTreeNodeId[Node.InlineTree] = Index++;
    auto GUIDIdxIt = InlineTree.GUIDIdxMap.find(Node.GUID);
    assert(GUIDIdxIt != InlineTree.GUIDIdxMap.end() && "Malformed GUIDIdxMap");
    uint32_t GUIDIdx = GUIDIdxIt->second;
    if (GUIDIdx == PrevGUIDIdx)
      GUIDIdx = UINT32_MAX;
    else
      PrevGUIDIdx = GUIDIdx;
    YamlInlineTree.emplace_back(yaml::bolt::InlineTreeNode{
        Node.ParentId - PrevParent, Node.InlineSite, GUIDIdx, 0, 0});
    PrevParent = Node.ParentId;
  }
  return {YamlInlineTree, InlineTreeNodeId};
}

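// Convert the profile of one function into YAML: block execution counts,
// call sites, successor edges, and, if pseudo probes are requested,
// per-block probe info tied to the function's inline tree.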
yaml::bolt::BinaryFunctionProfile
YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
                           const InlineTreeDesc &InlineTree,
                           const BoltAddressTranslation *BAT) {
  yaml::bolt::BinaryFunctionProfile YamlBF;
  const BinaryContext &BC = BF.getBinaryContext();
  const MCPseudoProbeDecoder *PseudoProbeDecoder =
      opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;

  const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_BRANCH;

  // Prepare function and block hashes.
  BF.computeHash(UseDFS);
  BF.computeBlockHashes();

  YamlBF.Name = DataAggregator::getLocationName(BF, BAT);
  YamlBF.Id = BF.getFunctionNumber();
  YamlBF.Hash = BF.getHash();
  YamlBF.NumBasicBlocks = BF.size();
  YamlBF.ExecCount = BF.getKnownExecutionCount();
  YamlBF.ExternEntryCount = BF.getExternEntryCount();
  DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
  if (PseudoProbeDecoder && BF.getGUID()) {
    std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
        convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF.getGUID());
  }

  BinaryFunction::BasicBlockOrderType Order;
  llvm::copy(UseDFS ? BF.dfs() : BF.getLayout().blocks(),
             std::back_inserter(Order));

  const FunctionLayout Layout = BF.getLayout();
  Layout.updateLayoutIndices(Order);

  for (const BinaryBasicBlock *BB : Order) {
    yaml::bolt::BinaryBasicBlockProfile YamlBB;
    YamlBB.Index = BB->getLayoutIndex();
    YamlBB.NumInstructions = BB->getNumNonPseudos();
    YamlBB.Hash = BB->getHash();

    if (!LBRProfile) {
      YamlBB.EventCount = BB->getKnownExecutionCount();
      if (YamlBB.EventCount)
        YamlBF.Blocks.emplace_back(YamlBB);
      continue;
    }

    YamlBB.ExecCount = BB->getKnownExecutionCount();

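    // Collect call site profile data from calls and indirect branches.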
    for (const MCInst &Instr : *BB) {
      if (!BC.MIB->isCall(Instr) && !BC.MIB->isIndirectBranch(Instr))
        continue;

      SmallVector<std::pair<StringRef, yaml::bolt::CallSiteInfo>> CSTargets;
      yaml::bolt::CallSiteInfo CSI;
      std::optional<uint32_t> Offset = BC.MIB->getOffset(Instr);
      if (!Offset || *Offset < BB->getInputOffset())
        continue;
      CSI.Offset = *Offset - BB->getInputOffset();

      if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) {
        const auto ICSP = BC.MIB->tryGetAnnotationAs<IndirectCallSiteProfile>(
            Instr, "CallProfile");
        if (!ICSP)
          continue;
        for (const IndirectCallProfile &CSP : ICSP.get()) {
          StringRef TargetName = "";
          const BinaryFunction *Callee =
              setCSIDestination(BC, CSI, CSP.Symbol, BAT);
          if (Callee)
            TargetName = Callee->getOneName();
          CSI.Count = CSP.Count;
          CSI.Mispreds = CSP.Mispreds;
          CSTargets.emplace_back(TargetName, CSI);
        }
      } else { // direct call or a tail call
        StringRef TargetName = "";
        const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(Instr);
        const BinaryFunction *const Callee =
            setCSIDestination(BC, CSI, CalleeSymbol, BAT);
        if (Callee)
          TargetName = Callee->getOneName();

        auto getAnnotationWithDefault = [&](const MCInst &Inst, StringRef Ann) {
          return BC.MIB->getAnnotationWithDefault(Instr, Ann, 0ull);
        };
        if (BC.MIB->getConditionalTailCall(Instr)) {
          CSI.Count = getAnnotationWithDefault(Instr, "CTCTakenCount");
          CSI.Mispreds = getAnnotationWithDefault(Instr, "CTCMispredCount");
        } else {
          CSI.Count = getAnnotationWithDefault(Instr, "Count");
        }

        if (CSI.Count)
          CSTargets.emplace_back(TargetName, CSI);
      }
      // Sort targets in a similar way to getBranchData, see Location::operator<
      llvm::sort(CSTargets, [](const auto &RHS, const auto &LHS) {
        return std::tie(RHS.first, RHS.second.Offset) <
               std::tie(LHS.first, LHS.second.Offset);
      });
      for (auto &KV : CSTargets)
        YamlBB.CallSites.push_back(KV.second);
    }

    // Skip printing if there's no profile data for a non-entry basic block.
    // Include landing pads with non-zero execution count.
    if (YamlBB.CallSites.empty() && !BB->isEntryPoint() &&
        !(BB->isLandingPad() && BB->getKnownExecutionCount() != 0)) {
      // Include blocks having successors or predecessors with positive counts.
      uint64_t SuccessorExecCount = 0;
      for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
           BB->branch_info())
        SuccessorExecCount += BranchInfo.Count;
      uint64_t PredecessorExecCount = 0;
      for (auto Pred : BB->predecessors())
        PredecessorExecCount += Pred->getBranchInfo(*BB).Count;
      if (!SuccessorExecCount && !PredecessorExecCount)
        continue;
    }

    auto BranchInfo = BB->branch_info_begin();
    for (const BinaryBasicBlock *Successor : BB->successors()) {
      yaml::bolt::SuccessorInfo YamlSI;
      YamlSI.Index = Successor->getLayoutIndex();
      YamlSI.Count = BranchInfo->Count;
      YamlSI.Mispreds = BranchInfo->MispredictedCount;

      YamlBB.Successors.emplace_back(YamlSI);

      ++BranchInfo;
    }

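    // Attach pseudo probes that fall into this block's input address range.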
    if (PseudoProbeDecoder) {
      const AddressProbesMap &ProbeMap =
          PseudoProbeDecoder->getAddress2ProbesMap();
      const uint64_t FuncAddr = BF.getAddress();
      const std::pair<uint64_t, uint64_t> &BlockRange =
          BB->getInputAddressRange();
      const std::pair<uint64_t, uint64_t> BlockAddrRange = {
          FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
      auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second);
      YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId);
    }

    YamlBF.Blocks.emplace_back(YamlBB);
  }
  return YamlBF;
}

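// Write the complete profile: header, optional pseudo probe descriptor, and
// one entry per function that has profile data.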
std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
  const BinaryContext &BC = RI.getBinaryContext();
  const auto &Functions = BC.getBinaryFunctions();

  std::error_code EC;
  OS = std::make_unique<raw_fd_ostream>(Filename, EC, sys::fs::OF_None);
  if (EC) {
    errs() << "BOLT-WARNING: " << EC.message() << " : unable to open "
           << Filename << " for output.\n";
    return EC;
  }

  yaml::bolt::BinaryProfile BP;

  // Fill out the header info.
  BP.Header.Version = 1;
  BP.Header.FileName = std::string(BC.getFilename());
  std::optional<StringRef> BuildID = BC.getFileBuildID();
  BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>";
  BP.Header.Origin = std::string(RI.getProfileReader()->getReaderName());
  BP.Header.IsDFSOrder = opts::ProfileUseDFS;
  BP.Header.HashFunction = HashFunction::Default;

  StringSet<> EventNames = RI.getProfileReader()->getEventNames();
  if (!EventNames.empty()) {
    std::string Sep;
    for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames) {
      BP.Header.EventNames += Sep + EventEntry.first().str();
      Sep = ",";
    }
  }

  // Make sure the profile is consistent across all functions.
  uint16_t ProfileFlags = BinaryFunction::PF_NONE;
  for (const auto &BFI : Functions) {
    const BinaryFunction &BF = BFI.second;
    if (BF.hasProfile() && !BF.empty()) {
      assert(BF.getProfileFlags() != BinaryFunction::PF_NONE);
      if (ProfileFlags == BinaryFunction::PF_NONE)
        ProfileFlags = BF.getProfileFlags();

      assert(BF.getProfileFlags() == ProfileFlags &&
             "expected consistent profile flags across all functions");
    }
  }
  BP.Header.Flags = ProfileFlags;

  // Add probe inline tree nodes.
  InlineTreeDesc InlineTree;
  if (const MCPseudoProbeDecoder *Decoder =
          opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr)
    std::tie(BP.PseudoProbeDesc, InlineTree) = convertPseudoProbeDesc(*Decoder);

  // Add all function objects.
  for (const auto &BFI : Functions) {
    const BinaryFunction &BF = BFI.second;
    if (BF.hasProfile()) {
      if (!BF.hasValidProfile() && !RI.getProfileReader()->isTrustedSource())
        continue;

      BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS, InlineTree));
    }
  }

  // Write the profile.
  yaml::Output Out(*OS, nullptr, 0);
  Out << BP;

  return std::error_code();
}

} // namespace bolt
} // namespace llvm