1 | //===- bolt/Rewrite/PseudoProbeRewriter.cpp -------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Implement support for pseudo probes. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Core/BinaryFunction.h" |
14 | #include "bolt/Rewrite/MetadataRewriter.h" |
15 | #include "bolt/Rewrite/MetadataRewriters.h" |
16 | #include "bolt/Utils/CommandLineOpts.h" |
17 | #include "llvm/IR/Function.h" |
18 | #include "llvm/MC/MCPseudoProbe.h" |
19 | #include "llvm/Support/CommandLine.h" |
20 | #include "llvm/Support/Debug.h" |
21 | #include "llvm/Support/LEB128.h" |
22 | |
23 | #undef DEBUG_TYPE |
24 | #define DEBUG_TYPE "pseudo-probe-rewriter" |
25 | |
26 | using namespace llvm; |
27 | using namespace bolt; |
28 | |
29 | namespace opts { |
30 | |
31 | enum PrintPseudoProbesOptions { |
32 | PPP_None = 0, |
33 | PPP_Probes_Section_Decode = 0x1, |
34 | PPP_Probes_Address_Conversion = 0x2, |
35 | PPP_Encoded_Probes = 0x3, |
36 | PPP_All = 0xf |
37 | }; |
38 | |
39 | static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes( |
40 | "print-pseudo-probes" , cl::desc("print pseudo probe info" ), |
41 | cl::init(Val: PPP_None), |
42 | cl::values(clEnumValN(PPP_Probes_Section_Decode, "decode" , |
43 | "decode probes section from binary" ), |
44 | clEnumValN(PPP_Probes_Address_Conversion, "address_conversion" , |
45 | "update address2ProbesMap with output block address" ), |
46 | clEnumValN(PPP_Encoded_Probes, "encoded_probes" , |
47 | "display the encoded probes in binary section" ), |
48 | clEnumValN(PPP_All, "all" , "enable all debugging printout" )), |
49 | cl::Hidden, cl::cat(BoltCategory)); |
50 | |
51 | } // namespace opts |
52 | |
53 | namespace { |
54 | class PseudoProbeRewriter final : public MetadataRewriter { |
55 | /// .pseudo_probe_desc section. |
56 | /// Contains information about pseudo probe description, like its related |
57 | /// function |
58 | ErrorOr<BinarySection &> PseudoProbeDescSection{std::errc::bad_address}; |
59 | |
60 | /// .pseudo_probe section. |
61 | /// Contains information about pseudo probe details, like its address |
62 | ErrorOr<BinarySection &> PseudoProbeSection{std::errc::bad_address}; |
63 | |
64 | /// Update address of MCDecodedPseudoProbe. |
65 | void updatePseudoProbes(); |
66 | |
67 | /// Encode MCDecodedPseudoProbe. |
68 | void encodePseudoProbes(); |
69 | |
70 | /// Parse .pseudo_probe_desc section and .pseudo_probe section |
71 | /// Setup Pseudo probe decoder |
72 | void parsePseudoProbe(); |
73 | |
74 | /// PseudoProbe decoder |
75 | MCPseudoProbeDecoder ProbeDecoder; |
76 | |
77 | public: |
78 | PseudoProbeRewriter(BinaryContext &BC) |
79 | : MetadataRewriter("pseudo-probe-rewriter" , BC) {} |
80 | |
81 | Error postEmitFinalizer() override; |
82 | }; |
83 | |
84 | Error PseudoProbeRewriter::postEmitFinalizer() { |
85 | parsePseudoProbe(); |
86 | updatePseudoProbes(); |
87 | |
88 | return Error::success(); |
89 | } |
90 | |
91 | void PseudoProbeRewriter::parsePseudoProbe() { |
92 | PseudoProbeDescSection = BC.getUniqueSectionByName(SectionName: ".pseudo_probe_desc" ); |
93 | PseudoProbeSection = BC.getUniqueSectionByName(SectionName: ".pseudo_probe" ); |
94 | |
95 | if (!PseudoProbeDescSection && !PseudoProbeSection) { |
96 | // pesudo probe is not added to binary. It is normal and no warning needed. |
97 | return; |
98 | } |
99 | |
100 | // If only one section is found, it might mean the ELF is corrupted. |
101 | if (!PseudoProbeDescSection) { |
102 | errs() << "BOLT-WARNING: fail in reading .pseudo_probe_desc binary\n" ; |
103 | return; |
104 | } else if (!PseudoProbeSection) { |
105 | errs() << "BOLT-WARNING: fail in reading .pseudo_probe binary\n" ; |
106 | return; |
107 | } |
108 | |
109 | StringRef Contents = PseudoProbeDescSection->getContents(); |
110 | if (!ProbeDecoder.buildGUID2FuncDescMap( |
111 | Start: reinterpret_cast<const uint8_t *>(Contents.data()), |
112 | Size: Contents.size())) { |
113 | errs() << "BOLT-WARNING: fail in building GUID2FuncDescMap\n" ; |
114 | return; |
115 | } |
116 | |
117 | MCPseudoProbeDecoder::Uint64Set GuidFilter; |
118 | MCPseudoProbeDecoder::Uint64Map FuncStartAddrs; |
119 | for (const BinaryFunction *F : BC.getAllBinaryFunctions()) { |
120 | for (const MCSymbol *Sym : F->getSymbols()) { |
121 | FuncStartAddrs[Function::getGUID(GlobalName: NameResolver::restore(Name: Sym->getName()))] = |
122 | F->getAddress(); |
123 | } |
124 | } |
125 | Contents = PseudoProbeSection->getContents(); |
126 | if (!ProbeDecoder.buildAddress2ProbeMap( |
127 | Start: reinterpret_cast<const uint8_t *>(Contents.data()), Size: Contents.size(), |
128 | GuildFilter: GuidFilter, FuncStartAddrs)) { |
129 | ProbeDecoder.getAddress2ProbesMap().clear(); |
130 | errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n" ; |
131 | return; |
132 | } |
133 | |
134 | if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || |
135 | opts::PrintPseudoProbes == |
136 | opts::PrintPseudoProbesOptions::PPP_Probes_Section_Decode) { |
137 | outs() << "Report of decoding input pseudo probe binaries \n" ; |
138 | ProbeDecoder.printGUID2FuncDescMap(OS&: outs()); |
139 | ProbeDecoder.printProbesForAllAddresses(OS&: outs()); |
140 | } |
141 | } |
142 | |
143 | void PseudoProbeRewriter::updatePseudoProbes() { |
144 | // check if there is pseudo probe section decoded |
145 | if (ProbeDecoder.getAddress2ProbesMap().empty()) |
146 | return; |
147 | // input address converted to output |
148 | AddressProbesMap &Address2ProbesMap = ProbeDecoder.getAddress2ProbesMap(); |
149 | const GUIDProbeFunctionMap &GUID2Func = ProbeDecoder.getGUID2FuncDescMap(); |
150 | |
151 | for (auto &AP : Address2ProbesMap) { |
152 | BinaryFunction *F = BC.getBinaryFunctionContainingAddress(Address: AP.first); |
153 | // If F is removed, eliminate all probes inside it from inline tree |
154 | // Setting probes' addresses as INT64_MAX means elimination |
155 | if (!F) { |
156 | for (MCDecodedPseudoProbe &Probe : AP.second) |
157 | Probe.setAddress(INT64_MAX); |
158 | continue; |
159 | } |
160 | // If F is not emitted, the function will remain in the same address as its |
161 | // input |
162 | if (!F->isEmitted()) |
163 | continue; |
164 | |
165 | uint64_t Offset = AP.first - F->getAddress(); |
166 | const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset); |
167 | uint64_t BlkOutputAddress = BB->getOutputAddressRange().first; |
168 | // Check if block output address is defined. |
169 | // If not, such block is removed from binary. Then remove the probes from |
170 | // inline tree |
171 | if (BlkOutputAddress == 0) { |
172 | for (MCDecodedPseudoProbe &Probe : AP.second) |
173 | Probe.setAddress(INT64_MAX); |
174 | continue; |
175 | } |
176 | |
177 | unsigned ProbeTrack = AP.second.size(); |
178 | std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin(); |
179 | while (ProbeTrack != 0) { |
180 | if (Probe->isBlock()) { |
181 | Probe->setAddress(BlkOutputAddress); |
182 | } else if (Probe->isCall()) { |
183 | // A call probe may be duplicated due to ICP |
184 | // Go through output of InputOffsetToAddressMap to collect all related |
185 | // probes |
186 | auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(InputAddress: AP.first); |
187 | auto CallOutputAddress = CallOutputAddresses.first; |
188 | if (CallOutputAddress == CallOutputAddresses.second) { |
189 | Probe->setAddress(INT64_MAX); |
190 | } else { |
191 | Probe->setAddress(CallOutputAddress->second); |
192 | CallOutputAddress = std::next(x: CallOutputAddress); |
193 | } |
194 | |
195 | while (CallOutputAddress != CallOutputAddresses.second) { |
196 | AP.second.push_back(x: *Probe); |
197 | AP.second.back().setAddress(CallOutputAddress->second); |
198 | Probe->getInlineTreeNode()->addProbes(Probe: &(AP.second.back())); |
199 | CallOutputAddress = std::next(x: CallOutputAddress); |
200 | } |
201 | } |
202 | Probe = std::next(x: Probe); |
203 | ProbeTrack--; |
204 | } |
205 | } |
206 | |
207 | if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || |
208 | opts::PrintPseudoProbes == |
209 | opts::PrintPseudoProbesOptions::PPP_Probes_Address_Conversion) { |
210 | outs() << "Pseudo Probe Address Conversion results:\n" ; |
211 | // table that correlates address to block |
212 | std::unordered_map<uint64_t, StringRef> Addr2BlockNames; |
213 | for (auto &F : BC.getBinaryFunctions()) |
214 | for (BinaryBasicBlock &BinaryBlock : F.second) |
215 | Addr2BlockNames[BinaryBlock.getOutputAddressRange().first] = |
216 | BinaryBlock.getName(); |
217 | |
218 | // scan all addresses -> correlate probe to block when print out |
219 | std::vector<uint64_t> Addresses; |
220 | for (auto &Entry : Address2ProbesMap) |
221 | Addresses.push_back(x: Entry.first); |
222 | llvm::sort(C&: Addresses); |
223 | for (uint64_t Key : Addresses) { |
224 | for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) { |
225 | if (Probe.getAddress() == INT64_MAX) |
226 | outs() << "Deleted Probe: " ; |
227 | else |
228 | outs() << "Address: " << format_hex(N: Probe.getAddress(), Width: 8) << " " ; |
229 | Probe.print(OS&: outs(), GUID2FuncMAP: GUID2Func, ShowName: true); |
230 | // print block name only if the probe is block type and undeleted. |
231 | if (Probe.isBlock() && Probe.getAddress() != INT64_MAX) |
232 | outs() << format_hex(N: Probe.getAddress(), Width: 8) << " Probe is in " |
233 | << Addr2BlockNames[Probe.getAddress()] << "\n" ; |
234 | } |
235 | } |
236 | outs() << "=======================================\n" ; |
237 | } |
238 | |
239 | // encode pseudo probes with updated addresses |
240 | encodePseudoProbes(); |
241 | } |
242 | |
243 | void PseudoProbeRewriter::encodePseudoProbes() { |
244 | // Buffer for new pseudo probes section |
245 | SmallString<8> Contents; |
246 | MCDecodedPseudoProbe *LastProbe = nullptr; |
247 | |
248 | auto EmitInt = [&](uint64_t Value, uint32_t Size) { |
249 | const bool IsLittleEndian = BC.AsmInfo->isLittleEndian(); |
250 | uint64_t Swapped = support::endian::byte_swap( |
251 | value: Value, |
252 | endian: IsLittleEndian ? llvm::endianness::little : llvm::endianness::big); |
253 | unsigned Index = IsLittleEndian ? 0 : 8 - Size; |
254 | auto Entry = StringRef(reinterpret_cast<char *>(&Swapped) + Index, Size); |
255 | Contents.append(in_start: Entry.begin(), in_end: Entry.end()); |
256 | }; |
257 | |
258 | auto EmitULEB128IntValue = [&](uint64_t Value) { |
259 | SmallString<128> Tmp; |
260 | raw_svector_ostream OSE(Tmp); |
261 | encodeULEB128(Value, OS&: OSE, PadTo: 0); |
262 | Contents.append(in_start: OSE.str().begin(), in_end: OSE.str().end()); |
263 | }; |
264 | |
265 | auto EmitSLEB128IntValue = [&](int64_t Value) { |
266 | SmallString<128> Tmp; |
267 | raw_svector_ostream OSE(Tmp); |
268 | encodeSLEB128(Value, OS&: OSE); |
269 | Contents.append(in_start: OSE.str().begin(), in_end: OSE.str().end()); |
270 | }; |
271 | |
272 | // Emit indiviual pseudo probes in a inline tree node |
273 | // Probe index, type, attribute, address type and address are encoded |
274 | // Address of the first probe is absolute. |
275 | // Other probes' address are represented by delta |
276 | auto EmitDecodedPseudoProbe = [&](MCDecodedPseudoProbe *&CurProbe) { |
277 | assert(!isSentinelProbe(CurProbe->getAttributes()) && |
278 | "Sentinel probes should not be emitted" ); |
279 | EmitULEB128IntValue(CurProbe->getIndex()); |
280 | uint8_t PackedType = CurProbe->getType() | (CurProbe->getAttributes() << 4); |
281 | uint8_t Flag = |
282 | LastProbe ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0; |
283 | EmitInt(Flag | PackedType, 1); |
284 | if (LastProbe) { |
285 | // Emit the delta between the address label and LastProbe. |
286 | int64_t Delta = CurProbe->getAddress() - LastProbe->getAddress(); |
287 | EmitSLEB128IntValue(Delta); |
288 | } else { |
289 | // Emit absolute address for encoding the first pseudo probe. |
290 | uint32_t AddrSize = BC.AsmInfo->getCodePointerSize(); |
291 | EmitInt(CurProbe->getAddress(), AddrSize); |
292 | } |
293 | }; |
294 | |
295 | std::map<InlineSite, MCDecodedPseudoProbeInlineTree *, |
296 | std::greater<InlineSite>> |
297 | Inlinees; |
298 | |
299 | // DFS of inline tree to emit pseudo probes in all tree node |
300 | // Inline site index of a probe is emitted first. |
301 | // Then tree node Guid, size of pseudo probes and children nodes, and detail |
302 | // of contained probes are emitted Deleted probes are skipped Root node is not |
303 | // encoded to binaries. It's a "wrapper" of inline trees of each function. |
304 | std::list<std::pair<uint64_t, MCDecodedPseudoProbeInlineTree *>> NextNodes; |
305 | const MCDecodedPseudoProbeInlineTree &Root = |
306 | ProbeDecoder.getDummyInlineRoot(); |
307 | for (auto Child = Root.getChildren().begin(); |
308 | Child != Root.getChildren().end(); ++Child) |
309 | Inlinees[Child->first] = Child->second.get(); |
310 | |
311 | for (auto Inlinee : Inlinees) |
312 | // INT64_MAX is "placeholder" of unused callsite index field in the pair |
313 | NextNodes.push_back(x: {INT64_MAX, Inlinee.second}); |
314 | |
315 | Inlinees.clear(); |
316 | |
317 | while (!NextNodes.empty()) { |
318 | uint64_t ProbeIndex = NextNodes.back().first; |
319 | MCDecodedPseudoProbeInlineTree *Cur = NextNodes.back().second; |
320 | NextNodes.pop_back(); |
321 | |
322 | if (Cur->Parent && !Cur->Parent->isRoot()) |
323 | // Emit probe inline site |
324 | EmitULEB128IntValue(ProbeIndex); |
325 | |
326 | // Emit probes grouped by GUID. |
327 | LLVM_DEBUG({ |
328 | dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); |
329 | dbgs() << "GUID: " << Cur->Guid << "\n" ; |
330 | }); |
331 | // Emit Guid |
332 | EmitInt(Cur->Guid, 8); |
333 | // Emit number of probes in this node |
334 | uint64_t Deleted = 0; |
335 | for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) |
336 | if (Probe->getAddress() == INT64_MAX) |
337 | Deleted++; |
338 | LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n" ); |
339 | uint64_t ProbesSize = Cur->getProbes().size() - Deleted; |
340 | EmitULEB128IntValue(ProbesSize); |
341 | // Emit number of direct inlinees |
342 | EmitULEB128IntValue(Cur->getChildren().size()); |
343 | // Emit probes in this group |
344 | for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) { |
345 | if (Probe->getAddress() == INT64_MAX) |
346 | continue; |
347 | EmitDecodedPseudoProbe(Probe); |
348 | LastProbe = Probe; |
349 | } |
350 | |
351 | for (auto Child = Cur->getChildren().begin(); |
352 | Child != Cur->getChildren().end(); ++Child) |
353 | Inlinees[Child->first] = Child->second.get(); |
354 | for (const auto &Inlinee : Inlinees) { |
355 | assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid" ); |
356 | NextNodes.push_back(x: {std::get<1>(t: Inlinee.first), Inlinee.second}); |
357 | LLVM_DEBUG({ |
358 | dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); |
359 | dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n" ; |
360 | }); |
361 | } |
362 | Inlinees.clear(); |
363 | } |
364 | |
365 | // Create buffer for new contents for the section |
366 | // Freed when parent section is destroyed |
367 | uint8_t *Output = new uint8_t[Contents.str().size()]; |
368 | memcpy(dest: Output, src: Contents.str().data(), n: Contents.str().size()); |
369 | BC.registerOrUpdateSection(Name: ".pseudo_probe" , ELFType: PseudoProbeSection->getELFType(), |
370 | ELFFlags: PseudoProbeSection->getELFFlags(), Data: Output, |
371 | Size: Contents.str().size(), Alignment: 1); |
372 | if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || |
373 | opts::PrintPseudoProbes == |
374 | opts::PrintPseudoProbesOptions::PPP_Encoded_Probes) { |
375 | // create a dummy decoder; |
376 | MCPseudoProbeDecoder DummyDecoder; |
377 | StringRef DescContents = PseudoProbeDescSection->getContents(); |
378 | DummyDecoder.buildGUID2FuncDescMap( |
379 | Start: reinterpret_cast<const uint8_t *>(DescContents.data()), |
380 | Size: DescContents.size()); |
381 | StringRef ProbeContents = PseudoProbeSection->getOutputContents(); |
382 | MCPseudoProbeDecoder::Uint64Set GuidFilter; |
383 | MCPseudoProbeDecoder::Uint64Map FuncStartAddrs; |
384 | for (const BinaryFunction *F : BC.getAllBinaryFunctions()) { |
385 | const uint64_t Addr = |
386 | F->isEmitted() ? F->getOutputAddress() : F->getAddress(); |
387 | FuncStartAddrs[Function::getGUID( |
388 | GlobalName: NameResolver::restore(Name: F->getOneName()))] = Addr; |
389 | } |
390 | DummyDecoder.buildAddress2ProbeMap( |
391 | Start: reinterpret_cast<const uint8_t *>(ProbeContents.data()), |
392 | Size: ProbeContents.size(), GuildFilter: GuidFilter, FuncStartAddrs); |
393 | DummyDecoder.printProbesForAllAddresses(OS&: outs()); |
394 | } |
395 | } |
396 | } // namespace |
397 | |
398 | std::unique_ptr<MetadataRewriter> |
399 | llvm::bolt::createPseudoProbeRewriter(BinaryContext &BC) { |
400 | return std::make_unique<PseudoProbeRewriter>(args&: BC); |
401 | } |
402 | |