1 | //===- bolt/Rewrite/PseudoProbeRewriter.cpp -------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Implement support for pseudo probes. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Core/BinaryFunction.h" |
14 | #include "bolt/Rewrite/MetadataRewriter.h" |
15 | #include "bolt/Rewrite/MetadataRewriters.h" |
16 | #include "bolt/Utils/CommandLineOpts.h" |
17 | #include "bolt/Utils/Utils.h" |
18 | #include "llvm/IR/Function.h" |
19 | #include "llvm/MC/MCPseudoProbe.h" |
20 | #include "llvm/Support/CommandLine.h" |
21 | #include "llvm/Support/Debug.h" |
22 | #include "llvm/Support/LEB128.h" |
23 | #include <memory> |
24 | |
25 | #undef DEBUG_TYPE |
26 | #define DEBUG_TYPE "pseudo-probe-rewriter" |
27 | |
28 | using namespace llvm; |
29 | using namespace bolt; |
30 | |
31 | namespace opts { |
32 | |
33 | enum PrintPseudoProbesOptions { |
34 | PPP_None = 0, |
35 | PPP_Probes_Section_Decode = 0x1, |
36 | PPP_Probes_Address_Conversion = 0x2, |
37 | PPP_Encoded_Probes = 0x3, |
38 | PPP_All = 0xf |
39 | }; |
40 | |
41 | static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes( |
42 | "print-pseudo-probes" , cl::desc("print pseudo probe info" ), |
43 | cl::init(Val: PPP_None), |
44 | cl::values(clEnumValN(PPP_Probes_Section_Decode, "decode" , |
45 | "decode probes section from binary" ), |
46 | clEnumValN(PPP_Probes_Address_Conversion, "address_conversion" , |
47 | "update address2ProbesMap with output block address" ), |
48 | clEnumValN(PPP_Encoded_Probes, "encoded_probes" , |
49 | "display the encoded probes in binary section" ), |
50 | clEnumValN(PPP_All, "all" , "enable all debugging printout" )), |
51 | cl::Hidden, cl::cat(BoltCategory)); |
52 | |
53 | extern cl::opt<bool> ProfileWritePseudoProbes; |
54 | extern cl::opt<bool> StaleMatchingWithPseudoProbes; |
55 | } // namespace opts |
56 | |
57 | namespace { |
58 | class PseudoProbeRewriter final : public MetadataRewriter { |
59 | /// .pseudo_probe_desc section. |
60 | /// Contains information about pseudo probe description, like its related |
61 | /// function |
62 | ErrorOr<BinarySection &> PseudoProbeDescSection{std::errc::bad_address}; |
63 | |
64 | /// .pseudo_probe section. |
65 | /// Contains information about pseudo probe details, like its address |
66 | ErrorOr<BinarySection &> PseudoProbeSection{std::errc::bad_address}; |
67 | |
68 | /// Update address of MCDecodedPseudoProbe. |
69 | void updatePseudoProbes(); |
70 | |
71 | /// Encode MCDecodedPseudoProbe. |
72 | void encodePseudoProbes(); |
73 | |
74 | /// Parse .pseudo_probe_desc section and .pseudo_probe section |
75 | /// Setup Pseudo probe decoder |
76 | /// If \p ProfiledOnly is set, only parse records for functions with profile. |
77 | void parsePseudoProbe(bool ProfiledOnly = false); |
78 | |
79 | /// PseudoProbe decoder |
80 | std::shared_ptr<MCPseudoProbeDecoder> ProbeDecoderPtr; |
81 | |
82 | public: |
83 | PseudoProbeRewriter(BinaryContext &BC) |
84 | : MetadataRewriter("pseudo-probe-rewriter" , BC), |
85 | ProbeDecoderPtr(std::make_shared<MCPseudoProbeDecoder>()) { |
86 | BC.setPseudoProbeDecoder(ProbeDecoderPtr); |
87 | } |
88 | |
89 | Error preCFGInitializer() override; |
90 | Error postEmitFinalizer() override; |
91 | |
92 | ~PseudoProbeRewriter() override { ProbeDecoderPtr.reset(); } |
93 | }; |
94 | |
95 | Error PseudoProbeRewriter::preCFGInitializer() { |
96 | if (opts::ProfileWritePseudoProbes || opts::StaleMatchingWithPseudoProbes) |
97 | parsePseudoProbe(ProfiledOnly: opts::ProfileWritePseudoProbes); |
98 | |
99 | return Error::success(); |
100 | } |
101 | |
102 | Error PseudoProbeRewriter::postEmitFinalizer() { |
103 | if (!opts::StaleMatchingWithPseudoProbes) |
104 | parsePseudoProbe(); |
105 | updatePseudoProbes(); |
106 | |
107 | return Error::success(); |
108 | } |
109 | |
110 | void PseudoProbeRewriter::parsePseudoProbe(bool ProfiledOnly) { |
111 | MCPseudoProbeDecoder &ProbeDecoder(*ProbeDecoderPtr); |
112 | PseudoProbeDescSection = BC.getUniqueSectionByName(SectionName: ".pseudo_probe_desc" ); |
113 | PseudoProbeSection = BC.getUniqueSectionByName(SectionName: ".pseudo_probe" ); |
114 | |
115 | if (!PseudoProbeDescSection && !PseudoProbeSection) { |
116 | // pesudo probe is not added to binary. It is normal and no warning needed. |
117 | return; |
118 | } |
119 | |
120 | // If only one section is found, it might mean the ELF is corrupted. |
121 | if (!PseudoProbeDescSection) { |
122 | errs() << "BOLT-WARNING: fail in reading .pseudo_probe_desc binary\n" ; |
123 | return; |
124 | } else if (!PseudoProbeSection) { |
125 | errs() << "BOLT-WARNING: fail in reading .pseudo_probe binary\n" ; |
126 | return; |
127 | } |
128 | |
129 | StringRef Contents = PseudoProbeDescSection->getContents(); |
130 | if (!ProbeDecoder.buildGUID2FuncDescMap( |
131 | Start: reinterpret_cast<const uint8_t *>(Contents.data()), Size: Contents.size(), |
132 | /*IsMMapped*/ true)) { |
133 | errs() << "BOLT-WARNING: fail in building GUID2FuncDescMap\n" ; |
134 | return; |
135 | } |
136 | |
137 | MCPseudoProbeDecoder::Uint64Set GuidFilter; |
138 | MCPseudoProbeDecoder::Uint64Map FuncStartAddrs; |
139 | SmallVector<StringRef, 0> Suffixes( |
140 | {".destroy" , ".resume" , ".llvm." , ".cold" , ".warm" }); |
141 | for (const BinaryFunction *F : BC.getAllBinaryFunctions()) { |
142 | bool HasProfile = F->hasProfileAvailable(); |
143 | for (const MCSymbol *Sym : F->getSymbols()) { |
144 | StringRef SymName = Sym->getName(); |
145 | for (auto Name : {std::optional(NameResolver::restore(Name: SymName)), |
146 | getCommonName(Name: SymName, KeepSuffix: false, Suffixes)}) { |
147 | if (!Name) |
148 | continue; |
149 | SymName = *Name; |
150 | uint64_t GUID = Function::getGUIDAssumingExternalLinkage(GlobalName: SymName); |
151 | FuncStartAddrs[GUID] = F->getAddress(); |
152 | if (ProfiledOnly && HasProfile) |
153 | GuidFilter.insert(V: GUID); |
154 | } |
155 | } |
156 | } |
157 | Contents = PseudoProbeSection->getContents(); |
158 | if (!ProbeDecoder.buildAddress2ProbeMap( |
159 | Start: reinterpret_cast<const uint8_t *>(Contents.data()), Size: Contents.size(), |
160 | GuildFilter: GuidFilter, FuncStartAddrs)) { |
161 | errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n" ; |
162 | return; |
163 | } |
164 | |
165 | if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || |
166 | opts::PrintPseudoProbes == |
167 | opts::PrintPseudoProbesOptions::PPP_Probes_Section_Decode) { |
168 | outs() << "Report of decoding input pseudo probe binaries \n" ; |
169 | ProbeDecoder.printGUID2FuncDescMap(OS&: outs()); |
170 | ProbeDecoder.printProbesForAllAddresses(OS&: outs()); |
171 | } |
172 | |
173 | const GUIDProbeFunctionMap &GUID2Func = ProbeDecoder.getGUID2FuncDescMap(); |
174 | // Checks GUID in GUID2Func and returns it if it's present or null otherwise. |
175 | auto checkGUID = [&](StringRef SymName) -> uint64_t { |
176 | uint64_t GUID = Function::getGUIDAssumingExternalLinkage(GlobalName: SymName); |
177 | if (GUID2Func.find(GUID) == GUID2Func.end()) |
178 | return 0; |
179 | return GUID; |
180 | }; |
181 | for (BinaryFunction *F : BC.getAllBinaryFunctions()) { |
182 | for (const MCSymbol *Sym : F->getSymbols()) { |
183 | StringRef SymName = NameResolver::restore(Name: Sym->getName()); |
184 | uint64_t GUID = checkGUID(SymName); |
185 | std::optional<StringRef> CommonName = |
186 | getCommonName(Name: SymName, KeepSuffix: false, Suffixes); |
187 | if (!GUID && CommonName) |
188 | GUID = checkGUID(*CommonName); |
189 | if (GUID) |
190 | F->setGUID(GUID); |
191 | } |
192 | } |
193 | } |
194 | |
195 | void PseudoProbeRewriter::updatePseudoProbes() { |
196 | MCPseudoProbeDecoder &ProbeDecoder(*ProbeDecoderPtr); |
197 | // check if there is pseudo probe section decoded |
198 | if (ProbeDecoder.getAddress2ProbesMap().empty()) |
199 | return; |
200 | // input address converted to output |
201 | AddressProbesMap &Address2ProbesMap = ProbeDecoder.getAddress2ProbesMap(); |
202 | const GUIDProbeFunctionMap &GUID2Func = ProbeDecoder.getGUID2FuncDescMap(); |
203 | |
204 | for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) { |
205 | uint64_t Address = Probe.getAddress(); |
206 | BinaryFunction *F = BC.getBinaryFunctionContainingAddress(Address); |
207 | // If F is removed, eliminate all probes inside it from inline tree |
208 | // Setting probes' addresses as INT64_MAX means elimination |
209 | if (!F) { |
210 | Probe.setAddress(INT64_MAX); |
211 | continue; |
212 | } |
213 | // If F is not emitted, the function will remain in the same address as its |
214 | // input |
215 | if (!F->isEmitted()) |
216 | continue; |
217 | |
218 | uint64_t Offset = Address - F->getAddress(); |
219 | const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset); |
220 | uint64_t BlkOutputAddress = BB->getOutputAddressRange().first; |
221 | // Check if block output address is defined. |
222 | // If not, such block is removed from binary. Then remove the probes from |
223 | // inline tree |
224 | if (BlkOutputAddress == 0) { |
225 | Probe.setAddress(INT64_MAX); |
226 | continue; |
227 | } |
228 | |
229 | if (Probe.isBlock()) { |
230 | Probe.setAddress(BlkOutputAddress); |
231 | } else if (Probe.isCall()) { |
232 | // A call probe may be duplicated due to ICP |
233 | // Go through output of InputOffsetToAddressMap to collect all related |
234 | // probes |
235 | auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(InputAddress: Address); |
236 | auto CallOutputAddress = CallOutputAddresses.first; |
237 | if (CallOutputAddress == CallOutputAddresses.second) { |
238 | Probe.setAddress(INT64_MAX); |
239 | } else { |
240 | Probe.setAddress(CallOutputAddress->second); |
241 | CallOutputAddress = std::next(x: CallOutputAddress); |
242 | } |
243 | |
244 | while (CallOutputAddress != CallOutputAddresses.second) { |
245 | ProbeDecoder.addInjectedProbe(Probe, Address: CallOutputAddress->second); |
246 | CallOutputAddress = std::next(x: CallOutputAddress); |
247 | } |
248 | } |
249 | } |
250 | |
251 | if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || |
252 | opts::PrintPseudoProbes == |
253 | opts::PrintPseudoProbesOptions::PPP_Probes_Address_Conversion) { |
254 | outs() << "Pseudo Probe Address Conversion results:\n" ; |
255 | // table that correlates address to block |
256 | std::unordered_map<uint64_t, StringRef> Addr2BlockNames; |
257 | for (auto &F : BC.getBinaryFunctions()) |
258 | for (BinaryBasicBlock &BinaryBlock : F.second) |
259 | Addr2BlockNames[BinaryBlock.getOutputAddressRange().first] = |
260 | BinaryBlock.getName(); |
261 | |
262 | // scan all addresses -> correlate probe to block when print out |
263 | for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) { |
264 | if (Probe.getAddress() == INT64_MAX) |
265 | outs() << "Deleted Probe: " ; |
266 | else |
267 | outs() << "Address: " << format_hex(N: Probe.getAddress(), Width: 8) << " " ; |
268 | Probe.print(OS&: outs(), GUID2FuncMAP: GUID2Func, ShowName: true); |
269 | // print block name only if the probe is block type and undeleted. |
270 | if (Probe.isBlock() && Probe.getAddress() != INT64_MAX) |
271 | outs() << format_hex(N: Probe.getAddress(), Width: 8) << " Probe is in " |
272 | << Addr2BlockNames[Probe.getAddress()] << "\n" ; |
273 | } |
274 | outs() << "=======================================\n" ; |
275 | } |
276 | |
277 | // encode pseudo probes with updated addresses |
278 | encodePseudoProbes(); |
279 | } |
280 | |
281 | void PseudoProbeRewriter::encodePseudoProbes() { |
282 | MCPseudoProbeDecoder &ProbeDecoder(*ProbeDecoderPtr); |
283 | // Buffer for new pseudo probes section |
284 | SmallString<8> Contents; |
285 | MCDecodedPseudoProbe *LastProbe = nullptr; |
286 | |
287 | auto EmitInt = [&](uint64_t Value, uint32_t Size) { |
288 | const bool IsLittleEndian = BC.AsmInfo->isLittleEndian(); |
289 | uint64_t Swapped = support::endian::byte_swap( |
290 | value: Value, |
291 | endian: IsLittleEndian ? llvm::endianness::little : llvm::endianness::big); |
292 | unsigned Index = IsLittleEndian ? 0 : 8 - Size; |
293 | auto Entry = StringRef(reinterpret_cast<char *>(&Swapped) + Index, Size); |
294 | Contents.append(in_start: Entry.begin(), in_end: Entry.end()); |
295 | }; |
296 | |
297 | auto EmitULEB128IntValue = [&](uint64_t Value) { |
298 | SmallString<128> Tmp; |
299 | raw_svector_ostream OSE(Tmp); |
300 | encodeULEB128(Value, OS&: OSE, PadTo: 0); |
301 | Contents.append(in_start: OSE.str().begin(), in_end: OSE.str().end()); |
302 | }; |
303 | |
304 | auto EmitSLEB128IntValue = [&](int64_t Value) { |
305 | SmallString<128> Tmp; |
306 | raw_svector_ostream OSE(Tmp); |
307 | encodeSLEB128(Value, OS&: OSE); |
308 | Contents.append(in_start: OSE.str().begin(), in_end: OSE.str().end()); |
309 | }; |
310 | |
311 | // Emit indiviual pseudo probes in a inline tree node |
312 | // Probe index, type, attribute, address type and address are encoded |
313 | // Address of the first probe is absolute. |
314 | // Other probes' address are represented by delta |
315 | auto EmitDecodedPseudoProbe = [&](MCDecodedPseudoProbe *&CurProbe) { |
316 | assert(!isSentinelProbe(CurProbe->getAttributes()) && |
317 | "Sentinel probes should not be emitted" ); |
318 | EmitULEB128IntValue(CurProbe->getIndex()); |
319 | uint8_t PackedType = CurProbe->getType() | (CurProbe->getAttributes() << 4); |
320 | uint8_t Flag = |
321 | LastProbe ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0; |
322 | EmitInt(Flag | PackedType, 1); |
323 | if (LastProbe) { |
324 | // Emit the delta between the address label and LastProbe. |
325 | int64_t Delta = CurProbe->getAddress() - LastProbe->getAddress(); |
326 | EmitSLEB128IntValue(Delta); |
327 | } else { |
328 | // Emit absolute address for encoding the first pseudo probe. |
329 | uint32_t AddrSize = BC.AsmInfo->getCodePointerSize(); |
330 | EmitInt(CurProbe->getAddress(), AddrSize); |
331 | } |
332 | }; |
333 | |
334 | std::map<InlineSite, MCDecodedPseudoProbeInlineTree *, |
335 | std::greater<InlineSite>> |
336 | Inlinees; |
337 | |
338 | // DFS of inline tree to emit pseudo probes in all tree node |
339 | // Inline site index of a probe is emitted first. |
340 | // Then tree node Guid, size of pseudo probes and children nodes, and detail |
341 | // of contained probes are emitted Deleted probes are skipped Root node is not |
342 | // encoded to binaries. It's a "wrapper" of inline trees of each function. |
343 | std::list<std::pair<uint64_t, MCDecodedPseudoProbeInlineTree *>> NextNodes; |
344 | const MCDecodedPseudoProbeInlineTree &Root = |
345 | ProbeDecoder.getDummyInlineRoot(); |
346 | for (auto Child = Root.getChildren().begin(); |
347 | Child != Root.getChildren().end(); ++Child) |
348 | Inlinees[Child->getInlineSite()] = &*Child; |
349 | |
350 | for (auto Inlinee : Inlinees) |
351 | // INT64_MAX is "placeholder" of unused callsite index field in the pair |
352 | NextNodes.push_back(x: {INT64_MAX, Inlinee.second}); |
353 | |
354 | Inlinees.clear(); |
355 | |
356 | while (!NextNodes.empty()) { |
357 | uint64_t ProbeIndex = NextNodes.back().first; |
358 | MCDecodedPseudoProbeInlineTree *Cur = NextNodes.back().second; |
359 | NextNodes.pop_back(); |
360 | |
361 | if (Cur->Parent && !Cur->Parent->isRoot()) |
362 | // Emit probe inline site |
363 | EmitULEB128IntValue(ProbeIndex); |
364 | |
365 | // Emit probes grouped by GUID. |
366 | LLVM_DEBUG({ |
367 | dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); |
368 | dbgs() << "GUID: " << Cur->Guid << "\n" ; |
369 | }); |
370 | // Emit Guid |
371 | EmitInt(Cur->Guid, 8); |
372 | // Emit number of probes in this node |
373 | uint64_t Deleted = 0; |
374 | for (MCDecodedPseudoProbe *&Probe : |
375 | llvm::make_pointer_range(Range: Cur->getProbes())) |
376 | if (Probe->getAddress() == INT64_MAX) |
377 | Deleted++; |
378 | LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n" ); |
379 | size_t InjectedProbes = ProbeDecoder.getNumInjectedProbes(Parent: Cur); |
380 | uint64_t ProbesSize = Cur->getProbes().size() - Deleted + InjectedProbes; |
381 | EmitULEB128IntValue(ProbesSize); |
382 | // Emit number of direct inlinees |
383 | EmitULEB128IntValue(Cur->getChildren().size()); |
384 | // Emit probes in this group |
385 | for (MCDecodedPseudoProbe *&Probe : |
386 | llvm::make_pointer_range(Range: Cur->getProbes())) { |
387 | if (Probe->getAddress() == INT64_MAX) |
388 | continue; |
389 | EmitDecodedPseudoProbe(Probe); |
390 | LastProbe = Probe; |
391 | } |
392 | if (InjectedProbes) { |
393 | for (MCDecodedPseudoProbe *&Probe : |
394 | llvm::make_pointer_range(Range: ProbeDecoder.getInjectedProbes(Parent: Cur))) { |
395 | if (Probe->getAddress() == INT64_MAX) |
396 | continue; |
397 | EmitDecodedPseudoProbe(Probe); |
398 | LastProbe = Probe; |
399 | } |
400 | } |
401 | |
402 | for (auto Child = Cur->getChildren().begin(); |
403 | Child != Cur->getChildren().end(); ++Child) |
404 | Inlinees[Child->getInlineSite()] = &*Child; |
405 | for (const auto &Inlinee : Inlinees) { |
406 | assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid" ); |
407 | NextNodes.push_back(x: {std::get<1>(t: Inlinee.first), Inlinee.second}); |
408 | LLVM_DEBUG({ |
409 | dbgs().indent(MCPseudoProbeTable::DdgPrintIndent); |
410 | dbgs() << "InlineSite: " << std::get<1>(Inlinee.first) << "\n" ; |
411 | }); |
412 | } |
413 | Inlinees.clear(); |
414 | } |
415 | |
416 | // Create buffer for new contents for the section |
417 | // Freed when parent section is destroyed |
418 | uint8_t *Output = new uint8_t[Contents.str().size()]; |
419 | memcpy(dest: Output, src: Contents.str().data(), n: Contents.str().size()); |
420 | BC.registerOrUpdateSection(Name: ".pseudo_probe" , ELFType: PseudoProbeSection->getELFType(), |
421 | ELFFlags: PseudoProbeSection->getELFFlags(), Data: Output, |
422 | Size: Contents.str().size(), Alignment: 1); |
423 | if (opts::PrintPseudoProbes == opts::PrintPseudoProbesOptions::PPP_All || |
424 | opts::PrintPseudoProbes == |
425 | opts::PrintPseudoProbesOptions::PPP_Encoded_Probes) { |
426 | // create a dummy decoder; |
427 | MCPseudoProbeDecoder DummyDecoder; |
428 | StringRef DescContents = PseudoProbeDescSection->getContents(); |
429 | DummyDecoder.buildGUID2FuncDescMap( |
430 | Start: reinterpret_cast<const uint8_t *>(DescContents.data()), |
431 | Size: DescContents.size()); |
432 | StringRef ProbeContents = PseudoProbeSection->getOutputContents(); |
433 | MCPseudoProbeDecoder::Uint64Set GuidFilter; |
434 | MCPseudoProbeDecoder::Uint64Map FuncStartAddrs; |
435 | for (const BinaryFunction *F : BC.getAllBinaryFunctions()) { |
436 | const uint64_t Addr = |
437 | F->isEmitted() ? F->getOutputAddress() : F->getAddress(); |
438 | FuncStartAddrs[Function::getGUIDAssumingExternalLinkage( |
439 | GlobalName: NameResolver::restore(Name: F->getOneName()))] = Addr; |
440 | } |
441 | DummyDecoder.buildAddress2ProbeMap( |
442 | Start: reinterpret_cast<const uint8_t *>(ProbeContents.data()), |
443 | Size: ProbeContents.size(), GuildFilter: GuidFilter, FuncStartAddrs); |
444 | DummyDecoder.printProbesForAllAddresses(OS&: outs()); |
445 | } |
446 | } |
447 | } // namespace |
448 | |
449 | std::unique_ptr<MetadataRewriter> |
450 | llvm::bolt::createPseudoProbeRewriter(BinaryContext &BC) { |
451 | return std::make_unique<PseudoProbeRewriter>(args&: BC); |
452 | } |
453 | |