1 | //===- bolt/Core/BinaryFunction.cpp - Low-level function ------------------===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the BinaryFunction class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Core/BinaryFunction.h" |
14 | #include "bolt/Core/BinaryBasicBlock.h" |
15 | #include "bolt/Core/DynoStats.h" |
16 | #include "bolt/Core/HashUtilities.h" |
17 | #include "bolt/Core/MCPlusBuilder.h" |
18 | #include "bolt/Utils/CommandLineOpts.h" |
19 | #include "bolt/Utils/NameResolver.h" |
20 | #include "bolt/Utils/NameShortener.h" |
21 | #include "bolt/Utils/Utils.h" |
22 | #include "llvm/ADT/STLExtras.h" |
23 | #include "llvm/ADT/SmallSet.h" |
24 | #include "llvm/ADT/StringExtras.h" |
25 | #include "llvm/ADT/StringRef.h" |
26 | #include "llvm/Demangle/Demangle.h" |
27 | #include "llvm/MC/MCAsmInfo.h" |
28 | #include "llvm/MC/MCContext.h" |
29 | #include "llvm/MC/MCDisassembler/MCDisassembler.h" |
30 | #include "llvm/MC/MCExpr.h" |
31 | #include "llvm/MC/MCInst.h" |
32 | #include "llvm/MC/MCInstPrinter.h" |
33 | #include "llvm/MC/MCRegisterInfo.h" |
34 | #include "llvm/MC/MCSymbol.h" |
35 | #include "llvm/Object/ObjectFile.h" |
36 | #include "llvm/Support/CommandLine.h" |
37 | #include "llvm/Support/Debug.h" |
38 | #include "llvm/Support/GenericDomTreeConstruction.h" |
39 | #include "llvm/Support/GenericLoopInfoImpl.h" |
40 | #include "llvm/Support/GraphWriter.h" |
41 | #include "llvm/Support/LEB128.h" |
42 | #include "llvm/Support/Regex.h" |
43 | #include "llvm/Support/Timer.h" |
44 | #include "llvm/Support/raw_ostream.h" |
45 | #include "llvm/Support/xxhash.h" |
46 | #include <functional> |
47 | #include <limits> |
48 | #include <numeric> |
49 | #include <stack> |
50 | #include <string> |
51 | |
52 | #define DEBUG_TYPE "bolt" |
53 | |
54 | using namespace llvm; |
55 | using namespace bolt; |
56 | |
57 | namespace opts { |
58 | |
59 | extern cl::OptionCategory BoltCategory; |
60 | extern cl::OptionCategory BoltOptCategory; |
61 | |
62 | extern cl::opt<bool> EnableBAT; |
63 | extern cl::opt<bool> Instrument; |
64 | extern cl::opt<bool> StrictMode; |
65 | extern cl::opt<bool> UpdateDebugSections; |
66 | extern cl::opt<unsigned> Verbosity; |
67 | |
68 | extern bool BinaryAnalysisMode; |
69 | extern HeatmapModeKind HeatmapMode; |
70 | extern bool processAllFunctions(); |
71 | |
72 | static cl::opt<bool> CheckEncoding( |
73 | "check-encoding", |
74 | cl::desc("perform verification of LLVM instruction encoding/decoding. " |
75 | "Every instruction in the input is decoded and re-encoded. " |
76 | "If the resulting bytes do not match the input, a warning message " |
77 | "is printed."), |
78 | cl::Hidden, cl::cat(BoltCategory)); |
79 | |
80 | static cl::opt<bool> DotToolTipCode( |
81 | "dot-tooltip-code", |
82 | cl::desc("add basic block instructions as tool tips on nodes"), cl::Hidden, |
83 | cl::cat(BoltCategory)); |
84 | |
85 | cl::opt<JumpTableSupportLevel> |
86 | JumpTables("jump-tables", |
87 | cl::desc("jump tables support (default=basic)"), |
88 | cl::init(Val: JTS_BASIC), |
89 | cl::values( |
90 | clEnumValN(JTS_NONE, "none", |
91 | "do not optimize functions with jump tables"), |
92 | clEnumValN(JTS_BASIC, "basic", |
93 | "optimize functions with jump tables"), |
94 | clEnumValN(JTS_MOVE, "move", |
95 | "move jump tables to a separate section"), |
96 | clEnumValN(JTS_SPLIT, "split", |
97 | "split jump tables section into hot and cold based on " |
98 | "function execution frequency"), |
99 | clEnumValN(JTS_AGGRESSIVE, "aggressive", |
100 | "aggressively split jump tables section based on usage " |
101 | "of the tables")), |
102 | cl::ZeroOrMore, |
103 | cl::cat(BoltOptCategory)); |
104 | |
105 | static cl::opt<bool> NoScan( |
106 | "no-scan", |
107 | cl::desc( |
108 | "do not scan cold functions for external references (may result in " |
109 | "slower binary)"), |
110 | cl::Hidden, cl::cat(BoltOptCategory)); |
111 | |
112 | cl::opt<bool> |
113 | PreserveBlocksAlignment("preserve-blocks-alignment", |
114 | cl::desc("try to preserve basic block alignment"), |
115 | cl::cat(BoltOptCategory)); |
116 | |
117 | static cl::opt<bool> PrintOutputAddressRange( |
118 | "print-output-address-range", |
119 | cl::desc( |
120 | "print output address range for each basic block in the function when" |
121 | "BinaryFunction::print is called"), |
122 | cl::Hidden, cl::cat(BoltOptCategory)); |
123 | |
124 | cl::opt<bool> |
125 | PrintDynoStats("dyno-stats", |
126 | cl::desc("print execution info based on profile"), |
127 | cl::cat(BoltCategory)); |
128 | |
129 | static cl::opt<bool> |
130 | PrintDynoStatsOnly("print-dyno-stats-only", |
131 | cl::desc("while printing functions output dyno-stats and skip instructions"), |
132 | cl::init(Val: false), |
133 | cl::Hidden, |
134 | cl::cat(BoltCategory)); |
135 | |
136 | static cl::list<std::string> |
137 | PrintOnly("print-only", |
138 | cl::CommaSeparated, |
139 | cl::desc("list of functions to print"), |
140 | cl::value_desc("func1,func2,func3,..."), |
141 | cl::Hidden, |
142 | cl::cat(BoltCategory)); |
143 | |
144 | cl::opt<bool> |
145 | TimeBuild("time-build", |
146 | cl::desc("print time spent constructing binary functions"), |
147 | cl::Hidden, cl::cat(BoltCategory)); |
148 | |
149 | static cl::opt<bool> TrapOnAVX512( |
150 | "trap-avx512", |
151 | cl::desc("in relocation mode trap upon entry to any function that uses " |
152 | "AVX-512 instructions"), |
153 | cl::init(Val: false), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); |
154 | |
155 | bool shouldPrint(const BinaryFunction &Function) { |
156 | if (Function.isIgnored()) |
157 | return false; |
158 | |
159 | if (PrintOnly.empty()) |
160 | return true; |
161 | |
162 | for (std::string &Name : opts::PrintOnly) { |
163 | if (Function.hasNameRegex(NameRegex: Name)) { |
164 | return true; |
165 | } |
166 | } |
167 | |
168 | std::optional<StringRef> Origin = Function.getOriginSectionName(); |
169 | return Origin && llvm::is_contained(Range&: opts::PrintOnly, Element: *Origin); |
170 | } |
171 | |
172 | } // namespace opts |
173 | |
174 | namespace llvm { |
175 | namespace bolt { |
176 | |
177 | template <typename R> static bool emptyRange(const R &Range) { |
178 | return Range.begin() == Range.end(); |
179 | } |
180 | |
181 | /// Gets debug line information for the instruction located at the given |
182 | /// address in the original binary. The SMLoc's pointer is used |
183 | /// to point to this information, which is represented by a |
184 | /// DebugLineTableRowRef. The returned pointer is null if no debug line |
185 | /// information for this instruction was found. |
186 | static SMLoc findDebugLineInformationForInstructionAt( |
187 | uint64_t Address, DWARFUnit *Unit, |
188 | const DWARFDebugLine::LineTable *LineTable) { |
189 | // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef, |
190 | // which occupies 64 bits. Thus, we can only proceed if the struct fits into |
191 | // the pointer itself. |
192 | static_assert( |
193 | sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef), |
194 | "Cannot fit instruction debug line information into SMLoc's pointer"); |
195 | |
196 | SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc(); |
197 | uint32_t RowIndex = LineTable->lookupAddress( |
198 | Address: {.Address: Address, .SectionIndex: object::SectionedAddress::UndefSection}); |
199 | if (RowIndex == LineTable->UnknownRowIndex) |
200 | return NullResult; |
201 | |
202 | assert(RowIndex < LineTable->Rows.size() && |
203 | "Line Table lookup returned invalid index."); |
204 | |
205 | decltype(SMLoc().getPointer()) Ptr; |
206 | DebugLineTableRowRef *InstructionLocation = |
207 | reinterpret_cast<DebugLineTableRowRef *>(&Ptr); |
208 | |
209 | InstructionLocation->DwCompileUnitIndex = Unit->getOffset(); |
210 | InstructionLocation->RowIndex = RowIndex + 1; |
211 | |
212 | return SMLoc::getFromPointer(Ptr); |
213 | } |
214 | |
215 | static std::string buildSectionName(StringRef Prefix, StringRef Name, |
216 | const BinaryContext &BC) { |
217 | if (BC.isELF()) |
218 | return (Prefix + Name).str(); |
219 | static NameShortener NS; |
220 | return (Prefix + Twine(NS.getID(Name))).str(); |
221 | } |
222 | |
223 | static raw_ostream &operator<<(raw_ostream &OS, |
224 | const BinaryFunction::State State) { |
225 | switch (State) { |
226 | case BinaryFunction::State::Empty: OS << "empty"; break; |
227 | case BinaryFunction::State::Disassembled: OS << "disassembled"; break; |
228 | case BinaryFunction::State::CFG: OS << "CFG constructed"; break; |
229 | case BinaryFunction::State::CFG_Finalized: OS << "CFG finalized"; break; |
230 | case BinaryFunction::State::EmittedCFG: OS << "emitted with CFG"; break; |
231 | case BinaryFunction::State::Emitted: OS << "emitted"; break; |
232 | } |
233 | |
234 | return OS; |
235 | } |
236 | |
237 | std::string BinaryFunction::buildCodeSectionName(StringRef Name, |
238 | const BinaryContext &BC) { |
239 | return buildSectionName(Prefix: BC.isELF() ? ".local.text.": ".l.text.", Name, BC); |
240 | } |
241 | |
242 | std::string BinaryFunction::buildColdCodeSectionName(StringRef Name, |
243 | const BinaryContext &BC) { |
244 | return buildSectionName(Prefix: BC.isELF() ? ".local.cold.text.": ".l.c.text.", Name, |
245 | BC); |
246 | } |
247 | |
248 | uint64_t BinaryFunction::Count = 0; |
249 | |
250 | std::optional<StringRef> |
251 | BinaryFunction::hasNameRegex(const StringRef Name) const { |
252 | const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str(); |
253 | Regex MatchName(RegexName); |
254 | return forEachName( |
255 | Callback: [&MatchName](StringRef Name) { return MatchName.match(String: Name); }); |
256 | } |
257 | |
258 | std::optional<StringRef> |
259 | BinaryFunction::hasRestoredNameRegex(const StringRef Name) const { |
260 | const std::string RegexName = (Twine("^") + StringRef(Name) + "$").str(); |
261 | Regex MatchName(RegexName); |
262 | return forEachName(Callback: [&MatchName](StringRef Name) { |
263 | return MatchName.match(String: NameResolver::restore(Name)); |
264 | }); |
265 | } |
266 | |
267 | std::string BinaryFunction::getDemangledName() const { |
268 | StringRef MangledName = NameResolver::restore(Name: getOneName()); |
269 | return demangle(MangledName: MangledName.str()); |
270 | } |
271 | |
272 | BinaryBasicBlock * |
273 | BinaryFunction::getBasicBlockContainingOffset(uint64_t Offset) { |
274 | if (Offset > Size) |
275 | return nullptr; |
276 | |
277 | if (BasicBlockOffsets.empty()) |
278 | return nullptr; |
279 | |
280 | /* |
281 | * This is commented out because it makes BOLT too slow. |
282 | * assert(std::is_sorted(BasicBlockOffsets.begin(), |
283 | * BasicBlockOffsets.end(), |
284 | * CompareBasicBlockOffsets()))); |
285 | */ |
286 | auto I = |
287 | llvm::upper_bound(Range&: BasicBlockOffsets, Value: BasicBlockOffset(Offset, nullptr), |
288 | C: CompareBasicBlockOffsets()); |
289 | assert(I != BasicBlockOffsets.begin() && "first basic block not at offset 0"); |
290 | --I; |
291 | BinaryBasicBlock *BB = I->second; |
292 | return (Offset < BB->getOffset() + BB->getOriginalSize()) ? BB : nullptr; |
293 | } |
294 | |
295 | void BinaryFunction::markUnreachableBlocks() { |
296 | std::stack<BinaryBasicBlock *> Stack; |
297 | |
298 | for (BinaryBasicBlock &BB : blocks()) |
299 | BB.markValid(Valid: false); |
300 | |
301 | // Add all entries and landing pads as roots. |
302 | for (BinaryBasicBlock *BB : BasicBlocks) { |
303 | if (isEntryPoint(BB: *BB) || BB->isLandingPad()) { |
304 | Stack.push(x: BB); |
305 | BB->markValid(Valid: true); |
306 | continue; |
307 | } |
308 | // FIXME: |
309 | // Also mark BBs with indirect jumps as reachable, since we do not |
310 | // support removing unused jump tables yet (GH-issue20). |
311 | for (const MCInst &Inst : *BB) { |
312 | if (BC.MIB->getJumpTable(Inst)) { |
313 | Stack.push(x: BB); |
314 | BB->markValid(Valid: true); |
315 | break; |
316 | } |
317 | } |
318 | } |
319 | |
320 | // Determine reachable BBs from the entry point |
321 | while (!Stack.empty()) { |
322 | BinaryBasicBlock *BB = Stack.top(); |
323 | Stack.pop(); |
324 | for (BinaryBasicBlock *Succ : BB->successors()) { |
325 | if (Succ->isValid()) |
326 | continue; |
327 | Succ->markValid(Valid: true); |
328 | Stack.push(x: Succ); |
329 | } |
330 | } |
331 | } |
332 | |
333 | // Any unnecessary fallthrough jumps revealed after calling eraseInvalidBBs |
334 | // will be cleaned up by fixBranches(). |
335 | std::pair<unsigned, uint64_t> |
336 | BinaryFunction::eraseInvalidBBs(const MCCodeEmitter *Emitter) { |
337 | DenseSet<const BinaryBasicBlock *> InvalidBBs; |
338 | unsigned Count = 0; |
339 | uint64_t Bytes = 0; |
340 | for (BinaryBasicBlock *const BB : BasicBlocks) { |
341 | if (!BB->isValid()) { |
342 | assert(!isEntryPoint(*BB) && "all entry blocks must be valid"); |
343 | InvalidBBs.insert(V: BB); |
344 | ++Count; |
345 | Bytes += BC.computeCodeSize(Beg: BB->begin(), End: BB->end(), Emitter); |
346 | } |
347 | } |
348 | |
349 | Layout.eraseBasicBlocks(ToErase: InvalidBBs); |
350 | |
351 | BasicBlockListType NewBasicBlocks; |
352 | for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) { |
353 | BinaryBasicBlock *BB = *I; |
354 | if (InvalidBBs.contains(V: BB)) { |
355 | // Make sure the block is removed from the list of predecessors. |
356 | BB->removeAllSuccessors(); |
357 | DeletedBasicBlocks.push_back(Elt: BB); |
358 | } else { |
359 | NewBasicBlocks.push_back(Elt: BB); |
360 | } |
361 | } |
362 | BasicBlocks = std::move(NewBasicBlocks); |
363 | |
364 | assert(BasicBlocks.size() == Layout.block_size()); |
365 | |
366 | // Update CFG state if needed |
367 | if (Count > 0) |
368 | recomputeLandingPads(); |
369 | |
370 | return std::make_pair(x&: Count, y&: Bytes); |
371 | } |
372 | |
373 | bool BinaryFunction::isForwardCall(const MCSymbol *CalleeSymbol) const { |
374 | // This function should work properly before and after function reordering. |
375 | // In order to accomplish this, we use the function index (if it is valid). |
376 | // If the function indices are not valid, we fall back to the original |
377 | // addresses. This should be ok because the functions without valid indices |
378 | // should have been ordered with a stable sort. |
379 | const BinaryFunction *CalleeBF = BC.getFunctionForSymbol(Symbol: CalleeSymbol); |
380 | if (CalleeBF) { |
381 | if (CalleeBF->isInjected()) |
382 | return true; |
383 | return compareBinaryFunctionByIndex(A: this, B: CalleeBF); |
384 | } else { |
385 | // Absolute symbol. |
386 | ErrorOr<uint64_t> CalleeAddressOrError = BC.getSymbolValue(Symbol: *CalleeSymbol); |
387 | assert(CalleeAddressOrError && "unregistered symbol found"); |
388 | return *CalleeAddressOrError > getAddress(); |
389 | } |
390 | } |
391 | |
392 | void BinaryFunction::dump() const { |
393 | // getDynoStats calls FunctionLayout::updateLayoutIndices and |
394 | // BasicBlock::analyzeBranch. The former cannot be const, but should be |
395 | // removed, the latter should be made const, but seems to require refactoring. |
396 | // Forcing all callers to have a non-const reference to BinaryFunction to call |
397 | // dump non-const however is not ideal either. Adding this const_cast is right |
398 | // now the best solution. It is safe, because BinaryFunction itself is not |
399 | // modified. Only BinaryBasicBlocks are actually modified (if it all) and we |
400 | // have mutable pointers to those regardless whether this function is |
401 | // const-qualified or not. |
402 | const_cast<BinaryFunction &>(*this).print(OS&: dbgs(), Annotation: ""); |
403 | } |
404 | |
405 | void BinaryFunction::print(raw_ostream &OS, std::string Annotation) { |
406 | if (!opts::shouldPrint(Function: *this)) |
407 | return; |
408 | |
409 | StringRef SectionName = |
410 | OriginSection ? OriginSection->getName() : "<no origin section>"; |
411 | OS << "Binary Function \""<< *this << "\" "<< Annotation << " {"; |
412 | std::vector<StringRef> AllNames = getNames(); |
413 | if (AllNames.size() > 1) { |
414 | OS << "\n All names : "; |
415 | const char *Sep = ""; |
416 | for (const StringRef &Name : AllNames) { |
417 | OS << Sep << Name; |
418 | Sep = "\n "; |
419 | } |
420 | } |
421 | OS << "\n Number : "<< FunctionNumber; |
422 | OS << "\n State : "<< CurrentState; |
423 | OS << "\n Address : 0x"<< Twine::utohexstr(Val: Address); |
424 | OS << "\n Size : 0x"<< Twine::utohexstr(Val: Size); |
425 | OS << "\n MaxSize : 0x"<< Twine::utohexstr(Val: MaxSize); |
426 | OS << "\n Offset : 0x"<< Twine::utohexstr(Val: getFileOffset()); |
427 | OS << "\n Section : "<< SectionName; |
428 | OS << "\n Orc Section : "<< getCodeSectionName(); |
429 | OS << "\n LSDA : 0x"<< Twine::utohexstr(Val: getLSDAAddress()); |
430 | OS << "\n IsSimple : "<< IsSimple; |
431 | OS << "\n IsMultiEntry: "<< isMultiEntry(); |
432 | OS << "\n IsSplit : "<< isSplit(); |
433 | OS << "\n BB Count : "<< size(); |
434 | |
435 | if (HasUnknownControlFlow) |
436 | OS << "\n Unknown CF : true"; |
437 | if (getPersonalityFunction()) |
438 | OS << "\n Personality : "<< getPersonalityFunction()->getName(); |
439 | if (IsFragment) |
440 | OS << "\n IsFragment : true"; |
441 | if (isFolded()) |
442 | OS << "\n FoldedInto : "<< *getFoldedIntoFunction(); |
443 | for (BinaryFunction *ParentFragment : ParentFragments) |
444 | OS << "\n Parent : "<< *ParentFragment; |
445 | if (!Fragments.empty()) { |
446 | OS << "\n Fragments : "; |
447 | ListSeparator LS; |
448 | for (BinaryFunction *Frag : Fragments) |
449 | OS << LS << *Frag; |
450 | } |
451 | if (hasCFG()) |
452 | OS << "\n Hash : "<< Twine::utohexstr(Val: computeHash()); |
453 | if (isMultiEntry()) { |
454 | OS << "\n Secondary Entry Points : "; |
455 | ListSeparator LS; |
456 | for (const auto &KV : SecondaryEntryPoints) |
457 | OS << LS << KV.second->getName(); |
458 | } |
459 | if (FrameInstructions.size()) |
460 | OS << "\n CFI Instrs : "<< FrameInstructions.size(); |
461 | if (!Layout.block_empty()) { |
462 | OS << "\n BB Layout : "; |
463 | ListSeparator LS; |
464 | for (const BinaryBasicBlock *BB : Layout.blocks()) |
465 | OS << LS << BB->getName(); |
466 | } |
467 | if (getImageAddress()) |
468 | OS << "\n Image : 0x"<< Twine::utohexstr(Val: getImageAddress()); |
469 | if (ExecutionCount != COUNT_NO_PROFILE) { |
470 | OS << "\n Exec Count : "<< ExecutionCount; |
471 | OS << "\n Sample Count: "<< RawSampleCount; |
472 | OS << "\n Profile Acc : "<< format(Fmt: "%.1f%%", Vals: ProfileMatchRatio * 100.0f); |
473 | } |
474 | if (ExternEntryCount) |
475 | OS << "\n Extern Entry Count: "<< ExternEntryCount; |
476 | |
477 | if (opts::PrintDynoStats && !getLayout().block_empty()) { |
478 | OS << '\n'; |
479 | DynoStats dynoStats = getDynoStats(BF&: *this); |
480 | OS << dynoStats; |
481 | } |
482 | |
483 | OS << "\n}\n"; |
484 | |
485 | if (opts::PrintDynoStatsOnly || !BC.InstPrinter) |
486 | return; |
487 | |
488 | // Offset of the instruction in function. |
489 | uint64_t Offset = 0; |
490 | |
491 | auto printConstantIslandInRange = [&](uint64_t Start, uint64_t End) { |
492 | assert(Start <= End && "Invalid range"); |
493 | std::optional<uint64_t> IslandOffset = getIslandInRange(StartOffset: Start, EndOffset: End); |
494 | |
495 | if (!IslandOffset) |
496 | return; |
497 | |
498 | // Print label if it exists at this offset. |
499 | if (const BinaryData *BD = |
500 | BC.getBinaryDataAtAddress(Address: getAddress() + *IslandOffset)) |
501 | OS << BD->getName() << ":\n"; |
502 | |
503 | const size_t IslandSize = getSizeOfDataInCodeAt(Offset: *IslandOffset); |
504 | BC.printData(OS, Data: BC.extractData(Address: getAddress() + *IslandOffset, Size: IslandSize), |
505 | Offset: *IslandOffset); |
506 | }; |
507 | |
508 | if (BasicBlocks.empty() && !Instructions.empty()) { |
509 | // Print before CFG was built. |
510 | uint64_t PrevOffset = 0; |
511 | for (const std::pair<const uint32_t, MCInst> &II : Instructions) { |
512 | Offset = II.first; |
513 | |
514 | // Print any constant islands inbeetween the instructions. |
515 | printConstantIslandInRange(PrevOffset, Offset); |
516 | |
517 | // Print label if exists at this offset. |
518 | auto LI = Labels.find(x: Offset); |
519 | if (LI != Labels.end()) { |
520 | if (const MCSymbol *EntrySymbol = |
521 | getSecondaryEntryPointSymbol(BBLabel: LI->second)) |
522 | OS << EntrySymbol->getName() << " (Entry Point):\n"; |
523 | OS << LI->second->getName() << ":\n"; |
524 | } |
525 | |
526 | BC.printInstruction(OS, Instruction: II.second, Offset, Function: this); |
527 | |
528 | PrevOffset = Offset; |
529 | } |
530 | |
531 | // Print any data at the end of the function. |
532 | printConstantIslandInRange(PrevOffset, getMaxSize()); |
533 | } |
534 | |
535 | StringRef SplitPointMsg = ""; |
536 | for (const FunctionFragment &FF : Layout.fragments()) { |
537 | OS << SplitPointMsg; |
538 | SplitPointMsg = "------- HOT-COLD SPLIT POINT -------\n\n"; |
539 | for (const BinaryBasicBlock *BB : FF) { |
540 | OS << BB->getName() << " ("<< BB->size() |
541 | << " instructions, align : "<< BB->getAlignment() << ")\n"; |
542 | |
543 | if (opts::PrintOutputAddressRange) |
544 | OS << formatv(Fmt: " Output Address Range: [{0:x}, {1:x}) ({2} bytes)\n", |
545 | Vals: BB->getOutputAddressRange().first, |
546 | Vals: BB->getOutputAddressRange().second, Vals: BB->getOutputSize()); |
547 | |
548 | if (isEntryPoint(BB: *BB)) { |
549 | if (MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(BB: *BB)) |
550 | OS << " Secondary Entry Point: "<< EntrySymbol->getName() << '\n'; |
551 | else |
552 | OS << " Entry Point\n"; |
553 | } |
554 | |
555 | if (BB->isLandingPad()) |
556 | OS << " Landing Pad\n"; |
557 | |
558 | uint64_t BBExecCount = BB->getExecutionCount(); |
559 | if (hasValidProfile()) { |
560 | OS << " Exec Count : "; |
561 | if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) |
562 | OS << BBExecCount << '\n'; |
563 | else |
564 | OS << "<unknown>\n"; |
565 | } |
566 | if (hasCFI()) |
567 | OS << " CFI State : "<< BB->getCFIState() << '\n'; |
568 | if (opts::EnableBAT) { |
569 | OS << " Input offset: 0x"<< Twine::utohexstr(Val: BB->getInputOffset()) |
570 | << "\n"; |
571 | } |
572 | if (!BB->pred_empty()) { |
573 | OS << " Predecessors: "; |
574 | ListSeparator LS; |
575 | for (BinaryBasicBlock *Pred : BB->predecessors()) |
576 | OS << LS << Pred->getName(); |
577 | OS << '\n'; |
578 | } |
579 | if (!BB->throw_empty()) { |
580 | OS << " Throwers: "; |
581 | ListSeparator LS; |
582 | for (BinaryBasicBlock *Throw : BB->throwers()) |
583 | OS << LS << Throw->getName(); |
584 | OS << '\n'; |
585 | } |
586 | |
587 | Offset = alignTo(Value: Offset, Align: BB->getAlignment()); |
588 | |
589 | // Note: offsets are imprecise since this is happening prior to |
590 | // relaxation. |
591 | Offset = BC.printInstructions(OS, Begin: BB->begin(), End: BB->end(), Offset, Function: this); |
592 | |
593 | if (!BB->succ_empty()) { |
594 | OS << " Successors: "; |
595 | // For more than 2 successors, sort them based on frequency. |
596 | std::vector<uint64_t> Indices(BB->succ_size()); |
597 | std::iota(first: Indices.begin(), last: Indices.end(), value: 0); |
598 | if (BB->succ_size() > 2 && BB->getKnownExecutionCount()) { |
599 | llvm::stable_sort(Range&: Indices, C: [&](const uint64_t A, const uint64_t B) { |
600 | return BB->BranchInfo[B] < BB->BranchInfo[A]; |
601 | }); |
602 | } |
603 | ListSeparator LS; |
604 | for (unsigned I = 0; I < Indices.size(); ++I) { |
605 | BinaryBasicBlock *Succ = BB->Successors[Indices[I]]; |
606 | const BinaryBasicBlock::BinaryBranchInfo &BI = |
607 | BB->BranchInfo[Indices[I]]; |
608 | OS << LS << Succ->getName(); |
609 | if (ExecutionCount != COUNT_NO_PROFILE && |
610 | BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) { |
611 | OS << " (mispreds: "<< BI.MispredictedCount |
612 | << ", count: "<< BI.Count << ")"; |
613 | } else if (ExecutionCount != COUNT_NO_PROFILE && |
614 | BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) { |
615 | OS << " (inferred count: "<< BI.Count << ")"; |
616 | } |
617 | } |
618 | OS << '\n'; |
619 | } |
620 | |
621 | if (!BB->lp_empty()) { |
622 | OS << " Landing Pads: "; |
623 | ListSeparator LS; |
624 | for (BinaryBasicBlock *LP : BB->landing_pads()) { |
625 | OS << LS << LP->getName(); |
626 | if (ExecutionCount != COUNT_NO_PROFILE) { |
627 | OS << " (count: "<< LP->getExecutionCount() << ")"; |
628 | } |
629 | } |
630 | OS << '\n'; |
631 | } |
632 | |
633 | // In CFG_Finalized state we can miscalculate CFI state at exit. |
634 | if (CurrentState == State::CFG && hasCFI()) { |
635 | const int32_t CFIStateAtExit = BB->getCFIStateAtExit(); |
636 | if (CFIStateAtExit >= 0) |
637 | OS << " CFI State: "<< CFIStateAtExit << '\n'; |
638 | } |
639 | |
640 | OS << '\n'; |
641 | } |
642 | } |
643 | |
644 | // Dump new exception ranges for the function. |
645 | if (!CallSites.empty()) { |
646 | OS << "EH table:\n"; |
647 | for (const FunctionFragment &FF : getLayout().fragments()) { |
648 | for (const auto &FCSI : getCallSites(F: FF.getFragmentNum())) { |
649 | const CallSite &CSI = FCSI.second; |
650 | OS << " ["<< *CSI.Start << ", "<< *CSI.End << ") landing pad : "; |
651 | if (CSI.LP) |
652 | OS << *CSI.LP; |
653 | else |
654 | OS << "0"; |
655 | OS << ", action : "<< CSI.Action << '\n'; |
656 | } |
657 | } |
658 | OS << '\n'; |
659 | } |
660 | |
661 | // Print all jump tables. |
662 | for (const std::pair<const uint64_t, JumpTable *> &JTI : JumpTables) |
663 | JTI.second->print(OS); |
664 | |
665 | OS << "DWARF CFI Instructions:\n"; |
666 | if (OffsetToCFI.size()) { |
667 | // Pre-buildCFG information |
668 | for (const std::pair<const uint32_t, uint32_t> &Elmt : OffsetToCFI) { |
669 | OS << format(Fmt: " %08x:\t", Vals: Elmt.first); |
670 | assert(Elmt.second < FrameInstructions.size() && "Incorrect CFI offset"); |
671 | BinaryContext::printCFI(OS, Inst: FrameInstructions[Elmt.second]); |
672 | OS << "\n"; |
673 | } |
674 | } else { |
675 | // Post-buildCFG information |
676 | for (uint32_t I = 0, E = FrameInstructions.size(); I != E; ++I) { |
677 | const MCCFIInstruction &CFI = FrameInstructions[I]; |
678 | OS << format(Fmt: " %d:\t", Vals: I); |
679 | BinaryContext::printCFI(OS, Inst: CFI); |
680 | OS << "\n"; |
681 | } |
682 | } |
683 | if (FrameInstructions.empty()) |
684 | OS << " <empty>\n"; |
685 | |
686 | OS << "End of Function \""<< *this << "\"\n\n"; |
687 | } |
688 | |
689 | void BinaryFunction::printRelocations(raw_ostream &OS, uint64_t Offset, |
690 | uint64_t Size) const { |
691 | const char *Sep = " # Relocs: "; |
692 | |
693 | auto RI = Relocations.lower_bound(x: Offset); |
694 | while (RI != Relocations.end() && RI->first < Offset + Size) { |
695 | OS << Sep << "(R: "<< RI->second << ")"; |
696 | Sep = ", "; |
697 | ++RI; |
698 | } |
699 | } |
700 | |
701 | static std::string mutateDWARFExpressionTargetReg(const MCCFIInstruction &Instr, |
702 | MCPhysReg NewReg) { |
703 | StringRef ExprBytes = Instr.getValues(); |
704 | assert(ExprBytes.size() > 1 && "DWARF expression CFI is too short"); |
705 | uint8_t Opcode = ExprBytes[0]; |
706 | assert((Opcode == dwarf::DW_CFA_expression || |
707 | Opcode == dwarf::DW_CFA_val_expression) && |
708 | "invalid DWARF expression CFI"); |
709 | (void)Opcode; |
710 | const uint8_t *const Start = |
711 | reinterpret_cast<const uint8_t *>(ExprBytes.drop_front(N: 1).data()); |
712 | const uint8_t *const End = |
713 | reinterpret_cast<const uint8_t *>(Start + ExprBytes.size() - 1); |
714 | unsigned Size = 0; |
715 | decodeULEB128(p: Start, n: &Size, end: End); |
716 | assert(Size > 0 && "Invalid reg encoding for DWARF expression CFI"); |
717 | SmallString<8> Tmp; |
718 | raw_svector_ostream OSE(Tmp); |
719 | encodeULEB128(Value: NewReg, OS&: OSE); |
720 | return Twine(ExprBytes.slice(Start: 0, End: 1)) |
721 | .concat(Suffix: OSE.str()) |
722 | .concat(Suffix: ExprBytes.drop_front(N: 1 + Size)) |
723 | .str(); |
724 | } |
725 | |
726 | void BinaryFunction::mutateCFIRegisterFor(const MCInst &Instr, |
727 | MCPhysReg NewReg) { |
728 | const MCCFIInstruction *OldCFI = getCFIFor(Instr); |
729 | assert(OldCFI && "invalid CFI instr"); |
730 | switch (OldCFI->getOperation()) { |
731 | default: |
732 | llvm_unreachable("Unexpected instruction"); |
733 | case MCCFIInstruction::OpDefCfa: |
734 | setCFIFor(Instr, CFIInst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: NewReg, |
735 | Offset: OldCFI->getOffset())); |
736 | break; |
737 | case MCCFIInstruction::OpDefCfaRegister: |
738 | setCFIFor(Instr, CFIInst: MCCFIInstruction::createDefCfaRegister(L: nullptr, Register: NewReg)); |
739 | break; |
740 | case MCCFIInstruction::OpOffset: |
741 | setCFIFor(Instr, CFIInst: MCCFIInstruction::createOffset(L: nullptr, Register: NewReg, |
742 | Offset: OldCFI->getOffset())); |
743 | break; |
744 | case MCCFIInstruction::OpRegister: |
745 | setCFIFor(Instr, CFIInst: MCCFIInstruction::createRegister(L: nullptr, Register1: NewReg, |
746 | Register2: OldCFI->getRegister2())); |
747 | break; |
748 | case MCCFIInstruction::OpSameValue: |
749 | setCFIFor(Instr, CFIInst: MCCFIInstruction::createSameValue(L: nullptr, Register: NewReg)); |
750 | break; |
751 | case MCCFIInstruction::OpEscape: |
752 | setCFIFor(Instr, |
753 | CFIInst: MCCFIInstruction::createEscape( |
754 | L: nullptr, |
755 | Vals: StringRef(mutateDWARFExpressionTargetReg(Instr: *OldCFI, NewReg)))); |
756 | break; |
757 | case MCCFIInstruction::OpRestore: |
758 | setCFIFor(Instr, CFIInst: MCCFIInstruction::createRestore(L: nullptr, Register: NewReg)); |
759 | break; |
760 | case MCCFIInstruction::OpUndefined: |
761 | setCFIFor(Instr, CFIInst: MCCFIInstruction::createUndefined(L: nullptr, Register: NewReg)); |
762 | break; |
763 | } |
764 | } |
765 | |
766 | const MCCFIInstruction *BinaryFunction::mutateCFIOffsetFor(const MCInst &Instr, |
767 | int64_t NewOffset) { |
768 | const MCCFIInstruction *OldCFI = getCFIFor(Instr); |
769 | assert(OldCFI && "invalid CFI instr"); |
770 | switch (OldCFI->getOperation()) { |
771 | default: |
772 | llvm_unreachable("Unexpected instruction"); |
773 | case MCCFIInstruction::OpDefCfaOffset: |
774 | setCFIFor(Instr, CFIInst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: NewOffset)); |
775 | break; |
776 | case MCCFIInstruction::OpAdjustCfaOffset: |
777 | setCFIFor(Instr, |
778 | CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: NewOffset)); |
779 | break; |
780 | case MCCFIInstruction::OpDefCfa: |
781 | setCFIFor(Instr, CFIInst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: OldCFI->getRegister(), |
782 | Offset: NewOffset)); |
783 | break; |
784 | case MCCFIInstruction::OpOffset: |
785 | setCFIFor(Instr, CFIInst: MCCFIInstruction::createOffset( |
786 | L: nullptr, Register: OldCFI->getRegister(), Offset: NewOffset)); |
787 | break; |
788 | } |
789 | return getCFIFor(Instr); |
790 | } |
791 | |
792 | IndirectBranchType |
793 | BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size, |
794 | uint64_t Offset, |
795 | uint64_t &TargetAddress) { |
796 | const unsigned PtrSize = BC.AsmInfo->getCodePointerSize(); |
797 | |
798 | // The instruction referencing memory used by the branch instruction. |
799 | // It could be the branch instruction itself or one of the instructions |
800 | // setting the value of the register used by the branch. |
801 | MCInst *MemLocInstr; |
802 | |
803 | // The instruction loading the fixed PIC jump table entry value. |
804 | MCInst *FixedEntryLoadInstr; |
805 | |
806 | // Address of the table referenced by MemLocInstr. Could be either an |
807 | // array of function pointers, or a jump table. |
808 | uint64_t ArrayStart = 0; |
809 | |
810 | unsigned BaseRegNum, IndexRegNum; |
811 | int64_t DispValue; |
812 | const MCExpr *DispExpr; |
813 | |
814 | // In AArch, identify the instruction adding the PC-relative offset to |
815 | // jump table entries to correctly decode it. |
816 | MCInst *PCRelBaseInstr; |
817 | uint64_t PCRelAddr = 0; |
818 | |
819 | auto Begin = Instructions.begin(); |
820 | if (BC.isAArch64()) { |
821 | // Start at the last label as an approximation of the current basic block. |
822 | // This is a heuristic, since the full set of labels have yet to be |
823 | // determined |
824 | for (const uint32_t Offset : |
825 | llvm::make_first_range(c: llvm::reverse(C&: Labels))) { |
826 | auto II = Instructions.find(x: Offset); |
827 | if (II != Instructions.end()) { |
828 | Begin = II; |
829 | break; |
830 | } |
831 | } |
832 | } |
833 | |
834 | IndirectBranchType BranchType = BC.MIB->analyzeIndirectBranch( |
835 | Instruction, Begin, End: Instructions.end(), PtrSize, MemLocInstr, BaseRegNum, |
836 | IndexRegNum, DispValue, DispExpr, PCRelBaseOut&: PCRelBaseInstr, FixedEntryLoadInst&: FixedEntryLoadInstr); |
837 | |
838 | if (BranchType == IndirectBranchType::UNKNOWN && !MemLocInstr) |
839 | return BranchType; |
840 | |
841 | if (MemLocInstr != &Instruction) |
842 | IndexRegNum = BC.MIB->getNoRegister(); |
843 | |
844 | if (BC.isAArch64()) { |
845 | const MCSymbol *Sym = BC.MIB->getTargetSymbol(Inst: *PCRelBaseInstr, OpNum: 1); |
846 | assert(Sym && "Symbol extraction failed"); |
847 | ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(Symbol: *Sym); |
848 | if (SymValueOrError) { |
849 | PCRelAddr = *SymValueOrError; |
850 | } else { |
851 | for (std::pair<const uint32_t, MCSymbol *> &Elmt : Labels) { |
852 | if (Elmt.second == Sym) { |
853 | PCRelAddr = Elmt.first + getAddress(); |
854 | break; |
855 | } |
856 | } |
857 | } |
858 | uint64_t InstrAddr = 0; |
859 | for (auto II = Instructions.rbegin(); II != Instructions.rend(); ++II) { |
860 | if (&II->second == PCRelBaseInstr) { |
861 | InstrAddr = II->first + getAddress(); |
862 | break; |
863 | } |
864 | } |
865 | assert(InstrAddr != 0 && "instruction not found"); |
866 | // We do this to avoid spurious references to code locations outside this |
867 | // function (for example, if the indirect jump lives in the last basic |
868 | // block of the function, it will create a reference to the next function). |
869 | // This replaces a symbol reference with an immediate. |
870 | BC.MIB->replaceMemOperandDisp(Inst&: *PCRelBaseInstr, |
871 | Operand: MCOperand::createImm(Val: PCRelAddr - InstrAddr)); |
872 | // FIXME: Disable full jump table processing for AArch64 until we have a |
873 | // proper way of determining the jump table limits. |
874 | return IndirectBranchType::UNKNOWN; |
875 | } |
876 | |
877 | auto getExprValue = [&](const MCExpr *Expr) { |
878 | const MCSymbol *TargetSym; |
879 | uint64_t TargetOffset; |
880 | std::tie(args&: TargetSym, args&: TargetOffset) = BC.MIB->getTargetSymbolInfo(Expr); |
881 | ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(Symbol: *TargetSym); |
882 | assert(SymValueOrError && "Global symbol needs a value"); |
883 | return *SymValueOrError + TargetOffset; |
884 | }; |
885 | |
886 | // RIP-relative addressing should be converted to symbol form by now |
887 | // in processed instructions (but not in jump). |
888 | if (DispExpr) { |
889 | ArrayStart = getExprValue(DispExpr); |
890 | BaseRegNum = BC.MIB->getNoRegister(); |
891 | if (BC.isAArch64()) { |
892 | ArrayStart &= ~0xFFFULL; |
893 | ArrayStart += DispValue & 0xFFFULL; |
894 | } |
895 | } else { |
896 | ArrayStart = static_cast<uint64_t>(DispValue); |
897 | } |
898 | |
899 | if (BaseRegNum == BC.MRI->getProgramCounter()) |
900 | ArrayStart += getAddress() + Offset + Size; |
901 | |
902 | if (FixedEntryLoadInstr) { |
903 | assert(BranchType == IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH && |
904 | "Invalid IndirectBranch type"); |
905 | MCInst::iterator FixedEntryDispOperand = |
906 | BC.MIB->getMemOperandDisp(Inst&: *FixedEntryLoadInstr); |
907 | assert(FixedEntryDispOperand != FixedEntryLoadInstr->end() && |
908 | "Invalid memory instruction"); |
909 | const MCExpr *FixedEntryDispExpr = FixedEntryDispOperand->getExpr(); |
910 | const uint64_t EntryAddress = getExprValue(FixedEntryDispExpr); |
911 | uint64_t EntrySize = BC.getJumpTableEntrySize(Type: JumpTable::JTT_PIC); |
912 | ErrorOr<int64_t> Value = |
913 | BC.getSignedValueAtAddress(Address: EntryAddress, Size: EntrySize); |
914 | if (!Value) |
915 | return IndirectBranchType::UNKNOWN; |
916 | |
917 | BC.outs() << "BOLT-INFO: fixed PIC indirect branch detected in "<< *this |
918 | << " at 0x"<< Twine::utohexstr(Val: getAddress() + Offset) |
919 | << " referencing data at 0x"<< Twine::utohexstr(Val: EntryAddress) |
920 | << " the destination value is 0x" |
921 | << Twine::utohexstr(Val: ArrayStart + *Value) << '\n'; |
922 | |
923 | TargetAddress = ArrayStart + *Value; |
924 | |
925 | // Remove spurious JumpTable at EntryAddress caused by PIC reference from |
926 | // the load instruction. |
927 | BC.deleteJumpTable(Address: EntryAddress); |
928 | |
929 | // Replace FixedEntryDispExpr used in target address calculation with outer |
930 | // jump table reference. |
931 | JumpTable *JT = BC.getJumpTableContainingAddress(Address: ArrayStart); |
932 | assert(JT && "Must have a containing jump table for PIC fixed branch"); |
933 | BC.MIB->replaceMemOperandDisp(Inst&: *FixedEntryLoadInstr, Label: JT->getFirstLabel(), |
934 | Addend: EntryAddress - ArrayStart, Ctx: &*BC.Ctx); |
935 | |
936 | return BranchType; |
937 | } |
938 | |
939 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x" |
940 | << Twine::utohexstr(ArrayStart) << '\n'); |
941 | |
942 | ErrorOr<BinarySection &> Section = BC.getSectionForAddress(Address: ArrayStart); |
943 | if (!Section) { |
944 | // No section - possibly an absolute address. Since we don't allow |
945 | // internal function addresses to escape the function scope - we |
946 | // consider it a tail call. |
947 | if (opts::Verbosity >= 1) { |
948 | BC.errs() << "BOLT-WARNING: no section for address 0x" |
949 | << Twine::utohexstr(Val: ArrayStart) << " referenced from function " |
950 | << *this << '\n'; |
951 | } |
952 | return IndirectBranchType::POSSIBLE_TAIL_CALL; |
953 | } |
954 | if (Section->isVirtual()) { |
955 | // The contents are filled at runtime. |
956 | return IndirectBranchType::POSSIBLE_TAIL_CALL; |
957 | } |
958 | |
959 | if (BranchType == IndirectBranchType::POSSIBLE_FIXED_BRANCH) { |
960 | ErrorOr<uint64_t> Value = BC.getPointerAtAddress(Address: ArrayStart); |
961 | if (!Value) |
962 | return IndirectBranchType::UNKNOWN; |
963 | |
964 | if (BC.getSectionForAddress(Address: ArrayStart)->isWritable()) |
965 | return IndirectBranchType::UNKNOWN; |
966 | |
967 | BC.outs() << "BOLT-INFO: fixed indirect branch detected in "<< *this |
968 | << " at 0x"<< Twine::utohexstr(Val: getAddress() + Offset) |
969 | << " referencing data at 0x"<< Twine::utohexstr(Val: ArrayStart) |
970 | << " the destination value is 0x"<< Twine::utohexstr(Val: *Value) |
971 | << '\n'; |
972 | |
973 | TargetAddress = *Value; |
974 | return BranchType; |
975 | } |
976 | |
977 | // Check if there's already a jump table registered at this address. |
978 | MemoryContentsType MemType; |
979 | if (JumpTable *JT = BC.getJumpTableContainingAddress(Address: ArrayStart)) { |
980 | switch (JT->Type) { |
981 | case JumpTable::JTT_NORMAL: |
982 | MemType = MemoryContentsType::POSSIBLE_JUMP_TABLE; |
983 | break; |
984 | case JumpTable::JTT_PIC: |
985 | MemType = MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE; |
986 | break; |
987 | } |
988 | } else { |
989 | MemType = BC.analyzeMemoryAt(Address: ArrayStart, BF&: *this); |
990 | } |
991 | |
992 | // Check that jump table type in instruction pattern matches memory contents. |
993 | JumpTable::JumpTableType JTType; |
994 | if (BranchType == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) { |
995 | if (MemType != MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE) |
996 | return IndirectBranchType::UNKNOWN; |
997 | JTType = JumpTable::JTT_PIC; |
998 | } else { |
999 | if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE) |
1000 | return IndirectBranchType::UNKNOWN; |
1001 | |
1002 | if (MemType == MemoryContentsType::UNKNOWN) |
1003 | return IndirectBranchType::POSSIBLE_TAIL_CALL; |
1004 | |
1005 | BranchType = IndirectBranchType::POSSIBLE_JUMP_TABLE; |
1006 | JTType = JumpTable::JTT_NORMAL; |
1007 | } |
1008 | |
1009 | // Convert the instruction into jump table branch. |
1010 | const MCSymbol *JTLabel = BC.getOrCreateJumpTable(Function&: *this, Address: ArrayStart, Type: JTType); |
1011 | BC.MIB->replaceMemOperandDisp(Inst&: *MemLocInstr, Label: JTLabel, Ctx: BC.Ctx.get()); |
1012 | BC.MIB->setJumpTable(Inst&: Instruction, Value: ArrayStart, IndexReg: IndexRegNum); |
1013 | |
1014 | JTSites.emplace_back(Args&: Offset, Args&: ArrayStart); |
1015 | |
1016 | return BranchType; |
1017 | } |
1018 | |
1019 | MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address, |
1020 | bool CreatePastEnd) { |
1021 | const uint64_t Offset = Address - getAddress(); |
1022 | |
1023 | if ((Offset == getSize()) && CreatePastEnd) |
1024 | return getFunctionEndLabel(); |
1025 | |
1026 | auto LI = Labels.find(x: Offset); |
1027 | if (LI != Labels.end()) |
1028 | return LI->second; |
1029 | |
1030 | // For AArch64, check if this address is part of a constant island. |
1031 | if (BC.isAArch64()) { |
1032 | if (MCSymbol *IslandSym = getOrCreateIslandAccess(Address)) |
1033 | return IslandSym; |
1034 | } |
1035 | |
1036 | MCSymbol *Label = BC.Ctx->createNamedTempSymbol(); |
1037 | Labels[Offset] = Label; |
1038 | |
1039 | return Label; |
1040 | } |
1041 | |
1042 | ErrorOr<ArrayRef<uint8_t>> BinaryFunction::getData() const { |
1043 | BinarySection &Section = *getOriginSection(); |
1044 | assert(Section.containsRange(getAddress(), getMaxSize()) && |
1045 | "wrong section for function"); |
1046 | |
1047 | if (!Section.isText() || Section.isVirtual() || !Section.getSize()) |
1048 | return std::make_error_code(e: std::errc::bad_address); |
1049 | |
1050 | StringRef SectionContents = Section.getContents(); |
1051 | |
1052 | assert(SectionContents.size() == Section.getSize() && |
1053 | "section size mismatch"); |
1054 | |
1055 | // Function offset from the section start. |
1056 | uint64_t Offset = getAddress() - Section.getAddress(); |
1057 | auto *Bytes = reinterpret_cast<const uint8_t *>(SectionContents.data()); |
1058 | return ArrayRef<uint8_t>(Bytes + Offset, getMaxSize()); |
1059 | } |
1060 | |
1061 | size_t BinaryFunction::getSizeOfDataInCodeAt(uint64_t Offset) const { |
1062 | if (!Islands) |
1063 | return 0; |
1064 | |
1065 | if (!llvm::is_contained(Range&: Islands->DataOffsets, Element: Offset)) |
1066 | return 0; |
1067 | |
1068 | auto Iter = Islands->CodeOffsets.upper_bound(x: Offset); |
1069 | if (Iter != Islands->CodeOffsets.end()) |
1070 | return *Iter - Offset; |
1071 | return getMaxSize() - Offset; |
1072 | } |
1073 | |
1074 | std::optional<uint64_t> |
1075 | BinaryFunction::getIslandInRange(uint64_t StartOffset, |
1076 | uint64_t EndOffset) const { |
1077 | if (!Islands) |
1078 | return std::nullopt; |
1079 | |
1080 | auto Iter = llvm::lower_bound(Range&: Islands->DataOffsets, Value&: StartOffset); |
1081 | if (Iter != Islands->DataOffsets.end() && *Iter < EndOffset) |
1082 | return *Iter; |
1083 | |
1084 | return std::nullopt; |
1085 | } |
1086 | |
1087 | bool BinaryFunction::isZeroPaddingAt(uint64_t Offset) const { |
1088 | ArrayRef<uint8_t> FunctionData = *getData(); |
1089 | uint64_t EndOfCode = getSize(); |
1090 | if (Islands) { |
1091 | auto Iter = Islands->DataOffsets.upper_bound(x: Offset); |
1092 | if (Iter != Islands->DataOffsets.end()) |
1093 | EndOfCode = *Iter; |
1094 | } |
1095 | for (uint64_t I = Offset; I < EndOfCode; ++I) |
1096 | if (FunctionData[I] != 0) |
1097 | return false; |
1098 | |
1099 | return true; |
1100 | } |
1101 | |
1102 | Error BinaryFunction::handlePCRelOperand(MCInst &Instruction, uint64_t Address, |
1103 | uint64_t Size) { |
1104 | auto &MIB = BC.MIB; |
1105 | uint64_t TargetAddress = 0; |
1106 | if (!MIB->evaluateMemOperandTarget(Inst: Instruction, Target&: TargetAddress, Address, |
1107 | Size)) { |
1108 | std::string Msg; |
1109 | raw_string_ostream SS(Msg); |
1110 | SS << "BOLT-ERROR: PC-relative operand can't be evaluated:\n"; |
1111 | BC.InstPrinter->printInst(MI: &Instruction, Address: 0, Annot: "", STI: *BC.STI, OS&: SS); |
1112 | SS << '\n'; |
1113 | Instruction.dump_pretty(OS&: SS, Printer: BC.InstPrinter.get()); |
1114 | SS << '\n'; |
1115 | SS << "BOLT-ERROR: cannot handle PC-relative operand at 0x" |
1116 | << Twine::utohexstr(Val: Address) << ". Skipping function "<< *this << ".\n"; |
1117 | if (BC.HasRelocations) |
1118 | return createFatalBOLTError(S: Msg); |
1119 | IsSimple = false; |
1120 | return createNonFatalBOLTError(S: Msg); |
1121 | } |
1122 | if (TargetAddress == 0 && opts::Verbosity >= 1) { |
1123 | BC.outs() << "BOLT-INFO: PC-relative operand is zero in function "<< *this |
1124 | << '\n'; |
1125 | } |
1126 | |
1127 | const MCSymbol *TargetSymbol; |
1128 | uint64_t TargetOffset; |
1129 | std::tie(args&: TargetSymbol, args&: TargetOffset) = |
1130 | BC.handleAddressRef(Address: TargetAddress, BF&: *this, /*IsPCRel*/ true); |
1131 | |
1132 | bool ReplaceSuccess = MIB->replaceMemOperandDisp( |
1133 | Inst&: Instruction, Label: TargetSymbol, Addend: static_cast<int64_t>(TargetOffset), Ctx: &*BC.Ctx); |
1134 | (void)ReplaceSuccess; |
1135 | assert(ReplaceSuccess && "Failed to replace mem operand with symbol+off."); |
1136 | return Error::success(); |
1137 | } |
1138 | |
1139 | MCSymbol *BinaryFunction::handleExternalReference(MCInst &Instruction, |
1140 | uint64_t Size, |
1141 | uint64_t Offset, |
1142 | uint64_t TargetAddress, |
1143 | bool &IsCall) { |
1144 | auto &MIB = BC.MIB; |
1145 | |
1146 | const uint64_t AbsoluteInstrAddr = getAddress() + Offset; |
1147 | BC.addInterproceduralReference(Function: this, Address: TargetAddress); |
1148 | if (opts::Verbosity >= 2 && !IsCall && Size == 2 && !BC.HasRelocations) { |
1149 | BC.errs() << "BOLT-WARNING: relaxed tail call detected at 0x" |
1150 | << Twine::utohexstr(Val: AbsoluteInstrAddr) << " in function "<< *this |
1151 | << ". Code size will be increased.\n"; |
1152 | } |
1153 | |
1154 | assert(!MIB->isTailCall(Instruction) && |
1155 | "synthetic tail call instruction found"); |
1156 | |
1157 | // This is a call regardless of the opcode. |
1158 | // Assign proper opcode for tail calls, so that they could be |
1159 | // treated as calls. |
1160 | if (!IsCall) { |
1161 | if (!MIB->convertJmpToTailCall(Inst&: Instruction)) { |
1162 | assert(MIB->isConditionalBranch(Instruction) && |
1163 | "unknown tail call instruction"); |
1164 | if (opts::Verbosity >= 2) { |
1165 | BC.errs() << "BOLT-WARNING: conditional tail call detected in " |
1166 | << "function "<< *this << " at 0x" |
1167 | << Twine::utohexstr(Val: AbsoluteInstrAddr) << ".\n"; |
1168 | } |
1169 | } |
1170 | IsCall = true; |
1171 | } |
1172 | |
1173 | if (opts::Verbosity >= 2 && TargetAddress == 0) { |
1174 | // We actually see calls to address 0 in presence of weak |
1175 | // symbols originating from libraries. This code is never meant |
1176 | // to be executed. |
1177 | BC.outs() << "BOLT-INFO: Function "<< *this |
1178 | << " has a call to address zero.\n"; |
1179 | } |
1180 | |
1181 | return BC.getOrCreateGlobalSymbol(Address: TargetAddress, Prefix: "FUNCat"); |
1182 | } |
1183 | |
1184 | void BinaryFunction::handleIndirectBranch(MCInst &Instruction, uint64_t Size, |
1185 | uint64_t Offset) { |
1186 | auto &MIB = BC.MIB; |
1187 | uint64_t IndirectTarget = 0; |
1188 | IndirectBranchType Result = |
1189 | processIndirectBranch(Instruction, Size, Offset, TargetAddress&: IndirectTarget); |
1190 | switch (Result) { |
1191 | default: |
1192 | llvm_unreachable("unexpected result"); |
1193 | case IndirectBranchType::POSSIBLE_TAIL_CALL: { |
1194 | bool Result = MIB->convertJmpToTailCall(Inst&: Instruction); |
1195 | (void)Result; |
1196 | assert(Result); |
1197 | break; |
1198 | } |
1199 | case IndirectBranchType::POSSIBLE_JUMP_TABLE: |
1200 | case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE: |
1201 | case IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH: |
1202 | if (opts::JumpTables == JTS_NONE) |
1203 | IsSimple = false; |
1204 | break; |
1205 | case IndirectBranchType::POSSIBLE_FIXED_BRANCH: { |
1206 | if (containsAddress(PC: IndirectTarget)) { |
1207 | const MCSymbol *TargetSymbol = getOrCreateLocalLabel(Address: IndirectTarget); |
1208 | Instruction.clear(); |
1209 | MIB->createUncondBranch(Inst&: Instruction, TBB: TargetSymbol, Ctx: BC.Ctx.get()); |
1210 | TakenBranches.emplace_back(Args&: Offset, Args: IndirectTarget - getAddress()); |
1211 | addEntryPointAtOffset(Offset: IndirectTarget - getAddress()); |
1212 | } else { |
1213 | MIB->convertJmpToTailCall(Inst&: Instruction); |
1214 | BC.addInterproceduralReference(Function: this, Address: IndirectTarget); |
1215 | } |
1216 | break; |
1217 | } |
1218 | case IndirectBranchType::UNKNOWN: |
1219 | // Keep processing. We'll do more checks and fixes in |
1220 | // postProcessIndirectBranches(). |
1221 | if (opts::Verbosity > 2) { |
1222 | outs() << "BOLT-WARNING: failed to match indirect branch, " |
1223 | << getPrintName() << " at 0x"<< Twine::utohexstr(Val: Offset) |
1224 | << " offset\n"; |
1225 | } |
1226 | UnknownIndirectBranchOffsets.emplace(args&: Offset); |
1227 | break; |
1228 | } |
1229 | } |
1230 | |
1231 | void BinaryFunction::handleAArch64IndirectCall(MCInst &Instruction, |
1232 | const uint64_t Offset) { |
1233 | auto &MIB = BC.MIB; |
1234 | const uint64_t AbsoluteInstrAddr = getAddress() + Offset; |
1235 | MCInst *TargetHiBits, *TargetLowBits; |
1236 | uint64_t TargetAddress, Count; |
1237 | Count = MIB->matchLinkerVeneer(Begin: Instructions.begin(), End: Instructions.end(), |
1238 | Address: AbsoluteInstrAddr, CurInst: Instruction, TargetHiBits, |
1239 | TargetLowBits, Target&: TargetAddress); |
1240 | if (Count) { |
1241 | MIB->addAnnotation(Inst&: Instruction, Name: "AArch64Veneer", Val: true); |
1242 | --Count; |
1243 | for (auto It = std::prev(x: Instructions.end()); Count != 0; |
1244 | It = std::prev(x: It), --Count) { |
1245 | MIB->addAnnotation(Inst&: It->second, Name: "AArch64Veneer", Val: true); |
1246 | } |
1247 | |
1248 | BC.addAdrpAddRelocAArch64(BF&: *this, LoadLowBits&: *TargetLowBits, LoadHiBits&: *TargetHiBits, |
1249 | Target: TargetAddress); |
1250 | } |
1251 | } |
1252 | |
1253 | std::optional<MCInst> |
1254 | BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const { |
1255 | assert(CurrentState == State::Empty && "Function should not be disassembled"); |
1256 | assert(Offset < MaxSize && "Invalid offset"); |
1257 | ErrorOr<ArrayRef<unsigned char>> FunctionData = getData(); |
1258 | assert(FunctionData && "Cannot get function as data"); |
1259 | MCInst Instr; |
1260 | uint64_t InstrSize = 0; |
1261 | const uint64_t InstrAddress = getAddress() + Offset; |
1262 | if (BC.DisAsm->getInstruction(Instr, Size&: InstrSize, Bytes: FunctionData->slice(N: Offset), |
1263 | Address: InstrAddress, CStream&: nulls())) |
1264 | return Instr; |
1265 | return std::nullopt; |
1266 | } |
1267 | |
1268 | Error BinaryFunction::disassemble() { |
1269 | NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs", |
1270 | "Build Binary Functions", opts::TimeBuild); |
1271 | ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData(); |
1272 | assert(ErrorOrFunctionData && "function data is not available"); |
1273 | ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData; |
1274 | assert(FunctionData.size() == getMaxSize() && |
1275 | "function size does not match raw data size"); |
1276 | |
1277 | auto &Ctx = BC.Ctx; |
1278 | auto &MIB = BC.MIB; |
1279 | |
1280 | BC.SymbolicDisAsm->setSymbolizer(MIB->createTargetSymbolizer(Function&: *this)); |
1281 | |
1282 | // Insert a label at the beginning of the function. This will be our first |
1283 | // basic block. |
1284 | Labels[0] = Ctx->createNamedTempSymbol(Name: "BB0"); |
1285 | |
1286 | // Map offsets in the function to a label that should always point to the |
1287 | // corresponding instruction. This is used for labels that shouldn't point to |
1288 | // the start of a basic block but always to a specific instruction. This is |
1289 | // used, for example, on RISC-V where %pcrel_lo relocations point to the |
1290 | // corresponding %pcrel_hi. |
1291 | LabelsMapType InstructionLabels; |
1292 | |
1293 | uint64_t Size = 0; // instruction size |
1294 | for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) { |
1295 | MCInst Instruction; |
1296 | const uint64_t AbsoluteInstrAddr = getAddress() + Offset; |
1297 | |
1298 | // Check for data inside code and ignore it |
1299 | if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) { |
1300 | Size = DataInCodeSize; |
1301 | continue; |
1302 | } |
1303 | |
1304 | if (!BC.SymbolicDisAsm->getInstruction(Instr&: Instruction, Size, |
1305 | Bytes: FunctionData.slice(N: Offset), |
1306 | Address: AbsoluteInstrAddr, CStream&: nulls())) { |
1307 | // Functions with "soft" boundaries, e.g. coming from assembly source, |
1308 | // can have 0-byte padding at the end. |
1309 | if (isZeroPaddingAt(Offset)) |
1310 | break; |
1311 | |
1312 | BC.errs() |
1313 | << "BOLT-WARNING: unable to disassemble instruction at offset 0x" |
1314 | << Twine::utohexstr(Val: Offset) << " (address 0x" |
1315 | << Twine::utohexstr(Val: AbsoluteInstrAddr) << ") in function "<< *this |
1316 | << '\n'; |
1317 | // Some AVX-512 instructions could not be disassembled at all. |
1318 | if (BC.HasRelocations && opts::TrapOnAVX512 && BC.isX86()) { |
1319 | setTrapOnEntry(); |
1320 | BC.TrappedFunctions.push_back(x: this); |
1321 | } else { |
1322 | setIgnored(); |
1323 | } |
1324 | |
1325 | break; |
1326 | } |
1327 | |
1328 | // Check integrity of LLVM assembler/disassembler. |
1329 | if (opts::CheckEncoding && !BC.MIB->isBranch(Inst: Instruction) && |
1330 | !BC.MIB->isCall(Inst: Instruction) && !BC.MIB->isNoop(Inst: Instruction)) { |
1331 | if (!BC.validateInstructionEncoding(Sequence: FunctionData.slice(N: Offset, M: Size))) { |
1332 | BC.errs() << "BOLT-WARNING: mismatching LLVM encoding detected in " |
1333 | << "function "<< *this << " for instruction :\n"; |
1334 | BC.printInstruction(OS&: BC.errs(), Instruction, Offset: AbsoluteInstrAddr); |
1335 | BC.errs() << '\n'; |
1336 | } |
1337 | |
1338 | // Verify that we've symbolized an operand if the instruction has a |
1339 | // relocation against it. |
1340 | if (getRelocationInRange(StartOffset: Offset, EndOffset: Offset + Size)) { |
1341 | bool HasSymbolicOp = false; |
1342 | for (MCOperand &Op : Instruction) { |
1343 | if (Op.isExpr()) { |
1344 | HasSymbolicOp = true; |
1345 | break; |
1346 | } |
1347 | } |
1348 | if (!HasSymbolicOp) |
1349 | return createFatalBOLTError( |
1350 | S: "expected symbolized operand for instruction at 0x"+ |
1351 | Twine::utohexstr(Val: AbsoluteInstrAddr)); |
1352 | } |
1353 | } |
1354 | |
1355 | // Special handling for AVX-512 instructions. |
1356 | if (MIB->hasEVEXEncoding(Inst: Instruction)) { |
1357 | if (BC.HasRelocations && opts::TrapOnAVX512) { |
1358 | setTrapOnEntry(); |
1359 | BC.TrappedFunctions.push_back(x: this); |
1360 | break; |
1361 | } |
1362 | |
1363 | if (!BC.validateInstructionEncoding(Sequence: FunctionData.slice(N: Offset, M: Size))) { |
1364 | BC.errs() << "BOLT-WARNING: internal assembler/disassembler error " |
1365 | "detected for AVX512 instruction:\n"; |
1366 | BC.printInstruction(OS&: BC.errs(), Instruction, Offset: AbsoluteInstrAddr); |
1367 | BC.errs() << " in function "<< *this << '\n'; |
1368 | setIgnored(); |
1369 | break; |
1370 | } |
1371 | } |
1372 | |
1373 | bool IsUnsupported = BC.MIB->isUnsupportedInstruction(Inst: Instruction); |
1374 | if (IsUnsupported) |
1375 | setIgnored(); |
1376 | |
1377 | if (MIB->isBranch(Inst: Instruction) || MIB->isCall(Inst: Instruction)) { |
1378 | uint64_t TargetAddress = 0; |
1379 | if (MIB->evaluateBranch(Inst: Instruction, Addr: AbsoluteInstrAddr, Size, |
1380 | Target&: TargetAddress)) { |
1381 | // Check if the target is within the same function. Otherwise it's |
1382 | // a call, possibly a tail call. |
1383 | // |
1384 | // If the target *is* the function address it could be either a branch |
1385 | // or a recursive call. |
1386 | bool IsCall = MIB->isCall(Inst: Instruction); |
1387 | const bool IsCondBranch = MIB->isConditionalBranch(Inst: Instruction); |
1388 | MCSymbol *TargetSymbol = nullptr; |
1389 | |
1390 | if (IsUnsupported) |
1391 | if (auto *TargetFunc = |
1392 | BC.getBinaryFunctionContainingAddress(Address: TargetAddress)) |
1393 | TargetFunc->setIgnored(); |
1394 | |
1395 | if (IsCall && TargetAddress == getAddress()) { |
1396 | // A recursive call. Calls to internal blocks are handled by |
1397 | // ValidateInternalCalls pass. |
1398 | TargetSymbol = getSymbol(); |
1399 | } |
1400 | |
1401 | if (!TargetSymbol) { |
1402 | // Create either local label or external symbol. |
1403 | if (containsAddress(PC: TargetAddress)) { |
1404 | TargetSymbol = getOrCreateLocalLabel(Address: TargetAddress); |
1405 | } else { |
1406 | if (TargetAddress == getAddress() + getSize() && |
1407 | TargetAddress < getAddress() + getMaxSize() && |
1408 | !(BC.isAArch64() && |
1409 | BC.handleAArch64Veneer(Address: TargetAddress, /*MatchOnly*/ true))) { |
1410 | // Result of __builtin_unreachable(). |
1411 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: jump past end detected at 0x" |
1412 | << Twine::utohexstr(AbsoluteInstrAddr) |
1413 | << " in function "<< *this |
1414 | << " : replacing with nop.\n"); |
1415 | BC.MIB->createNoop(Inst&: Instruction); |
1416 | if (IsCondBranch) { |
1417 | // Register branch offset for profile validation. |
1418 | IgnoredBranches.emplace_back(Args&: Offset, Args: Offset + Size); |
1419 | } |
1420 | goto add_instruction; |
1421 | } |
1422 | // May update Instruction and IsCall |
1423 | TargetSymbol = handleExternalReference(Instruction, Size, Offset, |
1424 | TargetAddress, IsCall); |
1425 | } |
1426 | } |
1427 | |
1428 | if (!IsCall) { |
1429 | // Add taken branch info. |
1430 | TakenBranches.emplace_back(Args&: Offset, Args: TargetAddress - getAddress()); |
1431 | } |
1432 | BC.MIB->replaceBranchTarget(Inst&: Instruction, TBB: TargetSymbol, Ctx: &*Ctx); |
1433 | |
1434 | // Mark CTC. |
1435 | if (IsCondBranch && IsCall) |
1436 | MIB->setConditionalTailCall(Inst&: Instruction, Dest: TargetAddress); |
1437 | } else { |
1438 | // Could not evaluate branch. Should be an indirect call or an |
1439 | // indirect branch. Bail out on the latter case. |
1440 | if (MIB->isIndirectBranch(Inst: Instruction)) |
1441 | handleIndirectBranch(Instruction, Size, Offset); |
1442 | // Indirect call. We only need to fix it if the operand is RIP-relative. |
1443 | if (IsSimple && MIB->hasPCRelOperand(Inst: Instruction)) { |
1444 | if (auto NewE = handleErrors( |
1445 | E: handlePCRelOperand(Instruction, Address: AbsoluteInstrAddr, Size), |
1446 | Hs: [&](const BOLTError &E) -> Error { |
1447 | if (E.isFatal()) |
1448 | return Error(std::make_unique<BOLTError>(args: std::move(E))); |
1449 | if (!E.getMessage().empty()) |
1450 | E.log(OS&: BC.errs()); |
1451 | return Error::success(); |
1452 | })) { |
1453 | return Error(std::move(NewE)); |
1454 | } |
1455 | } |
1456 | |
1457 | if (BC.isAArch64()) |
1458 | handleAArch64IndirectCall(Instruction, Offset); |
1459 | } |
1460 | } else if (BC.isRISCV()) { |
1461 | // Check if there's a relocation associated with this instruction. |
1462 | for (auto Itr = Relocations.lower_bound(x: Offset), |
1463 | ItrE = Relocations.lower_bound(x: Offset + Size); |
1464 | Itr != ItrE; ++Itr) { |
1465 | const Relocation &Relocation = Itr->second; |
1466 | MCSymbol *Symbol = Relocation.Symbol; |
1467 | |
1468 | if (Relocation::isInstructionReference(Type: Relocation.Type)) { |
1469 | uint64_t RefOffset = Relocation.Value - getAddress(); |
1470 | LabelsMapType::iterator LI = InstructionLabels.find(x: RefOffset); |
1471 | |
1472 | if (LI == InstructionLabels.end()) { |
1473 | Symbol = BC.Ctx->createNamedTempSymbol(); |
1474 | InstructionLabels.emplace(args&: RefOffset, args&: Symbol); |
1475 | } else { |
1476 | Symbol = LI->second; |
1477 | } |
1478 | } |
1479 | |
1480 | uint64_t Addend = Relocation.Addend; |
1481 | |
1482 | // For GOT relocations, create a reference against GOT entry ignoring |
1483 | // the relocation symbol. |
1484 | if (Relocation::isGOT(Type: Relocation.Type)) { |
1485 | assert(Relocation::isPCRelative(Relocation.Type) && |
1486 | "GOT relocation must be PC-relative on RISC-V"); |
1487 | Symbol = BC.registerNameAtAddress(Name: "__BOLT_got_zero", Address: 0, Size: 0, Alignment: 0); |
1488 | Addend = Relocation.Value + Relocation.Offset + getAddress(); |
1489 | } |
1490 | int64_t Value = Relocation.Value; |
1491 | const bool Result = BC.MIB->replaceImmWithSymbolRef( |
1492 | Inst&: Instruction, Symbol, Addend, Ctx: Ctx.get(), Value, RelType: Relocation.Type); |
1493 | (void)Result; |
1494 | assert(Result && "cannot replace immediate with relocation"); |
1495 | } |
1496 | } |
1497 | |
1498 | add_instruction: |
1499 | if (getDWARFLineTable()) { |
1500 | Instruction.setLoc(findDebugLineInformationForInstructionAt( |
1501 | Address: AbsoluteInstrAddr, Unit: getDWARFUnit(), LineTable: getDWARFLineTable())); |
1502 | } |
1503 | |
1504 | // Record offset of the instruction for profile matching. |
1505 | if (BC.keepOffsetForInstruction(Inst: Instruction)) |
1506 | MIB->setOffset(Inst&: Instruction, Offset: static_cast<uint32_t>(Offset)); |
1507 | |
1508 | if (BC.isX86() && BC.MIB->isNoop(Inst: Instruction)) { |
1509 | // NOTE: disassembly loses the correct size information for noops on x86. |
1510 | // E.g. nopw 0x0(%rax,%rax,1) is 9 bytes, but re-encoded it's only |
1511 | // 5 bytes. Preserve the size info using annotations. |
1512 | MIB->setSize(Inst&: Instruction, Size); |
1513 | } |
1514 | |
1515 | addInstruction(Offset, Instruction: std::move(Instruction)); |
1516 | } |
1517 | |
1518 | for (auto [Offset, Label] : InstructionLabels) { |
1519 | InstrMapType::iterator II = Instructions.find(x: Offset); |
1520 | assert(II != Instructions.end() && "reference to non-existing instruction"); |
1521 | |
1522 | BC.MIB->setInstLabel(Inst&: II->second, Label); |
1523 | } |
1524 | |
1525 | // Reset symbolizer for the disassembler. |
1526 | BC.SymbolicDisAsm->setSymbolizer(nullptr); |
1527 | |
1528 | if (uint64_t Offset = getFirstInstructionOffset()) |
1529 | Labels[Offset] = BC.Ctx->createNamedTempSymbol(); |
1530 | |
1531 | if (!IsSimple) { |
1532 | clearList(List&: Instructions); |
1533 | return createNonFatalBOLTError(S: ""); |
1534 | } |
1535 | |
1536 | updateState(State: State::Disassembled); |
1537 | |
1538 | return Error::success(); |
1539 | } |
1540 | |
1541 | MCSymbol *BinaryFunction::registerBranch(uint64_t Src, uint64_t Dst) { |
1542 | assert(CurrentState == State::Disassembled && |
1543 | "Cannot register branch unless function is in disassembled state."); |
1544 | assert(containsAddress(Src) && containsAddress(Dst) && |
1545 | "Cannot register external branch."); |
1546 | MCSymbol *Target = getOrCreateLocalLabel(Address: Dst); |
1547 | TakenBranches.emplace_back(Args: Src - getAddress(), Args: Dst - getAddress()); |
1548 | return Target; |
1549 | } |
1550 | |
1551 | void BinaryFunction::analyzeInstructionForFuncReference(const MCInst &Inst) { |
1552 | for (unsigned OpNum = 0; OpNum < MCPlus::getNumPrimeOperands(Inst); ++OpNum) { |
1553 | const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst, OpNum); |
1554 | if (!Symbol) |
1555 | continue; |
1556 | if (BinaryFunction *BF = BC.getFunctionForSymbol(Symbol)) |
1557 | BF->setHasAddressTaken(true); |
1558 | } |
1559 | } |
1560 | |
1561 | bool BinaryFunction::scanExternalRefs() { |
1562 | bool Success = true; |
1563 | bool DisassemblyFailed = false; |
1564 | |
1565 | // Ignore pseudo functions. |
1566 | if (isPseudo()) |
1567 | return Success; |
1568 | |
1569 | if (opts::NoScan) { |
1570 | clearList(List&: Relocations); |
1571 | clearList(List&: ExternallyReferencedOffsets); |
1572 | |
1573 | return false; |
1574 | } |
1575 | |
1576 | // List of external references for this function. |
1577 | std::vector<Relocation> FunctionRelocations; |
1578 | |
1579 | static BinaryContext::IndependentCodeEmitter Emitter = |
1580 | BC.createIndependentMCCodeEmitter(); |
1581 | |
1582 | ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = getData(); |
1583 | assert(ErrorOrFunctionData && "function data is not available"); |
1584 | ArrayRef<uint8_t> FunctionData = *ErrorOrFunctionData; |
1585 | assert(FunctionData.size() == getMaxSize() && |
1586 | "function size does not match raw data size"); |
1587 | |
1588 | BC.SymbolicDisAsm->setSymbolizer( |
1589 | BC.MIB->createTargetSymbolizer(Function&: *this, /*CreateSymbols*/ CreateNewSymbols: false)); |
1590 | |
1591 | // A list of patches for this function. |
1592 | using PatchTy = std::pair<uint64_t, MCInst>; |
1593 | std::vector<PatchTy> InstructionPatches; |
1594 | |
1595 | // Disassemble contents of the function. Detect code entry points and create |
1596 | // relocations for references to code that will be moved. |
1597 | uint64_t Size = 0; // instruction size |
1598 | MCInst Instruction; |
1599 | MCInst PrevInstruction; |
1600 | for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) { |
1601 | // Check for data inside code and ignore it |
1602 | if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(Offset)) { |
1603 | Size = DataInCodeSize; |
1604 | continue; |
1605 | } |
1606 | |
1607 | const uint64_t AbsoluteInstrAddr = getAddress() + Offset; |
1608 | PrevInstruction = Instruction; |
1609 | if (!BC.SymbolicDisAsm->getInstruction(Instr&: Instruction, Size, |
1610 | Bytes: FunctionData.slice(N: Offset), |
1611 | Address: AbsoluteInstrAddr, CStream&: nulls())) { |
1612 | if (opts::Verbosity >= 1 && !isZeroPaddingAt(Offset)) { |
1613 | BC.errs() |
1614 | << "BOLT-WARNING: unable to disassemble instruction at offset 0x" |
1615 | << Twine::utohexstr(Val: Offset) << " (address 0x" |
1616 | << Twine::utohexstr(Val: AbsoluteInstrAddr) << ") in function "<< *this |
1617 | << '\n'; |
1618 | } |
1619 | Success = false; |
1620 | DisassemblyFailed = true; |
1621 | break; |
1622 | } |
1623 | |
1624 | // Return true if we can skip handling the Target function reference. |
1625 | auto ignoreFunctionRef = [&](const BinaryFunction &Target) { |
1626 | if (&Target == this) |
1627 | return true; |
1628 | |
1629 | // Note that later we may decide not to emit Target function. In that |
1630 | // case, we conservatively create references that will be ignored or |
1631 | // resolved to the same function. |
1632 | if (!BC.shouldEmit(Function: Target)) |
1633 | return true; |
1634 | |
1635 | return false; |
1636 | }; |
1637 | |
1638 | // Return true if we can ignore reference to the symbol. |
1639 | auto ignoreReference = [&](const MCSymbol *TargetSymbol) { |
1640 | if (!TargetSymbol) |
1641 | return true; |
1642 | |
1643 | if (BC.forceSymbolRelocations(SymbolName: TargetSymbol->getName())) |
1644 | return false; |
1645 | |
1646 | BinaryFunction *TargetFunction = BC.getFunctionForSymbol(Symbol: TargetSymbol); |
1647 | if (!TargetFunction) |
1648 | return true; |
1649 | |
1650 | return ignoreFunctionRef(*TargetFunction); |
1651 | }; |
1652 | |
1653 | // Handle calls and branches separately as symbolization doesn't work for |
1654 | // them yet. |
1655 | MCSymbol *BranchTargetSymbol = nullptr; |
1656 | if (BC.MIB->isCall(Inst: Instruction) || BC.MIB->isBranch(Inst: Instruction)) { |
1657 | uint64_t TargetAddress = 0; |
1658 | BC.MIB->evaluateBranch(Inst: Instruction, Addr: AbsoluteInstrAddr, Size, |
1659 | Target&: TargetAddress); |
1660 | |
1661 | // Create an entry point at reference address if needed. |
1662 | BinaryFunction *TargetFunction = |
1663 | BC.getBinaryFunctionContainingAddress(Address: TargetAddress); |
1664 | |
1665 | if (!TargetFunction || ignoreFunctionRef(*TargetFunction)) |
1666 | continue; |
1667 | |
1668 | const uint64_t FunctionOffset = |
1669 | TargetAddress - TargetFunction->getAddress(); |
1670 | BranchTargetSymbol = |
1671 | FunctionOffset ? TargetFunction->addEntryPointAtOffset(Offset: FunctionOffset) |
1672 | : TargetFunction->getSymbol(); |
1673 | } |
1674 | |
1675 | // Can't find more references. Not creating relocations since we are not |
1676 | // moving code. |
1677 | if (!BC.HasRelocations) |
1678 | continue; |
1679 | |
1680 | if (BranchTargetSymbol) { |
1681 | BC.MIB->replaceBranchTarget(Inst&: Instruction, TBB: BranchTargetSymbol, |
1682 | Ctx: Emitter.LocalCtx.get()); |
1683 | } else { |
1684 | analyzeInstructionForFuncReference(Inst: Instruction); |
1685 | const bool NeedsPatch = llvm::any_of( |
1686 | Range: MCPlus::primeOperands(Inst&: Instruction), P: [&](const MCOperand &Op) { |
1687 | return Op.isExpr() && |
1688 | !ignoreReference(BC.MIB->getTargetSymbol(Expr: Op.getExpr())); |
1689 | }); |
1690 | if (!NeedsPatch) |
1691 | continue; |
1692 | } |
1693 | |
1694 | // For AArch64, we need to undo relaxation done by the linker if the target |
1695 | // of the instruction is a function that we plan to move. |
1696 | // |
1697 | // Linker relaxation is documented at: |
1698 | // https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst |
1699 | // under #relocation-optimization. |
1700 | if (const Relocation *Rel; |
1701 | BC.isAArch64() && (Rel = getRelocationAt(Offset))) { |
1702 | // NOP+ADR sequence can originate from either ADRP+ADD or ADRP+LDR. |
1703 | // In either case, we convert it into ADRP+ADD. |
1704 | if (BC.MIB->isADR(Inst: Instruction) && |
1705 | (Rel->Type == ELF::R_AARCH64_ADD_ABS_LO12_NC || |
1706 | Rel->Type == ELF::R_AARCH64_LD64_GOT_LO12_NC)) { |
1707 | if (!BC.MIB->isNoop(Inst: PrevInstruction)) { |
1708 | // In case of unexpected conversion from the linker, skip target |
1709 | // optimization. |
1710 | const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst: Instruction); |
1711 | BC.errs() << "BOLT-WARNING: cannot undo linker relaxation for " |
1712 | "instruction at 0x" |
1713 | << Twine::utohexstr(Val: AbsoluteInstrAddr) << " referencing " |
1714 | << Symbol->getName() << '\n'; |
1715 | if (BinaryFunction *TargetBF = BC.getFunctionForSymbol(Symbol)) |
1716 | TargetBF->setIgnored(); |
1717 | continue; |
1718 | } |
1719 | |
1720 | InstructionListType AdrpAdd = |
1721 | BC.MIB->undoAdrpAddRelaxation(ADRInst: Instruction, Ctx: BC.Ctx.get()); |
1722 | assert(AdrpAdd.size() == 2 && "Two instructions expected"); |
1723 | LLVM_DEBUG({ |
1724 | dbgs() << "BOLT-DEBUG: linker relaxation undone for instruction " |
1725 | "at 0x" |
1726 | << Twine::utohexstr(AbsoluteInstrAddr) << '\n'; |
1727 | }); |
1728 | InstructionPatches.push_back(x: {AbsoluteInstrAddr - 4, AdrpAdd[0]}); |
1729 | InstructionPatches.push_back(x: {AbsoluteInstrAddr, AdrpAdd[1]}); |
1730 | continue; |
1731 | } |
1732 | |
1733 | // If ADR was emitted by the compiler/assembler to reference a nearby |
1734 | // local function, we cannot move away that function due to ADR address |
1735 | // span limitation. Hence, we skip the optimization. |
1736 | if (BC.MIB->isADR(Inst: Instruction) && |
1737 | Rel->Type == ELF::R_AARCH64_ADR_PREL_LO21) { |
1738 | BC.errs() << "BOLT-WARNING: unable to convert ADR that references " |
1739 | << Rel->Symbol->getName() |
1740 | << ". Will not optimize the target\n"; |
1741 | if (BinaryFunction *TargetBF = BC.getFunctionForSymbol(Symbol: Rel->Symbol)) |
1742 | TargetBF->setIgnored(); |
1743 | continue; |
1744 | } |
1745 | |
1746 | // In the case of GOT load, ADRP+LDR can also be converted into ADRP+ADD. |
1747 | // When this happens, it's not always possible to properly symbolize ADRP |
1748 | // operand and we might have to adjust the operand based on the next |
1749 | // instruction. |
1750 | if (BC.MIB->isAddXri(Inst: Instruction) && |
1751 | Rel->Type == ELF::R_AARCH64_LD64_GOT_LO12_NC) { |
1752 | if (!BC.MIB->matchAdrpAddPair(Adrp: PrevInstruction, Add: Instruction)) { |
1753 | BC.errs() << "BOLT-ERROR: cannot find matching ADRP for relaxed LDR " |
1754 | "instruction at 0x" |
1755 | << Twine::utohexstr(Val: AbsoluteInstrAddr) << '\n'; |
1756 | exit(status: 1); |
1757 | } |
1758 | |
1759 | // Check if ADRP was already patched. If not, add a new patch for it. |
1760 | if (InstructionPatches.empty() || |
1761 | InstructionPatches.back().first != AbsoluteInstrAddr - 4) |
1762 | InstructionPatches.push_back( |
1763 | x: {AbsoluteInstrAddr - 4, PrevInstruction}); |
1764 | |
1765 | // Adjust the operand for ADRP from the patch. |
1766 | MCInst &ADRPInst = InstructionPatches.back().second; |
1767 | const MCSymbol *ADRPSymbol = BC.MIB->getTargetSymbol(Inst: ADRPInst); |
1768 | const MCSymbol *ADDSymbol = BC.MIB->getTargetSymbol(Inst: Instruction); |
1769 | if (ADRPSymbol != ADDSymbol) { |
1770 | const int64_t Addend = BC.MIB->getTargetAddend(Inst: Instruction); |
1771 | BC.MIB->setOperandToSymbolRef(Inst&: ADRPInst, /*OpNum*/ 1, Symbol: ADDSymbol, |
1772 | Addend, Ctx: BC.Ctx.get(), |
1773 | RelType: ELF::R_AARCH64_NONE); |
1774 | } |
1775 | } |
1776 | } |
1777 | |
1778 | // On AArch64, we use instruction patches for fixing references. We make an |
1779 | // exception for branch instructions since they require optional |
1780 | // relocations. |
1781 | if (BC.isAArch64()) { |
1782 | if (!BranchTargetSymbol) { |
1783 | LLVM_DEBUG(BC.printInstruction(dbgs(), Instruction, AbsoluteInstrAddr)); |
1784 | InstructionPatches.push_back(x: {AbsoluteInstrAddr, Instruction}); |
1785 | continue; |
1786 | } |
1787 | |
1788 | // Conditional tail calls require new relocation types that are currently |
1789 | // not supported. https://github.com/llvm/llvm-project/issues/138264 |
1790 | if (BC.MIB->isConditionalBranch(Inst: Instruction)) { |
1791 | if (BinaryFunction *TargetBF = |
1792 | BC.getFunctionForSymbol(Symbol: BranchTargetSymbol)) { |
1793 | TargetBF->setNeedsPatch(true); |
1794 | continue; |
1795 | } |
1796 | } |
1797 | } |
1798 | |
1799 | // Emit the instruction using temp emitter and generate relocations. |
1800 | SmallString<256> Code; |
1801 | SmallVector<MCFixup, 4> Fixups; |
1802 | Emitter.MCE->encodeInstruction(Inst: Instruction, CB&: Code, Fixups, STI: *BC.STI); |
1803 | |
1804 | // Create relocation for every fixup. |
1805 | for (const MCFixup &Fixup : Fixups) { |
1806 | std::optional<Relocation> Rel = BC.MIB->createRelocation(Fixup, MAB: *BC.MAB); |
1807 | if (!Rel) { |
1808 | Success = false; |
1809 | continue; |
1810 | } |
1811 | |
1812 | if (ignoreReference(Rel->Symbol)) |
1813 | continue; |
1814 | |
1815 | if (Relocation::getSizeForType(Type: Rel->Type) < 4) { |
1816 | // If the instruction uses a short form, then we might not be able |
1817 | // to handle the rewrite without relaxation, and hence cannot reliably |
1818 | // create an external reference relocation. |
1819 | Success = false; |
1820 | continue; |
1821 | } |
1822 | |
1823 | if (BC.isAArch64()) { |
1824 | // Allow the relocation to be skipped in case of the overflow during the |
1825 | // relocation value encoding. |
1826 | Rel->setOptional(); |
1827 | |
1828 | if (!opts::CompactCodeModel) |
1829 | if (BinaryFunction *TargetBF = BC.getFunctionForSymbol(Symbol: Rel->Symbol)) |
1830 | TargetBF->setNeedsPatch(true); |
1831 | } |
1832 | |
1833 | Rel->Offset += getAddress() - getOriginSection()->getAddress() + Offset; |
1834 | FunctionRelocations.push_back(x: *Rel); |
1835 | } |
1836 | |
1837 | if (!Success) |
1838 | break; |
1839 | } |
1840 | |
1841 | // Reset symbolizer for the disassembler. |
1842 | BC.SymbolicDisAsm->setSymbolizer(nullptr); |
1843 | |
1844 | // Add relocations unless disassembly failed for this function. |
1845 | if (!DisassemblyFailed) |
1846 | for (Relocation &Rel : FunctionRelocations) |
1847 | getOriginSection()->addPendingRelocation(Rel); |
1848 | |
1849 | // Add patches grouping them together. |
1850 | if (!InstructionPatches.empty()) { |
1851 | uint64_t PatchGroupAddress; |
1852 | InstructionListType PatchGroup; |
1853 | for (auto PI = InstructionPatches.begin(), PE = InstructionPatches.end(); |
1854 | PI != PE; ++PI) { |
1855 | auto &Patch = *PI; |
1856 | if (PatchGroup.empty()) |
1857 | PatchGroupAddress = Patch.first; |
1858 | PatchGroup.push_back(x: Patch.second); |
1859 | if (std::next(x: PI) == PE || std::next(x: PI)->first != Patch.first + 4) { |
1860 | BC.createInstructionPatch(Address: PatchGroupAddress, Instructions: PatchGroup); |
1861 | PatchGroup.clear(); |
1862 | } |
1863 | } |
1864 | } |
1865 | |
1866 | // Inform BinaryContext that this function symbols will not be defined and |
1867 | // relocations should not be created against them. |
1868 | if (BC.HasRelocations) { |
1869 | for (std::pair<const uint32_t, MCSymbol *> &LI : Labels) |
1870 | BC.UndefinedSymbols.insert(x: LI.second); |
1871 | for (MCSymbol *const EndLabel : FunctionEndLabels) |
1872 | if (EndLabel) |
1873 | BC.UndefinedSymbols.insert(x: EndLabel); |
1874 | } |
1875 | |
1876 | clearList(List&: Relocations); |
1877 | clearList(List&: ExternallyReferencedOffsets); |
1878 | |
1879 | if (Success && BC.HasRelocations) |
1880 | HasExternalRefRelocations = true; |
1881 | |
1882 | if (opts::Verbosity >= 1 && !Success) |
1883 | BC.outs() << "BOLT-INFO: failed to scan refs for "<< *this << '\n'; |
1884 | |
1885 | return Success; |
1886 | } |
1887 | |
1888 | void BinaryFunction::postProcessEntryPoints() { |
1889 | if (!isSimple()) |
1890 | return; |
1891 | |
1892 | for (auto &KV : Labels) { |
1893 | MCSymbol *Label = KV.second; |
1894 | if (!getSecondaryEntryPointSymbol(BBLabel: Label)) |
1895 | continue; |
1896 | |
1897 | // In non-relocation mode there's potentially an external undetectable |
1898 | // reference to the entry point and hence we cannot move this entry |
1899 | // point. Optimizing without moving could be difficult. |
1900 | // In aggregation, register any known entry points for CFG construction. |
1901 | if (!BC.HasRelocations && !opts::AggregateOnly) |
1902 | setSimple(false); |
1903 | |
1904 | const uint32_t Offset = KV.first; |
1905 | |
1906 | // If we are at Offset 0 and there is no instruction associated with it, |
1907 | // this means this is an empty function. Just ignore. If we find an |
1908 | // instruction at this offset, this entry point is valid. |
1909 | if (!Offset || getInstructionAtOffset(Offset)) |
1910 | continue; |
1911 | |
1912 | // On AArch64 there are legitimate reasons to have references past the |
1913 | // end of the function, e.g. jump tables. |
1914 | if (BC.isAArch64() && Offset == getSize()) |
1915 | continue; |
1916 | |
1917 | // If we have grabbed a wrong code label which actually points to some |
1918 | // constant island inside the function, ignore this label and remove it |
1919 | // from the secondary entry point map. |
1920 | if (isStartOfConstantIsland(Offset)) { |
1921 | BC.SymbolToFunctionMap.erase(x: Label); |
1922 | removeSymbolFromSecondaryEntryPointMap(Label); |
1923 | continue; |
1924 | } |
1925 | |
1926 | BC.errs() << "BOLT-WARNING: reference in the middle of instruction " |
1927 | "detected in function " |
1928 | << *this << " at offset 0x"<< Twine::utohexstr(Val: Offset) << '\n'; |
1929 | if (BC.HasRelocations) |
1930 | setIgnored(); |
1931 | setSimple(false); |
1932 | return; |
1933 | } |
1934 | } |
1935 | |
1936 | void BinaryFunction::postProcessJumpTables() { |
1937 | // Create labels for all entries. |
1938 | for (auto &JTI : JumpTables) { |
1939 | JumpTable &JT = *JTI.second; |
1940 | if (JT.Type == JumpTable::JTT_PIC && opts::JumpTables == JTS_BASIC) { |
1941 | opts::JumpTables = JTS_MOVE; |
1942 | BC.outs() << "BOLT-INFO: forcing -jump-tables=move as PIC jump table was " |
1943 | "detected in function " |
1944 | << *this << '\n'; |
1945 | } |
1946 | const uint64_t BDSize = |
1947 | BC.getBinaryDataAtAddress(Address: JT.getAddress())->getSize(); |
1948 | if (!BDSize) { |
1949 | BC.setBinaryDataSize(Address: JT.getAddress(), Size: JT.getSize()); |
1950 | } else { |
1951 | assert(BDSize >= JT.getSize() && |
1952 | "jump table cannot be larger than the containing object"); |
1953 | } |
1954 | if (!JT.Entries.empty()) |
1955 | continue; |
1956 | |
1957 | bool HasOneParent = (JT.Parents.size() == 1); |
1958 | for (uint64_t EntryAddress : JT.EntriesAsAddress) { |
1959 | // builtin_unreachable does not belong to any function |
1960 | // Need to handle separately |
1961 | bool IsBuiltinUnreachable = |
1962 | llvm::any_of(Range&: JT.Parents, P: [&](const BinaryFunction *Parent) { |
1963 | return EntryAddress == Parent->getAddress() + Parent->getSize(); |
1964 | }); |
1965 | if (IsBuiltinUnreachable) { |
1966 | MCSymbol *Label = getOrCreateLocalLabel(Address: EntryAddress, CreatePastEnd: true); |
1967 | JT.Entries.push_back(x: Label); |
1968 | continue; |
1969 | } |
1970 | // Create a local label for targets that cannot be reached by other |
1971 | // fragments. Otherwise, create a secondary entry point in the target |
1972 | // function. |
1973 | BinaryFunction *TargetBF = |
1974 | BC.getBinaryFunctionContainingAddress(Address: EntryAddress); |
1975 | MCSymbol *Label; |
1976 | if (HasOneParent && TargetBF == this) { |
1977 | Label = getOrCreateLocalLabel(Address: EntryAddress, CreatePastEnd: true); |
1978 | } else { |
1979 | const uint64_t Offset = EntryAddress - TargetBF->getAddress(); |
1980 | Label = Offset ? TargetBF->addEntryPointAtOffset(Offset) |
1981 | : TargetBF->getSymbol(); |
1982 | } |
1983 | JT.Entries.push_back(x: Label); |
1984 | } |
1985 | } |
1986 | |
1987 | // Add TakenBranches from JumpTables. |
1988 | // |
1989 | // We want to do it after initial processing since we don't know jump tables' |
1990 | // boundaries until we process them all. |
1991 | for (auto &JTSite : JTSites) { |
1992 | const uint64_t JTSiteOffset = JTSite.first; |
1993 | const uint64_t JTAddress = JTSite.second; |
1994 | const JumpTable *JT = getJumpTableContainingAddress(Address: JTAddress); |
1995 | assert(JT && "cannot find jump table for address"); |
1996 | |
1997 | uint64_t EntryOffset = JTAddress - JT->getAddress(); |
1998 | while (EntryOffset < JT->getSize()) { |
1999 | uint64_t EntryAddress = JT->EntriesAsAddress[EntryOffset / JT->EntrySize]; |
2000 | uint64_t TargetOffset = EntryAddress - getAddress(); |
2001 | if (TargetOffset < getSize()) { |
2002 | TakenBranches.emplace_back(Args: JTSiteOffset, Args&: TargetOffset); |
2003 | |
2004 | if (opts::StrictMode) |
2005 | registerReferencedOffset(Offset: TargetOffset); |
2006 | } |
2007 | |
2008 | EntryOffset += JT->EntrySize; |
2009 | |
2010 | // A label at the next entry means the end of this jump table. |
2011 | if (JT->Labels.count(x: EntryOffset)) |
2012 | break; |
2013 | } |
2014 | } |
2015 | clearList(List&: JTSites); |
2016 | |
2017 | // Conservatively populate all possible destinations for unknown indirect |
2018 | // branches. |
2019 | if (opts::StrictMode && hasInternalReference()) { |
2020 | for (uint64_t Offset : UnknownIndirectBranchOffsets) { |
2021 | for (uint64_t PossibleDestination : ExternallyReferencedOffsets) { |
2022 | // Ignore __builtin_unreachable(). |
2023 | if (PossibleDestination == getSize()) |
2024 | continue; |
2025 | TakenBranches.emplace_back(Args&: Offset, Args&: PossibleDestination); |
2026 | } |
2027 | } |
2028 | } |
2029 | } |
2030 | |
2031 | bool BinaryFunction::validateExternallyReferencedOffsets() { |
2032 | SmallPtrSet<MCSymbol *, 4> JTTargets; |
2033 | for (const JumpTable *JT : llvm::make_second_range(c&: JumpTables)) |
2034 | JTTargets.insert_range(R: JT->Entries); |
2035 | |
2036 | bool HasUnclaimedReference = false; |
2037 | for (uint64_t Destination : ExternallyReferencedOffsets) { |
2038 | // Ignore __builtin_unreachable(). |
2039 | if (Destination == getSize()) |
2040 | continue; |
2041 | // Ignore constant islands |
2042 | if (isInConstantIsland(Address: Destination + getAddress())) |
2043 | continue; |
2044 | |
2045 | if (BinaryBasicBlock *BB = getBasicBlockAtOffset(Offset: Destination)) { |
2046 | // Check if the externally referenced offset is a recognized jump table |
2047 | // target. |
2048 | if (JTTargets.contains(Ptr: BB->getLabel())) |
2049 | continue; |
2050 | |
2051 | if (opts::Verbosity >= 1) { |
2052 | BC.errs() << "BOLT-WARNING: unclaimed data to code reference (possibly " |
2053 | << "an unrecognized jump table entry) to "<< BB->getName() |
2054 | << " in "<< *this << "\n"; |
2055 | } |
2056 | auto L = BC.scopeLock(); |
2057 | addEntryPoint(BB: *BB); |
2058 | } else { |
2059 | BC.errs() << "BOLT-WARNING: unknown data to code reference to offset " |
2060 | << Twine::utohexstr(Val: Destination) << " in "<< *this << "\n"; |
2061 | setIgnored(); |
2062 | } |
2063 | HasUnclaimedReference = true; |
2064 | } |
2065 | return !HasUnclaimedReference; |
2066 | } |
2067 | |
2068 | bool BinaryFunction::postProcessIndirectBranches( |
2069 | MCPlusBuilder::AllocatorIdTy AllocId) { |
2070 | auto addUnknownControlFlow = [&](BinaryBasicBlock &BB) { |
2071 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding unknown control flow in "<< *this |
2072 | << " for "<< BB.getName() << "\n"); |
2073 | HasUnknownControlFlow = true; |
2074 | BB.removeAllSuccessors(); |
2075 | for (uint64_t PossibleDestination : ExternallyReferencedOffsets) |
2076 | if (BinaryBasicBlock *SuccBB = getBasicBlockAtOffset(Offset: PossibleDestination)) |
2077 | BB.addSuccessor(Succ: SuccBB); |
2078 | }; |
2079 | |
2080 | uint64_t NumIndirectJumps = 0; |
2081 | MCInst *LastIndirectJump = nullptr; |
2082 | BinaryBasicBlock *LastIndirectJumpBB = nullptr; |
2083 | uint64_t LastJT = 0; |
2084 | uint16_t LastJTIndexReg = BC.MIB->getNoRegister(); |
2085 | for (BinaryBasicBlock &BB : blocks()) { |
2086 | for (BinaryBasicBlock::iterator II = BB.begin(); II != BB.end(); ++II) { |
2087 | MCInst &Instr = *II; |
2088 | if (!BC.MIB->isIndirectBranch(Inst: Instr)) |
2089 | continue; |
2090 | |
2091 | // If there's an indirect branch in a single-block function - |
2092 | // it must be a tail call. |
2093 | if (BasicBlocks.size() == 1) { |
2094 | BC.MIB->convertJmpToTailCall(Inst&: Instr); |
2095 | return true; |
2096 | } |
2097 | |
2098 | ++NumIndirectJumps; |
2099 | |
2100 | if (opts::StrictMode && !hasInternalReference()) { |
2101 | BC.MIB->convertJmpToTailCall(Inst&: Instr); |
2102 | break; |
2103 | } |
2104 | |
2105 | // Validate the tail call or jump table assumptions now that we know |
2106 | // basic block boundaries. |
2107 | if (BC.MIB->isTailCall(Inst: Instr) || BC.MIB->getJumpTable(Inst: Instr)) { |
2108 | const unsigned PtrSize = BC.AsmInfo->getCodePointerSize(); |
2109 | MCInst *MemLocInstr; |
2110 | unsigned BaseRegNum, IndexRegNum; |
2111 | int64_t DispValue; |
2112 | const MCExpr *DispExpr; |
2113 | MCInst *PCRelBaseInstr; |
2114 | MCInst *FixedEntryLoadInstr; |
2115 | IndirectBranchType Type = BC.MIB->analyzeIndirectBranch( |
2116 | Instruction&: Instr, Begin: BB.begin(), End: II, PtrSize, MemLocInstr, BaseRegNum, |
2117 | IndexRegNum, DispValue, DispExpr, PCRelBaseOut&: PCRelBaseInstr, |
2118 | FixedEntryLoadInst&: FixedEntryLoadInstr); |
2119 | if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr) |
2120 | continue; |
2121 | |
2122 | if (!opts::StrictMode) |
2123 | return false; |
2124 | |
2125 | if (BC.MIB->isTailCall(Inst: Instr)) { |
2126 | BC.MIB->convertTailCallToJmp(Inst&: Instr); |
2127 | } else { |
2128 | LastIndirectJump = &Instr; |
2129 | LastIndirectJumpBB = &BB; |
2130 | LastJT = BC.MIB->getJumpTable(Inst: Instr); |
2131 | LastJTIndexReg = BC.MIB->getJumpTableIndexReg(Inst: Instr); |
2132 | BC.MIB->unsetJumpTable(Inst&: Instr); |
2133 | |
2134 | JumpTable *JT = BC.getJumpTableContainingAddress(Address: LastJT); |
2135 | if (JT->Type == JumpTable::JTT_NORMAL) { |
2136 | // Invalidating the jump table may also invalidate other jump table |
2137 | // boundaries. Until we have/need a support for this, mark the |
2138 | // function as non-simple. |
2139 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejected jump table reference" |
2140 | << JT->getName() << " in "<< *this << '\n'); |
2141 | return false; |
2142 | } |
2143 | } |
2144 | |
2145 | addUnknownControlFlow(BB); |
2146 | continue; |
2147 | } |
2148 | |
2149 | // If this block contains an epilogue code and has an indirect branch, |
2150 | // then most likely it's a tail call. Otherwise, we cannot tell for sure |
2151 | // what it is and conservatively reject the function's CFG. |
2152 | bool IsEpilogue = llvm::any_of(Range&: BB, P: [&](const MCInst &Instr) { |
2153 | return BC.MIB->isLeave(Inst: Instr) || BC.MIB->isPop(Inst: Instr); |
2154 | }); |
2155 | if (IsEpilogue) { |
2156 | BC.MIB->convertJmpToTailCall(Inst&: Instr); |
2157 | BB.removeAllSuccessors(); |
2158 | continue; |
2159 | } |
2160 | |
2161 | if (opts::Verbosity >= 2) { |
2162 | BC.outs() << "BOLT-INFO: rejected potential indirect tail call in " |
2163 | << "function "<< *this << " in basic block "<< BB.getName() |
2164 | << ".\n"; |
2165 | LLVM_DEBUG(BC.printInstructions(dbgs(), BB.begin(), BB.end(), |
2166 | BB.getOffset(), this, true)); |
2167 | } |
2168 | |
2169 | if (!opts::StrictMode) |
2170 | return false; |
2171 | |
2172 | addUnknownControlFlow(BB); |
2173 | } |
2174 | } |
2175 | |
2176 | if (HasInternalLabelReference) |
2177 | return false; |
2178 | |
2179 | // If there's only one jump table, and one indirect jump, and no other |
2180 | // references, then we should be able to derive the jump table even if we |
2181 | // fail to match the pattern. |
2182 | if (HasUnknownControlFlow && NumIndirectJumps == 1 && |
2183 | JumpTables.size() == 1 && LastIndirectJump && |
2184 | !BC.getJumpTableContainingAddress(Address: LastJT)->IsSplit) { |
2185 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: unsetting unknown control flow in " |
2186 | << *this << '\n'); |
2187 | BC.MIB->setJumpTable(Inst&: *LastIndirectJump, Value: LastJT, IndexReg: LastJTIndexReg, AllocId); |
2188 | HasUnknownControlFlow = false; |
2189 | |
2190 | LastIndirectJumpBB->updateJumpTableSuccessors(); |
2191 | } |
2192 | |
2193 | // Validate that all data references to function offsets are claimed by |
2194 | // recognized jump tables. Register externally referenced blocks as entry |
2195 | // points. |
2196 | if (!opts::StrictMode && hasInternalReference()) { |
2197 | if (!validateExternallyReferencedOffsets()) |
2198 | return false; |
2199 | } |
2200 | |
2201 | if (HasUnknownControlFlow && !BC.HasRelocations) |
2202 | return false; |
2203 | |
2204 | return true; |
2205 | } |
2206 | |
2207 | void BinaryFunction::recomputeLandingPads() { |
2208 | updateBBIndices(StartIndex: 0); |
2209 | |
2210 | for (BinaryBasicBlock *BB : BasicBlocks) { |
2211 | BB->LandingPads.clear(); |
2212 | BB->Throwers.clear(); |
2213 | } |
2214 | |
2215 | for (BinaryBasicBlock *BB : BasicBlocks) { |
2216 | std::unordered_set<const BinaryBasicBlock *> BBLandingPads; |
2217 | for (MCInst &Instr : *BB) { |
2218 | if (!BC.MIB->isInvoke(Inst: Instr)) |
2219 | continue; |
2220 | |
2221 | const std::optional<MCPlus::MCLandingPad> EHInfo = |
2222 | BC.MIB->getEHInfo(Inst: Instr); |
2223 | if (!EHInfo || !EHInfo->first) |
2224 | continue; |
2225 | |
2226 | BinaryBasicBlock *LPBlock = getBasicBlockForLabel(Label: EHInfo->first); |
2227 | if (!BBLandingPads.count(x: LPBlock)) { |
2228 | BBLandingPads.insert(x: LPBlock); |
2229 | BB->LandingPads.emplace_back(Args&: LPBlock); |
2230 | LPBlock->Throwers.emplace_back(Args&: BB); |
2231 | } |
2232 | } |
2233 | } |
2234 | } |
2235 | |
2236 | Error BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) { |
2237 | auto &MIB = BC.MIB; |
2238 | |
2239 | if (!isSimple()) { |
2240 | assert(!BC.HasRelocations && |
2241 | "cannot process file with non-simple function in relocs mode"); |
2242 | return createNonFatalBOLTError(S: ""); |
2243 | } |
2244 | |
2245 | if (CurrentState != State::Disassembled) |
2246 | return createNonFatalBOLTError(S: ""); |
2247 | |
2248 | assert(BasicBlocks.empty() && "basic block list should be empty"); |
2249 | assert((Labels.find(getFirstInstructionOffset()) != Labels.end()) && |
2250 | "first instruction should always have a label"); |
2251 | |
2252 | // Create basic blocks in the original layout order: |
2253 | // |
2254 | // * Every instruction with associated label marks |
2255 | // the beginning of a basic block. |
2256 | // * Conditional instruction marks the end of a basic block, |
2257 | // except when the following instruction is an |
2258 | // unconditional branch, and the unconditional branch is not |
2259 | // a destination of another branch. In the latter case, the |
2260 | // basic block will consist of a single unconditional branch |
2261 | // (missed "double-jump" optimization). |
2262 | // |
2263 | // Created basic blocks are sorted in layout order since they are |
2264 | // created in the same order as instructions, and instructions are |
2265 | // sorted by offsets. |
2266 | BinaryBasicBlock *InsertBB = nullptr; |
2267 | BinaryBasicBlock *PrevBB = nullptr; |
2268 | bool IsLastInstrNop = false; |
2269 | // Offset of the last non-nop instruction. |
2270 | uint64_t LastInstrOffset = 0; |
2271 | |
2272 | auto addCFIPlaceholders = [this](uint64_t CFIOffset, |
2273 | BinaryBasicBlock *InsertBB) { |
2274 | for (auto FI = OffsetToCFI.lower_bound(x: CFIOffset), |
2275 | FE = OffsetToCFI.upper_bound(x: CFIOffset); |
2276 | FI != FE; ++FI) { |
2277 | addCFIPseudo(BB: InsertBB, Pos: InsertBB->end(), Offset: FI->second); |
2278 | } |
2279 | }; |
2280 | |
2281 | // For profiling purposes we need to save the offset of the last instruction |
2282 | // in the basic block. |
2283 | // NOTE: nops always have an Offset annotation. Annotate the last non-nop as |
2284 | // older profiles ignored nops. |
2285 | auto updateOffset = [&](uint64_t Offset) { |
2286 | assert(PrevBB && PrevBB != InsertBB && "invalid previous block"); |
2287 | MCInst *LastNonNop = nullptr; |
2288 | for (BinaryBasicBlock::reverse_iterator RII = PrevBB->getLastNonPseudo(), |
2289 | E = PrevBB->rend(); |
2290 | RII != E; ++RII) { |
2291 | if (!BC.MIB->isPseudo(Inst: *RII) && !BC.MIB->isNoop(Inst: *RII)) { |
2292 | LastNonNop = &*RII; |
2293 | break; |
2294 | } |
2295 | } |
2296 | if (LastNonNop && !MIB->getOffset(Inst: *LastNonNop)) |
2297 | MIB->setOffset(Inst&: *LastNonNop, Offset: static_cast<uint32_t>(Offset)); |
2298 | }; |
2299 | |
2300 | for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) { |
2301 | const uint32_t Offset = I->first; |
2302 | MCInst &Instr = I->second; |
2303 | |
2304 | auto LI = Labels.find(x: Offset); |
2305 | if (LI != Labels.end()) { |
2306 | // Always create new BB at branch destination. |
2307 | PrevBB = InsertBB ? InsertBB : PrevBB; |
2308 | InsertBB = addBasicBlockAt(Offset: LI->first, Label: LI->second); |
2309 | if (opts::PreserveBlocksAlignment && IsLastInstrNop) |
2310 | InsertBB->setDerivedAlignment(); |
2311 | |
2312 | if (PrevBB) |
2313 | updateOffset(LastInstrOffset); |
2314 | } |
2315 | |
2316 | // Mark all nops with Offset for profile tracking purposes. |
2317 | if (MIB->isNoop(Inst: Instr) && !MIB->getOffset(Inst: Instr)) { |
2318 | // If "Offset" annotation is not present, set it and mark the nop for |
2319 | // deletion. |
2320 | MIB->setOffset(Inst&: Instr, Offset: static_cast<uint32_t>(Offset)); |
2321 | // Annotate ordinary nops, so we can safely delete them if required. |
2322 | MIB->addAnnotation(Inst&: Instr, Name: "NOP", Val: static_cast<uint32_t>(1), AllocatorId); |
2323 | } |
2324 | |
2325 | if (!InsertBB) { |
2326 | // It must be a fallthrough or unreachable code. Create a new block unless |
2327 | // we see an unconditional branch following a conditional one. The latter |
2328 | // should not be a conditional tail call. |
2329 | assert(PrevBB && "no previous basic block for a fall through"); |
2330 | MCInst *PrevInstr = PrevBB->getLastNonPseudoInstr(); |
2331 | assert(PrevInstr && "no previous instruction for a fall through"); |
2332 | if (MIB->isUnconditionalBranch(Inst: Instr) && |
2333 | !MIB->isIndirectBranch(Inst: *PrevInstr) && |
2334 | !MIB->isUnconditionalBranch(Inst: *PrevInstr) && |
2335 | !MIB->getConditionalTailCall(Inst: *PrevInstr) && |
2336 | !MIB->isReturn(Inst: *PrevInstr)) { |
2337 | // Temporarily restore inserter basic block. |
2338 | InsertBB = PrevBB; |
2339 | } else { |
2340 | MCSymbol *Label; |
2341 | { |
2342 | auto L = BC.scopeLock(); |
2343 | Label = BC.Ctx->createNamedTempSymbol(Name: "FT"); |
2344 | } |
2345 | InsertBB = addBasicBlockAt(Offset, Label); |
2346 | if (opts::PreserveBlocksAlignment && IsLastInstrNop) |
2347 | InsertBB->setDerivedAlignment(); |
2348 | updateOffset(LastInstrOffset); |
2349 | } |
2350 | } |
2351 | if (Offset == getFirstInstructionOffset()) { |
2352 | // Add associated CFI pseudos in the first offset |
2353 | addCFIPlaceholders(Offset, InsertBB); |
2354 | } |
2355 | |
2356 | const bool IsBlockEnd = MIB->isTerminator(Inst: Instr); |
2357 | IsLastInstrNop = MIB->isNoop(Inst: Instr); |
2358 | if (!IsLastInstrNop) |
2359 | LastInstrOffset = Offset; |
2360 | InsertBB->addInstruction(Inst: std::move(Instr)); |
2361 | |
2362 | // Add associated CFI instrs. We always add the CFI instruction that is |
2363 | // located immediately after this instruction, since the next CFI |
2364 | // instruction reflects the change in state caused by this instruction. |
2365 | auto NextInstr = std::next(x: I); |
2366 | uint64_t CFIOffset; |
2367 | if (NextInstr != E) |
2368 | CFIOffset = NextInstr->first; |
2369 | else |
2370 | CFIOffset = getSize(); |
2371 | |
2372 | // Note: this potentially invalidates instruction pointers/iterators. |
2373 | addCFIPlaceholders(CFIOffset, InsertBB); |
2374 | |
2375 | if (IsBlockEnd) { |
2376 | PrevBB = InsertBB; |
2377 | InsertBB = nullptr; |
2378 | } |
2379 | } |
2380 | |
2381 | if (BasicBlocks.empty()) { |
2382 | setSimple(false); |
2383 | return createNonFatalBOLTError(S: ""); |
2384 | } |
2385 | |
2386 | // Intermediate dump. |
2387 | LLVM_DEBUG(print(dbgs(), "after creating basic blocks")); |
2388 | |
2389 | // TODO: handle properly calls to no-return functions, |
2390 | // e.g. exit(3), etc. Otherwise we'll see a false fall-through |
2391 | // blocks. |
2392 | |
2393 | // Remove duplicates branches. We can get a bunch of them from jump tables. |
2394 | // Without doing jump table value profiling we don't have a use for extra |
2395 | // (duplicate) branches. |
2396 | llvm::sort(C&: TakenBranches); |
2397 | auto NewEnd = llvm::unique(R&: TakenBranches); |
2398 | TakenBranches.erase(CS: NewEnd, CE: TakenBranches.end()); |
2399 | |
2400 | for (std::pair<uint32_t, uint32_t> &Branch : TakenBranches) { |
2401 | LLVM_DEBUG(dbgs() << "registering branch [0x" |
2402 | << Twine::utohexstr(Branch.first) << "] -> [0x" |
2403 | << Twine::utohexstr(Branch.second) << "]\n"); |
2404 | BinaryBasicBlock *FromBB = getBasicBlockContainingOffset(Offset: Branch.first); |
2405 | BinaryBasicBlock *ToBB = getBasicBlockAtOffset(Offset: Branch.second); |
2406 | if (!FromBB || !ToBB) { |
2407 | if (!FromBB) |
2408 | BC.errs() << "BOLT-ERROR: cannot find BB containing the branch.\n"; |
2409 | if (!ToBB) |
2410 | BC.errs() |
2411 | << "BOLT-ERROR: cannot find BB containing branch destination.\n"; |
2412 | return createFatalBOLTError(S: BC.generateBugReportMessage( |
2413 | Message: "disassembly failed - inconsistent branch found.", Function: *this)); |
2414 | } |
2415 | |
2416 | FromBB->addSuccessor(Succ: ToBB); |
2417 | } |
2418 | |
2419 | // Add fall-through branches. |
2420 | PrevBB = nullptr; |
2421 | bool IsPrevFT = false; // Is previous block a fall-through. |
2422 | for (BinaryBasicBlock *BB : BasicBlocks) { |
2423 | if (IsPrevFT) |
2424 | PrevBB->addSuccessor(Succ: BB); |
2425 | |
2426 | if (BB->empty()) { |
2427 | IsPrevFT = true; |
2428 | PrevBB = BB; |
2429 | continue; |
2430 | } |
2431 | |
2432 | MCInst *LastInstr = BB->getLastNonPseudoInstr(); |
2433 | assert(LastInstr && |
2434 | "should have non-pseudo instruction in non-empty block"); |
2435 | |
2436 | if (BB->succ_size() == 0) { |
2437 | // Since there's no existing successors, we know the last instruction is |
2438 | // not a conditional branch. Thus if it's a terminator, it shouldn't be a |
2439 | // fall-through. |
2440 | // |
2441 | // Conditional tail call is a special case since we don't add a taken |
2442 | // branch successor for it. |
2443 | IsPrevFT = !MIB->isTerminator(Inst: *LastInstr) || |
2444 | MIB->getConditionalTailCall(Inst: *LastInstr); |
2445 | } else if (BB->succ_size() == 1) { |
2446 | IsPrevFT = MIB->isConditionalBranch(Inst: *LastInstr); |
2447 | } else { |
2448 | IsPrevFT = false; |
2449 | } |
2450 | |
2451 | PrevBB = BB; |
2452 | } |
2453 | |
2454 | // Assign landing pads and throwers info. |
2455 | recomputeLandingPads(); |
2456 | |
2457 | // Assign CFI information to each BB entry. |
2458 | annotateCFIState(); |
2459 | |
2460 | // Annotate invoke instructions with GNU_args_size data. |
2461 | propagateGnuArgsSizeInfo(AllocId: AllocatorId); |
2462 | |
2463 | // Set the basic block layout to the original order and set end offsets. |
2464 | PrevBB = nullptr; |
2465 | for (BinaryBasicBlock *BB : BasicBlocks) { |
2466 | Layout.addBasicBlock(BB); |
2467 | if (PrevBB) |
2468 | PrevBB->setEndOffset(BB->getOffset()); |
2469 | PrevBB = BB; |
2470 | } |
2471 | PrevBB->setEndOffset(getSize()); |
2472 | |
2473 | Layout.updateLayoutIndices(); |
2474 | |
2475 | normalizeCFIState(); |
2476 | |
2477 | // Clean-up memory taken by intermediate structures. |
2478 | // |
2479 | // NB: don't clear Labels list as we may need them if we mark the function |
2480 | // as non-simple later in the process of discovering extra entry points. |
2481 | clearList(List&: Instructions); |
2482 | clearList(List&: OffsetToCFI); |
2483 | clearList(List&: TakenBranches); |
2484 | |
2485 | // Update the state. |
2486 | CurrentState = State::CFG; |
2487 | |
2488 | // Make any necessary adjustments for indirect branches. |
2489 | if (!postProcessIndirectBranches(AllocId: AllocatorId)) { |
2490 | if (opts::Verbosity) { |
2491 | BC.errs() << "BOLT-WARNING: failed to post-process indirect branches for " |
2492 | << *this << '\n'; |
2493 | } |
2494 | |
2495 | if (BC.isAArch64()) |
2496 | PreserveNops = BC.HasRelocations; |
2497 | |
2498 | // In relocation mode we want to keep processing the function but avoid |
2499 | // optimizing it. |
2500 | setSimple(false); |
2501 | } |
2502 | |
2503 | clearList(List&: ExternallyReferencedOffsets); |
2504 | clearList(List&: UnknownIndirectBranchOffsets); |
2505 | |
2506 | return Error::success(); |
2507 | } |
2508 | |
2509 | void BinaryFunction::postProcessCFG() { |
2510 | if (isSimple() && !BasicBlocks.empty()) { |
2511 | // Convert conditional tail call branches to conditional branches that jump |
2512 | // to a tail call. |
2513 | removeConditionalTailCalls(); |
2514 | |
2515 | postProcessProfile(); |
2516 | |
2517 | // Eliminate inconsistencies between branch instructions and CFG. |
2518 | postProcessBranches(); |
2519 | } |
2520 | |
2521 | // The final cleanup of intermediate structures. |
2522 | clearList(List&: IgnoredBranches); |
2523 | |
2524 | // Remove "Offset" annotations, unless we need an address-translation table |
2525 | // later. This has no cost, since annotations are allocated by a bumpptr |
2526 | // allocator and won't be released anyway until late in the pipeline. |
2527 | if (!requiresAddressTranslation() && !opts::Instrument) { |
2528 | for (BinaryBasicBlock &BB : blocks()) |
2529 | for (MCInst &Inst : BB) |
2530 | BC.MIB->clearOffset(Inst); |
2531 | } |
2532 | |
2533 | assert((!isSimple() || validateCFG()) && |
2534 | "invalid CFG detected after post-processing"); |
2535 | } |
2536 | |
2537 | void BinaryFunction::removeTagsFromProfile() { |
2538 | for (BinaryBasicBlock *BB : BasicBlocks) { |
2539 | if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE) |
2540 | BB->ExecutionCount = 0; |
2541 | for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) { |
2542 | if (BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE && |
2543 | BI.MispredictedCount != BinaryBasicBlock::COUNT_NO_PROFILE) |
2544 | continue; |
2545 | BI.Count = 0; |
2546 | BI.MispredictedCount = 0; |
2547 | } |
2548 | } |
2549 | } |
2550 | |
2551 | void BinaryFunction::removeConditionalTailCalls() { |
2552 | // Blocks to be appended at the end. |
2553 | std::vector<std::unique_ptr<BinaryBasicBlock>> NewBlocks; |
2554 | |
2555 | for (auto BBI = begin(); BBI != end(); ++BBI) { |
2556 | BinaryBasicBlock &BB = *BBI; |
2557 | MCInst *CTCInstr = BB.getLastNonPseudoInstr(); |
2558 | if (!CTCInstr) |
2559 | continue; |
2560 | |
2561 | std::optional<uint64_t> TargetAddressOrNone = |
2562 | BC.MIB->getConditionalTailCall(Inst: *CTCInstr); |
2563 | if (!TargetAddressOrNone) |
2564 | continue; |
2565 | |
2566 | // Gather all necessary information about CTC instruction before |
2567 | // annotations are destroyed. |
2568 | const int32_t CFIStateBeforeCTC = BB.getCFIStateAtInstr(Instr: CTCInstr); |
2569 | uint64_t CTCTakenCount = BinaryBasicBlock::COUNT_NO_PROFILE; |
2570 | uint64_t CTCMispredCount = BinaryBasicBlock::COUNT_NO_PROFILE; |
2571 | if (hasValidProfile()) { |
2572 | CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>( |
2573 | Inst: *CTCInstr, Name: "CTCTakenCount"); |
2574 | CTCMispredCount = BC.MIB->getAnnotationWithDefault<uint64_t>( |
2575 | Inst: *CTCInstr, Name: "CTCMispredCount"); |
2576 | } |
2577 | |
2578 | // Assert that the tail call does not throw. |
2579 | assert(!BC.MIB->getEHInfo(*CTCInstr) && |
2580 | "found tail call with associated landing pad"); |
2581 | |
2582 | // Create a basic block with an unconditional tail call instruction using |
2583 | // the same destination. |
2584 | const MCSymbol *CTCTargetLabel = BC.MIB->getTargetSymbol(Inst: *CTCInstr); |
2585 | assert(CTCTargetLabel && "symbol expected for conditional tail call"); |
2586 | MCInst TailCallInstr; |
2587 | BC.MIB->createTailCall(Inst&: TailCallInstr, Target: CTCTargetLabel, Ctx: BC.Ctx.get()); |
2588 | |
2589 | // Move offset from CTCInstr to TailCallInstr. |
2590 | if (const std::optional<uint32_t> Offset = BC.MIB->getOffset(Inst: *CTCInstr)) { |
2591 | BC.MIB->setOffset(Inst&: TailCallInstr, Offset: *Offset); |
2592 | BC.MIB->clearOffset(Inst&: *CTCInstr); |
2593 | } |
2594 | |
2595 | // Link new BBs to the original input offset of the BB where the CTC |
2596 | // is, so we can map samples recorded in new BBs back to the original BB |
2597 | // seem in the input binary (if using BAT) |
2598 | std::unique_ptr<BinaryBasicBlock> TailCallBB = |
2599 | createBasicBlock(Label: BC.Ctx->createNamedTempSymbol(Name: "TC")); |
2600 | TailCallBB->setOffset(BB.getInputOffset()); |
2601 | TailCallBB->addInstruction(Inst: TailCallInstr); |
2602 | TailCallBB->setCFIState(CFIStateBeforeCTC); |
2603 | |
2604 | // Add CFG edge with profile info from BB to TailCallBB. |
2605 | BB.addSuccessor(Succ: TailCallBB.get(), Count: CTCTakenCount, MispredictedCount: CTCMispredCount); |
2606 | |
2607 | // Add execution count for the block. |
2608 | TailCallBB->setExecutionCount(CTCTakenCount); |
2609 | |
2610 | BC.MIB->convertTailCallToJmp(Inst&: *CTCInstr); |
2611 | |
2612 | BC.MIB->replaceBranchTarget(Inst&: *CTCInstr, TBB: TailCallBB->getLabel(), |
2613 | Ctx: BC.Ctx.get()); |
2614 | |
2615 | // Add basic block to the list that will be added to the end. |
2616 | NewBlocks.emplace_back(args: std::move(TailCallBB)); |
2617 | |
2618 | // Swap edges as the TailCallBB corresponds to the taken branch. |
2619 | BB.swapConditionalSuccessors(); |
2620 | |
2621 | // This branch is no longer a conditional tail call. |
2622 | BC.MIB->unsetConditionalTailCall(Inst&: *CTCInstr); |
2623 | } |
2624 | |
2625 | insertBasicBlocks(StartBB: std::prev(x: end()), NewBBs: std::move(NewBlocks), |
2626 | /* UpdateLayout */ true, |
2627 | /* UpdateCFIState */ false); |
2628 | } |
2629 | |
2630 | uint64_t BinaryFunction::getFunctionScore() const { |
2631 | if (FunctionScore != -1) |
2632 | return FunctionScore; |
2633 | |
2634 | if (!isSimple() || !hasValidProfile()) { |
2635 | FunctionScore = 0; |
2636 | return FunctionScore; |
2637 | } |
2638 | |
2639 | uint64_t TotalScore = 0ULL; |
2640 | for (const BinaryBasicBlock &BB : blocks()) { |
2641 | uint64_t BBExecCount = BB.getExecutionCount(); |
2642 | if (BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE) |
2643 | continue; |
2644 | TotalScore += BBExecCount * BB.getNumNonPseudos(); |
2645 | } |
2646 | FunctionScore = TotalScore; |
2647 | return FunctionScore; |
2648 | } |
2649 | |
2650 | void BinaryFunction::annotateCFIState() { |
2651 | assert(CurrentState == State::Disassembled && "unexpected function state"); |
2652 | assert(!BasicBlocks.empty() && "basic block list should not be empty"); |
2653 | |
2654 | // This is an index of the last processed CFI in FDE CFI program. |
2655 | uint32_t State = 0; |
2656 | |
2657 | // This is an index of RememberState CFI reflecting effective state right |
2658 | // after execution of RestoreState CFI. |
2659 | // |
2660 | // It differs from State iff the CFI at (State-1) |
2661 | // was RestoreState (modulo GNU_args_size CFIs, which are ignored). |
2662 | // |
2663 | // This allows us to generate shorter replay sequences when producing new |
2664 | // CFI programs. |
2665 | uint32_t EffectiveState = 0; |
2666 | |
2667 | // For tracking RememberState/RestoreState sequences. |
2668 | std::stack<uint32_t> StateStack; |
2669 | |
2670 | for (BinaryBasicBlock *BB : BasicBlocks) { |
2671 | BB->setCFIState(EffectiveState); |
2672 | |
2673 | for (const MCInst &Instr : *BB) { |
2674 | const MCCFIInstruction *CFI = getCFIFor(Instr); |
2675 | if (!CFI) |
2676 | continue; |
2677 | |
2678 | ++State; |
2679 | |
2680 | switch (CFI->getOperation()) { |
2681 | case MCCFIInstruction::OpRememberState: |
2682 | StateStack.push(x: EffectiveState); |
2683 | EffectiveState = State; |
2684 | break; |
2685 | case MCCFIInstruction::OpRestoreState: |
2686 | assert(!StateStack.empty() && "corrupt CFI stack"); |
2687 | EffectiveState = StateStack.top(); |
2688 | StateStack.pop(); |
2689 | break; |
2690 | case MCCFIInstruction::OpGnuArgsSize: |
2691 | // OpGnuArgsSize CFIs do not affect the CFI state. |
2692 | break; |
2693 | default: |
2694 | // Any other CFI updates the state. |
2695 | EffectiveState = State; |
2696 | break; |
2697 | } |
2698 | } |
2699 | } |
2700 | |
2701 | if (opts::Verbosity >= 1 && !StateStack.empty()) { |
2702 | BC.errs() << "BOLT-WARNING: non-empty CFI stack at the end of "<< *this |
2703 | << '\n'; |
2704 | } |
2705 | } |
2706 | |
2707 | namespace { |
2708 | |
2709 | /// Our full interpretation of a DWARF CFI machine state at a given point |
2710 | struct CFISnapshot { |
2711 | /// CFA register number and offset defining the canonical frame at this |
2712 | /// point, or the number of a rule (CFI state) that computes it with a |
2713 | /// DWARF expression. This number will be negative if it refers to a CFI |
2714 | /// located in the CIE instead of the FDE. |
2715 | uint32_t CFAReg; |
2716 | int32_t CFAOffset; |
2717 | int32_t CFARule; |
2718 | /// Mapping of rules (CFI states) that define the location of each |
2719 | /// register. If absent, no rule defining the location of such register |
2720 | /// was ever read. This number will be negative if it refers to a CFI |
2721 | /// located in the CIE instead of the FDE. |
2722 | DenseMap<int32_t, int32_t> RegRule; |
2723 | |
2724 | /// References to CIE, FDE and expanded instructions after a restore state |
2725 | const BinaryFunction::CFIInstrMapType &CIE; |
2726 | const BinaryFunction::CFIInstrMapType &FDE; |
2727 | const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents; |
2728 | |
2729 | /// Current FDE CFI number representing the state where the snapshot is at |
2730 | int32_t CurState; |
2731 | |
2732 | /// Used when we don't have information about which state/rule to apply |
2733 | /// to recover the location of either the CFA or a specific register |
2734 | constexpr static int32_t UNKNOWN = std::numeric_limits<int32_t>::min(); |
2735 | |
2736 | private: |
2737 | /// Update our snapshot by executing a single CFI |
2738 | void update(const MCCFIInstruction &Instr, int32_t RuleNumber) { |
2739 | switch (Instr.getOperation()) { |
2740 | case MCCFIInstruction::OpSameValue: |
2741 | case MCCFIInstruction::OpRelOffset: |
2742 | case MCCFIInstruction::OpOffset: |
2743 | case MCCFIInstruction::OpRestore: |
2744 | case MCCFIInstruction::OpUndefined: |
2745 | case MCCFIInstruction::OpRegister: |
2746 | RegRule[Instr.getRegister()] = RuleNumber; |
2747 | break; |
2748 | case MCCFIInstruction::OpDefCfaRegister: |
2749 | CFAReg = Instr.getRegister(); |
2750 | CFARule = UNKNOWN; |
2751 | |
2752 | // This shouldn't happen according to the spec but GNU binutils on RISC-V |
2753 | // emits a DW_CFA_def_cfa_register in CIE's which leaves the offset |
2754 | // unspecified. Both readelf and llvm-dwarfdump interpret the offset as 0 |
2755 | // in this case so let's do the same. |
2756 | if (CFAOffset == UNKNOWN) |
2757 | CFAOffset = 0; |
2758 | break; |
2759 | case MCCFIInstruction::OpDefCfaOffset: |
2760 | CFAOffset = Instr.getOffset(); |
2761 | CFARule = UNKNOWN; |
2762 | break; |
2763 | case MCCFIInstruction::OpDefCfa: |
2764 | CFAReg = Instr.getRegister(); |
2765 | CFAOffset = Instr.getOffset(); |
2766 | CFARule = UNKNOWN; |
2767 | break; |
2768 | case MCCFIInstruction::OpEscape: { |
2769 | std::optional<uint8_t> Reg = |
2770 | readDWARFExpressionTargetReg(ExprBytes: Instr.getValues()); |
2771 | // Handle DW_CFA_def_cfa_expression |
2772 | if (!Reg) { |
2773 | CFARule = RuleNumber; |
2774 | break; |
2775 | } |
2776 | RegRule[*Reg] = RuleNumber; |
2777 | break; |
2778 | } |
2779 | case MCCFIInstruction::OpAdjustCfaOffset: |
2780 | case MCCFIInstruction::OpWindowSave: |
2781 | case MCCFIInstruction::OpNegateRAStateWithPC: |
2782 | case MCCFIInstruction::OpLLVMDefAspaceCfa: |
2783 | case MCCFIInstruction::OpLabel: |
2784 | case MCCFIInstruction::OpValOffset: |
2785 | llvm_unreachable("unsupported CFI opcode"); |
2786 | break; |
2787 | case MCCFIInstruction::OpNegateRAState: |
2788 | if (!(opts::BinaryAnalysisMode || opts::HeatmapMode)) { |
2789 | llvm_unreachable("BOLT-ERROR: binaries using pac-ret hardening (e.g. " |
2790 | "as produced by '-mbranch-protection=pac-ret') are " |
2791 | "currently not supported by BOLT."); |
2792 | } |
2793 | break; |
2794 | case MCCFIInstruction::OpRememberState: |
2795 | case MCCFIInstruction::OpRestoreState: |
2796 | case MCCFIInstruction::OpGnuArgsSize: |
2797 | // do not affect CFI state |
2798 | break; |
2799 | } |
2800 | } |
2801 | |
2802 | public: |
2803 | /// Advance state reading FDE CFI instructions up to State number |
2804 | void advanceTo(int32_t State) { |
2805 | for (int32_t I = CurState, E = State; I != E; ++I) { |
2806 | const MCCFIInstruction &Instr = FDE[I]; |
2807 | if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) { |
2808 | update(Instr, RuleNumber: I); |
2809 | continue; |
2810 | } |
2811 | // If restore state instruction, fetch the equivalent CFIs that have |
2812 | // the same effect of this restore. This is used to ensure remember- |
2813 | // restore pairs are completely removed. |
2814 | auto Iter = FrameRestoreEquivalents.find(Val: I); |
2815 | if (Iter == FrameRestoreEquivalents.end()) |
2816 | continue; |
2817 | for (int32_t RuleNumber : Iter->second) |
2818 | update(Instr: FDE[RuleNumber], RuleNumber); |
2819 | } |
2820 | |
2821 | assert(((CFAReg != (uint32_t)UNKNOWN && CFAOffset != UNKNOWN) || |
2822 | CFARule != UNKNOWN) && |
2823 | "CIE did not define default CFA?"); |
2824 | |
2825 | CurState = State; |
2826 | } |
2827 | |
2828 | /// Interpret all CIE and FDE instructions up until CFI State number and |
2829 | /// populate this snapshot |
2830 | CFISnapshot( |
2831 | const BinaryFunction::CFIInstrMapType &CIE, |
2832 | const BinaryFunction::CFIInstrMapType &FDE, |
2833 | const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents, |
2834 | int32_t State) |
2835 | : CIE(CIE), FDE(FDE), FrameRestoreEquivalents(FrameRestoreEquivalents) { |
2836 | CFAReg = UNKNOWN; |
2837 | CFAOffset = UNKNOWN; |
2838 | CFARule = UNKNOWN; |
2839 | CurState = 0; |
2840 | |
2841 | for (int32_t I = 0, E = CIE.size(); I != E; ++I) { |
2842 | const MCCFIInstruction &Instr = CIE[I]; |
2843 | update(Instr, RuleNumber: -I); |
2844 | } |
2845 | |
2846 | advanceTo(State); |
2847 | } |
2848 | }; |
2849 | |
2850 | /// A CFI snapshot with the capability of checking if incremental additions to |
2851 | /// it are redundant. This is used to ensure we do not emit two CFI instructions |
2852 | /// back-to-back that are doing the same state change, or to avoid emitting a |
2853 | /// CFI at all when the state at that point would not be modified after that CFI |
2854 | struct CFISnapshotDiff : public CFISnapshot { |
2855 | bool RestoredCFAReg{false}; |
2856 | bool RestoredCFAOffset{false}; |
2857 | DenseMap<int32_t, bool> RestoredRegs; |
2858 | |
2859 | CFISnapshotDiff(const CFISnapshot &S) : CFISnapshot(S) {} |
2860 | |
2861 | CFISnapshotDiff( |
2862 | const BinaryFunction::CFIInstrMapType &CIE, |
2863 | const BinaryFunction::CFIInstrMapType &FDE, |
2864 | const DenseMap<int32_t, SmallVector<int32_t, 4>> &FrameRestoreEquivalents, |
2865 | int32_t State) |
2866 | : CFISnapshot(CIE, FDE, FrameRestoreEquivalents, State) {} |
2867 | |
2868 | /// Return true if applying Instr to this state is redundant and can be |
2869 | /// dismissed. |
2870 | bool isRedundant(const MCCFIInstruction &Instr) { |
2871 | switch (Instr.getOperation()) { |
2872 | case MCCFIInstruction::OpSameValue: |
2873 | case MCCFIInstruction::OpRelOffset: |
2874 | case MCCFIInstruction::OpOffset: |
2875 | case MCCFIInstruction::OpRestore: |
2876 | case MCCFIInstruction::OpUndefined: |
2877 | case MCCFIInstruction::OpRegister: |
2878 | case MCCFIInstruction::OpEscape: { |
2879 | uint32_t Reg; |
2880 | if (Instr.getOperation() != MCCFIInstruction::OpEscape) { |
2881 | Reg = Instr.getRegister(); |
2882 | } else { |
2883 | std::optional<uint8_t> R = |
2884 | readDWARFExpressionTargetReg(ExprBytes: Instr.getValues()); |
2885 | // Handle DW_CFA_def_cfa_expression |
2886 | if (!R) { |
2887 | if (RestoredCFAReg && RestoredCFAOffset) |
2888 | return true; |
2889 | RestoredCFAReg = true; |
2890 | RestoredCFAOffset = true; |
2891 | return false; |
2892 | } |
2893 | Reg = *R; |
2894 | } |
2895 | if (RestoredRegs[Reg]) |
2896 | return true; |
2897 | RestoredRegs[Reg] = true; |
2898 | const int32_t CurRegRule = RegRule.contains(Val: Reg) ? RegRule[Reg] : UNKNOWN; |
2899 | if (CurRegRule == UNKNOWN) { |
2900 | if (Instr.getOperation() == MCCFIInstruction::OpRestore || |
2901 | Instr.getOperation() == MCCFIInstruction::OpSameValue) |
2902 | return true; |
2903 | return false; |
2904 | } |
2905 | const MCCFIInstruction &LastDef = |
2906 | CurRegRule < 0 ? CIE[-CurRegRule] : FDE[CurRegRule]; |
2907 | return LastDef == Instr; |
2908 | } |
2909 | case MCCFIInstruction::OpDefCfaRegister: |
2910 | if (RestoredCFAReg) |
2911 | return true; |
2912 | RestoredCFAReg = true; |
2913 | return CFAReg == Instr.getRegister(); |
2914 | case MCCFIInstruction::OpDefCfaOffset: |
2915 | if (RestoredCFAOffset) |
2916 | return true; |
2917 | RestoredCFAOffset = true; |
2918 | return CFAOffset == Instr.getOffset(); |
2919 | case MCCFIInstruction::OpDefCfa: |
2920 | if (RestoredCFAReg && RestoredCFAOffset) |
2921 | return true; |
2922 | RestoredCFAReg = true; |
2923 | RestoredCFAOffset = true; |
2924 | return CFAReg == Instr.getRegister() && CFAOffset == Instr.getOffset(); |
2925 | case MCCFIInstruction::OpAdjustCfaOffset: |
2926 | case MCCFIInstruction::OpWindowSave: |
2927 | case MCCFIInstruction::OpNegateRAStateWithPC: |
2928 | case MCCFIInstruction::OpLLVMDefAspaceCfa: |
2929 | case MCCFIInstruction::OpLabel: |
2930 | case MCCFIInstruction::OpValOffset: |
2931 | llvm_unreachable("unsupported CFI opcode"); |
2932 | return false; |
2933 | case MCCFIInstruction::OpNegateRAState: |
2934 | if (!(opts::BinaryAnalysisMode || opts::HeatmapMode)) { |
2935 | llvm_unreachable("BOLT-ERROR: binaries using pac-ret hardening (e.g. " |
2936 | "as produced by '-mbranch-protection=pac-ret') are " |
2937 | "currently not supported by BOLT."); |
2938 | } |
2939 | break; |
2940 | case MCCFIInstruction::OpRememberState: |
2941 | case MCCFIInstruction::OpRestoreState: |
2942 | case MCCFIInstruction::OpGnuArgsSize: |
2943 | // do not affect CFI state |
2944 | return true; |
2945 | } |
2946 | return false; |
2947 | } |
2948 | }; |
2949 | |
2950 | } // end anonymous namespace |
2951 | |
2952 | bool BinaryFunction::replayCFIInstrs(int32_t FromState, int32_t ToState, |
2953 | BinaryBasicBlock *InBB, |
2954 | BinaryBasicBlock::iterator InsertIt) { |
2955 | if (FromState == ToState) |
2956 | return true; |
2957 | assert(FromState < ToState && "can only replay CFIs forward"); |
2958 | |
2959 | CFISnapshotDiff CFIDiff(CIEFrameInstructions, FrameInstructions, |
2960 | FrameRestoreEquivalents, FromState); |
2961 | |
2962 | std::vector<uint32_t> NewCFIs; |
2963 | for (int32_t CurState = FromState; CurState < ToState; ++CurState) { |
2964 | MCCFIInstruction *Instr = &FrameInstructions[CurState]; |
2965 | if (Instr->getOperation() == MCCFIInstruction::OpRestoreState) { |
2966 | auto Iter = FrameRestoreEquivalents.find(Val: CurState); |
2967 | assert(Iter != FrameRestoreEquivalents.end()); |
2968 | NewCFIs.insert(position: NewCFIs.end(), first: Iter->second.begin(), last: Iter->second.end()); |
2969 | // RestoreState / Remember will be filtered out later by CFISnapshotDiff, |
2970 | // so we might as well fall-through here. |
2971 | } |
2972 | NewCFIs.push_back(x: CurState); |
2973 | } |
2974 | |
2975 | // Replay instructions while avoiding duplicates |
2976 | for (int32_t State : llvm::reverse(C&: NewCFIs)) { |
2977 | if (CFIDiff.isRedundant(Instr: FrameInstructions[State])) |
2978 | continue; |
2979 | InsertIt = addCFIPseudo(BB: InBB, Pos: InsertIt, Offset: State); |
2980 | } |
2981 | |
2982 | return true; |
2983 | } |
2984 | |
2985 | SmallVector<int32_t, 4> |
2986 | BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState, |
2987 | BinaryBasicBlock *InBB, |
2988 | BinaryBasicBlock::iterator &InsertIt) { |
2989 | SmallVector<int32_t, 4> NewStates; |
2990 | |
2991 | CFISnapshot ToCFITable(CIEFrameInstructions, FrameInstructions, |
2992 | FrameRestoreEquivalents, ToState); |
2993 | CFISnapshotDiff FromCFITable(ToCFITable); |
2994 | FromCFITable.advanceTo(State: FromState); |
2995 | |
2996 | auto undoStateDefCfa = [&]() { |
2997 | if (ToCFITable.CFARule == CFISnapshot::UNKNOWN) { |
2998 | FrameInstructions.emplace_back(Args: MCCFIInstruction::cfiDefCfa( |
2999 | L: nullptr, Register: ToCFITable.CFAReg, Offset: ToCFITable.CFAOffset)); |
3000 | if (FromCFITable.isRedundant(Instr: FrameInstructions.back())) { |
3001 | FrameInstructions.pop_back(); |
3002 | return; |
3003 | } |
3004 | NewStates.push_back(Elt: FrameInstructions.size() - 1); |
3005 | InsertIt = addCFIPseudo(BB: InBB, Pos: InsertIt, Offset: FrameInstructions.size() - 1); |
3006 | ++InsertIt; |
3007 | } else if (ToCFITable.CFARule < 0) { |
3008 | if (FromCFITable.isRedundant(Instr: CIEFrameInstructions[-ToCFITable.CFARule])) |
3009 | return; |
3010 | NewStates.push_back(Elt: FrameInstructions.size()); |
3011 | InsertIt = addCFIPseudo(BB: InBB, Pos: InsertIt, Offset: FrameInstructions.size()); |
3012 | ++InsertIt; |
3013 | FrameInstructions.emplace_back(Args&: CIEFrameInstructions[-ToCFITable.CFARule]); |
3014 | } else if (!FromCFITable.isRedundant( |
3015 | Instr: FrameInstructions[ToCFITable.CFARule])) { |
3016 | NewStates.push_back(Elt: ToCFITable.CFARule); |
3017 | InsertIt = addCFIPseudo(BB: InBB, Pos: InsertIt, Offset: ToCFITable.CFARule); |
3018 | ++InsertIt; |
3019 | } |
3020 | }; |
3021 | |
3022 | auto undoState = [&](const MCCFIInstruction &Instr) { |
3023 | switch (Instr.getOperation()) { |
3024 | case MCCFIInstruction::OpRememberState: |
3025 | case MCCFIInstruction::OpRestoreState: |
3026 | break; |
3027 | case MCCFIInstruction::OpSameValue: |
3028 | case MCCFIInstruction::OpRelOffset: |
3029 | case MCCFIInstruction::OpOffset: |
3030 | case MCCFIInstruction::OpRestore: |
3031 | case MCCFIInstruction::OpUndefined: |
3032 | case MCCFIInstruction::OpEscape: |
3033 | case MCCFIInstruction::OpRegister: { |
3034 | uint32_t Reg; |
3035 | if (Instr.getOperation() != MCCFIInstruction::OpEscape) { |
3036 | Reg = Instr.getRegister(); |
3037 | } else { |
3038 | std::optional<uint8_t> R = |
3039 | readDWARFExpressionTargetReg(ExprBytes: Instr.getValues()); |
3040 | // Handle DW_CFA_def_cfa_expression |
3041 | if (!R) { |
3042 | undoStateDefCfa(); |
3043 | return; |
3044 | } |
3045 | Reg = *R; |
3046 | } |
3047 | |
3048 | if (!ToCFITable.RegRule.contains(Val: Reg)) { |
3049 | FrameInstructions.emplace_back( |
3050 | Args: MCCFIInstruction::createRestore(L: nullptr, Register: Reg)); |
3051 | if (FromCFITable.isRedundant(Instr: FrameInstructions.back())) { |
3052 | FrameInstructions.pop_back(); |
3053 | break; |
3054 | } |
3055 | NewStates.push_back(Elt: FrameInstructions.size() - 1); |
3056 | InsertIt = addCFIPseudo(BB: InBB, Pos: InsertIt, Offset: FrameInstructions.size() - 1); |
3057 | ++InsertIt; |
3058 | break; |
3059 | } |
3060 | const int32_t Rule = ToCFITable.RegRule[Reg]; |
3061 | if (Rule < 0) { |
3062 | if (FromCFITable.isRedundant(Instr: CIEFrameInstructions[-Rule])) |
3063 | break; |
3064 | NewStates.push_back(Elt: FrameInstructions.size()); |
3065 | InsertIt = addCFIPseudo(BB: InBB, Pos: InsertIt, Offset: FrameInstructions.size()); |
3066 | ++InsertIt; |
3067 | FrameInstructions.emplace_back(Args&: CIEFrameInstructions[-Rule]); |
3068 | break; |
3069 | } |
3070 | if (FromCFITable.isRedundant(Instr: FrameInstructions[Rule])) |
3071 | break; |
3072 | NewStates.push_back(Elt: Rule); |
3073 | InsertIt = addCFIPseudo(BB: InBB, Pos: InsertIt, Offset: Rule); |
3074 | ++InsertIt; |
3075 | break; |
3076 | } |
3077 | case MCCFIInstruction::OpDefCfaRegister: |
3078 | case MCCFIInstruction::OpDefCfaOffset: |
3079 | case MCCFIInstruction::OpDefCfa: |
3080 | undoStateDefCfa(); |
3081 | break; |
3082 | case MCCFIInstruction::OpAdjustCfaOffset: |
3083 | case MCCFIInstruction::OpWindowSave: |
3084 | case MCCFIInstruction::OpNegateRAStateWithPC: |
3085 | case MCCFIInstruction::OpLLVMDefAspaceCfa: |
3086 | case MCCFIInstruction::OpLabel: |
3087 | case MCCFIInstruction::OpValOffset: |
3088 | llvm_unreachable("unsupported CFI opcode"); |
3089 | break; |
3090 | case MCCFIInstruction::OpNegateRAState: |
3091 | if (!(opts::BinaryAnalysisMode || opts::HeatmapMode)) { |
3092 | llvm_unreachable("BOLT-ERROR: binaries using pac-ret hardening (e.g. " |
3093 | "as produced by '-mbranch-protection=pac-ret') are " |
3094 | "currently not supported by BOLT."); |
3095 | } |
3096 | break; |
3097 | case MCCFIInstruction::OpGnuArgsSize: |
3098 | // do not affect CFI state |
3099 | break; |
3100 | } |
3101 | }; |
3102 | |
3103 | // Undo all modifications from ToState to FromState |
3104 | for (int32_t I = ToState, E = FromState; I != E; ++I) { |
3105 | const MCCFIInstruction &Instr = FrameInstructions[I]; |
3106 | if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) { |
3107 | undoState(Instr); |
3108 | continue; |
3109 | } |
3110 | auto Iter = FrameRestoreEquivalents.find(Val: I); |
3111 | if (Iter == FrameRestoreEquivalents.end()) |
3112 | continue; |
3113 | for (int32_t State : Iter->second) |
3114 | undoState(FrameInstructions[State]); |
3115 | } |
3116 | |
3117 | return NewStates; |
3118 | } |
3119 | |
3120 | void BinaryFunction::normalizeCFIState() { |
3121 | // Reordering blocks with remember-restore state instructions can be specially |
3122 | // tricky. When rewriting the CFI, we omit remember-restore state instructions |
3123 | // entirely. For restore state, we build a map expanding each restore to the |
3124 | // equivalent unwindCFIState sequence required at that point to achieve the |
3125 | // same effect of the restore. All remember state are then just ignored. |
3126 | std::stack<int32_t> Stack; |
3127 | for (BinaryBasicBlock *CurBB : Layout.blocks()) { |
3128 | for (auto II = CurBB->begin(); II != CurBB->end(); ++II) { |
3129 | if (const MCCFIInstruction *CFI = getCFIFor(Instr: *II)) { |
3130 | if (CFI->getOperation() == MCCFIInstruction::OpRememberState) { |
3131 | Stack.push(x: II->getOperand(i: 0).getImm()); |
3132 | continue; |
3133 | } |
3134 | if (CFI->getOperation() == MCCFIInstruction::OpRestoreState) { |
3135 | const int32_t RememberState = Stack.top(); |
3136 | const int32_t CurState = II->getOperand(i: 0).getImm(); |
3137 | FrameRestoreEquivalents[CurState] = |
3138 | unwindCFIState(FromState: CurState, ToState: RememberState, InBB: CurBB, InsertIt&: II); |
3139 | Stack.pop(); |
3140 | } |
3141 | } |
3142 | } |
3143 | } |
3144 | } |
3145 | |
3146 | bool BinaryFunction::finalizeCFIState() { |
3147 | LLVM_DEBUG( |
3148 | dbgs() << "Trying to fix CFI states for each BB after reordering.\n"); |
3149 | LLVM_DEBUG(dbgs() << "This is the list of CFI states for each BB of "<< *this |
3150 | << ": "); |
3151 | |
3152 | const char *Sep = ""; |
3153 | (void)Sep; |
3154 | for (FunctionFragment &FF : Layout.fragments()) { |
3155 | // Hot-cold border: at start of each region (with a different FDE) we need |
3156 | // to reset the CFI state. |
3157 | int32_t State = 0; |
3158 | |
3159 | for (BinaryBasicBlock *BB : FF) { |
3160 | const int32_t CFIStateAtExit = BB->getCFIStateAtExit(); |
3161 | |
3162 | // We need to recover the correct state if it doesn't match expected |
3163 | // state at BB entry point. |
3164 | if (BB->getCFIState() < State) { |
3165 | // In this case, State is currently higher than what this BB expect it |
3166 | // to be. To solve this, we need to insert CFI instructions to undo |
3167 | // the effect of all CFI from BB's state to current State. |
3168 | auto InsertIt = BB->begin(); |
3169 | unwindCFIState(FromState: State, ToState: BB->getCFIState(), InBB: BB, InsertIt); |
3170 | } else if (BB->getCFIState() > State) { |
3171 | // If BB's CFI state is greater than State, it means we are behind in |
3172 | // the state. Just emit all instructions to reach this state at the |
3173 | // beginning of this BB. If this sequence of instructions involve |
3174 | // remember state or restore state, bail out. |
3175 | if (!replayCFIInstrs(FromState: State, ToState: BB->getCFIState(), InBB: BB, InsertIt: BB->begin())) |
3176 | return false; |
3177 | } |
3178 | |
3179 | State = CFIStateAtExit; |
3180 | LLVM_DEBUG(dbgs() << Sep << State; Sep = ", "); |
3181 | } |
3182 | } |
3183 | LLVM_DEBUG(dbgs() << "\n"); |
3184 | |
3185 | for (BinaryBasicBlock &BB : blocks()) { |
3186 | for (auto II = BB.begin(); II != BB.end();) { |
3187 | const MCCFIInstruction *CFI = getCFIFor(Instr: *II); |
3188 | if (CFI && (CFI->getOperation() == MCCFIInstruction::OpRememberState || |
3189 | CFI->getOperation() == MCCFIInstruction::OpRestoreState)) { |
3190 | II = BB.eraseInstruction(II); |
3191 | } else { |
3192 | ++II; |
3193 | } |
3194 | } |
3195 | } |
3196 | |
3197 | return true; |
3198 | } |
3199 | |
3200 | bool BinaryFunction::requiresAddressTranslation() const { |
3201 | return opts::EnableBAT || hasSDTMarker() || hasPseudoProbe(); |
3202 | } |
3203 | |
3204 | bool BinaryFunction::requiresAddressMap() const { |
3205 | if (isInjected()) |
3206 | return false; |
3207 | |
3208 | return opts::UpdateDebugSections || isMultiEntry() || |
3209 | requiresAddressTranslation(); |
3210 | } |
3211 | |
3212 | uint64_t BinaryFunction::getInstructionCount() const { |
3213 | uint64_t Count = 0; |
3214 | for (const BinaryBasicBlock &BB : blocks()) |
3215 | Count += BB.getNumNonPseudos(); |
3216 | return Count; |
3217 | } |
3218 | |
3219 | void BinaryFunction::clearDisasmState() { |
3220 | clearList(List&: Instructions); |
3221 | clearList(List&: IgnoredBranches); |
3222 | clearList(List&: TakenBranches); |
3223 | |
3224 | if (BC.HasRelocations) { |
3225 | for (std::pair<const uint32_t, MCSymbol *> &LI : Labels) |
3226 | BC.UndefinedSymbols.insert(x: LI.second); |
3227 | for (MCSymbol *const EndLabel : FunctionEndLabels) |
3228 | if (EndLabel) |
3229 | BC.UndefinedSymbols.insert(x: EndLabel); |
3230 | } |
3231 | } |
3232 | |
3233 | void BinaryFunction::setTrapOnEntry() { |
3234 | clearDisasmState(); |
3235 | |
3236 | forEachEntryPoint(Callback: [&](uint64_t Offset, const MCSymbol *Label) -> bool { |
3237 | MCInst TrapInstr; |
3238 | BC.MIB->createTrap(Inst&: TrapInstr); |
3239 | addInstruction(Offset, Instruction: std::move(TrapInstr)); |
3240 | return true; |
3241 | }); |
3242 | |
3243 | TrapsOnEntry = true; |
3244 | } |
3245 | |
3246 | void BinaryFunction::setIgnored() { |
3247 | IsIgnored = true; |
3248 | |
3249 | if (opts::processAllFunctions()) { |
3250 | // We can accept ignored functions before they've been disassembled. |
3251 | // In that case, they would still get disassembled and emitted, but not |
3252 | // optimized. |
3253 | if (CurrentState != State::Empty) { |
3254 | BC.errs() << "BOLT-ERROR: cannot ignore non-empty function "<< *this |
3255 | << " in current mode\n"; |
3256 | exit(status: 1); |
3257 | } |
3258 | return; |
3259 | } |
3260 | |
3261 | IsSimple = false; |
3262 | LLVM_DEBUG(dbgs() << "Ignoring "<< getPrintName() << '\n'); |
3263 | |
3264 | if (CurrentState == State::Empty) |
3265 | return; |
3266 | |
3267 | clearDisasmState(); |
3268 | |
3269 | // Clear CFG state too. |
3270 | if (hasCFG()) { |
3271 | releaseCFG(); |
3272 | |
3273 | for (BinaryBasicBlock *BB : BasicBlocks) |
3274 | delete BB; |
3275 | clearList(List&: BasicBlocks); |
3276 | |
3277 | for (BinaryBasicBlock *BB : DeletedBasicBlocks) |
3278 | delete BB; |
3279 | clearList(List&: DeletedBasicBlocks); |
3280 | |
3281 | Layout.clear(); |
3282 | } |
3283 | |
3284 | CurrentState = State::Empty; |
3285 | |
3286 | // Fix external references in the original function body. |
3287 | if (BC.HasRelocations) { |
3288 | LLVM_DEBUG(dbgs() << "Scanning refs in "<< *this << '\n'); |
3289 | scanExternalRefs(); |
3290 | } |
3291 | } |
3292 | |
3293 | void BinaryFunction::duplicateConstantIslands() { |
3294 | assert(Islands && "function expected to have constant islands"); |
3295 | |
3296 | for (BinaryBasicBlock *BB : getLayout().blocks()) { |
3297 | if (!BB->isCold()) |
3298 | continue; |
3299 | |
3300 | for (MCInst &Inst : *BB) { |
3301 | int OpNum = 0; |
3302 | for (MCOperand &Operand : Inst) { |
3303 | if (!Operand.isExpr()) { |
3304 | ++OpNum; |
3305 | continue; |
3306 | } |
3307 | const MCSymbol *Symbol = BC.MIB->getTargetSymbol(Inst, OpNum); |
3308 | // Check if this is an island symbol |
3309 | if (!Islands->Symbols.count(Ptr: Symbol) && |
3310 | !Islands->ProxySymbols.count(Val: Symbol)) |
3311 | continue; |
3312 | |
3313 | // Create cold symbol, if missing |
3314 | auto ISym = Islands->ColdSymbols.find(Val: Symbol); |
3315 | MCSymbol *ColdSymbol; |
3316 | if (ISym != Islands->ColdSymbols.end()) { |
3317 | ColdSymbol = ISym->second; |
3318 | } else { |
3319 | ColdSymbol = BC.Ctx->getOrCreateSymbol(Name: Symbol->getName() + ".cold"); |
3320 | Islands->ColdSymbols[Symbol] = ColdSymbol; |
3321 | // Check if this is a proxy island symbol and update owner proxy map |
3322 | if (Islands->ProxySymbols.count(Val: Symbol)) { |
3323 | BinaryFunction *Owner = Islands->ProxySymbols[Symbol]; |
3324 | auto IProxiedSym = Owner->Islands->Proxies[this].find(x: Symbol); |
3325 | Owner->Islands->ColdProxies[this][IProxiedSym->second] = ColdSymbol; |
3326 | } |
3327 | } |
3328 | |
3329 | // Update instruction reference |
3330 | Operand = MCOperand::createExpr(Val: BC.MIB->getTargetExprFor( |
3331 | Inst, |
3332 | Expr: MCSymbolRefExpr::create(Symbol: ColdSymbol, Kind: MCSymbolRefExpr::VK_None, |
3333 | Ctx&: *BC.Ctx), |
3334 | Ctx&: *BC.Ctx, RelType: 0)); |
3335 | ++OpNum; |
3336 | } |
3337 | } |
3338 | } |
3339 | } |
3340 | |
3341 | #ifndef MAX_PATH |
3342 | #define MAX_PATH 255 |
3343 | #endif |
3344 | |
3345 | static std::string constructFilename(std::string Filename, |
3346 | std::string Annotation, |
3347 | std::string Suffix) { |
3348 | llvm::replace(Range&: Filename, OldValue: '/', NewValue: '-'); |
3349 | if (!Annotation.empty()) |
3350 | Annotation.insert(pos: 0, s: "-"); |
3351 | if (Filename.size() + Annotation.size() + Suffix.size() > MAX_PATH) { |
3352 | assert(Suffix.size() + Annotation.size() <= MAX_PATH); |
3353 | Filename.resize(MAX_PATH - (Suffix.size() + Annotation.size())); |
3354 | } |
3355 | Filename += Annotation; |
3356 | Filename += Suffix; |
3357 | return Filename; |
3358 | } |
3359 | |
3360 | static std::string formatEscapes(const std::string &Str) { |
3361 | std::string Result; |
3362 | for (unsigned I = 0; I < Str.size(); ++I) { |
3363 | char C = Str[I]; |
3364 | switch (C) { |
3365 | case '\n': |
3366 | Result += " "; |
3367 | break; |
3368 | case '"': |
3369 | break; |
3370 | default: |
3371 | Result += C; |
3372 | break; |
3373 | } |
3374 | } |
3375 | return Result; |
3376 | } |
3377 | |
3378 | void BinaryFunction::dumpGraph(raw_ostream &OS) const { |
3379 | OS << "digraph \""<< getPrintName() << "\" {\n" |
3380 | << "node [fontname=courier, shape=box, style=filled, colorscheme=brbg9]\n"; |
3381 | uint64_t Offset = Address; |
3382 | for (BinaryBasicBlock *BB : BasicBlocks) { |
3383 | auto LayoutPos = find(Range: Layout.blocks(), Val: BB); |
3384 | unsigned LayoutIndex = LayoutPos - Layout.block_begin(); |
3385 | const char *ColdStr = BB->isCold() ? " (cold)": ""; |
3386 | std::vector<std::string> Attrs; |
3387 | // Bold box for entry points |
3388 | if (isEntryPoint(BB: *BB)) |
3389 | Attrs.push_back(x: "penwidth=2"); |
3390 | if (BLI && BLI->getLoopFor(BB)) { |
3391 | // Distinguish innermost loops |
3392 | const BinaryLoop *Loop = BLI->getLoopFor(BB); |
3393 | if (Loop->isInnermost()) |
3394 | Attrs.push_back(x: "fillcolor=6"); |
3395 | else // some outer loop |
3396 | Attrs.push_back(x: "fillcolor=4"); |
3397 | } else { // non-loopy code |
3398 | Attrs.push_back(x: "fillcolor=5"); |
3399 | } |
3400 | ListSeparator LS; |
3401 | OS << "\""<< BB->getName() << "\" ["; |
3402 | for (StringRef Attr : Attrs) |
3403 | OS << LS << Attr; |
3404 | OS << "]\n"; |
3405 | OS << format(Fmt: "\"%s\" [label=\"%s%s\\n(C:%lu,O:%lu,I:%u,L:%u,CFI:%u)\\n", |
3406 | Vals: BB->getName().data(), Vals: BB->getName().data(), Vals: ColdStr, |
3407 | Vals: BB->getKnownExecutionCount(), Vals: BB->getOffset(), Vals: getIndex(BB), |
3408 | Vals: LayoutIndex, Vals: BB->getCFIState()); |
3409 | |
3410 | if (opts::DotToolTipCode) { |
3411 | std::string Str; |
3412 | raw_string_ostream CS(Str); |
3413 | Offset = BC.printInstructions(OS&: CS, Begin: BB->begin(), End: BB->end(), Offset, Function: this, |
3414 | /* PrintMCInst = */ false, |
3415 | /* PrintMemData = */ false, |
3416 | /* PrintRelocations = */ false, |
3417 | /* Endl = */ R"(\\l)"); |
3418 | OS << formatEscapes(Str: CS.str()) << '\n'; |
3419 | } |
3420 | OS << "\"]\n"; |
3421 | |
3422 | // analyzeBranch is just used to get the names of the branch |
3423 | // opcodes. |
3424 | const MCSymbol *TBB = nullptr; |
3425 | const MCSymbol *FBB = nullptr; |
3426 | MCInst *CondBranch = nullptr; |
3427 | MCInst *UncondBranch = nullptr; |
3428 | const bool Success = BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch); |
3429 | |
3430 | const MCInst *LastInstr = BB->getLastNonPseudoInstr(); |
3431 | const bool IsJumpTable = LastInstr && BC.MIB->getJumpTable(Inst: *LastInstr); |
3432 | |
3433 | auto BI = BB->branch_info_begin(); |
3434 | for (BinaryBasicBlock *Succ : BB->successors()) { |
3435 | std::string Branch; |
3436 | if (Success) { |
3437 | if (Succ == BB->getConditionalSuccessor(Condition: true)) { |
3438 | Branch = CondBranch ? std::string(BC.InstPrinter->getOpcodeName( |
3439 | Opcode: CondBranch->getOpcode())) |
3440 | : "TB"; |
3441 | } else if (Succ == BB->getConditionalSuccessor(Condition: false)) { |
3442 | Branch = UncondBranch ? std::string(BC.InstPrinter->getOpcodeName( |
3443 | Opcode: UncondBranch->getOpcode())) |
3444 | : "FB"; |
3445 | } else { |
3446 | Branch = "FT"; |
3447 | } |
3448 | } |
3449 | if (IsJumpTable) |
3450 | Branch = "JT"; |
3451 | OS << format(Fmt: "\"%s\" -> \"%s\" [label=\"%s", Vals: BB->getName().data(), |
3452 | Vals: Succ->getName().data(), Vals: Branch.c_str()); |
3453 | |
3454 | if (BB->getExecutionCount() != COUNT_NO_PROFILE && |
3455 | BI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) { |
3456 | OS << "\\n(C:"<< BI->Count << ",M:"<< BI->MispredictedCount << ")"; |
3457 | } else if (ExecutionCount != COUNT_NO_PROFILE && |
3458 | BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) { |
3459 | OS << "\\n(IC:"<< BI->Count << ")"; |
3460 | } |
3461 | OS << "\"]\n"; |
3462 | |
3463 | ++BI; |
3464 | } |
3465 | for (BinaryBasicBlock *LP : BB->landing_pads()) { |
3466 | OS << format(Fmt: "\"%s\" -> \"%s\" [constraint=false style=dashed]\n", |
3467 | Vals: BB->getName().data(), Vals: LP->getName().data()); |
3468 | } |
3469 | } |
3470 | OS << "}\n"; |
3471 | } |
3472 | |
3473 | void BinaryFunction::viewGraph() const { |
3474 | SmallString<MAX_PATH> Filename; |
3475 | if (std::error_code EC = |
3476 | sys::fs::createTemporaryFile(Prefix: "bolt-cfg", Suffix: "dot", ResultPath&: Filename)) { |
3477 | BC.errs() << "BOLT-ERROR: "<< EC.message() << ", unable to create " |
3478 | << " bolt-cfg-XXXXX.dot temporary file.\n"; |
3479 | return; |
3480 | } |
3481 | dumpGraphToFile(Filename: std::string(Filename)); |
3482 | if (DisplayGraph(Filename)) |
3483 | BC.errs() << "BOLT-ERROR: Can't display "<< Filename |
3484 | << " with graphviz.\n"; |
3485 | if (std::error_code EC = sys::fs::remove(path: Filename)) { |
3486 | BC.errs() << "BOLT-WARNING: "<< EC.message() << ", failed to remove " |
3487 | << Filename << "\n"; |
3488 | } |
3489 | } |
3490 | |
3491 | void BinaryFunction::dumpGraphForPass(std::string Annotation) const { |
3492 | if (!opts::shouldPrint(Function: *this)) |
3493 | return; |
3494 | |
3495 | std::string Filename = constructFilename(Filename: getPrintName(), Annotation, Suffix: ".dot"); |
3496 | if (opts::Verbosity >= 1) |
3497 | BC.outs() << "BOLT-INFO: dumping CFG to "<< Filename << "\n"; |
3498 | dumpGraphToFile(Filename); |
3499 | } |
3500 | |
3501 | void BinaryFunction::dumpGraphToFile(std::string Filename) const { |
3502 | std::error_code EC; |
3503 | raw_fd_ostream of(Filename, EC, sys::fs::OF_None); |
3504 | if (EC) { |
3505 | if (opts::Verbosity >= 1) { |
3506 | BC.errs() << "BOLT-WARNING: "<< EC.message() << ", unable to open " |
3507 | << Filename << " for output.\n"; |
3508 | } |
3509 | return; |
3510 | } |
3511 | dumpGraph(OS&: of); |
3512 | } |
3513 | |
3514 | bool BinaryFunction::validateCFG() const { |
3515 | // Skip the validation of CFG after it is finalized |
3516 | if (CurrentState == State::CFG_Finalized) |
3517 | return true; |
3518 | |
3519 | for (BinaryBasicBlock *BB : BasicBlocks) |
3520 | if (!BB->validateSuccessorInvariants()) |
3521 | return false; |
3522 | |
3523 | // Make sure all blocks in CFG are valid. |
3524 | auto validateBlock = [this](const BinaryBasicBlock *BB, StringRef Desc) { |
3525 | if (!BB->isValid()) { |
3526 | BC.errs() << "BOLT-ERROR: deleted "<< Desc << " "<< BB->getName() |
3527 | << " detected in:\n"; |
3528 | this->dump(); |
3529 | return false; |
3530 | } |
3531 | return true; |
3532 | }; |
3533 | for (const BinaryBasicBlock *BB : BasicBlocks) { |
3534 | if (!validateBlock(BB, "block")) |
3535 | return false; |
3536 | for (const BinaryBasicBlock *PredBB : BB->predecessors()) |
3537 | if (!validateBlock(PredBB, "predecessor")) |
3538 | return false; |
3539 | for (const BinaryBasicBlock *SuccBB : BB->successors()) |
3540 | if (!validateBlock(SuccBB, "successor")) |
3541 | return false; |
3542 | for (const BinaryBasicBlock *LP : BB->landing_pads()) |
3543 | if (!validateBlock(LP, "landing pad")) |
3544 | return false; |
3545 | for (const BinaryBasicBlock *Thrower : BB->throwers()) |
3546 | if (!validateBlock(Thrower, "thrower")) |
3547 | return false; |
3548 | } |
3549 | |
3550 | for (const BinaryBasicBlock *BB : BasicBlocks) { |
3551 | std::unordered_set<const BinaryBasicBlock *> BBLandingPads; |
3552 | for (const BinaryBasicBlock *LP : BB->landing_pads()) { |
3553 | if (BBLandingPads.count(x: LP)) { |
3554 | BC.errs() << "BOLT-ERROR: duplicate landing pad detected in" |
3555 | << BB->getName() << " in function "<< *this << '\n'; |
3556 | return false; |
3557 | } |
3558 | BBLandingPads.insert(x: LP); |
3559 | } |
3560 | |
3561 | std::unordered_set<const BinaryBasicBlock *> BBThrowers; |
3562 | for (const BinaryBasicBlock *Thrower : BB->throwers()) { |
3563 | if (BBThrowers.count(x: Thrower)) { |
3564 | BC.errs() << "BOLT-ERROR: duplicate thrower detected in" |
3565 | << BB->getName() << " in function "<< *this << '\n'; |
3566 | return false; |
3567 | } |
3568 | BBThrowers.insert(x: Thrower); |
3569 | } |
3570 | |
3571 | for (const BinaryBasicBlock *LPBlock : BB->landing_pads()) { |
3572 | if (!llvm::is_contained(Range: LPBlock->throwers(), Element: BB)) { |
3573 | BC.errs() << "BOLT-ERROR: inconsistent landing pad detected in " |
3574 | << *this << ": "<< BB->getName() |
3575 | << " is in LandingPads but not in "<< LPBlock->getName() |
3576 | << " Throwers\n"; |
3577 | return false; |
3578 | } |
3579 | } |
3580 | for (const BinaryBasicBlock *Thrower : BB->throwers()) { |
3581 | if (!llvm::is_contained(Range: Thrower->landing_pads(), Element: BB)) { |
3582 | BC.errs() << "BOLT-ERROR: inconsistent thrower detected in "<< *this |
3583 | << ": "<< BB->getName() << " is in Throwers list but not in " |
3584 | << Thrower->getName() << " LandingPads\n"; |
3585 | return false; |
3586 | } |
3587 | } |
3588 | } |
3589 | |
3590 | return true; |
3591 | } |
3592 | |
3593 | void BinaryFunction::fixBranches() { |
3594 | assert(isSimple() && "Expected function with valid CFG."); |
3595 | |
3596 | auto &MIB = BC.MIB; |
3597 | MCContext *Ctx = BC.Ctx.get(); |
3598 | |
3599 | for (BinaryBasicBlock *BB : BasicBlocks) { |
3600 | const MCSymbol *TBB = nullptr; |
3601 | const MCSymbol *FBB = nullptr; |
3602 | MCInst *CondBranch = nullptr; |
3603 | MCInst *UncondBranch = nullptr; |
3604 | if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch)) |
3605 | continue; |
3606 | |
3607 | // We will create unconditional branch with correct destination if needed. |
3608 | if (UncondBranch) |
3609 | BB->eraseInstruction(II: BB->findInstruction(Inst: UncondBranch)); |
3610 | |
3611 | // Basic block that follows the current one in the final layout. |
3612 | const BinaryBasicBlock *const NextBB = |
3613 | Layout.getBasicBlockAfter(BB, /*IgnoreSplits=*/false); |
3614 | |
3615 | if (BB->succ_size() == 1) { |
3616 | // __builtin_unreachable() could create a conditional branch that |
3617 | // falls-through into the next function - hence the block will have only |
3618 | // one valid successor. Since behaviour is undefined - we replace |
3619 | // the conditional branch with an unconditional if required. |
3620 | if (CondBranch) |
3621 | BB->eraseInstruction(II: BB->findInstruction(Inst: CondBranch)); |
3622 | if (BB->getSuccessor() == NextBB) |
3623 | continue; |
3624 | BB->addBranchInstruction(Successor: BB->getSuccessor()); |
3625 | } else if (BB->succ_size() == 2) { |
3626 | assert(CondBranch && "conditional branch expected"); |
3627 | const BinaryBasicBlock *TSuccessor = BB->getConditionalSuccessor(Condition: true); |
3628 | const BinaryBasicBlock *FSuccessor = BB->getConditionalSuccessor(Condition: false); |
3629 | |
3630 | // Eliminate unnecessary conditional branch. |
3631 | if (TSuccessor == FSuccessor) { |
3632 | // FIXME: at the moment, we cannot safely remove static key branches. |
3633 | if (MIB->isDynamicBranch(Inst: *CondBranch)) { |
3634 | if (opts::Verbosity) { |
3635 | BC.outs() |
3636 | << "BOLT-INFO: unable to remove redundant dynamic branch in " |
3637 | << *this << '\n'; |
3638 | } |
3639 | continue; |
3640 | } |
3641 | |
3642 | BB->removeDuplicateConditionalSuccessor(CondBranch); |
3643 | if (TSuccessor != NextBB) |
3644 | BB->addBranchInstruction(Successor: TSuccessor); |
3645 | continue; |
3646 | } |
3647 | |
3648 | // Reverse branch condition and swap successors. |
3649 | auto swapSuccessors = [&]() { |
3650 | if (!MIB->isReversibleBranch(Inst: *CondBranch)) { |
3651 | if (opts::Verbosity) { |
3652 | BC.outs() << "BOLT-INFO: unable to swap successors in "<< *this |
3653 | << '\n'; |
3654 | } |
3655 | return false; |
3656 | } |
3657 | std::swap(a&: TSuccessor, b&: FSuccessor); |
3658 | BB->swapConditionalSuccessors(); |
3659 | auto L = BC.scopeLock(); |
3660 | MIB->reverseBranchCondition(Inst&: *CondBranch, TBB: TSuccessor->getLabel(), Ctx); |
3661 | return true; |
3662 | }; |
3663 | |
3664 | // Check whether the next block is a "taken" target and try to swap it |
3665 | // with a "fall-through" target. |
3666 | if (TSuccessor == NextBB && swapSuccessors()) |
3667 | continue; |
3668 | |
3669 | // Update conditional branch destination if needed. |
3670 | if (MIB->getTargetSymbol(Inst: *CondBranch) != TSuccessor->getLabel()) { |
3671 | auto L = BC.scopeLock(); |
3672 | MIB->replaceBranchTarget(Inst&: *CondBranch, TBB: TSuccessor->getLabel(), Ctx); |
3673 | } |
3674 | |
3675 | // No need for the unconditional branch. |
3676 | if (FSuccessor == NextBB) |
3677 | continue; |
3678 | |
3679 | if (BC.isX86()) { |
3680 | // We are going to generate two branches. Check if their targets are in |
3681 | // the same fragment as this block. If only one target is in the same |
3682 | // fragment, make it the destination of the conditional branch. There |
3683 | // is a chance it will be a short branch which takes 4 bytes fewer than |
3684 | // a long conditional branch. For unconditional branch, the difference |
3685 | // is 3 bytes. |
3686 | if (BB->getFragmentNum() != TSuccessor->getFragmentNum() && |
3687 | BB->getFragmentNum() == FSuccessor->getFragmentNum()) |
3688 | swapSuccessors(); |
3689 | } |
3690 | |
3691 | BB->addBranchInstruction(Successor: FSuccessor); |
3692 | } |
3693 | // Cases where the number of successors is 0 (block ends with a |
3694 | // terminator) or more than 2 (switch table) don't require branch |
3695 | // instruction adjustments. |
3696 | } |
3697 | assert((!isSimple() || validateCFG()) && |
3698 | "Invalid CFG detected after fixing branches"); |
3699 | } |
3700 | |
3701 | void BinaryFunction::propagateGnuArgsSizeInfo( |
3702 | MCPlusBuilder::AllocatorIdTy AllocId) { |
3703 | assert(CurrentState == State::Disassembled && "unexpected function state"); |
3704 | |
3705 | if (!hasEHRanges() || !usesGnuArgsSize()) |
3706 | return; |
3707 | |
3708 | // The current value of DW_CFA_GNU_args_size affects all following |
3709 | // invoke instructions until the next CFI overrides it. |
3710 | // It is important to iterate basic blocks in the original order when |
3711 | // assigning the value. |
3712 | uint64_t CurrentGnuArgsSize = 0; |
3713 | for (BinaryBasicBlock *BB : BasicBlocks) { |
3714 | for (auto II = BB->begin(); II != BB->end();) { |
3715 | MCInst &Instr = *II; |
3716 | if (BC.MIB->isCFI(Inst: Instr)) { |
3717 | const MCCFIInstruction *CFI = getCFIFor(Instr); |
3718 | if (CFI->getOperation() == MCCFIInstruction::OpGnuArgsSize) { |
3719 | CurrentGnuArgsSize = CFI->getOffset(); |
3720 | // Delete DW_CFA_GNU_args_size instructions and only regenerate |
3721 | // during the final code emission. The information is embedded |
3722 | // inside call instructions. |
3723 | II = BB->erasePseudoInstruction(II); |
3724 | continue; |
3725 | } |
3726 | } else if (BC.MIB->isInvoke(Inst: Instr)) { |
3727 | // Add the value of GNU_args_size as an extra operand to invokes. |
3728 | BC.MIB->addGnuArgsSize(Inst&: Instr, GnuArgsSize: CurrentGnuArgsSize); |
3729 | } |
3730 | ++II; |
3731 | } |
3732 | } |
3733 | } |
3734 | |
3735 | void BinaryFunction::postProcessBranches() { |
3736 | if (!isSimple()) |
3737 | return; |
3738 | for (BinaryBasicBlock &BB : blocks()) { |
3739 | auto LastInstrRI = BB.getLastNonPseudo(); |
3740 | if (BB.succ_size() == 1) { |
3741 | if (LastInstrRI != BB.rend() && |
3742 | BC.MIB->isConditionalBranch(Inst: *LastInstrRI)) { |
3743 | // __builtin_unreachable() could create a conditional branch that |
3744 | // falls-through into the next function - hence the block will have only |
3745 | // one valid successor. Such behaviour is undefined and thus we remove |
3746 | // the conditional branch while leaving a valid successor. |
3747 | BB.eraseInstruction(II: std::prev(x: LastInstrRI.base())); |
3748 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: erasing conditional branch in " |
3749 | << BB.getName() << " in function "<< *this << '\n'); |
3750 | } |
3751 | } else if (BB.succ_size() == 0) { |
3752 | // Ignore unreachable basic blocks. |
3753 | if (BB.pred_size() == 0 || BB.isLandingPad()) |
3754 | continue; |
3755 | |
3756 | // If it's the basic block that does not end up with a terminator - we |
3757 | // insert a return instruction unless it's a call instruction. |
3758 | if (LastInstrRI == BB.rend()) { |
3759 | LLVM_DEBUG( |
3760 | dbgs() << "BOLT-DEBUG: at least one instruction expected in BB " |
3761 | << BB.getName() << " in function "<< *this << '\n'); |
3762 | continue; |
3763 | } |
3764 | if (!BC.MIB->isTerminator(Inst: *LastInstrRI) && |
3765 | !BC.MIB->isCall(Inst: *LastInstrRI)) { |
3766 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: adding return to basic block " |
3767 | << BB.getName() << " in function "<< *this << '\n'); |
3768 | MCInst ReturnInstr; |
3769 | BC.MIB->createReturn(Inst&: ReturnInstr); |
3770 | BB.addInstruction(Inst: ReturnInstr); |
3771 | } |
3772 | } |
3773 | } |
3774 | assert(validateCFG() && "invalid CFG"); |
3775 | } |
3776 | |
3777 | MCSymbol *BinaryFunction::addEntryPointAtOffset(uint64_t Offset) { |
3778 | assert(Offset && "cannot add primary entry point"); |
3779 | |
3780 | const uint64_t EntryPointAddress = getAddress() + Offset; |
3781 | MCSymbol *LocalSymbol = getOrCreateLocalLabel(Address: EntryPointAddress); |
3782 | |
3783 | MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(BBLabel: LocalSymbol); |
3784 | if (EntrySymbol) |
3785 | return EntrySymbol; |
3786 | |
3787 | assert(CurrentState == State::Empty || CurrentState == State::Disassembled); |
3788 | |
3789 | if (BinaryData *EntryBD = BC.getBinaryDataAtAddress(Address: EntryPointAddress)) { |
3790 | EntrySymbol = EntryBD->getSymbol(); |
3791 | } else { |
3792 | EntrySymbol = BC.getOrCreateGlobalSymbol( |
3793 | Address: EntryPointAddress, Prefix: Twine("__ENTRY_") + getOneName() + "@"); |
3794 | } |
3795 | SecondaryEntryPoints[LocalSymbol] = EntrySymbol; |
3796 | |
3797 | BC.setSymbolToFunctionMap(Sym: EntrySymbol, BF: this); |
3798 | |
3799 | return EntrySymbol; |
3800 | } |
3801 | |
3802 | MCSymbol *BinaryFunction::addEntryPoint(const BinaryBasicBlock &BB) { |
3803 | assert(CurrentState == State::CFG && |
3804 | "basic block can be added as an entry only in a function with CFG"); |
3805 | |
3806 | if (&BB == BasicBlocks.front()) |
3807 | return getSymbol(); |
3808 | |
3809 | MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(BB); |
3810 | if (EntrySymbol) |
3811 | return EntrySymbol; |
3812 | |
3813 | EntrySymbol = |
3814 | BC.Ctx->getOrCreateSymbol(Name: "__ENTRY_"+ BB.getLabel()->getName()); |
3815 | |
3816 | SecondaryEntryPoints[BB.getLabel()] = EntrySymbol; |
3817 | |
3818 | BC.setSymbolToFunctionMap(Sym: EntrySymbol, BF: this); |
3819 | |
3820 | return EntrySymbol; |
3821 | } |
3822 | |
3823 | MCSymbol *BinaryFunction::getSymbolForEntryID(uint64_t EntryID) { |
3824 | if (EntryID == 0) |
3825 | return getSymbol(); |
3826 | |
3827 | if (!isMultiEntry()) |
3828 | return nullptr; |
3829 | |
3830 | uint64_t NumEntries = 1; |
3831 | if (hasCFG()) { |
3832 | for (BinaryBasicBlock *BB : BasicBlocks) { |
3833 | MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(BB: *BB); |
3834 | if (!EntrySymbol) |
3835 | continue; |
3836 | if (NumEntries == EntryID) |
3837 | return EntrySymbol; |
3838 | ++NumEntries; |
3839 | } |
3840 | } else { |
3841 | for (std::pair<const uint32_t, MCSymbol *> &KV : Labels) { |
3842 | MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(BBLabel: KV.second); |
3843 | if (!EntrySymbol) |
3844 | continue; |
3845 | if (NumEntries == EntryID) |
3846 | return EntrySymbol; |
3847 | ++NumEntries; |
3848 | } |
3849 | } |
3850 | |
3851 | return nullptr; |
3852 | } |
3853 | |
3854 | uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const { |
3855 | if (!isMultiEntry()) |
3856 | return 0; |
3857 | |
3858 | for (const MCSymbol *FunctionSymbol : getSymbols()) |
3859 | if (FunctionSymbol == Symbol) |
3860 | return 0; |
3861 | |
3862 | // Check all secondary entries available as either basic blocks or lables. |
3863 | uint64_t NumEntries = 1; |
3864 | for (const BinaryBasicBlock *BB : BasicBlocks) { |
3865 | MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(BB: *BB); |
3866 | if (!EntrySymbol) |
3867 | continue; |
3868 | if (EntrySymbol == Symbol) |
3869 | return NumEntries; |
3870 | ++NumEntries; |
3871 | } |
3872 | NumEntries = 1; |
3873 | for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) { |
3874 | MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(BBLabel: KV.second); |
3875 | if (!EntrySymbol) |
3876 | continue; |
3877 | if (EntrySymbol == Symbol) |
3878 | return NumEntries; |
3879 | ++NumEntries; |
3880 | } |
3881 | |
3882 | llvm_unreachable("symbol not found"); |
3883 | } |
3884 | |
3885 | bool BinaryFunction::forEachEntryPoint(EntryPointCallbackTy Callback) const { |
3886 | bool Status = Callback(0, getSymbol()); |
3887 | if (!isMultiEntry()) |
3888 | return Status; |
3889 | |
3890 | for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) { |
3891 | if (!Status) |
3892 | break; |
3893 | |
3894 | MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(BBLabel: KV.second); |
3895 | if (!EntrySymbol) |
3896 | continue; |
3897 | |
3898 | Status = Callback(KV.first, EntrySymbol); |
3899 | } |
3900 | |
3901 | return Status; |
3902 | } |
3903 | |
3904 | BinaryFunction::BasicBlockListType BinaryFunction::dfs() const { |
3905 | BasicBlockListType DFS; |
3906 | std::stack<BinaryBasicBlock *> Stack; |
3907 | std::set<BinaryBasicBlock *> Visited; |
3908 | |
3909 | // Push entry points to the stack in reverse order. |
3910 | // |
3911 | // NB: we rely on the original order of entries to match. |
3912 | SmallVector<BinaryBasicBlock *> EntryPoints; |
3913 | llvm::copy_if(Range: BasicBlocks, Out: std::back_inserter(x&: EntryPoints), |
3914 | P: [&](const BinaryBasicBlock *const BB) { return isEntryPoint(BB: *BB); }); |
3915 | // Sort entry points by their offset to make sure we got them in the right |
3916 | // order. |
3917 | llvm::stable_sort(Range&: EntryPoints, C: [](const BinaryBasicBlock *const A, |
3918 | const BinaryBasicBlock *const B) { |
3919 | return A->getOffset() < B->getOffset(); |
3920 | }); |
3921 | for (BinaryBasicBlock *const BB : reverse(C&: EntryPoints)) |
3922 | Stack.push(x: BB); |
3923 | |
3924 | while (!Stack.empty()) { |
3925 | BinaryBasicBlock *BB = Stack.top(); |
3926 | Stack.pop(); |
3927 | |
3928 | if (!Visited.insert(x: BB).second) |
3929 | continue; |
3930 | DFS.push_back(Elt: BB); |
3931 | |
3932 | for (BinaryBasicBlock *SuccBB : BB->landing_pads()) { |
3933 | Stack.push(x: SuccBB); |
3934 | } |
3935 | |
3936 | const MCSymbol *TBB = nullptr; |
3937 | const MCSymbol *FBB = nullptr; |
3938 | MCInst *CondBranch = nullptr; |
3939 | MCInst *UncondBranch = nullptr; |
3940 | if (BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch) && CondBranch && |
3941 | BB->succ_size() == 2) { |
3942 | if (BC.MIB->getCanonicalBranchCondCode(CC: BC.MIB->getCondCode( |
3943 | Inst: *CondBranch)) == BC.MIB->getCondCode(Inst: *CondBranch)) { |
3944 | Stack.push(x: BB->getConditionalSuccessor(Condition: true)); |
3945 | Stack.push(x: BB->getConditionalSuccessor(Condition: false)); |
3946 | } else { |
3947 | Stack.push(x: BB->getConditionalSuccessor(Condition: false)); |
3948 | Stack.push(x: BB->getConditionalSuccessor(Condition: true)); |
3949 | } |
3950 | } else { |
3951 | for (BinaryBasicBlock *SuccBB : BB->successors()) { |
3952 | Stack.push(x: SuccBB); |
3953 | } |
3954 | } |
3955 | } |
3956 | |
3957 | return DFS; |
3958 | } |
3959 | |
3960 | size_t BinaryFunction::computeHash(bool UseDFS, HashFunction HashFunction, |
3961 | OperandHashFuncTy OperandHashFunc) const { |
3962 | LLVM_DEBUG({ |
3963 | dbgs() << "BOLT-DEBUG: computeHash "<< getPrintName() << ' ' |
3964 | << (UseDFS ? "dfs": "bin") << " order " |
3965 | << (HashFunction == HashFunction::StdHash ? "std::hash": "xxh3") |
3966 | << '\n'; |
3967 | }); |
3968 | |
3969 | if (size() == 0) |
3970 | return 0; |
3971 | |
3972 | assert(hasCFG() && "function is expected to have CFG"); |
3973 | |
3974 | SmallVector<const BinaryBasicBlock *, 0> Order; |
3975 | if (UseDFS) |
3976 | llvm::copy(Range: dfs(), Out: std::back_inserter(x&: Order)); |
3977 | else |
3978 | llvm::copy(Range: Layout.blocks(), Out: std::back_inserter(x&: Order)); |
3979 | |
3980 | // The hash is computed by creating a string of all instruction opcodes and |
3981 | // possibly their operands and then hashing that string with std::hash. |
3982 | std::string HashString; |
3983 | for (const BinaryBasicBlock *BB : Order) |
3984 | HashString.append(str: hashBlock(BC, BB: *BB, OperandHashFunc)); |
3985 | |
3986 | switch (HashFunction) { |
3987 | case HashFunction::StdHash: |
3988 | return Hash = std::hash<std::string>{}(HashString); |
3989 | case HashFunction::XXH3: |
3990 | return Hash = llvm::xxh3_64bits(data: HashString); |
3991 | } |
3992 | llvm_unreachable("Unhandled HashFunction"); |
3993 | } |
3994 | |
3995 | void BinaryFunction::insertBasicBlocks( |
3996 | BinaryBasicBlock *Start, |
3997 | std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs, |
3998 | const bool UpdateLayout, const bool UpdateCFIState, |
3999 | const bool RecomputeLandingPads) { |
4000 | const int64_t StartIndex = Start ? getIndex(BB: Start) : -1LL; |
4001 | const size_t NumNewBlocks = NewBBs.size(); |
4002 | |
4003 | BasicBlocks.insert(I: BasicBlocks.begin() + (StartIndex + 1), NumToInsert: NumNewBlocks, |
4004 | Elt: nullptr); |
4005 | |
4006 | int64_t I = StartIndex + 1; |
4007 | for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) { |
4008 | assert(!BasicBlocks[I]); |
4009 | BasicBlocks[I++] = BB.release(); |
4010 | } |
4011 | |
4012 | if (RecomputeLandingPads) |
4013 | recomputeLandingPads(); |
4014 | else |
4015 | updateBBIndices(StartIndex: 0); |
4016 | |
4017 | if (UpdateLayout) |
4018 | updateLayout(Start, NumNewBlocks); |
4019 | |
4020 | if (UpdateCFIState) |
4021 | updateCFIState(Start, NumNewBlocks); |
4022 | } |
4023 | |
4024 | BinaryFunction::iterator BinaryFunction::insertBasicBlocks( |
4025 | BinaryFunction::iterator StartBB, |
4026 | std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs, |
4027 | const bool UpdateLayout, const bool UpdateCFIState, |
4028 | const bool RecomputeLandingPads) { |
4029 | const unsigned StartIndex = getIndex(BB: &*StartBB); |
4030 | const size_t NumNewBlocks = NewBBs.size(); |
4031 | |
4032 | BasicBlocks.insert(I: BasicBlocks.begin() + StartIndex + 1, NumToInsert: NumNewBlocks, |
4033 | Elt: nullptr); |
4034 | auto RetIter = BasicBlocks.begin() + StartIndex + 1; |
4035 | |
4036 | unsigned I = StartIndex + 1; |
4037 | for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) { |
4038 | assert(!BasicBlocks[I]); |
4039 | BasicBlocks[I++] = BB.release(); |
4040 | } |
4041 | |
4042 | if (RecomputeLandingPads) |
4043 | recomputeLandingPads(); |
4044 | else |
4045 | updateBBIndices(StartIndex: 0); |
4046 | |
4047 | if (UpdateLayout) |
4048 | updateLayout(Start: *std::prev(x: RetIter), NumNewBlocks); |
4049 | |
4050 | if (UpdateCFIState) |
4051 | updateCFIState(Start: *std::prev(x: RetIter), NumNewBlocks); |
4052 | |
4053 | return RetIter; |
4054 | } |
4055 | |
4056 | void BinaryFunction::updateBBIndices(const unsigned StartIndex) { |
4057 | for (unsigned I = StartIndex; I < BasicBlocks.size(); ++I) |
4058 | BasicBlocks[I]->Index = I; |
4059 | } |
4060 | |
4061 | void BinaryFunction::updateCFIState(BinaryBasicBlock *Start, |
4062 | const unsigned NumNewBlocks) { |
4063 | const int32_t CFIState = Start->getCFIStateAtExit(); |
4064 | const unsigned StartIndex = getIndex(BB: Start) + 1; |
4065 | for (unsigned I = 0; I < NumNewBlocks; ++I) |
4066 | BasicBlocks[StartIndex + I]->setCFIState(CFIState); |
4067 | } |
4068 | |
4069 | void BinaryFunction::updateLayout(BinaryBasicBlock *Start, |
4070 | const unsigned NumNewBlocks) { |
4071 | BasicBlockListType::iterator Begin; |
4072 | BasicBlockListType::iterator End; |
4073 | |
4074 | // If start not provided copy new blocks from the beginning of BasicBlocks |
4075 | if (!Start) { |
4076 | Begin = BasicBlocks.begin(); |
4077 | End = BasicBlocks.begin() + NumNewBlocks; |
4078 | } else { |
4079 | unsigned StartIndex = getIndex(BB: Start); |
4080 | Begin = std::next(x: BasicBlocks.begin(), n: StartIndex + 1); |
4081 | End = std::next(x: BasicBlocks.begin(), n: StartIndex + NumNewBlocks + 1); |
4082 | } |
4083 | |
4084 | // Insert new blocks in the layout immediately after Start. |
4085 | Layout.insertBasicBlocks(InsertAfter: Start, NewBlocks: {Begin, End}); |
4086 | Layout.updateLayoutIndices(); |
4087 | } |
4088 | |
4089 | bool BinaryFunction::checkForAmbiguousJumpTables() { |
4090 | SmallSet<uint64_t, 4> JumpTables; |
4091 | for (BinaryBasicBlock *&BB : BasicBlocks) { |
4092 | for (MCInst &Inst : *BB) { |
4093 | if (!BC.MIB->isIndirectBranch(Inst)) |
4094 | continue; |
4095 | uint64_t JTAddress = BC.MIB->getJumpTable(Inst); |
4096 | if (!JTAddress) |
4097 | continue; |
4098 | // This address can be inside another jump table, but we only consider |
4099 | // it ambiguous when the same start address is used, not the same JT |
4100 | // object. |
4101 | if (!JumpTables.count(V: JTAddress)) { |
4102 | JumpTables.insert(V: JTAddress); |
4103 | continue; |
4104 | } |
4105 | return true; |
4106 | } |
4107 | } |
4108 | return false; |
4109 | } |
4110 | |
4111 | void BinaryFunction::disambiguateJumpTables( |
4112 | MCPlusBuilder::AllocatorIdTy AllocId) { |
4113 | assert((opts::JumpTables != JTS_BASIC && isSimple()) || !BC.HasRelocations); |
4114 | SmallPtrSet<JumpTable *, 4> JumpTables; |
4115 | for (BinaryBasicBlock *&BB : BasicBlocks) { |
4116 | for (MCInst &Inst : *BB) { |
4117 | if (!BC.MIB->isIndirectBranch(Inst)) |
4118 | continue; |
4119 | JumpTable *JT = getJumpTable(Inst); |
4120 | if (!JT) |
4121 | continue; |
4122 | if (JumpTables.insert(Ptr: JT).second) |
4123 | continue; |
4124 | // This instruction is an indirect jump using a jump table, but it is |
4125 | // using the same jump table of another jump. Try all our tricks to |
4126 | // extract the jump table symbol and make it point to a new, duplicated JT |
4127 | MCPhysReg BaseReg1; |
4128 | uint64_t Scale; |
4129 | const MCSymbol *Target; |
4130 | // In case we match if our first matcher, first instruction is the one to |
4131 | // patch |
4132 | MCInst *JTLoadInst = &Inst; |
4133 | // Try a standard indirect jump matcher, scale 8 |
4134 | std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher = |
4135 | BC.MIB->matchIndJmp(Base: BC.MIB->matchReg(Reg&: BaseReg1), |
4136 | Scale: BC.MIB->matchImm(Imm&: Scale), Index: BC.MIB->matchReg(), |
4137 | /*Offset=*/BC.MIB->matchSymbol(Sym&: Target)); |
4138 | if (!IndJmpMatcher->match( |
4139 | MRI: *BC.MRI, MIA&: *BC.MIB, |
4140 | InInstrWindow: MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), OpNum: -1) || |
4141 | BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) { |
4142 | MCPhysReg BaseReg2; |
4143 | uint64_t Offset; |
4144 | // Standard JT matching failed. Trying now: |
4145 | // movq "jt.2397/1"(,%rax,8), %rax |
4146 | // jmpq *%rax |
4147 | std::unique_ptr<MCPlusBuilder::MCInstMatcher> LoadMatcherOwner = |
4148 | BC.MIB->matchLoad(Base: BC.MIB->matchReg(Reg&: BaseReg1), |
4149 | Scale: BC.MIB->matchImm(Imm&: Scale), Index: BC.MIB->matchReg(), |
4150 | /*Offset=*/BC.MIB->matchSymbol(Sym&: Target)); |
4151 | MCPlusBuilder::MCInstMatcher *LoadMatcher = LoadMatcherOwner.get(); |
4152 | std::unique_ptr<MCPlusBuilder::MCInstMatcher> IndJmpMatcher2 = |
4153 | BC.MIB->matchIndJmp(Target: std::move(LoadMatcherOwner)); |
4154 | if (!IndJmpMatcher2->match( |
4155 | MRI: *BC.MRI, MIA&: *BC.MIB, |
4156 | InInstrWindow: MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), OpNum: -1) || |
4157 | BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) { |
4158 | // JT matching failed. Trying now: |
4159 | // PIC-style matcher, scale 4 |
4160 | // addq %rdx, %rsi |
4161 | // addq %rdx, %rdi |
4162 | // leaq DATAat0x402450(%rip), %r11 |
4163 | // movslq (%r11,%rdx,4), %rcx |
4164 | // addq %r11, %rcx |
4165 | // jmpq *%rcx # JUMPTABLE @0x402450 |
4166 | std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICIndJmpMatcher = |
4167 | BC.MIB->matchIndJmp(Target: BC.MIB->matchAdd( |
4168 | A: BC.MIB->matchReg(Reg&: BaseReg1), |
4169 | B: BC.MIB->matchLoad(Base: BC.MIB->matchReg(Reg&: BaseReg2), |
4170 | Scale: BC.MIB->matchImm(Imm&: Scale), Index: BC.MIB->matchReg(), |
4171 | Offset: BC.MIB->matchImm(Imm&: Offset)))); |
4172 | std::unique_ptr<MCPlusBuilder::MCInstMatcher> LEAMatcherOwner = |
4173 | BC.MIB->matchLoadAddr(Target: BC.MIB->matchSymbol(Sym&: Target)); |
4174 | MCPlusBuilder::MCInstMatcher *LEAMatcher = LEAMatcherOwner.get(); |
4175 | std::unique_ptr<MCPlusBuilder::MCInstMatcher> PICBaseAddrMatcher = |
4176 | BC.MIB->matchIndJmp(Target: BC.MIB->matchAdd(A: std::move(LEAMatcherOwner), |
4177 | B: BC.MIB->matchAnyOperand())); |
4178 | if (!PICIndJmpMatcher->match( |
4179 | MRI: *BC.MRI, MIA&: *BC.MIB, |
4180 | InInstrWindow: MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), OpNum: -1) || |
4181 | Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 || |
4182 | !PICBaseAddrMatcher->match( |
4183 | MRI: *BC.MRI, MIA&: *BC.MIB, |
4184 | InInstrWindow: MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), OpNum: -1)) { |
4185 | llvm_unreachable("Failed to extract jump table base"); |
4186 | continue; |
4187 | } |
4188 | // Matched PIC, identify the instruction with the reference to the JT |
4189 | JTLoadInst = LEAMatcher->CurInst; |
4190 | } else { |
4191 | // Matched non-PIC |
4192 | JTLoadInst = LoadMatcher->CurInst; |
4193 | } |
4194 | } |
4195 | |
4196 | uint64_t NewJumpTableID = 0; |
4197 | const MCSymbol *NewJTLabel; |
4198 | std::tie(args&: NewJumpTableID, args&: NewJTLabel) = |
4199 | BC.duplicateJumpTable(Function&: *this, JT, OldLabel: Target); |
4200 | { |
4201 | auto L = BC.scopeLock(); |
4202 | BC.MIB->replaceMemOperandDisp(Inst&: *JTLoadInst, Label: NewJTLabel, Ctx: BC.Ctx.get()); |
4203 | } |
4204 | // We use a unique ID with the high bit set as address for this "injected" |
4205 | // jump table (not originally in the input binary). |
4206 | BC.MIB->setJumpTable(Inst, Value: NewJumpTableID, IndexReg: 0, AllocId); |
4207 | } |
4208 | } |
4209 | } |
4210 | |
4211 | bool BinaryFunction::replaceJumpTableEntryIn(BinaryBasicBlock *BB, |
4212 | BinaryBasicBlock *OldDest, |
4213 | BinaryBasicBlock *NewDest) { |
4214 | MCInst *Instr = BB->getLastNonPseudoInstr(); |
4215 | if (!Instr || !BC.MIB->isIndirectBranch(Inst: *Instr)) |
4216 | return false; |
4217 | uint64_t JTAddress = BC.MIB->getJumpTable(Inst: *Instr); |
4218 | assert(JTAddress && "Invalid jump table address"); |
4219 | JumpTable *JT = getJumpTableContainingAddress(Address: JTAddress); |
4220 | assert(JT && "No jump table structure for this indirect branch"); |
4221 | bool Patched = JT->replaceDestination(JTAddress, OldDest: OldDest->getLabel(), |
4222 | NewDest: NewDest->getLabel()); |
4223 | (void)Patched; |
4224 | assert(Patched && "Invalid entry to be replaced in jump table"); |
4225 | return true; |
4226 | } |
4227 | |
4228 | BinaryBasicBlock *BinaryFunction::splitEdge(BinaryBasicBlock *From, |
4229 | BinaryBasicBlock *To) { |
4230 | // Create intermediate BB |
4231 | MCSymbol *Tmp; |
4232 | { |
4233 | auto L = BC.scopeLock(); |
4234 | Tmp = BC.Ctx->createNamedTempSymbol(Name: "SplitEdge"); |
4235 | } |
4236 | // Link new BBs to the original input offset of the From BB, so we can map |
4237 | // samples recorded in new BBs back to the original BB seem in the input |
4238 | // binary (if using BAT) |
4239 | std::unique_ptr<BinaryBasicBlock> NewBB = createBasicBlock(Label: Tmp); |
4240 | NewBB->setOffset(From->getInputOffset()); |
4241 | BinaryBasicBlock *NewBBPtr = NewBB.get(); |
4242 | |
4243 | // Update "From" BB |
4244 | auto I = From->succ_begin(); |
4245 | auto BI = From->branch_info_begin(); |
4246 | for (; I != From->succ_end(); ++I) { |
4247 | if (*I == To) |
4248 | break; |
4249 | ++BI; |
4250 | } |
4251 | assert(I != From->succ_end() && "Invalid CFG edge in splitEdge!"); |
4252 | uint64_t OrigCount = BI->Count; |
4253 | uint64_t OrigMispreds = BI->MispredictedCount; |
4254 | replaceJumpTableEntryIn(BB: From, OldDest: To, NewDest: NewBBPtr); |
4255 | From->replaceSuccessor(Succ: To, NewSucc: NewBBPtr, Count: OrigCount, MispredictedCount: OrigMispreds); |
4256 | |
4257 | NewBB->addSuccessor(Succ: To, Count: OrigCount, MispredictedCount: OrigMispreds); |
4258 | NewBB->setExecutionCount(OrigCount); |
4259 | NewBB->setIsCold(From->isCold()); |
4260 | |
4261 | // Update CFI and BB layout with new intermediate BB |
4262 | std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs; |
4263 | NewBBs.emplace_back(args: std::move(NewBB)); |
4264 | insertBasicBlocks(Start: From, NewBBs: std::move(NewBBs), UpdateLayout: true, UpdateCFIState: true, |
4265 | /*RecomputeLandingPads=*/false); |
4266 | return NewBBPtr; |
4267 | } |
4268 | |
4269 | void BinaryFunction::deleteConservativeEdges() { |
4270 | // Our goal is to aggressively remove edges from the CFG that we believe are |
4271 | // wrong. This is used for instrumentation, where it is safe to remove |
4272 | // fallthrough edges because we won't reorder blocks. |
4273 | for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) { |
4274 | BinaryBasicBlock *BB = *I; |
4275 | if (BB->succ_size() != 1 || BB->size() == 0) |
4276 | continue; |
4277 | |
4278 | auto NextBB = std::next(x: I); |
4279 | MCInst *Last = BB->getLastNonPseudoInstr(); |
4280 | // Fallthrough is a landing pad? Delete this edge (as long as we don't |
4281 | // have a direct jump to it) |
4282 | if ((*BB->succ_begin())->isLandingPad() && NextBB != E && |
4283 | *BB->succ_begin() == *NextBB && Last && !BC.MIB->isBranch(Inst: *Last)) { |
4284 | BB->removeAllSuccessors(); |
4285 | continue; |
4286 | } |
4287 | |
4288 | // Look for suspicious calls at the end of BB where gcc may optimize it and |
4289 | // remove the jump to the epilogue when it knows the call won't return. |
4290 | if (!Last || !BC.MIB->isCall(Inst: *Last)) |
4291 | continue; |
4292 | |
4293 | const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(Inst: *Last); |
4294 | if (!CalleeSymbol) |
4295 | continue; |
4296 | |
4297 | StringRef CalleeName = CalleeSymbol->getName(); |
4298 | if (CalleeName != "__cxa_throw@PLT"&& CalleeName != "_Unwind_Resume@PLT"&& |
4299 | CalleeName != "__cxa_rethrow@PLT"&& CalleeName != "exit@PLT"&& |
4300 | CalleeName != "abort@PLT") |
4301 | continue; |
4302 | |
4303 | BB->removeAllSuccessors(); |
4304 | } |
4305 | } |
4306 | |
4307 | bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol, |
4308 | uint64_t SymbolSize) const { |
4309 | // If this symbol is in a different section from the one where the |
4310 | // function symbol is, don't consider it as valid. |
4311 | if (!getOriginSection()->containsAddress( |
4312 | Address: cantFail(ValOrErr: Symbol.getAddress(), Msg: "cannot get symbol address"))) |
4313 | return false; |
4314 | |
4315 | // Some symbols are tolerated inside function bodies, others are not. |
4316 | // The real function boundaries may not be known at this point. |
4317 | if (BC.isMarker(Symbol)) |
4318 | return true; |
4319 | |
4320 | // It's okay to have a zero-sized symbol in the middle of non-zero-sized |
4321 | // function. |
4322 | if (SymbolSize == 0 && containsAddress(PC: cantFail(ValOrErr: Symbol.getAddress()))) |
4323 | return true; |
4324 | |
4325 | if (cantFail(ValOrErr: Symbol.getType()) != SymbolRef::ST_Unknown) |
4326 | return false; |
4327 | |
4328 | if (cantFail(ValOrErr: Symbol.getFlags()) & SymbolRef::SF_Global) |
4329 | return false; |
4330 | |
4331 | return true; |
4332 | } |
4333 | |
4334 | void BinaryFunction::adjustExecutionCount(uint64_t Count) { |
4335 | if (getKnownExecutionCount() == 0 || Count == 0) |
4336 | return; |
4337 | |
4338 | if (ExecutionCount < Count) |
4339 | Count = ExecutionCount; |
4340 | |
4341 | double AdjustmentRatio = ((double)ExecutionCount - Count) / ExecutionCount; |
4342 | if (AdjustmentRatio < 0.0) |
4343 | AdjustmentRatio = 0.0; |
4344 | |
4345 | for (BinaryBasicBlock &BB : blocks()) |
4346 | BB.adjustExecutionCount(Ratio: AdjustmentRatio); |
4347 | |
4348 | ExecutionCount -= Count; |
4349 | } |
4350 | |
4351 | BinaryFunction::~BinaryFunction() { |
4352 | for (BinaryBasicBlock *BB : BasicBlocks) |
4353 | delete BB; |
4354 | for (BinaryBasicBlock *BB : DeletedBasicBlocks) |
4355 | delete BB; |
4356 | } |
4357 | |
4358 | void BinaryFunction::constructDomTree() { |
4359 | BDT.reset(p: new BinaryDominatorTree); |
4360 | BDT->recalculate(Func&: *this); |
4361 | } |
4362 | |
4363 | void BinaryFunction::calculateLoopInfo() { |
4364 | if (!hasDomTree()) |
4365 | constructDomTree(); |
4366 | // Discover loops. |
4367 | BLI.reset(p: new BinaryLoopInfo()); |
4368 | BLI->analyze(DomTree: getDomTree()); |
4369 | |
4370 | // Traverse discovered loops and add depth and profile information. |
4371 | std::stack<BinaryLoop *> St; |
4372 | for (auto I = BLI->begin(), E = BLI->end(); I != E; ++I) { |
4373 | St.push(x: *I); |
4374 | ++BLI->OuterLoops; |
4375 | } |
4376 | |
4377 | while (!St.empty()) { |
4378 | BinaryLoop *L = St.top(); |
4379 | St.pop(); |
4380 | ++BLI->TotalLoops; |
4381 | BLI->MaximumDepth = std::max(a: L->getLoopDepth(), b: BLI->MaximumDepth); |
4382 | |
4383 | // Add nested loops in the stack. |
4384 | for (BinaryLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) |
4385 | St.push(x: *I); |
4386 | |
4387 | // Skip if no valid profile is found. |
4388 | if (!hasValidProfile()) { |
4389 | L->EntryCount = COUNT_NO_PROFILE; |
4390 | L->ExitCount = COUNT_NO_PROFILE; |
4391 | L->TotalBackEdgeCount = COUNT_NO_PROFILE; |
4392 | continue; |
4393 | } |
4394 | |
4395 | // Compute back edge count. |
4396 | SmallVector<BinaryBasicBlock *, 1> Latches; |
4397 | L->getLoopLatches(LoopLatches&: Latches); |
4398 | |
4399 | for (BinaryBasicBlock *Latch : Latches) { |
4400 | auto BI = Latch->branch_info_begin(); |
4401 | for (BinaryBasicBlock *Succ : Latch->successors()) { |
4402 | if (Succ == L->getHeader()) { |
4403 | assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE && |
4404 | "profile data not found"); |
4405 | L->TotalBackEdgeCount += BI->Count; |
4406 | } |
4407 | ++BI; |
4408 | } |
4409 | } |
4410 | |
4411 | // Compute entry count. |
4412 | L->EntryCount = L->getHeader()->getExecutionCount() - L->TotalBackEdgeCount; |
4413 | |
4414 | // Compute exit count. |
4415 | SmallVector<BinaryLoop::Edge, 1> ExitEdges; |
4416 | L->getExitEdges(ExitEdges); |
4417 | for (BinaryLoop::Edge &Exit : ExitEdges) { |
4418 | const BinaryBasicBlock *Exiting = Exit.first; |
4419 | const BinaryBasicBlock *ExitTarget = Exit.second; |
4420 | auto BI = Exiting->branch_info_begin(); |
4421 | for (BinaryBasicBlock *Succ : Exiting->successors()) { |
4422 | if (Succ == ExitTarget) { |
4423 | assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE && |
4424 | "profile data not found"); |
4425 | L->ExitCount += BI->Count; |
4426 | } |
4427 | ++BI; |
4428 | } |
4429 | } |
4430 | } |
4431 | } |
4432 | |
4433 | void BinaryFunction::updateOutputValues(const BOLTLinker &Linker) { |
4434 | if (!isEmitted()) { |
4435 | assert(!isInjected() && "injected function should be emitted"); |
4436 | setOutputAddress(getAddress()); |
4437 | setOutputSize(getSize()); |
4438 | return; |
4439 | } |
4440 | |
4441 | const auto SymbolInfo = Linker.lookupSymbolInfo(Name: getSymbol()->getName()); |
4442 | assert(SymbolInfo && "Cannot find function entry symbol"); |
4443 | setOutputAddress(SymbolInfo->Address); |
4444 | setOutputSize(SymbolInfo->Size); |
4445 | |
4446 | if (BC.HasRelocations || isInjected()) { |
4447 | if (hasConstantIsland()) { |
4448 | const auto IslandLabelSymInfo = |
4449 | Linker.lookupSymbolInfo(Name: getFunctionConstantIslandLabel()->getName()); |
4450 | assert(IslandLabelSymInfo && "Cannot find function CI symbol"); |
4451 | setOutputDataAddress(IslandLabelSymInfo->Address); |
4452 | for (auto It : Islands->Offsets) { |
4453 | const uint64_t OldOffset = It.first; |
4454 | BinaryData *BD = BC.getBinaryDataAtAddress(Address: getAddress() + OldOffset); |
4455 | if (!BD) |
4456 | continue; |
4457 | |
4458 | MCSymbol *Symbol = It.second; |
4459 | const auto SymInfo = Linker.lookupSymbolInfo(Name: Symbol->getName()); |
4460 | assert(SymInfo && "Cannot find CI symbol"); |
4461 | auto &Section = *getCodeSection(); |
4462 | const auto NewOffset = SymInfo->Address - Section.getOutputAddress(); |
4463 | BD->setOutputLocation(NewSection&: Section, NewOffset); |
4464 | } |
4465 | } |
4466 | if (isSplit()) { |
4467 | for (FunctionFragment &FF : getLayout().getSplitFragments()) { |
4468 | ErrorOr<BinarySection &> ColdSection = |
4469 | getCodeSection(Fragment: FF.getFragmentNum()); |
4470 | // If fragment is empty, cold section might not exist |
4471 | if (FF.empty() && ColdSection.getError()) |
4472 | continue; |
4473 | |
4474 | const MCSymbol *ColdStartSymbol = getSymbol(Fragment: FF.getFragmentNum()); |
4475 | // If fragment is empty, symbol might have not been emitted |
4476 | if (FF.empty() && (!ColdStartSymbol || !ColdStartSymbol->isDefined()) && |
4477 | !hasConstantIsland()) |
4478 | continue; |
4479 | assert(ColdStartSymbol && ColdStartSymbol->isDefined() && |
4480 | "split function should have defined cold symbol"); |
4481 | const auto ColdStartSymbolInfo = |
4482 | Linker.lookupSymbolInfo(Name: ColdStartSymbol->getName()); |
4483 | assert(ColdStartSymbolInfo && "Cannot find cold start symbol"); |
4484 | FF.setAddress(ColdStartSymbolInfo->Address); |
4485 | FF.setImageSize(ColdStartSymbolInfo->Size); |
4486 | if (hasConstantIsland()) { |
4487 | const auto SymInfo = Linker.lookupSymbolInfo( |
4488 | Name: getFunctionColdConstantIslandLabel()->getName()); |
4489 | assert(SymInfo && "Cannot find cold CI symbol"); |
4490 | setOutputColdDataAddress(SymInfo->Address); |
4491 | } |
4492 | } |
4493 | } |
4494 | } |
4495 | |
4496 | // Update basic block output ranges for the debug info, if we have |
4497 | // secondary entry points in the symbol table to update or if writing BAT. |
4498 | if (!requiresAddressMap()) |
4499 | return; |
4500 | |
4501 | // AArch64 may have functions that only contains a constant island (no code). |
4502 | if (getLayout().block_empty()) |
4503 | return; |
4504 | |
4505 | for (FunctionFragment &FF : getLayout().fragments()) { |
4506 | if (FF.empty()) |
4507 | continue; |
4508 | |
4509 | const uint64_t FragmentBaseAddress = |
4510 | getCodeSection(Fragment: isSimple() ? FF.getFragmentNum() : FragmentNum::main()) |
4511 | ->getOutputAddress(); |
4512 | |
4513 | BinaryBasicBlock *PrevBB = nullptr; |
4514 | for (BinaryBasicBlock *const BB : FF) { |
4515 | assert(BB->getLabel()->isDefined() && "symbol should be defined"); |
4516 | if (!BC.HasRelocations) { |
4517 | if (BB->isSplit()) |
4518 | assert(FragmentBaseAddress == FF.getAddress()); |
4519 | else |
4520 | assert(FragmentBaseAddress == getOutputAddress()); |
4521 | (void)FragmentBaseAddress; |
4522 | } |
4523 | |
4524 | // Injected functions likely will fail lookup, as they have no |
4525 | // input range. Just assign the BB the output address of the |
4526 | // function. |
4527 | auto MaybeBBAddress = BC.getIOAddressMap().lookup(Symbol: BB->getLabel()); |
4528 | const uint64_t BBAddress = MaybeBBAddress ? *MaybeBBAddress |
4529 | : BB->isSplit() ? FF.getAddress() |
4530 | : getOutputAddress(); |
4531 | BB->setOutputStartAddress(BBAddress); |
4532 | |
4533 | if (PrevBB) { |
4534 | assert(PrevBB->getOutputAddressRange().first <= BBAddress && |
4535 | "Bad output address for basic block."); |
4536 | assert((PrevBB->getOutputAddressRange().first != BBAddress || |
4537 | !hasInstructions() || !PrevBB->getNumNonPseudos()) && |
4538 | "Bad output address for basic block."); |
4539 | PrevBB->setOutputEndAddress(BBAddress); |
4540 | } |
4541 | PrevBB = BB; |
4542 | } |
4543 | |
4544 | PrevBB->setOutputEndAddress(PrevBB->isSplit() |
4545 | ? FF.getAddress() + FF.getImageSize() |
4546 | : getOutputAddress() + getOutputSize()); |
4547 | } |
4548 | |
4549 | // Reset output addresses for deleted blocks. |
4550 | for (BinaryBasicBlock *BB : DeletedBasicBlocks) { |
4551 | BB->setOutputStartAddress(0); |
4552 | BB->setOutputEndAddress(0); |
4553 | } |
4554 | } |
4555 | |
4556 | DebugAddressRangesVector BinaryFunction::getOutputAddressRanges() const { |
4557 | DebugAddressRangesVector OutputRanges; |
4558 | |
4559 | if (isFolded()) |
4560 | return OutputRanges; |
4561 | |
4562 | if (IsFragment) |
4563 | return OutputRanges; |
4564 | |
4565 | OutputRanges.emplace_back(Args: getOutputAddress(), |
4566 | Args: getOutputAddress() + getOutputSize()); |
4567 | if (isSplit()) { |
4568 | assert(isEmitted() && "split function should be emitted"); |
4569 | for (const FunctionFragment &FF : getLayout().getSplitFragments()) |
4570 | OutputRanges.emplace_back(Args: FF.getAddress(), |
4571 | Args: FF.getAddress() + FF.getImageSize()); |
4572 | } |
4573 | |
4574 | if (isSimple()) |
4575 | return OutputRanges; |
4576 | |
4577 | for (BinaryFunction *Frag : Fragments) { |
4578 | assert(!Frag->isSimple() && |
4579 | "fragment of non-simple function should also be non-simple"); |
4580 | OutputRanges.emplace_back(Args: Frag->getOutputAddress(), |
4581 | Args: Frag->getOutputAddress() + Frag->getOutputSize()); |
4582 | } |
4583 | |
4584 | return OutputRanges; |
4585 | } |
4586 | |
4587 | uint64_t BinaryFunction::translateInputToOutputAddress(uint64_t Address) const { |
4588 | if (isFolded()) |
4589 | return 0; |
4590 | |
4591 | // If the function hasn't changed return the same address. |
4592 | if (!isEmitted()) |
4593 | return Address; |
4594 | |
4595 | if (Address < getAddress()) |
4596 | return 0; |
4597 | |
4598 | // Check if the address is associated with an instruction that is tracked |
4599 | // by address translation. |
4600 | if (auto OutputAddress = BC.getIOAddressMap().lookup(InputAddress: Address)) |
4601 | return *OutputAddress; |
4602 | |
4603 | // FIXME: #18950828 - we rely on relative offsets inside basic blocks to stay |
4604 | // intact. Instead we can use pseudo instructions and/or annotations. |
4605 | const uint64_t Offset = Address - getAddress(); |
4606 | const BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset); |
4607 | if (!BB) { |
4608 | // Special case for address immediately past the end of the function. |
4609 | if (Offset == getSize()) |
4610 | return getOutputAddress() + getOutputSize(); |
4611 | |
4612 | return 0; |
4613 | } |
4614 | |
4615 | return std::min(a: BB->getOutputAddressRange().first + Offset - BB->getOffset(), |
4616 | b: BB->getOutputAddressRange().second); |
4617 | } |
4618 | |
4619 | DebugAddressRangesVector |
4620 | BinaryFunction::translateInputToOutputRange(DebugAddressRange InRange) const { |
4621 | DebugAddressRangesVector OutRanges; |
4622 | |
4623 | // The function was removed from the output. Return an empty range. |
4624 | if (isFolded()) |
4625 | return OutRanges; |
4626 | |
4627 | // If the function hasn't changed return the same range. |
4628 | if (!isEmitted()) { |
4629 | OutRanges.emplace_back(Args&: InRange); |
4630 | return OutRanges; |
4631 | } |
4632 | |
4633 | if (!containsAddress(PC: InRange.LowPC)) |
4634 | return OutRanges; |
4635 | |
4636 | // Special case of an empty range [X, X). Some tools expect X to be updated. |
4637 | if (InRange.LowPC == InRange.HighPC) { |
4638 | if (uint64_t NewPC = translateInputToOutputAddress(Address: InRange.LowPC)) |
4639 | OutRanges.push_back(Elt: DebugAddressRange{NewPC, NewPC}); |
4640 | return OutRanges; |
4641 | } |
4642 | |
4643 | uint64_t InputOffset = InRange.LowPC - getAddress(); |
4644 | const uint64_t InputEndOffset = |
4645 | std::min(a: InRange.HighPC - getAddress(), b: getSize()); |
4646 | |
4647 | auto BBI = llvm::upper_bound(Range: BasicBlockOffsets, |
4648 | Value: BasicBlockOffset(InputOffset, nullptr), |
4649 | C: CompareBasicBlockOffsets()); |
4650 | assert(BBI != BasicBlockOffsets.begin()); |
4651 | |
4652 | // Iterate over blocks in the input order using BasicBlockOffsets. |
4653 | for (--BBI; InputOffset < InputEndOffset && BBI != BasicBlockOffsets.end(); |
4654 | InputOffset = BBI->second->getEndOffset(), ++BBI) { |
4655 | const BinaryBasicBlock &BB = *BBI->second; |
4656 | if (InputOffset < BB.getOffset() || InputOffset >= BB.getEndOffset()) { |
4657 | LLVM_DEBUG( |
4658 | dbgs() << "BOLT-DEBUG: invalid debug address range detected for " |
4659 | << *this << " : [0x"<< Twine::utohexstr(InRange.LowPC) |
4660 | << ", 0x"<< Twine::utohexstr(InRange.HighPC) << "]\n"); |
4661 | break; |
4662 | } |
4663 | |
4664 | // Skip the block if it wasn't emitted. |
4665 | if (!BB.getOutputAddressRange().first) |
4666 | continue; |
4667 | |
4668 | // Find output address for an instruction with an offset greater or equal |
4669 | // to /p Offset. The output address should fall within the same basic |
4670 | // block boundaries. |
4671 | auto translateBlockOffset = [&](const uint64_t Offset) { |
4672 | const uint64_t OutAddress = BB.getOutputAddressRange().first + Offset; |
4673 | return std::min(a: OutAddress, b: BB.getOutputAddressRange().second); |
4674 | }; |
4675 | |
4676 | uint64_t OutLowPC = BB.getOutputAddressRange().first; |
4677 | if (InputOffset > BB.getOffset()) |
4678 | OutLowPC = translateBlockOffset(InputOffset - BB.getOffset()); |
4679 | |
4680 | uint64_t OutHighPC = BB.getOutputAddressRange().second; |
4681 | if (InputEndOffset < BB.getEndOffset()) { |
4682 | assert(InputEndOffset >= BB.getOffset()); |
4683 | OutHighPC = translateBlockOffset(InputEndOffset - BB.getOffset()); |
4684 | } |
4685 | |
4686 | // Check if we can expand the last translated range. |
4687 | if (!OutRanges.empty() && OutRanges.back().HighPC == OutLowPC) |
4688 | OutRanges.back().HighPC = std::max(a: OutRanges.back().HighPC, b: OutHighPC); |
4689 | else |
4690 | OutRanges.emplace_back(Args&: OutLowPC, Args: std::max(a: OutLowPC, b: OutHighPC)); |
4691 | } |
4692 | |
4693 | LLVM_DEBUG({ |
4694 | dbgs() << "BOLT-DEBUG: translated address range "<< InRange << " -> "; |
4695 | for (const DebugAddressRange &R : OutRanges) |
4696 | dbgs() << R << ' '; |
4697 | dbgs() << '\n'; |
4698 | }); |
4699 | |
4700 | return OutRanges; |
4701 | } |
4702 | |
4703 | MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) { |
4704 | if (CurrentState == State::Disassembled) { |
4705 | auto II = Instructions.find(x: Offset); |
4706 | return (II == Instructions.end()) ? nullptr : &II->second; |
4707 | } else if (CurrentState == State::CFG) { |
4708 | BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset); |
4709 | if (!BB) |
4710 | return nullptr; |
4711 | |
4712 | for (MCInst &Inst : *BB) { |
4713 | constexpr uint32_t InvalidOffset = std::numeric_limits<uint32_t>::max(); |
4714 | if (Offset == BC.MIB->getOffsetWithDefault(Inst, Default: InvalidOffset)) |
4715 | return &Inst; |
4716 | } |
4717 | |
4718 | if (MCInst *LastInstr = BB->getLastNonPseudoInstr()) { |
4719 | if (std::optional<uint32_t> Size = BC.MIB->getSize(Inst: *LastInstr)) { |
4720 | if (BB->getEndOffset() - Offset == Size) { |
4721 | return LastInstr; |
4722 | } |
4723 | } |
4724 | } |
4725 | |
4726 | return nullptr; |
4727 | } else { |
4728 | llvm_unreachable("invalid CFG state to use getInstructionAtOffset()"); |
4729 | } |
4730 | } |
4731 | |
4732 | MCInst *BinaryFunction::getInstructionContainingOffset(uint64_t Offset) { |
4733 | assert(CurrentState == State::Disassembled && "Wrong function state"); |
4734 | |
4735 | if (Offset > Size) |
4736 | return nullptr; |
4737 | |
4738 | auto II = Instructions.upper_bound(x: Offset); |
4739 | assert(II != Instructions.begin() && "First instruction not at offset 0"); |
4740 | --II; |
4741 | return &II->second; |
4742 | } |
4743 | |
4744 | void BinaryFunction::printLoopInfo(raw_ostream &OS) const { |
4745 | if (!opts::shouldPrint(Function: *this)) |
4746 | return; |
4747 | |
4748 | OS << "Loop Info for Function \""<< *this << "\""; |
4749 | if (hasValidProfile()) |
4750 | OS << " (count: "<< getExecutionCount() << ")"; |
4751 | OS << "\n"; |
4752 | |
4753 | std::stack<BinaryLoop *> St; |
4754 | for (BinaryLoop *L : *BLI) |
4755 | St.push(x: L); |
4756 | while (!St.empty()) { |
4757 | BinaryLoop *L = St.top(); |
4758 | St.pop(); |
4759 | |
4760 | for (BinaryLoop *Inner : *L) |
4761 | St.push(x: Inner); |
4762 | |
4763 | if (!hasValidProfile()) |
4764 | continue; |
4765 | |
4766 | OS << (L->getLoopDepth() > 1 ? "Nested": "Outer") |
4767 | << " loop header: "<< L->getHeader()->getName(); |
4768 | OS << "\n"; |
4769 | OS << "Loop basic blocks: "; |
4770 | ListSeparator LS; |
4771 | for (BinaryBasicBlock *BB : L->blocks()) |
4772 | OS << LS << BB->getName(); |
4773 | OS << "\n"; |
4774 | if (hasValidProfile()) { |
4775 | OS << "Total back edge count: "<< L->TotalBackEdgeCount << "\n"; |
4776 | OS << "Loop entry count: "<< L->EntryCount << "\n"; |
4777 | OS << "Loop exit count: "<< L->ExitCount << "\n"; |
4778 | if (L->EntryCount > 0) { |
4779 | OS << "Average iters per entry: " |
4780 | << format(Fmt: "%.4lf", Vals: (double)L->TotalBackEdgeCount / L->EntryCount) |
4781 | << "\n"; |
4782 | } |
4783 | } |
4784 | OS << "----\n"; |
4785 | } |
4786 | |
4787 | OS << "Total number of loops: "<< BLI->TotalLoops << "\n"; |
4788 | OS << "Number of outer loops: "<< BLI->OuterLoops << "\n"; |
4789 | OS << "Maximum nested loop depth: "<< BLI->MaximumDepth << "\n\n"; |
4790 | } |
4791 | |
4792 | bool BinaryFunction::isAArch64Veneer() const { |
4793 | if (empty() || hasIslandsInfo()) |
4794 | return false; |
4795 | |
4796 | BinaryBasicBlock &BB = **BasicBlocks.begin(); |
4797 | for (MCInst &Inst : BB) |
4798 | if (!BC.MIB->hasAnnotation(Inst, Name: "AArch64Veneer")) |
4799 | return false; |
4800 | |
4801 | for (auto I = BasicBlocks.begin() + 1, E = BasicBlocks.end(); I != E; ++I) { |
4802 | for (MCInst &Inst : **I) |
4803 | if (!BC.MIB->isNoop(Inst)) |
4804 | return false; |
4805 | } |
4806 | |
4807 | return true; |
4808 | } |
4809 | |
4810 | void BinaryFunction::addRelocation(uint64_t Address, MCSymbol *Symbol, |
4811 | uint32_t RelType, uint64_t Addend, |
4812 | uint64_t Value) { |
4813 | assert(Address >= getAddress() && Address < getAddress() + getMaxSize() && |
4814 | "address is outside of the function"); |
4815 | uint64_t Offset = Address - getAddress(); |
4816 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addRelocation in " |
4817 | << formatv("{0}@{1:x} against {2}\n", *this, Offset, |
4818 | (Symbol ? Symbol->getName() : "<undef>"))); |
4819 | bool IsCI = BC.isAArch64() && isInConstantIsland(Address); |
4820 | std::map<uint64_t, Relocation> &Rels = |
4821 | IsCI ? Islands->Relocations : Relocations; |
4822 | if (BC.MIB->shouldRecordCodeRelocation(RelType)) |
4823 | Rels[Offset] = Relocation{Offset, Symbol, RelType, Addend, Value}; |
4824 | } |
4825 | |
4826 | } // namespace bolt |
4827 | } // namespace llvm |
4828 |
Definitions
- CheckEncoding
- DotToolTipCode
- JumpTables
- NoScan
- PreserveBlocksAlignment
- PrintOutputAddressRange
- PrintDynoStats
- PrintDynoStatsOnly
- PrintOnly
- TimeBuild
- TrapOnAVX512
- shouldPrint
- emptyRange
- findDebugLineInformationForInstructionAt
- buildSectionName
- operator<<
- buildCodeSectionName
- buildColdCodeSectionName
- Count
- hasNameRegex
- hasRestoredNameRegex
- getDemangledName
- getBasicBlockContainingOffset
- markUnreachableBlocks
- eraseInvalidBBs
- isForwardCall
- dump
- printRelocations
- mutateDWARFExpressionTargetReg
- mutateCFIRegisterFor
- mutateCFIOffsetFor
- processIndirectBranch
- getOrCreateLocalLabel
- getData
- getSizeOfDataInCodeAt
- getIslandInRange
- isZeroPaddingAt
- handlePCRelOperand
- handleExternalReference
- handleIndirectBranch
- handleAArch64IndirectCall
- disassembleInstructionAtOffset
- disassemble
- registerBranch
- analyzeInstructionForFuncReference
- scanExternalRefs
- postProcessEntryPoints
- postProcessJumpTables
- validateExternallyReferencedOffsets
- postProcessIndirectBranches
- recomputeLandingPads
- buildCFG
- postProcessCFG
- removeTagsFromProfile
- removeConditionalTailCalls
- getFunctionScore
- annotateCFIState
- CFISnapshot
- UNKNOWN
- update
- advanceTo
- CFISnapshot
- CFISnapshotDiff
- CFISnapshotDiff
- CFISnapshotDiff
- isRedundant
- replayCFIInstrs
- unwindCFIState
- normalizeCFIState
- finalizeCFIState
- requiresAddressTranslation
- requiresAddressMap
- getInstructionCount
- clearDisasmState
- setTrapOnEntry
- setIgnored
- duplicateConstantIslands
- constructFilename
- formatEscapes
- dumpGraph
- viewGraph
- dumpGraphForPass
- dumpGraphToFile
- validateCFG
- fixBranches
- propagateGnuArgsSizeInfo
- postProcessBranches
- addEntryPointAtOffset
- addEntryPoint
- getSymbolForEntryID
- getEntryIDForSymbol
- forEachEntryPoint
- dfs
- computeHash
- insertBasicBlocks
- insertBasicBlocks
- updateBBIndices
- updateCFIState
- updateLayout
- checkForAmbiguousJumpTables
- disambiguateJumpTables
- replaceJumpTableEntryIn
- splitEdge
- deleteConservativeEdges
- isSymbolValidInScope
- adjustExecutionCount
- ~BinaryFunction
- constructDomTree
- calculateLoopInfo
- updateOutputValues
- getOutputAddressRanges
- translateInputToOutputAddress
- translateInputToOutputRange
- getInstructionAtOffset
- getInstructionContainingOffset
- printLoopInfo
- isAArch64Veneer
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more