1 | //===- bolt/Core/BinaryBasicBlock.h - Low-level basic block -----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Sequence of MC/MCPlus instructions. Call/invoke does not terminate the block. |
10 | // CFI instructions are part of the instruction list with the initial CFI state |
11 | // defined at the beginning of the block. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef BOLT_CORE_BINARY_BASIC_BLOCK_H |
16 | #define BOLT_CORE_BINARY_BASIC_BLOCK_H |
17 | |
18 | #include "bolt/Core/FunctionLayout.h" |
19 | #include "bolt/Core/MCPlus.h" |
20 | #include "llvm/ADT/GraphTraits.h" |
21 | #include "llvm/ADT/StringRef.h" |
22 | #include "llvm/MC/MCInst.h" |
23 | #include "llvm/MC/MCSymbol.h" |
24 | #include "llvm/Support/ErrorOr.h" |
25 | #include "llvm/Support/raw_ostream.h" |
26 | #include <limits> |
27 | #include <utility> |
28 | |
29 | namespace llvm { |
30 | class MCCodeEmitter; |
31 | |
32 | namespace bolt { |
33 | |
34 | class BinaryFunction; |
35 | class JumpTable; |
36 | |
37 | class BinaryBasicBlock { |
38 | public: |
39 | /// Profile execution information for a given edge in CFG. |
40 | /// |
41 | /// If MispredictedCount equals COUNT_INFERRED, then we have a profile |
42 | /// data for a fall-through edge with a Count representing an inferred |
43 | /// execution count, i.e. the count we calculated internally, not the one |
44 | /// coming from profile data. |
45 | /// |
46 | /// For all other values of MispredictedCount, Count represents the number of |
47 | /// branch executions from a profile, and MispredictedCount is the number |
48 | /// of times the branch was mispredicted according to this profile. |
49 | struct BinaryBranchInfo { |
50 | uint64_t Count; |
51 | uint64_t MispredictedCount; /// number of branches mispredicted |
52 | |
53 | bool operator<(const BinaryBranchInfo &Other) const { |
54 | return (Count < Other.Count) || |
55 | (Count == Other.Count && |
56 | MispredictedCount < Other.MispredictedCount); |
57 | } |
58 | }; |
59 | |
60 | static constexpr uint32_t INVALID_OFFSET = |
61 | std::numeric_limits<uint32_t>::max(); |
62 | |
63 | using BranchInfoType = SmallVector<BinaryBranchInfo, 0>; |
64 | |
65 | private: |
66 | /// Vector of all instructions in the block. |
67 | InstructionListType Instructions; |
68 | |
69 | /// CFG information. |
70 | using EdgeListType = SmallVector<BinaryBasicBlock *, 0>; |
71 | EdgeListType Predecessors; |
72 | EdgeListType Successors; |
73 | |
74 | /// Each successor has a corresponding BranchInfo entry in the list. |
75 | BranchInfoType BranchInfo; |
76 | |
77 | using ExceptionListType = SmallVector<BinaryBasicBlock *, 0>; |
78 | |
79 | /// List of blocks that this landing pad is handling. |
80 | ExceptionListType Throwers; |
81 | |
82 | /// List of blocks that can catch exceptions thrown by code in this block. |
83 | ExceptionListType LandingPads; |
84 | |
85 | /// Function that owns this basic block. |
86 | BinaryFunction *Function; |
87 | |
88 | /// Label associated with the block. |
89 | MCSymbol *Label{nullptr}; |
90 | |
91 | /// [Begin, End) address range for this block in the output binary. |
92 | std::pair<uint32_t, uint32_t> OutputAddressRange = {0, 0}; |
93 | |
94 | /// Original offset range of the basic block in the function. |
95 | std::pair<uint32_t, uint32_t> InputRange = {INVALID_OFFSET, INVALID_OFFSET}; |
96 | |
97 | /// Map input offset (from function start) of an instruction to an output |
98 | /// symbol. Enables writing BOLT address translation tables used for mapping |
99 | /// control transfer in the output binary back to the original binary. |
100 | using LocSymsTy = std::vector<std::pair<uint32_t, const MCSymbol *>>; |
101 | std::unique_ptr<LocSymsTy> LocSyms; |
102 | |
103 | /// Alignment requirements for the block. |
104 | uint32_t Alignment{1}; |
105 | |
106 | /// Maximum number of bytes to use for alignment of the block. |
107 | uint32_t AlignmentMaxBytes{0}; |
108 | |
109 | /// Number of times this basic block was executed. |
110 | uint64_t ExecutionCount{COUNT_NO_PROFILE}; |
111 | |
112 | static constexpr unsigned InvalidIndex = ~0u; |
113 | |
114 | /// Index to BasicBlocks vector in BinaryFunction. |
115 | unsigned Index{InvalidIndex}; |
116 | |
117 | /// Index in the current layout. |
118 | mutable unsigned LayoutIndex{InvalidIndex}; |
119 | |
120 | /// Number of pseudo instructions in this block. |
121 | uint32_t NumPseudos{0}; |
122 | |
123 | /// CFI state at the entry to this basic block. |
124 | int32_t CFIState{-1}; |
125 | |
126 | /// In cases where the parent function has been split, FragmentNum > 0 means |
127 | /// this BB will be allocated in a fragment outside its parent function. |
128 | FragmentNum Fragment; |
129 | |
130 | /// Indicates if the block could be outlined. |
131 | bool CanOutline{true}; |
132 | |
133 | /// Flag to indicate whether this block is valid or not. Invalid |
134 | /// blocks may contain out of date or incorrect information. |
135 | bool IsValid{true}; |
136 | |
137 | /// Last computed hash value. |
138 | mutable uint64_t Hash{0}; |
139 | |
140 | private: |
141 | BinaryBasicBlock() = delete; |
142 | BinaryBasicBlock(const BinaryBasicBlock &) = delete; |
143 | BinaryBasicBlock(const BinaryBasicBlock &&) = delete; |
144 | BinaryBasicBlock &operator=(const BinaryBasicBlock &) = delete; |
145 | BinaryBasicBlock &operator=(const BinaryBasicBlock &&) = delete; |
146 | |
147 | explicit BinaryBasicBlock(BinaryFunction *Function, MCSymbol *Label) |
148 | : Function(Function), Label(Label) { |
149 | assert(Function && "Function must be non-null" ); |
150 | } |
151 | |
152 | // Exclusively managed by BinaryFunction. |
153 | friend class BinaryFunction; |
154 | friend bool operator<(const BinaryBasicBlock &LHS, |
155 | const BinaryBasicBlock &RHS); |
156 | |
157 | /// Assign new label to the basic block. |
158 | void setLabel(MCSymbol *Symbol) { Label = Symbol; } |
159 | |
160 | public: |
161 | static constexpr uint64_t COUNT_INFERRED = |
162 | std::numeric_limits<uint64_t>::max(); |
163 | static constexpr uint64_t COUNT_NO_PROFILE = |
164 | std::numeric_limits<uint64_t>::max(); |
165 | |
166 | // Instructions iterators. |
167 | using iterator = InstructionListType::iterator; |
168 | using const_iterator = InstructionListType::const_iterator; |
169 | using reverse_iterator = std::reverse_iterator<iterator>; |
170 | using const_reverse_iterator = std::reverse_iterator<const_iterator>; |
171 | |
172 | bool empty() const { assert(hasInstructions()); |
173 | return Instructions.empty(); } |
174 | size_t size() const { assert(hasInstructions()); |
175 | return Instructions.size(); } |
176 | MCInst &front() { assert(hasInstructions()); |
177 | return Instructions.front(); } |
178 | MCInst &back() { assert(hasInstructions()); |
179 | return Instructions.back(); } |
180 | const MCInst &front() const { assert(hasInstructions()); |
181 | return Instructions.front(); } |
182 | const MCInst &back() const { assert(hasInstructions()); |
183 | return Instructions.back(); } |
184 | |
185 | iterator begin() { assert(hasInstructions()); |
186 | return Instructions.begin(); } |
187 | const_iterator begin() const { assert(hasInstructions()); |
188 | return Instructions.begin(); } |
189 | iterator end () { assert(hasInstructions()); |
190 | return Instructions.end(); } |
191 | const_iterator end () const { assert(hasInstructions()); |
192 | return Instructions.end(); } |
193 | reverse_iterator rbegin() { assert(hasInstructions()); |
194 | return Instructions.rbegin(); } |
195 | const_reverse_iterator rbegin() const { assert(hasInstructions()); |
196 | return Instructions.rbegin(); } |
197 | reverse_iterator rend () { assert(hasInstructions()); |
198 | return Instructions.rend(); } |
199 | const_reverse_iterator rend () const { assert(hasInstructions()); |
200 | return Instructions.rend(); } |
201 | |
202 | // CFG iterators. |
203 | using pred_iterator = EdgeListType::iterator; |
204 | using const_pred_iterator = EdgeListType::const_iterator; |
205 | using succ_iterator = EdgeListType::iterator; |
206 | using const_succ_iterator = EdgeListType::const_iterator; |
207 | using throw_iterator = decltype(Throwers)::iterator; |
208 | using const_throw_iterator = decltype(Throwers)::const_iterator; |
209 | using lp_iterator = decltype(LandingPads)::iterator; |
210 | using const_lp_iterator = decltype(LandingPads)::const_iterator; |
211 | |
212 | using pred_reverse_iterator = std::reverse_iterator<pred_iterator>; |
213 | using const_pred_reverse_iterator = |
214 | std::reverse_iterator<const_pred_iterator>; |
215 | using succ_reverse_iterator = std::reverse_iterator<succ_iterator>; |
216 | using const_succ_reverse_iterator = |
217 | std::reverse_iterator<const_succ_iterator>; |
218 | |
219 | pred_iterator pred_begin() { return Predecessors.begin(); } |
220 | const_pred_iterator pred_begin() const { return Predecessors.begin(); } |
221 | pred_iterator pred_end() { return Predecessors.end(); } |
222 | const_pred_iterator pred_end() const { return Predecessors.end(); } |
223 | pred_reverse_iterator pred_rbegin() |
224 | { return Predecessors.rbegin();} |
225 | const_pred_reverse_iterator pred_rbegin() const |
226 | { return Predecessors.rbegin();} |
227 | pred_reverse_iterator pred_rend() |
228 | { return Predecessors.rend(); } |
229 | const_pred_reverse_iterator pred_rend() const |
230 | { return Predecessors.rend(); } |
231 | size_t pred_size() const { |
232 | return Predecessors.size(); |
233 | } |
234 | bool pred_empty() const { return Predecessors.empty(); } |
235 | |
236 | succ_iterator succ_begin() { return Successors.begin(); } |
237 | const_succ_iterator succ_begin() const { return Successors.begin(); } |
238 | succ_iterator succ_end() { return Successors.end(); } |
239 | const_succ_iterator succ_end() const { return Successors.end(); } |
240 | succ_reverse_iterator succ_rbegin() |
241 | { return Successors.rbegin(); } |
242 | const_succ_reverse_iterator succ_rbegin() const |
243 | { return Successors.rbegin(); } |
244 | succ_reverse_iterator succ_rend() |
245 | { return Successors.rend(); } |
246 | const_succ_reverse_iterator succ_rend() const |
247 | { return Successors.rend(); } |
248 | size_t succ_size() const { |
249 | return Successors.size(); |
250 | } |
251 | bool succ_empty() const { return Successors.empty(); } |
252 | |
253 | throw_iterator throw_begin() { return Throwers.begin(); } |
254 | const_throw_iterator throw_begin() const { return Throwers.begin(); } |
255 | throw_iterator throw_end() { return Throwers.end(); } |
256 | const_throw_iterator throw_end() const { return Throwers.end(); } |
257 | size_t throw_size() const { |
258 | return Throwers.size(); |
259 | } |
260 | bool throw_empty() const { return Throwers.empty(); } |
261 | bool isLandingPad() const { return !Throwers.empty(); } |
262 | |
263 | lp_iterator lp_begin() { return LandingPads.begin(); } |
264 | const_lp_iterator lp_begin() const { return LandingPads.begin(); } |
265 | lp_iterator lp_end() { return LandingPads.end(); } |
266 | const_lp_iterator lp_end() const { return LandingPads.end(); } |
267 | size_t lp_size() const { |
268 | return LandingPads.size(); |
269 | } |
270 | bool lp_empty() const { return LandingPads.empty(); } |
271 | |
272 | inline iterator_range<iterator> instructions() { |
273 | assert(hasInstructions()); |
274 | return iterator_range<iterator>(begin(), end()); |
275 | } |
276 | inline iterator_range<const_iterator> instructions() const { |
277 | assert(hasInstructions()); |
278 | return iterator_range<const_iterator>(begin(), end()); |
279 | } |
280 | inline iterator_range<pred_iterator> predecessors() { |
281 | assert(hasCFG()); |
282 | return iterator_range<pred_iterator>(pred_begin(), pred_end()); |
283 | } |
284 | inline iterator_range<const_pred_iterator> predecessors() const { |
285 | assert(hasCFG()); |
286 | return iterator_range<const_pred_iterator>(pred_begin(), pred_end()); |
287 | } |
288 | inline iterator_range<succ_iterator> successors() { |
289 | assert(hasCFG()); |
290 | return iterator_range<succ_iterator>(succ_begin(), succ_end()); |
291 | } |
292 | inline iterator_range<const_succ_iterator> successors() const { |
293 | assert(hasCFG()); |
294 | return iterator_range<const_succ_iterator>(succ_begin(), succ_end()); |
295 | } |
296 | inline iterator_range<throw_iterator> throwers() { |
297 | assert(hasCFG()); |
298 | return iterator_range<throw_iterator>(throw_begin(), throw_end()); |
299 | } |
300 | inline iterator_range<const_throw_iterator> throwers() const { |
301 | assert(hasCFG()); |
302 | return iterator_range<const_throw_iterator>(throw_begin(), throw_end()); |
303 | } |
304 | inline iterator_range<lp_iterator> landing_pads() { |
305 | assert(hasCFG()); |
306 | return iterator_range<lp_iterator>(lp_begin(), lp_end()); |
307 | } |
308 | inline iterator_range<const_lp_iterator> landing_pads() const { |
309 | assert(hasCFG()); |
310 | return iterator_range<const_lp_iterator>(lp_begin(), lp_end()); |
311 | } |
312 | |
313 | // BranchInfo iterators. |
314 | using branch_info_iterator = BranchInfoType::iterator; |
315 | using const_branch_info_iterator = BranchInfoType::const_iterator; |
316 | using branch_info_reverse_iterator = |
317 | std::reverse_iterator<branch_info_iterator>; |
318 | using const_branch_info_reverse_iterator = |
319 | std::reverse_iterator<const_branch_info_iterator>; |
320 | |
321 | branch_info_iterator branch_info_begin() { return BranchInfo.begin(); } |
322 | branch_info_iterator branch_info_end() { return BranchInfo.end(); } |
323 | const_branch_info_iterator branch_info_begin() const { |
324 | return BranchInfo.begin(); |
325 | } |
326 | const_branch_info_iterator branch_info_end() const { |
327 | return BranchInfo.end(); |
328 | } |
329 | branch_info_reverse_iterator branch_info_rbegin() { |
330 | return BranchInfo.rbegin(); |
331 | } |
332 | branch_info_reverse_iterator branch_info_rend() { return BranchInfo.rend(); } |
333 | const_branch_info_reverse_iterator branch_info_rbegin() const { |
334 | return BranchInfo.rbegin(); |
335 | } |
336 | const_branch_info_reverse_iterator branch_info_rend() const { |
337 | return BranchInfo.rend(); |
338 | } |
339 | |
340 | size_t branch_info_size() const { return BranchInfo.size(); } |
341 | bool branch_info_empty() const { return BranchInfo.empty(); } |
342 | |
343 | inline iterator_range<branch_info_iterator> branch_info() { |
344 | return iterator_range<branch_info_iterator>(BranchInfo.begin(), |
345 | BranchInfo.end()); |
346 | } |
347 | inline iterator_range<const_branch_info_iterator> branch_info() const { |
348 | return iterator_range<const_branch_info_iterator>(BranchInfo.begin(), |
349 | BranchInfo.end()); |
350 | } |
351 | |
352 | /// Get instruction at given index. |
353 | MCInst &getInstructionAtIndex(unsigned Index) { return Instructions[Index]; } |
354 | |
355 | const MCInst &getInstructionAtIndex(unsigned Index) const { |
356 | return Instructions[Index]; |
357 | } |
358 | |
359 | /// Return symbol marking the start of this basic block. |
360 | MCSymbol *getLabel() { return Label; } |
361 | |
362 | /// Return symbol marking the start of this basic block (const version). |
363 | const MCSymbol *getLabel() const { return Label; } |
364 | |
365 | /// Get successor with given \p Label if \p Label != nullptr. |
366 | /// Returns nullptr if no such successor is found. |
367 | /// If the \p Label == nullptr and the block has only one successor then |
368 | /// return the successor. |
369 | BinaryBasicBlock *getSuccessor(const MCSymbol *Label = nullptr) const; |
370 | |
371 | /// Return the related branch info as well as the successor. |
372 | BinaryBasicBlock *getSuccessor(const MCSymbol *Label, |
373 | BinaryBranchInfo &BI) const; |
374 | |
375 | /// If the basic block ends with a conditional branch (possibly followed by |
376 | /// an unconditional branch) and thus has 2 successors, return a successor |
377 | /// corresponding to a jump condition which could be true or false. |
378 | /// Return nullptr if the basic block does not have a conditional jump. |
379 | BinaryBasicBlock *getConditionalSuccessor(bool Condition) { |
380 | if (succ_size() != 2) |
381 | return nullptr; |
382 | return Successors[Condition == true ? 0 : 1]; |
383 | } |
384 | |
385 | const BinaryBasicBlock *getConditionalSuccessor(bool Condition) const { |
386 | return const_cast<BinaryBasicBlock *>(this)->getConditionalSuccessor( |
387 | Condition); |
388 | } |
389 | |
390 | /// Find the fallthrough successor for a block, or nullptr if there is |
391 | /// none. |
392 | BinaryBasicBlock *getFallthrough() { |
393 | if (succ_size() == 2) |
394 | return getConditionalSuccessor(Condition: false); |
395 | else |
396 | return getSuccessor(); |
397 | } |
398 | |
399 | const BinaryBasicBlock *getFallthrough() const { |
400 | return const_cast<BinaryBasicBlock *>(this)->getFallthrough(); |
401 | } |
402 | |
403 | /// Return branch info corresponding to a taken branch. |
404 | const BinaryBranchInfo &getTakenBranchInfo() const { |
405 | assert(BranchInfo.size() == 2 && |
406 | "could only be called for blocks with 2 successors" ); |
407 | return BranchInfo[0]; |
408 | }; |
409 | |
410 | /// Return branch info corresponding to a fall-through branch. |
411 | const BinaryBranchInfo &getFallthroughBranchInfo() const { |
412 | assert(BranchInfo.size() == 2 && |
413 | "could only be called for blocks with 2 successors" ); |
414 | return BranchInfo[1]; |
415 | }; |
416 | |
417 | /// Return branch info corresponding to an edge going to \p Succ basic block. |
418 | BinaryBranchInfo &getBranchInfo(const BinaryBasicBlock &Succ); |
419 | |
420 | /// Return branch info corresponding to an edge going to \p Succ basic block. |
421 | const BinaryBranchInfo &getBranchInfo(const BinaryBasicBlock &Succ) const; |
422 | |
423 | /// Set branch information for the outgoing edge to block \p Succ. |
424 | void setSuccessorBranchInfo(const BinaryBasicBlock &Succ, uint64_t Count, |
425 | uint64_t MispredictedCount) { |
426 | BinaryBranchInfo &BI = getBranchInfo(Succ); |
427 | BI.Count = Count; |
428 | BI.MispredictedCount = MispredictedCount; |
429 | } |
430 | |
431 | /// Try to compute the taken and misprediction frequencies for the given |
432 | /// successor. The result is an error if no information can be found. |
433 | ErrorOr<std::pair<double, double>> |
434 | getBranchStats(const BinaryBasicBlock *Succ) const; |
435 | |
436 | /// If the basic block ends with a conditional branch (possibly followed by |
437 | /// an unconditional branch) and thus has 2 successor, reverse the order of |
438 | /// its successors in CFG, update branch info, and return true. If the basic |
439 | /// block does not have 2 successors return false. |
440 | bool swapConditionalSuccessors(); |
441 | |
442 | /// Add an instruction with unconditional control transfer to \p Successor |
443 | /// basic block to the end of this basic block. |
444 | void addBranchInstruction(const BinaryBasicBlock *Successor); |
445 | |
446 | /// Add an instruction with tail call control transfer to \p Target |
447 | /// to the end of this basic block. |
448 | void addTailCallInstruction(const MCSymbol *Target); |
449 | |
450 | /// Return the number of call instructions in this basic block. |
451 | uint32_t getNumCalls() const; |
452 | |
453 | /// Get landing pad with given label. Returns nullptr if no such |
454 | /// landing pad is found. |
455 | BinaryBasicBlock *getLandingPad(const MCSymbol *Label) const; |
456 | |
457 | /// Return local name for the block. |
458 | StringRef getName() const { return Label->getName(); } |
459 | |
460 | /// Add instruction at the end of this basic block. |
461 | /// Returns iterator pointing to the inserted instruction. |
462 | iterator addInstruction(MCInst &&Inst) { |
463 | adjustNumPseudos(Inst, Sign: 1); |
464 | Instructions.emplace_back(args&: Inst); |
465 | return std::prev(x: Instructions.end()); |
466 | } |
467 | |
468 | /// Add instruction at the end of this basic block. |
469 | /// Returns iterator pointing to the inserted instruction. |
470 | iterator addInstruction(const MCInst &Inst) { |
471 | adjustNumPseudos(Inst, Sign: 1); |
472 | Instructions.push_back(x: Inst); |
473 | return std::prev(x: Instructions.end()); |
474 | } |
475 | |
476 | /// Add a range of instructions to the end of this basic block. |
477 | template <typename Itr> void addInstructions(Itr Begin, Itr End) { |
478 | while (Begin != End) |
479 | addInstruction(*Begin++); |
480 | } |
481 | |
482 | /// Add a range of instructions to the end of this basic block. |
483 | template <typename RangeTy> void addInstructions(RangeTy R) { |
484 | for (auto &I : R) |
485 | addInstruction(I); |
486 | } |
487 | |
488 | /// Add instruction before Pos in this basic block. |
489 | template <typename Itr> Itr insertPseudoInstr(Itr Pos, MCInst &Instr) { |
490 | ++NumPseudos; |
491 | return Instructions.insert(Pos, Instr); |
492 | } |
493 | |
494 | /// Return the number of pseudo instructions in the basic block. |
495 | uint32_t getNumPseudos() const; |
496 | |
497 | /// Return the number of emitted instructions for this basic block. |
498 | uint32_t getNumNonPseudos() const { return size() - getNumPseudos(); } |
499 | |
500 | /// Return iterator to the first non-pseudo instruction or end() |
501 | /// if no such instruction was found. |
502 | iterator getFirstNonPseudo(); |
503 | |
504 | /// Return a pointer to the first non-pseudo instruction in this basic |
505 | /// block. Returns nullptr if none exists. |
506 | MCInst *getFirstNonPseudoInstr() { |
507 | auto II = getFirstNonPseudo(); |
508 | return II == Instructions.end() ? nullptr : &*II; |
509 | } |
510 | |
511 | /// Return reverse iterator to the last non-pseudo instruction or rend() |
512 | /// if no such instruction was found. |
513 | reverse_iterator getLastNonPseudo(); |
514 | const_reverse_iterator getLastNonPseudo() const { |
515 | return const_cast<BinaryBasicBlock *>(this)->getLastNonPseudo(); |
516 | } |
517 | |
518 | /// Return a pointer to the last non-pseudo instruction in this basic |
519 | /// block. Returns nullptr if none exists. |
520 | MCInst *getLastNonPseudoInstr() { |
521 | auto RII = getLastNonPseudo(); |
522 | return RII == Instructions.rend() ? nullptr : &*RII; |
523 | } |
524 | const MCInst *getLastNonPseudoInstr() const { |
525 | auto RII = getLastNonPseudo(); |
526 | return RII == Instructions.rend() ? nullptr : &*RII; |
527 | } |
528 | |
529 | /// Set CFI state at entry to this basic block. |
530 | void setCFIState(int32_t NewCFIState) { |
531 | assert((CFIState == -1 || NewCFIState == CFIState) && |
532 | "unexpected change of CFI state for basic block" ); |
533 | CFIState = NewCFIState; |
534 | } |
535 | |
536 | /// Return CFI state (expected) at entry of this basic block. |
537 | int32_t getCFIState() const { |
538 | assert(CFIState >= 0 && "unknown CFI state" ); |
539 | return CFIState; |
540 | } |
541 | |
542 | /// Calculate and return CFI state right before instruction \p Instr in |
543 | /// this basic block. If \p Instr is nullptr then return the state at |
544 | /// the end of the basic block. |
545 | int32_t getCFIStateAtInstr(const MCInst *Instr) const; |
546 | |
547 | /// Calculate and return CFI state after execution of this basic block. |
548 | /// The state depends on CFI state at entry and CFI instructions inside the |
549 | /// basic block. |
550 | int32_t getCFIStateAtExit() const { return getCFIStateAtInstr(Instr: nullptr); } |
551 | |
552 | /// Set minimum alignment for the basic block. |
553 | void setAlignment(uint32_t Align) { Alignment = Align; } |
554 | |
555 | /// Set alignment of the block based on the alignment of its offset. |
556 | void setDerivedAlignment() { |
557 | const uint64_t DerivedAlignment = getOffset() & (1 + ~getOffset()); |
558 | Alignment = std::min(a: DerivedAlignment, b: uint64_t(32)); |
559 | } |
560 | |
561 | /// Return required alignment for the block. |
562 | Align getAlign() const { return Align(Alignment); } |
563 | uint32_t getAlignment() const { return Alignment; } |
564 | |
565 | /// Set the maximum number of bytes to use for the block alignment. |
566 | void setAlignmentMaxBytes(uint32_t Value) { AlignmentMaxBytes = Value; } |
567 | |
568 | /// Return the maximum number of bytes to use for the block alignment. |
569 | uint32_t getAlignmentMaxBytes() const { return AlignmentMaxBytes; } |
570 | |
571 | /// Adds block to successor list, and also updates predecessor list for |
572 | /// successor block. |
573 | /// Set branch info for this path. |
574 | void addSuccessor(BinaryBasicBlock *Succ, uint64_t Count = 0, |
575 | uint64_t MispredictedCount = 0); |
576 | |
577 | void addSuccessor(BinaryBasicBlock *Succ, const BinaryBranchInfo &BI) { |
578 | addSuccessor(Succ, Count: BI.Count, MispredictedCount: BI.MispredictedCount); |
579 | } |
580 | |
581 | /// Add a range of successors. |
582 | template <typename Itr> void addSuccessors(Itr Begin, Itr End) { |
583 | while (Begin != End) |
584 | addSuccessor(*Begin++); |
585 | } |
586 | |
587 | /// Add a range of successors with branch info. |
588 | template <typename Itr, typename BrItr> |
589 | void addSuccessors(Itr Begin, Itr End, BrItr BrBegin, BrItr BrEnd) { |
590 | assert(std::distance(Begin, End) == std::distance(BrBegin, BrEnd)); |
591 | while (Begin != End) |
592 | addSuccessor(*Begin++, *BrBegin++); |
593 | } |
594 | |
595 | /// Replace Succ with NewSucc. This routine is helpful for preserving |
596 | /// the order of conditional successors when editing the CFG. |
597 | void replaceSuccessor(BinaryBasicBlock *Succ, BinaryBasicBlock *NewSucc, |
598 | uint64_t Count = 0, uint64_t MispredictedCount = 0); |
599 | |
600 | /// Move all of this block's successors to a new block, and set the |
601 | /// execution count of this new block with our execution count. This is |
602 | /// useful when splitting a block in two. |
603 | void moveAllSuccessorsTo(BinaryBasicBlock *New) { |
604 | New->addSuccessors(Begin: successors().begin(), End: successors().end(), |
605 | BrBegin: branch_info_begin(), BrEnd: branch_info_end()); |
606 | removeAllSuccessors(); |
607 | |
608 | // Update the execution count on the new block. |
609 | New->setExecutionCount(getExecutionCount()); |
610 | } |
611 | |
612 | /// Remove /p Succ basic block from the list of successors. Update the |
613 | /// list of predecessors of /p Succ and update branch info. |
614 | void removeSuccessor(BinaryBasicBlock *Succ); |
615 | |
616 | /// Remove all successors of the basic block, and remove the block |
617 | /// from respective lists of predecessors. |
618 | void removeAllSuccessors(); |
619 | |
620 | /// Remove useless duplicate successors. When the conditional |
621 | /// successor is the same as the unconditional successor, we can |
622 | /// remove the conditional successor and branch instruction. |
623 | void removeDuplicateConditionalSuccessor(MCInst *CondBranch); |
624 | |
625 | /// Update successors of the basic block based on the jump table instruction. |
626 | /// The block must end with a jump table instruction. |
627 | void updateJumpTableSuccessors(); |
628 | |
629 | /// Test if BB is a predecessor of this block. |
630 | bool isPredecessor(const BinaryBasicBlock *BB) const { |
631 | return llvm::is_contained(Range: Predecessors, Element: BB); |
632 | } |
633 | |
634 | /// Test if BB is a successor of this block. |
635 | bool isSuccessor(const BinaryBasicBlock *BB) const { |
636 | return llvm::is_contained(Range: Successors, Element: BB); |
637 | } |
638 | |
639 | /// Test if this BB has a valid execution count. |
640 | bool hasProfile() const { return ExecutionCount != COUNT_NO_PROFILE; } |
641 | |
642 | /// Return the information about the number of times this basic block was |
643 | /// executed. |
644 | /// |
645 | /// Return COUNT_NO_PROFILE if there's no profile info. |
646 | uint64_t getExecutionCount() const { return ExecutionCount; } |
647 | |
648 | /// Return the execution count for blocks with known profile. |
649 | /// Return 0 if the block has no profile. |
650 | uint64_t getKnownExecutionCount() const { |
651 | return !hasProfile() ? 0 : ExecutionCount; |
652 | } |
653 | |
654 | /// Set the execution count for this block. |
655 | void setExecutionCount(uint64_t Count) { ExecutionCount = Count; } |
656 | |
657 | /// Apply a given \p Ratio to the profile information of this basic block. |
658 | void adjustExecutionCount(double Ratio); |
659 | |
660 | /// Return true if the basic block is an entry point into the function |
661 | /// (either primary or secondary). |
662 | bool isEntryPoint() const; |
663 | |
664 | bool isValid() const { return IsValid; } |
665 | |
666 | void markValid(const bool Valid) { IsValid = Valid; } |
667 | |
668 | FragmentNum getFragmentNum() const { return Fragment; } |
669 | |
670 | void setFragmentNum(const FragmentNum Value) { Fragment = Value; } |
671 | |
672 | bool isSplit() const { return Fragment != FragmentNum::main(); } |
673 | |
674 | bool isCold() const { |
675 | assert(Fragment.get() < 2 && |
676 | "Function is split into more than two (hot/cold)-fragments" ); |
677 | return isSplit(); |
678 | } |
679 | |
680 | void setIsCold(const bool Flag) { |
681 | Fragment = Flag ? FragmentNum::cold() : FragmentNum::main(); |
682 | } |
683 | |
684 | /// Return true if the block can be outlined. At the moment we disallow |
685 | /// outlining of blocks that can potentially throw exceptions or are |
686 | /// the beginning of a landing pad. The entry basic block also can |
687 | /// never be outlined. |
688 | bool canOutline() const { return CanOutline; } |
689 | |
690 | void setCanOutline(const bool Flag) { CanOutline = Flag; } |
691 | |
692 | /// Erase pseudo instruction at a given iterator. |
693 | /// Return iterator following the removed instruction. |
694 | iterator erasePseudoInstruction(iterator II) { |
695 | --NumPseudos; |
696 | return Instructions.erase(position: II); |
697 | } |
698 | |
699 | /// Erase non-pseudo instruction at a given iterator \p II. |
700 | /// Return iterator following the removed instruction. |
701 | iterator eraseInstruction(iterator II) { |
702 | adjustNumPseudos(Inst: *II, Sign: -1); |
703 | return Instructions.erase(position: II); |
704 | } |
705 | |
706 | /// Erase non-pseudo instruction at a given \p Index |
707 | void eraseInstructionAtIndex(unsigned Index) { |
708 | eraseInstruction(II: Instructions.begin() + Index); |
709 | } |
710 | |
711 | /// Erase instructions in the specified range. |
712 | template <typename ItrType> |
713 | void eraseInstructions(ItrType Begin, ItrType End) { |
714 | while (End > Begin) |
715 | eraseInstruction(II: findInstruction(Inst: *--End)); |
716 | } |
717 | |
718 | /// Erase all instructions. |
719 | void clear() { |
720 | Instructions.clear(); |
721 | NumPseudos = 0; |
722 | } |
723 | |
724 | /// Retrieve iterator for \p Inst or return end iterator if instruction is not |
725 | /// from this basic block. |
726 | decltype(Instructions)::iterator findInstruction(const MCInst *Inst) { |
727 | if (Instructions.empty()) |
728 | return Instructions.end(); |
729 | size_t Index = Inst - &Instructions[0]; |
730 | return Index >= Instructions.size() ? Instructions.end() |
731 | : Instructions.begin() + Index; |
732 | } |
733 | |
734 | /// Replace instruction referenced by iterator \II with a sequence of |
735 | /// instructions defined by [\p Begin, \p End] range. |
736 | /// |
737 | /// Return iterator pointing to the first inserted instruction. |
738 | template <typename Itr> |
739 | iterator replaceInstruction(iterator II, Itr Begin, Itr End) { |
740 | adjustNumPseudos(Inst: *II, Sign: -1); |
741 | adjustNumPseudos(Begin, End, 1); |
742 | |
743 | auto I = II - Instructions.begin(); |
744 | Instructions.insert(Instructions.erase(position: II), Begin, End); |
745 | return I + Instructions.begin(); |
746 | } |
747 | |
748 | iterator replaceInstruction(iterator II, |
749 | const InstructionListType &Replacement) { |
750 | return replaceInstruction(II, Begin: Replacement.begin(), End: Replacement.end()); |
751 | } |
752 | |
753 | /// Insert \p NewInst before \p At, which must be an existing instruction in |
754 | /// this BB. Return iterator pointing to the newly inserted instruction. |
755 | iterator insertInstruction(iterator At, MCInst &&NewInst) { |
756 | adjustNumPseudos(Inst: NewInst, Sign: 1); |
757 | return Instructions.emplace(position: At, args: std::move(NewInst)); |
758 | } |
759 | |
760 | iterator insertInstruction(iterator At, MCInst &NewInst) { |
761 | adjustNumPseudos(Inst: NewInst, Sign: 1); |
762 | return Instructions.insert(position: At, x: NewInst); |
763 | } |
764 | |
765 | /// Helper to retrieve any terminators in \p BB before \p Pos. This is used |
766 | /// to skip CFI instructions and to retrieve the first terminator instruction |
767 | /// in basic blocks with two terminators (conditional jump and unconditional |
768 | /// jump). |
769 | MCInst *getTerminatorBefore(MCInst *Pos); |
770 | |
771 | /// Used to identify whether an instruction is before a terminator and whether |
772 | /// moving it to the end of the BB would render it dead code. |
773 | bool hasTerminatorAfter(MCInst *Pos); |
774 | |
775 | /// Split apart the instructions in this basic block starting at Inst. |
776 | /// The instructions following Inst are removed and returned in a vector. |
777 | InstructionListType splitInstructions(const MCInst *Inst) { |
778 | InstructionListType SplitInst; |
779 | |
780 | assert(!Instructions.empty()); |
781 | while (&Instructions.back() != Inst) { |
782 | SplitInst.push_back(x: Instructions.back()); |
783 | Instructions.pop_back(); |
784 | } |
785 | std::reverse(first: SplitInst.begin(), last: SplitInst.end()); |
786 | NumPseudos = 0; |
787 | adjustNumPseudos(Begin: Instructions.begin(), End: Instructions.end(), Sign: 1); |
788 | return SplitInst; |
789 | } |
790 | |
791 | /// Split basic block at the instruction pointed to by II. |
792 | /// All iterators pointing after II get invalidated. |
793 | /// |
794 | /// Return the new basic block that starts with the instruction |
795 | /// at the split point. |
796 | BinaryBasicBlock *splitAt(iterator II); |
797 | |
798 | /// Set start offset of this basic block in the input binary. |
799 | void setOffset(uint32_t Offset) { InputRange.first = Offset; }; |
800 | |
801 | /// Sets address of the basic block in the output. |
802 | void setOutputStartAddress(uint64_t Address) { |
803 | OutputAddressRange.first = Address; |
804 | } |
805 | |
806 | /// Sets address past the end of the basic block in the output. |
807 | void setOutputEndAddress(uint64_t Address) { |
808 | OutputAddressRange.second = Address; |
809 | } |
810 | |
811 | /// Gets the memory address range of this BB in the input binary. |
812 | std::pair<uint64_t, uint64_t> getInputAddressRange() const { |
813 | return InputRange; |
814 | } |
815 | |
816 | /// Gets the memory address range of this BB in the output binary. |
817 | std::pair<uint64_t, uint64_t> getOutputAddressRange() const { |
818 | return OutputAddressRange; |
819 | } |
820 | |
821 | bool hasLocSyms() const { return LocSyms != nullptr; } |
822 | |
823 | /// Return mapping of input offsets to symbols in the output. |
824 | LocSymsTy &getLocSyms() { |
825 | return LocSyms ? *LocSyms : *(LocSyms = std::make_unique<LocSymsTy>()); |
826 | } |
827 | |
828 | /// Return mapping of input offsets to symbols in the output. |
829 | const LocSymsTy &getLocSyms() const { |
830 | return const_cast<BinaryBasicBlock *>(this)->getLocSyms(); |
831 | } |
832 | |
833 | /// Return size of the basic block in the output binary. |
834 | uint64_t getOutputSize() const { |
835 | return OutputAddressRange.second - OutputAddressRange.first; |
836 | } |
837 | |
838 | BinaryFunction *getFunction() const { return Function; } |
839 | |
840 | /// Analyze and interpret the terminators of this basic block. TBB must be |
841 | /// initialized with the original fall-through for this BB. |
842 | bool analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB, |
843 | MCInst *&CondBranch, MCInst *&UncondBranch); |
844 | |
845 | /// Return true if iterator \p I is pointing to the first instruction in |
846 | /// a pair that could be macro-fused. |
847 | bool isMacroOpFusionPair(const_iterator I) const; |
848 | |
849 | /// If the basic block has a pair of instructions suitable for macro-fusion, |
850 | /// return iterator to the first instruction of the pair. |
851 | /// Otherwise return end(). |
852 | const_iterator getMacroOpFusionPair() const; |
853 | |
854 | /// Printer required for printing dominator trees. |
855 | void printAsOperand(raw_ostream &OS, bool PrintType = true) { |
856 | if (PrintType) |
857 | OS << "basic block " ; |
858 | OS << getName(); |
859 | } |
860 | |
861 | /// A simple dump function for debugging. |
862 | void dump() const; |
863 | |
864 | /// Validate successor invariants for this BB. |
865 | bool validateSuccessorInvariants(); |
866 | |
867 | /// Return offset of the basic block from the function start on input. |
868 | uint32_t getInputOffset() const { return InputRange.first; } |
869 | |
870 | /// Return offset from the function start to location immediately past |
871 | /// the end of the basic block. |
872 | uint32_t getEndOffset() const { return InputRange.second; } |
873 | |
874 | /// Return size of the basic block on input. |
875 | uint32_t getOriginalSize() const { |
876 | return InputRange.second - InputRange.first; |
877 | } |
878 | |
879 | /// Returns an estimate of size of basic block during run time optionally |
880 | /// using a user-supplied emitter for lock-free multi-thread work. |
881 | /// MCCodeEmitter is not thread safe and each thread should operate with its |
882 | /// own copy of it. |
883 | uint64_t estimateSize(const MCCodeEmitter *Emitter = nullptr) const; |
884 | |
885 | /// Return index in the current layout. The user is responsible for |
886 | /// making sure the indices are up to date, |
887 | /// e.g. by calling BinaryFunction::updateLayoutIndices(); |
888 | unsigned getLayoutIndex() const { |
889 | assert(isValid()); |
890 | return LayoutIndex; |
891 | } |
892 | |
893 | /// Set layout index. To be used by BinaryFunction. |
894 | void setLayoutIndex(unsigned Index) const { LayoutIndex = Index; } |
895 | |
896 | /// Needed by graph traits. |
897 | BinaryFunction *getParent() const { return getFunction(); } |
898 | |
899 | /// Return true if the containing function is in CFG state. |
900 | bool hasCFG() const; |
901 | |
902 | /// Return true if the containing function is in a state with instructions. |
903 | bool hasInstructions() const; |
904 | |
905 | /// Return offset of the basic block from the function start. |
906 | uint32_t getOffset() const { return InputRange.first; } |
907 | |
908 | /// Get the index of this basic block. |
909 | unsigned getIndex() const { |
910 | assert(isValid()); |
911 | return Index; |
912 | } |
913 | |
914 | /// Return jump table if the block contains a jump table instruction or |
915 | /// nullptr otherwise. |
916 | const JumpTable *getJumpTable() const; |
917 | |
918 | /// Check if the block has a jump table instruction. |
919 | bool hasJumpTable() const { return getJumpTable() != nullptr; } |
920 | |
921 | /// Returns the last computed hash value of the block. |
922 | uint64_t getHash() const { return Hash; } |
923 | |
924 | private: |
925 | void adjustNumPseudos(const MCInst &Inst, int Sign); |
926 | |
927 | template <typename Itr> void adjustNumPseudos(Itr Begin, Itr End, int Sign) { |
928 | while (Begin != End) |
929 | adjustNumPseudos(*Begin++, Sign); |
930 | } |
931 | |
932 | /// Adds predecessor to the BB. Most likely you don't need to call this. |
933 | void addPredecessor(BinaryBasicBlock *Pred); |
934 | |
935 | /// Remove predecessor of the basic block. Don't use directly, instead |
936 | /// use removeSuccessor() function. |
937 | /// If \p Multiple is set to true, it will remove all predecessors that |
938 | /// are equal to \p Pred. Otherwise, the first instance of \p Pred found |
939 | /// will be removed. This only matters in awkward, redundant CFGs. |
940 | void removePredecessor(BinaryBasicBlock *Pred, bool Multiple = true); |
941 | |
942 | /// Set end offset of this basic block. |
943 | void setEndOffset(uint32_t Offset) { InputRange.second = Offset; } |
944 | |
945 | /// Set the index of this basic block. |
946 | void setIndex(unsigned I) { Index = I; } |
947 | |
948 | /// Sets the hash value of the basic block. |
949 | void setHash(uint64_t Value) const { Hash = Value; } |
950 | |
951 | template <typename T> void clearList(T &List) { |
952 | T TempList; |
953 | TempList.swap(List); |
954 | } |
955 | |
956 | /// Release memory taken by CFG edges and instructions. |
957 | void releaseCFG() { |
958 | clearList(List&: Predecessors); |
959 | clearList(List&: Successors); |
960 | clearList(List&: Throwers); |
961 | clearList(List&: LandingPads); |
962 | clearList(List&: BranchInfo); |
963 | clearList(List&: Instructions); |
964 | } |
965 | }; |
966 | |
967 | #if defined(LLVM_ON_UNIX) |
968 | /// Keep the size of the BinaryBasicBlock within a reasonable size class |
969 | /// (jemalloc bucket) on Linux |
970 | static_assert(sizeof(BinaryBasicBlock) <= 256); |
971 | #endif |
972 | |
973 | bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS); |
974 | |
975 | } // namespace bolt |
976 | |
977 | // GraphTraits specializations for basic block graphs (CFGs) |
978 | template <> struct GraphTraits<bolt::BinaryBasicBlock *> { |
979 | using NodeRef = bolt::BinaryBasicBlock *; |
980 | using ChildIteratorType = bolt::BinaryBasicBlock::succ_iterator; |
981 | |
982 | static NodeRef getEntryNode(bolt::BinaryBasicBlock *BB) { return BB; } |
983 | static inline ChildIteratorType child_begin(NodeRef N) { |
984 | return N->succ_begin(); |
985 | } |
986 | static inline ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } |
987 | }; |
988 | |
989 | template <> struct GraphTraits<const bolt::BinaryBasicBlock *> { |
990 | using NodeRef = const bolt::BinaryBasicBlock *; |
991 | using ChildIteratorType = bolt::BinaryBasicBlock::const_succ_iterator; |
992 | |
993 | static NodeRef getEntryNode(const bolt::BinaryBasicBlock *BB) { return BB; } |
994 | static inline ChildIteratorType child_begin(NodeRef N) { |
995 | return N->succ_begin(); |
996 | } |
997 | static inline ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } |
998 | }; |
999 | |
1000 | template <> struct GraphTraits<Inverse<bolt::BinaryBasicBlock *>> { |
1001 | using NodeRef = bolt::BinaryBasicBlock *; |
1002 | using ChildIteratorType = bolt::BinaryBasicBlock::pred_iterator; |
1003 | static NodeRef getEntryNode(Inverse<bolt::BinaryBasicBlock *> G) { |
1004 | return G.Graph; |
1005 | } |
1006 | static inline ChildIteratorType child_begin(NodeRef N) { |
1007 | return N->pred_begin(); |
1008 | } |
1009 | static inline ChildIteratorType child_end(NodeRef N) { return N->pred_end(); } |
1010 | }; |
1011 | |
1012 | template <> struct GraphTraits<Inverse<const bolt::BinaryBasicBlock *>> { |
1013 | using NodeRef = const bolt::BinaryBasicBlock *; |
1014 | using ChildIteratorType = bolt::BinaryBasicBlock::const_pred_iterator; |
1015 | static NodeRef getEntryNode(Inverse<const bolt::BinaryBasicBlock *> G) { |
1016 | return G.Graph; |
1017 | } |
1018 | static inline ChildIteratorType child_begin(NodeRef N) { |
1019 | return N->pred_begin(); |
1020 | } |
1021 | static inline ChildIteratorType child_end(NodeRef N) { return N->pred_end(); } |
1022 | }; |
1023 | |
1024 | } // namespace llvm |
1025 | |
1026 | #endif |
1027 | |