1//===- bolt/Passes/DataflowAnalysis.h ---------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef BOLT_PASSES_DATAFLOWANALYSIS_H
10#define BOLT_PASSES_DATAFLOWANALYSIS_H
11
12#include "bolt/Core/BinaryContext.h"
13#include "bolt/Core/BinaryFunction.h"
14#include "llvm/Support/Errc.h"
15#include <optional>
16#include <queue>
17
18namespace llvm {
19namespace bolt {
20
21/// Represents a given program point as viewed by a dataflow analysis. This
22/// point is a location that may be either an instruction or a basic block.
23/// Example:
24///
25/// BB1: --> ProgramPoint 1 (stored as bb *)
26/// add --> ProgramPoint 2 (stored as inst *)
27/// sub --> ProgramPoint 3 (stored as inst *)
28/// jmp --> ProgramPoint 4 (stored as inst *)
29///
30/// ProgramPoints allow us to attach a state to any location in the program
31/// and is a core concept used in the dataflow analysis engine.
32///
33/// A dataflow analysis will associate a state with a program point. In
34/// analyses whose direction is forward, this state tracks what happened after
35/// the execution of an instruction, and the BB tracks the state of what
36/// happened before the execution of the first instruction in this BB. For
37/// backwards dataflow analyses, state tracks what happened before the
38/// execution of a given instruction, while the state associated with a BB
39/// tracks what happened after the execution of the last instruction of a BB.
40class ProgramPoint {
41 enum IDTy : bool { BB = 0, Inst } ID;
42
43 union DataU {
44 BinaryBasicBlock *BB;
45 MCInst *Inst;
46 DataU(BinaryBasicBlock *BB) : BB(BB) {}
47 DataU(MCInst *Inst) : Inst(Inst) {}
48 } Data;
49
50public:
51 ProgramPoint() : ID(IDTy::BB), Data((MCInst *)nullptr) {}
52 ProgramPoint(BinaryBasicBlock *BB) : ID(IDTy::BB), Data(BB) {}
53 ProgramPoint(MCInst *Inst) : ID(IDTy::Inst), Data(Inst) {}
54
55 /// Convenience function to access the last program point of a basic block,
56 /// which is equal to its last instruction. If it is empty, it is equal to
57 /// itself.
58 static ProgramPoint getLastPointAt(BinaryBasicBlock &BB) {
59 auto Last = BB.rbegin();
60 if (Last != BB.rend())
61 return ProgramPoint(&*Last);
62 return ProgramPoint(&BB);
63 }
64
65 /// Similar to getLastPointAt.
66 static ProgramPoint getFirstPointAt(BinaryBasicBlock &BB) {
67 auto First = BB.begin();
68 if (First != BB.end())
69 return ProgramPoint(&*First);
70 return ProgramPoint(&BB);
71 }
72
73 bool operator<(const ProgramPoint &PP) const { return Data.BB < PP.Data.BB; }
74 bool operator==(const ProgramPoint &PP) const {
75 return Data.BB == PP.Data.BB;
76 }
77
78 bool isBB() const { return ID == IDTy::BB; }
79 bool isInst() const { return ID == IDTy::Inst; }
80
81 BinaryBasicBlock *getBB() const {
82 assert(isBB());
83 return Data.BB;
84 }
85 MCInst *getInst() const {
86 assert(isInst());
87 return Data.Inst;
88 }
89
90 friend DenseMapInfo<ProgramPoint>;
91};
92
93/// Convenience function to operate on all predecessors of a BB, as viewed
94/// by a dataflow analysis. This includes throw sites if it is a landing pad.
95void doForAllPreds(const BinaryBasicBlock &BB,
96 std::function<void(ProgramPoint)> Task);
97
98/// Operates on all successors of a basic block.
99void doForAllSuccs(const BinaryBasicBlock &BB,
100 std::function<void(ProgramPoint)> Task);
101
102/// Default printer for State data.
103template <typename StateTy> class StatePrinter {
104public:
105 void print(raw_ostream &OS, const StateTy &State) const { OS << State; }
106 explicit StatePrinter(const BinaryContext &) {}
107};
108
109/// Printer for State data that is a BitVector of registers.
110class RegStatePrinter {
111public:
112 void print(raw_ostream &OS, const BitVector &State) const;
113 explicit RegStatePrinter(const BinaryContext &BC) : BC(BC) {}
114
115private:
116 const BinaryContext &BC;
117};
118
119/// Base class for dataflow analyses. Depends on the type of whatever object is
120/// stored as the state (StateTy) at each program point. The dataflow then
121/// updates the state at each program point depending on the instruction being
122/// processed, iterating until all points converge and agree on a state value.
123/// Remember that depending on how you formulate your dataflow equation, this
124/// may not converge and will loop indefinitely.
125/// /p Backward indicates the direction of the dataflow. If false, direction is
126/// forward.
127///
128/// Example: Compute the set of live registers at each program point.
129///
130/// Modelling:
131/// Let State be the set of registers that are live. The kill set of a
132/// point is the set of all registers clobbered by the instruction at this
133/// program point. The gen set is the set of all registers read by it.
134///
135/// out{b} = Union (s E succs{b}) {in{s}}
136/// in{b} = (out{b} - kill{b}) U gen{b}
137///
138/// Template parameters:
139/// StateTy = BitVector, where each index corresponds to a machine register
140/// Backward = true (live reg operates in reverse order)
141///
142/// Subclass implementation notes:
143/// Confluence operator = union (if a reg is alive in any succ, it is alive
144/// in the current block).
145///
146template <typename Derived, typename StateTy, bool Backward = false,
147 typename StatePrinterTy = StatePrinter<StateTy>>
148class DataflowAnalysis {
149 /// CRTP convenience methods
150 Derived &derived() { return *static_cast<Derived *>(this); }
151
152 const Derived &const_derived() const {
153 return *static_cast<const Derived *>(this);
154 }
155
156 mutable std::optional<unsigned> AnnotationIndex;
157
158protected:
159 const BinaryContext &BC;
160 /// Reference to the function being analysed
161 BinaryFunction &Func;
162
163 /// The id of the annotation allocator to be used
164 MCPlusBuilder::AllocatorIdTy AllocatorId = 0;
165
166 /// Tracks the state at basic block start (end) if direction of the dataflow
167 /// is forward (backward).
168 std::unordered_map<const BinaryBasicBlock *, StateTy> StateAtBBEntry;
169 /// Map a point to its previous (succeeding) point if the direction of the
170 /// dataflow is forward (backward). This is used to support convenience
171 /// methods to access the resulting state before (after) a given instruction,
172 /// otherwise our clients need to keep "prev" pointers themselves.
173 DenseMap<const MCInst *, ProgramPoint> PrevPoint;
174
175 /// Perform any bookkeeping before dataflow starts
176 void preflight() { llvm_unreachable("Unimplemented method"); }
177
178 /// Sets initial state for each BB
179 StateTy getStartingStateAtBB(const BinaryBasicBlock &BB) {
180 llvm_unreachable("Unimplemented method");
181 }
182
183 /// Sets initial state for each instruction (out set)
184 StateTy getStartingStateAtPoint(const MCInst &Point) {
185 llvm_unreachable("Unimplemented method");
186 }
187
188 /// Computes the in set for the first instruction in a BB by applying the
189 /// confluence operator to the out sets of the last instruction of each pred
190 /// (in case of a backwards dataflow, we will operate on the in sets of each
191 /// successor to determine the starting state of the last instruction of the
192 /// current BB)
193 void doConfluence(StateTy &StateOut, const StateTy &StateIn) {
194 llvm_unreachable("Unimplemented method");
195 }
196
197 /// In case of a forwards dataflow, compute the in set for the first
198 /// instruction in a Landing Pad considering all out sets for associated
199 /// throw sites.
200 /// In case of a backwards dataflow, compute the in set of a invoke
201 /// instruction considering in sets for the first instructions of its
202 /// landing pads.
203 void doConfluenceWithLP(StateTy &StateOut, const StateTy &StateIn,
204 const MCInst &Invoke) {
205 return derived().doConfluence(StateOut, StateIn);
206 }
207
208 /// Returns the out set of an instruction given its in set.
209 /// If backwards, computes the in set given its out set.
210 StateTy computeNext(const MCInst &Point, const StateTy &Cur) {
211 llvm_unreachable("Unimplemented method");
212 return StateTy();
213 }
214
215 /// Returns the MCAnnotation name
216 StringRef getAnnotationName() const {
217 llvm_unreachable("Unimplemented method");
218 return StringRef("");
219 }
220
221 unsigned getAnnotationIndex() const {
222 if (AnnotationIndex)
223 return *AnnotationIndex;
224 AnnotationIndex =
225 BC.MIB->getOrCreateAnnotationIndex(Name: const_derived().getAnnotationName());
226 return *AnnotationIndex;
227 }
228
229 /// Private getter methods accessing state in a read-write fashion
230 StateTy &getOrCreateStateAt(const BinaryBasicBlock &BB) {
231 return StateAtBBEntry[&BB];
232 }
233
234 StateTy &getOrCreateStateAt(MCInst &Point) {
235 return BC.MIB->getOrCreateAnnotationAs<StateTy>(
236 Point, derived().getAnnotationIndex(), AllocatorId);
237 }
238
239 StateTy &getOrCreateStateAt(ProgramPoint Point) {
240 if (Point.isBB())
241 return getOrCreateStateAt(*Point.getBB());
242 return getOrCreateStateAt(*Point.getInst());
243 }
244
245public:
246 /// Return the allocator id
247 unsigned getAllocatorId() { return AllocatorId; }
248
249 /// If the direction of the dataflow is forward, operates on the last
250 /// instruction of all predecessors when performing an iteration of the
251 /// dataflow equation for the start of this BB. If backwards, operates on
252 /// the first instruction of all successors.
253 void doForAllSuccsOrPreds(const BinaryBasicBlock &BB,
254 std::function<void(ProgramPoint)> Task) {
255 if (!Backward)
256 return doForAllPreds(BB, Task);
257 return doForAllSuccs(BB, Task);
258 }
259
260 /// We need the current binary context and the function that will be processed
261 /// in this dataflow analysis.
262 DataflowAnalysis(BinaryFunction &BF,
263 MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
264 : BC(BF.getBinaryContext()), Func(BF), AllocatorId(AllocatorId) {}
265
266 virtual ~DataflowAnalysis() { cleanAnnotations(); }
267
268 /// Track the state at basic block start (end) if direction of the dataflow
269 /// is forward (backward).
270 ErrorOr<const StateTy &> getStateAt(const BinaryBasicBlock &BB) const {
271 auto Iter = StateAtBBEntry.find(&BB);
272 if (Iter == StateAtBBEntry.end())
273 return make_error_code(E: errc::result_out_of_range);
274 return Iter->second;
275 }
276
277 /// Track the state at the end (start) of each MCInst in this function if
278 /// the direction of the dataflow is forward (backward).
279 ErrorOr<const StateTy &> getStateAt(const MCInst &Point) const {
280 return BC.MIB->tryGetAnnotationAs<StateTy>(
281 Point, const_derived().getAnnotationIndex());
282 }
283
284 /// Return the out set (in set) of a given program point if the direction of
285 /// the dataflow is forward (backward).
286 ErrorOr<const StateTy &> getStateAt(ProgramPoint Point) const {
287 if (Point.isBB())
288 return getStateAt(*Point.getBB());
289 return getStateAt(*Point.getInst());
290 }
291
292 /// Relies on a ptr map to fetch the previous instruction and then retrieve
293 /// state. WARNING: Watch out for invalidated pointers. Do not use this
294 /// function if you invalidated pointers after the analysis has been completed
295 ErrorOr<const StateTy &> getStateBefore(const MCInst &Point) {
296 return getStateAt(PrevPoint[&Point]);
297 }
298
299 ErrorOr<const StateTy &> getStateBefore(ProgramPoint Point) {
300 if (Point.isBB())
301 return getStateAt(*Point.getBB());
302 return getStateAt(PrevPoint[Point.getInst()]);
303 }
304
305 /// Remove any state annotations left by this analysis
306 void cleanAnnotations() {
307 for (BinaryBasicBlock &BB : Func) {
308 for (MCInst &Inst : BB) {
309 BC.MIB->removeAnnotation(Inst, derived().getAnnotationIndex());
310 }
311 }
312 }
313
314 /// Public entry point that will perform the entire analysis form start to
315 /// end.
316 void run() {
317 derived().preflight();
318
319 if (Func.begin() == Func.end())
320 return;
321 // Initialize state for all points of the function
322 for (BinaryBasicBlock &BB : Func) {
323 StateTy &St = getOrCreateStateAt(BB);
324 St = derived().getStartingStateAtBB(BB);
325 for (MCInst &Inst : BB) {
326 StateTy &St = getOrCreateStateAt(Inst);
327 St = derived().getStartingStateAtPoint(Inst);
328 }
329 }
330
331 std::queue<BinaryBasicBlock *> Worklist;
332 // TODO: Pushing this in a DFS ordering will greatly speed up the dataflow
333 // performance.
334 if (!Backward) {
335 for (BinaryBasicBlock &BB : Func) {
336 Worklist.push(x: &BB);
337 MCInst *Prev = nullptr;
338 for (MCInst &Inst : BB) {
339 PrevPoint[&Inst] = Prev ? ProgramPoint(Prev) : ProgramPoint(&BB);
340 Prev = &Inst;
341 }
342 }
343 } else {
344 for (BinaryBasicBlock &BB : llvm::reverse(C&: Func)) {
345 Worklist.push(x: &BB);
346 MCInst *Prev = nullptr;
347 for (MCInst &Inst : llvm::reverse(C&: BB)) {
348 PrevPoint[&Inst] = Prev ? ProgramPoint(Prev) : ProgramPoint(&BB);
349 Prev = &Inst;
350 }
351 }
352 }
353
354 // Main dataflow loop
355 while (!Worklist.empty()) {
356 BinaryBasicBlock *BB = Worklist.front();
357 Worklist.pop();
358
359 // Calculate state at the entry of first instruction in BB
360 StateTy StateAtEntry = getOrCreateStateAt(*BB);
361 if (BB->isLandingPad()) {
362 doForAllSuccsOrPreds(BB: *BB, Task: [&](ProgramPoint P) {
363 if (P.isInst() && BC.MIB->isInvoke(Inst: *P.getInst()))
364 derived().doConfluenceWithLP(StateAtEntry, *getStateAt(P),
365 *P.getInst());
366 else
367 derived().doConfluence(StateAtEntry, *getStateAt(P));
368 });
369 } else {
370 doForAllSuccsOrPreds(BB: *BB, Task: [&](ProgramPoint P) {
371 derived().doConfluence(StateAtEntry, *getStateAt(P));
372 });
373 }
374
375 bool Changed = false;
376 StateTy &St = getOrCreateStateAt(*BB);
377 if (St != StateAtEntry) {
378 Changed = true;
379 St = std::move(StateAtEntry);
380 }
381
382 // Propagate information from first instruction down to the last one
383 StateTy *PrevState = &St;
384 const MCInst *LAST = nullptr;
385 if (!Backward)
386 LAST = &*BB->rbegin();
387 else
388 LAST = &*BB->begin();
389
390 auto doNext = [&](MCInst &Inst, const BinaryBasicBlock &BB) {
391 StateTy CurState = derived().computeNext(Inst, *PrevState);
392
393 if (Backward && BC.MIB->isInvoke(Inst)) {
394 BinaryBasicBlock *LBB = Func.getLandingPadBBFor(BB, InvokeInst: Inst);
395 if (LBB) {
396 auto First = LBB->begin();
397 if (First != LBB->end())
398 derived().doConfluenceWithLP(CurState,
399 getOrCreateStateAt(&*First), Inst);
400 else
401 derived().doConfluenceWithLP(CurState, getOrCreateStateAt(LBB),
402 Inst);
403 }
404 }
405
406 StateTy &St = getOrCreateStateAt(Inst);
407 if (St != CurState) {
408 St = CurState;
409 if (&Inst == LAST)
410 Changed = true;
411 }
412 PrevState = &St;
413 };
414
415 if (!Backward)
416 for (MCInst &Inst : *BB)
417 doNext(Inst, *BB);
418 else
419 for (MCInst &Inst : llvm::reverse(C&: *BB))
420 doNext(Inst, *BB);
421
422 if (Changed) {
423 if (!Backward) {
424 for (BinaryBasicBlock *Succ : BB->successors())
425 Worklist.push(x: Succ);
426 for (BinaryBasicBlock *LandingPad : BB->landing_pads())
427 Worklist.push(x: LandingPad);
428 } else {
429 for (BinaryBasicBlock *Pred : BB->predecessors())
430 Worklist.push(x: Pred);
431 for (BinaryBasicBlock *Thrower : BB->throwers())
432 Worklist.push(x: Thrower);
433 }
434 }
435 } // end while (!Worklist.empty())
436 }
437};
438
439/// Define an iterator for navigating the expressions calculated by a
440/// dataflow analysis at each program point, when they are backed by a
441/// BitVector.
442class ExprIterator {
443 const BitVector *BV;
444 const std::vector<MCInst *> &Expressions;
445 int Idx;
446
447public:
448 using iterator_category = std::forward_iterator_tag;
449 using value_type = const MCInst *;
450 using difference_type = std::ptrdiff_t;
451 using pointer = value_type *;
452 using reference = value_type &;
453
454 ExprIterator &operator++() {
455 assert(Idx != -1 && "Iterator already at the end");
456 Idx = BV->find_next(Prev: Idx);
457 return *this;
458 }
459 ExprIterator operator++(int) {
460 assert(Idx != -1 && "Iterator already at the end");
461 ExprIterator Ret = *this;
462 ++(*this);
463 return Ret;
464 }
465 bool operator==(const ExprIterator &Other) const { return Idx == Other.Idx; }
466 bool operator!=(const ExprIterator &Other) const { return Idx != Other.Idx; }
467 MCInst *operator*() {
468 assert(Idx != -1 && "Invalid access to end iterator");
469 return Expressions[Idx];
470 }
471 ExprIterator(const BitVector *BV, const std::vector<MCInst *> &Exprs)
472 : BV(BV), Expressions(Exprs) {
473 Idx = BV->find_first();
474 }
475 ExprIterator(const BitVector *BV, const std::vector<MCInst *> &Exprs, int Idx)
476 : BV(BV), Expressions(Exprs), Idx(Idx) {}
477
478 int getBitVectorIndex() const { return Idx; }
479};
480
481/// Specialization of DataflowAnalysis whose state specifically stores
482/// a set of instructions.
483template <typename Derived, bool Backward = false,
484 typename StatePrinterTy = StatePrinter<BitVector>>
485class InstrsDataflowAnalysis
486 : public DataflowAnalysis<Derived, BitVector, Backward, StatePrinterTy> {
487public:
488 /// These iterator functions offer access to the set of pointers to
489 /// instructions in a given program point
490 template <typename T> ExprIterator expr_begin(const T &Point) const {
491 if (auto State = this->getStateAt(Point))
492 return ExprIterator(&*State, Expressions);
493 return expr_end();
494 }
495 ExprIterator expr_begin(const BitVector &BV) const {
496 return ExprIterator(&BV, Expressions);
497 }
498 ExprIterator expr_end() const {
499 return ExprIterator(nullptr, Expressions, -1);
500 }
501
502 /// Used to size the set of expressions/definitions being tracked by the
503 /// dataflow analysis
504 uint64_t NumInstrs{0};
505 /// We put every MCInst we want to track (which one representing an
506 /// expression/def) into a vector because we need to associate them with
507 /// small numbers. They will be tracked via BitVectors throughout the
508 /// dataflow analysis.
509 std::vector<MCInst *> Expressions;
510 /// Maps expressions defs (MCInsts) to its index in the Expressions vector
511 std::unordered_map<const MCInst *, uint64_t> ExprToIdx;
512
513 /// Return whether \p Expr is in the state set at \p Point
514 bool count(ProgramPoint Point, const MCInst &Expr) const {
515 auto IdxIter = ExprToIdx.find(x: &Expr);
516 assert(IdxIter != ExprToIdx.end() && "Invalid Expr");
517 return (*this->getStateAt(Point))[IdxIter->second];
518 }
519
520 bool count(const MCInst &Point, const MCInst &Expr) const {
521 auto IdxIter = ExprToIdx.find(x: &Expr);
522 assert(IdxIter != ExprToIdx.end() && "Invalid Expr");
523 return (*this->getStateAt(Point))[IdxIter->second];
524 }
525
526 /// Return whether \p Expr is in the state set at the instr of index
527 /// \p PointIdx
528 bool count(unsigned PointIdx, const MCInst &Expr) const {
529 return count(*Expressions[PointIdx], Expr);
530 }
531
532 InstrsDataflowAnalysis(BinaryFunction &BF,
533 MCPlusBuilder::AllocatorIdTy AllocId = 0)
534 : DataflowAnalysis<Derived, BitVector, Backward, StatePrinterTy>(
535 BF, AllocId) {}
536 virtual ~InstrsDataflowAnalysis() {}
537};
538
539} // namespace bolt
540
541/// DenseMapInfo allows us to use the DenseMap LLVM data structure to store
542/// ProgramPoints.
543template <> struct DenseMapInfo<bolt::ProgramPoint> {
544 static inline bolt::ProgramPoint getEmptyKey() {
545 uintptr_t Val = static_cast<uintptr_t>(-1);
546 Val <<= PointerLikeTypeTraits<MCInst *>::NumLowBitsAvailable;
547 return bolt::ProgramPoint(reinterpret_cast<MCInst *>(Val));
548 }
549 static inline bolt::ProgramPoint getTombstoneKey() {
550 uintptr_t Val = static_cast<uintptr_t>(-2);
551 Val <<= PointerLikeTypeTraits<MCInst *>::NumLowBitsAvailable;
552 return bolt::ProgramPoint(reinterpret_cast<MCInst *>(Val));
553 }
554 static unsigned getHashValue(const bolt::ProgramPoint &PP) {
555 return (unsigned((uintptr_t)PP.Data.BB) >> 4) ^
556 (unsigned((uintptr_t)PP.Data.BB) >> 9);
557 }
558 static bool isEqual(const bolt::ProgramPoint &LHS,
559 const bolt::ProgramPoint &RHS) {
560 return LHS.Data.BB == RHS.Data.BB;
561 }
562};
563
564raw_ostream &operator<<(raw_ostream &OS, const BitVector &Val);
565
566} // namespace llvm
567
568#endif
569

source code of bolt/include/bolt/Passes/DataflowAnalysis.h