1 | //===- DataFlowFramework.h - A generic framework for data-flow analysis ---===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines a generic framework for writing data-flow analysis in MLIR. |
10 | // The framework consists of a solver, which runs the fixed-point iteration and |
11 | // manages analysis dependencies, and a data-flow analysis class used to |
12 | // implement specific analyses. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #ifndef MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H |
17 | #define MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H |
18 | |
19 | #include "mlir/IR/Operation.h" |
20 | #include "mlir/Support/StorageUniquer.h" |
21 | #include "llvm/ADT/SetVector.h" |
22 | #include "llvm/Support/Compiler.h" |
23 | #include "llvm/Support/TypeName.h" |
24 | #include <queue> |
25 | |
26 | namespace mlir { |
27 | |
28 | //===----------------------------------------------------------------------===// |
29 | // ChangeResult |
30 | //===----------------------------------------------------------------------===// |
31 | |
32 | /// A result type used to indicate if a change happened. Boolean operations on |
33 | /// ChangeResult behave as though `Change` is truth. |
34 | enum class [[nodiscard]] ChangeResult { |
35 | NoChange, |
36 | Change, |
37 | }; |
38 | inline ChangeResult operator|(ChangeResult lhs, ChangeResult rhs) { |
39 | return lhs == ChangeResult::Change ? lhs : rhs; |
40 | } |
41 | inline ChangeResult &operator|=(ChangeResult &lhs, ChangeResult rhs) { |
42 | lhs = lhs | rhs; |
43 | return lhs; |
44 | } |
45 | inline ChangeResult operator&(ChangeResult lhs, ChangeResult rhs) { |
46 | return lhs == ChangeResult::NoChange ? lhs : rhs; |
47 | } |
48 | |
49 | /// Forward declare the analysis state class. |
50 | class AnalysisState; |
51 | |
52 | //===----------------------------------------------------------------------===// |
53 | // GenericProgramPoint |
54 | //===----------------------------------------------------------------------===// |
55 | |
56 | /// Abstract class for generic program points. In classical data-flow analysis, |
57 | /// programs points represent positions in a program to which lattice elements |
58 | /// are attached. In sparse data-flow analysis, these can be SSA values, and in |
59 | /// dense data-flow analysis, these are the program points before and after |
60 | /// every operation. |
61 | /// |
62 | /// In the general MLIR data-flow analysis framework, program points are an |
63 | /// extensible concept. Program points are uniquely identifiable objects to |
64 | /// which analysis states can be attached. The semantics of program points are |
65 | /// defined by the analyses that specify their transfer functions. |
66 | /// |
67 | /// Program points are implemented using MLIR's storage uniquer framework and |
68 | /// type ID system to provide RTTI. |
69 | class GenericProgramPoint : public StorageUniquer::BaseStorage { |
70 | public: |
71 | virtual ~GenericProgramPoint(); |
72 | |
73 | /// Get the abstract program point's type identifier. |
74 | TypeID getTypeID() const { return typeID; } |
75 | |
76 | /// Get a derived source location for the program point. |
77 | virtual Location getLoc() const = 0; |
78 | |
79 | /// Print the program point. |
80 | virtual void print(raw_ostream &os) const = 0; |
81 | |
82 | protected: |
83 | /// Create an abstract program point with type identifier. |
84 | explicit GenericProgramPoint(TypeID typeID) : typeID(typeID) {} |
85 | |
86 | private: |
87 | /// The type identifier of the program point. |
88 | TypeID typeID; |
89 | }; |
90 | |
91 | //===----------------------------------------------------------------------===// |
92 | // GenericProgramPointBase |
93 | //===----------------------------------------------------------------------===// |
94 | |
95 | /// Base class for generic program points based on a concrete program point |
96 | /// type and a content key. This class defines the common methods required for |
97 | /// operability with the storage uniquer framework. |
98 | /// |
99 | /// The provided key type uniquely identifies the concrete program point |
100 | /// instance and are the data members of the class. |
101 | template <typename ConcreteT, typename Value> |
102 | class GenericProgramPointBase : public GenericProgramPoint { |
103 | public: |
104 | /// The concrete key type used by the storage uniquer. This class is uniqued |
105 | /// by its contents. |
106 | using KeyTy = Value; |
107 | /// Alias for the base class. |
108 | using Base = GenericProgramPointBase<ConcreteT, Value>; |
109 | |
110 | /// Construct an instance of the program point using the provided value and |
111 | /// the type ID of the concrete type. |
112 | template <typename ValueT> |
113 | explicit GenericProgramPointBase(ValueT &&value) |
114 | : GenericProgramPoint(TypeID::get<ConcreteT>()), |
115 | value(std::forward<ValueT>(value)) {} |
116 | |
117 | /// Get a uniqued instance of this program point class with the given |
118 | /// arguments. |
119 | template <typename... Args> |
120 | static ConcreteT *get(StorageUniquer &uniquer, Args &&...args) { |
121 | return uniquer.get<ConcreteT>(/*initFn=*/{}, std::forward<Args>(args)...); |
122 | } |
123 | |
124 | /// Allocate space for a program point and construct it in-place. |
125 | template <typename ValueT> |
126 | static ConcreteT *construct(StorageUniquer::StorageAllocator &alloc, |
127 | ValueT &&value) { |
128 | return new (alloc.allocate<ConcreteT>()) |
129 | ConcreteT(std::forward<ValueT>(value)); |
130 | } |
131 | |
132 | /// Two program points are equal if their values are equal. |
133 | bool operator==(const Value &value) const { return this->value == value; } |
134 | |
135 | /// Provide LLVM-style RTTI using type IDs. |
136 | static bool classof(const GenericProgramPoint *point) { |
137 | return point->getTypeID() == TypeID::get<ConcreteT>(); |
138 | } |
139 | |
140 | /// Get the contents of the program point. |
141 | const Value &getValue() const { return value; } |
142 | |
143 | private: |
144 | /// The program point value. |
145 | Value value; |
146 | }; |
147 | |
148 | //===----------------------------------------------------------------------===// |
149 | // ProgramPoint |
150 | //===----------------------------------------------------------------------===// |
151 | |
152 | /// Fundamental IR components are supported as first-class program points. |
153 | struct ProgramPoint |
154 | : public PointerUnion<GenericProgramPoint *, Operation *, Value, Block *> { |
155 | using ParentTy = |
156 | PointerUnion<GenericProgramPoint *, Operation *, Value, Block *>; |
157 | /// Inherit constructors. |
158 | using ParentTy::PointerUnion; |
159 | /// Allow implicit conversion from the parent type. |
160 | ProgramPoint(ParentTy point = nullptr) : ParentTy(point) {} |
161 | /// Allow implicit conversions from operation wrappers. |
162 | /// TODO: For Windows only. Find a better solution. |
163 | template <typename OpT, typename = std::enable_if_t< |
164 | std::is_convertible<OpT, Operation *>::value && |
165 | !std::is_same<OpT, Operation *>::value>> |
166 | ProgramPoint(OpT op) : ParentTy(op) {} |
167 | |
168 | /// Print the program point. |
169 | void print(raw_ostream &os) const; |
170 | |
171 | /// Get the source location of the program point. |
172 | Location getLoc() const; |
173 | }; |
174 | |
175 | /// Forward declaration of the data-flow analysis class. |
176 | class DataFlowAnalysis; |
177 | |
178 | //===----------------------------------------------------------------------===// |
179 | // DataFlowConfig |
180 | //===----------------------------------------------------------------------===// |
181 | |
182 | /// Configuration class for data flow solver and child analyses. Follows the |
183 | /// fluent API pattern. |
184 | class DataFlowConfig { |
185 | public: |
186 | DataFlowConfig() = default; |
187 | |
188 | /// Set whether the solver should operate interpocedurally, i.e. enter the |
189 | /// callee body when available. Interprocedural analyses may be more precise, |
190 | /// but also more expensive as more states need to be computed and the |
191 | /// fixpoint convergence takes longer. |
192 | DataFlowConfig &setInterprocedural(bool enable) { |
193 | interprocedural = enable; |
194 | return *this; |
195 | } |
196 | |
197 | /// Return `true` if the solver operates interprocedurally, `false` otherwise. |
198 | bool isInterprocedural() const { return interprocedural; } |
199 | |
200 | private: |
201 | bool interprocedural = true; |
202 | }; |
203 | |
204 | //===----------------------------------------------------------------------===// |
205 | // DataFlowSolver |
206 | //===----------------------------------------------------------------------===// |
207 | |
208 | /// The general data-flow analysis solver. This class is responsible for |
209 | /// orchestrating child data-flow analyses, running the fixed-point iteration |
210 | /// algorithm, managing analysis state and program point memory, and tracking |
211 | /// dependencies between analyses, program points, and analysis states. |
212 | /// |
213 | /// Steps to run a data-flow analysis: |
214 | /// |
215 | /// 1. Load and initialize children analyses. Children analyses are instantiated |
216 | /// in the solver and initialized, building their dependency relations. |
217 | /// 2. Configure and run the analysis. The solver invokes the children analyses |
218 | /// according to their dependency relations until a fixed point is reached. |
219 | /// 3. Query analysis state results from the solver. |
220 | /// |
221 | /// TODO: Optimize the internal implementation of the solver. |
222 | class DataFlowSolver { |
223 | public: |
224 | explicit DataFlowSolver(const DataFlowConfig &config = DataFlowConfig()) |
225 | : config(config) {} |
226 | |
227 | /// Load an analysis into the solver. Return the analysis instance. |
228 | template <typename AnalysisT, typename... Args> |
229 | AnalysisT *load(Args &&...args); |
230 | |
231 | /// Initialize the children analyses starting from the provided top-level |
232 | /// operation and run the analysis until fixpoint. |
233 | LogicalResult initializeAndRun(Operation *top); |
234 | |
235 | /// Lookup an analysis state for the given program point. Returns null if one |
236 | /// does not exist. |
237 | template <typename StateT, typename PointT> |
238 | const StateT *lookupState(PointT point) const { |
239 | auto it = analysisStates.find({ProgramPoint(point), TypeID::get<StateT>()}); |
240 | if (it == analysisStates.end()) |
241 | return nullptr; |
242 | return static_cast<const StateT *>(it->second.get()); |
243 | } |
244 | |
245 | /// Get a uniqued program point instance. If one is not present, it is |
246 | /// created with the provided arguments. |
247 | template <typename PointT, typename... Args> |
248 | PointT *getProgramPoint(Args &&...args) { |
249 | return PointT::get(uniquer, std::forward<Args>(args)...); |
250 | } |
251 | |
252 | /// A work item on the solver queue is a program point, child analysis pair. |
253 | /// Each item is processed by invoking the child analysis at the program |
254 | /// point. |
255 | using WorkItem = std::pair<ProgramPoint, DataFlowAnalysis *>; |
256 | /// Push a work item onto the worklist. |
257 | void enqueue(WorkItem item) { worklist.push(x: std::move(item)); } |
258 | |
259 | /// Get the state associated with the given program point. If it does not |
260 | /// exist, create an uninitialized state. |
261 | template <typename StateT, typename PointT> |
262 | StateT *getOrCreateState(PointT point); |
263 | |
264 | /// Propagate an update to an analysis state if it changed by pushing |
265 | /// dependent work items to the back of the queue. |
266 | void propagateIfChanged(AnalysisState *state, ChangeResult changed); |
267 | |
268 | /// Get the configuration of the solver. |
269 | const DataFlowConfig &getConfig() const { return config; } |
270 | |
271 | private: |
272 | /// Configuration of the dataflow solver. |
273 | DataFlowConfig config; |
274 | |
275 | /// The solver's work queue. Work items can be inserted to the front of the |
276 | /// queue to be processed greedily, speeding up computations that otherwise |
277 | /// quickly degenerate to quadratic due to propagation of state updates. |
278 | std::queue<WorkItem> worklist; |
279 | |
280 | /// Type-erased instances of the children analyses. |
281 | SmallVector<std::unique_ptr<DataFlowAnalysis>> childAnalyses; |
282 | |
283 | /// The storage uniquer instance that owns the memory of the allocated program |
284 | /// points. |
285 | StorageUniquer uniquer; |
286 | |
287 | /// A type-erased map of program points to associated analysis states for |
288 | /// first-class program points. |
289 | DenseMap<std::pair<ProgramPoint, TypeID>, std::unique_ptr<AnalysisState>> |
290 | analysisStates; |
291 | |
292 | /// Allow the base child analysis class to access the internals of the solver. |
293 | friend class DataFlowAnalysis; |
294 | }; |
295 | |
296 | //===----------------------------------------------------------------------===// |
297 | // AnalysisState |
298 | //===----------------------------------------------------------------------===// |
299 | |
300 | /// Base class for generic analysis states. Analysis states contain data-flow |
301 | /// information that are attached to program points and which evolve as the |
302 | /// analysis iterates. |
303 | /// |
304 | /// This class places no restrictions on the semantics of analysis states beyond |
305 | /// these requirements. |
306 | /// |
307 | /// 1. Querying the state of a program point prior to visiting that point |
308 | /// results in uninitialized state. Analyses must be aware of unintialized |
309 | /// states. |
310 | /// 2. Analysis states can reach fixpoints, where subsequent updates will never |
311 | /// trigger a change in the state. |
312 | /// 3. Analysis states that are uninitialized can be forcefully initialized to a |
313 | /// default value. |
314 | class AnalysisState { |
315 | public: |
316 | virtual ~AnalysisState(); |
317 | |
318 | /// Create the analysis state at the given program point. |
319 | AnalysisState(ProgramPoint point) : point(point) {} |
320 | |
321 | /// Returns the program point this state is located at. |
322 | ProgramPoint getPoint() const { return point; } |
323 | |
324 | /// Print the contents of the analysis state. |
325 | virtual void print(raw_ostream &os) const = 0; |
326 | LLVM_DUMP_METHOD void dump() const; |
327 | |
328 | /// Add a dependency to this analysis state on a program point and an |
329 | /// analysis. If this state is updated, the analysis will be invoked on the |
330 | /// given program point again (in onUpdate()). |
331 | void addDependency(ProgramPoint dependent, DataFlowAnalysis *analysis); |
332 | |
333 | protected: |
334 | /// This function is called by the solver when the analysis state is updated |
335 | /// to enqueue more work items. For example, if a state tracks dependents |
336 | /// through the IR (e.g. use-def chains), this function can be implemented to |
337 | /// push those dependents on the worklist. |
338 | virtual void onUpdate(DataFlowSolver *solver) const { |
339 | for (const DataFlowSolver::WorkItem &item : dependents) |
340 | solver->enqueue(item); |
341 | } |
342 | |
343 | /// The program point to which the state belongs. |
344 | ProgramPoint point; |
345 | |
346 | #if LLVM_ENABLE_ABI_BREAKING_CHECKS |
347 | /// When compiling with debugging, keep a name for the analysis state. |
348 | StringRef debugName; |
349 | #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS |
350 | |
351 | private: |
352 | /// The dependency relations originating from this analysis state. An entry |
353 | /// `state -> (analysis, point)` is created when `analysis` queries `state` |
354 | /// when updating `point`. |
355 | /// |
356 | /// When this state is updated, all dependent child analysis invocations are |
357 | /// pushed to the back of the queue. Use a `SetVector` to keep the analysis |
358 | /// deterministic. |
359 | /// |
360 | /// Store the dependents on the analysis state for efficiency. |
361 | SetVector<DataFlowSolver::WorkItem> dependents; |
362 | |
363 | /// Allow the framework to access the dependents. |
364 | friend class DataFlowSolver; |
365 | }; |
366 | |
367 | //===----------------------------------------------------------------------===// |
368 | // DataFlowAnalysis |
369 | //===----------------------------------------------------------------------===// |
370 | |
371 | /// Base class for all data-flow analyses. A child analysis is expected to build |
372 | /// an initial dependency graph (and optionally provide an initial state) when |
373 | /// initialized and define transfer functions when visiting program points. |
374 | /// |
375 | /// In classical data-flow analysis, the dependency graph is fixed and analyses |
376 | /// define explicit transfer functions between input states and output states. |
377 | /// In this framework, however, the dependency graph can change during the |
378 | /// analysis, and transfer functions are opaque such that the solver doesn't |
379 | /// know what states calling `visit` on an analysis will be updated. This allows |
380 | /// multiple analyses to plug in and provide values for the same state. |
381 | /// |
382 | /// Generally, when an analysis queries an uninitialized state, it is expected |
383 | /// to "bail out", i.e., not provide any updates. When the value is initialized, |
384 | /// the solver will re-invoke the analysis. If the solver exhausts its worklist, |
385 | /// however, and there are still uninitialized states, the solver "nudges" the |
386 | /// analyses by default-initializing those states. |
387 | class DataFlowAnalysis { |
388 | public: |
389 | virtual ~DataFlowAnalysis(); |
390 | |
391 | /// Create an analysis with a reference to the parent solver. |
392 | explicit DataFlowAnalysis(DataFlowSolver &solver); |
393 | |
394 | /// Initialize the analysis from the provided top-level operation by building |
395 | /// an initial dependency graph between all program points of interest. This |
396 | /// can be implemented by calling `visit` on all program points of interest |
397 | /// below the top-level operation. |
398 | /// |
399 | /// An analysis can optionally provide initial values to certain analysis |
400 | /// states to influence the evolution of the analysis. |
401 | virtual LogicalResult initialize(Operation *top) = 0; |
402 | |
403 | /// Visit the given program point. This function is invoked by the solver on |
404 | /// this analysis with a given program point when a dependent analysis state |
405 | /// is updated. The function is similar to a transfer function; it queries |
406 | /// certain analysis states and sets other states. |
407 | /// |
408 | /// The function is expected to create dependencies on queried states and |
409 | /// propagate updates on changed states. A dependency can be created by |
410 | /// calling `addDependency` between the input state and a program point, |
411 | /// indicating that, if the state is updated, the solver should invoke `solve` |
412 | /// on the program point. The dependent point does not have to be the same as |
413 | /// the provided point. An update to a state is propagated by calling |
414 | /// `propagateIfChange` on the state. If the state has changed, then all its |
415 | /// dependents are placed on the worklist. |
416 | /// |
417 | /// The dependency graph does not need to be static. Each invocation of |
418 | /// `visit` can add new dependencies, but these dependencies will not be |
419 | /// dynamically added to the worklist because the solver doesn't know what |
420 | /// will provide a value for then. |
421 | virtual LogicalResult visit(ProgramPoint point) = 0; |
422 | |
423 | protected: |
424 | /// Create a dependency between the given analysis state and program point |
425 | /// on this analysis. |
426 | void addDependency(AnalysisState *state, ProgramPoint point); |
427 | |
428 | /// Propagate an update to a state if it changed. |
429 | void propagateIfChanged(AnalysisState *state, ChangeResult changed); |
430 | |
431 | /// Register a custom program point class. |
432 | template <typename PointT> |
433 | void registerPointKind() { |
434 | solver.uniquer.registerParametricStorageType<PointT>(); |
435 | } |
436 | |
437 | /// Get or create a custom program point. |
438 | template <typename PointT, typename... Args> |
439 | PointT *getProgramPoint(Args &&...args) { |
440 | return solver.getProgramPoint<PointT>(std::forward<Args>(args)...); |
441 | } |
442 | |
443 | /// Get the analysis state associated with the program point. The returned |
444 | /// state is expected to be "write-only", and any updates need to be |
445 | /// propagated by `propagateIfChanged`. |
446 | template <typename StateT, typename PointT> |
447 | StateT *getOrCreate(PointT point) { |
448 | return solver.getOrCreateState<StateT>(point); |
449 | } |
450 | |
451 | /// Get a read-only analysis state for the given point and create a dependency |
452 | /// on `dependent`. If the return state is updated elsewhere, this analysis is |
453 | /// re-invoked on the dependent. |
454 | template <typename StateT, typename PointT> |
455 | const StateT *getOrCreateFor(ProgramPoint dependent, PointT point) { |
456 | StateT *state = getOrCreate<StateT>(point); |
457 | addDependency(state, point: dependent); |
458 | return state; |
459 | } |
460 | |
461 | /// Return the configuration of the solver used for this analysis. |
462 | const DataFlowConfig &getSolverConfig() const { return solver.getConfig(); } |
463 | |
464 | #if LLVM_ENABLE_ABI_BREAKING_CHECKS |
465 | /// When compiling with debugging, keep a name for the analyis. |
466 | StringRef debugName; |
467 | #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS |
468 | |
469 | private: |
470 | /// The parent data-flow solver. |
471 | DataFlowSolver &solver; |
472 | |
473 | /// Allow the data-flow solver to access the internals of this class. |
474 | friend class DataFlowSolver; |
475 | }; |
476 | |
477 | template <typename AnalysisT, typename... Args> |
478 | AnalysisT *DataFlowSolver::load(Args &&...args) { |
479 | childAnalyses.emplace_back(new AnalysisT(*this, std::forward<Args>(args)...)); |
480 | #if LLVM_ENABLE_ABI_BREAKING_CHECKS |
481 | childAnalyses.back().get()->debugName = llvm::getTypeName<AnalysisT>(); |
482 | #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS |
483 | return static_cast<AnalysisT *>(childAnalyses.back().get()); |
484 | } |
485 | |
486 | template <typename StateT, typename PointT> |
487 | StateT *DataFlowSolver::getOrCreateState(PointT point) { |
488 | std::unique_ptr<AnalysisState> &state = |
489 | analysisStates[{ProgramPoint(point), TypeID::get<StateT>()}]; |
490 | if (!state) { |
491 | state = std::unique_ptr<StateT>(new StateT(point)); |
492 | #if LLVM_ENABLE_ABI_BREAKING_CHECKS |
493 | state->debugName = llvm::getTypeName<StateT>(); |
494 | #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS |
495 | } |
496 | return static_cast<StateT *>(state.get()); |
497 | } |
498 | |
499 | inline raw_ostream &operator<<(raw_ostream &os, const AnalysisState &state) { |
500 | state.print(os); |
501 | return os; |
502 | } |
503 | |
504 | inline raw_ostream &operator<<(raw_ostream &os, ProgramPoint point) { |
505 | point.print(os); |
506 | return os; |
507 | } |
508 | |
509 | } // end namespace mlir |
510 | |
511 | namespace llvm { |
512 | /// Allow hashing of program points. |
513 | template <> |
514 | struct DenseMapInfo<mlir::ProgramPoint> |
515 | : public DenseMapInfo<mlir::ProgramPoint::ParentTy> {}; |
516 | |
517 | // Allow llvm::cast style functions. |
518 | template <typename To> |
519 | struct CastInfo<To, mlir::ProgramPoint> |
520 | : public CastInfo<To, mlir::ProgramPoint::PointerUnion> {}; |
521 | |
522 | template <typename To> |
523 | struct CastInfo<To, const mlir::ProgramPoint> |
524 | : public CastInfo<To, const mlir::ProgramPoint::PointerUnion> {}; |
525 | |
526 | } // end namespace llvm |
527 | |
528 | #endif // MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H |
529 | |