1//===- DataFlowFramework.h - A generic framework for data-flow analysis ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a generic framework for writing data-flow analysis in MLIR.
10// The framework consists of a solver, which runs the fixed-point iteration and
11// manages analysis dependencies, and a data-flow analysis class used to
12// implement specific analyses.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H
17#define MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H
18
19#include "mlir/IR/Operation.h"
20#include "mlir/Support/StorageUniquer.h"
21#include "llvm/ADT/SetVector.h"
22#include "llvm/Support/Compiler.h"
23#include "llvm/Support/TypeName.h"
24#include <queue>
25
26namespace mlir {
27
28//===----------------------------------------------------------------------===//
29// ChangeResult
30//===----------------------------------------------------------------------===//
31
32/// A result type used to indicate if a change happened. Boolean operations on
33/// ChangeResult behave as though `Change` is truth.
34enum class [[nodiscard]] ChangeResult {
35 NoChange,
36 Change,
37};
38inline ChangeResult operator|(ChangeResult lhs, ChangeResult rhs) {
39 return lhs == ChangeResult::Change ? lhs : rhs;
40}
41inline ChangeResult &operator|=(ChangeResult &lhs, ChangeResult rhs) {
42 lhs = lhs | rhs;
43 return lhs;
44}
45inline ChangeResult operator&(ChangeResult lhs, ChangeResult rhs) {
46 return lhs == ChangeResult::NoChange ? lhs : rhs;
47}
48
49/// Forward declare the analysis state class.
50class AnalysisState;
51
52//===----------------------------------------------------------------------===//
53// GenericProgramPoint
54//===----------------------------------------------------------------------===//
55
56/// Abstract class for generic program points. In classical data-flow analysis,
57/// programs points represent positions in a program to which lattice elements
58/// are attached. In sparse data-flow analysis, these can be SSA values, and in
59/// dense data-flow analysis, these are the program points before and after
60/// every operation.
61///
62/// In the general MLIR data-flow analysis framework, program points are an
63/// extensible concept. Program points are uniquely identifiable objects to
64/// which analysis states can be attached. The semantics of program points are
65/// defined by the analyses that specify their transfer functions.
66///
67/// Program points are implemented using MLIR's storage uniquer framework and
68/// type ID system to provide RTTI.
69class GenericProgramPoint : public StorageUniquer::BaseStorage {
70public:
71 virtual ~GenericProgramPoint();
72
73 /// Get the abstract program point's type identifier.
74 TypeID getTypeID() const { return typeID; }
75
76 /// Get a derived source location for the program point.
77 virtual Location getLoc() const = 0;
78
79 /// Print the program point.
80 virtual void print(raw_ostream &os) const = 0;
81
82protected:
83 /// Create an abstract program point with type identifier.
84 explicit GenericProgramPoint(TypeID typeID) : typeID(typeID) {}
85
86private:
87 /// The type identifier of the program point.
88 TypeID typeID;
89};
90
91//===----------------------------------------------------------------------===//
92// GenericProgramPointBase
93//===----------------------------------------------------------------------===//
94
95/// Base class for generic program points based on a concrete program point
96/// type and a content key. This class defines the common methods required for
97/// operability with the storage uniquer framework.
98///
99/// The provided key type uniquely identifies the concrete program point
100/// instance and are the data members of the class.
101template <typename ConcreteT, typename Value>
102class GenericProgramPointBase : public GenericProgramPoint {
103public:
104 /// The concrete key type used by the storage uniquer. This class is uniqued
105 /// by its contents.
106 using KeyTy = Value;
107 /// Alias for the base class.
108 using Base = GenericProgramPointBase<ConcreteT, Value>;
109
110 /// Construct an instance of the program point using the provided value and
111 /// the type ID of the concrete type.
112 template <typename ValueT>
113 explicit GenericProgramPointBase(ValueT &&value)
114 : GenericProgramPoint(TypeID::get<ConcreteT>()),
115 value(std::forward<ValueT>(value)) {}
116
117 /// Get a uniqued instance of this program point class with the given
118 /// arguments.
119 template <typename... Args>
120 static ConcreteT *get(StorageUniquer &uniquer, Args &&...args) {
121 return uniquer.get<ConcreteT>(/*initFn=*/{}, std::forward<Args>(args)...);
122 }
123
124 /// Allocate space for a program point and construct it in-place.
125 template <typename ValueT>
126 static ConcreteT *construct(StorageUniquer::StorageAllocator &alloc,
127 ValueT &&value) {
128 return new (alloc.allocate<ConcreteT>())
129 ConcreteT(std::forward<ValueT>(value));
130 }
131
132 /// Two program points are equal if their values are equal.
133 bool operator==(const Value &value) const { return this->value == value; }
134
135 /// Provide LLVM-style RTTI using type IDs.
136 static bool classof(const GenericProgramPoint *point) {
137 return point->getTypeID() == TypeID::get<ConcreteT>();
138 }
139
140 /// Get the contents of the program point.
141 const Value &getValue() const { return value; }
142
143private:
144 /// The program point value.
145 Value value;
146};
147
148//===----------------------------------------------------------------------===//
149// ProgramPoint
150//===----------------------------------------------------------------------===//
151
152/// Fundamental IR components are supported as first-class program points.
153struct ProgramPoint
154 : public PointerUnion<GenericProgramPoint *, Operation *, Value, Block *> {
155 using ParentTy =
156 PointerUnion<GenericProgramPoint *, Operation *, Value, Block *>;
157 /// Inherit constructors.
158 using ParentTy::PointerUnion;
159 /// Allow implicit conversion from the parent type.
160 ProgramPoint(ParentTy point = nullptr) : ParentTy(point) {}
161 /// Allow implicit conversions from operation wrappers.
162 /// TODO: For Windows only. Find a better solution.
163 template <typename OpT, typename = std::enable_if_t<
164 std::is_convertible<OpT, Operation *>::value &&
165 !std::is_same<OpT, Operation *>::value>>
166 ProgramPoint(OpT op) : ParentTy(op) {}
167
168 /// Print the program point.
169 void print(raw_ostream &os) const;
170
171 /// Get the source location of the program point.
172 Location getLoc() const;
173};
174
175/// Forward declaration of the data-flow analysis class.
176class DataFlowAnalysis;
177
178//===----------------------------------------------------------------------===//
179// DataFlowConfig
180//===----------------------------------------------------------------------===//
181
182/// Configuration class for data flow solver and child analyses. Follows the
183/// fluent API pattern.
184class DataFlowConfig {
185public:
186 DataFlowConfig() = default;
187
188 /// Set whether the solver should operate interpocedurally, i.e. enter the
189 /// callee body when available. Interprocedural analyses may be more precise,
190 /// but also more expensive as more states need to be computed and the
191 /// fixpoint convergence takes longer.
192 DataFlowConfig &setInterprocedural(bool enable) {
193 interprocedural = enable;
194 return *this;
195 }
196
197 /// Return `true` if the solver operates interprocedurally, `false` otherwise.
198 bool isInterprocedural() const { return interprocedural; }
199
200private:
201 bool interprocedural = true;
202};
203
204//===----------------------------------------------------------------------===//
205// DataFlowSolver
206//===----------------------------------------------------------------------===//
207
208/// The general data-flow analysis solver. This class is responsible for
209/// orchestrating child data-flow analyses, running the fixed-point iteration
210/// algorithm, managing analysis state and program point memory, and tracking
211/// dependencies between analyses, program points, and analysis states.
212///
213/// Steps to run a data-flow analysis:
214///
215/// 1. Load and initialize children analyses. Children analyses are instantiated
216/// in the solver and initialized, building their dependency relations.
217/// 2. Configure and run the analysis. The solver invokes the children analyses
218/// according to their dependency relations until a fixed point is reached.
219/// 3. Query analysis state results from the solver.
220///
221/// TODO: Optimize the internal implementation of the solver.
222class DataFlowSolver {
223public:
224 explicit DataFlowSolver(const DataFlowConfig &config = DataFlowConfig())
225 : config(config) {}
226
227 /// Load an analysis into the solver. Return the analysis instance.
228 template <typename AnalysisT, typename... Args>
229 AnalysisT *load(Args &&...args);
230
231 /// Initialize the children analyses starting from the provided top-level
232 /// operation and run the analysis until fixpoint.
233 LogicalResult initializeAndRun(Operation *top);
234
235 /// Lookup an analysis state for the given program point. Returns null if one
236 /// does not exist.
237 template <typename StateT, typename PointT>
238 const StateT *lookupState(PointT point) const {
239 auto it = analysisStates.find({ProgramPoint(point), TypeID::get<StateT>()});
240 if (it == analysisStates.end())
241 return nullptr;
242 return static_cast<const StateT *>(it->second.get());
243 }
244
245 /// Get a uniqued program point instance. If one is not present, it is
246 /// created with the provided arguments.
247 template <typename PointT, typename... Args>
248 PointT *getProgramPoint(Args &&...args) {
249 return PointT::get(uniquer, std::forward<Args>(args)...);
250 }
251
252 /// A work item on the solver queue is a program point, child analysis pair.
253 /// Each item is processed by invoking the child analysis at the program
254 /// point.
255 using WorkItem = std::pair<ProgramPoint, DataFlowAnalysis *>;
256 /// Push a work item onto the worklist.
257 void enqueue(WorkItem item) { worklist.push(x: std::move(item)); }
258
259 /// Get the state associated with the given program point. If it does not
260 /// exist, create an uninitialized state.
261 template <typename StateT, typename PointT>
262 StateT *getOrCreateState(PointT point);
263
264 /// Propagate an update to an analysis state if it changed by pushing
265 /// dependent work items to the back of the queue.
266 void propagateIfChanged(AnalysisState *state, ChangeResult changed);
267
268 /// Get the configuration of the solver.
269 const DataFlowConfig &getConfig() const { return config; }
270
271private:
272 /// Configuration of the dataflow solver.
273 DataFlowConfig config;
274
275 /// The solver's work queue. Work items can be inserted to the front of the
276 /// queue to be processed greedily, speeding up computations that otherwise
277 /// quickly degenerate to quadratic due to propagation of state updates.
278 std::queue<WorkItem> worklist;
279
280 /// Type-erased instances of the children analyses.
281 SmallVector<std::unique_ptr<DataFlowAnalysis>> childAnalyses;
282
283 /// The storage uniquer instance that owns the memory of the allocated program
284 /// points.
285 StorageUniquer uniquer;
286
287 /// A type-erased map of program points to associated analysis states for
288 /// first-class program points.
289 DenseMap<std::pair<ProgramPoint, TypeID>, std::unique_ptr<AnalysisState>>
290 analysisStates;
291
292 /// Allow the base child analysis class to access the internals of the solver.
293 friend class DataFlowAnalysis;
294};
295
296//===----------------------------------------------------------------------===//
297// AnalysisState
298//===----------------------------------------------------------------------===//
299
300/// Base class for generic analysis states. Analysis states contain data-flow
301/// information that are attached to program points and which evolve as the
302/// analysis iterates.
303///
304/// This class places no restrictions on the semantics of analysis states beyond
305/// these requirements.
306///
307/// 1. Querying the state of a program point prior to visiting that point
308/// results in uninitialized state. Analyses must be aware of unintialized
309/// states.
310/// 2. Analysis states can reach fixpoints, where subsequent updates will never
311/// trigger a change in the state.
312/// 3. Analysis states that are uninitialized can be forcefully initialized to a
313/// default value.
314class AnalysisState {
315public:
316 virtual ~AnalysisState();
317
318 /// Create the analysis state at the given program point.
319 AnalysisState(ProgramPoint point) : point(point) {}
320
321 /// Returns the program point this state is located at.
322 ProgramPoint getPoint() const { return point; }
323
324 /// Print the contents of the analysis state.
325 virtual void print(raw_ostream &os) const = 0;
326 LLVM_DUMP_METHOD void dump() const;
327
328 /// Add a dependency to this analysis state on a program point and an
329 /// analysis. If this state is updated, the analysis will be invoked on the
330 /// given program point again (in onUpdate()).
331 void addDependency(ProgramPoint dependent, DataFlowAnalysis *analysis);
332
333protected:
334 /// This function is called by the solver when the analysis state is updated
335 /// to enqueue more work items. For example, if a state tracks dependents
336 /// through the IR (e.g. use-def chains), this function can be implemented to
337 /// push those dependents on the worklist.
338 virtual void onUpdate(DataFlowSolver *solver) const {
339 for (const DataFlowSolver::WorkItem &item : dependents)
340 solver->enqueue(item);
341 }
342
343 /// The program point to which the state belongs.
344 ProgramPoint point;
345
346#if LLVM_ENABLE_ABI_BREAKING_CHECKS
347 /// When compiling with debugging, keep a name for the analysis state.
348 StringRef debugName;
349#endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
350
351private:
352 /// The dependency relations originating from this analysis state. An entry
353 /// `state -> (analysis, point)` is created when `analysis` queries `state`
354 /// when updating `point`.
355 ///
356 /// When this state is updated, all dependent child analysis invocations are
357 /// pushed to the back of the queue. Use a `SetVector` to keep the analysis
358 /// deterministic.
359 ///
360 /// Store the dependents on the analysis state for efficiency.
361 SetVector<DataFlowSolver::WorkItem> dependents;
362
363 /// Allow the framework to access the dependents.
364 friend class DataFlowSolver;
365};
366
367//===----------------------------------------------------------------------===//
368// DataFlowAnalysis
369//===----------------------------------------------------------------------===//
370
371/// Base class for all data-flow analyses. A child analysis is expected to build
372/// an initial dependency graph (and optionally provide an initial state) when
373/// initialized and define transfer functions when visiting program points.
374///
375/// In classical data-flow analysis, the dependency graph is fixed and analyses
376/// define explicit transfer functions between input states and output states.
377/// In this framework, however, the dependency graph can change during the
378/// analysis, and transfer functions are opaque such that the solver doesn't
379/// know what states calling `visit` on an analysis will be updated. This allows
380/// multiple analyses to plug in and provide values for the same state.
381///
382/// Generally, when an analysis queries an uninitialized state, it is expected
383/// to "bail out", i.e., not provide any updates. When the value is initialized,
384/// the solver will re-invoke the analysis. If the solver exhausts its worklist,
385/// however, and there are still uninitialized states, the solver "nudges" the
386/// analyses by default-initializing those states.
387class DataFlowAnalysis {
388public:
389 virtual ~DataFlowAnalysis();
390
391 /// Create an analysis with a reference to the parent solver.
392 explicit DataFlowAnalysis(DataFlowSolver &solver);
393
394 /// Initialize the analysis from the provided top-level operation by building
395 /// an initial dependency graph between all program points of interest. This
396 /// can be implemented by calling `visit` on all program points of interest
397 /// below the top-level operation.
398 ///
399 /// An analysis can optionally provide initial values to certain analysis
400 /// states to influence the evolution of the analysis.
401 virtual LogicalResult initialize(Operation *top) = 0;
402
403 /// Visit the given program point. This function is invoked by the solver on
404 /// this analysis with a given program point when a dependent analysis state
405 /// is updated. The function is similar to a transfer function; it queries
406 /// certain analysis states and sets other states.
407 ///
408 /// The function is expected to create dependencies on queried states and
409 /// propagate updates on changed states. A dependency can be created by
410 /// calling `addDependency` between the input state and a program point,
411 /// indicating that, if the state is updated, the solver should invoke `solve`
412 /// on the program point. The dependent point does not have to be the same as
413 /// the provided point. An update to a state is propagated by calling
414 /// `propagateIfChange` on the state. If the state has changed, then all its
415 /// dependents are placed on the worklist.
416 ///
417 /// The dependency graph does not need to be static. Each invocation of
418 /// `visit` can add new dependencies, but these dependencies will not be
419 /// dynamically added to the worklist because the solver doesn't know what
420 /// will provide a value for then.
421 virtual LogicalResult visit(ProgramPoint point) = 0;
422
423protected:
424 /// Create a dependency between the given analysis state and program point
425 /// on this analysis.
426 void addDependency(AnalysisState *state, ProgramPoint point);
427
428 /// Propagate an update to a state if it changed.
429 void propagateIfChanged(AnalysisState *state, ChangeResult changed);
430
431 /// Register a custom program point class.
432 template <typename PointT>
433 void registerPointKind() {
434 solver.uniquer.registerParametricStorageType<PointT>();
435 }
436
437 /// Get or create a custom program point.
438 template <typename PointT, typename... Args>
439 PointT *getProgramPoint(Args &&...args) {
440 return solver.getProgramPoint<PointT>(std::forward<Args>(args)...);
441 }
442
443 /// Get the analysis state associated with the program point. The returned
444 /// state is expected to be "write-only", and any updates need to be
445 /// propagated by `propagateIfChanged`.
446 template <typename StateT, typename PointT>
447 StateT *getOrCreate(PointT point) {
448 return solver.getOrCreateState<StateT>(point);
449 }
450
451 /// Get a read-only analysis state for the given point and create a dependency
452 /// on `dependent`. If the return state is updated elsewhere, this analysis is
453 /// re-invoked on the dependent.
454 template <typename StateT, typename PointT>
455 const StateT *getOrCreateFor(ProgramPoint dependent, PointT point) {
456 StateT *state = getOrCreate<StateT>(point);
457 addDependency(state, point: dependent);
458 return state;
459 }
460
461 /// Return the configuration of the solver used for this analysis.
462 const DataFlowConfig &getSolverConfig() const { return solver.getConfig(); }
463
464#if LLVM_ENABLE_ABI_BREAKING_CHECKS
465 /// When compiling with debugging, keep a name for the analyis.
466 StringRef debugName;
467#endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
468
469private:
470 /// The parent data-flow solver.
471 DataFlowSolver &solver;
472
473 /// Allow the data-flow solver to access the internals of this class.
474 friend class DataFlowSolver;
475};
476
477template <typename AnalysisT, typename... Args>
478AnalysisT *DataFlowSolver::load(Args &&...args) {
479 childAnalyses.emplace_back(new AnalysisT(*this, std::forward<Args>(args)...));
480#if LLVM_ENABLE_ABI_BREAKING_CHECKS
481 childAnalyses.back().get()->debugName = llvm::getTypeName<AnalysisT>();
482#endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
483 return static_cast<AnalysisT *>(childAnalyses.back().get());
484}
485
486template <typename StateT, typename PointT>
487StateT *DataFlowSolver::getOrCreateState(PointT point) {
488 std::unique_ptr<AnalysisState> &state =
489 analysisStates[{ProgramPoint(point), TypeID::get<StateT>()}];
490 if (!state) {
491 state = std::unique_ptr<StateT>(new StateT(point));
492#if LLVM_ENABLE_ABI_BREAKING_CHECKS
493 state->debugName = llvm::getTypeName<StateT>();
494#endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
495 }
496 return static_cast<StateT *>(state.get());
497}
498
499inline raw_ostream &operator<<(raw_ostream &os, const AnalysisState &state) {
500 state.print(os);
501 return os;
502}
503
504inline raw_ostream &operator<<(raw_ostream &os, ProgramPoint point) {
505 point.print(os);
506 return os;
507}
508
509} // end namespace mlir
510
511namespace llvm {
512/// Allow hashing of program points.
513template <>
514struct DenseMapInfo<mlir::ProgramPoint>
515 : public DenseMapInfo<mlir::ProgramPoint::ParentTy> {};
516
517// Allow llvm::cast style functions.
518template <typename To>
519struct CastInfo<To, mlir::ProgramPoint>
520 : public CastInfo<To, mlir::ProgramPoint::PointerUnion> {};
521
522template <typename To>
523struct CastInfo<To, const mlir::ProgramPoint>
524 : public CastInfo<To, const mlir::ProgramPoint::PointerUnion> {};
525
526} // end namespace llvm
527
528#endif // MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H
529

source code of mlir/include/mlir/Analysis/DataFlowFramework.h