DataFlowFramework.h source code [mlir/include/mlir/Analysis/DataFlowFramework.h]

1	//===- DataFlowFramework.h - A generic framework for data-flow analysis ---===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file defines a generic framework for writing data-flow analysis in MLIR.
10	// The framework consists of a solver, which runs the fixed-point iteration and
11	// manages analysis dependencies, and a data-flow analysis class used to
12	// implement specific analyses.
13	//
14	//===----------------------------------------------------------------------===//
15
16	#ifndef MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H
17	#define MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H
18
19	#include "mlir/IR/Operation.h"
20	#include "mlir/Support/StorageUniquer.h"
21	#include "llvm/ADT/SetVector.h"
22	#include "llvm/Support/Compiler.h"
23	#include "llvm/Support/TypeName.h"
24	#include <queue>
25
26	namespace mlir {
27
28	//===----------------------------------------------------------------------===//
29	// ChangeResult
30	//===----------------------------------------------------------------------===//
31
32	/// A result type used to indicate if a change happened. Boolean operations on
33	/// ChangeResult behave as though `Change` is truth.
34	enum class [[nodiscard]] ChangeResult {
35	NoChange,
36	Change,
37	};
38	inline ChangeResult operator\|(ChangeResult lhs, ChangeResult rhs) {
39	return lhs == ChangeResult::Change ? lhs : rhs;
40	}
41	inline ChangeResult &operator\|=(ChangeResult &lhs, ChangeResult rhs) {
42	lhs = lhs \| rhs;
43	return lhs;
44	}
45	inline ChangeResult operator&(ChangeResult lhs, ChangeResult rhs) {
46	return lhs == ChangeResult::NoChange ? lhs : rhs;
47	}
48
49	/// Forward declare the analysis state class.
50	class AnalysisState;
51
52	//===----------------------------------------------------------------------===//
53	// GenericProgramPoint
54	//===----------------------------------------------------------------------===//
55
56	/// Abstract class for generic program points. In classical data-flow analysis,
57	/// programs points represent positions in a program to which lattice elements
58	/// are attached. In sparse data-flow analysis, these can be SSA values, and in
59	/// dense data-flow analysis, these are the program points before and after
60	/// every operation.
61	///
62	/// In the general MLIR data-flow analysis framework, program points are an
63	/// extensible concept. Program points are uniquely identifiable objects to
64	/// which analysis states can be attached. The semantics of program points are
65	/// defined by the analyses that specify their transfer functions.
66	///
67	/// Program points are implemented using MLIR's storage uniquer framework and
68	/// type ID system to provide RTTI.
69	class GenericProgramPoint : public StorageUniquer::BaseStorage {
70	public:
71	virtual ~GenericProgramPoint();
72
73	/// Get the abstract program point's type identifier.
74	TypeID getTypeID() const { return typeID; }
75
76	/// Get a derived source location for the program point.
77	virtual Location getLoc() const = `0`;
78
79	/// Print the program point.
80	virtual void print(raw_ostream &os) const = `0`;
81
82	protected:
83	/// Create an abstract program point with type identifier.
84	explicit GenericProgramPoint(TypeID typeID) : typeID (typeID) {}
85
86	private:
87	/// The type identifier of the program point.
88	TypeID typeID;
89	};
90
91	//===----------------------------------------------------------------------===//
92	// GenericProgramPointBase
93	//===----------------------------------------------------------------------===//
94
95	/// Base class for generic program points based on a concrete program point
96	/// type and a content key. This class defines the common methods required for
97	/// operability with the storage uniquer framework.
98	///
99	/// The provided key type uniquely identifies the concrete program point
100	/// instance and are the data members of the class.
101	template <typename ConcreteT, typename Value>
102	class GenericProgramPointBase : public GenericProgramPoint {
103	public:
104	/// The concrete key type used by the storage uniquer. This class is uniqued
105	/// by its contents.
106	using KeyTy = Value;
107	/// Alias for the base class.
108	using Base = GenericProgramPointBase<ConcreteT, Value>;
109
110	/// Construct an instance of the program point using the provided value and
111	/// the type ID of the concrete type.
112	template <typename ValueT>
113	explicit GenericProgramPointBase(ValueT &&value)
114	: GenericProgramPoint(TypeID::get<ConcreteT>()),
115	value(std::forward<ValueT>(value)) {}
116
117	/// Get a uniqued instance of this program point class with the given
118	/// arguments.
119	template <typename... Args>
120	static ConcreteT *get(StorageUniquer &uniquer, Args &&...args) {
121	return uniquer.get<ConcreteT>(/initFn=/{}, std::forward<Args>(args)...);
122	}
123
124	/// Allocate space for a program point and construct it in-place.
125	template <typename ValueT>
126	static ConcreteT *construct(StorageUniquer::StorageAllocator &alloc,
127	ValueT &&value) {
128	return new (alloc.allocate<ConcreteT>())
129	ConcreteT(std::forward<ValueT>(value));
130	}
131
132	/// Two program points are equal if their values are equal.
133	bool operator==(const Value &value) const { return this->value == value; }
134
135	/// Provide LLVM-style RTTI using type IDs.
136	static bool classof(const GenericProgramPoint *point) {
137	return point->getTypeID() == TypeID::get<ConcreteT>();
138	}
139
140	/// Get the contents of the program point.
141	const Value &getValue() const { return value; }
142
143	private:
144	/// The program point value.
145	Value value;
146	};
147
148	//===----------------------------------------------------------------------===//
149	// ProgramPoint
150	//===----------------------------------------------------------------------===//
151
152	/// Fundamental IR components are supported as first-class program points.
153	struct ProgramPoint
154	: public PointerUnion<GenericProgramPoint , Operation , Value, Block *> {
155	using ParentTy =
156	PointerUnion<GenericProgramPoint , Operation , Value, Block *>;
157	/// Inherit constructors.
158	using ParentTy::PointerUnion;
159	/// Allow implicit conversion from the parent type.
160	ProgramPoint(ParentTy point = nullptr) : ParentTy (point) {}
161	/// Allow implicit conversions from operation wrappers.
162	/// TODO: For Windows only. Find a better solution.
163	template <typename OpT, typename = std::enable_if_t<
164	std::is_convertible<OpT, Operation *>::value &&
165	!std::is_same<OpT, Operation *>::value>>
166	ProgramPoint(OpT op) : ParentTy(op) {}
167
168	/// Print the program point.
169	void print(raw_ostream &os) const;
170
171	/// Get the source location of the program point.
172	Location getLoc() const;
173	};
174
175	/// Forward declaration of the data-flow analysis class.
176	class DataFlowAnalysis;
177
178	//===----------------------------------------------------------------------===//
179	// DataFlowConfig
180	//===----------------------------------------------------------------------===//
181
182	/// Configuration class for data flow solver and child analyses. Follows the
183	/// fluent API pattern.
184	class DataFlowConfig {
185	public:
186	DataFlowConfig() = default;
187
188	/// Set whether the solver should operate interpocedurally, i.e. enter the
189	/// callee body when available. Interprocedural analyses may be more precise,
190	/// but also more expensive as more states need to be computed and the
191	/// fixpoint convergence takes longer.
192	DataFlowConfig &setInterprocedural(bool enable) {
193	interprocedural = enable;
194	return *this;
195	}
196
197	/// Return `true` if the solver operates interprocedurally, `false` otherwise.
198	bool isInterprocedural() const { return interprocedural; }
199
200	private:
201	bool interprocedural = true;
202	};
203
204	//===----------------------------------------------------------------------===//
205	// DataFlowSolver
206	//===----------------------------------------------------------------------===//
207
208	/// The general data-flow analysis solver. This class is responsible for
209	/// orchestrating child data-flow analyses, running the fixed-point iteration
210	/// algorithm, managing analysis state and program point memory, and tracking
211	/// dependencies between analyses, program points, and analysis states.
212	///
213	/// Steps to run a data-flow analysis:
214	///
215	/// 1. Load and initialize children analyses. Children analyses are instantiated
216	/// in the solver and initialized, building their dependency relations.
217	/// 2. Configure and run the analysis. The solver invokes the children analyses
218	/// according to their dependency relations until a fixed point is reached.
219	/// 3. Query analysis state results from the solver.
220	///
221	/// TODO: Optimize the internal implementation of the solver.
222	class DataFlowSolver {
223	public:
224	explicit DataFlowSolver(const DataFlowConfig &config = DataFlowConfig ())
225	: config (config) {}
226
227	/// Load an analysis into the solver. Return the analysis instance.
228	template <typename AnalysisT, typename... Args>
229	AnalysisT *load(Args &&...args);
230
231	/// Initialize the children analyses starting from the provided top-level
232	/// operation and run the analysis until fixpoint.
233	LogicalResult initializeAndRun(Operation *top);
234
235	/// Lookup an analysis state for the given program point. Returns null if one
236	/// does not exist.
237	template <typename StateT, typename PointT>
238	const StateT lookupState(PointT point) const* {
239	auto it = analysisStates.find({ProgramPoint(point), TypeID::get<StateT>()});
240	if (it == analysisStates.end())
241	return nullptr;
242	return static_cast<const StateT *>(it->second.get());
243	}
244
245	/// Get a uniqued program point instance. If one is not present, it is
246	/// created with the provided arguments.
247	template <typename PointT, typename... Args>
248	PointT *getProgramPoint(Args &&...args) {
249	return PointT::get(uniquer, std::forward<Args>(args)...);
250	}
251
252	/// A work item on the solver queue is a program point, child analysis pair.
253	/// Each item is processed by invoking the child analysis at the program
254	/// point.
255	using WorkItem = std::pair<ProgramPoint, DataFlowAnalysis *>;
256	/// Push a work item onto the worklist.
257	void enqueue(WorkItem item) { worklist.push(x: std::move(item)); }
258
259	/// Get the state associated with the given program point. If it does not
260	/// exist, create an uninitialized state.
261	template <typename StateT, typename PointT>
262	StateT *getOrCreateState(PointT point);
263
264	/// Propagate an update to an analysis state if it changed by pushing
265	/// dependent work items to the back of the queue.
266	void propagateIfChanged(AnalysisState *state, ChangeResult changed);
267
268	/// Get the configuration of the solver.
269	const DataFlowConfig &getConfig() const { return config; }
270
271	private:
272	/// Configuration of the dataflow solver.
273	DataFlowConfig config;
274
275	/// The solver's work queue. Work items can be inserted to the front of the
276	/// queue to be processed greedily, speeding up computations that otherwise
277	/// quickly degenerate to quadratic due to propagation of state updates.
278	std::queue<WorkItem> worklist;
279
280	/// Type-erased instances of the children analyses.
281	SmallVector<std::unique_ptr<DataFlowAnalysis>> childAnalyses;
282
283	/// The storage uniquer instance that owns the memory of the allocated program
284	/// points.
285	StorageUniquer uniquer;
286
287	/// A type-erased map of program points to associated analysis states for
288	/// first-class program points.
289	DenseMap<std::pair<ProgramPoint, TypeID>, std::unique_ptr<AnalysisState>>
290	analysisStates;
291
292	/// Allow the base child analysis class to access the internals of the solver.
293	friend class DataFlowAnalysis;
294	};
295
296	//===----------------------------------------------------------------------===//
297	// AnalysisState
298	//===----------------------------------------------------------------------===//
299
300	/// Base class for generic analysis states. Analysis states contain data-flow
301	/// information that are attached to program points and which evolve as the
302	/// analysis iterates.
303	///
304	/// This class places no restrictions on the semantics of analysis states beyond
305	/// these requirements.
306	///
307	/// 1. Querying the state of a program point prior to visiting that point
308	/// results in uninitialized state. Analyses must be aware of unintialized
309	/// states.
310	/// 2. Analysis states can reach fixpoints, where subsequent updates will never
311	/// trigger a change in the state.
312	/// 3. Analysis states that are uninitialized can be forcefully initialized to a
313	/// default value.
314	class AnalysisState {
315	public:
316	virtual ~AnalysisState();
317
318	/// Create the analysis state at the given program point.
319	AnalysisState(ProgramPoint point) : point (point) {}
320
321	/// Returns the program point this state is located at.
322	ProgramPoint getPoint() const { return point; }
323
324	/// Print the contents of the analysis state.
325	virtual void print(raw_ostream &os) const = `0`;
326	LLVM_DUMP_METHOD void dump() const;
327
328	/// Add a dependency to this analysis state on a program point and an
329	/// analysis. If this state is updated, the analysis will be invoked on the
330	/// given program point again (in onUpdate()).
331	void addDependency(ProgramPoint dependent, DataFlowAnalysis *analysis);
332
333	protected:
334	/// This function is called by the solver when the analysis state is updated
335	/// to enqueue more work items. For example, if a state tracks dependents
336	/// through the IR (e.g. use-def chains), this function can be implemented to
337	/// push those dependents on the worklist.
338	virtual void onUpdate(DataFlowSolver solver) const* {
339	for (const DataFlowSolver::WorkItem &item : dependents)
340	solver->enqueue(item);
341	}
342
343	/// The program point to which the state belongs.
344	ProgramPoint point;
345
346	#if LLVM_ENABLE_ABI_BREAKING_CHECKS
347	/// When compiling with debugging, keep a name for the analysis state.
348	StringRef debugName;
349	#endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
350
351	private:
352	/// The dependency relations originating from this analysis state. An entry
353	/// `state -> (analysis, point)` is created when `analysis` queries `state`
354	/// when updating `point`.
355	///
356	/// When this state is updated, all dependent child analysis invocations are
357	/// pushed to the back of the queue. Use a `SetVector` to keep the analysis
358	/// deterministic.
359	///
360	/// Store the dependents on the analysis state for efficiency.
361	SetVector<DataFlowSolver::WorkItem> dependents;
362
363	/// Allow the framework to access the dependents.
364	friend class DataFlowSolver;
365	};
366
367	//===----------------------------------------------------------------------===//
368	// DataFlowAnalysis
369	//===----------------------------------------------------------------------===//
370
371	/// Base class for all data-flow analyses. A child analysis is expected to build
372	/// an initial dependency graph (and optionally provide an initial state) when
373	/// initialized and define transfer functions when visiting program points.
374	///
375	/// In classical data-flow analysis, the dependency graph is fixed and analyses
376	/// define explicit transfer functions between input states and output states.
377	/// In this framework, however, the dependency graph can change during the
378	/// analysis, and transfer functions are opaque such that the solver doesn't
379	/// know what states calling `visit` on an analysis will be updated. This allows
380	/// multiple analyses to plug in and provide values for the same state.
381	///
382	/// Generally, when an analysis queries an uninitialized state, it is expected
383	/// to "bail out", i.e., not provide any updates. When the value is initialized,
384	/// the solver will re-invoke the analysis. If the solver exhausts its worklist,
385	/// however, and there are still uninitialized states, the solver "nudges" the
386	/// analyses by default-initializing those states.
387	class DataFlowAnalysis {
388	public:
389	virtual ~DataFlowAnalysis();
390
391	/// Create an analysis with a reference to the parent solver.
392	explicit DataFlowAnalysis(DataFlowSolver &solver);
393
394	/// Initialize the analysis from the provided top-level operation by building
395	/// an initial dependency graph between all program points of interest. This
396	/// can be implemented by calling `visit` on all program points of interest
397	/// below the top-level operation.
398	///
399	/// An analysis can optionally provide initial values to certain analysis
400	/// states to influence the evolution of the analysis.
401	virtual LogicalResult initialize(Operation *top) = `0`;
402
403	/// Visit the given program point. This function is invoked by the solver on
404	/// this analysis with a given program point when a dependent analysis state
405	/// is updated. The function is similar to a transfer function; it queries
406	/// certain analysis states and sets other states.
407	///
408	/// The function is expected to create dependencies on queried states and
409	/// propagate updates on changed states. A dependency can be created by
410	/// calling `addDependency` between the input state and a program point,
411	/// indicating that, if the state is updated, the solver should invoke `solve`
412	/// on the program point. The dependent point does not have to be the same as
413	/// the provided point. An update to a state is propagated by calling
414	/// `propagateIfChange` on the state. If the state has changed, then all its
415	/// dependents are placed on the worklist.
416	///
417	/// The dependency graph does not need to be static. Each invocation of
418	/// `visit` can add new dependencies, but these dependencies will not be
419	/// dynamically added to the worklist because the solver doesn't know what
420	/// will provide a value for then.
421	virtual LogicalResult visit(ProgramPoint point) = `0`;
422
423	protected:
424	/// Create a dependency between the given analysis state and program point
425	/// on this analysis.
426	void addDependency(AnalysisState *state, ProgramPoint point);
427
428	/// Propagate an update to a state if it changed.
429	void propagateIfChanged(AnalysisState *state, ChangeResult changed);
430
431	/// Register a custom program point class.
432	template <typename PointT>
433	void registerPointKind() {
434	solver.uniquer.registerParametricStorageType<PointT>();
435	}
436
437	/// Get or create a custom program point.
438	template <typename PointT, typename... Args>
439	PointT *getProgramPoint(Args &&...args) {
440	return solver.getProgramPoint<PointT>(std::forward<Args>(args)...);
441	}
442
443	/// Get the analysis state associated with the program point. The returned
444	/// state is expected to be "write-only", and any updates need to be
445	/// propagated by `propagateIfChanged`.
446	template <typename StateT, typename PointT>
447	StateT *getOrCreate(PointT point) {
448	return solver.getOrCreateState<StateT>(point);
449	}
450
451	/// Get a read-only analysis state for the given point and create a dependency
452	/// on `dependent`. If the return state is updated elsewhere, this analysis is
453	/// re-invoked on the dependent.
454	template <typename StateT, typename PointT>
455	const StateT *getOrCreateFor(ProgramPoint dependent, PointT point) {
456	StateT *state = getOrCreate<StateT>(point);
457	addDependency(state, point: dependent);
458	return state;
459	}
460
461	/// Return the configuration of the solver used for this analysis.
462	const DataFlowConfig &getSolverConfig() const { return solver.getConfig(); }
463
464	#if LLVM_ENABLE_ABI_BREAKING_CHECKS
465	/// When compiling with debugging, keep a name for the analyis.
466	StringRef debugName;
467	#endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
468
469	private:
470	/// The parent data-flow solver.
471	DataFlowSolver &solver;
472
473	/// Allow the data-flow solver to access the internals of this class.
474	friend class DataFlowSolver;
475	};
476
477	template <typename AnalysisT, typename... Args>
478	AnalysisT *DataFlowSolver::load(Args &&...args) {
479	childAnalyses.emplace_back(new AnalysisT(*this, std::forward<Args>(args)...));
480	#if LLVM_ENABLE_ABI_BREAKING_CHECKS
481	childAnalyses.back().get()->debugName = llvm::getTypeName<AnalysisT>();
482	#endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
483	return static_cast<AnalysisT *>(childAnalyses.back().get());
484	}
485
486	template <typename StateT, typename PointT>
487	StateT *DataFlowSolver::getOrCreateState(PointT point) {
488	std::unique_ptr<AnalysisState> &state =
489	analysisStates[{ProgramPoint(point), TypeID::get<StateT>()}];
490	if (!state) {
491	state = std::unique_ptr<StateT>(new StateT(point));
492	#if LLVM_ENABLE_ABI_BREAKING_CHECKS
493	state ->debugName = llvm::getTypeName<StateT>();
494	#endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
495	}
496	return static_cast<StateT *>(state.get());
497	}
498
499	inline raw_ostream &operator<<(raw_ostream &os, const AnalysisState &state) {
500	state.print(os);
501	return os;
502	}
503
504	inline raw_ostream &operator<<(raw_ostream &os, ProgramPoint point) {
505	point.print(os);
506	return os;
507	}
508
509	} // end namespace mlir
510
511	namespace llvm {
512	/// Allow hashing of program points.
513	template <>
514	struct DenseMapInfo<mlir::ProgramPoint>
515	: public DenseMapInfo<mlir::ProgramPoint::ParentTy> {};
516
517	// Allow llvm::cast style functions.
518	template <typename To>
519	struct CastInfo<To, mlir::ProgramPoint>
520	: public CastInfo<To, mlir::ProgramPoint::PointerUnion> {};
521
522	template <typename To>
523	struct CastInfo<To, const mlir::ProgramPoint>
524	: public CastInfo<To, const mlir::ProgramPoint::PointerUnion> {};
525
526	} // end namespace llvm
527
528	#endif // MLIR_ANALYSIS_DATAFLOWFRAMEWORK_H
529

source code of mlir/include/mlir/Analysis/DataFlowFramework.h