1 | //===- Passes.h - Pass Entrypoints ------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This header file defines prototypes that expose pass constructors. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_ |
14 | #define MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_ |
15 | |
16 | #include "Utils.h" |
17 | #include "mlir/Dialect/GPU/IR/GPUDialect.h" |
18 | #include "mlir/IR/PatternMatch.h" |
19 | #include "mlir/Pass/Pass.h" |
20 | #include <optional> |
21 | |
// Forward declarations of LLVM types used by SerializeToBlobPass below; the
// full definitions are only needed in the implementation files.
namespace llvm {
class TargetMachine;
class LLVMContext;
class Module;
} // namespace llvm

namespace mlir {
class TypeConverter;
class ConversionTarget;
namespace func {
class FuncOp;
} // namespace func

// Generate pass declarations (constructor prototypes and pass-option structs)
// from the tablegen'd pass definitions in Passes.td.
#define GEN_PASS_DECL
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
37 | |
38 | /// Pass that moves ops which are likely an index computation into gpu.launch |
39 | /// body. |
40 | std::unique_ptr<Pass> createGpuLauchSinkIndexComputationsPass(); |
41 | |
42 | /// Replaces `gpu.launch` with `gpu.launch_func` by moving the region into |
43 | /// a separate kernel function. |
44 | std::unique_ptr<OperationPass<ModuleOp>> |
45 | createGpuKernelOutliningPass(StringRef dataLayoutStr = StringRef()); |
46 | |
/// Rewrites a function region so that GPU ops execute asynchronously.
std::unique_ptr<OperationPass<func::FuncOp>> createGpuAsyncRegionPass();

/// Maps the parallel loops found in the given function to workgroups. The first
/// loop encountered will be mapped to the global workgroup and the second loop
/// encountered to the local workgroup. Within each mapping, the first three
/// dimensions are mapped to x/y/z hardware ids and all following dimensions are
/// mapped to sequential loops.
std::unique_ptr<OperationPass<func::FuncOp>> createGpuMapParallelLoopsPass();

// The three populate* functions below are also aggregated by
// populateGpuRewritePatterns() further down in this header.

/// Collect a set of patterns to rewrite GlobalIdOp op within the GPU dialect.
void populateGpuGlobalIdPatterns(RewritePatternSet &patterns);

/// Collect a set of patterns to rewrite shuffle ops within the GPU dialect.
void populateGpuShufflePatterns(RewritePatternSet &patterns);

/// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.
void populateGpuAllReducePatterns(RewritePatternSet &patterns);
65 | |
66 | /// Collect a set of patterns to break down subgroup_reduce ops into smaller |
67 | /// ones supported by the target of `size <= maxShuffleBitwidth`, where `size` |
68 | /// is the subgroup_reduce value bitwidth. |
69 | void populateGpuBreakDownSubgrupReducePatterns(RewritePatternSet &patterns, |
70 | unsigned maxShuffleBitwidth = 32, |
71 | PatternBenefit benefit = 1); |
72 | |
73 | /// Collect a set of patterns to lower `gpu.subgroup_reduce` into `gpu.shuffle` |
74 | /// ops over `shuffleBitwidth` scalar types. Assumes that the subgroup has |
75 | /// `subgroupSize` lanes. Uses the butterfly shuffle algorithm. |
76 | void populateGpuLowerSubgroupReduceToShufflePattenrs( |
77 | RewritePatternSet &patterns, unsigned subgroupSize, |
78 | unsigned shuffleBitwidth = 32, PatternBenefit benefit = 1); |
79 | |
80 | /// Collect all patterns to rewrite ops within the GPU dialect. |
/// Collect all patterns to rewrite ops within the GPU dialect.
///
/// Aggregates only the all-reduce, global-id, and shuffle pattern sets; the
/// subgroup_reduce and decompose-memrefs patterns declared elsewhere in this
/// header must be added explicitly by callers that want them.
inline void populateGpuRewritePatterns(RewritePatternSet &patterns) {
  populateGpuAllReducePatterns(patterns);
  populateGpuGlobalIdPatterns(patterns);
  populateGpuShufflePatterns(patterns);
}
86 | |
namespace gpu {
/// Searches for all GPU modules in `op` and transforms them into GPU binary
/// operations. The resulting `gpu.binary` has `handler` as its offloading
/// handler attribute.
///
/// `options` carries target-compilation settings; the default-constructed
/// value is used when none are supplied. NOTE(review): presumably returns
/// failure when any module cannot be serialized — confirm in the
/// implementation.
LogicalResult transformGpuModulesToBinaries(
    Operation *op, OffloadingLLVMTranslationAttrInterface handler = nullptr,
    const gpu::TargetOptions &options = {});
94 | |
95 | /// Base pass class to serialize kernel functions through LLVM into |
96 | /// user-specified IR and add the resulting blob as module attribute. |
/// Base pass class to serialize kernel functions through LLVM into
/// user-specified IR and add the resulting blob as module attribute.
///
/// Subclasses provide the final ISA-to-blob step via `serializeISA` and may
/// customize the LLVM-IR translation and optimization hooks.
class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {
public:
  SerializeToBlobPass(TypeID passID);
  SerializeToBlobPass(const SerializeToBlobPass &other);

  /// Entry point; `final` so subclasses customize behavior only through the
  /// virtual hooks below.
  void runOnOperation() final;

protected:
  /// Hook allowing the application of optimizations before codegen
  /// By default, does nothing
  virtual LogicalResult optimizeLlvm(llvm::Module &llvmModule,
                                     llvm::TargetMachine &targetMachine);

  /// Translates the 'getOperation()' result to an LLVM module.
  virtual std::unique_ptr<llvm::Module>
  translateToLLVMIR(llvm::LLVMContext &llvmContext);

private:
  /// Creates the LLVM target machine to generate the ISA.
  /// Built from the `triple`, `chip`, and `features` options below —
  /// presumably returns null on an unknown triple; confirm in implementation.
  std::unique_ptr<llvm::TargetMachine> createTargetMachine();

  /// Translates the module to ISA; std::nullopt signals failure.
  std::optional<std::string> translateToISA(llvm::Module &llvmModule,
                                            llvm::TargetMachine &targetMachine);

  /// Serializes the target ISA to binary form. Pure virtual: each target
  /// backend (e.g. CUBIN/HSACO) supplies its own implementation.
  virtual std::unique_ptr<std::vector<char>>
  serializeISA(const std::string &isa) = 0;

protected:
  // Pass options, exposed as command-line flags on the pass.
  Option<std::string> triple{*this, "triple",
                             ::llvm::cl::desc("Target triple")};
  Option<std::string> chip{*this, "chip",
                           ::llvm::cl::desc("Target architecture")};
  Option<std::string> features{*this, "features",
                               ::llvm::cl::desc("Target features")};
  // LLVM optimization level (0-3); defaults to -O2.
  Option<int> optLevel{*this, "opt-level",
                       llvm::cl::desc("Optimization level for compilation"),
                       llvm::cl::init(2)};
  // Name of the module attribute that receives the serialized blob.
  Option<std::string> gpuBinaryAnnotation{
      *this, "gpu-binary-annotation",
      llvm::cl::desc("Annotation attribute string for GPU binary"),
      llvm::cl::init(getDefaultGpuBinaryAnnotation())};
  Option<bool> dumpPtx{*this, "dump-ptx",
                       ::llvm::cl::desc("Dump generated PTX"),
                       llvm::cl::init(false)};
};
144 | } // namespace gpu |
145 | |
146 | //===----------------------------------------------------------------------===// |
147 | // Registration |
148 | //===----------------------------------------------------------------------===// |
149 | |
/// Register pass to serialize GPU kernel functions to a HSAco binary
/// annotation.
/// Deprecated: use the GPU target-attribute / `gpu.binary` flow (see
/// `transformGpuModulesToBinaries`) instead.
LLVM_DEPRECATED("use Target attributes instead", "")
void registerGpuSerializeToHsacoPass();

/// Create an instance of the GPU kernel function to HSAco binary serialization
/// pass.
/// \p triple target triple, \p arch target chip, \p features target-feature
/// string, \p optLevel LLVM optimization level used during serialization.
LLVM_DEPRECATED("use Target attributes instead", "")
std::unique_ptr<Pass> createGpuSerializeToHsacoPass(StringRef triple,
                                                    StringRef arch,
                                                    StringRef features,
                                                    int optLevel);

/// Collect a set of patterns to decompose memrefs ops.
void populateGpuDecomposeMemrefsPatterns(RewritePatternSet &patterns);

/// Pass decomposes memref ops inside `gpu.launch` body.
std::unique_ptr<Pass> createGpuDecomposeMemrefsPass();

/// Erase barriers that do not enforce conflicting memory side effects.
void populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns);
171 | |
172 | /// Generate the code for registering passes. |
173 | #define GEN_PASS_REGISTRATION |
174 | #include "mlir/Dialect/GPU/Transforms/Passes.h.inc" |
175 | |
176 | } // namespace mlir |
177 | |
178 | #endif // MLIR_DIALECT_GPU_TRANSFORMS_PASSES_H_ |
179 | |