SCFToGPUPass.cpp source code [mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp]

1	//===- SCFToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"
10
11	#include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
12	#include "mlir/Dialect/Affine/IR/AffineOps.h"
13	#include "mlir/Dialect/Arith/IR/Arith.h"
14	#include "mlir/Dialect/Complex/IR/Complex.h"
15	#include "mlir/Dialect/GPU/IR/GPUDialect.h"
16	#include "mlir/Dialect/SCF/IR/SCF.h"
17	#include "mlir/Pass/Pass.h"
18	#include "mlir/Transforms/DialectConversion.h"
19	#include "llvm/ADT/ArrayRef.h"
20	#include "llvm/Support/CommandLine.h"
21
22	namespace mlir {
23	#define GEN_PASS_DEF_CONVERTAFFINEFORTOGPU
24	#define GEN_PASS_DEF_CONVERTPARALLELLOOPTOGPU
25	#include "mlir/Conversion/Passes.h.inc"
26	} // namespace mlir
27
28	using namespace mlir;
29	using namespace mlir::scf;
30
31	namespace {
32	// A pass that traverses top-level loops in the function and converts them to
33	// GPU launch operations. Nested launches are not allowed, so this does not
34	// walk the function recursively to avoid considering nested loops.
35	struct ForLoopMapper : public impl::ConvertAffineForToGPUBase<ForLoopMapper> {
36	ForLoopMapper() = default;
37	ForLoopMapper(unsigned numBlockDims, unsigned numThreadDims) {
38	this->numBlockDims = numBlockDims;
39	this->numThreadDims = numThreadDims;
40	}
41
42	void runOnOperation() override {
43	for (Operation &op : llvm::make_early_inc_range(
44	getOperation().getFunctionBody().getOps())) {
45	if (auto forOp = dyn_cast<affine::AffineForOp>(&op)) {
46	if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims,
47	numThreadDims)))
48	signalPassFailure();
49	}
50	}
51	}
52	};
53
54	struct ParallelLoopToGpuPass
55	: public impl::ConvertParallelLoopToGpuBase<ParallelLoopToGpuPass> {
56	void runOnOperation() override {
57	RewritePatternSet patterns(&getContext());
58	populateParallelLoopToGPUPatterns(patterns);
59	ConversionTarget target(getContext());
60	target.markUnknownOpDynamicallyLegal([](Operation ) { return* true; });
61	configureParallelLoopToGPULegality(target);
62	if (failed(applyPartialConversion(getOperation(), target,
63	std::move(patterns))))
64	signalPassFailure();
65	finalizeParallelLoopToGPUConversion(getOperation());
66	}
67	};
68
69	} // namespace
70
71	std::unique_ptr<InterfacePass<FunctionOpInterface>>
72	mlir::createAffineForToGPUPass(unsigned numBlockDims, unsigned numThreadDims) {
73	return std::make_unique<ForLoopMapper>(args&: numBlockDims, args&: numThreadDims);
74	}
75	std::unique_ptr<InterfacePass<FunctionOpInterface>>
76	mlir::createAffineForToGPUPass() {
77	return std::make_unique<ForLoopMapper>();
78	}
79
80	std::unique_ptr<Pass> mlir::createParallelLoopToGpuPass() {
81	return std::make_unique<ParallelLoopToGpuPass>();
82	}
83

source code of mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp