ParallelLoopMapper.cpp source code [mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp]

1	//===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements utilities to generate mappings for parallel loops to
10	// GPU devices.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "mlir/Dialect/GPU/Transforms/Passes.h"
15
16	#include "mlir/Dialect/Func/IR/FuncOps.h"
17	#include "mlir/Dialect/GPU/IR/GPUDialect.h"
18	#include "mlir/Dialect/GPU/Transforms/ParallelLoopMapper.h"
19	#include "mlir/Dialect/SCF/IR/SCF.h"
20	#include "mlir/IR/AffineMap.h"
21
22	namespace mlir {
23	#define GEN_PASS_DEF_GPUMAPPARALLELLOOPSPASS
24	#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
25	} // namespace mlir
26
27	namespace mlir {
28
29	using scf::ParallelOp;
30
31	StringRef gpu::getMappingAttrName() { return "mapping"; }
32
33	LogicalResult
34	gpu::setMappingAttr(ParallelOp ploopOp,
35	ArrayRef<ParallelLoopDimMappingAttr> mapping) {
36	// Verify that each processor is mapped to only once.
37	llvm::DenseSet<gpu::Processor> specifiedMappings;
38	for (auto dimAttr : mapping) {
39	gpu::Processor processor = dimAttr.getProcessor();
40	if (processor != gpu::Processor::Sequential &&
41	specifiedMappings.count(processor))
42	return ploopOp.emitError(
43	"invalid mapping multiple loops to same processor");
44	specifiedMappings.insert(processor);
45	}
46	ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
47	ploopOp->setAttr(getMappingAttrName(),
48	ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
49	return success();
50	}
51
52	namespace gpu {
namespace {
/// Nesting level a parallel loop is mapped at. Levels are ordered so that the
/// bounded increment can step from the grid level down to sequential.
enum MappingLevel {
  /// Loop dimensions are mapped to block ids.
  MapGrid = 0,
  /// Loop dimensions are mapped to thread ids.
  MapBlock = 1,
  /// Loop dimensions are executed sequentially.
  Sequential = 2
};
} // namespace

/// Number of hardware ids (x, y and z) available per mapping level.
static constexpr int kNumHardwareIds = 3;
58
59	/// Bounded increment on MappingLevel. Increments to the next
60	/// level unless Sequential was already reached.
61	static MappingLevel &operator++(MappingLevel &mappingLevel) {
62	if (mappingLevel < Sequential) {
63	mappingLevel = static_cast<MappingLevel>(mappingLevel + `1`);
64	}
65	return mappingLevel;
66	}
67
68	/// Computed the hardware id to use for a given mapping level. Will
69	/// assign x,y and z hardware ids for the first 3 dimensions and use
70	/// sequential after.
71	/// TODO: Make this use x for the inner-most loop that is
72	/// distributed to map to x, the next innermost to y and the next innermost to
73	/// z.
74	static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {
75
76	if (dimension >= kNumHardwareIds \|\| level == Sequential)
77	return Processor::Sequential;
78	switch (level) {
79	case MapGrid:
80	switch (dimension) {
81	case `0`:
82	return Processor::BlockX;
83	case `1`:
84	return Processor::BlockY;
85	case `2`:
86	return Processor::BlockZ;
87	default:
88	return Processor::Sequential;
89	}
90	break;
91	case MapBlock:
92	switch (dimension) {
93	case `0`:
94	return Processor::ThreadX;
95	case `1`:
96	return Processor::ThreadY;
97	case `2`:
98	return Processor::ThreadZ;
99	default:
100	return Processor::Sequential;
101	}
102	default:;
103	}
104	return Processor::Sequential;
105	}
106
107	/// Add mapping information to the given parallel loop. Do not add
108	/// mapping information if the loop already has it. Also, don't
109	/// start a mapping at a nested loop.
110	static void mapParallelOp(ParallelOp parallelOp,
111	MappingLevel mappingLevel = MapGrid) {
112	// Do not try to add a mapping to already mapped loops or nested loops.
113	if (parallelOp->getAttr(getMappingAttrName()) \|\|
114	((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>()))
115	return;
116
117	MLIRContext *ctx = parallelOp.getContext();
118	Builder b(ctx);
119	SmallVector<ParallelLoopDimMappingAttr, `4`> attrs;
120	attrs.reserve(parallelOp.getNumLoops());
121	for (int i = `0`, e = parallelOp.getNumLoops(); i < e; ++i) {
122	attrs.push_back(b.getAttr<ParallelLoopDimMappingAttr>(
123	getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),
124	b.getDimIdentityMap()));
125	}
126	(void)setMappingAttr(parallelOp, attrs);
127	++mappingLevel;
128	// Parallel loop operations are immediately nested, so do not use
129	// walk but just iterate over the operations.
130	for (Operation &op : *parallelOp.getBody()) {
131	if (ParallelOp nested = dyn_cast<ParallelOp>(op))
132	mapParallelOp(nested, mappingLevel);
133	}
134	}
135
136	namespace {
137	struct GpuMapParallelLoopsPass
138	: public impl::GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {
139	void runOnOperation() override {
140	for (Region &region : getOperation()->getRegions()) {
141	region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
142	}
143	}
144	};
145
146	} // namespace
147	} // namespace gpu
148	} // namespace mlir
149
150	std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>>
151	mlir::createGpuMapParallelLoopsPass() {
152	return std::make_unique<gpu::GpuMapParallelLoopsPass>();
153	}
154

source code of mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp