//===- ParallelLoopMapper.cpp - Utilities for mapping parallel loops to GPU =//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities to generate mappings for parallel loops to
// GPU devices.
//
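// As a rough sketch of the intended result (the exact textual syntax of the
// mapping attribute may differ between MLIR versions), the mapping pass
// annotates a nest of scf.parallel loops along the lines of:
//
//   scf.parallel (%i, %j) = (%c0, %c0) to (%n, %m) step (%c1, %c1) {
//     scf.parallel (%k) = (%c0) to (%p) step (%c1) {
//       ...
//     } {mapping = [#gpu.loop_dim_map<processor = thread_x,
//                                     map = (d0) -> (d0),
//                                     bound = (d0) -> (d0)>]}
//   } {mapping = [#gpu.loop_dim_map<processor = block_x, ...>,
//                 #gpu.loop_dim_map<processor = block_y, ...>]}
//
// A later SCF-to-GPU conversion can then use these annotations to turn the
// outer loop dimensions into block ids and the inner ones into thread ids.
//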
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/Transforms/Passes.h"

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/ParallelLoopMapper.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/AffineMap.h"

namespace mlir {
#define GEN_PASS_DEF_GPUMAPPARALLELLOOPSPASS
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
} // namespace mlir

namespace mlir {

using scf::ParallelOp;

StringRef gpu::getMappingAttrName() { return "mapping"; }

/// Attaches `mapping` to `ploopOp` as the "mapping" array attribute, after
/// verifying that each non-sequential processor is used at most once.
LogicalResult
gpu::setMappingAttr(ParallelOp ploopOp,
                    ArrayRef<ParallelLoopDimMappingAttr> mapping) {
  // Verify that each non-sequential processor is mapped at most once.
  llvm::DenseSet<gpu::Processor> specifiedMappings;
  for (auto dimAttr : mapping) {
    gpu::Processor processor = dimAttr.getProcessor();
    if (processor != gpu::Processor::Sequential &&
        specifiedMappings.count(processor))
      return ploopOp.emitError(
          "invalid mapping multiple loops to same processor");
    specifiedMappings.insert(processor);
  }
  ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
  ploopOp->setAttr(getMappingAttrName(),
                   ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
  return success();
}

namespace gpu {
namespace {
/// Levels at which a parallel loop can be mapped: the outermost loop maps to
/// the grid (blocks), the next level to threads within a block, and anything
/// nested deeper stays sequential.
enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };
} // namespace

/// Number of hardware ids (x, y and z) available at each mapping level.
static constexpr int kNumHardwareIds = 3;

/// Bounded increment on MappingLevel. Increments to the next
/// level unless Sequential was already reached.
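/// For example, ++MapGrid yields MapBlock, ++MapBlock yields Sequential, and
/// ++Sequential stays at Sequential.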
static MappingLevel &operator++(MappingLevel &mappingLevel) {
  if (mappingLevel < Sequential) {
    mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
  }
  return mappingLevel;
}

/// Computes the hardware id to use for a given mapping level. Assigns the x, y
/// and z hardware ids to the first three dimensions and uses sequential for
/// anything beyond that.
/// TODO: Map the innermost distributed loop to x, the next innermost to y, and
/// the next innermost to z instead.
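/// For example, getHardwareIdForMapping(MapGrid, 0) is Processor::BlockX,
/// getHardwareIdForMapping(MapBlock, 2) is Processor::ThreadZ, and any
/// dimension >= kNumHardwareIds maps to Processor::Sequential.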
static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {

  if (dimension >= kNumHardwareIds || level == Sequential)
    return Processor::Sequential;
  switch (level) {
  case MapGrid:
    switch (dimension) {
    case 0:
      return Processor::BlockX;
    case 1:
      return Processor::BlockY;
    case 2:
      return Processor::BlockZ;
    default:
      return Processor::Sequential;
    }
    break;
  case MapBlock:
    switch (dimension) {
    case 0:
      return Processor::ThreadX;
    case 1:
      return Processor::ThreadY;
    case 2:
      return Processor::ThreadZ;
    default:
      return Processor::Sequential;
    }
  default:;
  }
  return Processor::Sequential;
}

/// Add mapping information to the given parallel loop. Do not add
/// mapping information if the loop already has it. Also, don't
/// start a mapping at a nested loop.
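/// For example, the dimensions of an outermost two-dimensional loop are mapped
/// to block_x and block_y, a parallel loop immediately nested inside it is
/// mapped to thread ids, and anything nested deeper is marked sequential.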
static void mapParallelOp(ParallelOp parallelOp,
                          MappingLevel mappingLevel = MapGrid) {
  // Do not try to add a mapping to already mapped loops or nested loops.
  if (parallelOp->getAttr(getMappingAttrName()) ||
      ((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>()))
    return;

  MLIRContext *ctx = parallelOp.getContext();
  Builder b(ctx);
  SmallVector<ParallelLoopDimMappingAttr, 4> attrs;
  attrs.reserve(parallelOp.getNumLoops());
  for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) {
    attrs.push_back(b.getAttr<ParallelLoopDimMappingAttr>(
        getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),
        b.getDimIdentityMap()));
  }
  (void)setMappingAttr(parallelOp, attrs);
  ++mappingLevel;
  // Nested parallel loops are immediately nested in the body, so instead of a
  // full walk it suffices to iterate over the body's operations.
  for (Operation &op : *parallelOp.getBody()) {
    if (ParallelOp nested = dyn_cast<ParallelOp>(op))
      mapParallelOp(nested, mappingLevel);
  }
}

namespace {
struct GpuMapParallelLoopsPass
    : public impl::GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {
  void runOnOperation() override {
    for (Region &region : getOperation()->getRegions()) {
      region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
    }
  }
};

} // namespace
} // namespace gpu
} // namespace mlir

std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>>
mlir::createGpuMapParallelLoopsPass() {
  return std::make_unique<gpu::GpuMapParallelLoopsPass>();
}
