1 | //===- GPUTransformOps.h - GPU transform ops --------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef MLIR_DIALECT_GPU_TRANSFORMOPS_GPUTRANSFORMOPS_H |
10 | #define MLIR_DIALECT_GPU_TRANSFORMOPS_GPUTRANSFORMOPS_H |
11 | |
12 | #include "mlir/Dialect/SCF/IR/SCF.h" |
13 | #include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h" |
14 | #include "mlir/IR/OpImplementation.h" |
15 | #include "mlir/IR/PatternMatch.h" |
16 | |
17 | namespace mlir { |
18 | namespace gpu { |
19 | class GpuOp; |
20 | } // namespace gpu |
21 | } // namespace mlir |
22 | |
23 | //===----------------------------------------------------------------------===// |
24 | // GPU Transform Operations |
25 | //===----------------------------------------------------------------------===// |
26 | |
27 | #define GET_OP_CLASSES |
28 | #include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.h.inc" |
29 | |
30 | namespace mlir { |
31 | class DialectRegistry; |
32 | namespace transform { |
33 | namespace gpu { |
34 | struct GpuIdBuilder; |
35 | |
36 | /// Map the top-level `scf.forall` op (`forallOp`) to GPU blocks. |
37 | /// The mapping is one-to-one and the induction variables of `scf.forall` are |
38 | /// rewritten to gpu.block_id according to the thread_dim_mapping attribute. |
39 | /// NOTE(review): that attribute name may be stale given `gpuIdBuilder`; confirm. |
40 | /// Dynamic `scf.forall` trip counts are currently not supported. |
41 | /// Dynamic `gridDims` are currently not supported; passed by mutable reference. |
42 | DiagnosedSilenceableFailure |
43 | mapForallToBlocksImpl(RewriterBase &rewriter, TransformOpInterface transformOp, |
44 | scf::ForallOp forallOp, |
45 | SmallVectorImpl<int64_t> &gridDims, |
46 | const GpuIdBuilder &gpuIdBuilder); |
47 | |
48 | /// Map the given `scf.forall` op (`forallOp`) to an explicit GPU |
49 | /// implementation along `blockDims`. |
50 | /// The mapping is one-to-one and the induction variables of `scf.forall` are |
51 | /// rewritten to gpuIdBuilder.idBuilder according to the |
52 | /// gpuIdBuilder.mappingAttributes attribute. NOTE(review): no GpuIdBuilder is |
53 | /// passed in; presumably one is derived internally - confirm at the definition. |
54 | /// Dynamic `scf.forall` trip counts are currently not supported. |
55 | /// Dynamic `blockDims` sizes are currently not supported. |
56 | /// `blockDims` is expected to be of size 3; `transformOp` may be absent. |
57 | DiagnosedSilenceableFailure |
58 | mapOneForallToThreadsImpl(RewriterBase &rewriter, |
59 | std::optional<TransformOpInterface> transformOp, |
60 | scf::ForallOp forallOp, ArrayRef<int64_t> blockDims, |
61 | int64_t warpSize, bool syncAfterDistribute); |
62 | |
63 | /// Search for `scf.forall` ops nested under `target` and map each such op to |
64 | /// an explicit GPU implementation along `blockDims`. |
65 | /// The mapping is one-to-one and the induction variables of `scf.forall` are |
66 | /// rewritten to appropriate ids according to the mapping attribute. |
67 | /// |
68 | /// Dynamic `scf.forall` trip counts are currently not supported. |
69 | /// Dynamic `blockDims` or `newBasis` entries are currently not supported |
70 | /// (NOTE(review): `newBasis` is not a parameter here). `blockDims` is expected |
71 | /// to be of size 3; `transformOp` may be absent. |
72 | /// The insertion point of the `rewriter` is expected to be set at the |
73 | /// beginning of the `target` body block and dominate all other blocks. |
74 | DiagnosedSilenceableFailure |
75 | mapNestedForallToThreadsImpl(RewriterBase &rewriter, |
76 | std::optional<TransformOpInterface> transformOp, |
77 | Operation *target, ArrayRef<int64_t> blockDims, |
78 | int64_t warpSize, bool syncAfterDistribute); |
79 | |
80 | } // namespace gpu |
81 | } // namespace transform |
82 | |
83 | namespace gpu { |
84 | void registerTransformDialectExtension(DialectRegistry &registry); // NOTE(review): declaration only; presumably registers the GPU transform extension in `registry` - confirm |
85 | } // namespace gpu |
86 | } // namespace mlir |
87 | |
88 | #endif // MLIR_DIALECT_GPU_TRANSFORMOPS_GPUTRANSFORMOPS_H |
89 | |