//===- ValueBoundsOpInterfaceImpl.cpp - Impl. of ValueBoundsOpInterface ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h"

#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Interfaces/InferIntRangeInterface.h"
#include "mlir/Interfaces/ValueBoundsOpInterface.h"

using namespace mlir;
using namespace mlir::gpu;
| 18 | namespace { |
| 19 | /// Implement ValueBoundsOpInterface (which only works on index-typed values, |
| 20 | /// gathers a set of constraint expressions, and is used for affine analyses) |
| 21 | /// in terms of InferIntRangeInterface (which works |
| 22 | /// on arbitrary integer types, creates [min, max] ranges, and is used in for |
| 23 | /// arithmetic simplification). |
| 24 | template <typename Op> |
| 25 | struct GpuIdOpInterface |
| 26 | : public ValueBoundsOpInterface::ExternalModel<GpuIdOpInterface<Op>, Op> { |
| 27 | void populateBoundsForIndexValue(Operation *op, Value value, |
| 28 | ValueBoundsConstraintSet &cstr) const { |
| 29 | auto inferrable = cast<InferIntRangeInterface>(op); |
| 30 | assert(value == op->getResult(0) && |
| 31 | "inferring for value that isn't the GPU op's result" ); |
| 32 | auto translateConstraint = [&](Value v, const ConstantIntRanges &range) { |
| 33 | assert(v == value && |
| 34 | "GPU ID op inferring values for something that's not its result" ); |
| 35 | cstr.bound(value: v) >= range.smin().getSExtValue(); |
| 36 | cstr.bound(value: v) <= range.smax().getSExtValue(); |
| 37 | }; |
| 38 | assert(inferrable->getNumOperands() == 0 && "ID ops have no operands" ); |
| 39 | inferrable.inferResultRanges({}, translateConstraint); |
| 40 | } |
| 41 | }; |
| 42 | |
| 43 | struct GpuLaunchOpInterface |
| 44 | : public ValueBoundsOpInterface::ExternalModel<GpuLaunchOpInterface, |
| 45 | LaunchOp> { |
| 46 | void populateBoundsForIndexValue(Operation *op, Value value, |
| 47 | ValueBoundsConstraintSet &cstr) const { |
| 48 | auto launchOp = cast<LaunchOp>(op); |
| 49 | |
| 50 | Value sizeArg = nullptr; |
| 51 | bool isSize = false; |
| 52 | KernelDim3 gridSizeArgs = launchOp.getGridSizeOperandValues(); |
| 53 | KernelDim3 blockSizeArgs = launchOp.getBlockSizeOperandValues(); |
| 54 | |
| 55 | auto match = [&](KernelDim3 bodyArgs, KernelDim3 externalArgs, |
| 56 | bool areSizeArgs) { |
| 57 | if (value == bodyArgs.x) { |
| 58 | sizeArg = externalArgs.x; |
| 59 | isSize = areSizeArgs; |
| 60 | } |
| 61 | if (value == bodyArgs.y) { |
| 62 | sizeArg = externalArgs.y; |
| 63 | isSize = areSizeArgs; |
| 64 | } |
| 65 | if (value == bodyArgs.z) { |
| 66 | sizeArg = externalArgs.z; |
| 67 | isSize = areSizeArgs; |
| 68 | } |
| 69 | }; |
| 70 | match(launchOp.getThreadIds(), blockSizeArgs, false); |
| 71 | match(launchOp.getBlockSize(), blockSizeArgs, true); |
| 72 | match(launchOp.getBlockIds(), gridSizeArgs, false); |
| 73 | match(launchOp.getGridSize(), gridSizeArgs, true); |
| 74 | if (launchOp.hasClusterSize()) { |
| 75 | KernelDim3 clusterSizeArgs = *launchOp.getClusterSizeOperandValues(); |
| 76 | match(*launchOp.getClusterIds(), clusterSizeArgs, false); |
| 77 | match(*launchOp.getClusterSize(), clusterSizeArgs, true); |
| 78 | } |
| 79 | |
| 80 | if (!sizeArg) |
| 81 | return; |
| 82 | if (isSize) { |
| 83 | cstr.bound(value) == cstr.getExpr(value: sizeArg); |
| 84 | cstr.bound(value) >= 1; |
| 85 | } else { |
| 86 | cstr.bound(value) < cstr.getExpr(value: sizeArg); |
| 87 | cstr.bound(value) >= 0; |
| 88 | } |
| 89 | } |
| 90 | }; |
| 91 | } // namespace |
| 93 | void mlir::gpu::registerValueBoundsOpInterfaceExternalModels( |
| 94 | DialectRegistry ®istry) { |
| 95 | registry.addExtension(extensionFn: +[](MLIRContext *ctx, GPUDialect *dialect) { |
| 96 | #define REGISTER(X) X::attachInterface<GpuIdOpInterface<X>>(*ctx); |
| 97 | REGISTER(ClusterDimOp) |
| 98 | REGISTER(ClusterDimBlocksOp) |
| 99 | REGISTER(ClusterIdOp) |
| 100 | REGISTER(ClusterBlockIdOp) |
| 101 | REGISTER(BlockDimOp) |
| 102 | REGISTER(BlockIdOp) |
| 103 | REGISTER(GridDimOp) |
| 104 | REGISTER(ThreadIdOp) |
| 105 | REGISTER(LaneIdOp) |
| 106 | REGISTER(SubgroupIdOp) |
| 107 | REGISTER(GlobalIdOp) |
| 108 | REGISTER(NumSubgroupsOp) |
| 109 | REGISTER(SubgroupSizeOp) |
| 110 | #undef REGISTER |
| 111 | |
| 112 | LaunchOp::attachInterface<GpuLaunchOpInterface>(*ctx); |
| 113 | }); |
| 114 | } |