| 1 | //===------ ManualOptimizer.cpp -------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Handle pragma/metadata-directed transformations. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "polly/ManualOptimizer.h" |
| 14 | #include "polly/DependenceInfo.h" |
| 15 | #include "polly/Options.h" |
| 16 | #include "polly/ScheduleTreeTransform.h" |
| 17 | #include "polly/Support/ScopHelper.h" |
| 18 | #include "llvm/ADT/StringRef.h" |
| 19 | #include "llvm/Analysis/LoopInfo.h" |
| 20 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
| 21 | #include "llvm/IR/Metadata.h" |
| 22 | #include "llvm/Transforms/Utils/LoopUtils.h" |
| 23 | #include <optional> |
| 24 | |
| 25 | #include "polly/Support/PollyDebug.h" |
| 26 | #define DEBUG_TYPE "polly-opt-manual" |
| 27 | |
| 28 | using namespace polly; |
| 29 | using namespace llvm; |
| 30 | |
| 31 | namespace { |
| 32 | |
| 33 | static cl::opt<bool> IgnoreDepcheck( |
| 34 | "polly-pragma-ignore-depcheck" , |
| 35 | cl::desc("Skip the dependency check for pragma-based transformations" ), |
| 36 | cl::cat(PollyCategory)); |
| 37 | |
| 38 | /// Same as llvm::hasUnrollTransformation(), but takes a LoopID as argument |
| 39 | /// instead of a Loop. |
| 40 | static TransformationMode hasUnrollTransformation(MDNode *LoopID) { |
| 41 | if (getBooleanLoopAttribute(LoopID, Name: "llvm.loop.unroll.disable" )) |
| 42 | return TM_SuppressedByUser; |
| 43 | |
| 44 | std::optional<int> Count = |
| 45 | getOptionalIntLoopAttribute(LoopID, Name: "llvm.loop.unroll.count" ); |
| 46 | if (Count) |
| 47 | return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser; |
| 48 | |
| 49 | if (getBooleanLoopAttribute(LoopID, Name: "llvm.loop.unroll.enable" )) |
| 50 | return TM_ForcedByUser; |
| 51 | |
| 52 | if (getBooleanLoopAttribute(LoopID, Name: "llvm.loop.unroll.full" )) |
| 53 | return TM_ForcedByUser; |
| 54 | |
| 55 | if (hasDisableAllTransformsHint(LoopID)) |
| 56 | return TM_Disable; |
| 57 | |
| 58 | return TM_Unspecified; |
| 59 | } |
| 60 | |
| 61 | // Return the first DebugLoc in the list. |
| 62 | static DebugLoc findFirstDebugLoc(MDNode *MD) { |
| 63 | if (MD) { |
| 64 | for (const MDOperand &X : drop_begin(RangeOrContainer: MD->operands(), N: 1)) { |
| 65 | Metadata *A = X.get(); |
| 66 | if (!isa<DILocation>(Val: A)) |
| 67 | continue; |
| 68 | return cast<DILocation>(Val: A); |
| 69 | } |
| 70 | } |
| 71 | |
| 72 | return {}; |
| 73 | } |
| 74 | |
| 75 | static DebugLoc findTransformationDebugLoc(MDNode *LoopMD, StringRef Name) { |
| 76 | // First find dedicated transformation location |
| 77 | // (such as the location of #pragma clang loop) |
| 78 | MDNode *MD = findOptionMDForLoopID(LoopID: LoopMD, Name); |
| 79 | if (DebugLoc K = findFirstDebugLoc(MD)) |
| 80 | return K; |
| 81 | |
| 82 | // Otherwise, fall back to the location of the loop itself |
| 83 | return findFirstDebugLoc(MD: LoopMD); |
| 84 | } |
| 85 | |
| 86 | /// Apply full or partial unrolling. |
| 87 | static isl::schedule applyLoopUnroll(MDNode *LoopMD, |
| 88 | isl::schedule_node BandToUnroll) { |
| 89 | TransformationMode UnrollMode = ::hasUnrollTransformation(LoopID: LoopMD); |
| 90 | if (UnrollMode & TM_Disable) |
| 91 | return {}; |
| 92 | |
| 93 | assert(!BandToUnroll.is_null()); |
| 94 | // TODO: Isl's codegen also supports unrolling by isl_ast_build via |
| 95 | // isl_schedule_node_band_set_ast_build_options({ unroll[x] }) which would be |
| 96 | // more efficient because the content duplication is delayed. However, the |
| 97 | // unrolled loop could be input of another loop transformation which expects |
| 98 | // the explicit schedule nodes. That is, we would need this explicit expansion |
| 99 | // anyway and using the ISL codegen option is a compile-time optimization. |
| 100 | int64_t Factor = |
| 101 | getOptionalIntLoopAttribute(LoopID: LoopMD, Name: "llvm.loop.unroll.count" ).value_or(u: 0); |
| 102 | bool Full = getBooleanLoopAttribute(LoopID: LoopMD, Name: "llvm.loop.unroll.full" ); |
| 103 | assert((!Full || !(Factor > 0)) && |
| 104 | "Cannot unroll fully and partially at the same time" ); |
| 105 | |
| 106 | if (Full) |
| 107 | return applyFullUnroll(BandToUnroll); |
| 108 | |
| 109 | if (Factor > 0) |
| 110 | return applyPartialUnroll(BandToUnroll, Factor); |
| 111 | |
| 112 | // For heuristic unrolling, fall back to LLVM's LoopUnroll pass. |
| 113 | return {}; |
| 114 | } |
| 115 | |
| 116 | static isl::schedule applyLoopFission(MDNode *LoopMD, |
| 117 | isl::schedule_node BandToFission) { |
| 118 | // TODO: Make it possible to selectively fission substatements. |
| 119 | // TODO: Apply followup loop properties. |
| 120 | // TODO: Instead of fission every statement, find the maximum set that does |
| 121 | // not cause a dependency violation. |
| 122 | return applyMaxFission(BandToFission); |
| 123 | } |
| 124 | |
| 125 | // Return the properties from a LoopID. Scalar properties are ignored. |
| 126 | static auto getLoopMDProps(MDNode *LoopMD) { |
| 127 | return map_range( |
| 128 | C: make_filter_range( |
| 129 | Range: drop_begin(RangeOrContainer: LoopMD->operands(), N: 1), |
| 130 | Pred: [](const MDOperand &MDOp) { return isa<MDNode>(Val: MDOp.get()); }), |
| 131 | F: [](const MDOperand &MDOp) { return cast<MDNode>(Val: MDOp.get()); }); |
| 132 | } |
| 133 | |
| 134 | /// Recursively visit all nodes in a schedule, loop for loop-transformations |
| 135 | /// metadata and apply the first encountered. |
| 136 | class SearchTransformVisitor final |
| 137 | : public RecursiveScheduleTreeVisitor<SearchTransformVisitor> { |
| 138 | private: |
| 139 | using BaseTy = RecursiveScheduleTreeVisitor<SearchTransformVisitor>; |
| 140 | BaseTy &getBase() { return *this; } |
| 141 | const BaseTy &getBase() const { return *this; } |
| 142 | |
| 143 | polly::Scop *S; |
| 144 | const Dependences *D; |
| 145 | OptimizationRemarkEmitter *ORE; |
| 146 | |
| 147 | // Set after a transformation is applied. Recursive search must be aborted |
| 148 | // once this happens to ensure that any new followup transformation is |
| 149 | // transformed in innermost-first order. |
| 150 | isl::schedule Result; |
| 151 | |
| 152 | /// Check whether a schedule after a transformation is legal. Return the old |
| 153 | /// schedule without the transformation. |
| 154 | isl::schedule |
| 155 | checkDependencyViolation(llvm::MDNode *LoopMD, llvm::BasicBlock *CodeRegion, |
| 156 | const isl::schedule_node &OrigBand, |
| 157 | StringRef DebugLocAttr, StringRef TransPrefix, |
| 158 | StringRef , StringRef TransformationName) { |
| 159 | if (D->isValidSchedule(S&: *S, NewSched: Result)) |
| 160 | return Result; |
| 161 | |
| 162 | LLVMContext &Ctx = LoopMD->getContext(); |
| 163 | POLLY_DEBUG(dbgs() << "Dependency violation detected\n" ); |
| 164 | |
| 165 | DebugLoc TransformLoc = findTransformationDebugLoc(LoopMD, Name: DebugLocAttr); |
| 166 | |
| 167 | if (IgnoreDepcheck) { |
| 168 | POLLY_DEBUG(dbgs() << "Still accepting transformation due to " |
| 169 | "-polly-pragma-ignore-depcheck\n" ); |
| 170 | if (ORE) { |
| 171 | ORE->emit( |
| 172 | OptDiag: OptimizationRemark(DEBUG_TYPE, RemarkName, TransformLoc, CodeRegion) |
| 173 | << (Twine("Could not verify dependencies for " ) + |
| 174 | TransformationName + |
| 175 | "; still applying because of -polly-pragma-ignore-depcheck" ) |
| 176 | .str()); |
| 177 | } |
| 178 | return Result; |
| 179 | } |
| 180 | |
| 181 | POLLY_DEBUG(dbgs() << "Rolling back transformation\n" ); |
| 182 | |
| 183 | if (ORE) { |
| 184 | ORE->emit(OptDiag: DiagnosticInfoOptimizationFailure(DEBUG_TYPE, RemarkName, |
| 185 | TransformLoc, CodeRegion) |
| 186 | << (Twine("not applying " ) + TransformationName + |
| 187 | ": cannot ensure semantic equivalence due to possible " |
| 188 | "dependency violations" ) |
| 189 | .str()); |
| 190 | } |
| 191 | |
| 192 | // If illegal, revert and remove the transformation to not risk re-trying |
| 193 | // indefinitely. |
| 194 | MDNode *NewLoopMD = |
| 195 | makePostTransformationMetadata(Context&: Ctx, OrigLoopID: LoopMD, RemovePrefixes: {TransPrefix}, AddAttrs: {}); |
| 196 | BandAttr *Attr = getBandAttr(MarkOrBand: OrigBand); |
| 197 | Attr->Metadata = NewLoopMD; |
| 198 | |
| 199 | // Roll back old schedule. |
| 200 | return OrigBand.get_schedule(); |
| 201 | } |
| 202 | |
| 203 | public: |
| 204 | (polly::Scop *S, const Dependences *D, |
| 205 | OptimizationRemarkEmitter *ORE) |
| 206 | : S(S), D(D), ORE(ORE) {} |
| 207 | |
| 208 | static isl::schedule (polly::Scop *S, |
| 209 | const Dependences *D, |
| 210 | OptimizationRemarkEmitter *ORE, |
| 211 | const isl::schedule &Sched) { |
| 212 | SearchTransformVisitor Transformer(S, D, ORE); |
| 213 | Transformer.visit(Schedule: Sched); |
| 214 | return Transformer.Result; |
| 215 | } |
| 216 | |
| 217 | void visitBand(isl::schedule_node_band Band) { |
| 218 | // Transform inner loops first (depth-first search). |
| 219 | getBase().visitBand(Band); |
| 220 | if (!Result.is_null()) |
| 221 | return; |
| 222 | |
| 223 | // Since it is (currently) not possible to have a BandAttr marker that is |
| 224 | // specific to each loop in a band, we only support single-loop bands. |
| 225 | if (isl_schedule_node_band_n_member(node: Band.get()) != 1) |
| 226 | return; |
| 227 | |
| 228 | BandAttr *Attr = getBandAttr(MarkOrBand: Band); |
| 229 | if (!Attr) { |
| 230 | // Band has no attribute. |
| 231 | return; |
| 232 | } |
| 233 | |
| 234 | // CodeRegion used but ORE to determine code hotness. |
| 235 | // TODO: Works only for original loop; for transformed loops, should track |
| 236 | // where the loop's body code comes from. |
| 237 | Loop *Loop = Attr->OriginalLoop; |
| 238 | BasicBlock *CodeRegion = nullptr; |
| 239 | if (Loop) |
| 240 | CodeRegion = Loop->getHeader(); |
| 241 | |
| 242 | MDNode *LoopMD = Attr->Metadata; |
| 243 | if (!LoopMD) |
| 244 | return; |
| 245 | |
| 246 | // Iterate over loop properties to find the first transformation. |
| 247 | // FIXME: If there are more than one transformation in the LoopMD (making |
| 248 | // the order of transformations ambiguous), all others are silently ignored. |
| 249 | for (MDNode *MD : getLoopMDProps(LoopMD)) { |
| 250 | auto *NameMD = dyn_cast<MDString>(Val: MD->getOperand(I: 0).get()); |
| 251 | if (!NameMD) |
| 252 | continue; |
| 253 | StringRef AttrName = NameMD->getString(); |
| 254 | |
| 255 | // Honor transformation order; transform the first transformation in the |
| 256 | // list first. |
| 257 | if (AttrName == "llvm.loop.unroll.enable" || |
| 258 | AttrName == "llvm.loop.unroll.count" || |
| 259 | AttrName == "llvm.loop.unroll.full" ) { |
| 260 | Result = applyLoopUnroll(LoopMD, BandToUnroll: Band); |
| 261 | if (!Result.is_null()) |
| 262 | return; |
| 263 | } else if (AttrName == "llvm.loop.distribute.enable" ) { |
| 264 | Result = applyLoopFission(LoopMD, BandToFission: Band); |
| 265 | if (!Result.is_null()) |
| 266 | Result = checkDependencyViolation( |
| 267 | LoopMD, CodeRegion, OrigBand: Band, DebugLocAttr: "llvm.loop.distribute.loc" , |
| 268 | TransPrefix: "llvm.loop.distribute." , RemarkName: "FailedRequestedFission" , |
| 269 | TransformationName: "loop fission/distribution" ); |
| 270 | if (!Result.is_null()) |
| 271 | return; |
| 272 | } |
| 273 | |
| 274 | // not a loop transformation; look for next property |
| 275 | } |
| 276 | } |
| 277 | |
| 278 | void visitNode(isl::schedule_node Other) { |
| 279 | if (!Result.is_null()) |
| 280 | return; |
| 281 | getBase().visitNode(Node: Other); |
| 282 | } |
| 283 | }; |
| 284 | |
| 285 | } // namespace |
| 286 | |
| 287 | isl::schedule |
| 288 | polly::(Scop *S, isl::schedule Sched, |
| 289 | const Dependences &D, |
| 290 | OptimizationRemarkEmitter *ORE) { |
| 291 | // Search the loop nest for transformations until fixpoint. |
| 292 | while (true) { |
| 293 | isl::schedule Result = |
| 294 | SearchTransformVisitor::applyOneTransformation(S, D: &D, ORE, Sched); |
| 295 | if (Result.is_null()) { |
| 296 | // No (more) transformation has been found. |
| 297 | break; |
| 298 | } |
| 299 | |
| 300 | // Use transformed schedule and look for more transformations. |
| 301 | Sched = Result; |
| 302 | } |
| 303 | |
| 304 | return Sched; |
| 305 | } |
| 306 | |