| 1 | //===------ LoopGenerators.cpp - IR helper to create loops ---------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file contains functions to create scalar loops and orchestrate the |
| 10 | // creation of parallel loops as LLVM-IR. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "polly/CodeGen/LoopGenerators.h" |
| 15 | #include "polly/Options.h" |
| 16 | #include "polly/ScopDetection.h" |
| 17 | #include "llvm/Analysis/LoopInfo.h" |
| 18 | #include "llvm/IR/DataLayout.h" |
| 19 | #include "llvm/IR/DebugInfoMetadata.h" |
| 20 | #include "llvm/IR/Dominators.h" |
| 21 | #include "llvm/IR/Module.h" |
| 22 | #include "llvm/Support/CommandLine.h" |
| 23 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| 24 | |
| 25 | using namespace llvm; |
| 26 | using namespace polly; |
| 27 | |
| 28 | int polly::PollyNumThreads; |
| 29 | OMPGeneralSchedulingType polly::PollyScheduling; |
| 30 | int polly::PollyChunkSize; |
| 31 | |
| 32 | static cl::opt<int, true> |
| 33 | XPollyNumThreads("polly-num-threads" , |
| 34 | cl::desc("Number of threads to use (0 = auto)" ), |
| 35 | cl::Hidden, cl::location(L&: polly::PollyNumThreads), |
| 36 | cl::init(Val: 0), cl::cat(PollyCategory)); |
| 37 | |
| 38 | cl::opt<bool> PollyVectorizeMetadata( |
| 39 | "polly-annotate-metadata-vectorize" , |
| 40 | cl::desc("Append vectorize enable/disable metadata from polly" ), |
| 41 | cl::init(Val: false), cl::ZeroOrMore, cl::cat(PollyCategory)); |
| 42 | |
| 43 | static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling( |
| 44 | "polly-scheduling" , |
| 45 | cl::desc("Scheduling type of parallel OpenMP for loops" ), |
| 46 | cl::values(clEnumValN(OMPGeneralSchedulingType::StaticChunked, "static" , |
| 47 | "Static scheduling" ), |
| 48 | clEnumValN(OMPGeneralSchedulingType::Dynamic, "dynamic" , |
| 49 | "Dynamic scheduling" ), |
| 50 | clEnumValN(OMPGeneralSchedulingType::Guided, "guided" , |
| 51 | "Guided scheduling" ), |
| 52 | clEnumValN(OMPGeneralSchedulingType::Runtime, "runtime" , |
| 53 | "Runtime determined (OMP_SCHEDULE)" )), |
| 54 | cl::Hidden, cl::location(L&: polly::PollyScheduling), |
| 55 | cl::init(Val: OMPGeneralSchedulingType::Runtime), cl::Optional, |
| 56 | cl::cat(PollyCategory)); |
| 57 | |
| 58 | static cl::opt<int, true> |
| 59 | XPollyChunkSize("polly-scheduling-chunksize" , |
| 60 | cl::desc("Chunksize to use by the OpenMP runtime calls" ), |
| 61 | cl::Hidden, cl::location(L&: polly::PollyChunkSize), |
| 62 | cl::init(Val: 0), cl::Optional, cl::cat(PollyCategory)); |
| 63 | |
| 64 | // We generate a loop of either of the following structures: |
| 65 | // |
| 66 | // BeforeBB BeforeBB |
| 67 | // | | |
| 68 | // v v |
| 69 | // GuardBB PreHeaderBB |
| 70 | // / | | _____ |
| 71 | // __ PreHeaderBB | v \/ | |
| 72 | // / \ / | HeaderBB latch |
| 73 | // latch HeaderBB | |\ | |
| 74 | // \ / \ / | \------/ |
| 75 | // < \ / | |
| 76 | // \ / v |
| 77 | // ExitBB ExitBB |
| 78 | // |
| 79 | // depending on whether or not we know that it is executed at least once. If |
| 80 | // not, GuardBB checks if the loop is executed at least once. If this is the |
| 81 | // case we branch to PreHeaderBB and subsequently to the HeaderBB, which |
| 82 | // contains the loop iv 'polly.indvar', the incremented loop iv |
| 83 | // 'polly.indvar_next' as well as the condition to check if we execute another |
| 84 | // iteration of the loop. After the loop has finished, we branch to ExitBB. |
| 85 | // We expect the type of UB, LB, UB+Stride to be large enough for values that |
| 86 | // UB may take throughout the execution of the loop, including the computation |
| 87 | // of indvar + Stride before the final abort. |
| 88 | Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, |
| 89 | PollyIRBuilder &Builder, LoopInfo &LI, |
| 90 | DominatorTree &DT, BasicBlock *&ExitBB, |
| 91 | ICmpInst::Predicate Predicate, |
| 92 | ScopAnnotator *Annotator, bool Parallel, bool UseGuard, |
| 93 | bool LoopVectDisabled) { |
| 94 | Function *F = Builder.GetInsertBlock()->getParent(); |
| 95 | LLVMContext &Context = F->getContext(); |
| 96 | |
| 97 | assert(LB->getType() == UB->getType() && "Types of loop bounds do not match" ); |
| 98 | IntegerType *LoopIVType = dyn_cast<IntegerType>(Val: UB->getType()); |
| 99 | assert(LoopIVType && "UB is not integer?" ); |
| 100 | |
| 101 | BasicBlock *BeforeBB = Builder.GetInsertBlock(); |
| 102 | BasicBlock *GuardBB = |
| 103 | UseGuard ? BasicBlock::Create(Context, Name: "polly.loop_if" , Parent: F) : nullptr; |
| 104 | BasicBlock * = BasicBlock::Create(Context, Name: "polly.loop_header" , Parent: F); |
| 105 | BasicBlock * = |
| 106 | BasicBlock::Create(Context, Name: "polly.loop_preheader" , Parent: F); |
| 107 | |
| 108 | // Update LoopInfo |
| 109 | Loop *OuterLoop = LI.getLoopFor(BB: BeforeBB); |
| 110 | Loop *NewLoop = LI.AllocateLoop(); |
| 111 | |
| 112 | if (OuterLoop) |
| 113 | OuterLoop->addChildLoop(NewChild: NewLoop); |
| 114 | else |
| 115 | LI.addTopLevelLoop(New: NewLoop); |
| 116 | |
| 117 | if (OuterLoop) { |
| 118 | if (GuardBB) |
| 119 | OuterLoop->addBasicBlockToLoop(NewBB: GuardBB, LI); |
| 120 | OuterLoop->addBasicBlockToLoop(NewBB: PreHeaderBB, LI); |
| 121 | } |
| 122 | |
| 123 | NewLoop->addBasicBlockToLoop(NewBB: HeaderBB, LI); |
| 124 | |
| 125 | // Notify the annotator (if present) that we have a new loop, but only |
| 126 | // after the header block is set. |
| 127 | if (Annotator) |
| 128 | Annotator->pushLoop(L: NewLoop, IsParallel: Parallel); |
| 129 | |
| 130 | // ExitBB |
| 131 | ExitBB = SplitBlock(Old: BeforeBB, SplitPt: Builder.GetInsertPoint(), DT: &DT, LI: &LI); |
| 132 | ExitBB->setName("polly.loop_exit" ); |
| 133 | |
| 134 | // BeforeBB |
| 135 | if (GuardBB) { |
| 136 | BeforeBB->getTerminator()->setSuccessor(Idx: 0, BB: GuardBB); |
| 137 | DT.addNewBlock(BB: GuardBB, DomBB: BeforeBB); |
| 138 | |
| 139 | // GuardBB |
| 140 | Builder.SetInsertPoint(GuardBB); |
| 141 | Value *LoopGuard; |
| 142 | LoopGuard = Builder.CreateICmp(P: Predicate, LHS: LB, RHS: UB); |
| 143 | LoopGuard->setName("polly.loop_guard" ); |
| 144 | Builder.CreateCondBr(Cond: LoopGuard, True: PreHeaderBB, False: ExitBB); |
| 145 | DT.addNewBlock(BB: PreHeaderBB, DomBB: GuardBB); |
| 146 | } else { |
| 147 | BeforeBB->getTerminator()->setSuccessor(Idx: 0, BB: PreHeaderBB); |
| 148 | DT.addNewBlock(BB: PreHeaderBB, DomBB: BeforeBB); |
| 149 | } |
| 150 | |
| 151 | // PreHeaderBB |
| 152 | Builder.SetInsertPoint(PreHeaderBB); |
| 153 | Builder.CreateBr(Dest: HeaderBB); |
| 154 | |
| 155 | // HeaderBB |
| 156 | DT.addNewBlock(BB: HeaderBB, DomBB: PreHeaderBB); |
| 157 | Builder.SetInsertPoint(HeaderBB); |
| 158 | PHINode *IV = Builder.CreatePHI(Ty: LoopIVType, NumReservedValues: 2, Name: "polly.indvar" ); |
| 159 | IV->addIncoming(V: LB, BB: PreHeaderBB); |
| 160 | Stride = Builder.CreateZExtOrBitCast(V: Stride, DestTy: LoopIVType); |
| 161 | Value *IncrementedIV = Builder.CreateNSWAdd(LHS: IV, RHS: Stride, Name: "polly.indvar_next" ); |
| 162 | Value *LoopCondition = |
| 163 | Builder.CreateICmp(P: Predicate, LHS: IncrementedIV, RHS: UB, Name: "polly.loop_cond" ); |
| 164 | |
| 165 | // Create the loop latch and annotate it as such. |
| 166 | BranchInst *B = Builder.CreateCondBr(Cond: LoopCondition, True: HeaderBB, False: ExitBB); |
| 167 | |
| 168 | // Don't annotate vectorize metadata when both LoopVectDisabled and |
| 169 | // PollyVectorizeMetadata are disabled. Annotate vectorize metadata to false |
| 170 | // when LoopVectDisabled is true. Otherwise we annotate the vectorize metadata |
| 171 | // to true. |
| 172 | if (Annotator) { |
| 173 | std::optional<bool> EnableVectorizeMetadata; |
| 174 | if (LoopVectDisabled) |
| 175 | EnableVectorizeMetadata = false; |
| 176 | else if (PollyVectorizeMetadata) |
| 177 | EnableVectorizeMetadata = true; |
| 178 | Annotator->annotateLoopLatch(B, IsParallel: Parallel, EnableVectorizeMetadata); |
| 179 | } |
| 180 | |
| 181 | IV->addIncoming(V: IncrementedIV, BB: HeaderBB); |
| 182 | if (GuardBB) |
| 183 | DT.changeImmediateDominator(BB: ExitBB, NewBB: GuardBB); |
| 184 | else |
| 185 | DT.changeImmediateDominator(BB: ExitBB, NewBB: HeaderBB); |
| 186 | |
| 187 | // The loop body should be added here. |
| 188 | Builder.SetInsertPoint(HeaderBB->getFirstNonPHIIt()); |
| 189 | return IV; |
| 190 | } |
| 191 | |
| 192 | Value *ParallelLoopGenerator::createParallelLoop( |
| 193 | Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues, |
| 194 | ValueMapT &Map, BasicBlock::iterator *LoopBody) { |
| 195 | |
| 196 | AllocaInst *Struct = storeValuesIntoStruct(Values&: UsedValues); |
| 197 | BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint(); |
| 198 | |
| 199 | Value *IV; |
| 200 | Function *SubFn; |
| 201 | std::tie(args&: IV, args&: SubFn) = createSubFn(Stride, Struct, UsedValues, VMap&: Map); |
| 202 | *LoopBody = Builder.GetInsertPoint(); |
| 203 | Builder.SetInsertPoint(BeforeLoop); |
| 204 | |
| 205 | // Add one as the upper bound provided by OpenMP is a < comparison |
| 206 | // whereas the codegenForSequential function creates a <= comparison. |
| 207 | UB = Builder.CreateAdd(LHS: UB, RHS: ConstantInt::get(Ty: LongType, V: 1)); |
| 208 | |
| 209 | // Execute the prepared subfunction in parallel. |
| 210 | deployParallelExecution(SubFn, SubFnParam: Struct, LB, UB, Stride); |
| 211 | |
| 212 | return IV; |
| 213 | } |
| 214 | |
| 215 | Function *ParallelLoopGenerator::createSubFnDefinition() { |
| 216 | Function *F = Builder.GetInsertBlock()->getParent(); |
| 217 | Function *SubFn = prepareSubFnDefinition(F); |
| 218 | |
| 219 | // Certain backends (e.g., NVPTX) do not support '.'s in function names. |
| 220 | // Hence, we ensure that all '.'s are replaced by '_'s. |
| 221 | std::string FunctionName = SubFn->getName().str(); |
| 222 | std::replace(first: FunctionName.begin(), last: FunctionName.end(), old_value: '.', new_value: '_'); |
| 223 | SubFn->setName(FunctionName); |
| 224 | |
| 225 | // Do not run any polly pass on the new function. |
| 226 | SubFn->addFnAttr(Kind: PollySkipFnAttr); |
| 227 | |
| 228 | return SubFn; |
| 229 | } |
| 230 | |
| 231 | AllocaInst * |
| 232 | ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) { |
| 233 | SmallVector<Type *, 8> Members; |
| 234 | |
| 235 | for (Value *V : Values) |
| 236 | Members.push_back(Elt: V->getType()); |
| 237 | |
| 238 | const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); |
| 239 | |
| 240 | // We do not want to allocate the alloca inside any loop, thus we allocate it |
| 241 | // in the entry block of the function and use annotations to denote the actual |
| 242 | // live span (similar to clang). |
| 243 | BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock(); |
| 244 | BasicBlock::iterator IP = EntryBB.getFirstInsertionPt(); |
| 245 | StructType *Ty = StructType::get(Context&: Builder.getContext(), Elements: Members); |
| 246 | AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr, |
| 247 | "polly.par.userContext" , IP); |
| 248 | |
| 249 | for (unsigned i = 0; i < Values.size(); i++) { |
| 250 | Value *Address = Builder.CreateStructGEP(Ty, Ptr: Struct, Idx: i); |
| 251 | Address->setName("polly.subfn.storeaddr." + Values[i]->getName()); |
| 252 | Builder.CreateStore(Val: Values[i], Ptr: Address); |
| 253 | } |
| 254 | |
| 255 | return Struct; |
| 256 | } |
| 257 | |
| 258 | void ParallelLoopGenerator::( |
| 259 | SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) { |
| 260 | for (unsigned i = 0; i < OldValues.size(); i++) { |
| 261 | Value *Address = Builder.CreateStructGEP(Ty, Ptr: Struct, Idx: i); |
| 262 | Type *ElemTy = cast<GetElementPtrInst>(Val: Address)->getResultElementType(); |
| 263 | Value *NewValue = Builder.CreateLoad(Ty: ElemTy, Ptr: Address); |
| 264 | NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName()); |
| 265 | Map[OldValues[i]] = NewValue; |
| 266 | } |
| 267 | } |
| 268 | |
| 269 | DebugLoc polly::createDebugLocForGeneratedCode(Function *F) { |
| 270 | if (!F) |
| 271 | return DebugLoc(); |
| 272 | |
| 273 | LLVMContext &Ctx = F->getContext(); |
| 274 | DISubprogram *DILScope = |
| 275 | dyn_cast_or_null<DISubprogram>(Val: F->getMetadata(KindID: LLVMContext::MD_dbg)); |
| 276 | if (!DILScope) |
| 277 | return DebugLoc(); |
| 278 | return DILocation::get(Context&: Ctx, Line: 0, Column: 0, Scope: DILScope); |
| 279 | } |
| 280 | |