1 | //===------ LoopGenerators.cpp - IR helper to create loops ---------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains functions to create scalar loops and orchestrate the |
10 | // creation of parallel loops as LLVM-IR. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "polly/CodeGen/LoopGenerators.h" |
15 | #include "polly/Options.h" |
16 | #include "polly/ScopDetection.h" |
17 | #include "llvm/Analysis/LoopInfo.h" |
18 | #include "llvm/IR/DataLayout.h" |
19 | #include "llvm/IR/DebugInfoMetadata.h" |
20 | #include "llvm/IR/Dominators.h" |
21 | #include "llvm/IR/Module.h" |
22 | #include "llvm/Support/CommandLine.h" |
23 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
24 | |
25 | using namespace llvm; |
26 | using namespace polly; |
27 | |
28 | int polly::PollyNumThreads; |
29 | OMPGeneralSchedulingType polly::PollyScheduling; |
30 | int polly::PollyChunkSize; |
31 | |
32 | static cl::opt<int, true> |
33 | XPollyNumThreads("polly-num-threads" , |
34 | cl::desc("Number of threads to use (0 = auto)" ), |
35 | cl::Hidden, cl::location(L&: polly::PollyNumThreads), |
36 | cl::init(Val: 0), cl::cat(PollyCategory)); |
37 | |
38 | static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling( |
39 | "polly-scheduling" , |
40 | cl::desc("Scheduling type of parallel OpenMP for loops" ), |
41 | cl::values(clEnumValN(OMPGeneralSchedulingType::StaticChunked, "static" , |
42 | "Static scheduling" ), |
43 | clEnumValN(OMPGeneralSchedulingType::Dynamic, "dynamic" , |
44 | "Dynamic scheduling" ), |
45 | clEnumValN(OMPGeneralSchedulingType::Guided, "guided" , |
46 | "Guided scheduling" ), |
47 | clEnumValN(OMPGeneralSchedulingType::Runtime, "runtime" , |
48 | "Runtime determined (OMP_SCHEDULE)" )), |
49 | cl::Hidden, cl::location(L&: polly::PollyScheduling), |
50 | cl::init(Val: OMPGeneralSchedulingType::Runtime), cl::Optional, |
51 | cl::cat(PollyCategory)); |
52 | |
53 | static cl::opt<int, true> |
54 | XPollyChunkSize("polly-scheduling-chunksize" , |
55 | cl::desc("Chunksize to use by the OpenMP runtime calls" ), |
56 | cl::Hidden, cl::location(L&: polly::PollyChunkSize), |
57 | cl::init(Val: 0), cl::Optional, cl::cat(PollyCategory)); |
58 | |
59 | // We generate a loop of either of the following structures: |
60 | // |
61 | // BeforeBB BeforeBB |
62 | // | | |
63 | // v v |
64 | // GuardBB PreHeaderBB |
65 | // / | | _____ |
66 | // __ PreHeaderBB | v \/ | |
67 | // / \ / | HeaderBB latch |
68 | // latch HeaderBB | |\ | |
69 | // \ / \ / | \------/ |
70 | // < \ / | |
71 | // \ / v |
72 | // ExitBB ExitBB |
73 | // |
74 | // depending on whether or not we know that it is executed at least once. If |
75 | // not, GuardBB checks if the loop is executed at least once. If this is the |
76 | // case we branch to PreHeaderBB and subsequently to the HeaderBB, which |
77 | // contains the loop iv 'polly.indvar', the incremented loop iv |
78 | // 'polly.indvar_next' as well as the condition to check if we execute another |
79 | // iteration of the loop. After the loop has finished, we branch to ExitBB. |
80 | // We expect the type of UB, LB, UB+Stride to be large enough for values that |
81 | // UB may take throughout the execution of the loop, including the computation |
82 | // of indvar + Stride before the final abort. |
83 | Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, |
84 | PollyIRBuilder &Builder, LoopInfo &LI, |
85 | DominatorTree &DT, BasicBlock *&ExitBB, |
86 | ICmpInst::Predicate Predicate, |
87 | ScopAnnotator *Annotator, bool Parallel, bool UseGuard, |
88 | bool LoopVectDisabled) { |
89 | Function *F = Builder.GetInsertBlock()->getParent(); |
90 | LLVMContext &Context = F->getContext(); |
91 | |
92 | assert(LB->getType() == UB->getType() && "Types of loop bounds do not match" ); |
93 | IntegerType *LoopIVType = dyn_cast<IntegerType>(Val: UB->getType()); |
94 | assert(LoopIVType && "UB is not integer?" ); |
95 | |
96 | BasicBlock *BeforeBB = Builder.GetInsertBlock(); |
97 | BasicBlock *GuardBB = |
98 | UseGuard ? BasicBlock::Create(Context, Name: "polly.loop_if" , Parent: F) : nullptr; |
99 | BasicBlock * = BasicBlock::Create(Context, Name: "polly.loop_header" , Parent: F); |
100 | BasicBlock * = |
101 | BasicBlock::Create(Context, Name: "polly.loop_preheader" , Parent: F); |
102 | |
103 | // Update LoopInfo |
104 | Loop *OuterLoop = LI.getLoopFor(BB: BeforeBB); |
105 | Loop *NewLoop = LI.AllocateLoop(); |
106 | |
107 | if (OuterLoop) |
108 | OuterLoop->addChildLoop(NewChild: NewLoop); |
109 | else |
110 | LI.addTopLevelLoop(New: NewLoop); |
111 | |
112 | if (OuterLoop) { |
113 | if (GuardBB) |
114 | OuterLoop->addBasicBlockToLoop(NewBB: GuardBB, LI); |
115 | OuterLoop->addBasicBlockToLoop(NewBB: PreHeaderBB, LI); |
116 | } |
117 | |
118 | NewLoop->addBasicBlockToLoop(NewBB: HeaderBB, LI); |
119 | |
120 | // Notify the annotator (if present) that we have a new loop, but only |
121 | // after the header block is set. |
122 | if (Annotator) |
123 | Annotator->pushLoop(L: NewLoop, IsParallel: Parallel); |
124 | |
125 | // ExitBB |
126 | ExitBB = SplitBlock(Old: BeforeBB, SplitPt: &*Builder.GetInsertPoint(), DT: &DT, LI: &LI); |
127 | ExitBB->setName("polly.loop_exit" ); |
128 | |
129 | // BeforeBB |
130 | if (GuardBB) { |
131 | BeforeBB->getTerminator()->setSuccessor(Idx: 0, BB: GuardBB); |
132 | DT.addNewBlock(BB: GuardBB, DomBB: BeforeBB); |
133 | |
134 | // GuardBB |
135 | Builder.SetInsertPoint(GuardBB); |
136 | Value *LoopGuard; |
137 | LoopGuard = Builder.CreateICmp(P: Predicate, LHS: LB, RHS: UB); |
138 | LoopGuard->setName("polly.loop_guard" ); |
139 | Builder.CreateCondBr(Cond: LoopGuard, True: PreHeaderBB, False: ExitBB); |
140 | DT.addNewBlock(BB: PreHeaderBB, DomBB: GuardBB); |
141 | } else { |
142 | BeforeBB->getTerminator()->setSuccessor(Idx: 0, BB: PreHeaderBB); |
143 | DT.addNewBlock(BB: PreHeaderBB, DomBB: BeforeBB); |
144 | } |
145 | |
146 | // PreHeaderBB |
147 | Builder.SetInsertPoint(PreHeaderBB); |
148 | Builder.CreateBr(Dest: HeaderBB); |
149 | |
150 | // HeaderBB |
151 | DT.addNewBlock(BB: HeaderBB, DomBB: PreHeaderBB); |
152 | Builder.SetInsertPoint(HeaderBB); |
153 | PHINode *IV = Builder.CreatePHI(Ty: LoopIVType, NumReservedValues: 2, Name: "polly.indvar" ); |
154 | IV->addIncoming(V: LB, BB: PreHeaderBB); |
155 | Stride = Builder.CreateZExtOrBitCast(V: Stride, DestTy: LoopIVType); |
156 | Value *IncrementedIV = Builder.CreateNSWAdd(LHS: IV, RHS: Stride, Name: "polly.indvar_next" ); |
157 | Value *LoopCondition = |
158 | Builder.CreateICmp(P: Predicate, LHS: IncrementedIV, RHS: UB, Name: "polly.loop_cond" ); |
159 | |
160 | // Create the loop latch and annotate it as such. |
161 | BranchInst *B = Builder.CreateCondBr(Cond: LoopCondition, True: HeaderBB, False: ExitBB); |
162 | if (Annotator) |
163 | Annotator->annotateLoopLatch(B, L: NewLoop, IsParallel: Parallel, IsLoopVectorizerDisabled: LoopVectDisabled); |
164 | |
165 | IV->addIncoming(V: IncrementedIV, BB: HeaderBB); |
166 | if (GuardBB) |
167 | DT.changeImmediateDominator(BB: ExitBB, NewBB: GuardBB); |
168 | else |
169 | DT.changeImmediateDominator(BB: ExitBB, NewBB: HeaderBB); |
170 | |
171 | // The loop body should be added here. |
172 | Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); |
173 | return IV; |
174 | } |
175 | |
176 | Value *ParallelLoopGenerator::createParallelLoop( |
177 | Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues, |
178 | ValueMapT &Map, BasicBlock::iterator *LoopBody) { |
179 | |
180 | AllocaInst *Struct = storeValuesIntoStruct(Values&: UsedValues); |
181 | BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint(); |
182 | |
183 | Value *IV; |
184 | Function *SubFn; |
185 | std::tie(args&: IV, args&: SubFn) = createSubFn(Stride, Struct, UsedValues, VMap&: Map); |
186 | *LoopBody = Builder.GetInsertPoint(); |
187 | Builder.SetInsertPoint(&*BeforeLoop); |
188 | |
189 | // Add one as the upper bound provided by OpenMP is a < comparison |
190 | // whereas the codegenForSequential function creates a <= comparison. |
191 | UB = Builder.CreateAdd(LHS: UB, RHS: ConstantInt::get(Ty: LongType, V: 1)); |
192 | |
193 | // Execute the prepared subfunction in parallel. |
194 | deployParallelExecution(SubFn, SubFnParam: Struct, LB, UB, Stride); |
195 | |
196 | return IV; |
197 | } |
198 | |
199 | Function *ParallelLoopGenerator::createSubFnDefinition() { |
200 | Function *F = Builder.GetInsertBlock()->getParent(); |
201 | Function *SubFn = prepareSubFnDefinition(F); |
202 | |
203 | // Certain backends (e.g., NVPTX) do not support '.'s in function names. |
204 | // Hence, we ensure that all '.'s are replaced by '_'s. |
205 | std::string FunctionName = SubFn->getName().str(); |
206 | std::replace(first: FunctionName.begin(), last: FunctionName.end(), old_value: '.', new_value: '_'); |
207 | SubFn->setName(FunctionName); |
208 | |
209 | // Do not run any polly pass on the new function. |
210 | SubFn->addFnAttr(Kind: PollySkipFnAttr); |
211 | |
212 | return SubFn; |
213 | } |
214 | |
215 | AllocaInst * |
216 | ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) { |
217 | SmallVector<Type *, 8> Members; |
218 | |
219 | for (Value *V : Values) |
220 | Members.push_back(Elt: V->getType()); |
221 | |
222 | const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); |
223 | |
224 | // We do not want to allocate the alloca inside any loop, thus we allocate it |
225 | // in the entry block of the function and use annotations to denote the actual |
226 | // live span (similar to clang). |
227 | BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock(); |
228 | BasicBlock::iterator IP = EntryBB.getFirstInsertionPt(); |
229 | StructType *Ty = StructType::get(Context&: Builder.getContext(), Elements: Members); |
230 | AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr, |
231 | "polly.par.userContext" , IP); |
232 | |
233 | for (unsigned i = 0; i < Values.size(); i++) { |
234 | Value *Address = Builder.CreateStructGEP(Ty, Ptr: Struct, Idx: i); |
235 | Address->setName("polly.subfn.storeaddr." + Values[i]->getName()); |
236 | Builder.CreateStore(Val: Values[i], Ptr: Address); |
237 | } |
238 | |
239 | return Struct; |
240 | } |
241 | |
242 | void ParallelLoopGenerator::( |
243 | SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) { |
244 | for (unsigned i = 0; i < OldValues.size(); i++) { |
245 | Value *Address = Builder.CreateStructGEP(Ty, Ptr: Struct, Idx: i); |
246 | Type *ElemTy = cast<GetElementPtrInst>(Val: Address)->getResultElementType(); |
247 | Value *NewValue = Builder.CreateLoad(Ty: ElemTy, Ptr: Address); |
248 | NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName()); |
249 | Map[OldValues[i]] = NewValue; |
250 | } |
251 | } |
252 | |
253 | DebugLoc polly::createDebugLocForGeneratedCode(Function *F) { |
254 | if (!F) |
255 | return DebugLoc(); |
256 | |
257 | LLVMContext &Ctx = F->getContext(); |
258 | DISubprogram *DILScope = |
259 | dyn_cast_or_null<DISubprogram>(Val: F->getMetadata(KindID: LLVMContext::MD_dbg)); |
260 | if (!DILScope) |
261 | return DebugLoc(); |
262 | return DILocation::get(Context&: Ctx, Line: 0, Column: 0, Scope: DILScope); |
263 | } |
264 | |