1 | //===------ LoopGeneratorsKMP.cpp - IR helper to create loops -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains functions to create parallel loops as LLVM-IR. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "polly/CodeGen/LoopGeneratorsKMP.h" |
14 | #include "llvm/Analysis/LoopInfo.h" |
15 | #include "llvm/IR/Dominators.h" |
16 | #include "llvm/IR/Module.h" |
17 | |
18 | using namespace llvm; |
19 | using namespace polly; |
20 | |
21 | void ParallelLoopGeneratorKMP::createCallSpawnThreads(Value *SubFn, |
22 | Value *SubFnParam, |
23 | Value *LB, Value *UB, |
24 | Value *Stride) { |
25 | const std::string Name = "__kmpc_fork_call" ; |
26 | Function *F = M->getFunction(Name); |
27 | Type *KMPCMicroTy = StructType::getTypeByName(C&: M->getContext(), Name: "kmpc_micro" ); |
28 | |
29 | if (!KMPCMicroTy) { |
30 | // void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...) |
31 | Type *MicroParams[] = {Builder.getPtrTy(AddrSpace: 0), Builder.getPtrTy(AddrSpace: 0)}; |
32 | |
33 | KMPCMicroTy = FunctionType::get(Result: Builder.getVoidTy(), Params: MicroParams, isVarArg: true); |
34 | } |
35 | |
36 | // If F is not available, declare it. |
37 | if (!F) { |
38 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
39 | Type *Params[] = {Builder.getPtrTy(AddrSpace: 0), Builder.getInt32Ty(), |
40 | Builder.getPtrTy(AddrSpace: 0)}; |
41 | |
42 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params, isVarArg: true); |
43 | F = Function::Create(Ty, Linkage, N: Name, M); |
44 | } |
45 | |
46 | Value *Task = |
47 | Builder.CreatePointerBitCastOrAddrSpaceCast(V: SubFn, DestTy: Builder.getPtrTy(AddrSpace: 0)); |
48 | |
49 | Value *Args[] = {SourceLocationInfo, |
50 | Builder.getInt32(C: 4) /* Number of arguments (w/o Task) */, |
51 | Task, |
52 | LB, |
53 | UB, |
54 | Stride, |
55 | SubFnParam}; |
56 | |
57 | CallInst *Call = Builder.CreateCall(Callee: F, Args); |
58 | Call->setDebugLoc(DLGenerated); |
59 | } |
60 | |
61 | void ParallelLoopGeneratorKMP::deployParallelExecution(Function *SubFn, |
62 | Value *SubFnParam, |
63 | Value *LB, Value *UB, |
64 | Value *Stride) { |
65 | // Inform OpenMP runtime about the number of threads if greater than zero |
66 | if (PollyNumThreads > 0) { |
67 | Value *GlobalThreadID = createCallGlobalThreadNum(); |
68 | createCallPushNumThreads(GlobalThreadID, NumThreads: Builder.getInt32(C: PollyNumThreads)); |
69 | } |
70 | |
71 | // Tell the runtime we start a parallel loop |
72 | createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride); |
73 | } |
74 | |
75 | Function *ParallelLoopGeneratorKMP::prepareSubFnDefinition(Function *F) const { |
76 | std::vector<Type *> Arguments = { |
77 | Builder.getPtrTy(AddrSpace: 0), Builder.getPtrTy(AddrSpace: 0), LongType, LongType, LongType, |
78 | Builder.getPtrTy()}; |
79 | |
80 | FunctionType *FT = FunctionType::get(Result: Builder.getVoidTy(), Params: Arguments, isVarArg: false); |
81 | Function *SubFn = Function::Create(Ty: FT, Linkage: Function::InternalLinkage, |
82 | N: F->getName() + "_polly_subfn" , M); |
83 | // Name the function's arguments |
84 | Function::arg_iterator AI = SubFn->arg_begin(); |
85 | AI->setName("polly.kmpc.global_tid" ); |
86 | std::advance(i&: AI, n: 1); |
87 | AI->setName("polly.kmpc.bound_tid" ); |
88 | std::advance(i&: AI, n: 1); |
89 | AI->setName("polly.kmpc.lb" ); |
90 | std::advance(i&: AI, n: 1); |
91 | AI->setName("polly.kmpc.ub" ); |
92 | std::advance(i&: AI, n: 1); |
93 | AI->setName("polly.kmpc.inc" ); |
94 | std::advance(i&: AI, n: 1); |
95 | AI->setName("polly.kmpc.shared" ); |
96 | |
97 | return SubFn; |
98 | } |
99 | |
100 | // Create a subfunction of the following (preliminary) structure: |
101 | // |
102 | // PrevBB |
103 | // | |
104 | // v |
105 | // HeaderBB |
106 | // / | _____ |
107 | // / v v | |
108 | // / PreHeaderBB | |
109 | // | | | |
110 | // | v | |
111 | // | CheckNextBB | |
112 | // \ | \_____/ |
113 | // \ | |
114 | // v v |
115 | // ExitBB |
116 | // |
117 | // HeaderBB will hold allocations, loading of variables and kmp-init calls. |
118 | // CheckNextBB will check for more work (dynamic / static chunked) or will be |
119 | // empty (static non chunked). |
120 | // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB. |
121 | // PreHeaderBB loads the new boundaries (& will lead to the loop body later on). |
122 | // Just like CheckNextBB: PreHeaderBB is (preliminary) empty in the static non |
123 | // chunked scheduling case. ExitBB marks the end of the parallel execution. |
124 | // The possibly empty BasicBlocks will automatically be removed. |
125 | std::tuple<Value *, Function *> |
126 | ParallelLoopGeneratorKMP::createSubFn(Value *SequentialLoopStride, |
127 | AllocaInst *StructData, |
128 | SetVector<Value *> Data, ValueMapT &Map) { |
129 | Function *SubFn = createSubFnDefinition(); |
130 | LLVMContext &Context = SubFn->getContext(); |
131 | |
132 | // Create basic blocks. |
133 | BasicBlock * = BasicBlock::Create(Context, Name: "polly.par.setup" , Parent: SubFn); |
134 | SubFnDT = std::make_unique<DominatorTree>(args&: *SubFn); |
135 | SubFnLI = std::make_unique<LoopInfo>(args&: *SubFnDT); |
136 | |
137 | BasicBlock *ExitBB = BasicBlock::Create(Context, Name: "polly.par.exit" , Parent: SubFn); |
138 | BasicBlock *CheckNextBB = |
139 | BasicBlock::Create(Context, Name: "polly.par.checkNext" , Parent: SubFn); |
140 | BasicBlock * = |
141 | BasicBlock::Create(Context, Name: "polly.par.loadIVBounds" , Parent: SubFn); |
142 | |
143 | SubFnDT->addNewBlock(BB: ExitBB, DomBB: HeaderBB); |
144 | SubFnDT->addNewBlock(BB: CheckNextBB, DomBB: HeaderBB); |
145 | SubFnDT->addNewBlock(BB: PreHeaderBB, DomBB: HeaderBB); |
146 | |
147 | // Fill up basic block HeaderBB. |
148 | Builder.SetInsertPoint(HeaderBB); |
149 | Value *LBPtr = Builder.CreateAlloca(Ty: LongType, ArraySize: nullptr, Name: "polly.par.LBPtr" ); |
150 | Value *UBPtr = Builder.CreateAlloca(Ty: LongType, ArraySize: nullptr, Name: "polly.par.UBPtr" ); |
151 | Value *IsLastPtr = Builder.CreateAlloca(Ty: Builder.getInt32Ty(), ArraySize: nullptr, |
152 | Name: "polly.par.lastIterPtr" ); |
153 | Value *StridePtr = |
154 | Builder.CreateAlloca(Ty: LongType, ArraySize: nullptr, Name: "polly.par.StridePtr" ); |
155 | |
156 | // Get iterator for retrieving the previously defined parameters. |
157 | Function::arg_iterator AI = SubFn->arg_begin(); |
158 | // First argument holds "global thread ID". |
159 | Value *IDPtr = &*AI; |
160 | // Skip "bound thread ID" since it is not used (but had to be defined). |
161 | std::advance(i&: AI, n: 2); |
162 | // Move iterator to: LB, UB, Stride, Shared variable struct. |
163 | Value *LB = &*AI; |
164 | std::advance(i&: AI, n: 1); |
165 | Value *UB = &*AI; |
166 | std::advance(i&: AI, n: 1); |
167 | Value *Stride = &*AI; |
168 | std::advance(i&: AI, n: 1); |
169 | Value *Shared = &*AI; |
170 | |
171 | extractValuesFromStruct(Values: Data, Ty: StructData->getAllocatedType(), Struct: Shared, VMap&: Map); |
172 | |
173 | const auto Alignment = llvm::Align(is64BitArch() ? 8 : 4); |
174 | Value *ID = Builder.CreateAlignedLoad(Ty: Builder.getInt32Ty(), Ptr: IDPtr, Align: Alignment, |
175 | Name: "polly.par.global_tid" ); |
176 | |
177 | Builder.CreateAlignedStore(Val: LB, Ptr: LBPtr, Align: Alignment); |
178 | Builder.CreateAlignedStore(Val: UB, Ptr: UBPtr, Align: Alignment); |
179 | Builder.CreateAlignedStore(Val: Builder.getInt32(C: 0), Ptr: IsLastPtr, Align: Alignment); |
180 | Builder.CreateAlignedStore(Val: Stride, Ptr: StridePtr, Align: Alignment); |
181 | |
182 | // Subtract one as the upper bound provided by openmp is a < comparison |
183 | // whereas the codegenForSequential function creates a <= comparison. |
184 | Value *AdjustedUB = Builder.CreateAdd(LHS: UB, RHS: ConstantInt::get(Ty: LongType, V: -1), |
185 | Name: "polly.indvar.UBAdjusted" ); |
186 | |
187 | Value *ChunkSize = |
188 | ConstantInt::get(Ty: LongType, V: std::max<int>(a: PollyChunkSize, b: 1)); |
189 | |
190 | OMPGeneralSchedulingType Scheduling = |
191 | getSchedType(ChunkSize: PollyChunkSize, Scheduling: PollyScheduling); |
192 | |
193 | switch (Scheduling) { |
194 | case OMPGeneralSchedulingType::Dynamic: |
195 | case OMPGeneralSchedulingType::Guided: |
196 | case OMPGeneralSchedulingType::Runtime: |
197 | // "DYNAMIC" scheduling types are handled below (including 'runtime') |
198 | { |
199 | UB = AdjustedUB; |
200 | createCallDispatchInit(GlobalThreadID: ID, LB, UB, Inc: Stride, ChunkSize); |
201 | Value *HasWork = |
202 | createCallDispatchNext(GlobalThreadID: ID, IsLastPtr, LBPtr, UBPtr, StridePtr); |
203 | Value *HasIteration = |
204 | Builder.CreateICmp(P: llvm::CmpInst::Predicate::ICMP_EQ, LHS: HasWork, |
205 | RHS: Builder.getInt32(C: 1), Name: "polly.hasIteration" ); |
206 | Builder.CreateCondBr(Cond: HasIteration, True: PreHeaderBB, False: ExitBB); |
207 | |
208 | Builder.SetInsertPoint(CheckNextBB); |
209 | HasWork = createCallDispatchNext(GlobalThreadID: ID, IsLastPtr, LBPtr, UBPtr, StridePtr); |
210 | HasIteration = |
211 | Builder.CreateICmp(P: llvm::CmpInst::Predicate::ICMP_EQ, LHS: HasWork, |
212 | RHS: Builder.getInt32(C: 1), Name: "polly.hasWork" ); |
213 | Builder.CreateCondBr(Cond: HasIteration, True: PreHeaderBB, False: ExitBB); |
214 | |
215 | Builder.SetInsertPoint(PreHeaderBB); |
216 | LB = Builder.CreateAlignedLoad(Ty: LongType, Ptr: LBPtr, Align: Alignment, |
217 | Name: "polly.indvar.LB" ); |
218 | UB = Builder.CreateAlignedLoad(Ty: LongType, Ptr: UBPtr, Align: Alignment, |
219 | Name: "polly.indvar.UB" ); |
220 | } |
221 | break; |
222 | case OMPGeneralSchedulingType::StaticChunked: |
223 | case OMPGeneralSchedulingType::StaticNonChunked: |
224 | // "STATIC" scheduling types are handled below |
225 | { |
226 | Builder.CreateAlignedStore(Val: AdjustedUB, Ptr: UBPtr, Align: Alignment); |
227 | createCallStaticInit(GlobalThreadID: ID, IsLastPtr, LBPtr, UBPtr, StridePtr, ChunkSize); |
228 | |
229 | Value *ChunkedStride = Builder.CreateAlignedLoad( |
230 | Ty: LongType, Ptr: StridePtr, Align: Alignment, Name: "polly.kmpc.stride" ); |
231 | |
232 | LB = Builder.CreateAlignedLoad(Ty: LongType, Ptr: LBPtr, Align: Alignment, |
233 | Name: "polly.indvar.LB" ); |
234 | UB = Builder.CreateAlignedLoad(Ty: LongType, Ptr: UBPtr, Align: Alignment, |
235 | Name: "polly.indvar.UB.temp" ); |
236 | |
237 | Value *UBInRange = |
238 | Builder.CreateICmp(P: llvm::CmpInst::Predicate::ICMP_SLE, LHS: UB, RHS: AdjustedUB, |
239 | Name: "polly.indvar.UB.inRange" ); |
240 | UB = Builder.CreateSelect(C: UBInRange, True: UB, False: AdjustedUB, Name: "polly.indvar.UB" ); |
241 | Builder.CreateAlignedStore(Val: UB, Ptr: UBPtr, Align: Alignment); |
242 | |
243 | Value *HasIteration = Builder.CreateICmp( |
244 | P: llvm::CmpInst::Predicate::ICMP_SLE, LHS: LB, RHS: UB, Name: "polly.hasIteration" ); |
245 | Builder.CreateCondBr(Cond: HasIteration, True: PreHeaderBB, False: ExitBB); |
246 | |
247 | if (Scheduling == OMPGeneralSchedulingType::StaticChunked) { |
248 | Builder.SetInsertPoint(PreHeaderBB); |
249 | LB = Builder.CreateAlignedLoad(Ty: LongType, Ptr: LBPtr, Align: Alignment, |
250 | Name: "polly.indvar.LB.entry" ); |
251 | UB = Builder.CreateAlignedLoad(Ty: LongType, Ptr: UBPtr, Align: Alignment, |
252 | Name: "polly.indvar.UB.entry" ); |
253 | } |
254 | |
255 | Builder.SetInsertPoint(CheckNextBB); |
256 | |
257 | if (Scheduling == OMPGeneralSchedulingType::StaticChunked) { |
258 | Value *NextLB = |
259 | Builder.CreateAdd(LHS: LB, RHS: ChunkedStride, Name: "polly.indvar.nextLB" ); |
260 | Value *NextUB = Builder.CreateAdd(LHS: UB, RHS: ChunkedStride); |
261 | |
262 | Value *NextUBOutOfBounds = |
263 | Builder.CreateICmp(P: llvm::CmpInst::Predicate::ICMP_SGT, LHS: NextUB, |
264 | RHS: AdjustedUB, Name: "polly.indvar.nextUB.outOfBounds" ); |
265 | NextUB = Builder.CreateSelect(C: NextUBOutOfBounds, True: AdjustedUB, False: NextUB, |
266 | Name: "polly.indvar.nextUB" ); |
267 | |
268 | Builder.CreateAlignedStore(Val: NextLB, Ptr: LBPtr, Align: Alignment); |
269 | Builder.CreateAlignedStore(Val: NextUB, Ptr: UBPtr, Align: Alignment); |
270 | |
271 | Value *HasWork = |
272 | Builder.CreateICmp(P: llvm::CmpInst::Predicate::ICMP_SLE, LHS: NextLB, |
273 | RHS: AdjustedUB, Name: "polly.hasWork" ); |
274 | Builder.CreateCondBr(Cond: HasWork, True: PreHeaderBB, False: ExitBB); |
275 | } else { |
276 | Builder.CreateBr(Dest: ExitBB); |
277 | } |
278 | |
279 | Builder.SetInsertPoint(PreHeaderBB); |
280 | } |
281 | break; |
282 | } |
283 | |
284 | Builder.CreateBr(Dest: CheckNextBB); |
285 | Builder.SetInsertPoint(--Builder.GetInsertPoint()); |
286 | BasicBlock *AfterBB; |
287 | Value *IV = createLoop(LowerBound: LB, UpperBound: UB, Stride: SequentialLoopStride, Builder, LI&: *SubFnLI, |
288 | DT&: *SubFnDT, ExitBlock&: AfterBB, Predicate: ICmpInst::ICMP_SLE, Annotator: nullptr, Parallel: true, |
289 | /* UseGuard */ false); |
290 | |
291 | BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); |
292 | |
293 | // Add code to terminate this subfunction. |
294 | Builder.SetInsertPoint(ExitBB); |
295 | // Static (i.e. non-dynamic) scheduling types, are terminated with a fini-call |
296 | if (Scheduling == OMPGeneralSchedulingType::StaticChunked || |
297 | Scheduling == OMPGeneralSchedulingType::StaticNonChunked) { |
298 | createCallStaticFini(GlobalThreadID: ID); |
299 | } |
300 | Builder.CreateRetVoid(); |
301 | Builder.SetInsertPoint(LoopBody); |
302 | |
303 | // FIXME: Call SubFnDT->verify() and SubFnLI->verify() to check that the |
304 | // DominatorTree/LoopInfo has been created correctly. Alternatively, recreate |
305 | // from scratch since it is not needed here directly. |
306 | |
307 | return std::make_tuple(args&: IV, args&: SubFn); |
308 | } |
309 | |
310 | Value *ParallelLoopGeneratorKMP::createCallGlobalThreadNum() { |
311 | const std::string Name = "__kmpc_global_thread_num" ; |
312 | Function *F = M->getFunction(Name); |
313 | |
314 | // If F is not available, declare it. |
315 | if (!F) { |
316 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
317 | Type *Params[] = {Builder.getPtrTy(AddrSpace: 0)}; |
318 | |
319 | FunctionType *Ty = FunctionType::get(Result: Builder.getInt32Ty(), Params, isVarArg: false); |
320 | F = Function::Create(Ty, Linkage, N: Name, M); |
321 | } |
322 | |
323 | CallInst *Call = Builder.CreateCall(Callee: F, Args: {SourceLocationInfo}); |
324 | Call->setDebugLoc(DLGenerated); |
325 | return Call; |
326 | } |
327 | |
328 | void ParallelLoopGeneratorKMP::createCallPushNumThreads(Value *GlobalThreadID, |
329 | Value *NumThreads) { |
330 | const std::string Name = "__kmpc_push_num_threads" ; |
331 | Function *F = M->getFunction(Name); |
332 | |
333 | // If F is not available, declare it. |
334 | if (!F) { |
335 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
336 | Type *Params[] = {Builder.getPtrTy(AddrSpace: 0), Builder.getInt32Ty(), |
337 | Builder.getInt32Ty()}; |
338 | |
339 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params, isVarArg: false); |
340 | F = Function::Create(Ty, Linkage, N: Name, M); |
341 | } |
342 | |
343 | Value *Args[] = {SourceLocationInfo, GlobalThreadID, NumThreads}; |
344 | |
345 | CallInst *Call = Builder.CreateCall(Callee: F, Args); |
346 | Call->setDebugLoc(DLGenerated); |
347 | } |
348 | |
349 | void ParallelLoopGeneratorKMP::createCallStaticInit(Value *GlobalThreadID, |
350 | Value *IsLastPtr, |
351 | Value *LBPtr, Value *UBPtr, |
352 | Value *StridePtr, |
353 | Value *ChunkSize) { |
354 | const std::string Name = |
355 | is64BitArch() ? "__kmpc_for_static_init_8" : "__kmpc_for_static_init_4" ; |
356 | Function *F = M->getFunction(Name); |
357 | |
358 | // If F is not available, declare it. |
359 | if (!F) { |
360 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
361 | |
362 | Type *Params[] = {Builder.getPtrTy(AddrSpace: 0), |
363 | Builder.getInt32Ty(), |
364 | Builder.getInt32Ty(), |
365 | Builder.getPtrTy(AddrSpace: 0), |
366 | Builder.getPtrTy(AddrSpace: 0), |
367 | Builder.getPtrTy(AddrSpace: 0), |
368 | Builder.getPtrTy(AddrSpace: 0), |
369 | LongType, |
370 | LongType}; |
371 | |
372 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params, isVarArg: false); |
373 | F = Function::Create(Ty, Linkage, N: Name, M); |
374 | } |
375 | |
376 | // The parameter 'ChunkSize' will hold strictly positive integer values, |
377 | // regardless of PollyChunkSize's value |
378 | Value *Args[] = { |
379 | SourceLocationInfo, |
380 | GlobalThreadID, |
381 | Builder.getInt32(C: int(getSchedType(ChunkSize: PollyChunkSize, Scheduling: PollyScheduling))), |
382 | IsLastPtr, |
383 | LBPtr, |
384 | UBPtr, |
385 | StridePtr, |
386 | ConstantInt::get(Ty: LongType, V: 1), |
387 | ChunkSize}; |
388 | |
389 | CallInst *Call = Builder.CreateCall(Callee: F, Args); |
390 | Call->setDebugLoc(DLGenerated); |
391 | } |
392 | |
393 | void ParallelLoopGeneratorKMP::createCallStaticFini(Value *GlobalThreadID) { |
394 | const std::string Name = "__kmpc_for_static_fini" ; |
395 | Function *F = M->getFunction(Name); |
396 | |
397 | // If F is not available, declare it. |
398 | if (!F) { |
399 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
400 | Type *Params[] = {Builder.getPtrTy(AddrSpace: 0), Builder.getInt32Ty()}; |
401 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params, isVarArg: false); |
402 | F = Function::Create(Ty, Linkage, N: Name, M); |
403 | } |
404 | |
405 | Value *Args[] = {SourceLocationInfo, GlobalThreadID}; |
406 | |
407 | CallInst *Call = Builder.CreateCall(Callee: F, Args); |
408 | Call->setDebugLoc(DLGenerated); |
409 | } |
410 | |
411 | void ParallelLoopGeneratorKMP::createCallDispatchInit(Value *GlobalThreadID, |
412 | Value *LB, Value *UB, |
413 | Value *Inc, |
414 | Value *ChunkSize) { |
415 | const std::string Name = |
416 | is64BitArch() ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_4" ; |
417 | Function *F = M->getFunction(Name); |
418 | |
419 | // If F is not available, declare it. |
420 | if (!F) { |
421 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
422 | |
423 | Type *Params[] = {Builder.getPtrTy(AddrSpace: 0), |
424 | Builder.getInt32Ty(), |
425 | Builder.getInt32Ty(), |
426 | LongType, |
427 | LongType, |
428 | LongType, |
429 | LongType}; |
430 | |
431 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params, isVarArg: false); |
432 | F = Function::Create(Ty, Linkage, N: Name, M); |
433 | } |
434 | |
435 | // The parameter 'ChunkSize' will hold strictly positive integer values, |
436 | // regardless of PollyChunkSize's value |
437 | Value *Args[] = { |
438 | SourceLocationInfo, |
439 | GlobalThreadID, |
440 | Builder.getInt32(C: int(getSchedType(ChunkSize: PollyChunkSize, Scheduling: PollyScheduling))), |
441 | LB, |
442 | UB, |
443 | Inc, |
444 | ChunkSize}; |
445 | |
446 | CallInst *Call = Builder.CreateCall(Callee: F, Args); |
447 | Call->setDebugLoc(DLGenerated); |
448 | } |
449 | |
450 | Value *ParallelLoopGeneratorKMP::createCallDispatchNext(Value *GlobalThreadID, |
451 | Value *IsLastPtr, |
452 | Value *LBPtr, |
453 | Value *UBPtr, |
454 | Value *StridePtr) { |
455 | const std::string Name = |
456 | is64BitArch() ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_4" ; |
457 | Function *F = M->getFunction(Name); |
458 | |
459 | // If F is not available, declare it. |
460 | if (!F) { |
461 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
462 | |
463 | Type *Params[] = {Builder.getPtrTy(AddrSpace: 0), Builder.getInt32Ty(), |
464 | Builder.getPtrTy(AddrSpace: 0), Builder.getPtrTy(AddrSpace: 0), |
465 | Builder.getPtrTy(AddrSpace: 0), Builder.getPtrTy(AddrSpace: 0)}; |
466 | |
467 | FunctionType *Ty = FunctionType::get(Result: Builder.getInt32Ty(), Params, isVarArg: false); |
468 | F = Function::Create(Ty, Linkage, N: Name, M); |
469 | } |
470 | |
471 | Value *Args[] = {SourceLocationInfo, GlobalThreadID, IsLastPtr, LBPtr, UBPtr, |
472 | StridePtr}; |
473 | |
474 | CallInst *Call = Builder.CreateCall(Callee: F, Args); |
475 | Call->setDebugLoc(DLGenerated); |
476 | return Call; |
477 | } |
478 | |
479 | // TODO: This function currently creates a source location dummy. It might be |
480 | // necessary to (actually) provide information, in the future. |
481 | GlobalVariable *ParallelLoopGeneratorKMP::createSourceLocation() { |
482 | const std::string LocName = ".loc.dummy" ; |
483 | GlobalVariable *SourceLocDummy = M->getGlobalVariable(Name: LocName); |
484 | |
485 | if (SourceLocDummy == nullptr) { |
486 | const std::string StructName = "struct.ident_t" ; |
487 | StructType *IdentTy = |
488 | StructType::getTypeByName(C&: M->getContext(), Name: StructName); |
489 | |
490 | // If the ident_t StructType is not available, declare it. |
491 | // in LLVM-IR: ident_t = type { i32, i32, i32, i32, i8* } |
492 | if (!IdentTy) { |
493 | Type *LocMembers[] = {Builder.getInt32Ty(), Builder.getInt32Ty(), |
494 | Builder.getInt32Ty(), Builder.getInt32Ty(), |
495 | Builder.getPtrTy()}; |
496 | |
497 | IdentTy = |
498 | StructType::create(Context&: M->getContext(), Elements: LocMembers, Name: StructName, isPacked: false); |
499 | } |
500 | |
501 | const auto ArrayType = |
502 | llvm::ArrayType::get(ElementType: Builder.getInt8Ty(), /* Length */ NumElements: 23); |
503 | |
504 | // Global Variable Definitions |
505 | GlobalVariable *StrVar = |
506 | new GlobalVariable(*M, ArrayType, true, GlobalValue::PrivateLinkage, |
507 | nullptr, ".str.ident" ); |
508 | StrVar->setAlignment(llvm::Align(1)); |
509 | |
510 | SourceLocDummy = new GlobalVariable( |
511 | *M, IdentTy, true, GlobalValue::PrivateLinkage, nullptr, LocName); |
512 | SourceLocDummy->setAlignment(llvm::Align(8)); |
513 | |
514 | // Constant Definitions |
515 | Constant *InitStr = ConstantDataArray::getString( |
516 | Context&: M->getContext(), Initializer: "Source location dummy." , AddNull: true); |
517 | |
518 | Constant *StrPtr = static_cast<Constant *>(Builder.CreateInBoundsGEP( |
519 | Ty: ArrayType, Ptr: StrVar, IdxList: {Builder.getInt32(C: 0), Builder.getInt32(C: 0)})); |
520 | |
521 | Constant *LocInitStruct = ConstantStruct::get( |
522 | T: IdentTy, V: {Builder.getInt32(C: 0), Builder.getInt32(C: 0), Builder.getInt32(C: 0), |
523 | Builder.getInt32(C: 0), StrPtr}); |
524 | |
525 | // Initialize variables |
526 | StrVar->setInitializer(InitStr); |
527 | SourceLocDummy->setInitializer(LocInitStruct); |
528 | } |
529 | |
530 | return SourceLocDummy; |
531 | } |
532 | |
533 | bool ParallelLoopGeneratorKMP::is64BitArch() { |
534 | return (LongType->getIntegerBitWidth() == 64); |
535 | } |
536 | |
537 | OMPGeneralSchedulingType ParallelLoopGeneratorKMP::getSchedType( |
538 | int ChunkSize, OMPGeneralSchedulingType Scheduling) const { |
539 | if (ChunkSize == 0 && Scheduling == OMPGeneralSchedulingType::StaticChunked) |
540 | return OMPGeneralSchedulingType::StaticNonChunked; |
541 | |
542 | return Scheduling; |
543 | } |
544 | |