1 | //===------ LoopGeneratorsKMP.cpp - IR helper to create loops -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains functions to create parallel loops as LLVM-IR. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "polly/CodeGen/LoopGeneratorsKMP.h" |
14 | #include "llvm/IR/Dominators.h" |
15 | #include "llvm/IR/Module.h" |
16 | |
17 | using namespace llvm; |
18 | using namespace polly; |
19 | |
20 | void ParallelLoopGeneratorKMP::createCallSpawnThreads(Value *SubFn, |
21 | Value *SubFnParam, |
22 | Value *LB, Value *UB, |
23 | Value *Stride) { |
24 | const std::string Name = "__kmpc_fork_call" ; |
25 | Function *F = M->getFunction(Name); |
26 | Type *KMPCMicroTy = StructType::getTypeByName(C&: M->getContext(), Name: "kmpc_micro" ); |
27 | |
28 | if (!KMPCMicroTy) { |
29 | // void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...) |
30 | Type *MicroParams[] = {Builder.getInt32Ty()->getPointerTo(), |
31 | Builder.getInt32Ty()->getPointerTo()}; |
32 | |
33 | KMPCMicroTy = FunctionType::get(Result: Builder.getVoidTy(), Params: MicroParams, isVarArg: true); |
34 | } |
35 | |
36 | // If F is not available, declare it. |
37 | if (!F) { |
38 | StructType *IdentTy = |
39 | StructType::getTypeByName(C&: M->getContext(), Name: "struct.ident_t" ); |
40 | |
41 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
42 | Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty(), |
43 | KMPCMicroTy->getPointerTo()}; |
44 | |
45 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params, isVarArg: true); |
46 | F = Function::Create(Ty, Linkage, N: Name, M); |
47 | } |
48 | |
49 | Value *Task = Builder.CreatePointerBitCastOrAddrSpaceCast( |
50 | V: SubFn, DestTy: KMPCMicroTy->getPointerTo()); |
51 | |
52 | Value *Args[] = {SourceLocationInfo, |
53 | Builder.getInt32(C: 4) /* Number of arguments (w/o Task) */, |
54 | Task, |
55 | LB, |
56 | UB, |
57 | Stride, |
58 | SubFnParam}; |
59 | |
60 | CallInst *Call = Builder.CreateCall(Callee: F, Args); |
61 | Call->setDebugLoc(DLGenerated); |
62 | } |
63 | |
64 | void ParallelLoopGeneratorKMP::deployParallelExecution(Function *SubFn, |
65 | Value *SubFnParam, |
66 | Value *LB, Value *UB, |
67 | Value *Stride) { |
68 | // Inform OpenMP runtime about the number of threads if greater than zero |
69 | if (PollyNumThreads > 0) { |
70 | Value *GlobalThreadID = createCallGlobalThreadNum(); |
71 | createCallPushNumThreads(GlobalThreadID, NumThreads: Builder.getInt32(C: PollyNumThreads)); |
72 | } |
73 | |
74 | // Tell the runtime we start a parallel loop |
75 | createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride); |
76 | } |
77 | |
78 | Function *ParallelLoopGeneratorKMP::prepareSubFnDefinition(Function *F) const { |
79 | std::vector<Type *> Arguments = {Builder.getInt32Ty()->getPointerTo(), |
80 | Builder.getInt32Ty()->getPointerTo(), |
81 | LongType, |
82 | LongType, |
83 | LongType, |
84 | Builder.getPtrTy()}; |
85 | |
86 | FunctionType *FT = FunctionType::get(Result: Builder.getVoidTy(), Params: Arguments, isVarArg: false); |
87 | Function *SubFn = Function::Create(Ty: FT, Linkage: Function::InternalLinkage, |
88 | N: F->getName() + "_polly_subfn" , M); |
89 | // Name the function's arguments |
90 | Function::arg_iterator AI = SubFn->arg_begin(); |
91 | AI->setName("polly.kmpc.global_tid" ); |
92 | std::advance(i&: AI, n: 1); |
93 | AI->setName("polly.kmpc.bound_tid" ); |
94 | std::advance(i&: AI, n: 1); |
95 | AI->setName("polly.kmpc.lb" ); |
96 | std::advance(i&: AI, n: 1); |
97 | AI->setName("polly.kmpc.ub" ); |
98 | std::advance(i&: AI, n: 1); |
99 | AI->setName("polly.kmpc.inc" ); |
100 | std::advance(i&: AI, n: 1); |
101 | AI->setName("polly.kmpc.shared" ); |
102 | |
103 | return SubFn; |
104 | } |
105 | |
106 | // Create a subfunction of the following (preliminary) structure: |
107 | // |
108 | // PrevBB |
109 | // | |
110 | // v |
111 | // HeaderBB |
112 | // / | _____ |
113 | // / v v | |
114 | // / PreHeaderBB | |
115 | // | | | |
116 | // | v | |
117 | // | CheckNextBB | |
118 | // \ | \_____/ |
119 | // \ | |
120 | // v v |
121 | // ExitBB |
122 | // |
123 | // HeaderBB will hold allocations, loading of variables and kmp-init calls. |
124 | // CheckNextBB will check for more work (dynamic / static chunked) or will be |
125 | // empty (static non chunked). |
126 | // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB. |
127 | // PreHeaderBB loads the new boundaries (& will lead to the loop body later on). |
128 | // Just like CheckNextBB: PreHeaderBB is (preliminary) empty in the static non |
129 | // chunked scheduling case. ExitBB marks the end of the parallel execution. |
130 | // The possibly empty BasicBlocks will automatically be removed. |
131 | std::tuple<Value *, Function *> |
132 | ParallelLoopGeneratorKMP::createSubFn(Value *SequentialLoopStride, |
133 | AllocaInst *StructData, |
134 | SetVector<Value *> Data, ValueMapT &Map) { |
135 | Function *SubFn = createSubFnDefinition(); |
136 | LLVMContext &Context = SubFn->getContext(); |
137 | |
138 | // Store the previous basic block. |
139 | BasicBlock *PrevBB = Builder.GetInsertBlock(); |
140 | |
141 | // Create basic blocks. |
142 | BasicBlock * = BasicBlock::Create(Context, Name: "polly.par.setup" , Parent: SubFn); |
143 | BasicBlock *ExitBB = BasicBlock::Create(Context, Name: "polly.par.exit" , Parent: SubFn); |
144 | BasicBlock *CheckNextBB = |
145 | BasicBlock::Create(Context, Name: "polly.par.checkNext" , Parent: SubFn); |
146 | BasicBlock * = |
147 | BasicBlock::Create(Context, Name: "polly.par.loadIVBounds" , Parent: SubFn); |
148 | |
149 | DT.addNewBlock(BB: HeaderBB, DomBB: PrevBB); |
150 | DT.addNewBlock(BB: ExitBB, DomBB: HeaderBB); |
151 | DT.addNewBlock(BB: CheckNextBB, DomBB: HeaderBB); |
152 | DT.addNewBlock(BB: PreHeaderBB, DomBB: HeaderBB); |
153 | |
154 | // Fill up basic block HeaderBB. |
155 | Builder.SetInsertPoint(HeaderBB); |
156 | Value *LBPtr = Builder.CreateAlloca(Ty: LongType, ArraySize: nullptr, Name: "polly.par.LBPtr" ); |
157 | Value *UBPtr = Builder.CreateAlloca(Ty: LongType, ArraySize: nullptr, Name: "polly.par.UBPtr" ); |
158 | Value *IsLastPtr = Builder.CreateAlloca(Ty: Builder.getInt32Ty(), ArraySize: nullptr, |
159 | Name: "polly.par.lastIterPtr" ); |
160 | Value *StridePtr = |
161 | Builder.CreateAlloca(Ty: LongType, ArraySize: nullptr, Name: "polly.par.StridePtr" ); |
162 | |
163 | // Get iterator for retrieving the previously defined parameters. |
164 | Function::arg_iterator AI = SubFn->arg_begin(); |
165 | // First argument holds "global thread ID". |
166 | Value *IDPtr = &*AI; |
167 | // Skip "bound thread ID" since it is not used (but had to be defined). |
168 | std::advance(i&: AI, n: 2); |
169 | // Move iterator to: LB, UB, Stride, Shared variable struct. |
170 | Value *LB = &*AI; |
171 | std::advance(i&: AI, n: 1); |
172 | Value *UB = &*AI; |
173 | std::advance(i&: AI, n: 1); |
174 | Value *Stride = &*AI; |
175 | std::advance(i&: AI, n: 1); |
176 | Value *Shared = &*AI; |
177 | |
178 | extractValuesFromStruct(Values: Data, Ty: StructData->getAllocatedType(), Struct: Shared, VMap&: Map); |
179 | |
180 | const auto Alignment = llvm::Align(is64BitArch() ? 8 : 4); |
181 | Value *ID = Builder.CreateAlignedLoad(Ty: Builder.getInt32Ty(), Ptr: IDPtr, Align: Alignment, |
182 | Name: "polly.par.global_tid" ); |
183 | |
184 | Builder.CreateAlignedStore(Val: LB, Ptr: LBPtr, Align: Alignment); |
185 | Builder.CreateAlignedStore(Val: UB, Ptr: UBPtr, Align: Alignment); |
186 | Builder.CreateAlignedStore(Val: Builder.getInt32(C: 0), Ptr: IsLastPtr, Align: Alignment); |
187 | Builder.CreateAlignedStore(Val: Stride, Ptr: StridePtr, Align: Alignment); |
188 | |
189 | // Subtract one as the upper bound provided by openmp is a < comparison |
190 | // whereas the codegenForSequential function creates a <= comparison. |
191 | Value *AdjustedUB = Builder.CreateAdd(LHS: UB, RHS: ConstantInt::get(Ty: LongType, V: -1), |
192 | Name: "polly.indvar.UBAdjusted" ); |
193 | |
194 | Value *ChunkSize = |
195 | ConstantInt::get(Ty: LongType, V: std::max<int>(a: PollyChunkSize, b: 1)); |
196 | |
197 | OMPGeneralSchedulingType Scheduling = |
198 | getSchedType(ChunkSize: PollyChunkSize, Scheduling: PollyScheduling); |
199 | |
200 | switch (Scheduling) { |
201 | case OMPGeneralSchedulingType::Dynamic: |
202 | case OMPGeneralSchedulingType::Guided: |
203 | case OMPGeneralSchedulingType::Runtime: |
204 | // "DYNAMIC" scheduling types are handled below (including 'runtime') |
205 | { |
206 | UB = AdjustedUB; |
207 | createCallDispatchInit(GlobalThreadID: ID, LB, UB, Inc: Stride, ChunkSize); |
208 | Value *HasWork = |
209 | createCallDispatchNext(GlobalThreadID: ID, IsLastPtr, LBPtr, UBPtr, StridePtr); |
210 | Value *HasIteration = |
211 | Builder.CreateICmp(P: llvm::CmpInst::Predicate::ICMP_EQ, LHS: HasWork, |
212 | RHS: Builder.getInt32(C: 1), Name: "polly.hasIteration" ); |
213 | Builder.CreateCondBr(Cond: HasIteration, True: PreHeaderBB, False: ExitBB); |
214 | |
215 | Builder.SetInsertPoint(CheckNextBB); |
216 | HasWork = createCallDispatchNext(GlobalThreadID: ID, IsLastPtr, LBPtr, UBPtr, StridePtr); |
217 | HasIteration = |
218 | Builder.CreateICmp(P: llvm::CmpInst::Predicate::ICMP_EQ, LHS: HasWork, |
219 | RHS: Builder.getInt32(C: 1), Name: "polly.hasWork" ); |
220 | Builder.CreateCondBr(Cond: HasIteration, True: PreHeaderBB, False: ExitBB); |
221 | |
222 | Builder.SetInsertPoint(PreHeaderBB); |
223 | LB = Builder.CreateAlignedLoad(Ty: LongType, Ptr: LBPtr, Align: Alignment, |
224 | Name: "polly.indvar.LB" ); |
225 | UB = Builder.CreateAlignedLoad(Ty: LongType, Ptr: UBPtr, Align: Alignment, |
226 | Name: "polly.indvar.UB" ); |
227 | } |
228 | break; |
229 | case OMPGeneralSchedulingType::StaticChunked: |
230 | case OMPGeneralSchedulingType::StaticNonChunked: |
231 | // "STATIC" scheduling types are handled below |
232 | { |
233 | Builder.CreateAlignedStore(Val: AdjustedUB, Ptr: UBPtr, Align: Alignment); |
234 | createCallStaticInit(GlobalThreadID: ID, IsLastPtr, LBPtr, UBPtr, StridePtr, ChunkSize); |
235 | |
236 | Value *ChunkedStride = Builder.CreateAlignedLoad( |
237 | Ty: LongType, Ptr: StridePtr, Align: Alignment, Name: "polly.kmpc.stride" ); |
238 | |
239 | LB = Builder.CreateAlignedLoad(Ty: LongType, Ptr: LBPtr, Align: Alignment, |
240 | Name: "polly.indvar.LB" ); |
241 | UB = Builder.CreateAlignedLoad(Ty: LongType, Ptr: UBPtr, Align: Alignment, |
242 | Name: "polly.indvar.UB.temp" ); |
243 | |
244 | Value *UBInRange = |
245 | Builder.CreateICmp(P: llvm::CmpInst::Predicate::ICMP_SLE, LHS: UB, RHS: AdjustedUB, |
246 | Name: "polly.indvar.UB.inRange" ); |
247 | UB = Builder.CreateSelect(C: UBInRange, True: UB, False: AdjustedUB, Name: "polly.indvar.UB" ); |
248 | Builder.CreateAlignedStore(Val: UB, Ptr: UBPtr, Align: Alignment); |
249 | |
250 | Value *HasIteration = Builder.CreateICmp( |
251 | P: llvm::CmpInst::Predicate::ICMP_SLE, LHS: LB, RHS: UB, Name: "polly.hasIteration" ); |
252 | Builder.CreateCondBr(Cond: HasIteration, True: PreHeaderBB, False: ExitBB); |
253 | |
254 | if (Scheduling == OMPGeneralSchedulingType::StaticChunked) { |
255 | Builder.SetInsertPoint(PreHeaderBB); |
256 | LB = Builder.CreateAlignedLoad(Ty: LongType, Ptr: LBPtr, Align: Alignment, |
257 | Name: "polly.indvar.LB.entry" ); |
258 | UB = Builder.CreateAlignedLoad(Ty: LongType, Ptr: UBPtr, Align: Alignment, |
259 | Name: "polly.indvar.UB.entry" ); |
260 | } |
261 | |
262 | Builder.SetInsertPoint(CheckNextBB); |
263 | |
264 | if (Scheduling == OMPGeneralSchedulingType::StaticChunked) { |
265 | Value *NextLB = |
266 | Builder.CreateAdd(LHS: LB, RHS: ChunkedStride, Name: "polly.indvar.nextLB" ); |
267 | Value *NextUB = Builder.CreateAdd(LHS: UB, RHS: ChunkedStride); |
268 | |
269 | Value *NextUBOutOfBounds = |
270 | Builder.CreateICmp(P: llvm::CmpInst::Predicate::ICMP_SGT, LHS: NextUB, |
271 | RHS: AdjustedUB, Name: "polly.indvar.nextUB.outOfBounds" ); |
272 | NextUB = Builder.CreateSelect(C: NextUBOutOfBounds, True: AdjustedUB, False: NextUB, |
273 | Name: "polly.indvar.nextUB" ); |
274 | |
275 | Builder.CreateAlignedStore(Val: NextLB, Ptr: LBPtr, Align: Alignment); |
276 | Builder.CreateAlignedStore(Val: NextUB, Ptr: UBPtr, Align: Alignment); |
277 | |
278 | Value *HasWork = |
279 | Builder.CreateICmp(P: llvm::CmpInst::Predicate::ICMP_SLE, LHS: NextLB, |
280 | RHS: AdjustedUB, Name: "polly.hasWork" ); |
281 | Builder.CreateCondBr(Cond: HasWork, True: PreHeaderBB, False: ExitBB); |
282 | } else { |
283 | Builder.CreateBr(Dest: ExitBB); |
284 | } |
285 | |
286 | Builder.SetInsertPoint(PreHeaderBB); |
287 | } |
288 | break; |
289 | } |
290 | |
291 | Builder.CreateBr(Dest: CheckNextBB); |
292 | Builder.SetInsertPoint(&*--Builder.GetInsertPoint()); |
293 | BasicBlock *AfterBB; |
294 | Value *IV = createLoop(LowerBound: LB, UpperBound: UB, Stride: SequentialLoopStride, Builder, LI, DT, ExitBlock&: AfterBB, |
295 | Predicate: ICmpInst::ICMP_SLE, Annotator: nullptr, Parallel: true, |
296 | /* UseGuard */ false); |
297 | |
298 | BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); |
299 | |
300 | // Add code to terminate this subfunction. |
301 | Builder.SetInsertPoint(ExitBB); |
302 | // Static (i.e. non-dynamic) scheduling types, are terminated with a fini-call |
303 | if (Scheduling == OMPGeneralSchedulingType::StaticChunked || |
304 | Scheduling == OMPGeneralSchedulingType::StaticNonChunked) { |
305 | createCallStaticFini(GlobalThreadID: ID); |
306 | } |
307 | Builder.CreateRetVoid(); |
308 | Builder.SetInsertPoint(&*LoopBody); |
309 | |
310 | return std::make_tuple(args&: IV, args&: SubFn); |
311 | } |
312 | |
313 | Value *ParallelLoopGeneratorKMP::createCallGlobalThreadNum() { |
314 | const std::string Name = "__kmpc_global_thread_num" ; |
315 | Function *F = M->getFunction(Name); |
316 | |
317 | // If F is not available, declare it. |
318 | if (!F) { |
319 | StructType *IdentTy = |
320 | StructType::getTypeByName(C&: M->getContext(), Name: "struct.ident_t" ); |
321 | |
322 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
323 | Type *Params[] = {IdentTy->getPointerTo()}; |
324 | |
325 | FunctionType *Ty = FunctionType::get(Result: Builder.getInt32Ty(), Params, isVarArg: false); |
326 | F = Function::Create(Ty, Linkage, N: Name, M); |
327 | } |
328 | |
329 | CallInst *Call = Builder.CreateCall(Callee: F, Args: {SourceLocationInfo}); |
330 | Call->setDebugLoc(DLGenerated); |
331 | return Call; |
332 | } |
333 | |
334 | void ParallelLoopGeneratorKMP::createCallPushNumThreads(Value *GlobalThreadID, |
335 | Value *NumThreads) { |
336 | const std::string Name = "__kmpc_push_num_threads" ; |
337 | Function *F = M->getFunction(Name); |
338 | |
339 | // If F is not available, declare it. |
340 | if (!F) { |
341 | StructType *IdentTy = |
342 | StructType::getTypeByName(C&: M->getContext(), Name: "struct.ident_t" ); |
343 | |
344 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
345 | Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty(), |
346 | Builder.getInt32Ty()}; |
347 | |
348 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params, isVarArg: false); |
349 | F = Function::Create(Ty, Linkage, N: Name, M); |
350 | } |
351 | |
352 | Value *Args[] = {SourceLocationInfo, GlobalThreadID, NumThreads}; |
353 | |
354 | CallInst *Call = Builder.CreateCall(Callee: F, Args); |
355 | Call->setDebugLoc(DLGenerated); |
356 | } |
357 | |
358 | void ParallelLoopGeneratorKMP::createCallStaticInit(Value *GlobalThreadID, |
359 | Value *IsLastPtr, |
360 | Value *LBPtr, Value *UBPtr, |
361 | Value *StridePtr, |
362 | Value *ChunkSize) { |
363 | const std::string Name = |
364 | is64BitArch() ? "__kmpc_for_static_init_8" : "__kmpc_for_static_init_4" ; |
365 | Function *F = M->getFunction(Name); |
366 | StructType *IdentTy = |
367 | StructType::getTypeByName(C&: M->getContext(), Name: "struct.ident_t" ); |
368 | |
369 | // If F is not available, declare it. |
370 | if (!F) { |
371 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
372 | |
373 | Type *Params[] = {IdentTy->getPointerTo(), |
374 | Builder.getInt32Ty(), |
375 | Builder.getInt32Ty(), |
376 | Builder.getInt32Ty()->getPointerTo(), |
377 | LongType->getPointerTo(), |
378 | LongType->getPointerTo(), |
379 | LongType->getPointerTo(), |
380 | LongType, |
381 | LongType}; |
382 | |
383 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params, isVarArg: false); |
384 | F = Function::Create(Ty, Linkage, N: Name, M); |
385 | } |
386 | |
387 | // The parameter 'ChunkSize' will hold strictly positive integer values, |
388 | // regardless of PollyChunkSize's value |
389 | Value *Args[] = { |
390 | SourceLocationInfo, |
391 | GlobalThreadID, |
392 | Builder.getInt32(C: int(getSchedType(ChunkSize: PollyChunkSize, Scheduling: PollyScheduling))), |
393 | IsLastPtr, |
394 | LBPtr, |
395 | UBPtr, |
396 | StridePtr, |
397 | ConstantInt::get(Ty: LongType, V: 1), |
398 | ChunkSize}; |
399 | |
400 | CallInst *Call = Builder.CreateCall(Callee: F, Args); |
401 | Call->setDebugLoc(DLGenerated); |
402 | } |
403 | |
404 | void ParallelLoopGeneratorKMP::createCallStaticFini(Value *GlobalThreadID) { |
405 | const std::string Name = "__kmpc_for_static_fini" ; |
406 | Function *F = M->getFunction(Name); |
407 | StructType *IdentTy = |
408 | StructType::getTypeByName(C&: M->getContext(), Name: "struct.ident_t" ); |
409 | |
410 | // If F is not available, declare it. |
411 | if (!F) { |
412 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
413 | Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty()}; |
414 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params, isVarArg: false); |
415 | F = Function::Create(Ty, Linkage, N: Name, M); |
416 | } |
417 | |
418 | Value *Args[] = {SourceLocationInfo, GlobalThreadID}; |
419 | |
420 | CallInst *Call = Builder.CreateCall(Callee: F, Args); |
421 | Call->setDebugLoc(DLGenerated); |
422 | } |
423 | |
424 | void ParallelLoopGeneratorKMP::createCallDispatchInit(Value *GlobalThreadID, |
425 | Value *LB, Value *UB, |
426 | Value *Inc, |
427 | Value *ChunkSize) { |
428 | const std::string Name = |
429 | is64BitArch() ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_4" ; |
430 | Function *F = M->getFunction(Name); |
431 | StructType *IdentTy = |
432 | StructType::getTypeByName(C&: M->getContext(), Name: "struct.ident_t" ); |
433 | |
434 | // If F is not available, declare it. |
435 | if (!F) { |
436 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
437 | |
438 | Type *Params[] = {IdentTy->getPointerTo(), |
439 | Builder.getInt32Ty(), |
440 | Builder.getInt32Ty(), |
441 | LongType, |
442 | LongType, |
443 | LongType, |
444 | LongType}; |
445 | |
446 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params, isVarArg: false); |
447 | F = Function::Create(Ty, Linkage, N: Name, M); |
448 | } |
449 | |
450 | // The parameter 'ChunkSize' will hold strictly positive integer values, |
451 | // regardless of PollyChunkSize's value |
452 | Value *Args[] = { |
453 | SourceLocationInfo, |
454 | GlobalThreadID, |
455 | Builder.getInt32(C: int(getSchedType(ChunkSize: PollyChunkSize, Scheduling: PollyScheduling))), |
456 | LB, |
457 | UB, |
458 | Inc, |
459 | ChunkSize}; |
460 | |
461 | CallInst *Call = Builder.CreateCall(Callee: F, Args); |
462 | Call->setDebugLoc(DLGenerated); |
463 | } |
464 | |
465 | Value *ParallelLoopGeneratorKMP::createCallDispatchNext(Value *GlobalThreadID, |
466 | Value *IsLastPtr, |
467 | Value *LBPtr, |
468 | Value *UBPtr, |
469 | Value *StridePtr) { |
470 | const std::string Name = |
471 | is64BitArch() ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_4" ; |
472 | Function *F = M->getFunction(Name); |
473 | StructType *IdentTy = |
474 | StructType::getTypeByName(C&: M->getContext(), Name: "struct.ident_t" ); |
475 | |
476 | // If F is not available, declare it. |
477 | if (!F) { |
478 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
479 | |
480 | Type *Params[] = {IdentTy->getPointerTo(), |
481 | Builder.getInt32Ty(), |
482 | Builder.getInt32Ty()->getPointerTo(), |
483 | LongType->getPointerTo(), |
484 | LongType->getPointerTo(), |
485 | LongType->getPointerTo()}; |
486 | |
487 | FunctionType *Ty = FunctionType::get(Result: Builder.getInt32Ty(), Params, isVarArg: false); |
488 | F = Function::Create(Ty, Linkage, N: Name, M); |
489 | } |
490 | |
491 | Value *Args[] = {SourceLocationInfo, GlobalThreadID, IsLastPtr, LBPtr, UBPtr, |
492 | StridePtr}; |
493 | |
494 | CallInst *Call = Builder.CreateCall(Callee: F, Args); |
495 | Call->setDebugLoc(DLGenerated); |
496 | return Call; |
497 | } |
498 | |
499 | // TODO: This function currently creates a source location dummy. It might be |
500 | // necessary to (actually) provide information, in the future. |
501 | GlobalVariable *ParallelLoopGeneratorKMP::createSourceLocation() { |
502 | const std::string LocName = ".loc.dummy" ; |
503 | GlobalVariable *SourceLocDummy = M->getGlobalVariable(Name: LocName); |
504 | |
505 | if (SourceLocDummy == nullptr) { |
506 | const std::string StructName = "struct.ident_t" ; |
507 | StructType *IdentTy = |
508 | StructType::getTypeByName(C&: M->getContext(), Name: StructName); |
509 | |
510 | // If the ident_t StructType is not available, declare it. |
511 | // in LLVM-IR: ident_t = type { i32, i32, i32, i32, i8* } |
512 | if (!IdentTy) { |
513 | Type *LocMembers[] = {Builder.getInt32Ty(), Builder.getInt32Ty(), |
514 | Builder.getInt32Ty(), Builder.getInt32Ty(), |
515 | Builder.getPtrTy()}; |
516 | |
517 | IdentTy = |
518 | StructType::create(Context&: M->getContext(), Elements: LocMembers, Name: StructName, isPacked: false); |
519 | } |
520 | |
521 | const auto ArrayType = |
522 | llvm::ArrayType::get(ElementType: Builder.getInt8Ty(), /* Length */ NumElements: 23); |
523 | |
524 | // Global Variable Definitions |
525 | GlobalVariable *StrVar = |
526 | new GlobalVariable(*M, ArrayType, true, GlobalValue::PrivateLinkage, |
527 | nullptr, ".str.ident" ); |
528 | StrVar->setAlignment(llvm::Align(1)); |
529 | |
530 | SourceLocDummy = new GlobalVariable( |
531 | *M, IdentTy, true, GlobalValue::PrivateLinkage, nullptr, LocName); |
532 | SourceLocDummy->setAlignment(llvm::Align(8)); |
533 | |
534 | // Constant Definitions |
535 | Constant *InitStr = ConstantDataArray::getString( |
536 | Context&: M->getContext(), Initializer: "Source location dummy." , AddNull: true); |
537 | |
538 | Constant *StrPtr = static_cast<Constant *>(Builder.CreateInBoundsGEP( |
539 | Ty: ArrayType, Ptr: StrVar, IdxList: {Builder.getInt32(C: 0), Builder.getInt32(C: 0)})); |
540 | |
541 | Constant *LocInitStruct = ConstantStruct::get( |
542 | T: IdentTy, V: {Builder.getInt32(C: 0), Builder.getInt32(C: 0), Builder.getInt32(C: 0), |
543 | Builder.getInt32(C: 0), StrPtr}); |
544 | |
545 | // Initialize variables |
546 | StrVar->setInitializer(InitStr); |
547 | SourceLocDummy->setInitializer(LocInitStruct); |
548 | } |
549 | |
550 | return SourceLocDummy; |
551 | } |
552 | |
553 | bool ParallelLoopGeneratorKMP::is64BitArch() { |
554 | return (LongType->getIntegerBitWidth() == 64); |
555 | } |
556 | |
557 | OMPGeneralSchedulingType ParallelLoopGeneratorKMP::getSchedType( |
558 | int ChunkSize, OMPGeneralSchedulingType Scheduling) const { |
559 | if (ChunkSize == 0 && Scheduling == OMPGeneralSchedulingType::StaticChunked) |
560 | return OMPGeneralSchedulingType::StaticNonChunked; |
561 | |
562 | return Scheduling; |
563 | } |
564 | |