1 | //===------ LoopGeneratorsGOMP.cpp - IR helper to create loops ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains functions to create parallel loops as LLVM-IR. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "polly/CodeGen/LoopGeneratorsGOMP.h" |
14 | #include "llvm/Analysis/LoopInfo.h" |
15 | #include "llvm/IR/Dominators.h" |
16 | #include "llvm/IR/Module.h" |
17 | |
18 | using namespace llvm; |
19 | using namespace polly; |
20 | |
21 | void ParallelLoopGeneratorGOMP::createCallSpawnThreads(Value *SubFn, |
22 | Value *SubFnParam, |
23 | Value *LB, Value *UB, |
24 | Value *Stride) { |
25 | const std::string Name = "GOMP_parallel_loop_runtime_start" ; |
26 | |
27 | Function *F = M->getFunction(Name); |
28 | |
29 | // If F is not available, declare it. |
30 | if (!F) { |
31 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
32 | |
33 | Type *Params[] = { |
34 | Builder.getPtrTy(), Builder.getPtrTy(), Builder.getInt32Ty(), |
35 | LongType, LongType, LongType}; |
36 | |
37 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), Params, isVarArg: false); |
38 | F = Function::Create(Ty, Linkage, N: Name, M); |
39 | } |
40 | |
41 | Value *Args[] = {SubFn, SubFnParam, Builder.getInt32(C: PollyNumThreads), |
42 | LB, UB, Stride}; |
43 | |
44 | CallInst *Call = Builder.CreateCall(Callee: F, Args); |
45 | Call->setDebugLoc(DLGenerated); |
46 | } |
47 | |
48 | void ParallelLoopGeneratorGOMP::deployParallelExecution(Function *SubFn, |
49 | Value *SubFnParam, |
50 | Value *LB, Value *UB, |
51 | Value *Stride) { |
52 | // Tell the runtime we start a parallel loop |
53 | createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride); |
54 | CallInst *Call = Builder.CreateCall(Callee: SubFn, Args: SubFnParam); |
55 | Call->setDebugLoc(DLGenerated); |
56 | createCallJoinThreads(); |
57 | } |
58 | |
59 | Function *ParallelLoopGeneratorGOMP::prepareSubFnDefinition(Function *F) const { |
60 | FunctionType *FT = |
61 | FunctionType::get(Result: Builder.getVoidTy(), Params: {Builder.getPtrTy()}, isVarArg: false); |
62 | Function *SubFn = Function::Create(Ty: FT, Linkage: Function::InternalLinkage, |
63 | N: F->getName() + "_polly_subfn" , M); |
64 | // Name the function's arguments |
65 | SubFn->arg_begin()->setName("polly.par.userContext" ); |
66 | return SubFn; |
67 | } |
68 | |
69 | // Create a subfunction of the following (preliminary) structure: |
70 | // |
71 | // PrevBB |
72 | // | |
73 | // v |
74 | // HeaderBB |
75 | // | _____ |
76 | // v v | |
77 | // CheckNextBB PreHeaderBB |
78 | // |\ | |
79 | // | \______/ |
80 | // | |
81 | // v |
82 | // ExitBB |
83 | // |
84 | // HeaderBB will hold allocations and loading of variables. |
85 | // CheckNextBB will check for more work. |
86 | // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB. |
87 | // PreHeaderBB loads the new boundaries (& will lead to the loop body later on). |
88 | // ExitBB marks the end of the parallel execution. |
89 | std::tuple<Value *, Function *> |
90 | ParallelLoopGeneratorGOMP::createSubFn(Value *Stride, AllocaInst *StructData, |
91 | SetVector<Value *> Data, |
92 | ValueMapT &Map) { |
93 | if (PollyScheduling != OMPGeneralSchedulingType::Runtime) { |
94 | // User tried to influence the scheduling type (currently not supported) |
95 | errs() << "warning: Polly's GNU OpenMP backend solely " |
96 | "supports the scheduling type 'runtime'.\n" ; |
97 | } |
98 | |
99 | if (PollyChunkSize != 0) { |
100 | // User tried to influence the chunk size (currently not supported) |
101 | errs() << "warning: Polly's GNU OpenMP backend solely " |
102 | "supports the default chunk size.\n" ; |
103 | } |
104 | |
105 | Function *SubFn = createSubFnDefinition(); |
106 | LLVMContext &Context = SubFn->getContext(); |
107 | |
108 | // Create basic blocks. |
109 | BasicBlock * = BasicBlock::Create(Context, Name: "polly.par.setup" , Parent: SubFn); |
110 | SubFnDT = std::make_unique<DominatorTree>(args&: *SubFn); |
111 | SubFnLI = std::make_unique<LoopInfo>(args&: *SubFnDT); |
112 | |
113 | BasicBlock *ExitBB = BasicBlock::Create(Context, Name: "polly.par.exit" , Parent: SubFn); |
114 | BasicBlock *CheckNextBB = |
115 | BasicBlock::Create(Context, Name: "polly.par.checkNext" , Parent: SubFn); |
116 | BasicBlock * = |
117 | BasicBlock::Create(Context, Name: "polly.par.loadIVBounds" , Parent: SubFn); |
118 | |
119 | SubFnDT->addNewBlock(BB: ExitBB, DomBB: HeaderBB); |
120 | SubFnDT->addNewBlock(BB: CheckNextBB, DomBB: HeaderBB); |
121 | SubFnDT->addNewBlock(BB: PreHeaderBB, DomBB: HeaderBB); |
122 | |
123 | // Fill up basic block HeaderBB. |
124 | Builder.SetInsertPoint(HeaderBB); |
125 | Value *LBPtr = Builder.CreateAlloca(Ty: LongType, ArraySize: nullptr, Name: "polly.par.LBPtr" ); |
126 | Value *UBPtr = Builder.CreateAlloca(Ty: LongType, ArraySize: nullptr, Name: "polly.par.UBPtr" ); |
127 | Value *UserContext = &*SubFn->arg_begin(); |
128 | |
129 | extractValuesFromStruct(Values: Data, Ty: StructData->getAllocatedType(), Struct: UserContext, |
130 | VMap&: Map); |
131 | Builder.CreateBr(Dest: CheckNextBB); |
132 | |
133 | // Add code to check if another set of iterations will be executed. |
134 | Builder.SetInsertPoint(CheckNextBB); |
135 | Value *Next = createCallGetWorkItem(LBPtr, UBPtr); |
136 | Value *HasNextSchedule = Builder.CreateTrunc( |
137 | V: Next, DestTy: Builder.getInt1Ty(), Name: "polly.par.hasNextScheduleBlock" ); |
138 | Builder.CreateCondBr(Cond: HasNextSchedule, True: PreHeaderBB, False: ExitBB); |
139 | |
140 | // Add code to load the iv bounds for this set of iterations. |
141 | Builder.SetInsertPoint(PreHeaderBB); |
142 | Value *LB = Builder.CreateLoad(Ty: LongType, Ptr: LBPtr, Name: "polly.par.LB" ); |
143 | Value *UB = Builder.CreateLoad(Ty: LongType, Ptr: UBPtr, Name: "polly.par.UB" ); |
144 | |
145 | // Subtract one as the upper bound provided by OpenMP is a < comparison |
146 | // whereas the codegenForSequential function creates a <= comparison. |
147 | UB = Builder.CreateSub(LHS: UB, RHS: ConstantInt::get(Ty: LongType, V: 1), |
148 | Name: "polly.par.UBAdjusted" ); |
149 | |
150 | Builder.CreateBr(Dest: CheckNextBB); |
151 | Builder.SetInsertPoint(--Builder.GetInsertPoint()); |
152 | BasicBlock *AfterBB; |
153 | Value *IV = |
154 | createLoop(LowerBound: LB, UpperBound: UB, Stride, Builder, LI&: *SubFnLI, DT&: *SubFnDT, ExitBlock&: AfterBB, |
155 | Predicate: ICmpInst::ICMP_SLE, Annotator: nullptr, Parallel: true, /* UseGuard */ false); |
156 | |
157 | BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); |
158 | |
159 | // Add code to terminate this subfunction. |
160 | Builder.SetInsertPoint(ExitBB); |
161 | createCallCleanupThread(); |
162 | Builder.CreateRetVoid(); |
163 | |
164 | Builder.SetInsertPoint(LoopBody); |
165 | |
166 | // FIXME: Call SubFnDT->verify() and SubFnLI->verify() to check that the |
167 | // DominatorTree/LoopInfo has been created correctly. Alternatively, recreate |
168 | // from scratch since it is not needed here directly. |
169 | |
170 | return std::make_tuple(args&: IV, args&: SubFn); |
171 | } |
172 | |
173 | Value *ParallelLoopGeneratorGOMP::createCallGetWorkItem(Value *LBPtr, |
174 | Value *UBPtr) { |
175 | const std::string Name = "GOMP_loop_runtime_next" ; |
176 | |
177 | Function *F = M->getFunction(Name); |
178 | |
179 | // If F is not available, declare it. |
180 | if (!F) { |
181 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
182 | Type *Params[] = {Builder.getPtrTy(AddrSpace: 0), Builder.getPtrTy(AddrSpace: 0)}; |
183 | FunctionType *Ty = FunctionType::get(Result: Builder.getInt8Ty(), Params, isVarArg: false); |
184 | F = Function::Create(Ty, Linkage, N: Name, M); |
185 | } |
186 | |
187 | Value *Args[] = {LBPtr, UBPtr}; |
188 | CallInst *Call = Builder.CreateCall(Callee: F, Args); |
189 | Call->setDebugLoc(DLGenerated); |
190 | Value *Return = Builder.CreateICmpNE( |
191 | LHS: Call, RHS: Builder.CreateZExt(V: Builder.getFalse(), DestTy: Call->getType())); |
192 | return Return; |
193 | } |
194 | |
195 | void ParallelLoopGeneratorGOMP::createCallJoinThreads() { |
196 | const std::string Name = "GOMP_parallel_end" ; |
197 | |
198 | Function *F = M->getFunction(Name); |
199 | |
200 | // If F is not available, declare it. |
201 | if (!F) { |
202 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
203 | |
204 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), isVarArg: false); |
205 | F = Function::Create(Ty, Linkage, N: Name, M); |
206 | } |
207 | |
208 | CallInst *Call = Builder.CreateCall(Callee: F, Args: {}); |
209 | Call->setDebugLoc(DLGenerated); |
210 | } |
211 | |
212 | void ParallelLoopGeneratorGOMP::createCallCleanupThread() { |
213 | const std::string Name = "GOMP_loop_end_nowait" ; |
214 | |
215 | Function *F = M->getFunction(Name); |
216 | |
217 | // If F is not available, declare it. |
218 | if (!F) { |
219 | GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; |
220 | |
221 | FunctionType *Ty = FunctionType::get(Result: Builder.getVoidTy(), isVarArg: false); |
222 | F = Function::Create(Ty, Linkage, N: Name, M); |
223 | } |
224 | |
225 | CallInst *Call = Builder.CreateCall(Callee: F, Args: {}); |
226 | Call->setDebugLoc(DLGenerated); |
227 | } |
228 | |