1 | //===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the OpenMPIRBuilder class and helpers used as a convenient |
10 | // way to create LLVM instructions for OpenMP directives. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H |
15 | #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H |
16 | |
17 | #include "llvm/Analysis/MemorySSAUpdater.h" |
18 | #include "llvm/Frontend/OpenMP/OMPConstants.h" |
19 | #include "llvm/IR/DebugLoc.h" |
20 | #include "llvm/IR/IRBuilder.h" |
21 | #include "llvm/Support/Allocator.h" |
22 | #include <forward_list> |
23 | |
24 | namespace llvm { |
25 | class CanonicalLoopInfo; |
26 | |
27 | /// Move the instructions after an InsertPoint to the beginning of another |
28 | /// BasicBlock. |
29 | /// |
30 | /// The instructions after \p IP are moved to the beginning of \p New which must |
31 | /// not have any PHINodes. If \p CreateBranch is true, a branch instruction to |
32 | /// \p New will be added such that there is no semantic change. Otherwise, the |
33 | /// \p IP insert block remains degenerate and it is up to the caller to insert a |
34 | /// terminator. |
35 | void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, |
36 | bool CreateBranch); |
37 | |
38 | /// Splice a BasicBlock at an IRBuilder's current insertion point. Its new |
39 | /// insert location will stick to after the instruction before the insertion |
40 | /// point (instead of moving with the instruction the InsertPoint stores |
41 | /// internally). |
42 | void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch); |
43 | |
44 | /// Split a BasicBlock at an InsertPoint, even if the block is degenerate |
45 | /// (missing the terminator). |
46 | /// |
47 | /// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed |
48 | /// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch |
49 | /// is true, a branch to the new successor will be created such that |
50 | /// semantically there is no change; otherwise the block of the insertion point |
51 | /// remains degenerate and it is the caller's responsibility to insert a |
52 | /// terminator. Returns the new successor block. |
53 | BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, |
54 | llvm::Twine Name = {}); |
55 | |
56 | /// Split a BasicBlock at \p Builder's insertion point, even if the block is |
57 | /// degenerate (missing the terminator). Its new insert location will stick to |
58 | /// after the instruction before the insertion point (instead of moving with the |
59 | /// instruction the InsertPoint stores internally). |
60 | BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch, |
61 | llvm::Twine Name = {}); |
62 | |
63 | /// Split a BasicBlock at \p Builder's insertion point, even if the block is |
64 | /// degenerate (missing the terminator). Its new insert location will stick to |
65 | /// after the instruction before the insertion point (instead of moving with the |
66 | /// instruction the InsertPoint stores internally). |
67 | BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name); |
68 | |
69 | /// Like splitBB, but reuses the current block's name for the new name. |
70 | BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, |
71 | llvm::Twine Suffix = ".split" ); |
72 | |
73 | /// An interface to create LLVM-IR for OpenMP directives. |
74 | /// |
75 | /// Each OpenMP directive has a corresponding public generator method. |
76 | class OpenMPIRBuilder { |
77 | public: |
78 | /// Create a new OpenMPIRBuilder operating on the given module \p M; the internal |
79 | /// Builder uses \p M's context. This will not have an effect on \p M (see initialize). |
80 | OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {} |
81 | ~OpenMPIRBuilder(); |
82 | |
83 | /// Initialize the internal state, this will put structures types and |
84 | /// potentially other helpers into the underlying module. Must be called |
85 | /// before any other method and only once! |
86 | void initialize(); |
87 | |
88 | /// Finalize the underlying module, e.g., by outlining regions. |
89 | /// \param Fn The function to be finalized. If not used, |
90 | /// all functions are finalized. |
91 | void finalize(Function *Fn = nullptr); |
92 | |
93 | /// Add attributes known for \p FnID to \p Fn. |
94 | void addAttributes(omp::RuntimeFunction FnID, Function &Fn); |
95 | |
96 | /// Type used throughout for insertion points. |
97 | using InsertPointTy = IRBuilder<>::InsertPoint; |
98 | |
99 | /// Callback type for variable finalization (think destructors). |
100 | /// |
101 | /// \param CodeGenIP is the insertion point at which the finalization code |
102 | /// should be placed. |
103 | /// |
104 | /// A finalize callback knows about all objects that need finalization, e.g. |
105 | /// destruction, when the scope of the currently generated construct is left |
106 | /// at the time, and location, the callback is invoked. |
107 | using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>; |
108 | |
109 | struct FinalizationInfo { |
110 | /// The finalization callback provided by the last in-flight invocation of |
111 | /// a create<Directive> generator for the directive of kind DK. |
112 | FinalizeCallbackTy FiniCB; |
113 | |
114 | /// The directive kind of the innermost directive that has an associated |
115 | /// region which might require finalization when it is left. |
116 | omp::Directive DK; |
117 | |
118 | /// Whether the directive of kind DK is cancellable. |
119 | bool IsCancellable; |
120 | }; |
121 | |
122 | /// Push the finalization info \p FI onto the back of the finalization stack. |
123 | /// |
124 | /// NOTE: Temporary solution until Clang CG is gone. |
125 | void pushFinalizationCB(const FinalizationInfo &FI) { |
126 | FinalizationStack.push_back(FI); |
127 | } |
128 | |
129 | /// Remove the last entry from the finalization stack (no emptiness check here). |
130 | /// |
131 | /// NOTE: Temporary solution until Clang CG is gone. |
132 | void popFinalizationCB() { FinalizationStack.pop_back(); } |
133 | |
134 | /// Callback type for body (=inner region) code generation |
135 | /// |
136 | /// The callback takes code locations as arguments, each describing a |
137 | /// location where additional instructions can be inserted. |
138 | /// |
139 | /// The CodeGenIP may be in the middle of a basic block or point to the end of |
140 | /// it. The basic block may have a terminator or be degenerate. The callback |
141 | /// function may just insert instructions at that position, but also split the |
142 | /// block (without the Before argument of BasicBlock::splitBasicBlock such |
143 | /// that the identity of the split predecessor block is preserved) and insert |
144 | /// additional control flow, including branches that do not lead back to what |
145 | /// follows the CodeGenIP. Note that since the callback is allowed to split |
146 | /// the block, callers must assume that InsertPoints to positions in the |
147 | /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If |
148 | /// such InsertPoints need to be preserved, it can split the block itself |
149 | /// before calling the callback. |
150 | /// |
151 | /// AllocaIP and CodeGenIP must not point to the same position. |
152 | /// |
153 | /// \param AllocaIP is the insertion point at which new alloca instructions |
154 | /// should be placed. The BasicBlock it is pointing to must |
155 | /// not be split. |
156 | /// \param CodeGenIP is the insertion point at which the body code should be |
157 | /// placed. |
158 | using BodyGenCallbackTy = |
159 | function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; |
160 | |
161 | // This is created primarily for the sections construct, as llvm::function_ref |
162 | // (BodyGenCallbackTy) is not storable (as described in the comments of the |
163 | // function_ref class, function_ref contains a non-owning reference |
164 | // to the callable). |
165 | using StorableBodyGenCallbackTy = |
166 | std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; |
167 | |
168 | /// Callback type for loop body code generation. |
169 | /// |
170 | /// \param CodeGenIP is the insertion point where the loop's body code must be |
171 | /// placed. This will be a dedicated BasicBlock with a |
172 | /// conditional branch from the loop condition check and |
173 | /// terminated with an unconditional branch to the loop |
174 | /// latch. |
175 | /// \param IndVar is the induction variable usable at the insertion point. |
176 | using LoopBodyGenCallbackTy = |
177 | function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>; |
178 | |
179 | /// Callback type for variable privatization (think copy & default |
180 | /// constructor). |
181 | /// |
182 | /// \param AllocaIP is the insertion point at which new alloca instructions |
183 | /// should be placed. |
184 | /// \param CodeGenIP is the insertion point at which the privatization code |
185 | /// should be placed. |
186 | /// \param Original The value being copied/created, should not be used in the |
187 | /// generated IR. |
188 | /// \param Inner The equivalent of \p Original that should be used in the |
189 | /// generated IR; this is equal to \p Original if the value is |
190 | /// a pointer and can thus be passed directly, otherwise it is |
191 | /// an equivalent but different value. |
192 | /// \param ReplVal The replacement value, thus a copy or newly created version |
193 | /// of \p Inner. |
194 | /// |
195 | /// \returns The new insertion point where code generation continues and |
196 | /// \p ReplVal the replacement value. |
197 | using PrivatizeCallbackTy = function_ref<InsertPointTy( |
198 | InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, |
199 | Value &Inner, Value *&ReplVal)>; |
200 | |
201 | /// An LLVM-IR insertion point (IP) together with a debug/source location (DL, |
202 | /// i.e. filename, line, column, ...). Built from an IP alone, DL stays default. |
203 | struct LocationDescription { |
204 | LocationDescription(const IRBuilderBase &IRB) |
205 | : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {} |
206 | LocationDescription(const InsertPointTy &IP) : IP(IP) {} |
207 | LocationDescription(const InsertPointTy &IP, const DebugLoc &DL) |
208 | : IP(IP), DL(DL) {} |
209 | InsertPointTy IP; |
210 | DebugLoc DL; |
211 | }; |
212 | |
213 | /// Emitter methods for OpenMP directives. |
214 | /// |
215 | ///{ |
216 | |
217 | /// Generator for '#omp barrier' |
218 | /// |
219 | /// \param Loc The location where the barrier directive was encountered. |
220 | /// \param DK The kind of directive that caused the barrier. |
221 | /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier. |
222 | /// \param CheckCancelFlag Flag to indicate a cancel barrier return value |
223 | /// should be checked and acted upon. |
224 | /// |
225 | /// \returns The insertion point after the barrier. |
226 | InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, |
227 | bool ForceSimpleCall = false, |
228 | bool CheckCancelFlag = true); |
229 | |
230 | /// Generator for '#omp cancel' |
231 | /// |
232 | /// \param Loc The location where the directive was encountered. |
233 | /// \param IfCondition The evaluated 'if' clause expression, if any. |
234 | /// \param CanceledDirective The kind of directive that is canceled. |
235 | /// |
236 | /// \returns The insertion point after the cancel. |
237 | InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, |
238 | omp::Directive CanceledDirective); |
239 | |
240 | /// Generator for '#omp parallel' |
241 | /// |
242 | /// \param Loc The insert and source location description. |
243 | /// \param AllocaIP The insertion points to be used for alloca instructions. |
244 | /// \param BodyGenCB Callback that will generate the region code. |
245 | /// \param PrivCB Callback to copy a given variable (think copy constructor). |
246 | /// \param FiniCB Callback to finalize variable copies. |
247 | /// \param IfCondition The evaluated 'if' clause expression, if any. |
248 | /// \param NumThreads The evaluated 'num_threads' clause expression, if any. |
249 | /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind). |
250 | /// \param IsCancellable Flag to indicate a cancellable parallel region. |
251 | /// |
252 | /// \returns The insertion position *after* the parallel. |
253 | IRBuilder<>::InsertPoint |
254 | createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, |
255 | BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, |
256 | FinalizeCallbackTy FiniCB, Value *IfCondition, |
257 | Value *NumThreads, omp::ProcBindKind ProcBind, |
258 | bool IsCancellable); |
259 | |
260 | /// Generator for the control flow structure of an OpenMP canonical loop. |
261 | /// |
262 | /// This generator operates on the logical iteration space of the loop, i.e. |
263 | /// the caller only has to provide a loop trip count of the loop as defined by |
264 | /// base language semantics. The trip count is interpreted as an unsigned |
265 | /// integer. The induction variable passed to \p BodyGenCB will be of the same |
266 | /// type and run from 0 to \p TripCount - 1. It is up to the callback to |
267 | /// convert the logical iteration variable to the loop counter variable in the |
268 | /// loop body. |
269 | /// |
270 | /// \param Loc The insert and source location description. The insert |
271 | /// location can be between two instructions or the end of a |
272 | /// degenerate block (e.g. a BB under construction). |
273 | /// \param BodyGenCB Callback that will generate the loop body code. |
274 | /// \param TripCount Number of iterations the loop body is executed. |
275 | /// \param Name Base name used to derive BB and instruction names. |
276 | /// |
277 | /// \returns An object representing the created control flow structure which |
278 | /// can be used for loop-associated directives. |
279 | CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, |
280 | LoopBodyGenCallbackTy BodyGenCB, |
281 | Value *TripCount, |
282 | const Twine &Name = "loop" ); |
283 | |
284 | /// Generator for the control flow structure of an OpenMP canonical loop. |
285 | /// |
286 | /// Instead of a logical iteration space, this allows specifying user-defined |
287 | /// loop counter values using increment, upper- and lower bounds. To |
288 | /// disambiguate the terminology when counting downwards, instead of lower |
289 | /// bounds we use \p Start for the loop counter value in the first body |
290 | /// iteration. |
291 | /// |
292 | /// Consider the following limitations: |
293 | /// |
294 | /// * A loop counter space over all integer values of its bit-width cannot be |
295 | /// represented (e.g. using uint8_t, its loop trip count of 256 cannot be |
296 | /// stored into an 8 bit integer): |
297 | /// |
298 | /// DO I = 0, 255, 1 |
299 | /// |
300 | /// * Unsigned wrapping is only supported when wrapping only "once"; E.g. |
301 | /// effectively counting downwards: |
302 | /// |
303 | /// for (uint8_t i = 100u; i > 0; i += 127u) |
304 | /// |
305 | /// |
306 | /// TODO: May need to add additional parameters to represent: |
307 | /// |
308 | /// * Allow representing downcounting with unsigned integers. |
309 | /// |
310 | /// * Sign of the step and the comparison operator might disagree: |
311 | /// |
312 | /// for (int i = 0; i < 42; i -= 1u) |
313 | /// |
314 | /// |
315 | /// \param Loc The insert and source location description. |
316 | /// \param BodyGenCB Callback that will generate the loop body code. |
317 | /// \param Start Value of the loop counter for the first iteration. |
318 | /// \param Stop Loop counter values past this will stop the loop. |
319 | /// \param Step Loop counter increment after each iteration; negative |
320 | /// means counting down. |
321 | /// \param IsSigned Whether Start, Stop and Step are signed integers. |
322 | /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop |
323 | /// counter. |
324 | /// \param ComputeIP Insertion point for instructions computing the trip |
325 | /// count. Can be used to ensure the trip count is available |
326 | /// at the outermost loop of a loop nest. If not set, |
327 | /// defaults to the preheader of the generated loop. |
328 | /// \param Name Base name used to derive BB and instruction names. |
329 | /// |
330 | /// \returns An object representing the created control flow structure which |
331 | /// can be used for loop-associated directives. |
332 | CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, |
333 | LoopBodyGenCallbackTy BodyGenCB, |
334 | Value *Start, Value *Stop, Value *Step, |
335 | bool IsSigned, bool InclusiveStop, |
336 | InsertPointTy ComputeIP = {}, |
337 | const Twine &Name = "loop" ); |
338 | |
339 | /// Collapse a loop nest into a single loop. |
340 | /// |
341 | /// Merges loops of a loop nest into a single CanonicalLoopNest representation |
342 | /// that has the same number of innermost loop iterations as the origin loop |
343 | /// nest. The induction variables of the input loops are derived from the |
344 | /// collapsed loop's induction variable. This is intended to be used to |
345 | /// implement OpenMP's collapse clause. Before applying a directive, |
346 | /// collapseLoops normalizes a loop nest to contain only a single loop and the |
347 | /// directive's implementation does not need to handle multiple loops itself. |
348 | /// This does not remove the need to handle all loop nest handling by |
349 | /// directives, such as the ordered(<n>) clause or the simd schedule-clause |
350 | /// modifier of the worksharing-loop directive. |
351 | /// |
352 | /// Example: |
353 | /// \code |
354 | /// for (int i = 0; i < 7; ++i) // Canonical loop "i" |
355 | /// for (int j = 0; j < 9; ++j) // Canonical loop "j" |
356 | /// body(i, j); |
357 | /// \endcode |
358 | /// |
359 | /// After collapsing with Loops={i,j}, the loop is changed to |
360 | /// \code |
361 | /// for (int ij = 0; ij < 63; ++ij) { |
362 | /// int i = ij / 9; |
363 | /// int j = ij % 9; |
364 | /// body(i, j); |
365 | /// } |
366 | /// \endcode |
367 | /// |
368 | /// In the current implementation, the following limitations apply: |
369 | /// |
370 | /// * All input loops have an induction variable of the same type. |
371 | /// |
372 | /// * The collapsed loop will have the same trip count integer type as the |
373 | /// input loops. Therefore it is possible that the collapsed loop cannot |
374 | /// represent all iterations of the input loops. For instance, assuming a |
375 | /// 32 bit integer type, and two input loops both iterating 2^16 times, the |
376 | /// theoretical trip count of the collapsed loop would be 2^32 iterations, |
377 | /// which cannot be represented in a 32-bit integer. Behavior is undefined |
378 | /// in this case. |
379 | /// |
380 | /// * The trip counts of every input loop must be available at \p ComputeIP. |
381 | /// Non-rectangular loops are not yet supported. |
382 | /// |
383 | /// * At each nest level, code between a surrounding loop and its nested loop |
384 | /// is hoisted into the loop body, and such code will be executed more |
385 | /// often than before collapsing (or not at all if any inner loop iteration |
386 | /// has a trip count of 0). This is permitted by the OpenMP specification. |
387 | /// |
388 | /// \param DL Debug location for instructions added for collapsing, |
389 | /// such as instructions to compute/derive the input loop's |
390 | /// induction variables. |
391 | /// \param Loops Loops in the loop nest to collapse. Loops are specified |
392 | /// from outermost-to-innermost and every control flow of a |
393 | /// loop's body must pass through its directly nested loop. |
394 | /// \param ComputeIP Where additional instructions that compute the collapsed |
395 | /// trip count are inserted. If not set, defaults to before the |
396 | /// generated loop. |
397 | /// |
398 | /// \returns The CanonicalLoopInfo object representing the collapsed loop. |
399 | CanonicalLoopInfo *collapseLoops(DebugLoc DL, |
400 | ArrayRef<CanonicalLoopInfo *> Loops, |
401 | InsertPointTy ComputeIP); |
402 | |
403 | private: |
404 | /// Modifies the canonical loop to be a statically-scheduled workshare loop. |
405 | /// |
406 | /// This takes \p CLI representing a canonical loop, such as the one |
407 | /// created by \p createCanonicalLoop and emits additional instructions to |
408 | /// turn it into a workshare loop. In particular, it calls to an OpenMP |
409 | /// runtime function in the preheader to obtain the loop bounds to be used in |
410 | /// the current thread, updates the relevant instructions in the canonical |
411 | /// loop and calls to an OpenMP runtime finalization function after the loop. |
412 | /// |
413 | /// \param DL Debug location for instructions added for the |
414 | /// workshare-loop construct itself. |
415 | /// \param CLI A descriptor of the canonical loop to workshare. |
416 | /// \param AllocaIP An insertion point for Alloca instructions usable in the |
417 | /// preheader of the loop. |
418 | /// \param NeedsBarrier Indicates whether a barrier must be inserted after |
419 | /// the loop. |
420 | /// |
421 | /// \returns Point where to insert code after the workshare construct. |
422 | InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, |
423 | InsertPointTy AllocaIP, |
424 | bool NeedsBarrier); |
425 | |
426 | /// Modifies the canonical loop to be a statically-scheduled workshare loop |
427 | /// with a user-specified chunk size. |
428 | /// |
429 | /// \param DL Debug location for instructions added for the |
430 | /// workshare-loop construct itself. |
431 | /// \param CLI A descriptor of the canonical loop to workshare. |
432 | /// \param AllocaIP An insertion point for Alloca instructions usable in |
433 | /// the preheader of the loop. |
434 | /// \param NeedsBarrier Indicates whether a barrier must be inserted after the |
435 | /// loop. |
436 | /// \param ChunkSize The user-specified chunk size. |
437 | /// |
438 | /// \returns Point where to insert code after the workshare construct. |
439 | InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL, |
440 | CanonicalLoopInfo *CLI, |
441 | InsertPointTy AllocaIP, |
442 | bool NeedsBarrier, |
443 | Value *ChunkSize); |
444 | |
445 | /// Modifies the canonical loop to be a dynamically-scheduled workshare loop. |
446 | /// |
447 | /// This takes \p CLI representing a canonical loop, such as the one |
448 | /// created by \p createCanonicalLoop and emits additional instructions to |
449 | /// turn it into a workshare loop. In particular, it calls to an OpenMP |
450 | /// runtime function in the preheader to obtain, and then in each iteration |
451 | /// to update the loop counter. |
452 | /// |
453 | /// \param DL Debug location for instructions added for the |
454 | /// workshare-loop construct itself. |
455 | /// \param CLI A descriptor of the canonical loop to workshare. |
456 | /// \param AllocaIP An insertion point for Alloca instructions usable in the |
457 | /// preheader of the loop. |
458 | /// \param SchedType Type of scheduling to be passed to the init function. |
459 | /// \param NeedsBarrier Indicates whether a barrier must be inserted after |
460 | /// the loop. |
461 | /// \param Chunk The size of loop chunk considered as a unit when |
462 | /// scheduling. If \p nullptr, defaults to 1. |
463 | /// |
464 | /// \returns Point where to insert code after the workshare construct. |
465 | InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, |
466 | InsertPointTy AllocaIP, |
467 | omp::OMPScheduleType SchedType, |
468 | bool NeedsBarrier, |
469 | Value *Chunk = nullptr); |
470 | |
471 | /// Create alternative version of the loop to support if clause |
472 | /// |
473 | /// OpenMP if clause can require to generate second loop. This loop |
474 | /// will be executed when if clause condition is not met. createIfVersion |
475 | /// adds branch instruction to the copied loop if \p IfCond is not met. |
476 | /// |
477 | /// \param Loop Original loop which should be versioned. |
478 | /// \param IfCond Value which corresponds to if clause condition |
479 | /// \param VMap Value to value map to define relation between |
480 | /// original and copied loop values and loop blocks. |
481 | /// \param NamePrefix Optional name prefix for if.then if.else blocks. |
482 | void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond, |
483 | ValueToValueMapTy &VMap, const Twine &NamePrefix = "" ); |
484 | |
485 | public: |
486 | /// Modifies the canonical loop to be a workshare loop. |
487 | /// |
488 | /// This takes \p CLI representing a canonical loop, such as the one |
489 | /// created by \p createCanonicalLoop and emits additional instructions to |
490 | /// turn it into a workshare loop. In particular, it calls to an OpenMP |
491 | /// runtime function in the preheader to obtain the loop bounds to be used in |
492 | /// the current thread, updates the relevant instructions in the canonical |
493 | /// loop and calls to an OpenMP runtime finalization function after the loop. |
494 | /// |
495 | /// The concrete transformation is done by applyStaticWorkshareLoop, |
496 | /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending |
497 | /// on the value of \p SchedKind and \p ChunkSize. |
498 | /// |
499 | /// \param DL Debug location for instructions added for the |
500 | /// workshare-loop construct itself. |
501 | /// \param CLI A descriptor of the canonical loop to workshare. |
502 | /// \param AllocaIP An insertion point for Alloca instructions usable in the |
503 | /// preheader of the loop. |
504 | /// \param NeedsBarrier Indicates whether a barrier must be inserted after |
505 | /// the loop. |
506 | /// \param SchedKind Scheduling algorithm to use. |
507 | /// \param ChunkSize The chunk size for the inner loop. |
508 | /// \param HasSimdModifier Whether the simd modifier is present in the |
509 | /// schedule clause. |
510 | /// \param HasMonotonicModifier Whether the monotonic modifier is present in |
511 | /// the schedule clause. |
512 | /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is |
513 | /// present in the schedule clause. |
514 | /// \param HasOrderedClause Whether the (parameterless) ordered clause is |
515 | /// present. |
516 | /// |
517 | /// \returns Point where to insert code after the workshare construct. |
518 | InsertPointTy applyWorkshareLoop( |
519 | DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, |
520 | bool NeedsBarrier, |
521 | llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default, |
522 | Value *ChunkSize = nullptr, bool HasSimdModifier = false, |
523 | bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false, |
524 | bool HasOrderedClause = false); |
525 | |
526 | /// Tile a loop nest. |
527 | /// |
528 | /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in |
529 | /// \p Loops must be perfectly nested, from outermost to innermost loop |
530 | /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value |
531 | /// of every loop and every tile sizes must be usable in the outermost |
532 | /// loop's preheader. This implies that the loop nest is rectangular. |
533 | /// |
534 | /// Example: |
535 | /// \code |
536 | /// for (int i = 0; i < 15; ++i) // Canonical loop "i" |
537 | /// for (int j = 0; j < 14; ++j) // Canonical loop "j" |
538 | /// body(i, j); |
539 | /// \endcode |
540 | /// |
541 | /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to |
542 | /// \code |
543 | /// for (int i1 = 0; i1 < 3; ++i1) |
544 | /// for (int j1 = 0; j1 < 2; ++j1) |
545 | /// for (int i2 = 0; i2 < 5; ++i2) |
546 | /// for (int j2 = 0; j2 < 7; ++j2) |
547 | /// body(i1*5+i2, j1*7+j2); |
548 | /// \endcode |
549 | /// |
550 | /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1 |
551 | /// are referred to as the floor, and the loops i2 and j2 as the tiles. Tiling also |
552 | /// handles non-constant trip counts, non-constant tile sizes and trip counts |
553 | /// that are not multiples of the tile size. In the latter case the tile loop |
554 | /// of the last floor-loop iteration will have fewer iterations than specified |
555 | /// as its tile size. |
556 | /// |
557 | /// |
558 | /// \param DL Debug location for instructions added by tiling, for |
559 | /// instance the floor- and tile trip count computation. |
560 | /// \param Loops Loops to tile. The CanonicalLoopInfo objects are |
561 | /// invalidated by this method, i.e. should not be used after |
562 | /// tiling. |
563 | /// \param TileSizes For each loop in \p Loops, the tile size for that |
564 | /// dimension. |
565 | /// |
566 | /// \returns A list of generated loops. Contains twice as many loops as the |
567 | /// input loop nest; the first half are the floor loops and the |
568 | /// second half are the tile loops. |
569 | std::vector<CanonicalLoopInfo *> |
570 | tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, |
571 | ArrayRef<Value *> TileSizes); |
572 | |
573 | /// Fully unroll a loop. |
574 | /// |
575 | /// Instead of unrolling the loop immediately (and duplicating its body |
576 | /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop |
577 | /// metadata. |
578 | /// |
579 | /// \param DL Debug location for instructions added by unrolling. |
580 | /// \param Loop The loop to unroll. The loop will be invalidated. |
581 | void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop); |
582 | |
583 | /// Fully or partially unroll a loop. How the loop is unrolled is determined |
584 | /// using LLVM's LoopUnrollPass. |
585 | /// |
586 | /// \param DL Debug location for instructions added by unrolling. |
587 | /// \param Loop The loop to unroll. The loop will be invalidated. |
588 | void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop); |
589 | |
590 | /// Partially unroll a loop. |
591 | /// |
592 | /// The CanonicalLoopInfo of the unrolled loop for use with chained |
593 | /// loop-associated directives can be requested using \p UnrolledCLI. Not |
594 | /// needing the CanonicalLoopInfo allows more efficient code generation by |
595 | /// deferring the actual unrolling to the LoopUnrollPass using loop metadata. |
596 | /// A loop-associated directive applied to the unrolled loop needs to know the |
597 | /// new trip count which means that if using a heuristically determined unroll |
598 | /// factor (\p Factor == 0), that factor must be computed immediately. We are |
599 | /// using the same logic as the LoopUnrollPass to derive the unroll factor, |
600 | /// but which assumes that some canonicalization has taken place (e.g. |
601 | /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform |
602 | /// better when the unrolled loop's CanonicalLoopInfo is not needed. |
603 | /// |
604 | /// \param DL Debug location for instructions added by unrolling. |
605 | /// \param Loop The loop to unroll. The loop will be invalidated. |
606 | /// \param Factor The factor to unroll the loop by. A factor of 0 |
607 | /// indicates that a heuristic should be used to determine |
608 | /// the unroll-factor. |
609 | /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the |
610 | /// partially unrolled loop. Otherwise, uses loop metadata |
611 | /// to defer unrolling to the LoopUnrollPass. |
612 | void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, |
613 | CanonicalLoopInfo **UnrolledCLI); |
614 | |
/// Add metadata to simd-ize a loop. If \p IfCond is not nullptr, the loop
/// is cloned. The metadata which prevents vectorization is added to
/// the cloned loop. The cloned loop is executed when \p IfCond is evaluated
/// to false.
///
/// \param Loop    The loop to simd-ize.
/// \param IfCond  The value which corresponds to the if clause condition.
/// \param Simdlen The Simdlen length to apply to the simd loop.
void applySimd(CanonicalLoopInfo *Loop, Value *IfCond, ConstantInt *Simdlen);
624 | |
/// Generator for '#omp flush'.
///
/// \param Loc The location where the flush directive was encountered.
void createFlush(const LocationDescription &Loc);
629 | |
/// Generator for '#omp taskwait'.
///
/// \param Loc The location where the taskwait directive was encountered.
void createTaskwait(const LocationDescription &Loc);
634 | |
/// Generator for '#omp taskyield'.
///
/// \param Loc The location where the taskyield directive was encountered.
void createTaskyield(const LocationDescription &Loc);
639 | |
/// Generator for '#omp task'.
///
/// \param Loc       The location where the task construct was encountered.
/// \param AllocaIP  The insertion point to be used for alloca instructions.
/// \param BodyGenCB Callback that will generate the region code.
/// \param Tied      True if the task is tied, false if the task is untied.
///                  Defaults to true.
/// \param Final     i1 value which is `true` if the task is final, `false` if
///                  the task is not final. Defaults to nullptr.
InsertPointTy createTask(const LocationDescription &Loc,
                         InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
                         bool Tied = true, Value *Final = nullptr);
651 | |
/// Generator for the taskgroup construct.
///
/// \param Loc       The location where the taskgroup construct was
///                  encountered.
/// \param AllocaIP  The insertion point to be used for alloca instructions.
/// \param BodyGenCB Callback that will generate the region code.
InsertPointTy createTaskgroup(const LocationDescription &Loc,
                              InsertPointTy AllocaIP,
                              BodyGenCallbackTy BodyGenCB);
660 | |
/// Functions used to generate reductions. Such functions take two Values
/// representing LHS and RHS of the reduction, respectively, and a reference
/// to the value that is updated to refer to the reduction result.
///
/// In signature order the arguments are: an insertion point, the LHS value,
/// the RHS value, and the by-reference result. An insertion point is
/// returned (presumably the position after the generated code; confirm
/// against the callers).
using ReductionGenTy =
    function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;
666 | |
/// Functions used to generate atomic reductions. Such functions take two
/// Values representing pointers to LHS and RHS of the reduction, as well as
/// the element type of these pointers. They are expected to atomically
/// update the LHS to the reduced value.
///
/// In signature order the arguments are: an insertion point, the element
/// type, and the two pointer values (presumably LHS first, then RHS, matching
/// the description above; confirm against the callers).
using AtomicReductionGenTy =
    function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>;
673 | |
674 | /// Information about an OpenMP reduction. |
675 | struct ReductionInfo { |
676 | ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable, |
677 | ReductionGenTy ReductionGen, |
678 | AtomicReductionGenTy AtomicReductionGen) |
679 | : ElementType(ElementType), Variable(Variable), |
680 | PrivateVariable(PrivateVariable), ReductionGen(ReductionGen), |
681 | AtomicReductionGen(AtomicReductionGen) { |
682 | assert(cast<PointerType>(Variable->getType()) |
683 | ->isOpaqueOrPointeeTypeMatches(ElementType) && "Invalid elem type" ); |
684 | } |
685 | |
686 | /// Reduction element type, must match pointee type of variable. |
687 | Type *ElementType; |
688 | |
689 | /// Reduction variable of pointer type. |
690 | Value *Variable; |
691 | |
692 | /// Thread-private partial reduction variable. |
693 | Value *PrivateVariable; |
694 | |
695 | /// Callback for generating the reduction body. The IR produced by this will |
696 | /// be used to combine two values in a thread-safe context, e.g., under |
697 | /// lock or within the same thread, and therefore need not be atomic. |
698 | ReductionGenTy ReductionGen; |
699 | |
700 | /// Callback for generating the atomic reduction body, may be null. The IR |
701 | /// produced by this will be used to atomically combine two values during |
702 | /// reduction. If null, the implementation will use the non-atomic version |
703 | /// along with the appropriate synchronization mechanisms. |
704 | AtomicReductionGenTy AtomicReductionGen; |
705 | }; |
706 | |
707 | // TODO: provide atomic and non-atomic reduction generators for reduction |
708 | // operators defined by the OpenMP specification. |
709 | |
/// Generator for '#omp reduction'.
///
/// Emits the IR instructing the runtime to perform the specific kind of
/// reductions. Expects reduction variables to have been privatized and
/// initialized to reduction-neutral values separately. Emits the calls to
/// runtime functions as well as the reduction function and the basic blocks
/// performing the reduction atomically and non-atomically.
///
/// The code emitted for the following:
///
/// \code
///   type var_1;
///   type var_2;
///   #pragma omp <directive> reduction(reduction-op:var_1,var_2)
///   /* body */;
/// \endcode
///
/// corresponds to the following sketch.
///
/// \code
/// void _outlined_par() {
///   // N is the number of different reductions.
///   void *red_array[] = {privatized_var_1, privatized_var_2, ...};
///   switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
///                        _omp_reduction_func,
///                        _gomp_critical_user.reduction.var)) {
///   case 1: {
///     var_1 = var_1 <reduction-op> privatized_var_1;
///     var_2 = var_2 <reduction-op> privatized_var_2;
///     // ...
///    __kmpc_end_reduce(...);
///     break;
///   }
///   case 2: {
///     _Atomic<ReductionOp>(var_1, privatized_var_1);
///     _Atomic<ReductionOp>(var_2, privatized_var_2);
///     // ...
///     break;
///   }
///   default: break;
///   }
/// }
///
/// void _omp_reduction_func(void **lhs, void **rhs) {
///   *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
///   *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
///   // ...
/// }
/// \endcode
///
/// \param Loc                The location where the reduction was
///                           encountered. Must be within the associated
///                           directive and after the last local access to the
///                           reduction variables.
/// \param AllocaIP           An insertion point suitable for allocas usable
///                           in reductions.
/// \param ReductionInfos     A list of info on each reduction variable.
/// \param IsNoWait           A flag set if the reduction is marked as nowait.
///                           Defaults to false.
InsertPointTy createReductions(const LocationDescription &Loc,
                               InsertPointTy AllocaIP,
                               ArrayRef<ReductionInfo> ReductionInfos,
                               bool IsNoWait = false);
772 | |
773 | ///} |
774 | |
775 | /// Return the insertion point used by the underlying IRBuilder. |
776 | InsertPointTy getInsertionPoint() { return Builder.saveIP(); } |
777 | |
778 | /// Update the internal location to \p Loc. |
779 | bool updateToLocation(const LocationDescription &Loc) { |
780 | Builder.restoreIP(Loc.IP); |
781 | Builder.SetCurrentDebugLocation(Loc.DL); |
782 | return Loc.IP.getBlock() != nullptr; |
783 | } |
784 | |
/// Return the function declaration for the runtime function with \p FnID.
///
/// \param M    The module to look the function up in (presumably also where
///             it is created if missing; confirm against the implementation).
/// \param FnID Identifier of the requested OpenMP runtime function.
FunctionCallee getOrCreateRuntimeFunction(Module &M,
                                          omp::RuntimeFunction FnID);
788 | |
/// Variant of getOrCreateRuntimeFunction that takes no module argument and
/// returns a Function pointer for the runtime function with \p FnID.
/// NOTE(review): presumably operates on the builder's own module \c M;
/// confirm against the implementation.
Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
790 | |
/// Return the (LLVM-IR) string describing the source location \p LocStr.
///
/// \param SrcLocStrSize Passed by reference; presumably receives the size of
///                      the returned string (confirm against the
///                      implementation).
Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
793 | |
/// Return the (LLVM-IR) string describing the default source location.
///
/// \param SrcLocStrSize Passed by reference; presumably receives the size of
///                      the returned string (confirm against the
///                      implementation).
Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize);
796 | |
/// Return the (LLVM-IR) string describing the source location identified by
/// the arguments (\p FunctionName, \p FileName, \p Line and \p Column).
///
/// \param SrcLocStrSize Passed by reference; presumably receives the size of
///                      the returned string (confirm against the
///                      implementation).
Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
                               unsigned Line, unsigned Column,
                               uint32_t &SrcLocStrSize);
802 | |
/// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
/// fallback if \p DL does not specify the function name. \p F defaults to
/// nullptr.
///
/// \param SrcLocStrSize Passed by reference; presumably receives the size of
///                      the returned string (confirm against the
///                      implementation).
Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
                               Function *F = nullptr);
807 | |
/// Return the (LLVM-IR) string describing the source location \p Loc.
///
/// \param SrcLocStrSize Passed by reference; presumably receives the size of
///                      the returned string (confirm against the
///                      implementation).
Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
                               uint32_t &SrcLocStrSize);
811 | |
/// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
/// \p Flags defaults to omp::IdentFlag(0) and \p Reserve2Flags to 0.
/// TODO: Create an enum class for the Reserve2Flags.
Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
                           omp::IdentFlag Flags = omp::IdentFlag(0),
                           unsigned Reserve2Flags = 0);
817 | |
/// Create a hidden global flag \p Name in the module with initial value
/// \p Value.
GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
821 | |
/// Create an offloading section struct used to register this global at
/// runtime.
///
/// Type struct __tgt_offload_entry{
///   void    *addr;      // Pointer to the offload entry info.
///                       // (function or global)
///   char    *name;      // Name of the function or global.
///   size_t  size;       // Size of the entry info (0 if it is a function).
///   int32_t flags;
///   int32_t reserved;
/// };
///
/// \param Addr The pointer to the global being registered.
/// \param Name The symbol name associated with the global.
/// \param Size The size in bytes of the global (0 for functions).
/// \param Flags Flags associated with the entry.
/// \param SectionName The section this entry will be placed at. Defaults to
///                    "omp_offloading_entries".
void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size,
                         int32_t Flags,
                         StringRef SectionName = "omp_offloading_entries");
842 | |
/// Generate control flow and cleanup for cancellation.
///
/// \param CancelFlag Flag indicating if the cancellation is performed.
/// \param CanceledDirective The kind of directive that is canceled.
/// \param ExitCB Extra code to be generated in the exit block. Defaults to
///               an empty callback.
void emitCancelationCheckImpl(Value *CancelFlag,
                              omp::Directive CanceledDirective,
                              FinalizeCallbackTy ExitCB = {});
851 | |
/// Generate a target region entry call.
///
/// \param Loc The location at which the request originated and is fulfilled.
/// \param Return Return value of the created function returned by reference.
/// \param Ident The ident (presumably an ident_t* describing the source
///        location of the call; confirm against the implementation).
/// \param DeviceID Identifier for the device via the 'device' clause.
/// \param NumTeams Number of teams for the region via the 'num_teams' clause
///        or 0 if unspecified and -1 if there is no 'teams' clause.
/// \param NumThreads Number of threads via the 'thread_limit' clause.
/// \param HostPtr Pointer to the host-side pointer of the target kernel.
/// \param KernelArgs Array of arguments to the kernel.
/// \param NoWaitArgs Optional array of arguments to the nowait kernel.
InsertPointTy emitTargetKernel(const LocationDescription &Loc, Value *&Return,
                               Value *Ident, Value *DeviceID, Value *NumTeams,
                               Value *NumThreads, Value *HostPtr,
                               ArrayRef<Value *> KernelArgs,
                               ArrayRef<Value *> NoWaitArgs = {});
868 | |
/// Generate a barrier runtime call.
///
/// \param Loc The location at which the request originated and is fulfilled.
/// \param DK The directive which caused the barrier.
/// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
/// \param CheckCancelFlag Flag to indicate a cancel barrier return value
///                        should be checked and acted upon.
///
/// \returns The insertion point after the barrier.
InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
                              omp::Directive DK, bool ForceSimpleCall,
                              bool CheckCancelFlag);
881 | |
/// Generate a flush runtime call.
///
/// \param Loc The location at which the request originated and is
///            fulfilled.
void emitFlush(const LocationDescription &Loc);
886 | |
/// The finalization stack made up of finalize callbacks currently in-flight,
/// wrapped into FinalizationInfo objects that also reference the finalization
/// target block and the kind of cancellable directive.
SmallVector<FinalizationInfo, 8> FinalizationStack;
891 | |
892 | /// Return true if the last entry in the finalization stack is of kind \p DK |
893 | /// and cancellable. |
894 | bool isLastFinalizationInfoCancellable(omp::Directive DK) { |
895 | return !FinalizationStack.empty() && |
896 | FinalizationStack.back().IsCancellable && |
897 | FinalizationStack.back().DK == DK; |
898 | } |
899 | |
/// Generate a taskwait runtime call.
///
/// \param Loc The location at which the request originated and is
///            fulfilled.
void emitTaskwaitImpl(const LocationDescription &Loc);
904 | |
/// Generate a taskyield runtime call.
///
/// \param Loc The location at which the request originated and is
///            fulfilled.
void emitTaskyieldImpl(const LocationDescription &Loc);
909 | |
/// Return the current thread ID.
///
/// \param Ident The ident (ident_t*) describing the query origin.
///
/// \returns A Value for the thread ID.
Value *getOrCreateThreadID(Value *Ident);
914 | |
/// The underlying LLVM-IR module.
Module &M;

/// The LLVM-IR Builder used to create IR.
IRBuilder<> Builder;

/// Map to remember source location strings, keyed by the string.
StringMap<Constant *> SrcLocStrMap;

/// Map to remember existing ident_t*, keyed by a (Constant *, uint64_t) pair.
DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;
926 | |
927 | /// Helper that contains information about regions we need to outline |
928 | /// during finalization. |
929 | struct OutlineInfo { |
930 | using PostOutlineCBTy = std::function<void(Function &)>; |
931 | PostOutlineCBTy PostOutlineCB; |
932 | BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB; |
933 | SmallVector<Value *, 2> ExcludeArgsFromAggregate; |
934 | |
935 | /// Collect all blocks in between EntryBB and ExitBB in both the given |
936 | /// vector and set. |
937 | void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet, |
938 | SmallVectorImpl<BasicBlock *> &BlockVector); |
939 | |
940 | /// Return the function that contains the region to be outlined. |
941 | Function *getFunction() const { return EntryBB->getParent(); } |
942 | }; |
943 | |
/// Collection of regions that need to be outlined during finalization.
SmallVector<OutlineInfo, 16> OutlineInfos;

/// Collection of owned canonical loop objects that eventually need to be
/// freed.
std::forward_list<CanonicalLoopInfo> LoopInfos;
950 | |
951 | /// Add a new region that will be outlined later. |
952 | void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } |
953 | |
/// A map of auto-generated variables, keyed by their unique names.
/// It stores variables with the following names: 1) ".gomp_critical_user_" +
/// <critical_section_name> + ".var" for "omp critical" directives; 2)
/// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
/// variables.
/// NOTE(review): this was previously described as an "ordered" map, but the
/// container is a StringMap; do not rely on iteration order without
/// confirming.
StringMap<AssertingVH<Constant>, BumpPtrAllocator> InternalVars;
960 | |
/// Create the global variable holding the offload mappings information.
///
/// \param Mappings The mapping values to store.
/// \param VarName  Name for the created global variable.
GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
                                      std::string VarName);
964 | |
/// Create the global variable holding the offload names information.
///
/// \param Names   The name constants to store.
/// \param VarName Name for the created global variable.
GlobalVariable *
createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
                      std::string VarName);
969 | |
/// Bundle of the alloca instructions used for calls to mapper functions
/// (filled in by createMapperAllocas, consumed by emitMapperCall). All
/// members start out null.
struct MapperAllocas {
  // NOTE(review): field meanings below are inferred from the names only;
  // confirm against createMapperAllocas.
  AllocaInst *ArgsBase = nullptr; // presumably the base-pointer array
  AllocaInst *Args = nullptr;     // presumably the pointer array
  AllocaInst *ArgSizes = nullptr; // presumably the sizes array
};
975 | |
/// Create the alloca instructions used in calls to mapper functions.
///
/// \param Loc           The source location description.
/// \param AllocaIP      The insertion point to be used for alloca
///                      instructions.
/// \param NumOperands   Number of operands (presumably the number of
///                      elements per alloca; confirm).
/// \param MapperAllocas Receives the created allocas.
void createMapperAllocas(const LocationDescription &Loc,
                         InsertPointTy AllocaIP, unsigned NumOperands,
                         struct MapperAllocas &MapperAllocas);
980 | |
/// Create the call for the target mapper function.
///
/// \param Loc           The source location description.
/// \param MapperFunc    Function to be called.
/// \param SrcLocInfo    Source location information global.
/// \param MaptypesArg   The argument types.
/// \param MapnamesArg   The argument names.
/// \param MapperAllocas The AllocaInst used for the call.
/// \param DeviceID      Device ID for the call.
/// \param NumOperands   Number of operands in the call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
                    Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
                    struct MapperAllocas &MapperAllocas, int64_t DeviceID,
                    unsigned NumOperands);
994 | |
995 | public: |
/// Generator for __kmpc_copyprivate.
///
/// \param Loc The source location description.
/// \param BufSize Number of elements in the buffer.
/// \param CpyBuf List of pointers to data to be copied.
/// \param CpyFn Function to call for copying data.
/// \param DidIt Flag variable; 1 for 'single' thread, 0 otherwise.
///
/// \return The insertion position *after* the CopyPrivate call.
InsertPointTy createCopyPrivate(const LocationDescription &Loc,
                                llvm::Value *BufSize, llvm::Value *CpyBuf,
                                llvm::Value *CpyFn, llvm::Value *DidIt);
1009 | |
/// Generator for '#omp single'.
///
/// \param Loc The source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param IsNowait If false, a barrier is emitted.
/// \param DidIt Local variable used as a flag to indicate 'single' thread.
///
/// \returns The insertion position *after* the single call.
InsertPointTy createSingle(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB, bool IsNowait,
                           llvm::Value *DidIt);
1023 | |
/// Generator for '#omp master'.
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
///
/// \returns The insertion position *after* the master.
InsertPointTy createMaster(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB);
1034 | |
/// Generator for '#omp masked'.
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param Filter The filter value for the masked region (presumably the
///               value of the 'filter' clause; confirm against the
///               implementation).
///
/// \returns The insertion position *after* the masked.
InsertPointTy createMasked(const LocationDescription &Loc,
                           BodyGenCallbackTy BodyGenCB,
                           FinalizeCallbackTy FiniCB, Value *Filter);
1045 | |
/// Generator for '#omp critical'.
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region body code.
/// \param FiniCB Callback to finalize variable copies.
/// \param CriticalName Name of the lock used by the critical directive.
/// \param HintInst Hint Instruction for hint clause associated with critical.
///
/// \returns The insertion position *after* the critical.
InsertPointTy createCritical(const LocationDescription &Loc,
                             BodyGenCallbackTy BodyGenCB,
                             FinalizeCallbackTy FiniCB,
                             StringRef CriticalName, Value *HintInst);
1059 | |
/// Generator for '#omp ordered depend (source | sink)'.
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param NumLoops The number of loops in depend clause.
/// \param StoreValues The values that will be stored in the vector address.
/// \param Name The name of alloca instruction.
/// \param IsDependSource If true, depend source; otherwise, depend sink.
///
/// \return The insertion position *after* the ordered.
InsertPointTy createOrderedDepend(const LocationDescription &Loc,
                                  InsertPointTy AllocaIP, unsigned NumLoops,
                                  ArrayRef<llvm::Value *> StoreValues,
                                  const Twine &Name, bool IsDependSource);
1074 | |
/// Generator for '#omp ordered [threads | simd]'.
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param IsThreads If true, with threads clause or without clause;
///                  otherwise, with simd clause.
///
/// \returns The insertion position *after* the ordered.
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
                                       BodyGenCallbackTy BodyGenCB,
                                       FinalizeCallbackTy FiniCB,
                                       bool IsThreads);
1088 | |
/// Generator for '#omp sections'.
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion points to be used for alloca instructions.
/// \param SectionCBs Callbacks that will generate body of each section.
/// \param PrivCB Callback to copy a given variable (think copy constructor).
/// \param FiniCB Callback to finalize variable copies.
/// \param IsCancellable Flag to indicate a cancellable parallel region.
/// \param IsNowait If true, the barrier that ensures all sections are
///                 executed before moving forward will not be generated.
/// \returns The insertion position *after* the sections.
InsertPointTy createSections(const LocationDescription &Loc,
                             InsertPointTy AllocaIP,
                             ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
                             PrivatizeCallbackTy PrivCB,
                             FinalizeCallbackTy FiniCB, bool IsCancellable,
                             bool IsNowait);
1106 | |
/// Generator for '#omp section'.
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region body code.
/// \param FiniCB Callback to finalize variable copies.
/// \returns The insertion position *after* the section.
InsertPointTy createSection(const LocationDescription &Loc,
                            BodyGenCallbackTy BodyGenCB,
                            FinalizeCallbackTy FiniCB);
1116 | |
/// Generate conditional branch and relevant BasicBlocks through which private
/// threads copy the 'copyin' variables from Master copy to threadprivate
/// copies.
///
/// \param IP Insertion block for copyin conditional.
/// \param MasterAddr A pointer to the master variable.
/// \param PrivateAddr A pointer to the threadprivate variable.
/// \param IntPtrTy Pointer size type.
/// \param BranchtoEnd Create a branch between the copyin.not.master blocks
///                    and copy.in.end block. Defaults to true.
///
/// \returns The insertion point where copying operation to be emitted.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
                                       Value *PrivateAddr,
                                       llvm::IntegerType *IntPtrTy,
                                       bool BranchtoEnd = true);
1133 | |
/// Create a runtime call for kmpc_Alloc.
///
/// \param Loc The insert and source location description.
/// \param Size Size of allocated memory space.
/// \param Allocator Allocator information instruction.
/// \param Name Name of call Instruction for OMP_alloc. Defaults to the
///             empty string.
///
/// \returns CallInst to the OMP_Alloc call.
CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
                         Value *Allocator, std::string Name = "");
1144 | |
/// Create a runtime call for kmpc_free.
///
/// \param Loc The insert and source location description.
/// \param Addr Address of memory space to be freed.
/// \param Allocator Allocator information instruction.
/// \param Name Name of call Instruction for OMP_Free. Defaults to the
///             empty string.
///
/// \returns CallInst to the OMP_Free call.
CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
                        Value *Allocator, std::string Name = "");
1155 | |
/// Create a runtime call for kmpc_threadprivate_cached.
///
/// \param Loc The insert and source location description.
/// \param Pointer Pointer to data to be cached.
/// \param Size Size of data to be cached.
/// \param Name Name of call Instruction for callinst. Defaults to an empty
///             Twine.
///
/// \returns CallInst to the thread private cache call.
CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
                                    llvm::Value *Pointer,
                                    llvm::ConstantInt *Size,
                                    const llvm::Twine &Name = Twine(""));
1168 | |
/// Create a runtime call for __tgt_interop_init.
///
/// \param Loc The insert and source location description.
/// \param InteropVar Variable to be allocated.
/// \param InteropType Type of interop operation.
/// \param Device Device to which offloading will occur.
/// \param NumDependences Number of dependence variables.
/// \param DependenceAddress Pointer to dependence variables.
/// \param HaveNowaitClause Does nowait clause exist.
///
/// \returns CallInst to the __tgt_interop_init call.
CallInst *createOMPInteropInit(const LocationDescription &Loc,
                               Value *InteropVar,
                               omp::OMPInteropType InteropType, Value *Device,
                               Value *NumDependences,
                               Value *DependenceAddress,
                               bool HaveNowaitClause);
1186 | |
/// Create a runtime call for __tgt_interop_destroy.
///
/// \param Loc The insert and source location description.
/// \param InteropVar Variable to be allocated.
/// \param Device Device to which offloading will occur.
/// \param NumDependences Number of dependence variables.
/// \param DependenceAddress Pointer to dependence variables.
/// \param HaveNowaitClause Does nowait clause exist.
///
/// \returns CallInst to the __tgt_interop_destroy call.
CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
                                  Value *InteropVar, Value *Device,
                                  Value *NumDependences,
                                  Value *DependenceAddress,
                                  bool HaveNowaitClause);
1202 | |
/// Create a runtime call for __tgt_interop_use.
///
/// \param Loc The insert and source location description.
/// \param InteropVar Variable to be allocated.
/// \param Device Device to which offloading will occur.
/// \param NumDependences Number of dependence variables.
/// \param DependenceAddress Pointer to dependence variables.
/// \param HaveNowaitClause Does nowait clause exist.
///
/// \returns CallInst to the __tgt_interop_use call.
CallInst *createOMPInteropUse(const LocationDescription &Loc,
                              Value *InteropVar, Value *Device,
                              Value *NumDependences, Value *DependenceAddress,
                              bool HaveNowaitClause);
1217 | |
1218 | /// The `omp target` interface |
1219 | /// |
1220 | /// For more information about the usage of this interface, |
1221 | /// \see openmp/libomptarget/deviceRTLs/common/include/target.h |
1222 | /// |
1223 | ///{ |
1224 | |
/// Create a runtime call for kmpc_target_init.
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
/// \param RequiresFullRuntime Indicate if a full device runtime is
///                            necessary.
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
                               bool RequiresFullRuntime);
1232 | |
/// Create a runtime call for kmpc_target_deinit.
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
/// \param RequiresFullRuntime Indicate if a full device runtime is
///                            necessary.
void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD,
                        bool RequiresFullRuntime);
1240 | |
1241 | ///} |
1242 | |
/// Declarations for LLVM-IR types (simple, array, function and structure) are
/// generated below. Their names are defined and used in OMPKinds.def. Here
/// we provide the declarations, the initializeTypes function will provide the
/// values.
///
/// Each macro expands to null-initialized member pointers: one Type* per
/// simple type, and a type plus matching pointer-type member for array,
/// function and struct entries.
///
///{
#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
  ArrayType *VarName##Ty = nullptr;                                            \
  PointerType *VarName##PtrTy = nullptr;
#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
  FunctionType *VarName = nullptr;                                             \
  PointerType *VarName##Ptr = nullptr;
#define OMP_STRUCT_TYPE(VarName, StrName, ...)                                 \
  StructType *VarName = nullptr;                                               \
  PointerType *VarName##Ptr = nullptr;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

///}
1262 | |
1263 | private: |
/// Create all simple and struct types exposed by the runtime and remember
/// the llvm::PointerTypes of them for easy access later.
///
/// \param M The module to create the types for.
void initializeTypes(Module &M);
1267 | |
/// Common interface for generating entry calls for OMP Directives.
/// If the directive has a region/body, it will set the insertion
/// point to the body.
///
/// \param OMPD Directive to generate entry blocks for.
/// \param EntryCall Call to the entry OMP Runtime Function.
/// \param ExitBB Block where the region ends.
/// \param Conditional Indicate if the entry call result will be used
///        to evaluate a conditional of whether a thread will execute
///        body code or not. Defaults to false.
///
/// \return The insertion position in exit block.
InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
                                       BasicBlock *ExitBB,
                                       bool Conditional = false);
1283 | |
/// Common interface to finalize the region.
///
/// \param OMPD Directive to generate exiting code for.
/// \param FinIP Insertion point for emitting Finalization code and exit call.
/// \param ExitCall Call to the ending OMP Runtime Function.
/// \param HasFinalize Indicate if the directive will require finalization
///         and has a finalization callback in the stack that
///        should be called. Defaults to true.
///
/// \return The insertion position in exit block.
InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
                                      InsertPointTy FinIP,
                                      Instruction *ExitCall,
                                      bool HasFinalize = true);
1298 | |
/// Common interface to generate OMP inlined regions.
///
/// \param OMPD          Directive to generate the inlined region for.
/// \param EntryCall     Call to the entry OMP runtime function.
/// \param ExitCall      Call to the ending OMP runtime function.
/// \param BodyGenCB     Body code generation callback.
/// \param FiniCB        Finalization callback. Will be called when finalizing
///                      the region.
/// \param Conditional   Indicates whether the entry call result will be used
///                      to evaluate a conditional of whether a thread will
///                      execute body code or not. Defaults to false.
/// \param HasFinalize   Indicates whether the directive requires finalization
///                      and has a finalization callback in the stack that
///                      should be called. Defaults to true.
/// \param IsCancellable If HasFinalize is set to true, indicates whether the
///                      directive should be cancellable. Defaults to false.
/// \return The insertion point after the region.

InsertPointTy
EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
                     Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
                     FinalizeCallbackTy FiniCB, bool Conditional = false,
                     bool HasFinalize = true, bool IsCancellable = false);
1321 | |
/// Get a name assembled from \p Parts using the platform-specific name
/// separators.
/// \param Parts          The different parts of the final name that need
///                       separation.
/// \param FirstSeparator First separator, used between the initial two
///                       parts of the name.
/// \param Separator      Separator used between all of the remaining
///                       consecutive parts of the name.
static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
                                         StringRef FirstSeparator,
                                         StringRef Separator);
1331 | |
/// Gets (if a variable with the given name already exists) or creates an
/// internal global variable with the specified Name. The created variable has
/// linkage CommonLinkage by default and is initialized by null value.
/// \param Ty           Type of the global variable. If the variable already
///                     exists, its type must be the same.
/// \param Name         Name of the variable.
/// \param AddressSpace Address space argument; defaults to 0.
///                     NOTE(review): presumably the address space the
///                     variable is created in -- confirm against the
///                     implementation.
Constant *getOrCreateOMPInternalVariable(Type *Ty, const Twine &Name,
                                         unsigned AddressSpace = 0);
1340 | |
/// Returns the lock object corresponding to the specified critical region
/// name. If the lock object does not exist it is created, otherwise the
/// reference to the existing copy is returned.
/// \param CriticalName Name of the critical region.
Value *getOMPCriticalRegionLock(StringRef CriticalName);
1347 | |
/// Callback type for an atomic expression update.
/// Example:
/// \code{.cpp}
/// unsigned x = 0;
/// #pragma omp atomic update
/// x = Expr(x_old);  // Expr() is any legal operation
/// \endcode
///
/// \param XOld The value of the atomic memory address to use for the update.
/// \param IRB  Reference to the IRBuilder to use.
///
/// \returns Value to update X to.
using AtomicUpdateCallbackTy =
    const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
1362 | |
1363 | private: |
/// The OpenMP atomic operation kinds; passed as the \c AK argument of
/// checkAndEmitFlushAfterAtomic.
enum AtomicKind { Read, Write, Update, Capture, Compare };
1365 | |
/// Determine whether to emit a flush or not.
///
/// \param Loc The insert and source location description.
/// \param AO  The required atomic ordering.
/// \param AK  The OpenMP atomic operation kind used.
///
/// \returns Whether a flush was emitted or not.
bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
                                  AtomicOrdering AO, AtomicKind AK);
1375 | |
/// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X.
/// For complex operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
/// Only scalar data types.
///
/// \param AllocaIP     The insertion point to be used for alloca
///                     instructions.
/// \param X            The target atomic pointer to be updated.
/// \param XElemTy      The element type of the atomic pointer.
/// \param Expr         The value to update X with.
/// \param AO           Atomic ordering of the generated atomic
///                     instructions.
/// \param RMWOp        The binary operation used for update. If the
///                     operation is not supported by atomicRMW,
///                     or belongs to {FADD, FSUB, BAD_BINOP},
///                     then a `cmpExch` based atomic will be generated.
/// \param UpdateOp     Code generator for complex expressions that cannot be
///                     expressed through atomicrmw instruction.
/// \param VolatileX    True if \a X is volatile.
/// \param IsXBinopExpr True if \a X is Left H.S. in Right H.S. part of the
///                     update expression, false otherwise.
///                     (e.g. true for X = X BinOp Expr)
///
/// \returns A pair of the old value of X before the update, and the value
///          used for the update.
std::pair<Value *, Value *>
emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
                 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
                 AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
                 bool IsXBinopExpr);
1405 | |
/// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2.
///
/// \return The instruction.
Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
                              AtomicRMWInst::BinOp RMWOp);
1411 | |
1412 | public: |
/// A struct to pack relevant information while generating atomic ops.
///
/// Passed by reference as the X, V and R operands of the createAtomic*
/// methods. All fields default to null/false.
struct AtomicOpValue {
  // NOTE(review): presumably the pointer to the memory operated on -- confirm
  // against the createAtomic* implementations.
  Value *Var = nullptr;
  // NOTE(review): presumably the element type behind Var -- confirm.
  Type *ElemTy = nullptr;
  // Signedness and volatility flags for the operand.
  bool IsSigned = false;
  bool IsVolatile = false;
};
1420 | |
/// Emit atomic read for: V = X --- only scalar data types.
///
/// \param Loc The insert and source location description.
/// \param X   The target pointer to be atomically read.
/// \param V   Memory address where to store the atomically read
///            value.
/// \param AO  Atomic ordering of the generated atomic
///            instructions.
///
/// \return Insertion point after the generated atomic read IR.
InsertPointTy createAtomicRead(const LocationDescription &Loc,
                               AtomicOpValue &X, AtomicOpValue &V,
                               AtomicOrdering AO);
1434 | |
/// Emit atomic write for: X = Expr --- only scalar data types.
///
/// \param Loc  The insert and source location description.
/// \param X    The target pointer to be atomically written to.
/// \param Expr The value to store.
/// \param AO   Atomic ordering of the generated atomic
///             instructions.
///
/// \return Insertion point after the generated atomic write IR.
InsertPointTy createAtomicWrite(const LocationDescription &Loc,
                                AtomicOpValue &X, Value *Expr,
                                AtomicOrdering AO);
1447 | |
/// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X.
/// For complex operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
/// Only scalar data types.
///
/// \param Loc          The insert and source location description.
/// \param AllocaIP     The insertion point to be used for alloca
///                     instructions.
/// \param X            The target atomic pointer to be updated.
/// \param Expr         The value to update X with.
/// \param AO           Atomic ordering of the generated atomic instructions.
/// \param RMWOp        The binary operation used for update. If the
///                     operation is not supported by atomicRMW, or belongs to
///                     {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
///                     atomic will be generated.
/// \param UpdateOp     Code generator for complex expressions that cannot be
///                     expressed through atomicrmw instruction.
/// \param IsXBinopExpr True if \a X is Left H.S. in Right H.S. part of the
///                     update expression, false otherwise.
///                     (e.g. true for X = X BinOp Expr)
///
/// \return Insertion point after the generated atomic update IR.
InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
                                 InsertPointTy AllocaIP, AtomicOpValue &X,
                                 Value *Expr, AtomicOrdering AO,
                                 AtomicRMWInst::BinOp RMWOp,
                                 AtomicUpdateCallbackTy &UpdateOp,
                                 bool IsXBinopExpr);
1474 | |
/// Emit atomic update for constructs: --- only scalar data types
/// V = X; X = X BinOp Expr,
/// X = X BinOp Expr; V = X,
/// V = X; X = Expr BinOp X,
/// X = Expr BinOp X; V = X,
/// V = X; X = UpdateOp(X),
/// X = UpdateOp(X); V = X,
///
/// \param Loc             The insert and source location description.
/// \param AllocaIP        The insertion point to be used for alloca
///                        instructions.
/// \param X               The target atomic pointer to be updated.
/// \param V               Memory address where to store the captured value.
/// \param Expr            The value to update X with.
/// \param AO              Atomic ordering of the generated atomic
///                        instructions.
/// \param RMWOp           The binary operation used for update. If the
///                        operation is not supported by atomicRMW, or belongs
///                        to {FADD, FSUB, BAD_BINOP}, then a cmpExch based
///                        atomic will be generated.
/// \param UpdateOp        Code generator for complex expressions that cannot
///                        be expressed through atomicrmw instruction.
/// \param UpdateExpr      True if X is an in place update of the form
///                        X = X BinOp Expr or X = Expr BinOp X.
/// \param IsPostfixUpdate True if the original value of 'x' must be stored in
///                        'v', not an updated one.
/// \param IsXBinopExpr    True if X is Left H.S. in Right H.S. part of the
///                        update expression, false otherwise.
///                        (e.g. true for X = X BinOp Expr)
///
/// \return Insertion point after the generated atomic capture IR.
InsertPointTy
createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP,
                    AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
                    AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
                    AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
                    bool IsPostfixUpdate, bool IsXBinopExpr);
1510 | |
/// Emit atomic compare for constructs: --- only scalar data types
/// cond-expr-stmt:
/// x = x ordop expr ? expr : x;
/// x = expr ordop x ? expr : x;
/// x = x == e ? d : x;
/// x = e == x ? d : x; (this one is not in the spec)
/// cond-update-stmt:
/// if (x ordop expr) { x = expr; }
/// if (expr ordop x) { x = expr; }
/// if (x == e) { x = d; }
/// if (e == x) { x = d; } (this one is not in the spec)
/// conditional-update-capture-atomic:
/// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
/// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
/// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
///                                         IsFailOnly=true)
/// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
/// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
///                                                IsFailOnly=true)
///
/// \param Loc             The insert and source location description.
/// \param X               The target atomic pointer to be updated.
/// \param V               Memory address where to store the captured value
///                        (for compare capture only).
/// \param R               Memory address where to store the comparison
///                        result (for compare capture with '==' only).
/// \param E               The expected value ('e') for forms that use an
///                        equality comparison or an expression ('expr') for
///                        forms that use 'ordop' (logically an atomic maximum
///                        or minimum).
/// \param D               The desired value for forms that use an equality
///                        comparison. For forms that use 'ordop', it should
///                        be \p nullptr.
/// \param AO              Atomic ordering of the generated atomic
///                        instructions.
/// \param Op              Atomic compare operation. It can only be ==, <,
///                        or >.
/// \param IsXBinopExpr    True if the conditional statement is in the form
///                        where x is on LHS. It only matters for < or >.
/// \param IsPostfixUpdate True if the original value of 'x' must be stored in
///                        'v', not an updated one (for compare capture
///                        only).
/// \param IsFailOnly      True if the original value of 'x' is stored to 'v'
///                        only when the comparison fails. This is only valid
///                        for the case the comparison is '=='.
///
/// \return Insertion point after the generated atomic compare IR.
InsertPointTy
createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X,
                    AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
                    AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
                    bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
1561 | |
/// Create the control flow structure of a canonical OpenMP loop.
///
/// The emitted loop will be disconnected, i.e. no edge to the loop's
/// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
/// IRBuilder location is not preserved.
///
/// \param DL               DebugLoc used for the instructions in the
///                         skeleton.
/// \param TripCount        Value to be used for the trip count.
/// \param F                Function in which to insert the BasicBlocks.
/// \param PreInsertBefore  Where to insert BBs that execute before the body,
///                         typically the body itself.
/// \param PostInsertBefore Where to insert BBs that execute after the body.
/// \param Name             Base name used to derive BB and instruction
///                         names. Defaults to an empty Twine.
///
/// \returns The CanonicalLoopInfo that represents the emitted loop.
CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
                                      Function *F,
                                      BasicBlock *PreInsertBefore,
                                      BasicBlock *PostInsertBefore,
                                      const Twine &Name = {});
1583 | }; |
1584 | |
/// Class to represent the control flow structure of an OpenMP canonical loop.
1586 | /// |
1587 | /// The control-flow structure is standardized for easy consumption by |
1588 | /// directives associated with loops. For instance, the worksharing-loop |
1589 | /// construct may change this control flow such that each loop iteration is |
1590 | /// executed on only one thread. The constraints of a canonical loop in brief |
1591 | /// are: |
1592 | /// |
1593 | /// * The number of loop iterations must have been computed before entering the |
1594 | /// loop. |
1595 | /// |
1596 | /// * Has an (unsigned) logical induction variable that starts at zero and |
1597 | /// increments by one. |
1598 | /// |
1599 | /// * The loop's CFG itself has no side-effects. The OpenMP specification |
1600 | /// itself allows side-effects, but the order in which they happen, including |
1601 | /// how often or whether at all, is unspecified. We expect that the frontend |
1602 | /// will emit those side-effect instructions somewhere (e.g. before the loop) |
1603 | /// such that the CanonicalLoopInfo itself can be side-effect free. |
1604 | /// |
/// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
/// execution of a loop body that satisfies these constraints. It does NOT
/// represent arbitrary SESE regions that happen to contain a loop. Do not use
/// CanonicalLoopInfo for such purposes.
1609 | /// |
1610 | /// The control flow can be described as follows: |
1611 | /// |
1612 | /// Preheader |
1613 | /// | |
1614 | /// /-> Header |
1615 | /// | | |
1616 | /// | Cond---\ |
1617 | /// | | | |
1618 | /// | Body | |
1619 | /// | | | | |
1620 | /// | <...> | |
1621 | /// | | | | |
1622 | /// \--Latch | |
1623 | /// | |
1624 | /// Exit |
1625 | /// | |
1626 | /// After |
1627 | /// |
1628 | /// The loop is thought to start at PreheaderIP (at the Preheader's terminator, |
1629 | /// including) and end at AfterIP (at the After's first instruction, excluding). |
1630 | /// That is, instructions in the Preheader and After blocks (except the |
1631 | /// Preheader's terminator) are out of CanonicalLoopInfo's control and may have |
1632 | /// side-effects. Typically, the Preheader is used to compute the loop's trip |
1633 | /// count. The instructions from BodyIP (at the Body block's first instruction, |
1634 | /// excluding) until the Latch are also considered outside CanonicalLoopInfo's |
1635 | /// control and thus can have side-effects. The body block is the single entry |
1636 | /// point into the loop body, which may contain arbitrary control flow as long |
1637 | /// as all control paths eventually branch to the Latch block. |
1638 | /// |
/// TODO: Consider adding another standardized BasicBlock between Body CFG and
/// Latch to guarantee that there is only a single edge to the latch. It would
/// make loop transformations easier by not needing to consider multiple
/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
/// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
/// executes after each body iteration.
1645 | /// |
/// There must be no loop-carried dependencies through llvm::Values. This is
/// equivalent to the Latch having no PHINode and the Header's only PHINode
/// being the one for the induction variable.
1649 | /// |
1650 | /// All code in Header, Cond, Latch and Exit (plus the terminator of the |
1651 | /// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked |
1652 | /// by assertOK(). They are expected to not be modified unless explicitly |
/// modifying the CanonicalLoopInfo through a method that applies an OpenMP
1654 | /// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop, |
1655 | /// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its |
1656 | /// basic blocks. After invalidation, the CanonicalLoopInfo must not be used |
1657 | /// anymore as its underlying control flow may not exist anymore. |
1658 | /// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop |
1659 | /// may also return a new CanonicalLoopInfo that can be passed to other |
1660 | /// loop-associated construct implementing methods. These loop-transforming |
1661 | /// methods may either create a new CanonicalLoopInfo usually using |
1662 | /// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and |
1663 | /// modify one of the input CanonicalLoopInfo and return it as representing the |
1664 | /// modified loop. What is done is an implementation detail of |
1665 | /// transformation-implementing method and callers should always assume that the |
1666 | /// CanonicalLoopInfo passed to it is invalidated and a new object is returned. |
1667 | /// Returned CanonicalLoopInfo have the same structure and guarantees as the one |
1668 | /// created by createCanonicalLoop, such that transforming methods do not have |
1669 | /// to special case where the CanonicalLoopInfo originated from. |
1670 | /// |
1671 | /// Generally, methods consuming CanonicalLoopInfo do not need an |
1672 | /// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the |
1673 | /// CanonicalLoopInfo to insert new or modify existing instructions. Unless |
1674 | /// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate |
1675 | /// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically, |
1676 | /// any InsertPoint in the Preheader, After or Block can still be used after |
1677 | /// calling such a method. |
1678 | /// |
1679 | /// TODO: Provide mechanisms for exception handling and cancellation points. |
1680 | /// |
1681 | /// Defined outside OpenMPIRBuilder because nested classes cannot be |
1682 | /// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h. |
1683 | class CanonicalLoopInfo { |
1684 | friend class OpenMPIRBuilder; |
1685 | |
1686 | private: |
1687 | BasicBlock * = nullptr; |
1688 | BasicBlock *Cond = nullptr; |
1689 | BasicBlock *Latch = nullptr; |
1690 | BasicBlock *Exit = nullptr; |
1691 | |
1692 | /// Add the control blocks of this loop to \p BBs. |
1693 | /// |
1694 | /// This does not include any block from the body, including the one returned |
1695 | /// by getBody(). |
1696 | /// |
1697 | /// FIXME: This currently includes the Preheader and After blocks even though |
1698 | /// their content is (mostly) not under CanonicalLoopInfo's control. |
1699 | /// Re-evaluated whether this makes sense. |
1700 | void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs); |
1701 | |
1702 | /// Sets the number of loop iterations to the given value. This value must be |
1703 | /// valid in the condition block (i.e., defined in the preheader) and is |
1704 | /// interpreted as an unsigned integer. |
1705 | void setTripCount(Value *TripCount); |
1706 | |
1707 | /// Replace all uses of the canonical induction variable in the loop body with |
1708 | /// a new one. |
1709 | /// |
1710 | /// The intended use case is to update the induction variable for an updated |
1711 | /// iteration space such that it can stay normalized in the 0...tripcount-1 |
1712 | /// range. |
1713 | /// |
1714 | /// The \p Updater is called with the (presumable updated) current normalized |
1715 | /// induction variable and is expected to return the value that uses of the |
1716 | /// pre-updated induction values should use instead, typically dependent on |
1717 | /// the new induction variable. This is a lambda (instead of e.g. just passing |
1718 | /// the new value) to be able to distinguish the uses of the pre-updated |
1719 | /// induction variable and uses of the induction varible to compute the |
1720 | /// updated induction variable value. |
1721 | void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater); |
1722 | |
1723 | public: |
1724 | /// Returns whether this object currently represents the IR of a loop. If |
1725 | /// returning false, it may have been consumed by a loop transformation or not |
1726 | /// been intialized. Do not use in this case; |
1727 | bool isValid() const { return Header; } |
1728 | |
1729 | /// The preheader ensures that there is only a single edge entering the loop. |
1730 | /// Code that must be execute before any loop iteration can be emitted here, |
1731 | /// such as computing the loop trip count and begin lifetime markers. Code in |
1732 | /// the preheader is not considered part of the canonical loop. |
1733 | BasicBlock *() const; |
1734 | |
1735 | /// The header is the entry for each iteration. In the canonical control flow, |
1736 | /// it only contains the PHINode for the induction variable. |
1737 | BasicBlock *() const { |
1738 | assert(isValid() && "Requires a valid canonical loop" ); |
1739 | return Header; |
1740 | } |
1741 | |
1742 | /// The condition block computes whether there is another loop iteration. If |
1743 | /// yes, branches to the body; otherwise to the exit block. |
1744 | BasicBlock *getCond() const { |
1745 | assert(isValid() && "Requires a valid canonical loop" ); |
1746 | return Cond; |
1747 | } |
1748 | |
1749 | /// The body block is the single entry for a loop iteration and not controlled |
1750 | /// by CanonicalLoopInfo. It can contain arbitrary control flow but must |
1751 | /// eventually branch to the \p Latch block. |
1752 | BasicBlock *getBody() const { |
1753 | assert(isValid() && "Requires a valid canonical loop" ); |
1754 | return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0); |
1755 | } |
1756 | |
1757 | /// Reaching the latch indicates the end of the loop body code. In the |
1758 | /// canonical control flow, it only contains the increment of the induction |
1759 | /// variable. |
1760 | BasicBlock *getLatch() const { |
1761 | assert(isValid() && "Requires a valid canonical loop" ); |
1762 | return Latch; |
1763 | } |
1764 | |
1765 | /// Reaching the exit indicates no more iterations are being executed. |
1766 | BasicBlock *getExit() const { |
1767 | assert(isValid() && "Requires a valid canonical loop" ); |
1768 | return Exit; |
1769 | } |
1770 | |
1771 | /// The after block is intended for clean-up code such as lifetime end |
1772 | /// markers. It is separate from the exit block to ensure, analogous to the |
1773 | /// preheader, it having just a single entry edge and being free from PHI |
1774 | /// nodes should there be multiple loop exits (such as from break |
1775 | /// statements/cancellations). |
1776 | BasicBlock *getAfter() const { |
1777 | assert(isValid() && "Requires a valid canonical loop" ); |
1778 | return Exit->getSingleSuccessor(); |
1779 | } |
1780 | |
1781 | /// Returns the llvm::Value containing the number of loop iterations. It must |
1782 | /// be valid in the preheader and always interpreted as an unsigned integer of |
1783 | /// any bit-width. |
1784 | Value *getTripCount() const { |
1785 | assert(isValid() && "Requires a valid canonical loop" ); |
1786 | Instruction *CmpI = &Cond->front(); |
1787 | assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount" ); |
1788 | return CmpI->getOperand(1); |
1789 | } |
1790 | |
1791 | /// Returns the instruction representing the current logical induction |
1792 | /// variable. Always unsigned, always starting at 0 with an increment of one. |
1793 | Instruction *getIndVar() const { |
1794 | assert(isValid() && "Requires a valid canonical loop" ); |
1795 | Instruction *IndVarPHI = &Header->front(); |
1796 | assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI" ); |
1797 | return IndVarPHI; |
1798 | } |
1799 | |
1800 | /// Return the type of the induction variable (and the trip count). |
1801 | Type *getIndVarType() const { |
1802 | assert(isValid() && "Requires a valid canonical loop" ); |
1803 | return getIndVar()->getType(); |
1804 | } |
1805 | |
1806 | /// Return the insertion point for user code before the loop. |
1807 | OpenMPIRBuilder::InsertPointTy () const { |
1808 | assert(isValid() && "Requires a valid canonical loop" ); |
1809 | BasicBlock * = getPreheader(); |
1810 | return {Preheader, std::prev(Preheader->end())}; |
1811 | }; |
1812 | |
1813 | /// Return the insertion point for user code in the body. |
1814 | OpenMPIRBuilder::InsertPointTy getBodyIP() const { |
1815 | assert(isValid() && "Requires a valid canonical loop" ); |
1816 | BasicBlock *Body = getBody(); |
1817 | return {Body, Body->begin()}; |
1818 | }; |
1819 | |
1820 | /// Return the insertion point for user code after the loop. |
1821 | OpenMPIRBuilder::InsertPointTy getAfterIP() const { |
1822 | assert(isValid() && "Requires a valid canonical loop" ); |
1823 | BasicBlock *After = getAfter(); |
1824 | return {After, After->begin()}; |
1825 | }; |
1826 | |
1827 | Function *getFunction() const { |
1828 | assert(isValid() && "Requires a valid canonical loop" ); |
1829 | return Header->getParent(); |
1830 | } |
1831 | |
1832 | /// Consistency self-check. |
1833 | void assertOK() const; |
1834 | |
1835 | /// Invalidate this loop. That is, the underlying IR does not fulfill the |
1836 | /// requirements of an OpenMP canonical loop anymore. |
1837 | void invalidate(); |
1838 | }; |
1839 | |
1840 | } // end namespace llvm |
1841 | |
1842 | #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H |
1843 | |