1//===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the OpenMPIRBuilder class and helpers used as a convenient
10// way to create LLVM instructions for OpenMP directives.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15#define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16
17#include "llvm/Analysis/MemorySSAUpdater.h"
18#include "llvm/Frontend/OpenMP/OMPConstants.h"
19#include "llvm/IR/DebugLoc.h"
20#include "llvm/IR/IRBuilder.h"
21#include "llvm/Support/Allocator.h"
22#include <forward_list>
23
24namespace llvm {
25class CanonicalLoopInfo;
26
27/// Move the instructions after an InsertPoint to the beginning of another
28/// BasicBlock.
29///
30/// The instructions after \p IP are moved to the beginning of \p New which must
31/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
32/// \p New will be added such that there is no semantic change. Otherwise, the
33/// \p IP insert block remains degenerate and it is up to the caller to insert a
34/// terminator.
35void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
36 bool CreateBranch);
37
38/// Splice a BasicBlock at \p Builder's current insertion point. The builder's
39/// new insert location will stick to after the instruction before the
40/// insertion point (instead of moving with the instruction the InsertPoint
41/// stores internally).
42void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);
43
44/// Split a BasicBlock at an InsertPoint, even if the block is degenerate
45/// (missing the terminator).
46///
47/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
48/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
49/// is true, a branch to the new successor will be created such that
50/// semantically there is no change; otherwise the block of the insertion point
51/// remains degenerate and it is the caller's responsibility to insert a
52/// terminator. Returns the new successor block.
53BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
54 llvm::Twine Name = {});
55
56/// Split a BasicBlock at \p Builder's insertion point, even if the block is
57/// degenerate (missing the terminator). The builder's new insert location will
58/// stick to after the instruction before the insertion point (instead of moving
59/// with the instruction the InsertPoint stores internally).
60BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
61 llvm::Twine Name = {});
62
63/// Split a BasicBlock at \p Builder's insertion point, even if the block is
64/// degenerate (missing the terminator). The builder's new insert location will
65/// stick to after the instruction before the insertion point (instead of moving
66/// with the instruction the InsertPoint stores internally). \p Name is required.
67BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);
68
69/// Like splitBB, but reuses the current block's name for the new block's name.
70BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
71 llvm::Twine Suffix = ".split");
72
73/// An interface to create LLVM-IR for OpenMP directives.
74///
75/// Each OpenMP directive has a corresponding public generator method.
76class OpenMPIRBuilder {
77public:
78 /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
79 /// not modify \p M; the module is only changed later (see initialize).
80 OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {}
81 ~OpenMPIRBuilder();
82
83 /// Initialize the internal state. This will put structure types and
84 /// potentially other helpers into the underlying module. Must be called
85 /// before any other method and only once!
86 void initialize();
87
88 /// Finalize the underlying module, e.g., by outlining regions.
89 /// \param Fn The function to be finalized. If null (the default),
90 /// all functions are finalized.
91 void finalize(Function *Fn = nullptr);
92
93 /// Add the attributes known for the runtime function \p FnID to \p Fn.
94 void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
95
96 /// Type used throughout this class for insertion points.
97 using InsertPointTy = IRBuilder<>::InsertPoint;
98
99 /// Callback type for variable finalization (think destructors).
100 ///
101 /// \param CodeGenIP is the insertion point at which the finalization code
102 /// should be placed.
103 ///
104 /// A finalize callback knows about all objects that need finalization (e.g.
105 /// destruction) when the scope of the currently generated construct is left
106 /// at the time and location at which the callback is invoked.
107 using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
108
109 struct FinalizationInfo {
110 /// The finalization callback provided by the last in-flight invocation of
111 /// createXXXX for the directive of kind DK.
112 FinalizeCallbackTy FiniCB;
113
114 /// The directive kind of the innermost directive that has an associated
115 /// region which might require finalization when it is left.
116 omp::Directive DK;
117
118 /// Flag to indicate if the directive of kind DK is cancellable.
119 bool IsCancellable;
120 };
121
122 /// Push the finalization info \p FI on the finalization stack.
123 ///
124 /// NOTE: Temporary solution until Clang CG is gone.
125 void pushFinalizationCB(const FinalizationInfo &FI) {
126 FinalizationStack.push_back(FI);
127 }
128
129 /// Pop the most recently pushed finalization info from the finalization stack.
130 ///
131 /// NOTE: Temporary solution until Clang CG is gone.
132 void popFinalizationCB() { FinalizationStack.pop_back(); }
133
134 /// Callback type for body (=inner region) code generation.
135 ///
136 /// The callback takes code locations as arguments, each describing a
137 /// location where additional instructions can be inserted.
138 ///
139 /// The CodeGenIP may be in the middle of a basic block or point to the end of
140 /// it. The basic block may have a terminator or be degenerate. The callback
141 /// function may just insert instructions at that position, but also split the
142 /// block (without the Before argument of BasicBlock::splitBasicBlock such
143 /// that the identity of the split predecessor block is preserved) and insert
144 /// additional control flow, including branches that do not lead back to what
145 /// follows the CodeGenIP. Note that since the callback is allowed to split
146 /// the block, callers must assume that InsertPoints to positions in the
147 /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
148 /// such InsertPoints need to be preserved, the caller can split the block
149 /// itself before calling the callback.
150 ///
151 /// AllocaIP and CodeGenIP must not point to the same position.
152 ///
153 /// \param AllocaIP is the insertion point at which new alloca instructions
154 /// should be placed. The BasicBlock it is pointing to must
155 /// not be split.
156 /// \param CodeGenIP is the insertion point at which the body code should be
157 /// placed.
158 using BodyGenCallbackTy =
159 function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
160
161 // This is created primarily for the sections construct as llvm::function_ref
162 // (BodyGenCallbackTy) is not storable (as described in the comments of the
163 // function_ref class - function_ref contains a non-owning reference
164 // to the callable).
165 using StorableBodyGenCallbackTy =
166 std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
167
168 /// Callback type for loop body code generation.
169 ///
170 /// \param CodeGenIP is the insertion point where the loop's body code must be
171 /// placed. This will be a dedicated BasicBlock that is entered
172 /// via a conditional branch from the loop condition check and
173 /// is terminated by an unconditional branch to the loop
174 /// latch.
175 /// \param IndVar is the induction variable usable at the insertion point.
176 using LoopBodyGenCallbackTy =
177 function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
178
179 /// Callback type for variable privatization (think copy & default
180 /// constructor).
181 ///
182 /// \param AllocaIP is the insertion point at which new alloca instructions
183 /// should be placed.
184 /// \param CodeGenIP is the insertion point at which the privatization code
185 /// should be placed.
186 /// \param Original The value being copied/created, should not be used in the
187 /// generated IR.
188 /// \param Inner The equivalent of \p Original that should be used in the
189 /// generated IR; this is equal to \p Original if the value is
190 /// a pointer and can thus be passed directly, otherwise it is
191 /// an equivalent but different value.
192 /// \param ReplVal The replacement value, thus a copy or newly created version
193 /// of \p Inner.
194 ///
195 /// \returns The new insertion point where code generation continues and
196 /// \p ReplVal the replacement value.
197 using PrivatizeCallbackTy = function_ref<InsertPointTy(
198 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
199 Value &Inner, Value *&ReplVal)>;
200
201 /// Description of an LLVM-IR insertion point (IP) and a debug/source location
202 /// (filename, line, column, ...).
203 struct LocationDescription {
204 LocationDescription(const IRBuilderBase &IRB)
205 : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
206 LocationDescription(const InsertPointTy &IP) : IP(IP) {} // DL is left default-constructed.
207 LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
208 : IP(IP), DL(DL) {}
209 InsertPointTy IP; // The insertion point.
210 DebugLoc DL; // The debug/source location.
211 };
212
213 /// Emitter methods for OpenMP directives.
214 ///
215 ///{
216
217 /// Generator for '#omp barrier'.
218 ///
219 /// \param Loc The location where the barrier directive was encountered.
220 /// \param DK The kind of directive that caused the barrier.
221 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
222 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
223 /// should be checked and acted upon.
224 ///
225 /// \returns The insertion point after the barrier.
226 InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
227 bool ForceSimpleCall = false,
228 bool CheckCancelFlag = true);
229
230 /// Generator for '#omp cancel'.
231 ///
232 /// \param Loc The location where the directive was encountered.
233 /// \param IfCondition The evaluated 'if' clause expression, if any.
234 /// \param CanceledDirective The kind of directive that is canceled.
235 ///
236 /// \returns The insertion point after the cancel directive.
237 InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
238 omp::Directive CanceledDirective);
239
240 /// Generator for '#omp parallel'.
241 ///
242 /// \param Loc The insert and source location description.
243 /// \param AllocaIP The insertion point to be used for alloca instructions.
244 /// \param BodyGenCB Callback that will generate the region code.
245 /// \param PrivCB Callback to copy a given variable (think copy constructor).
246 /// \param FiniCB Callback to finalize variable copies.
247 /// \param IfCondition The evaluated 'if' clause expression, if any.
248 /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
249 /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
250 /// \param IsCancellable Flag to indicate a cancellable parallel region.
251 ///
252 /// \returns The insertion position *after* the parallel.
253 IRBuilder<>::InsertPoint
254 createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
255 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
256 FinalizeCallbackTy FiniCB, Value *IfCondition,
257 Value *NumThreads, omp::ProcBindKind ProcBind,
258 bool IsCancellable);
259
260 /// Generator for the control flow structure of an OpenMP canonical loop.
261 ///
262 /// This generator operates on the logical iteration space of the loop, i.e.
263 /// the caller only has to provide the trip count of the loop as defined by
264 /// base language semantics. The trip count is interpreted as an unsigned
265 /// integer. The induction variable passed to \p BodyGenCB will be of the same
266 /// type and run from 0 to \p TripCount - 1. It is up to the callback to
267 /// convert the logical iteration variable to the loop counter variable in the
268 /// loop body.
269 ///
270 /// \param Loc The insert and source location description. The insert
271 /// location can be between two instructions or the end of a
272 /// degenerate block (e.g. a BB under construction).
273 /// \param BodyGenCB Callback that will generate the loop body code.
274 /// \param TripCount Number of iterations the loop body is executed.
275 /// \param Name Base name used to derive BB and instruction names.
276 ///
277 /// \returns An object representing the created control flow structure which
278 /// can be used for loop-associated directives.
279 CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
280 LoopBodyGenCallbackTy BodyGenCB,
281 Value *TripCount,
282 const Twine &Name = "loop");
283
284 /// Generator for the control flow structure of an OpenMP canonical loop.
285 ///
286 /// Instead of a logical iteration space, this allows specifying user-defined
287 /// loop counter values using increment, upper- and lower bounds. To
288 /// disambiguate the terminology when counting downwards, instead of lower
289 /// bounds we use \p Start for the loop counter value in the first body
290 /// iteration.
291 ///
292 /// Consider the following limitations:
293 ///
294 /// * A loop counter space over all integer values of its bit-width cannot be
295 /// represented (e.g. using uint8_t, the loop trip count of 256 cannot be
296 /// stored into an 8 bit integer):
297 ///
298 /// DO I = 0, 255, 1
299 ///
300 /// * Unsigned wrapping is only supported when wrapping only "once"; e.g.
301 /// effectively counting downwards:
302 ///
303 /// for (uint8_t i = 100u; i > 0; i += 127u)
304 ///
305 ///
306 /// TODO: May need to add additional parameters to represent:
307 ///
308 /// * Allow representing downcounting with unsigned integers.
309 ///
310 /// * Sign of the step and the comparison operator might disagree:
311 ///
312 /// for (int i = 0; i < 42; i -= 1u)
313 ///
314 ///
315 /// \param Loc The insert and source location description.
316 /// \param BodyGenCB Callback that will generate the loop body code.
317 /// \param Start Value of the loop counter for the first iteration.
318 /// \param Stop Loop counter values past this will stop the loop.
319 /// \param Step Loop counter increment after each iteration; negative
320 /// means counting down.
321 /// \param IsSigned Whether Start, Stop and Step are signed integers.
322 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
323 /// counter.
324 /// \param ComputeIP Insertion point for instructions computing the trip
325 /// count. Can be used to ensure the trip count is available
326 /// at the outermost loop of a loop nest. If not set,
327 /// defaults to the preheader of the generated loop.
328 /// \param Name Base name used to derive BB and instruction names.
329 ///
330 /// \returns An object representing the created control flow structure which
331 /// can be used for loop-associated directives.
332 CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
333 LoopBodyGenCallbackTy BodyGenCB,
334 Value *Start, Value *Stop, Value *Step,
335 bool IsSigned, bool InclusiveStop,
336 InsertPointTy ComputeIP = {},
337 const Twine &Name = "loop");
338
339 /// Collapse a loop nest into a single loop.
340 ///
341 /// Merges loops of a loop nest into a single CanonicalLoopInfo representation
342 /// that has the same number of innermost loop iterations as the original loop
343 /// nest. The induction variables of the input loops are derived from the
344 /// collapsed loop's induction variable. This is intended to be used to
345 /// implement OpenMP's collapse clause. Before applying a directive,
346 /// collapseLoops normalizes a loop nest to contain only a single loop and the
347 /// directive's implementation does not need to handle multiple loops itself.
348 /// This does not remove the need to handle all loop nest handling by
349 /// directives, such as the ordered(<n>) clause or the simd schedule-clause
350 /// modifier of the worksharing-loop directive.
351 ///
352 /// Example:
353 /// \code
354 /// for (int i = 0; i < 7; ++i) // Canonical loop "i"
355 /// for (int j = 0; j < 9; ++j) // Canonical loop "j"
356 /// body(i, j);
357 /// \endcode
358 ///
359 /// After collapsing with Loops={i,j}, the loop is changed to
360 /// \code
361 /// for (int ij = 0; ij < 63; ++ij) {
362 /// int i = ij / 9;
363 /// int j = ij % 9;
364 /// body(i, j);
365 /// }
366 /// \endcode
367 ///
368 /// In the current implementation, the following limitations apply:
369 ///
370 /// * All input loops have an induction variable of the same type.
371 ///
372 /// * The collapsed loop will have the same trip count integer type as the
373 /// input loops. Therefore it is possible that the collapsed loop cannot
374 /// represent all iterations of the input loops. For instance, assuming a
375 /// 32 bit integer type, and two input loops both iterating 2^16 times, the
376 /// theoretical trip count of the collapsed loop would be 2^32 iterations,
377 /// which cannot be represented in a 32-bit integer. Behavior is undefined
378 /// in this case.
379 ///
380 /// * The trip counts of every input loop must be available at \p ComputeIP.
381 /// Non-rectangular loops are not yet supported.
382 ///
383 /// * At each nest level, code between a surrounding loop and its nested loop
384 /// is hoisted into the loop body, and such code will be executed more
385 /// often than before collapsing (or not at all if any inner loop
386 /// has a trip count of 0). This is permitted by the OpenMP specification.
387 ///
388 /// \param DL Debug location for instructions added for collapsing,
389 /// such as instructions to compute/derive the input loop's
390 /// induction variables.
391 /// \param Loops Loops in the loop nest to collapse. Loops are specified
392 /// from outermost-to-innermost and every control flow of a
393 /// loop's body must pass through its directly nested loop.
394 /// \param ComputeIP Where to insert additional instructions that compute the
395 /// collapsed trip count. If not set, defaults to before the
396 /// generated loop.
397 ///
398 /// \returns The CanonicalLoopInfo object representing the collapsed loop.
399 CanonicalLoopInfo *collapseLoops(DebugLoc DL,
400 ArrayRef<CanonicalLoopInfo *> Loops,
401 InsertPointTy ComputeIP);
402
403private:
404 /// Modifies the canonical loop to be a statically-scheduled workshare loop.
405 ///
406 /// This takes a \p CLI representing a canonical loop, such as the one
407 /// created by createCanonicalLoop, and emits additional instructions to
408 /// turn it into a workshare loop. In particular, it emits a call to an OpenMP
409 /// runtime function in the preheader to obtain the loop bounds to be used in
410 /// the current thread, updates the relevant instructions in the canonical
411 /// loop and emits a call to an OpenMP runtime finalization function after the loop.
412 ///
413 /// \param DL Debug location for instructions added for the
414 /// workshare-loop construct itself.
415 /// \param CLI A descriptor of the canonical loop to workshare.
416 /// \param AllocaIP An insertion point for Alloca instructions usable in the
417 /// preheader of the loop.
418 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
419 /// the loop.
420 ///
421 /// \returns Point where to insert code after the workshare construct.
422 InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
423 InsertPointTy AllocaIP,
424 bool NeedsBarrier);
425
426 /// Modifies the canonical loop to be a statically-scheduled workshare loop
427 /// with a user-specified chunk size.
428 ///
429 /// \param DL Debug location for instructions added for the
430 /// workshare-loop construct itself.
431 /// \param CLI A descriptor of the canonical loop to workshare.
432 /// \param AllocaIP An insertion point for Alloca instructions usable in
433 /// the preheader of the loop.
434 /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
435 /// loop.
436 /// \param ChunkSize The user-specified chunk size.
437 ///
438 /// \returns Point where to insert code after the workshare construct.
439 InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
440 CanonicalLoopInfo *CLI,
441 InsertPointTy AllocaIP,
442 bool NeedsBarrier,
443 Value *ChunkSize);
444
445 /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
446 ///
447 /// This takes a \p CLI representing a canonical loop, such as the one
448 /// created by createCanonicalLoop, and emits additional instructions to
449 /// turn it into a workshare loop. In particular, it emits calls to an OpenMP
450 /// runtime function in the preheader to obtain, and then in each iteration
451 /// to update, the loop counter.
452 ///
453 /// \param DL Debug location for instructions added for the
454 /// workshare-loop construct itself.
455 /// \param CLI A descriptor of the canonical loop to workshare.
456 /// \param AllocaIP An insertion point for Alloca instructions usable in the
457 /// preheader of the loop.
458 /// \param SchedType Type of scheduling to be passed to the init function.
459 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
460 /// the loop.
461 /// \param Chunk The size of loop chunk considered as a unit when
462 /// scheduling. If \p nullptr, defaults to 1.
463 ///
464 /// \returns Point where to insert code after the workshare construct.
465 InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
466 InsertPointTy AllocaIP,
467 omp::OMPScheduleType SchedType,
468 bool NeedsBarrier,
469 Value *Chunk = nullptr);
470
471 /// Create an alternative version of the loop to support the if clause.
472 ///
473 /// The OpenMP if clause may require generating a second loop. This loop
474 /// will be executed when the if clause condition is not met. createIfVersion
475 /// adds a branch instruction to the copied loop if \p IfCond is not met.
476 ///
477 /// \param Loop Original loop which should be versioned.
478 /// \param IfCond Value which corresponds to the if clause condition.
479 /// \param VMap Value to value map to define relation between
480 /// original and copied loop values and loop blocks.
481 /// \param NamePrefix Optional name prefix for if.then if.else blocks.
482 void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
483 ValueToValueMapTy &VMap, const Twine &NamePrefix = "");
484
485public:
486 /// Modifies the canonical loop to be a workshare loop.
487 ///
488 /// This takes a \p CLI representing a canonical loop, such as the one
489 /// created by createCanonicalLoop, and emits additional instructions to
490 /// turn it into a workshare loop. In particular, it emits a call to an OpenMP
491 /// runtime function in the preheader to obtain the loop bounds to be used in
492 /// the current thread, updates the relevant instructions in the canonical
493 /// loop and emits a call to an OpenMP runtime finalization function after the loop.
494 ///
495 /// The concrete transformation is done by applyStaticWorkshareLoop,
496 /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
497 /// on the value of \p SchedKind and \p ChunkSize.
498 ///
499 /// \param DL Debug location for instructions added for the
500 /// workshare-loop construct itself.
501 /// \param CLI A descriptor of the canonical loop to workshare.
502 /// \param AllocaIP An insertion point for Alloca instructions usable in the
503 /// preheader of the loop.
504 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
505 /// the loop.
506 /// \param SchedKind Scheduling algorithm to use.
507 /// \param ChunkSize The chunk size for the inner loop.
508 /// \param HasSimdModifier Whether the simd modifier is present in the
509 /// schedule clause.
510 /// \param HasMonotonicModifier Whether the monotonic modifier is present in
511 /// the schedule clause.
512 /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
513 /// present in the schedule clause.
514 /// \param HasOrderedClause Whether the (parameterless) ordered clause is
515 /// present.
516 ///
517 /// \returns Point where to insert code after the workshare construct.
518 InsertPointTy applyWorkshareLoop(
519 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
520 bool NeedsBarrier,
521 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
522 Value *ChunkSize = nullptr, bool HasSimdModifier = false,
523 bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
524 bool HasOrderedClause = false);
525
526 /// Tile a loop nest.
527 ///
528 /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
529 /// \p Loops must be perfectly nested, from outermost to innermost loop
530 /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
531 /// of every loop and every tile size must be usable in the outermost
532 /// loop's preheader. This implies that the loop nest is rectangular.
533 ///
534 /// Example:
535 /// \code
536 /// for (int i = 0; i < 15; ++i) // Canonical loop "i"
537 /// for (int j = 0; j < 14; ++j) // Canonical loop "j"
538 /// body(i, j);
539 /// \endcode
540 ///
541 /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
542 /// \code
543 /// for (int i1 = 0; i1 < 3; ++i1)
544 /// for (int j1 = 0; j1 < 2; ++j1)
545 /// for (int i2 = 0; i2 < 5; ++i2)
546 /// for (int j2 = 0; j2 < 7; ++j2)
547 /// body(i1*5+i2, j1*7+j2);
548 /// \endcode
549 ///
550 /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
551 /// are referred to as the floor loops, and the loops i2 and j2 are the tile
552 /// loops. Tiling also handles non-constant trip counts, non-constant tile
553 /// sizes and trip counts that are not multiples of the tile size. In the
554 /// latter case the tile loop of the last floor-loop iteration will have fewer
555 /// iterations than specified as its tile size.
556 ///
557 ///
558 /// \param DL Debug location for instructions added by tiling, for
559 /// instance the floor- and tile trip count computation.
560 /// \param Loops Loops to tile. The CanonicalLoopInfo objects are
561 /// invalidated by this method, i.e. should not be used after
562 /// tiling.
563 /// \param TileSizes For each loop in \p Loops, the tile size for that
564 /// dimension.
565 ///
566 /// \returns A list of generated loops. Contains twice as many loops as the
567 /// input loop nest; the first half are the floor loops and the
568 /// second half are the tile loops.
569 std::vector<CanonicalLoopInfo *>
570 tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
571 ArrayRef<Value *> TileSizes);
572
573 /// Fully unroll a loop.
574 ///
575 /// Instead of unrolling the loop immediately (and duplicating its body
576 /// instructions), unrolling is deferred to LLVM's LoopUnrollPass by adding
577 /// loop metadata.
578 ///
579 /// \param DL Debug location for instructions added by unrolling.
580 /// \param Loop The loop to unroll. The loop will be invalidated.
581 void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);
582
583 /// Fully or partially unroll a loop. Whether and how the loop is unrolled is
584 /// determined using LLVM's LoopUnrollPass.
585 ///
586 /// \param DL Debug location for instructions added by unrolling.
587 /// \param Loop The loop to unroll. The loop will be invalidated.
588 void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);
589
590 /// Partially unroll a loop.
591 ///
592 /// The CanonicalLoopInfo of the unrolled loop for use with chained
593 /// loop-associated directives can be requested using \p UnrolledCLI. Not
594 /// needing the CanonicalLoopInfo allows more efficient code generation by
595 /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
596 /// A loop-associated directive applied to the unrolled loop needs to know the
597 /// new trip count which means that if using a heuristically determined unroll
598 /// factor (\p Factor == 0), that factor must be computed immediately. We are
599 /// using the same logic as the LoopUnrollPass to derive the unroll factor,
600 /// but which assumes that some canonicalization has taken place (e.g.
601 /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
602 /// better when the unrolled loop's CanonicalLoopInfo is not needed.
603 ///
604 /// \param DL Debug location for instructions added by unrolling.
605 /// \param Loop The loop to unroll. The loop will be invalidated.
606 /// \param Factor The factor to unroll the loop by. A factor of 0
607 /// indicates that a heuristic should be used to determine
608 /// the unroll-factor.
609 /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
610 /// partially unrolled loop. Otherwise, uses loop metadata
611 /// to defer unrolling to the LoopUnrollPass.
612 void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
613 CanonicalLoopInfo **UnrolledCLI);
614
615 /// Add metadata to simd-ize a loop. If \p IfCond is not nullptr, the loop
616 /// is cloned. The metadata which prevents vectorization is added to
617 /// the cloned loop. The cloned loop is executed when \p IfCond is evaluated
618 /// to false.
619 ///
620 /// \param Loop The loop to simd-ize.
621 /// \param IfCond The value which corresponds to the if clause condition.
622 /// \param Simdlen The simdlen value to apply to the simd loop.
623 void applySimd(CanonicalLoopInfo *Loop, Value *IfCond, ConstantInt *Simdlen);
624
625 /// Generator for '#omp flush'.
626 ///
627 /// \param Loc The location where the flush directive was encountered.
628 void createFlush(const LocationDescription &Loc);
629
630 /// Generator for '#omp taskwait'.
631 ///
632 /// \param Loc The location where the taskwait directive was encountered.
633 void createTaskwait(const LocationDescription &Loc);
634
635 /// Generator for '#omp taskyield'.
636 ///
637 /// \param Loc The location where the taskyield directive was encountered.
638 void createTaskyield(const LocationDescription &Loc);
639
640 /// Generator for '#omp task'.
641 ///
642 /// \param Loc The location where the task construct was encountered.
643 /// \param AllocaIP The insertion point to be used for alloca instructions.
644 /// \param BodyGenCB Callback that will generate the region code.
645 /// \param Tied True if the task is tied, false if the task is untied.
646 /// \param Final i1 value which is `true` if the task is final, `false` if the
647 /// task is not final.
648 InsertPointTy createTask(const LocationDescription &Loc,
649 InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
650 bool Tied = true, Value *Final = nullptr);
651
652 /// Generator for '#omp taskgroup'.
653 ///
654 /// \param Loc The location where the taskgroup construct was encountered.
655 /// \param AllocaIP The insertion point to be used for alloca instructions.
656 /// \param BodyGenCB Callback that will generate the region code.
657 InsertPointTy createTaskgroup(const LocationDescription &Loc,
658 InsertPointTy AllocaIP,
659 BodyGenCallbackTy BodyGenCB);
660
661 /// Functions used to generate reductions. Given an insertion point, such
662 /// functions take two Values representing LHS and RHS of the reduction and a
663 /// reference to the value that is updated to refer to the reduction result.
664 using ReductionGenTy =
665 function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;
666
667 /// Functions used to generate atomic reductions. Such functions take two
668 /// Values representing pointers to LHS and RHS of the reduction, as well as
669 /// the element type of these pointers. They are expected to atomically
670 /// update the LHS to the reduced value.
671 using AtomicReductionGenTy =
672 function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>;
673
674 /// Information about an OpenMP reduction.
675 struct ReductionInfo {
676 ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable,
677 ReductionGenTy ReductionGen,
678 AtomicReductionGenTy AtomicReductionGen)
679 : ElementType(ElementType), Variable(Variable),
680 PrivateVariable(PrivateVariable), ReductionGen(ReductionGen),
681 AtomicReductionGen(AtomicReductionGen) {
682 assert(cast<PointerType>(Variable->getType())
683 ->isOpaqueOrPointeeTypeMatches(ElementType) && "Invalid elem type");
684 }
685
686 /// Reduction element type, must match pointee type of variable.
687 Type *ElementType;
688
689 /// Reduction variable of pointer type.
690 Value *Variable;
691
692 /// Thread-private partial reduction variable.
693 Value *PrivateVariable;
694
695 /// Callback for generating the reduction body. The IR produced by this will
696 /// be used to combine two values in a thread-safe context, e.g., under
697 /// lock or within the same thread, and therefore need not be atomic.
698 ReductionGenTy ReductionGen;
699
700 /// Callback for generating the atomic reduction body, may be null. The IR
701 /// produced by this will be used to atomically combine two values during
702 /// reduction. If null, the implementation will use the non-atomic version
703 /// along with the appropriate synchronization mechanisms.
704 AtomicReductionGenTy AtomicReductionGen;
705 };
706
707 // TODO: provide atomic and non-atomic reduction generators for reduction
708 // operators defined by the OpenMP specification.
709
710 /// Generator for '#omp reduction'.
711 ///
712 /// Emits the IR instructing the runtime to perform the specific kind of
713 /// reductions. Expects reduction variables to have been privatized and
714 /// initialized to reduction-neutral values separately. Emits the calls to
715 /// runtime functions as well as the reduction function and the basic blocks
716 /// performing the reduction atomically and non-atomically.
717 ///
718 /// The code emitted for the following:
719 ///
720 /// \code
721 /// type var_1;
722 /// type var_2;
723 /// #pragma omp <directive> reduction(reduction-op:var_1,var_2)
724 /// /* body */;
725 /// \endcode
726 ///
727 /// corresponds to the following sketch.
728 ///
729 /// \code
730 /// void _outlined_par() {
731 /// // N is the number of different reductions.
732 /// void *red_array[] = {privatized_var_1, privatized_var_2, ...};
733 /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
734 /// _omp_reduction_func,
735 /// _gomp_critical_user.reduction.var)) {
736 /// case 1: {
737 /// var_1 = var_1 <reduction-op> privatized_var_1;
738 /// var_2 = var_2 <reduction-op> privatized_var_2;
739 /// // ...
740 /// __kmpc_end_reduce(...);
741 /// break;
742 /// }
743 /// case 2: {
744 /// _Atomic<ReductionOp>(var_1, privatized_var_1);
745 /// _Atomic<ReductionOp>(var_2, privatized_var_2);
746 /// // ...
747 /// break;
748 /// }
749 /// default: break;
750 /// }
751 /// }
752 ///
753 /// void _omp_reduction_func(void **lhs, void **rhs) {
754 /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
755 /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
756 /// // ...
757 /// }
758 /// \endcode
759 ///
760 /// \param Loc The location where the reduction was
761 /// encountered. Must be within the associate
762 /// directive and after the last local access to the
763 /// reduction variables.
764 /// \param AllocaIP An insertion point suitable for allocas usable
765 /// in reductions.
766 /// \param ReductionInfos A list of info on each reduction variable.
767 /// \param IsNoWait A flag set if the reduction is marked as nowait.
768 InsertPointTy createReductions(const LocationDescription &Loc,
769 InsertPointTy AllocaIP,
770 ArrayRef<ReductionInfo> ReductionInfos,
771 bool IsNoWait = false);
772
773 ///}
774
775 /// Return the insertion point used by the underlying IRBuilder.
776 InsertPointTy getInsertionPoint() { return Builder.saveIP(); }
777
778 /// Update the internal location to \p Loc.
779 bool updateToLocation(const LocationDescription &Loc) {
780 Builder.restoreIP(Loc.IP);
781 Builder.SetCurrentDebugLocation(Loc.DL);
782 return Loc.IP.getBlock() != nullptr;
783 }
784
785 /// Return the function declaration for the runtime function with \p FnID.
786 FunctionCallee getOrCreateRuntimeFunction(Module &M,
787 omp::RuntimeFunction FnID);
788
789 Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
790
791 /// Return the (LLVM-IR) string describing the source location \p LocStr.
792 Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
793
794 /// Return the (LLVM-IR) string describing the default source location.
795 Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize);
796
797 /// Return the (LLVM-IR) string describing the source location identified by
798 /// the arguments.
799 Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
800 unsigned Line, unsigned Column,
801 uint32_t &SrcLocStrSize);
802
803 /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
804 /// fallback if \p DL does not specify the function name.
805 Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
806 Function *F = nullptr);
807
808 /// Return the (LLVM-IR) string describing the source location \p Loc.
809 Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
810 uint32_t &SrcLocStrSize);
811
812 /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
813 /// TODO: Create a enum class for the Reserve2Flags
814 Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
815 omp::IdentFlag Flags = omp::IdentFlag(0),
816 unsigned Reserve2Flags = 0);
817
818 /// Create a hidden global flag \p Name in the module with initial value \p
819 /// Value.
820 GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
821
822 /// Create an offloading section struct used to register this global at
823 /// runtime.
824 ///
825 /// Type struct __tgt_offload_entry{
826 /// void *addr; // Pointer to the offload entry info.
827 /// // (function or global)
828 /// char *name; // Name of the function or global.
829 /// size_t size; // Size of the entry info (0 if it a function).
830 /// int32_t flags;
831 /// int32_t reserved;
832 /// };
833 ///
834 /// \param Addr The pointer to the global being registered.
835 /// \param Name The symbol name associated with the global.
836 /// \param Size The size in bytes of the global (0 for functions).
837 /// \param Flags Flags associated with the entry.
838 /// \param SectionName The section this entry will be placed at.
839 void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size,
840 int32_t Flags,
841 StringRef SectionName = "omp_offloading_entries");
842
843 /// Generate control flow and cleanup for cancellation.
844 ///
845 /// \param CancelFlag Flag indicating if the cancellation is performed.
846 /// \param CanceledDirective The kind of directive that is cancled.
847 /// \param ExitCB Extra code to be generated in the exit block.
848 void emitCancelationCheckImpl(Value *CancelFlag,
849 omp::Directive CanceledDirective,
850 FinalizeCallbackTy ExitCB = {});
851
852 /// Generate a target region entry call.
853 ///
854 /// \param Loc The location at which the request originated and is fulfilled.
855 /// \param Return Return value of the created function returned by reference.
856 /// \param DeviceID Identifier for the device via the 'device' clause.
857 /// \param NumTeams Numer of teams for the region via the 'num_teams' clause
858 /// or 0 if unspecified and -1 if there is no 'teams' clause.
859 /// \param NumThreads Number of threads via the 'thread_limit' clause.
860 /// \param HostPtr Pointer to the host-side pointer of the target kernel.
861 /// \param KernelArgs Array of arguments to the kernel.
862 /// \param NoWaitArgs Optional array of arguments to the nowait kernel.
863 InsertPointTy emitTargetKernel(const LocationDescription &Loc, Value *&Return,
864 Value *Ident, Value *DeviceID, Value *NumTeams,
865 Value *NumThreads, Value *HostPtr,
866 ArrayRef<Value *> KernelArgs,
867 ArrayRef<Value *> NoWaitArgs = {});
868
869 /// Generate a barrier runtime call.
870 ///
871 /// \param Loc The location at which the request originated and is fulfilled.
872 /// \param DK The directive which caused the barrier
873 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
874 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
875 /// should be checked and acted upon.
876 ///
877 /// \returns The insertion point after the barrier.
878 InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
879 omp::Directive DK, bool ForceSimpleCall,
880 bool CheckCancelFlag);
881
882 /// Generate a flush runtime call.
883 ///
884 /// \param Loc The location at which the request originated and is fulfilled.
885 void emitFlush(const LocationDescription &Loc);
886
887 /// The finalization stack made up of finalize callbacks currently in-flight,
888 /// wrapped into FinalizationInfo objects that reference also the finalization
889 /// target block and the kind of cancellable directive.
890 SmallVector<FinalizationInfo, 8> FinalizationStack;
891
892 /// Return true if the last entry in the finalization stack is of kind \p DK
893 /// and cancellable.
894 bool isLastFinalizationInfoCancellable(omp::Directive DK) {
895 return !FinalizationStack.empty() &&
896 FinalizationStack.back().IsCancellable &&
897 FinalizationStack.back().DK == DK;
898 }
899
900 /// Generate a taskwait runtime call.
901 ///
902 /// \param Loc The location at which the request originated and is fulfilled.
903 void emitTaskwaitImpl(const LocationDescription &Loc);
904
905 /// Generate a taskyield runtime call.
906 ///
907 /// \param Loc The location at which the request originated and is fulfilled.
908 void emitTaskyieldImpl(const LocationDescription &Loc);
909
910 /// Return the current thread ID.
911 ///
912 /// \param Ident The ident (ident_t*) describing the query origin.
913 Value *getOrCreateThreadID(Value *Ident);
914
915 /// The underlying LLVM-IR module
916 Module &M;
917
918 /// The LLVM-IR Builder used to create IR.
919 IRBuilder<> Builder;
920
921 /// Map to remember source location strings
922 StringMap<Constant *> SrcLocStrMap;
923
924 /// Map to remember existing ident_t*.
925 DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;
926
927 /// Helper that contains information about regions we need to outline
928 /// during finalization.
929 struct OutlineInfo {
930 using PostOutlineCBTy = std::function<void(Function &)>;
931 PostOutlineCBTy PostOutlineCB;
932 BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
933 SmallVector<Value *, 2> ExcludeArgsFromAggregate;
934
935 /// Collect all blocks in between EntryBB and ExitBB in both the given
936 /// vector and set.
937 void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
938 SmallVectorImpl<BasicBlock *> &BlockVector);
939
940 /// Return the function that contains the region to be outlined.
941 Function *getFunction() const { return EntryBB->getParent(); }
942 };
943
944 /// Collection of regions that need to be outlined during finalization.
945 SmallVector<OutlineInfo, 16> OutlineInfos;
946
947 /// Collection of owned canonical loop objects that eventually need to be
948 /// free'd.
949 std::forward_list<CanonicalLoopInfo> LoopInfos;
950
951 /// Add a new region that will be outlined later.
952 void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
953
954 /// An ordered map of auto-generated variables to their unique names.
955 /// It stores variables with the following names: 1) ".gomp_critical_user_" +
956 /// <critical_section_name> + ".var" for "omp critical" directives; 2)
957 /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
958 /// variables.
959 StringMap<AssertingVH<Constant>, BumpPtrAllocator> InternalVars;
960
961 /// Create the global variable holding the offload mappings information.
962 GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
963 std::string VarName);
964
965 /// Create the global variable holding the offload names information.
966 GlobalVariable *
967 createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
968 std::string VarName);
969
970 struct MapperAllocas {
971 AllocaInst *ArgsBase = nullptr;
972 AllocaInst *Args = nullptr;
973 AllocaInst *ArgSizes = nullptr;
974 };
975
976 /// Create the allocas instruction used in call to mapper functions.
977 void createMapperAllocas(const LocationDescription &Loc,
978 InsertPointTy AllocaIP, unsigned NumOperands,
979 struct MapperAllocas &MapperAllocas);
980
981 /// Create the call for the target mapper function.
982 /// \param Loc The source location description.
983 /// \param MapperFunc Function to be called.
984 /// \param SrcLocInfo Source location information global.
985 /// \param MaptypesArg The argument types.
986 /// \param MapnamesArg The argument names.
987 /// \param MapperAllocas The AllocaInst used for the call.
988 /// \param DeviceID Device ID for the call.
989 /// \param NumOperands Number of operands in the call.
990 void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
991 Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
992 struct MapperAllocas &MapperAllocas, int64_t DeviceID,
993 unsigned NumOperands);
994
995public:
996 /// Generator for __kmpc_copyprivate
997 ///
998 /// \param Loc The source location description.
999 /// \param BufSize Number of elements in the buffer.
1000 /// \param CpyBuf List of pointers to data to be copied.
1001 /// \param CpyFn function to call for copying data.
1002 /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
1003 ///
1004 /// \return The insertion position *after* the CopyPrivate call.
1005
1006 InsertPointTy createCopyPrivate(const LocationDescription &Loc,
1007 llvm::Value *BufSize, llvm::Value *CpyBuf,
1008 llvm::Value *CpyFn, llvm::Value *DidIt);
1009
1010 /// Generator for '#omp single'
1011 ///
1012 /// \param Loc The source location description.
1013 /// \param BodyGenCB Callback that will generate the region code.
1014 /// \param FiniCB Callback to finalize variable copies.
1015 /// \param IsNowait If false, a barrier is emitted.
1016 /// \param DidIt Local variable used as a flag to indicate 'single' thread
1017 ///
1018 /// \returns The insertion position *after* the single call.
1019 InsertPointTy createSingle(const LocationDescription &Loc,
1020 BodyGenCallbackTy BodyGenCB,
1021 FinalizeCallbackTy FiniCB, bool IsNowait,
1022 llvm::Value *DidIt);
1023
1024 /// Generator for '#omp master'
1025 ///
1026 /// \param Loc The insert and source location description.
1027 /// \param BodyGenCB Callback that will generate the region code.
1028 /// \param FiniCB Callback to finalize variable copies.
1029 ///
1030 /// \returns The insertion position *after* the master.
1031 InsertPointTy createMaster(const LocationDescription &Loc,
1032 BodyGenCallbackTy BodyGenCB,
1033 FinalizeCallbackTy FiniCB);
1034
1035 /// Generator for '#omp masked'
1036 ///
1037 /// \param Loc The insert and source location description.
1038 /// \param BodyGenCB Callback that will generate the region code.
1039 /// \param FiniCB Callback to finialize variable copies.
1040 ///
1041 /// \returns The insertion position *after* the masked.
1042 InsertPointTy createMasked(const LocationDescription &Loc,
1043 BodyGenCallbackTy BodyGenCB,
1044 FinalizeCallbackTy FiniCB, Value *Filter);
1045
1046 /// Generator for '#omp critical'
1047 ///
1048 /// \param Loc The insert and source location description.
1049 /// \param BodyGenCB Callback that will generate the region body code.
1050 /// \param FiniCB Callback to finalize variable copies.
1051 /// \param CriticalName name of the lock used by the critical directive
1052 /// \param HintInst Hint Instruction for hint clause associated with critical
1053 ///
1054 /// \returns The insertion position *after* the critical.
1055 InsertPointTy createCritical(const LocationDescription &Loc,
1056 BodyGenCallbackTy BodyGenCB,
1057 FinalizeCallbackTy FiniCB,
1058 StringRef CriticalName, Value *HintInst);
1059
1060 /// Generator for '#omp ordered depend (source | sink)'
1061 ///
1062 /// \param Loc The insert and source location description.
1063 /// \param AllocaIP The insertion point to be used for alloca instructions.
1064 /// \param NumLoops The number of loops in depend clause.
1065 /// \param StoreValues The value will be stored in vector address.
1066 /// \param Name The name of alloca instruction.
1067 /// \param IsDependSource If true, depend source; otherwise, depend sink.
1068 ///
1069 /// \return The insertion position *after* the ordered.
1070 InsertPointTy createOrderedDepend(const LocationDescription &Loc,
1071 InsertPointTy AllocaIP, unsigned NumLoops,
1072 ArrayRef<llvm::Value *> StoreValues,
1073 const Twine &Name, bool IsDependSource);
1074
1075 /// Generator for '#omp ordered [threads | simd]'
1076 ///
1077 /// \param Loc The insert and source location description.
1078 /// \param BodyGenCB Callback that will generate the region code.
1079 /// \param FiniCB Callback to finalize variable copies.
1080 /// \param IsThreads If true, with threads clause or without clause;
1081 /// otherwise, with simd clause;
1082 ///
1083 /// \returns The insertion position *after* the ordered.
1084 InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
1085 BodyGenCallbackTy BodyGenCB,
1086 FinalizeCallbackTy FiniCB,
1087 bool IsThreads);
1088
1089 /// Generator for '#omp sections'
1090 ///
1091 /// \param Loc The insert and source location description.
1092 /// \param AllocaIP The insertion points to be used for alloca instructions.
1093 /// \param SectionCBs Callbacks that will generate body of each section.
1094 /// \param PrivCB Callback to copy a given variable (think copy constructor).
1095 /// \param FiniCB Callback to finalize variable copies.
1096 /// \param IsCancellable Flag to indicate a cancellable parallel region.
1097 /// \param IsNowait If true, barrier - to ensure all sections are executed
1098 /// before moving forward will not be generated.
1099 /// \returns The insertion position *after* the sections.
1100 InsertPointTy createSections(const LocationDescription &Loc,
1101 InsertPointTy AllocaIP,
1102 ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
1103 PrivatizeCallbackTy PrivCB,
1104 FinalizeCallbackTy FiniCB, bool IsCancellable,
1105 bool IsNowait);
1106
1107 /// Generator for '#omp section'
1108 ///
1109 /// \param Loc The insert and source location description.
1110 /// \param BodyGenCB Callback that will generate the region body code.
1111 /// \param FiniCB Callback to finalize variable copies.
1112 /// \returns The insertion position *after* the section.
1113 InsertPointTy createSection(const LocationDescription &Loc,
1114 BodyGenCallbackTy BodyGenCB,
1115 FinalizeCallbackTy FiniCB);
1116
1117 /// Generate conditional branch and relevant BasicBlocks through which private
1118 /// threads copy the 'copyin' variables from Master copy to threadprivate
1119 /// copies.
1120 ///
1121 /// \param IP insertion block for copyin conditional
1122 /// \param MasterVarPtr a pointer to the master variable
1123 /// \param PrivateVarPtr a pointer to the threadprivate variable
1124 /// \param IntPtrTy Pointer size type
1125 /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
1126 // and copy.in.end block
1127 ///
1128 /// \returns The insertion point where copying operation to be emitted.
1129 InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
1130 Value *PrivateAddr,
1131 llvm::IntegerType *IntPtrTy,
1132 bool BranchtoEnd = true);
1133
1134 /// Create a runtime call for kmpc_Alloc
1135 ///
1136 /// \param Loc The insert and source location description.
1137 /// \param Size Size of allocated memory space
1138 /// \param Allocator Allocator information instruction
1139 /// \param Name Name of call Instruction for OMP_alloc
1140 ///
1141 /// \returns CallInst to the OMP_Alloc call
1142 CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
1143 Value *Allocator, std::string Name = "");
1144
1145 /// Create a runtime call for kmpc_free
1146 ///
1147 /// \param Loc The insert and source location description.
1148 /// \param Addr Address of memory space to be freed
1149 /// \param Allocator Allocator information instruction
1150 /// \param Name Name of call Instruction for OMP_Free
1151 ///
1152 /// \returns CallInst to the OMP_Free call
1153 CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
1154 Value *Allocator, std::string Name = "");
1155
1156 /// Create a runtime call for kmpc_threadprivate_cached
1157 ///
1158 /// \param Loc The insert and source location description.
1159 /// \param Pointer pointer to data to be cached
1160 /// \param Size size of data to be cached
1161 /// \param Name Name of call Instruction for callinst
1162 ///
1163 /// \returns CallInst to the thread private cache call.
1164 CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
1165 llvm::Value *Pointer,
1166 llvm::ConstantInt *Size,
1167 const llvm::Twine &Name = Twine(""));
1168
1169 /// Create a runtime call for __tgt_interop_init
1170 ///
1171 /// \param Loc The insert and source location description.
1172 /// \param InteropVar variable to be allocated
1173 /// \param InteropType type of interop operation
1174 /// \param Device devide to which offloading will occur
1175 /// \param NumDependences number of dependence variables
1176 /// \param DependenceAddress pointer to dependence variables
1177 /// \param HaveNowaitClause does nowait clause exist
1178 ///
1179 /// \returns CallInst to the __tgt_interop_init call
1180 CallInst *createOMPInteropInit(const LocationDescription &Loc,
1181 Value *InteropVar,
1182 omp::OMPInteropType InteropType, Value *Device,
1183 Value *NumDependences,
1184 Value *DependenceAddress,
1185 bool HaveNowaitClause);
1186
1187 /// Create a runtime call for __tgt_interop_destroy
1188 ///
1189 /// \param Loc The insert and source location description.
1190 /// \param InteropVar variable to be allocated
1191 /// \param Device devide to which offloading will occur
1192 /// \param NumDependences number of dependence variables
1193 /// \param DependenceAddress pointer to dependence variables
1194 /// \param HaveNowaitClause does nowait clause exist
1195 ///
1196 /// \returns CallInst to the __tgt_interop_destroy call
1197 CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
1198 Value *InteropVar, Value *Device,
1199 Value *NumDependences,
1200 Value *DependenceAddress,
1201 bool HaveNowaitClause);
1202
1203 /// Create a runtime call for __tgt_interop_use
1204 ///
1205 /// \param Loc The insert and source location description.
1206 /// \param InteropVar variable to be allocated
1207 /// \param Device devide to which offloading will occur
1208 /// \param NumDependences number of dependence variables
1209 /// \param DependenceAddress pointer to dependence variables
1210 /// \param HaveNowaitClause does nowait clause exist
1211 ///
1212 /// \returns CallInst to the __tgt_interop_use call
1213 CallInst *createOMPInteropUse(const LocationDescription &Loc,
1214 Value *InteropVar, Value *Device,
1215 Value *NumDependences, Value *DependenceAddress,
1216 bool HaveNowaitClause);
1217
1218 /// The `omp target` interface
1219 ///
1220 /// For more information about the usage of this interface,
1221 /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
1222 ///
1223 ///{
1224
1225 /// Create a runtime call for kmpc_target_init
1226 ///
1227 /// \param Loc The insert and source location description.
1228 /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
1229 /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
1230 InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
1231 bool RequiresFullRuntime);
1232
1233 /// Create a runtime call for kmpc_target_deinit
1234 ///
1235 /// \param Loc The insert and source location description.
1236 /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
1237 /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
1238 void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD,
1239 bool RequiresFullRuntime);
1240
1241 ///}
1242
1243 /// Declarations for LLVM-IR types (simple, array, function and structure) are
1244 /// generated below. Their names are defined and used in OpenMPKinds.def. Here
1245 /// we provide the declarations, the initializeTypes function will provide the
1246 /// values.
1247 ///
1248 ///{
1249#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
1250#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
1251 ArrayType *VarName##Ty = nullptr; \
1252 PointerType *VarName##PtrTy = nullptr;
1253#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
1254 FunctionType *VarName = nullptr; \
1255 PointerType *VarName##Ptr = nullptr;
1256#define OMP_STRUCT_TYPE(VarName, StrName, ...) \
1257 StructType *VarName = nullptr; \
1258 PointerType *VarName##Ptr = nullptr;
1259#include "llvm/Frontend/OpenMP/OMPKinds.def"
1260
1261 ///}
1262
1263private:
1264 /// Create all simple and struct types exposed by the runtime and remember
1265 /// the llvm::PointerTypes of them for easy access later.
1266 void initializeTypes(Module &M);
1267
1268 /// Common interface for generating entry calls for OMP Directives.
1269 /// if the directive has a region/body, It will set the insertion
1270 /// point to the body
1271 ///
1272 /// \param OMPD Directive to generate entry blocks for
1273 /// \param EntryCall Call to the entry OMP Runtime Function
1274 /// \param ExitBB block where the region ends.
1275 /// \param Conditional indicate if the entry call result will be used
1276 /// to evaluate a conditional of whether a thread will execute
1277 /// body code or not.
1278 ///
1279 /// \return The insertion position in exit block
1280 InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
1281 BasicBlock *ExitBB,
1282 bool Conditional = false);
1283
1284 /// Common interface to finalize the region
1285 ///
1286 /// \param OMPD Directive to generate exiting code for
1287 /// \param FinIP Insertion point for emitting Finalization code and exit call
1288 /// \param ExitCall Call to the ending OMP Runtime Function
1289 /// \param HasFinalize indicate if the directive will require finalization
1290 /// and has a finalization callback in the stack that
1291 /// should be called.
1292 ///
1293 /// \return The insertion position in exit block
1294 InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
1295 InsertPointTy FinIP,
1296 Instruction *ExitCall,
1297 bool HasFinalize = true);
1298
1299 /// Common Interface to generate OMP inlined regions
1300 ///
1301 /// \param OMPD Directive to generate inlined region for
1302 /// \param EntryCall Call to the entry OMP Runtime Function
1303 /// \param ExitCall Call to the ending OMP Runtime Function
1304 /// \param BodyGenCB Body code generation callback.
1305 /// \param FiniCB Finalization Callback. Will be called when finalizing region
1306 /// \param Conditional indicate if the entry call result will be used
1307 /// to evaluate a conditional of whether a thread will execute
1308 /// body code or not.
1309 /// \param HasFinalize indicate if the directive will require finalization
1310 /// and has a finalization callback in the stack that
1311 /// should be called.
1312 /// \param IsCancellable if HasFinalize is set to true, indicate if the
1313 /// the directive should be cancellable.
1314 /// \return The insertion point after the region
1315
1316 InsertPointTy
1317 EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
1318 Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
1319 FinalizeCallbackTy FiniCB, bool Conditional = false,
1320 bool HasFinalize = true, bool IsCancellable = false);
1321
1322 /// Get the platform-specific name separator.
1323 /// \param Parts different parts of the final name that needs separation
1324 /// \param FirstSeparator First separator used between the initial two
1325 /// parts of the name.
1326 /// \param Separator separator used between all of the rest consecutive
1327 /// parts of the name
1328 static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
1329 StringRef FirstSeparator,
1330 StringRef Separator);
1331
1332 /// Gets (if variable with the given name already exist) or creates
1333 /// internal global variable with the specified Name. The created variable has
1334 /// linkage CommonLinkage by default and is initialized by null value.
1335 /// \param Ty Type of the global variable. If it is exist already the type
1336 /// must be the same.
1337 /// \param Name Name of the variable.
1338 Constant *getOrCreateOMPInternalVariable(Type *Ty, const Twine &Name,
1339 unsigned AddressSpace = 0);
1340
1341 /// Returns corresponding lock object for the specified critical region
1342 /// name. If the lock object does not exist it is created, otherwise the
1343 /// reference to the existing copy is returned.
1344 /// \param CriticalName Name of the critical region.
1345 ///
1346 Value *getOMPCriticalRegionLock(StringRef CriticalName);
1347
1348 /// Callback type for Atomic Expression update
1349 /// ex:
1350 /// \code{.cpp}
1351 /// unsigned x = 0;
1352 /// #pragma omp atomic update
1353 /// x = Expr(x_old); //Expr() is any legal operation
1354 /// \endcode
1355 ///
1356 /// \param XOld the value of the atomic memory address to use for update
1357 /// \param IRB reference to the IRBuilder to use
1358 ///
1359 /// \returns Value to update X to.
1360 using AtomicUpdateCallbackTy =
1361 const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
1362
1363private:
/// The kinds of OpenMP atomic operation, numbered consecutively from zero in
/// the order listed.
enum AtomicKind {
  Read,
  Write,
  Update,
  Capture,
  Compare
};
1365
1366 /// Determine whether to emit flush or not
1367 ///
1368 /// \param Loc The insert and source location description.
1369 /// \param AO The required atomic ordering
1370 /// \param AK The OpenMP atomic operation kind used.
1371 ///
1372 /// \returns wether a flush was emitted or not
1373 bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
1374 AtomicOrdering AO, AtomicKind AK);
1375
  /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X.
  /// For complex operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X).
  /// Only scalar data types.
  ///
  /// \param AllocaIP     The insertion point to be used for alloca
  ///                     instructions.
  /// \param X            The target atomic pointer to be updated.
  /// \param XElemTy      The element type of the atomic pointer.
  /// \param Expr         The value to update X with.
  /// \param AO           Atomic ordering of the generated atomic
  ///                     instructions.
  /// \param RMWOp        The binary operation used for update. If the
  ///                     operation is not supported by atomicRMW, or belongs
  ///                     to {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
  ///                     atomic will be generated.
  /// \param UpdateOp     Code generator for complex expressions that cannot be
  ///                     expressed through atomicrmw instruction.
  /// \param VolatileX    true if \a X is volatile.
  /// \param IsXBinopExpr true if \a X is the left-hand operand in the
  ///                     right-hand side of the update expression, false
  ///                     otherwise (e.g. true for X = X BinOp Expr).
  ///
  /// \returns A pair of the old value of X before the update, and the value
  ///          used for the update.
  std::pair<Value *, Value *>
  emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
                   AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
                   AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
                   bool IsXBinopExpr);
1405
  /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2.
  ///
  /// \returns The instruction.
  Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
                                AtomicRMWInst::BinOp RMWOp);
1411
1412public:
  /// A struct to pack relevant information while generating atomic ops.
  struct AtomicOpValue {
    /// The value operated on. The createAtomic* methods document their
    /// AtomicOpValue arguments as target pointers / memory addresses.
    Value *Var = nullptr;
    /// NOTE(review): presumably the element type \c Var points to -- confirm
    /// against the implementation.
    Type *ElemTy = nullptr;
    /// NOTE(review): presumably whether the element is to be treated as a
    /// signed value -- confirm against the implementation.
    bool IsSigned = false;
    /// NOTE(review): presumably whether accesses through \c Var are volatile
    /// -- confirm against the implementation.
    bool IsVolatile = false;
  };
1420
  /// Emit atomic read for: V = X --- Only scalar data types.
  ///
  /// \param Loc The insert and source location description.
  /// \param X   The target pointer to be atomically read.
  /// \param V   Memory address where to store the atomically read value.
  /// \param AO  Atomic ordering of the generated atomic instructions.
  ///
  /// \return Insertion point after generated atomic read IR.
  InsertPointTy createAtomicRead(const LocationDescription &Loc,
                                 AtomicOpValue &X, AtomicOpValue &V,
                                 AtomicOrdering AO);
1434
  /// Emit atomic write for: X = Expr --- Only scalar data types.
  ///
  /// \param Loc  The insert and source location description.
  /// \param X    The target pointer to be atomically written to.
  /// \param Expr The value to store.
  /// \param AO   Atomic ordering of the generated atomic instructions.
  ///
  /// \return Insertion point after generated atomic write IR.
  InsertPointTy createAtomicWrite(const LocationDescription &Loc,
                                  AtomicOpValue &X, Value *Expr,
                                  AtomicOrdering AO);
1447
  /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X.
  /// For complex operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X).
  /// Only scalar data types.
  ///
  /// \param Loc          The insert and source location description.
  /// \param AllocaIP     The insertion point to be used for alloca
  ///                     instructions.
  /// \param X            The target atomic pointer to be updated.
  /// \param Expr         The value to update X with.
  /// \param AO           Atomic ordering of the generated atomic
  ///                     instructions.
  /// \param RMWOp        The binary operation used for update. If the
  ///                     operation is not supported by atomicRMW, or belongs
  ///                     to {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
  ///                     atomic will be generated.
  /// \param UpdateOp     Code generator for complex expressions that cannot be
  ///                     expressed through atomicrmw instruction.
  /// \param IsXBinopExpr true if \a X is the left-hand operand in the
  ///                     right-hand side of the update expression, false
  ///                     otherwise (e.g. true for X = X BinOp Expr).
  ///
  /// \return Insertion point after generated atomic update IR.
  InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
                                   InsertPointTy AllocaIP, AtomicOpValue &X,
                                   Value *Expr, AtomicOrdering AO,
                                   AtomicRMWInst::BinOp RMWOp,
                                   AtomicUpdateCallbackTy &UpdateOp,
                                   bool IsXBinopExpr);
1474
  /// Emit atomic update for constructs: --- Only scalar data types
  ///   V = X; X = X BinOp Expr,
  ///   X = X BinOp Expr; V = X,
  ///   V = X; X = Expr BinOp X,
  ///   X = Expr BinOp X; V = X,
  ///   V = X; X = UpdateOp(X),
  ///   X = UpdateOp(X); V = X,
  ///
  /// \param Loc             The insert and source location description.
  /// \param AllocaIP        The insertion point to be used for alloca
  ///                        instructions.
  /// \param X               The target atomic pointer to be updated.
  /// \param V               Memory address where to store captured value.
  /// \param Expr            The value to update X with.
  /// \param AO              Atomic ordering of the generated atomic
  ///                        instructions.
  /// \param RMWOp           The binary operation used for update. If the
  ///                        operation is not supported by atomicRMW, or
  ///                        belongs to {FADD, FSUB, BAD_BINOP}, then a
  ///                        cmpExch based atomic will be generated.
  /// \param UpdateOp        Code generator for complex expressions that cannot
  ///                        be expressed through atomicrmw instruction.
  /// \param UpdateExpr      true if X is an in place update of the form
  ///                        X = X BinOp Expr or X = Expr BinOp X.
  /// \param IsPostfixUpdate true if original value of 'x' must be stored in
  ///                        'v', not an updated one.
  /// \param IsXBinopExpr    true if X is the left-hand operand in the
  ///                        right-hand side of the update expression, false
  ///                        otherwise (e.g. true for X = X BinOp Expr).
  ///
  /// \return Insertion point after generated atomic capture IR.
  InsertPointTy
  createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP,
                      AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
                      AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
                      AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
                      bool IsPostfixUpdate, bool IsXBinopExpr);
1510
  /// Emit atomic compare for constructs: --- Only scalar data types
  /// cond-expr-stmt:
  ///   x = x ordop expr ? expr : x;
  ///   x = expr ordop x ? expr : x;
  ///   x = x == e ? d : x;
  ///   x = e == x ? d : x; (this one is not in the spec)
  /// cond-update-stmt:
  ///   if (x ordop expr) { x = expr; }
  ///   if (expr ordop x) { x = expr; }
  ///   if (x == e) { x = d; }
  ///   if (e == x) { x = d; } (this one is not in the spec)
  /// conditional-update-capture-atomic:
  ///   v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
  ///   cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
  ///   if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
  ///                                           IsFailOnly=true)
  ///   r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
  ///   r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
  ///                                                  IsFailOnly=true)
  ///
  /// \param Loc             The insert and source location description.
  /// \param X               The target atomic pointer to be updated.
  /// \param V               Memory address where to store captured value (for
  ///                        compare capture only).
  /// \param R               Memory address where to store comparison result
  ///                        (for compare capture with '==' only).
  /// \param E               The expected value ('e') for forms that use an
  ///                        equality comparison or an expression ('expr') for
  ///                        forms that use 'ordop' (logically an atomic
  ///                        maximum or minimum).
  /// \param D               The desired value for forms that use an equality
  ///                        comparison. For forms that use 'ordop', it should
  ///                        be \p nullptr.
  /// \param AO              Atomic ordering of the generated atomic
  ///                        instructions.
  /// \param Op              Atomic compare operation. It can only be ==, <,
  ///                        or >.
  /// \param IsXBinopExpr    True if the conditional statement is in the form
  ///                        where x is on LHS. It only matters for < or >.
  /// \param IsPostfixUpdate True if original value of 'x' must be stored in
  ///                        'v', not an updated one (for compare capture
  ///                        only).
  /// \param IsFailOnly      True if the original value of 'x' is stored to 'v'
  ///                        only when the comparison fails. This is only valid
  ///                        for the case the comparison is '=='.
  ///
  /// \return Insertion point after generated atomic compare IR.
  InsertPointTy
  createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X,
                      AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
                      AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
                      bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
1561
  /// Create the control flow structure of a canonical OpenMP loop.
  ///
  /// The emitted loop will be disconnected, i.e. no edge to the loop's
  /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
  /// IRBuilder location is not preserved.
  ///
  /// \param DL               DebugLoc used for the instructions in the
  ///                         skeleton.
  /// \param TripCount        Value to be used for the trip count.
  /// \param F                Function in which to insert the BasicBlocks.
  /// \param PreInsertBefore  Where to insert BBs that execute before the body,
  ///                         typically the body itself.
  /// \param PostInsertBefore Where to insert BBs that execute after the body.
  /// \param Name             Base name used to derive BB and instruction
  ///                         names. Defaults to an empty Twine.
  ///
  /// \returns The CanonicalLoopInfo that represents the emitted loop.
  CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
                                        Function *F,
                                        BasicBlock *PreInsertBefore,
                                        BasicBlock *PostInsertBefore,
                                        const Twine &Name = {});
1583};
1584
/// Class to represent the control flow structure of an OpenMP canonical loop.
1586///
1587/// The control-flow structure is standardized for easy consumption by
1588/// directives associated with loops. For instance, the worksharing-loop
1589/// construct may change this control flow such that each loop iteration is
1590/// executed on only one thread. The constraints of a canonical loop in brief
1591/// are:
1592///
1593/// * The number of loop iterations must have been computed before entering the
1594/// loop.
1595///
1596/// * Has an (unsigned) logical induction variable that starts at zero and
1597/// increments by one.
1598///
1599/// * The loop's CFG itself has no side-effects. The OpenMP specification
1600/// itself allows side-effects, but the order in which they happen, including
1601/// how often or whether at all, is unspecified. We expect that the frontend
1602/// will emit those side-effect instructions somewhere (e.g. before the loop)
1603/// such that the CanonicalLoopInfo itself can be side-effect free.
1604///
1605/// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
/// execution of a loop body that satisfies these constraints. It does NOT
1607/// represent arbitrary SESE regions that happen to contain a loop. Do not use
1608/// CanonicalLoopInfo for such purposes.
1609///
1610/// The control flow can be described as follows:
1611///
1612/// Preheader
1613/// |
1614/// /-> Header
1615/// | |
1616/// | Cond---\
1617/// | | |
1618/// | Body |
1619/// | | | |
1620/// | <...> |
1621/// | | | |
1622/// \--Latch |
1623/// |
1624/// Exit
1625/// |
1626/// After
1627///
1628/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
1629/// including) and end at AfterIP (at the After's first instruction, excluding).
1630/// That is, instructions in the Preheader and After blocks (except the
1631/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
1632/// side-effects. Typically, the Preheader is used to compute the loop's trip
1633/// count. The instructions from BodyIP (at the Body block's first instruction,
1634/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
1635/// control and thus can have side-effects. The body block is the single entry
1636/// point into the loop body, which may contain arbitrary control flow as long
1637/// as all control paths eventually branch to the Latch block.
1638///
/// TODO: Consider adding another standardized BasicBlock between Body CFG and
/// Latch to guarantee that there is only a single edge to the latch. It would
/// make loop transformations easier by not needing to consider multiple
/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
/// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
/// executes after each body iteration.
1645///
/// There must be no loop-carried dependencies through llvm::Values. This is
/// equivalent to the Latch having no PHINode and the Header's only PHINode
/// being for the induction variable.
1649///
1650/// All code in Header, Cond, Latch and Exit (plus the terminator of the
1651/// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
1652/// by assertOK(). They are expected to not be modified unless explicitly
/// modifying the CanonicalLoopInfo through a method that applies an OpenMP
1654/// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
1655/// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
1656/// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
1657/// anymore as its underlying control flow may not exist anymore.
1658/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
1659/// may also return a new CanonicalLoopInfo that can be passed to other
1660/// loop-associated construct implementing methods. These loop-transforming
1661/// methods may either create a new CanonicalLoopInfo usually using
1662/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
1663/// modify one of the input CanonicalLoopInfo and return it as representing the
/// modified loop. What is done is an implementation detail of the
1665/// transformation-implementing method and callers should always assume that the
1666/// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
1667/// Returned CanonicalLoopInfo have the same structure and guarantees as the one
1668/// created by createCanonicalLoop, such that transforming methods do not have
1669/// to special case where the CanonicalLoopInfo originated from.
1670///
1671/// Generally, methods consuming CanonicalLoopInfo do not need an
1672/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
1673/// CanonicalLoopInfo to insert new or modify existing instructions. Unless
1674/// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
1675/// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
1676/// any InsertPoint in the Preheader, After or Block can still be used after
1677/// calling such a method.
1678///
1679/// TODO: Provide mechanisms for exception handling and cancellation points.
1680///
1681/// Defined outside OpenMPIRBuilder because nested classes cannot be
1682/// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
1683class CanonicalLoopInfo {
1684 friend class OpenMPIRBuilder;
1685
1686private:
1687 BasicBlock *Header = nullptr;
1688 BasicBlock *Cond = nullptr;
1689 BasicBlock *Latch = nullptr;
1690 BasicBlock *Exit = nullptr;
1691
1692 /// Add the control blocks of this loop to \p BBs.
1693 ///
1694 /// This does not include any block from the body, including the one returned
1695 /// by getBody().
1696 ///
1697 /// FIXME: This currently includes the Preheader and After blocks even though
1698 /// their content is (mostly) not under CanonicalLoopInfo's control.
1699 /// Re-evaluated whether this makes sense.
1700 void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
1701
1702 /// Sets the number of loop iterations to the given value. This value must be
1703 /// valid in the condition block (i.e., defined in the preheader) and is
1704 /// interpreted as an unsigned integer.
1705 void setTripCount(Value *TripCount);
1706
1707 /// Replace all uses of the canonical induction variable in the loop body with
1708 /// a new one.
1709 ///
1710 /// The intended use case is to update the induction variable for an updated
1711 /// iteration space such that it can stay normalized in the 0...tripcount-1
1712 /// range.
1713 ///
1714 /// The \p Updater is called with the (presumable updated) current normalized
1715 /// induction variable and is expected to return the value that uses of the
1716 /// pre-updated induction values should use instead, typically dependent on
1717 /// the new induction variable. This is a lambda (instead of e.g. just passing
1718 /// the new value) to be able to distinguish the uses of the pre-updated
1719 /// induction variable and uses of the induction varible to compute the
1720 /// updated induction variable value.
1721 void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
1722
1723public:
1724 /// Returns whether this object currently represents the IR of a loop. If
1725 /// returning false, it may have been consumed by a loop transformation or not
1726 /// been intialized. Do not use in this case;
1727 bool isValid() const { return Header; }
1728
1729 /// The preheader ensures that there is only a single edge entering the loop.
1730 /// Code that must be execute before any loop iteration can be emitted here,
1731 /// such as computing the loop trip count and begin lifetime markers. Code in
1732 /// the preheader is not considered part of the canonical loop.
1733 BasicBlock *getPreheader() const;
1734
1735 /// The header is the entry for each iteration. In the canonical control flow,
1736 /// it only contains the PHINode for the induction variable.
1737 BasicBlock *getHeader() const {
1738 assert(isValid() && "Requires a valid canonical loop");
1739 return Header;
1740 }
1741
1742 /// The condition block computes whether there is another loop iteration. If
1743 /// yes, branches to the body; otherwise to the exit block.
1744 BasicBlock *getCond() const {
1745 assert(isValid() && "Requires a valid canonical loop");
1746 return Cond;
1747 }
1748
1749 /// The body block is the single entry for a loop iteration and not controlled
1750 /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
1751 /// eventually branch to the \p Latch block.
1752 BasicBlock *getBody() const {
1753 assert(isValid() && "Requires a valid canonical loop");
1754 return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0);
1755 }
1756
1757 /// Reaching the latch indicates the end of the loop body code. In the
1758 /// canonical control flow, it only contains the increment of the induction
1759 /// variable.
1760 BasicBlock *getLatch() const {
1761 assert(isValid() && "Requires a valid canonical loop");
1762 return Latch;
1763 }
1764
1765 /// Reaching the exit indicates no more iterations are being executed.
1766 BasicBlock *getExit() const {
1767 assert(isValid() && "Requires a valid canonical loop");
1768 return Exit;
1769 }
1770
1771 /// The after block is intended for clean-up code such as lifetime end
1772 /// markers. It is separate from the exit block to ensure, analogous to the
1773 /// preheader, it having just a single entry edge and being free from PHI
1774 /// nodes should there be multiple loop exits (such as from break
1775 /// statements/cancellations).
1776 BasicBlock *getAfter() const {
1777 assert(isValid() && "Requires a valid canonical loop");
1778 return Exit->getSingleSuccessor();
1779 }
1780
1781 /// Returns the llvm::Value containing the number of loop iterations. It must
1782 /// be valid in the preheader and always interpreted as an unsigned integer of
1783 /// any bit-width.
1784 Value *getTripCount() const {
1785 assert(isValid() && "Requires a valid canonical loop");
1786 Instruction *CmpI = &Cond->front();
1787 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
1788 return CmpI->getOperand(1);
1789 }
1790
1791 /// Returns the instruction representing the current logical induction
1792 /// variable. Always unsigned, always starting at 0 with an increment of one.
1793 Instruction *getIndVar() const {
1794 assert(isValid() && "Requires a valid canonical loop");
1795 Instruction *IndVarPHI = &Header->front();
1796 assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
1797 return IndVarPHI;
1798 }
1799
1800 /// Return the type of the induction variable (and the trip count).
1801 Type *getIndVarType() const {
1802 assert(isValid() && "Requires a valid canonical loop");
1803 return getIndVar()->getType();
1804 }
1805
1806 /// Return the insertion point for user code before the loop.
1807 OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
1808 assert(isValid() && "Requires a valid canonical loop");
1809 BasicBlock *Preheader = getPreheader();
1810 return {Preheader, std::prev(Preheader->end())};
1811 };
1812
1813 /// Return the insertion point for user code in the body.
1814 OpenMPIRBuilder::InsertPointTy getBodyIP() const {
1815 assert(isValid() && "Requires a valid canonical loop");
1816 BasicBlock *Body = getBody();
1817 return {Body, Body->begin()};
1818 };
1819
1820 /// Return the insertion point for user code after the loop.
1821 OpenMPIRBuilder::InsertPointTy getAfterIP() const {
1822 assert(isValid() && "Requires a valid canonical loop");
1823 BasicBlock *After = getAfter();
1824 return {After, After->begin()};
1825 };
1826
1827 Function *getFunction() const {
1828 assert(isValid() && "Requires a valid canonical loop");
1829 return Header->getParent();
1830 }
1831
1832 /// Consistency self-check.
1833 void assertOK() const;
1834
1835 /// Invalidate this loop. That is, the underlying IR does not fulfill the
1836 /// requirements of an OpenMP canonical loop anymore.
1837 void invalidate();
1838};
1839
1840} // end namespace llvm
1841
1842#endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
1843

source code of llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h