/* Expansion pass for OMP directives.  Outlines regions of certain OMP
   directives to separate functions, converts others into explicit calls to the
   runtime library (libgomp) and so forth.

Copyright (C) 2005-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "stringpool.h"
#include "attribs.h"
#include "tree-eh.h"
#include "opts.h"

/* OMP region information.  Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_nondebug_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}
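
/* As a worked example (illustrative, not from any testsuite): with a
   vectorization factor VF of 8, a chunk size of 10 becomes
   (10 + 7) & -8 == 16, i.e. the chunk is rounded up to the next
   multiple of VF so that simd chunks do not split vector lanes across
   threads.  */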

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}
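
/* A sketch of the result: for '#pragma omp parallel for schedule (dynamic, 4)'
   over 'for (i = 0; i < n; i++)', WS_ARGS ends up holding the long-converted
   values { 0, n, 1, 4 }, which expand_parallel_call splices between the
   num_threads and flags arguments of the combined GOMP_parallel_loop_*
   call.  */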

/* Discover whether REGION is a combined parallel+workshare region.  */

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel; while it is implementable,
     adding another big set of APIs or slowing down the normal paths is
     not acceptable.  */
  tree pclauses
    = gimple_omp_parallel_clauses (last_nondebug_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_nondebug_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_nondebug_stmt (par_entry_bb);
      gimple *ws_stmt = last_nondebug_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}
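
/* For instance (an illustrative case, not an exhaustive list), a plain
   '#pragma omp parallel for schedule (dynamic)' nest with no reduction or
   conditional-lastprivate temporaries is marked combined here, so a single
   GOMP_parallel_loop_dynamic call is emitted instead of GOMP_parallel plus
   a separate loop-start call inside the child function.  */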

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have an explicit task region nested in a parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_nondebug_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_nondebug_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}

/* Build the function calls to GOMP_parallel etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where the code is to be inserted.
   WS_ARGS will be set if this is a call to a combined parallel+workshare
   construct; it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
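
/* A sketch of the emitted code: for a plain '#pragma omp parallel' with no
   clauses, the call built above is roughly

     __builtin_GOMP_parallel (foo._omp_fn.0, &.omp_data_o.1, 0, 0);

   where the child function and data block names follow GCC's usual
   numbering (illustrative here), num_threads 0 means "choose at run time"
   and flags 0 means no proc_bind request.  */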

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where the code is to be
   inserted.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
  tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_nondebug_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	{
	  if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
	    iflags |= GOMP_TASK_FLAG_STRICT;
	  num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
	}
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
		iflags |= GOMP_TASK_FLAG_STRICT;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else
    {
      if (priority)
	iflags |= GOMP_TASK_FLAG_PRIORITY;
      if (detach)
	iflags |= GOMP_TASK_FLAG_DETACH;
    }

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);

  detach = (detach
	    ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
	    : null_pointer_node);

  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 10, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority, detach);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
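
/* A sketch of the emitted code: a simple '#pragma omp task' whose data block
   is 8 bytes with 8-byte alignment becomes roughly

     __builtin_GOMP_task (foo._omp_fn.0, &.omp_data_o.2, 0B, 8, 8, 1, 0,
			  0B, 0, 0B);

   i.e. no copy function, an unconditional if clause, no flags, and null
   depend, priority and detach arguments (names illustrative).  */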

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where the code is to
   be inserted.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  bool nowait = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT) != NULL_TREE;
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  enum built_in_function f = (nowait
			      ? BUILT_IN_GOMP_TASKWAIT_DEPEND_NOWAIT
			      : BUILT_IN_GOMP_TASKWAIT_DEPEND);
  tree t = build_call_expr (builtin_decl_explicit (f), 1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  REGION is the teams region
   being expanded.  BB is the block where the code is to be inserted.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
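
/* A sketch of the emitted code: for '#pragma omp teams num_teams (4)' on the
   host, the call built above is roughly

     __builtin_GOMP_teams_reg (bar._omp_fn.0, &.omp_data_o.3, 4, 0, 0);

   with thread_limit 0 (implementation defined) and the trailing flags
   argument currently always 0 (names illustrative).  */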

/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within the current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_nondebug_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}
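
/* As an illustration of the above: in

     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++)
	 ...
     }

   the worksharing loop's implicit barrier is immediately followed by the
   implicit barrier at the end of the parallel, so when no addressable locals
   could be shared with queued tasks, the inner GIMPLE_OMP_RETURN is marked
   nowait and only the parallel's own barrier remains.  */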

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for a task body, except
   that in an untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.  */
		if (untied_task)
		  continue;
		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
	      }
	    else if (DECL_NAME (decl) == num_thr_id)
	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	    else
	      continue;

	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
		|| gimple_call_num_args (call) != 0)
	      continue;

	    if (flag_exceptions && !TREE_NOTHROW (decl))
	      continue;

	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
					TREE_TYPE (TREE_TYPE (built_in))))
	      continue;

	    gimple_call_set_fndecl (call, built_in);
	  }
      }
}
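
/* E.g. (a sketch) a call to the external omp_get_num_threads () inside an
   outlined parallel body is redirected to __builtin_omp_get_num_threads (),
   which is declared const and can therefore be CSEd by later passes.  */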

/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Prepend or append LHS CODE RHS condition before or after *GSI_P.  */

static gcond *
expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code,
		       tree lhs, tree rhs, bool after = false)
{
  gcond *cond_stmt = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE);
  if (after)
    gsi_insert_after (gsi_p, cond_stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, cond_stmt, GSI_SAME_STMT);
  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
		 NULL, NULL)
      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
		    NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (cond_stmt);
      gimple_regimplify_operands (cond_stmt, &gsi);
    }
  return cond_stmt;
}

/* Expand the OpenMP parallel or task directive starting at REGION.  */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_nondebug_stmt (region->entry);
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In that case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We ignore the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree, arrange for the child function to fix up loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree diff_type,
			   tree bound_type, location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}
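
/* A worked example (illustrative): for 'for (i = 0; i < 10; i += 3)' the
   code above computes range = 10, dir = +1 and
   iters = (10 - 1 + 3) / 3 == 4, matching the values i = 0, 3, 6, 9;
   with two such loops collapsed, the returned bound would be 4 * 4.  */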
1655
1656/* Emit initializers for collapsed loop members. INNER is true if
1657 this is for the element loop of a TILE. IVAR is the outer
1658 loop iteration variable, from which collapsed loop iteration values
1659 are calculated. COUNTS array has been initialized by
1660 expand_oacc_collapse_inits. */
1661
1662static void
1663expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1664 gimple_stmt_iterator *gsi,
1665 const oacc_collapse *counts, tree ivar,
1666 tree diff_type)
1667{
1668 tree ivar_type = TREE_TYPE (ivar);
1669
1670 /* The most rapidly changing iteration variable is the innermost
1671 one. */
1672 for (int ix = fd->collapse; ix--;)
1673 {
1674 const omp_for_data_loop *loop = &fd->loops[ix];
1675 const oacc_collapse *collapse = &counts[ix];
1676 tree v = inner ? loop->v : collapse->outer;
1677 tree iter_type = TREE_TYPE (v);
1678 tree plus_type = iter_type;
1679 enum tree_code plus_code = PLUS_EXPR;
1680 tree expr;
1681
1682 if (POINTER_TYPE_P (iter_type))
1683 {
1684 plus_code = POINTER_PLUS_EXPR;
1685 plus_type = sizetype;
1686 }
1687
1688 expr = ivar;
1689 if (ix)
1690 {
1691 tree mod = fold_convert (ivar_type, collapse->iters);
1692 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1693 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1694 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1695 true, GSI_SAME_STMT);
1696 }
1697
1698 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1699 fold_convert (diff_type, collapse->step));
1700 expr = fold_build2 (plus_code, iter_type,
1701 inner ? collapse->outer : collapse->base,
1702 fold_convert (plus_type, expr));
1703 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1704 true, GSI_SAME_STMT);
1705 gassign *ass = gimple_build_assign (v, expr);
1706 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1707 }
1708}
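/* E.g. for a collapse(2) nest (illustrative values only)
for (i = 0; i < 4; i++)
for (j = 0; j < 5; j++)
counts[1].iters is 5, so for the combined iteration ivar = 13 the
loop above computes j's value from 13 % 5 = 3 and then i's value
from 13 / 5 = 2, i.e. base + 3 * step resp. base + 2 * step. */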
1709
1710/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1711 of the combined collapse > 1 loop constructs, generate code like:
1712 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1713 if (cond3 is <)
1714 adj = STEP3 - 1;
1715 else
1716 adj = STEP3 + 1;
1717 count3 = (adj + N32 - N31) / STEP3;
1718 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1719 if (cond2 is <)
1720 adj = STEP2 - 1;
1721 else
1722 adj = STEP2 + 1;
1723 count2 = (adj + N22 - N21) / STEP2;
1724 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1725 if (cond1 is <)
1726 adj = STEP1 - 1;
1727 else
1728 adj = STEP1 + 1;
1729 count1 = (adj + N12 - N11) / STEP1;
1730 count = count1 * count2 * count3;
1731 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1732 count = 0;
1733 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1734 of the combined loop constructs, just initialize COUNTS array
1735 from the _looptemp_ clauses. For loop nests with non-rectangular
1736 loops, do this only for the rectangular loops. Then pick
1737 the loops which reference outer vars in their bound expressions
1738 and the loops which they refer to, and for this sub-nest compute
1739 the number of iterations. For triangular loops use Faulhaber's formula,
1740 otherwise as a fallback, compute by iterating the loops.
1741 If e.g. the sub-nest is
1742 for (I = N11; I COND1 N12; I += STEP1)
1743 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1744 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1745 do:
1746 COUNT = 0;
1747 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1748 for (tmpj = M21 * tmpi + N21;
1749 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1750 {
1751 int tmpk1 = M31 * tmpj + N31;
1752 int tmpk2 = M32 * tmpj + N32;
1753 if (tmpk1 COND3 tmpk2)
1754 {
1755 if (COND3 is <)
1756 adj = STEP3 - 1;
1757 else
1758 adj = STEP3 + 1;
1759 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1760 }
1761 }
1762 and finally multiply the counts of the rectangular loops not
1763 in the sub-nest with COUNT. Also, in counts[fd->last_nonrect]
1764 store the number of iterations of the loops from fd->first_nonrect
1765 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1766 by the counts of rectangular loops not referenced in any non-rectangular
1767 loops sandwiched in between those. */
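/* For instance (an illustrative triangular sub-nest), for
for (I = 0; I < 8; I++)
for (J = 0; J < I; J++)
the first outer iteration runs the inner loop 0 times and each
following one runs it once more, so Faulhaber's formula gives
COUNT = 8 * 0 + 1 * (8 - 1) * 8 / 2 = 28, which agrees with
iterating: 0 + 1 + 2 + ... + 7 = 28. */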
1768
1769/* NOTE: It *could* be better to moosh all of the BBs together,
1770 creating one larger BB with all the computation and the unexpected
1771 jump at the end. I.e.
1772
1773 bool zero3, zero2, zero1, zero;
1774
1775 zero3 = N32 c3 N31;
1776 count3 = (N32 - N31) /[cl] STEP3;
1777 zero2 = N22 c2 N21;
1778 count2 = (N22 - N21) /[cl] STEP2;
1779 zero1 = N12 c1 N11;
1780 count1 = (N12 - N11) /[cl] STEP1;
1781 zero = zero3 || zero2 || zero1;
1782 count = count1 * count2 * count3;
1783 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1784
1785 After all, we expect zero to be false, and thus we expect to have to
1786 evaluate all of the comparison expressions, so short-circuiting
1787 oughtn't be a win. Since the condition isn't protecting a
1788 denominator, we're not concerned about divide-by-zero, so we can
1789 fully evaluate count even if a numerator turned out to be wrong.
1790
1791 It seems like putting this all together would create much better
1792 scheduling opportunities, and less pressure on the chip's branch
1793 predictor. */
1794
1795static void
1796expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1797 basic_block &entry_bb, tree *counts,
1798 basic_block &zero_iter1_bb, int &first_zero_iter1,
1799 basic_block &zero_iter2_bb, int &first_zero_iter2,
1800 basic_block &l2_dom_bb)
1801{
1802 tree t, type = TREE_TYPE (fd->loop.v);
1803 edge e, ne;
1804 int i;
1805
1806 /* Collapsed loops need work for expansion into SSA form. */
1807 gcc_assert (!gimple_in_ssa_p (cfun));
1808
1809 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1810 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1811 {
1812 gcc_assert (fd->ordered == 0);
1813 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1814 isn't supposed to be handled, as the inner loop doesn't
1815 use it. */
1816 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1817 OMP_CLAUSE__LOOPTEMP_);
1818 gcc_assert (innerc);
1819 for (i = 0; i < fd->collapse; i++)
1820 {
1821 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1822 OMP_CLAUSE__LOOPTEMP_);
1823 gcc_assert (innerc);
1824 if (i)
1825 counts[i] = OMP_CLAUSE_DECL (innerc);
1826 else
1827 counts[0] = NULL_TREE;
1828 }
1829 if (fd->non_rect
1830 && fd->last_nonrect == fd->first_nonrect + 1
1831 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1832 {
1833 tree c[4];
1834 for (i = 0; i < 4; i++)
1835 {
1836 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1837 OMP_CLAUSE__LOOPTEMP_);
1838 gcc_assert (innerc);
1839 c[i] = OMP_CLAUSE_DECL (innerc);
1840 }
1841 counts[0] = c[0];
1842 fd->first_inner_iterations = c[1];
1843 fd->factor = c[2];
1844 fd->adjn1 = c[3];
1845 }
1846 return;
1847 }
1848
1849 for (i = fd->collapse; i < fd->ordered; i++)
1850 {
1851 tree itype = TREE_TYPE (fd->loops[i].v);
1852 counts[i] = NULL_TREE;
1853 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1854 fold_convert (itype, fd->loops[i].n1),
1855 fold_convert (itype, fd->loops[i].n2));
1856 if (t && integer_zerop (t))
1857 {
1858 for (i = fd->collapse; i < fd->ordered; i++)
1859 counts[i] = build_int_cst (type, 0);
1860 break;
1861 }
1862 }
1863 bool rect_count_seen = false;
1864 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1865 {
1866 tree itype = TREE_TYPE (fd->loops[i].v);
1867
1868 if (i >= fd->collapse && counts[i])
1869 continue;
1870 if (fd->non_rect)
1871 {
1872 /* Skip loops that use outer iterators in their expressions
1873 during this phase. */
1874 if (fd->loops[i].m1 || fd->loops[i].m2)
1875 {
1876 counts[i] = build_zero_cst (type);
1877 continue;
1878 }
1879 }
1880 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1881 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1882 fold_convert (itype, fd->loops[i].n1),
1883 fold_convert (itype, fd->loops[i].n2)))
1884 == NULL_TREE || !integer_onep (t)))
1885 {
1886 gcond *cond_stmt;
1887 tree n1, n2;
1888 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1889 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1890 true, GSI_SAME_STMT);
1891 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1892 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1893 true, GSI_SAME_STMT);
1894 cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code,
1895 n1, n2);
1896 e = split_block (entry_bb, cond_stmt);
1897 basic_block &zero_iter_bb
1898 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1899 int &first_zero_iter
1900 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1901 if (zero_iter_bb == NULL)
1902 {
1903 gassign *assign_stmt;
1904 first_zero_iter = i;
1905 zero_iter_bb = create_empty_bb (entry_bb);
1906 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1907 *gsi = gsi_after_labels (zero_iter_bb);
1908 if (i < fd->collapse)
1909 assign_stmt = gimple_build_assign (fd->loop.n2,
1910 build_zero_cst (type));
1911 else
1912 {
1913 counts[i] = create_tmp_reg (type, ".count");
1914 assign_stmt
1915 = gimple_build_assign (counts[i], build_zero_cst (type));
1916 }
1917 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1918 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1919 entry_bb);
1920 }
1921 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1922 ne->probability = profile_probability::very_unlikely ();
1923 e->flags = EDGE_TRUE_VALUE;
1924 e->probability = ne->probability.invert ();
1925 if (l2_dom_bb == NULL)
1926 l2_dom_bb = entry_bb;
1927 entry_bb = e->dest;
1928 *gsi = gsi_last_nondebug_bb (entry_bb);
1929 }
1930
1931 if (POINTER_TYPE_P (itype))
1932 itype = signed_type_for (itype);
1933 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1934 ? -1 : 1));
1935 t = fold_build2 (PLUS_EXPR, itype,
1936 fold_convert (itype, fd->loops[i].step), t);
1937 t = fold_build2 (PLUS_EXPR, itype, t,
1938 fold_convert (itype, fd->loops[i].n2));
1939 t = fold_build2 (MINUS_EXPR, itype, t,
1940 fold_convert (itype, fd->loops[i].n1));
1941 /* ?? We could probably use CEIL_DIV_EXPR instead of
1942 TRUNC_DIV_EXPR and adjust by hand. Unless we can't
1943 generate the same code in the end because generically we
1944 don't know that the values involved must be negative for
1945 GT?? */
1946 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1947 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1948 fold_build1 (NEGATE_EXPR, itype, t),
1949 fold_build1 (NEGATE_EXPR, itype,
1950 fold_convert (itype,
1951 fd->loops[i].step)));
1952 else
1953 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1954 fold_convert (itype, fd->loops[i].step));
1955 t = fold_convert (type, t);
1956 if (TREE_CODE (t) == INTEGER_CST)
1957 counts[i] = t;
1958 else
1959 {
1960 if (i < fd->collapse || i != first_zero_iter2)
1961 counts[i] = create_tmp_reg (type, ".count");
1962 expand_omp_build_assign (gsi, counts[i], t);
1963 }
1964 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1965 {
1966 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1967 continue;
1968 if (!rect_count_seen)
1969 {
1970 t = counts[i];
1971 rect_count_seen = true;
1972 }
1973 else
1974 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1975 expand_omp_build_assign (gsi, fd->loop.n2, t);
1976 }
1977 }
1978 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1979 {
1980 gcc_assert (fd->last_nonrect != -1);
1981
1982 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1983 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1984 build_zero_cst (type));
1985 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1986 if (fd->loops[i].m1
1987 || fd->loops[i].m2
1988 || fd->loops[i].non_rect_referenced)
1989 break;
1990 if (i == fd->last_nonrect
1991 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1992 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
1993 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1994 {
1995 int o = fd->first_nonrect;
1996 tree itype = TREE_TYPE (fd->loops[o].v);
1997 tree n1o = create_tmp_reg (itype, ".n1o");
1998 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1999 expand_omp_build_assign (gsi, n1o, t);
2000 tree n2o = create_tmp_reg (itype, ".n2o");
2001 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
2002 expand_omp_build_assign (gsi, n2o, t);
2003 if (fd->loops[i].m1 && fd->loops[i].m2)
2004 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
2005 unshare_expr (fd->loops[i].m1));
2006 else if (fd->loops[i].m1)
2007 t = fold_build1 (NEGATE_EXPR, itype,
2008 unshare_expr (fd->loops[i].m1));
2009 else
2010 t = unshare_expr (fd->loops[i].m2);
2011 tree m2minusm1
2012 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2013 true, GSI_SAME_STMT);
2014
2015 gimple_stmt_iterator gsi2 = *gsi;
2016 gsi_prev (&gsi2);
2017 e = split_block (entry_bb, gsi_stmt (gsi2));
2018 e = split_block (e->dest, (gimple *) NULL);
2019 basic_block bb1 = e->src;
2020 entry_bb = e->dest;
2021 *gsi = gsi_after_labels (entry_bb);
2022
2023 gsi2 = gsi_after_labels (bb1);
2024 tree ostep = fold_convert (itype, fd->loops[o].step);
2025 t = build_int_cst (itype, (fd->loops[o].cond_code
2026 == LT_EXPR ? -1 : 1));
2027 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2028 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2029 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2030 if (TYPE_UNSIGNED (itype)
2031 && fd->loops[o].cond_code == GT_EXPR)
2032 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2033 fold_build1 (NEGATE_EXPR, itype, t),
2034 fold_build1 (NEGATE_EXPR, itype, ostep));
2035 else
2036 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2037 tree outer_niters
2038 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2039 true, GSI_SAME_STMT);
2040 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2041 build_one_cst (itype));
2042 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2043 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2044 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2045 true, GSI_SAME_STMT);
2046 tree n1, n2, n1e, n2e;
2047 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2048 if (fd->loops[i].m1)
2049 {
2050 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2051 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2052 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2053 }
2054 else
2055 n1 = t;
2056 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2057 true, GSI_SAME_STMT);
2058 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2059 if (fd->loops[i].m2)
2060 {
2061 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2062 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2063 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2064 }
2065 else
2066 n2 = t;
2067 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2068 true, GSI_SAME_STMT);
2069 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2070 if (fd->loops[i].m1)
2071 {
2072 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2073 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2074 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2075 }
2076 else
2077 n1e = t;
2078 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2079 true, GSI_SAME_STMT);
2080 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2081 if (fd->loops[i].m2)
2082 {
2083 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2084 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2085 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2086 }
2087 else
2088 n2e = t;
2089 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2090 true, GSI_SAME_STMT);
2091 gcond *cond_stmt
2092 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2093 n1, n2);
2094 e = split_block (bb1, cond_stmt);
2095 e->flags = EDGE_TRUE_VALUE;
2096 e->probability = profile_probability::likely ().guessed ();
2097 basic_block bb2 = e->dest;
2098 gsi2 = gsi_after_labels (bb2);
2099
2100 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2101 n1e, n2e);
2102 e = split_block (bb2, cond_stmt);
2103 e->flags = EDGE_TRUE_VALUE;
2104 e->probability = profile_probability::likely ().guessed ();
2105 gsi2 = gsi_after_labels (e->dest);
2106
2107 tree step = fold_convert (itype, fd->loops[i].step);
2108 t = build_int_cst (itype, (fd->loops[i].cond_code
2109 == LT_EXPR ? -1 : 1));
2110 t = fold_build2 (PLUS_EXPR, itype, step, t);
2111 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2112 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2113 if (TYPE_UNSIGNED (itype)
2114 && fd->loops[i].cond_code == GT_EXPR)
2115 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2116 fold_build1 (NEGATE_EXPR, itype, t),
2117 fold_build1 (NEGATE_EXPR, itype, step));
2118 else
2119 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2120 tree first_inner_iterations
2121 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2122 true, GSI_SAME_STMT);
2123 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2124 if (TYPE_UNSIGNED (itype)
2125 && fd->loops[i].cond_code == GT_EXPR)
2126 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2127 fold_build1 (NEGATE_EXPR, itype, t),
2128 fold_build1 (NEGATE_EXPR, itype, step));
2129 else
2130 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2131 tree factor
2132 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2133 true, GSI_SAME_STMT);
2134 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2135 build_one_cst (itype));
2136 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2137 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2138 t = fold_build2 (MULT_EXPR, itype, factor, t);
2139 t = fold_build2 (PLUS_EXPR, itype,
2140 fold_build2 (MULT_EXPR, itype, outer_niters,
2141 first_inner_iterations), t);
2142 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2143 fold_convert (type, t));
2144
2145 basic_block bb3 = create_empty_bb (bb1);
2146 add_bb_to_loop (bb3, bb1->loop_father);
2147
2148 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2149 e->probability = profile_probability::unlikely ().guessed ();
2150
2151 gsi2 = gsi_after_labels (bb3);
2152 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2153 n1e, n2e);
2154 e = split_block (bb3, cond_stmt);
2155 e->flags = EDGE_TRUE_VALUE;
2156 e->probability = profile_probability::likely ().guessed ();
2157 basic_block bb4 = e->dest;
2158
2159 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2160 ne->probability = e->probability.invert ();
2161
2162 basic_block bb5 = create_empty_bb (bb2);
2163 add_bb_to_loop (bb5, bb2->loop_father);
2164
2165 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2166 ne->probability = profile_probability::unlikely ().guessed ();
2167
2168 for (int j = 0; j < 2; j++)
2169 {
2170 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2171 t = fold_build2 (MINUS_EXPR, itype,
2172 unshare_expr (fd->loops[i].n1),
2173 unshare_expr (fd->loops[i].n2));
2174 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2175 tree tem
2176 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2177 true, GSI_SAME_STMT);
2178 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2179 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2180 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2181 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2182 true, GSI_SAME_STMT);
2183 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2184 if (fd->loops[i].m1)
2185 {
2186 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2187 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2188 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2189 }
2190 else
2191 n1 = t;
2192 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2193 true, GSI_SAME_STMT);
2194 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2195 if (fd->loops[i].m2)
2196 {
2197 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2198 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2199 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2200 }
2201 else
2202 n2 = t;
2203 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2204 true, GSI_SAME_STMT);
2205 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2206
2207 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2208 n1, n2);
2209 e = split_block (gsi_bb (gsi2), cond_stmt);
2210 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2211 e->probability = profile_probability::unlikely ().guessed ();
2212 ne = make_edge (e->src, bb1,
2213 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2214 ne->probability = e->probability.invert ();
2215 gsi2 = gsi_after_labels (e->dest);
2216
2217 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2218 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2219
2220 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2221 }
2222
2223 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2224 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2225 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2226
2227 if (fd->first_nonrect + 1 == fd->last_nonrect)
2228 {
2229 fd->first_inner_iterations = first_inner_iterations;
2230 fd->factor = factor;
2231 fd->adjn1 = n1o;
2232 }
2233 }
2234 else
2235 {
2236 /* Fallback implementation. Evaluate the loops with m1/m2
2237 non-NULL as well as their outer loops at runtime using temporaries
2238 instead of the original iteration variables, and in the
2239 body just bump the counter. */
2240 gimple_stmt_iterator gsi2 = *gsi;
2241 gsi_prev (&gsi2);
2242 e = split_block (entry_bb, gsi_stmt (gsi2));
2243 e = split_block (e->dest, (gimple *) NULL);
2244 basic_block cur_bb = e->src;
2245 basic_block next_bb = e->dest;
2246 entry_bb = e->dest;
2247 *gsi = gsi_after_labels (entry_bb);
2248
2249 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2250 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2251
2252 for (i = 0; i <= fd->last_nonrect; i++)
2253 {
2254 if (fd->loops[i].m1 == NULL_TREE
2255 && fd->loops[i].m2 == NULL_TREE
2256 && !fd->loops[i].non_rect_referenced)
2257 continue;
2258
2259 tree itype = TREE_TYPE (fd->loops[i].v);
2260
2261 gsi2 = gsi_after_labels (cur_bb);
2262 tree n1, n2;
2263 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2264 if (fd->loops[i].m1 == NULL_TREE)
2265 n1 = t;
2266 else if (POINTER_TYPE_P (itype))
2267 {
2268 gcc_assert (integer_onep (fd->loops[i].m1));
2269 t = unshare_expr (fd->loops[i].n1);
2270 n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2271 }
2272 else
2273 {
2274 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2275 n1 = fold_build2 (MULT_EXPR, itype,
2276 vs[i - fd->loops[i].outer], n1);
2277 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2278 }
2279 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2280 true, GSI_SAME_STMT);
2281 if (i < fd->last_nonrect)
2282 {
2283 vs[i] = create_tmp_reg (itype, ".it");
2284 expand_omp_build_assign (&gsi2, vs[i], n1);
2285 }
2286 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2287 if (fd->loops[i].m2 == NULL_TREE)
2288 n2 = t;
2289 else if (POINTER_TYPE_P (itype))
2290 {
2291 gcc_assert (integer_onep (fd->loops[i].m2));
2292 t = unshare_expr (fd->loops[i].n2);
2293 n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2294 }
2295 else
2296 {
2297 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2298 n2 = fold_build2 (MULT_EXPR, itype,
2299 vs[i - fd->loops[i].outer], n2);
2300 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2301 }
2302 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2303 true, GSI_SAME_STMT);
2304 if (POINTER_TYPE_P (itype))
2305 itype = signed_type_for (itype);
2306 if (i == fd->last_nonrect)
2307 {
2308 gcond *cond_stmt
2309 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2310 n1, n2);
2311 e = split_block (cur_bb, cond_stmt);
2312 e->flags = EDGE_TRUE_VALUE;
2313 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2314 e->probability = profile_probability::likely ().guessed ();
2315 ne->probability = e->probability.invert ();
2316 gsi2 = gsi_after_labels (e->dest);
2317
2318 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2319 ? -1 : 1));
2320 t = fold_build2 (PLUS_EXPR, itype,
2321 fold_convert (itype, fd->loops[i].step), t);
2322 t = fold_build2 (PLUS_EXPR, itype, t,
2323 fold_convert (itype, n2));
2324 t = fold_build2 (MINUS_EXPR, itype, t,
2325 fold_convert (itype, n1));
2326 tree step = fold_convert (itype, fd->loops[i].step);
2327 if (TYPE_UNSIGNED (itype)
2328 && fd->loops[i].cond_code == GT_EXPR)
2329 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2330 fold_build1 (NEGATE_EXPR, itype, t),
2331 fold_build1 (NEGATE_EXPR, itype, step));
2332 else
2333 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2334 t = fold_convert (type, t);
2335 t = fold_build2 (PLUS_EXPR, type,
2336 counts[fd->last_nonrect], t);
2337 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2338 true, GSI_SAME_STMT);
2339 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2340 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2341 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2342 break;
2343 }
2344 e = split_block (cur_bb, last_nondebug_stmt (cur_bb));
2345
2346 basic_block new_cur_bb = create_empty_bb (cur_bb);
2347 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2348
2349 gsi2 = gsi_after_labels (e->dest);
2350 tree step = fold_convert (itype,
2351 unshare_expr (fd->loops[i].step));
2352 if (POINTER_TYPE_P (TREE_TYPE (vs[i])))
2353 t = fold_build_pointer_plus (vs[i], step);
2354 else
2355 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2356 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2357 true, GSI_SAME_STMT);
2358 expand_omp_build_assign (&gsi2, vs[i], t);
2359
2360 ne = split_block (e->dest, last_nondebug_stmt (e->dest));
2361 gsi2 = gsi_after_labels (ne->dest);
2362
2363 expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2);
2364 edge e3, e4;
2365 if (next_bb == entry_bb)
2366 {
2367 e3 = find_edge (ne->dest, next_bb);
2368 e3->flags = EDGE_FALSE_VALUE;
2369 }
2370 else
2371 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2372 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2373 e4->probability = profile_probability::likely ().guessed ();
2374 e3->probability = e4->probability.invert ();
2375 basic_block esrc = e->src;
2376 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2377 cur_bb = new_cur_bb;
2378 basic_block latch_bb = next_bb;
2379 next_bb = e->dest;
2380 remove_edge (e);
2381 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2382 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2383 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2384 }
2385 }
2386 t = NULL_TREE;
2387 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2388 if (!fd->loops[i].non_rect_referenced
2389 && fd->loops[i].m1 == NULL_TREE
2390 && fd->loops[i].m2 == NULL_TREE)
2391 {
2392 if (t == NULL_TREE)
2393 t = counts[i];
2394 else
2395 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2396 }
2397 if (t)
2398 {
2399 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2400 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2401 }
2402 if (!rect_count_seen)
2403 t = counts[fd->last_nonrect];
2404 else
2405 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2406 counts[fd->last_nonrect]);
2407 expand_omp_build_assign (gsi, fd->loop.n2, t);
2408 }
2409 else if (fd->non_rect)
2410 {
2411 tree t = fd->loop.n2;
2412 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2413 int non_rect_referenced = 0, non_rect = 0;
2414 for (i = 0; i < fd->collapse; i++)
2415 {
2416 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2417 && !integer_zerop (counts[i]))
2418 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2419 if (fd->loops[i].non_rect_referenced)
2420 non_rect_referenced++;
2421 if (fd->loops[i].m1 || fd->loops[i].m2)
2422 non_rect++;
2423 }
2424 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2425 counts[fd->last_nonrect] = t;
2426 }
2427}
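/* E.g. for the (illustrative) nest
for (i = 0; i < 10; i++)
for (j = 0; j < i; j++)
for (k = 0; k < 4; k++)
with fd->loop.n2 being the constant 180, the else branch above
divides out counts[2] = 4 of the unreferenced rectangular k loop
and stores counts[fd->last_nonrect] = 45, the number of (i, j)
pairs. */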
2428
2429/* Helper function for expand_omp_{for_*,simd}. Generate code like:
2430 T = V;
2431 V3 = N31 + (T % count3) * STEP3;
2432 T = T / count3;
2433 V2 = N21 + (T % count2) * STEP2;
2434 T = T / count2;
2435 V1 = N11 + T * STEP1;
2436 if this loop doesn't have an inner loop construct combined with it.
2437 If it does have an inner loop construct combined with it and the
2438 iteration count isn't known constant, store values from counts array
2439 into its _looptemp_ temporaries instead.
2440 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2441 inclusive), use the count of all those loops together, and either
2442 find quadratic etc. equation roots, or as a fallback, do:
2443 COUNT = 0;
2444 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2445 for (tmpj = M21 * tmpi + N21;
2446 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2447 {
2448 int tmpk1 = M31 * tmpj + N31;
2449 int tmpk2 = M32 * tmpj + N32;
2450 if (tmpk1 COND3 tmpk2)
2451 {
2452 if (COND3 is <)
2453 adj = STEP3 - 1;
2454 else
2455 adj = STEP3 + 1;
2456 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2457 if (COUNT + temp > T)
2458 {
2459 V1 = tmpi;
2460 V2 = tmpj;
2461 V3 = tmpk1 + (T - COUNT) * STEP3;
2462 goto done;
2463 }
2464 else
2465 COUNT += temp;
2466 }
2467 }
2468 done:;
2469 but for optional innermost or outermost rectangular loops that aren't
2470 referenced by other loop expressions keep doing the division/modulo. */
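/* E.g. for the triangular sub-nest (illustrative values)
for (i = 0; i < N; i++)
for (j = 0; j < i; j++)
FIRST_INNER_ITERATIONS is 0 and FACTOR is 1, so c outer iterations
are preceded by d = c * (c - 1) / 2 inner iterations, and the code
below recovers the outer index for a given T as
c = (unsigned long long) (sqrt (2.0 * T + 0.25) + 0.5);
e.g. T = 10 gives c = 5 and j = T - d = 10 - 10 = 0. */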
2471
2472static void
2473expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2474 tree *counts, tree *nonrect_bounds,
2475 gimple *inner_stmt, tree startvar)
2476{
2477 int i;
2478 if (gimple_omp_for_combined_p (fd->for_stmt))
2479 {
2480 /* If fd->loop.n2 is constant, then no propagation of the counts
2481 is needed; they are constant. */
2482 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2483 return;
2484
2485 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2486 ? gimple_omp_taskreg_clauses (inner_stmt)
2487 : gimple_omp_for_clauses (inner_stmt);
2488 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2489 isn't supposed to be handled, as the inner loop doesn't
2490 use it. */
2491 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2492 gcc_assert (innerc);
2493 int count = 0;
2494 if (fd->non_rect
2495 && fd->last_nonrect == fd->first_nonrect + 1
2496 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2497 count = 4;
2498 for (i = 0; i < fd->collapse + count; i++)
2499 {
2500 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2501 OMP_CLAUSE__LOOPTEMP_);
2502 gcc_assert (innerc);
2503 if (i)
2504 {
2505 tree tem = OMP_CLAUSE_DECL (innerc);
2506 tree t;
2507 if (i < fd->collapse)
2508 t = counts[i];
2509 else
2510 switch (i - fd->collapse)
2511 {
2512 case 0: t = counts[0]; break;
2513 case 1: t = fd->first_inner_iterations; break;
2514 case 2: t = fd->factor; break;
2515 case 3: t = fd->adjn1; break;
2516 default: gcc_unreachable ();
2517 }
2518 t = fold_convert (TREE_TYPE (tem), t);
2519 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2520 false, GSI_CONTINUE_LINKING);
2521 gassign *stmt = gimple_build_assign (tem, t);
2522 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2523 }
2524 }
2525 return;
2526 }
2527
2528 tree type = TREE_TYPE (fd->loop.v);
2529 tree tem = create_tmp_reg (type, ".tem");
2530 gassign *stmt = gimple_build_assign (tem, startvar);
2531 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2532
2533 for (i = fd->collapse - 1; i >= 0; i--)
2534 {
2535 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2536 itype = vtype;
2537 if (POINTER_TYPE_P (vtype))
2538 itype = signed_type_for (vtype);
2539 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2540 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2541 else
2542 t = tem;
2543 if (i == fd->last_nonrect)
2544 {
2545 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2546 false, GSI_CONTINUE_LINKING);
2547 tree stopval = t;
2548 tree idx = create_tmp_reg (type, ".count");
2549 expand_omp_build_assign (gsi, idx,
2550 build_zero_cst (type), true);
2551 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2552 if (fd->first_nonrect + 1 == fd->last_nonrect
2553 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2554 || fd->first_inner_iterations)
2555 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2556 != CODE_FOR_nothing)
2557 && !integer_zerop (fd->loop.n2))
2558 {
2559 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2560 tree itype = TREE_TYPE (fd->loops[i].v);
2561 tree first_inner_iterations = fd->first_inner_iterations;
2562 tree factor = fd->factor;
2563 gcond *cond_stmt
2564 = expand_omp_build_cond (gsi, NE_EXPR, factor,
2565 build_zero_cst (TREE_TYPE (factor)),
2566 true);
2567 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2568 basic_block bb0 = e->src;
2569 e->flags = EDGE_TRUE_VALUE;
2570 e->probability = profile_probability::likely ();
2571 bb_triang_dom = bb0;
2572 *gsi = gsi_after_labels (e->dest);
2573 tree slltype = long_long_integer_type_node;
2574 tree ulltype = long_long_unsigned_type_node;
2575 tree stopvalull = fold_convert (ulltype, stopval);
2576 stopvalull
2577 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2578 false, GSI_CONTINUE_LINKING);
2579 first_inner_iterations
2580 = fold_convert (slltype, first_inner_iterations);
2581 first_inner_iterations
2582 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2583 NULL_TREE, false,
2584 GSI_CONTINUE_LINKING);
2585 factor = fold_convert (slltype, factor);
2586 factor
2587 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2588 false, GSI_CONTINUE_LINKING);
2589 tree first_inner_iterationsd
2590 = fold_build1 (FLOAT_EXPR, double_type_node,
2591 first_inner_iterations);
2592 first_inner_iterationsd
2593 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2594 NULL_TREE, false,
2595 GSI_CONTINUE_LINKING);
2596 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2597 factor);
2598 factord = force_gimple_operand_gsi (gsi, factord, true,
2599 NULL_TREE, false,
2600 GSI_CONTINUE_LINKING);
2601 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2602 stopvalull);
2603 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2604 NULL_TREE, false,
2605 GSI_CONTINUE_LINKING);
2606 /* Temporarily disable flag_rounding_math; the values will be
2607 decimal numbers divided by 2, and worst case imprecision
2608 due to too large values ought to be caught later by the
2609 checks for fallback. */
2610 int save_flag_rounding_math = flag_rounding_math;
2611 flag_rounding_math = 0;
2612 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2613 build_real (double_type_node, dconst2));
2614 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2615 first_inner_iterationsd, t);
2616 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2617 GSI_CONTINUE_LINKING);
2618 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2619 build_real (double_type_node, dconst2));
2620 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2621 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2622 fold_build2 (MULT_EXPR, double_type_node,
2623 t3, t3));
2624 flag_rounding_math = save_flag_rounding_math;
2625 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2626 GSI_CONTINUE_LINKING);
2627 if (flag_exceptions
2628 && cfun->can_throw_non_call_exceptions
2629 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2630 {
2631 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2632 build_zero_cst (double_type_node));
2633 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2634 false, GSI_CONTINUE_LINKING);
2635 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2636 boolean_false_node,
2637 NULL_TREE, NULL_TREE);
2638 }
2639 else
2640 cond_stmt
2641 = gimple_build_cond (LT_EXPR, t,
2642 build_zero_cst (double_type_node),
2643 NULL_TREE, NULL_TREE);
2644 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2645 e = split_block (gsi_bb (*gsi), cond_stmt);
2646 basic_block bb1 = e->src;
2647 e->flags = EDGE_FALSE_VALUE;
2648 e->probability = profile_probability::very_likely ();
2649 *gsi = gsi_after_labels (e->dest);
2650 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2651 tree sqrtr = create_tmp_var (double_type_node);
2652 gimple_call_set_lhs (call, sqrtr);
2653 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2654 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2655 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2656 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2657 tree c = create_tmp_var (ulltype);
2658 tree d = create_tmp_var (ulltype);
2659 expand_omp_build_assign (gsi, c, t, true);
2660 t = fold_build2 (MINUS_EXPR, ulltype, c,
2661 build_one_cst (ulltype));
2662 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2663 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2664 t = fold_build2 (MULT_EXPR, ulltype,
2665 fold_convert (ulltype, fd->factor), t);
2666 tree t2
2667 = fold_build2 (MULT_EXPR, ulltype, c,
2668 fold_convert (ulltype,
2669 fd->first_inner_iterations));
2670 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2671 expand_omp_build_assign (gsi, d, t, true);
2672 t = fold_build2 (MULT_EXPR, ulltype,
2673 fold_convert (ulltype, fd->factor), c);
2674 t = fold_build2 (PLUS_EXPR, ulltype,
2675 t, fold_convert (ulltype,
2676 fd->first_inner_iterations));
2677 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2678 GSI_CONTINUE_LINKING);
2679 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2680 NULL_TREE, NULL_TREE);
2681 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2682 e = split_block (gsi_bb (*gsi), cond_stmt);
2683 basic_block bb2 = e->src;
2684 e->flags = EDGE_TRUE_VALUE;
2685 e->probability = profile_probability::very_likely ();
2686 *gsi = gsi_after_labels (e->dest);
2687 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2688 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2689 GSI_CONTINUE_LINKING);
2690 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2691 NULL_TREE, NULL_TREE);
2692 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2693 e = split_block (gsi_bb (*gsi), cond_stmt);
2694 basic_block bb3 = e->src;
2695 e->flags = EDGE_FALSE_VALUE;
2696 e->probability = profile_probability::very_likely ();
2697 *gsi = gsi_after_labels (e->dest);
2698 t = fold_convert (itype, c);
2699 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2700 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2701 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2702 GSI_CONTINUE_LINKING);
2703 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2704 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2705 t2 = fold_convert (itype, t2);
2706 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2707 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2708 if (fd->loops[i].m1)
2709 {
2710 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2711 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2712 }
2713 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2714 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2715 bb_triang = e->src;
2716 *gsi = gsi_after_labels (e->dest);
2717 remove_edge (e);
2718 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2719 e->probability = profile_probability::very_unlikely ();
2720 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2721 e->probability = profile_probability::very_unlikely ();
2722 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2723 e->probability = profile_probability::very_unlikely ();
2724
2725 basic_block bb4 = create_empty_bb (bb0);
2726 add_bb_to_loop (bb4, bb0->loop_father);
2727 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2728 e->probability = profile_probability::unlikely ();
2729 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2730 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2731 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2732 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2733 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2734 counts[i], counts[i - 1]);
2735 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2736 GSI_CONTINUE_LINKING);
2737 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2738 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2739 t = fold_convert (itype, t);
2740 t2 = fold_convert (itype, t2);
2741 t = fold_build2 (MULT_EXPR, itype, t,
2742 fold_convert (itype, fd->loops[i].step));
2743 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2744 t2 = fold_build2 (MULT_EXPR, itype, t2,
2745 fold_convert (itype, fd->loops[i - 1].step));
2746 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2747 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2748 false, GSI_CONTINUE_LINKING);
2749 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2750 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2751 if (fd->loops[i].m1)
2752 {
2753 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2754 fd->loops[i - 1].v);
2755 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2756 }
2757 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2758 false, GSI_CONTINUE_LINKING);
2759 stmt = gimple_build_assign (fd->loops[i].v, t);
2760 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2761 }
2762 /* Fallback implementation. Evaluate the loops between
2763 fd->first_nonrect and fd->last_nonrect (inclusive) at
2764 runtime using temporaries instead of the original iteration
2765 variables; in the body just bump the counter and compare
2766 with the desired value. */
2767 gimple_stmt_iterator gsi2 = *gsi;
2768 basic_block entry_bb = gsi_bb (gsi2);
2769 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2770 e = split_block (e->dest, (gimple *) NULL);
2771 basic_block dom_bb = NULL;
2772 basic_block cur_bb = e->src;
2773 basic_block next_bb = e->dest;
2774 entry_bb = e->dest;
2775 *gsi = gsi_after_labels (entry_bb);
2776
2777 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2778 tree n1 = NULL_TREE, n2 = NULL_TREE;
2779 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2780
2781 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2782 {
2783 tree itype = TREE_TYPE (fd->loops[j].v);
2784 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2785 && fd->loops[j].m2 == NULL_TREE
2786 && !fd->loops[j].non_rect_referenced);
2787 gsi2 = gsi_after_labels (cur_bb);
2788 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2789 if (fd->loops[j].m1 == NULL_TREE)
2790 n1 = rect_p ? build_zero_cst (type) : t;
2791 else if (POINTER_TYPE_P (itype))
2792 {
2793 gcc_assert (integer_onep (fd->loops[j].m1));
2794 t = unshare_expr (fd->loops[j].n1);
2795 n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2796 }
2797 else
2798 {
2799 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2800 n1 = fold_build2 (MULT_EXPR, itype,
2801 vs[j - fd->loops[j].outer], n1);
2802 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2803 }
2804 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2805 true, GSI_SAME_STMT);
2806 if (j < fd->last_nonrect)
2807 {
2808 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2809 expand_omp_build_assign (&gsi2, vs[j], n1);
2810 }
2811 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2812 if (fd->loops[j].m2 == NULL_TREE)
2813 n2 = rect_p ? counts[j] : t;
2814 else if (POINTER_TYPE_P (itype))
2815 {
2816 gcc_assert (integer_onep (fd->loops[j].m2));
2817 t = unshare_expr (fd->loops[j].n2);
2818 n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2819 }
2820 else
2821 {
2822 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2823 n2 = fold_build2 (MULT_EXPR, itype,
2824 vs[j - fd->loops[j].outer], n2);
2825 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2826 }
2827 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2828 true, GSI_SAME_STMT);
2829 if (POINTER_TYPE_P (itype))
2830 itype = signed_type_for (itype);
2831 if (j == fd->last_nonrect)
2832 {
2833 gcond *cond_stmt
2834 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2835 n1, n2);
2836 e = split_block (cur_bb, cond_stmt);
2837 e->flags = EDGE_TRUE_VALUE;
2838 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2839 e->probability = profile_probability::likely ().guessed ();
2840 ne->probability = e->probability.invert ();
2841 gsi2 = gsi_after_labels (e->dest);
2842
2843 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2844 ? -1 : 1));
2845 t = fold_build2 (PLUS_EXPR, itype,
2846 fold_convert (itype, fd->loops[j].step), t);
2847 t = fold_build2 (PLUS_EXPR, itype, t,
2848 fold_convert (itype, n2));
2849 t = fold_build2 (MINUS_EXPR, itype, t,
2850 fold_convert (itype, n1));
2851 tree step = fold_convert (itype, fd->loops[j].step);
2852 if (TYPE_UNSIGNED (itype)
2853 && fd->loops[j].cond_code == GT_EXPR)
2854 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2855 fold_build1 (NEGATE_EXPR, itype, t),
2856 fold_build1 (NEGATE_EXPR, itype, step));
2857 else
2858 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2859 t = fold_convert (type, t);
2860 t = fold_build2 (PLUS_EXPR, type, idx, t);
2861 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2862 true, GSI_SAME_STMT);
2863 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2864 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2865 cond_stmt
2866 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2867 NULL_TREE);
2868 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2869 e = split_block (gsi_bb (gsi2), cond_stmt);
2870 e->flags = EDGE_TRUE_VALUE;
2871 e->probability = profile_probability::likely ().guessed ();
2872 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2873 ne->probability = e->probability.invert ();
2874 gsi2 = gsi_after_labels (e->dest);
2875 expand_omp_build_assign (&gsi2, idx, t);
2876 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2877 break;
2878 }
2879 e = split_block (cur_bb, last_nondebug_stmt (cur_bb));
2880
2881 basic_block new_cur_bb = create_empty_bb (cur_bb);
2882 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2883
2884 gsi2 = gsi_after_labels (e->dest);
2885 if (rect_p)
2886 t = fold_build2 (PLUS_EXPR, type, vs[j],
2887 build_one_cst (type));
2888 else
2889 {
2890 tree step
2891 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2892 if (POINTER_TYPE_P (vtype))
2893 t = fold_build_pointer_plus (vs[j], step);
2894 else
2895 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2896 }
2897 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2898 true, GSI_SAME_STMT);
2899 expand_omp_build_assign (&gsi2, vs[j], t);
2900
2901 edge ne = split_block (e->dest, last_nondebug_stmt (e->dest));
2902 gsi2 = gsi_after_labels (ne->dest);
2903
2904 gcond *cond_stmt;
2905 if (next_bb == entry_bb)
2906 /* No need to actually check the outermost condition. */
2907 cond_stmt
2908 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2909 boolean_true_node,
2910 NULL_TREE, NULL_TREE);
2911 else
2912 cond_stmt
2913 = gimple_build_cond (rect_p ? LT_EXPR
2914 : fd->loops[j].cond_code,
2915 vs[j], n2, NULL_TREE, NULL_TREE);
2916 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2917 edge e3, e4;
2918 if (next_bb == entry_bb)
2919 {
2920 e3 = find_edge (ne->dest, next_bb);
2921 e3->flags = EDGE_FALSE_VALUE;
2922 dom_bb = ne->dest;
2923 }
2924 else
2925 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2926 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2927 e4->probability = profile_probability::likely ().guessed ();
2928 e3->probability = e4->probability.invert ();
2929 basic_block esrc = e->src;
2930 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2931 cur_bb = new_cur_bb;
2932 basic_block latch_bb = next_bb;
2933 next_bb = e->dest;
2934 remove_edge (e);
2935 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2936 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2937 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2938 }
2939 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2940 {
2941 tree vtype = TREE_TYPE (fd->loops[j].v);
2942 tree itype = vtype;
2943 if (POINTER_TYPE_P (itype))
2944 itype = signed_type_for (itype);
2945 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2946 && fd->loops[j].m2 == NULL_TREE
2947 && !fd->loops[j].non_rect_referenced);
2948 if (j == fd->last_nonrect)
2949 {
2950 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2951 t = fold_convert (itype, t);
2952 tree t2
2953 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2954 t = fold_build2 (MULT_EXPR, itype, t, t2);
2955 if (POINTER_TYPE_P (vtype))
2956 t = fold_build_pointer_plus (n1, t);
2957 else
2958 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2959 }
2960 else if (rect_p)
2961 {
2962 t = fold_convert (itype, vs[j]);
2963 t = fold_build2 (MULT_EXPR, itype, t,
2964 fold_convert (itype, fd->loops[j].step));
2965 if (POINTER_TYPE_P (vtype))
2966 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2967 else
2968 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2969 }
2970 else
2971 t = vs[j];
2972 t = force_gimple_operand_gsi (gsi, t, false,
2973 NULL_TREE, true,
2974 GSI_SAME_STMT);
2975 stmt = gimple_build_assign (fd->loops[j].v, t);
2976 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2977 }
2978 if (gsi_end_p (*gsi))
2979 *gsi = gsi_last_bb (gsi_bb (*gsi));
2980 else
2981 gsi_prev (gsi);
2982 if (bb_triang)
2983 {
2984 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2985 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2986 *gsi = gsi_after_labels (e->dest);
2987 if (!gsi_end_p (*gsi))
2988 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2989 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2990 }
2991 }
2992 else
2993 {
2994 t = fold_convert (itype, t);
2995 t = fold_build2 (MULT_EXPR, itype, t,
2996 fold_convert (itype, fd->loops[i].step));
2997 if (POINTER_TYPE_P (vtype))
2998 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2999 else
3000 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
3001 t = force_gimple_operand_gsi (gsi, t,
3002 DECL_P (fd->loops[i].v)
3003 && TREE_ADDRESSABLE (fd->loops[i].v),
3004 NULL_TREE, false,
3005 GSI_CONTINUE_LINKING);
3006 stmt = gimple_build_assign (fd->loops[i].v, t);
3007 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3008 }
3009 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
3010 {
3011 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
3012 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
3013 false, GSI_CONTINUE_LINKING);
3014 stmt = gimple_build_assign (tem, t);
3015 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3016 }
3017 if (i == fd->last_nonrect)
3018 i = fd->first_nonrect;
3019 }
3020 if (fd->non_rect)
3021 for (i = 0; i <= fd->last_nonrect; i++)
3022 if (fd->loops[i].m2)
3023 {
3024 tree itype = TREE_TYPE (fd->loops[i].v);
3025
3026 tree t;
3027 if (POINTER_TYPE_P (itype))
3028 {
3029 gcc_assert (integer_onep (fd->loops[i].m2));
3030 t = fold_build_pointer_plus (fd->loops[i - fd->loops[i].outer].v,
3031 unshare_expr (fd->loops[i].n2));
3032 }
3033 else
3034 {
3035 t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
3036 t = fold_build2 (MULT_EXPR, itype,
3037 fd->loops[i - fd->loops[i].outer].v, t);
3038 t = fold_build2 (PLUS_EXPR, itype, t,
3039 fold_convert (itype,
3040 unshare_expr (fd->loops[i].n2)));
3041 }
3042 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
3043 t = force_gimple_operand_gsi (gsi, t, false,
3044 NULL_TREE, false,
3045 GSI_CONTINUE_LINKING);
3046 stmt = gimple_build_assign (nonrect_bounds[i], t);
3047 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3048 }
3049}
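/* For instance, given (an illustrative non-rectangular nest)
for (i = 0; i < 10; i++)
for (j = 0; j < 2 * i + 4; j++)
the loop just above initializes nonrect_bounds[1] to 2 * i + 4 from
the freshly computed value of i; extract_omp_for_update_vars below
then recomputes it each time i is bumped. */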
3050
3051/* Helper function for expand_omp_for_*. Generate code like:
3052 L10:
3053 V3 += STEP3;
3054 if (V3 cond3 N32) goto BODY_BB; else goto L11;
3055 L11:
3056 V3 = N31;
3057 V2 += STEP2;
3058 if (V2 cond2 N22) goto BODY_BB; else goto L12;
3059 L12:
3060 V2 = N21;
3061 V1 += STEP1;
3062 goto BODY_BB;
3063 For non-rectangular loops, use temporaries stored in nonrect_bounds
3064 for the upper bounds if M?2 multiplier is present. Given e.g.
3065 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3066 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3067 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3068 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3069 do:
3070 L10:
3071 V4 += STEP4;
3072 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3073 L11:
3074 V4 = N41 + M41 * V2; // This can be left out if the loop
3075 // refers to the immediate parent loop
3076 V3 += STEP3;
3077 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3078 L12:
3079 V3 = N31;
3080 V2 += STEP2;
3081 if (V2 cond2 N22) goto L120; else goto L13;
3082 L120:
3083 V4 = N41 + M41 * V2;
3084 NONRECT_BOUND4 = N42 + M42 * V2;
3085 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3086 L13:
3087 V2 = N21;
3088 V1 += STEP1;
3089 goto L120; */
3090
3091static basic_block
3092extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3093 basic_block cont_bb, basic_block body_bb)
3094{
3095 basic_block last_bb, bb, collapse_bb = NULL;
3096 int i;
3097 gimple_stmt_iterator gsi;
3098 edge e;
3099 tree t;
3100 gimple *stmt;
3101
3102 last_bb = cont_bb;
3103 for (i = fd->collapse - 1; i >= 0; i--)
3104 {
3105 tree vtype = TREE_TYPE (fd->loops[i].v);
3106
3107 bb = create_empty_bb (last_bb);
3108 add_bb_to_loop (bb, last_bb->loop_father);
3109 gsi = gsi_start_bb (bb);
3110
3111 if (i < fd->collapse - 1)
3112 {
3113 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3114 e->probability = profile_probability::guessed_always () / 8;
3115
3116 struct omp_for_data_loop *l = &fd->loops[i + 1];
3117 if (l->m1 == NULL_TREE || l->outer != 1)
3118 {
3119 t = l->n1;
3120 if (l->m1)
3121 {
3122 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3123 t = fold_build_pointer_plus (fd->loops[i + 1 - l->outer].v,
3124 t);
3125 else
3126 {
3127 tree t2
3128 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3129 fd->loops[i + 1 - l->outer].v, l->m1);
3130 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3131 }
3132 }
3133 t = force_gimple_operand_gsi (&gsi, t,
3134 DECL_P (l->v)
3135 && TREE_ADDRESSABLE (l->v),
3136 NULL_TREE, false,
3137 GSI_CONTINUE_LINKING);
3138 stmt = gimple_build_assign (l->v, t);
3139 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3140 }
3141 }
3142 else
3143 collapse_bb = bb;
3144
3145 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3146
3147 if (POINTER_TYPE_P (vtype))
3148 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3149 else
3150 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3151 t = force_gimple_operand_gsi (&gsi, t,
3152 DECL_P (fd->loops[i].v)
3153 && TREE_ADDRESSABLE (fd->loops[i].v),
3154 NULL_TREE, false, GSI_CONTINUE_LINKING);
3155 stmt = gimple_build_assign (fd->loops[i].v, t);
3156 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3157
3158 if (fd->loops[i].non_rect_referenced)
3159 {
3160 basic_block update_bb = NULL, prev_bb = NULL;
3161 for (int j = i + 1; j <= fd->last_nonrect; j++)
3162 if (j - fd->loops[j].outer == i)
3163 {
3164 tree n1, n2;
3165 struct omp_for_data_loop *l = &fd->loops[j];
3166 basic_block this_bb = create_empty_bb (last_bb);
3167 add_bb_to_loop (this_bb, last_bb->loop_father);
3168 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3169 if (prev_bb)
3170 {
3171 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3172 e->probability
3173 = profile_probability::guessed_always ().apply_scale (7,
3174 8);
3175 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3176 }
3177 if (l->m1)
3178 {
3179 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3180 t = fold_build_pointer_plus (fd->loops[i].v, l->n1);
3181 else
3182 {
3183 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3184 fd->loops[i].v);
3185 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v),
3186 t, l->n1);
3187 }
3188 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3189 false,
3190 GSI_CONTINUE_LINKING);
3191 stmt = gimple_build_assign (l->v, n1);
3192 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3193 n1 = l->v;
3194 }
3195 else
3196 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3197 NULL_TREE, false,
3198 GSI_CONTINUE_LINKING);
3199 if (l->m2)
3200 {
3201 if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3202 t = fold_build_pointer_plus (fd->loops[i].v, l->n2);
3203 else
3204 {
3205 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3206 fd->loops[i].v);
3207 t = fold_build2 (PLUS_EXPR,
3208 TREE_TYPE (nonrect_bounds[j]),
3209 t, unshare_expr (l->n2));
3210 }
3211 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3212 false,
3213 GSI_CONTINUE_LINKING);
3214 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3215 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3216 n2 = nonrect_bounds[j];
3217 }
3218 else
3219 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3220 true, NULL_TREE, false,
3221 GSI_CONTINUE_LINKING);
3222 gcond *cond_stmt
3223 = gimple_build_cond (l->cond_code, n1, n2,
3224 NULL_TREE, NULL_TREE);
3225 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3226 if (update_bb == NULL)
3227 update_bb = this_bb;
3228 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3229 e->probability = profile_probability::guessed_always () / 8;
3230 if (prev_bb == NULL)
3231 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3232 prev_bb = this_bb;
3233 }
3234 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3235 e->probability
3236 = profile_probability::guessed_always ().apply_scale (7, 8);
3237 body_bb = update_bb;
3238 }
3239
3240 if (i > 0)
3241 {
3242 if (fd->loops[i].m2)
3243 t = nonrect_bounds[i];
3244 else
3245 t = unshare_expr (fd->loops[i].n2);
3246 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3247 false, GSI_CONTINUE_LINKING);
3248 tree v = fd->loops[i].v;
3249 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3250 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3251 false, GSI_CONTINUE_LINKING);
3252 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3253      stmt = gimple_build_cond_empty (t);
3254 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3255 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3256 expand_omp_regimplify_p, NULL, NULL)
3257 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3258 expand_omp_regimplify_p, NULL, NULL))
3259 gimple_regimplify_operands (stmt, &gsi);
3260 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3261      e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3262 }
3263 else
3264 make_edge (bb, body_bb, EDGE_FALLTHRU);
3265 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3266 last_bb = bb;
3267 }
3268
3269 return collapse_bb;
3270}
3271
3272/* Expand #pragma omp ordered depend(source). */
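/* A sketch of the expansion (illustrative, not generated verbatim): the
   whole construct becomes a single runtime call

     GOMP_doacross_post (&.orditera[0]);

   passing the address of the array holding the current logical iteration
   numbers, with GOMP_doacross_ull_post used instead when the iteration
   type is unsigned long long.  */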
3273
3274static void
3275expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3276 tree *counts, location_t loc)
3277{
3278 enum built_in_function source_ix
3279 = fd->iter_type == long_integer_type_node
3280 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3281 gimple *g
3282    = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3283 build_fold_addr_expr (counts[fd->ordered]));
3284  gimple_set_location (g, loc);
3285 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3286}
3287
3288/* Expand a single depend from #pragma omp ordered depend(sink:...). */
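/* Illustrative sketch, assuming doacross(sink: i - 1, j + 1) inside
   ordered(2) loops with unit steps (the n1_*/n2_* names are made up for
   the example):

     if (i - 1 >= n1_0 && j + 1 <= n2_1)
       GOMP_doacross_wait (i - 1 - n1_0, j + 1 - n1_1);

   i.e. each sink offset is folded into a zero-based logical iteration
   number and the wait call is guarded by a condition that skips it
   whenever the sink iteration falls outside the iteration space.  */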
3289
3290static void
3291expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3292 tree *counts, tree c, location_t loc,
3293 basic_block cont_bb)
3294{
3295 auto_vec<tree, 10> args;
3296 enum built_in_function sink_ix
3297 = fd->iter_type == long_integer_type_node
3298 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3299 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3300 int i;
3301 gimple_stmt_iterator gsi2 = *gsi;
3302 bool warned_step = false;
3303
3304 if (deps == NULL)
3305 {
3306 /* Handle doacross(sink: omp_cur_iteration - 1). */
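      /* I.e. (a sketch): if (.cnt != 0) GOMP_doacross_wait (.cnt - 1, ...);
	 so the wait is skipped entirely for the first logical iteration,
	 which has no predecessor to wait on.  */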
3307      gsi_prev (&gsi2);
3308      edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3309      edge e2 = split_block_after_labels (e1->dest);
3310      gsi2 = gsi_after_labels (e1->dest);
3311      *gsi = gsi_last_bb (e1->src);
3312 gimple_stmt_iterator gsi3 = *gsi;
3313
3314 if (counts[fd->collapse - 1])
3315 {
3316 gcc_assert (fd->collapse == 1);
3317 t = counts[fd->collapse - 1];
3318 }
3319 else if (fd->collapse > 1)
3320 t = fd->loop.v;
3321 else
3322 {
3323 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3324 fd->loops[0].v, fd->loops[0].n1);
3325 t = fold_convert (fd->iter_type, t);
3326 }
3327
3328 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
3329 false, GSI_CONTINUE_LINKING);
3330 gsi_insert_after (gsi, gimple_build_cond (NE_EXPR, t,
3331 build_zero_cst (TREE_TYPE (t)),
3332 NULL_TREE, NULL_TREE),
3333 GSI_NEW_STMT);
3334
3335 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t,
3336 build_minus_one_cst (TREE_TYPE (t)));
3337 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3338 true, GSI_SAME_STMT);
3339      args.safe_push (t);
3340 for (i = fd->collapse; i < fd->ordered; i++)
3341 {
3342 t = counts[fd->ordered + 2 + (i - fd->collapse)];
3343 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t,
3344 build_minus_one_cst (TREE_TYPE (t)));
3345 t = fold_convert (fd->iter_type, t);
3346 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3347 true, GSI_SAME_STMT);
3348	  args.safe_push (t);
3349 }
3350
3351      gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix),
3352					 args);
3353      gimple_set_location (g, loc);
3354 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3355
3356 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3357 e3->probability = profile_probability::guessed_always () / 8;
3358 e1->probability = e3->probability.invert ();
3359 e1->flags = EDGE_TRUE_VALUE;
3360 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3361
3362 if (fd->ordered > fd->collapse && cont_bb)
3363 {
3364 if (counts[fd->ordered + 1] == NULL_TREE)
3365 counts[fd->ordered + 1]
3366 = create_tmp_var (boolean_type_node, ".first");
3367
3368 edge e4;
3369	  if (gsi_end_p (gsi3))
3370 e4 = split_block_after_labels (e1->src);
3371 else
3372 {
3373	      gsi_prev (&gsi3);
3374	      e4 = split_block (gsi_bb (gsi3), gsi_stmt (gsi3));
3375	    }
3376	  gsi3 = gsi_last_bb (e4->src);
3377
3378 gsi_insert_after (&gsi3,
3379 gimple_build_cond (NE_EXPR,
3380 counts[fd->ordered + 1],
3381 boolean_false_node,
3382 NULL_TREE, NULL_TREE),
3383 GSI_NEW_STMT);
3384
3385 edge e5 = make_edge (e4->src, e2->dest, EDGE_FALSE_VALUE);
3386 e4->probability = profile_probability::guessed_always () / 8;
3387 e5->probability = e4->probability.invert ();
3388 e4->flags = EDGE_TRUE_VALUE;
3389 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e4->src);
3390 }
3391
3392      *gsi = gsi_after_labels (e2->dest);
3393 return;
3394 }
3395 for (i = 0; i < fd->ordered; i++)
3396 {
3397 tree step = NULL_TREE;
3398 off = TREE_PURPOSE (deps);
3399 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3400 {
3401 step = TREE_OPERAND (off, 1);
3402 off = TREE_OPERAND (off, 0);
3403 }
3404 if (!integer_zerop (off))
3405 {
3406 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3407 || fd->loops[i].cond_code == GT_EXPR);
3408 bool forward = fd->loops[i].cond_code == LT_EXPR;
3409 if (step)
3410 {
3411	      /* Non-simple Fortran DO loops.  If step is variable,
3412		 we don't know even the direction at compile time,
3413		 so we can't warn.  */
3414 if (TREE_CODE (step) != INTEGER_CST)
3415 break;
3416 forward = tree_int_cst_sgn (step) != -1;
3417 }
3418 if (forward ^ OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3419 warning_at (loc, 0, "%qs clause with %<sink%> modifier "
3420 "waiting for lexically later iteration",
3421 OMP_CLAUSE_DOACROSS_DEPEND (c)
3422 ? "depend" : "doacross");
3423 break;
3424 }
3425 deps = TREE_CHAIN (deps);
3426 }
3427 /* If all offsets corresponding to the collapsed loops are zero,
3428 this depend clause can be ignored. FIXME: but there is still a
3429 flush needed. We need to emit one __sync_synchronize () for it
3430 though (perhaps conditionally)? Solve this together with the
3431 conservative dependence folding optimization.
3432 if (i >= fd->collapse)
3433 return; */
3434
3435 deps = OMP_CLAUSE_DECL (c);
3436  gsi_prev (&gsi2);
3437  edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3438  edge e2 = split_block_after_labels (e1->dest);
3439
3440  gsi2 = gsi_after_labels (e1->dest);
3441  *gsi = gsi_last_bb (e1->src);
3442 for (i = 0; i < fd->ordered; i++)
3443 {
3444 tree itype = TREE_TYPE (fd->loops[i].v);
3445 tree step = NULL_TREE;
3446 tree orig_off = NULL_TREE;
3447 if (POINTER_TYPE_P (itype))
3448 itype = sizetype;
3449 if (i)
3450 deps = TREE_CHAIN (deps);
3451 off = TREE_PURPOSE (deps);
3452 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3453 {
3454 step = TREE_OPERAND (off, 1);
3455 off = TREE_OPERAND (off, 0);
3456 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3457 && integer_onep (fd->loops[i].step)
3458 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3459 }
3460 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3461 if (step)
3462 {
3463 off = fold_convert_loc (loc, itype, off);
3464 orig_off = off;
3465 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3466 }
3467
3468 if (integer_zerop (off))
3469 t = boolean_true_node;
3470 else
3471 {
3472 tree a;
3473 tree co = fold_convert_loc (loc, itype, off);
3474 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3475 {
3476 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3477 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3478 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3479 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3480 co);
3481 }
3482 else if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3483 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3484 fd->loops[i].v, co);
3485 else
3486 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3487 fd->loops[i].v, co);
3488 if (step)
3489 {
3490 tree t1, t2;
3491 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3492 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3493 fd->loops[i].n1);
3494 else
3495 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3496 fd->loops[i].n2);
3497 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3498 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3499 fd->loops[i].n2);
3500 else
3501 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3502 fd->loops[i].n1);
3503 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3504 step, build_int_cst (TREE_TYPE (step), 0));
3505 if (TREE_CODE (step) != INTEGER_CST)
3506 {
3507 t1 = unshare_expr (t1);
3508 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3509 false, GSI_CONTINUE_LINKING);
3510 t2 = unshare_expr (t2);
3511 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3512 false, GSI_CONTINUE_LINKING);
3513 }
3514 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3515 t, t2, t1);
3516 }
3517 else if (fd->loops[i].cond_code == LT_EXPR)
3518 {
3519 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3520 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3521 fd->loops[i].n1);
3522 else
3523 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3524 fd->loops[i].n2);
3525 }
3526 else if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3527 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3528 fd->loops[i].n2);
3529 else
3530 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3531 fd->loops[i].n1);
3532 }
3533 if (cond)
3534 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3535 else
3536 cond = t;
3537
3538 off = fold_convert_loc (loc, itype, off);
3539
3540 if (step
3541 || (fd->loops[i].cond_code == LT_EXPR
3542 ? !integer_onep (fd->loops[i].step)
3543 : !integer_minus_onep (fd->loops[i].step)))
3544 {
3545 if (step == NULL_TREE
3546 && TYPE_UNSIGNED (itype)
3547 && fd->loops[i].cond_code == GT_EXPR)
3548 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3549 fold_build1_loc (loc, NEGATE_EXPR, itype,
3550 s));
3551 else
3552 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3553 orig_off ? orig_off : off, s);
3554 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3555 build_int_cst (itype, 0));
3556 if (integer_zerop (t) && !warned_step)
3557 {
3558 warning_at (loc, 0, "%qs clause with %<sink%> modifier "
3559 "refers to iteration never in the iteration "
3560 "space",
3561 OMP_CLAUSE_DOACROSS_DEPEND (c)
3562 ? "depend" : "doacross");
3563 warned_step = true;
3564 }
3565 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3566 cond, t);
3567 }
3568
3569 if (i <= fd->collapse - 1 && fd->collapse > 1)
3570 t = fd->loop.v;
3571 else if (counts[i])
3572 t = counts[i];
3573 else
3574 {
3575 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3576 fd->loops[i].v, fd->loops[i].n1);
3577 t = fold_convert_loc (loc, fd->iter_type, t);
3578 }
3579 if (step)
3580	/* OFF was already divided by STEP above.  */;
3581 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3582 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3583 fold_build1_loc (loc, NEGATE_EXPR, itype,
3584 s));
3585 else
3586 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3587 if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
3588 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3589 off = fold_convert_loc (loc, fd->iter_type, off);
3590 if (i <= fd->collapse - 1 && fd->collapse > 1)
3591 {
3592 if (i)
3593 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3594 off);
3595 if (i < fd->collapse - 1)
3596 {
3597 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3598 counts[i]);
3599 continue;
3600 }
3601 }
3602 off = unshare_expr (off);
3603 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3604 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3605 true, GSI_SAME_STMT);
3606      args.safe_push (t);
3607    }
3608  gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3609  gimple_set_location (g, loc);
3610 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3611
3612 cond = unshare_expr (cond);
3613 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3614 GSI_CONTINUE_LINKING);
3615 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3616 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3617 e3->probability = profile_probability::guessed_always () / 8;
3618 e1->probability = e3->probability.invert ();
3619 e1->flags = EDGE_TRUE_VALUE;
3620 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3621
3622  *gsi = gsi_after_labels (e2->dest);
3623}
3624
3625/* Expand all #pragma omp ordered depend(source) and
3626 #pragma omp ordered depend(sink:...) constructs in the current
3627 #pragma omp for ordered(n) region. */
3628
3629static void
3630expand_omp_ordered_source_sink (struct omp_region *region,
3631 struct omp_for_data *fd, tree *counts,
3632 basic_block cont_bb)
3633{
3634 struct omp_region *inner;
3635 int i;
3636 for (i = fd->collapse - 1; i < fd->ordered; i++)
3637 if (i == fd->collapse - 1 && fd->collapse > 1)
3638 counts[i] = NULL_TREE;
3639 else if (i >= fd->collapse && !cont_bb)
3640 counts[i] = build_zero_cst (fd->iter_type);
3641 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3642 && integer_onep (fd->loops[i].step))
3643 counts[i] = NULL_TREE;
3644 else
3645 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3646 tree atype
3647 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3648 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3649 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3650 counts[fd->ordered + 1] = NULL_TREE;
3651
3652 for (inner = region->inner; inner; inner = inner->next)
3653 if (inner->type == GIMPLE_OMP_ORDERED)
3654 {
3655 gomp_ordered *ord_stmt = inner->ord_stmt;
3656 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3657	location_t loc = gimple_location (ord_stmt);
3658 tree c;
3659 for (c = gimple_omp_ordered_clauses (ord_stmt);
3660 c; c = OMP_CLAUSE_CHAIN (c))
3661 if (OMP_CLAUSE_DOACROSS_KIND (c) == OMP_CLAUSE_DOACROSS_SOURCE)
3662 break;
3663 if (c)
3664	  expand_omp_ordered_source (&gsi, fd, counts, loc);
3665	for (c = gimple_omp_ordered_clauses (ord_stmt);
3666	     c; c = OMP_CLAUSE_CHAIN (c))
3667	  if (OMP_CLAUSE_DOACROSS_KIND (c) == OMP_CLAUSE_DOACROSS_SINK)
3668	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc, cont_bb);
3669 gsi_remove (&gsi, true);
3670 }
3671}
3672
3673/* Wrap the body into fd->ordered - fd->collapse loops that aren't
3674 collapsed. */
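/* Roughly, for fd->collapse == 1 and fd->ordered == 2 the generated
   shape is (a sketch, simplified):

     V2 = N21; .orditera[1] = 0;
   body:
     BODY;
     V2 += STEP2; .orditera[1]++;
     if (V2 cond2 N22) goto body;

   i.e. each non-collapsed ordered loop is rebuilt around the body with
   its current iteration number maintained for depend(source).  */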
3675
3676static basic_block
3677expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3678 basic_block cont_bb, basic_block body_bb,
3679 basic_block l0_bb, bool ordered_lastprivate)
3680{
3681 if (fd->ordered == fd->collapse)
3682 return cont_bb;
3683
3684 if (!cont_bb)
3685 {
3686      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3687 for (int i = fd->collapse; i < fd->ordered; i++)
3688 {
3689 tree type = TREE_TYPE (fd->loops[i].v);
3690 tree n1 = fold_convert (type, fd->loops[i].n1);
3691	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3692	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3693			      size_int (i - fd->collapse + 1),
3694			      NULL_TREE, NULL_TREE);
3695	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3696 }
3697 return NULL;
3698 }
3699
3700 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3701 {
3702 tree t, type = TREE_TYPE (fd->loops[i].v);
3703      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3704      if (counts[fd->ordered + 1] && i == fd->collapse)
3705	expand_omp_build_assign (&gsi, counts[fd->ordered + 1],
3706				 boolean_true_node);
3707      expand_omp_build_assign (&gsi, fd->loops[i].v,
3708			       fold_convert (type, fd->loops[i].n1));
3709      if (counts[i])
3710	expand_omp_build_assign (&gsi, counts[i],
3711				 build_zero_cst (fd->iter_type));
3712      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3713			  size_int (i - fd->collapse + 1),
3714			  NULL_TREE, NULL_TREE);
3715      expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3716      if (!gsi_end_p (gsi))
3717	gsi_prev (&gsi);
3718      else
3719	gsi = gsi_last_bb (body_bb);
3720      edge e1 = split_block (body_bb, gsi_stmt (gsi));
3721 basic_block new_body = e1->dest;
3722 if (body_bb == cont_bb)
3723 cont_bb = new_body;
3724 edge e2 = NULL;
3725 basic_block new_header;
3726 if (EDGE_COUNT (cont_bb->preds) > 0)
3727 {
3728	  gsi = gsi_last_bb (cont_bb);
3729	  if (POINTER_TYPE_P (type))
3730	    t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3731	  else
3732	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3733			     fold_convert (type, fd->loops[i].step));
3734	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3735	  if (counts[i])
3736	    {
3737	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3738			       build_int_cst (fd->iter_type, 1));
3739	      expand_omp_build_assign (&gsi, counts[i], t);
3740 t = counts[i];
3741 }
3742 else
3743 {
3744 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3745 fd->loops[i].v, fd->loops[i].n1);
3746 t = fold_convert (fd->iter_type, t);
3747 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3748 true, GSI_SAME_STMT);
3749 }
3750 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3751 size_int (i - fd->collapse + 1),
3752 NULL_TREE, NULL_TREE);
3753	  expand_omp_build_assign (&gsi, aref, t);
3754	  if (counts[fd->ordered + 1] && i == fd->ordered - 1)
3755	    expand_omp_build_assign (&gsi, counts[fd->ordered + 1],
3756				     boolean_false_node);
3757	  gsi_prev (&gsi);
3758	  e2 = split_block (cont_bb, gsi_stmt (gsi));
3759 new_header = e2->dest;
3760 }
3761 else
3762 new_header = cont_bb;
3763      gsi = gsi_after_labels (new_header);
3764 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3765 true, GSI_SAME_STMT);
3766 tree n2
3767 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3768 true, NULL_TREE, true, GSI_SAME_STMT);
3769 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3770      gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3771      edge e3 = split_block (new_header, gsi_stmt (gsi));
3772 cont_bb = e3->dest;
3773 remove_edge (e1);
3774 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3775 e3->flags = EDGE_FALSE_VALUE;
3776 e3->probability = profile_probability::guessed_always () / 8;
3777 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3778 e1->probability = e3->probability.invert ();
3779
3780 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3781 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3782
3783 if (e2)
3784 {
3785 class loop *loop = alloc_loop ();
3786 loop->header = new_header;
3787 loop->latch = e2->src;
3788 add_loop (loop, l0_bb->loop_father);
3789 }
3790 }
3791
3792 /* If there are any lastprivate clauses and it is possible some loops
3793 might have zero iterations, ensure all the decls are initialized,
3794 otherwise we could crash evaluating C++ class iterators with lastprivate
3795 clauses. */
3796 bool need_inits = false;
3797 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3798 if (need_inits)
3799 {
3800 tree type = TREE_TYPE (fd->loops[i].v);
3801	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3802	expand_omp_build_assign (&gsi, fd->loops[i].v,
3803 fold_convert (type, fd->loops[i].n1));
3804 }
3805 else
3806 {
3807 tree type = TREE_TYPE (fd->loops[i].v);
3808 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3809 boolean_type_node,
3810 fold_convert (type, fd->loops[i].n1),
3811 fold_convert (type, fd->loops[i].n2));
3812 if (!integer_onep (this_cond))
3813 need_inits = true;
3814 }
3815
3816 return cont_bb;
3817}
3818
3819/* A subroutine of expand_omp_for. Generate code for a parallel
3820 loop with any schedule. Given parameters:
3821
3822 for (V = N1; V cond N2; V += STEP) BODY;
3823
3824 where COND is "<" or ">", we generate pseudocode
3825
3826 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3827 if (more) goto L0; else goto L3;
3828 L0:
3829 V = istart0;
3830 iend = iend0;
3831 L1:
3832 BODY;
3833 V += STEP;
3834 if (V cond iend) goto L1; else goto L2;
3835 L2:
3836 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3837 L3:
3838
3839 If this is a combined omp parallel loop, instead of the call to
3840 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3841    If this is a gimple_omp_for_combined_p loop, then instead of assigning
3842 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3843 inner GIMPLE_OMP_FOR and V += STEP; and
3844 if (V cond iend) goto L1; else goto L2; are removed.
3845
3846 For collapsed loops, given parameters:
3847 collapse(3)
3848 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3849 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3850 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3851 BODY;
3852
3853 we generate pseudocode
3854
3855 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3856 if (cond3 is <)
3857 adj = STEP3 - 1;
3858 else
3859 adj = STEP3 + 1;
3860 count3 = (adj + N32 - N31) / STEP3;
3861 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3862 if (cond2 is <)
3863 adj = STEP2 - 1;
3864 else
3865 adj = STEP2 + 1;
3866 count2 = (adj + N22 - N21) / STEP2;
3867 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3868 if (cond1 is <)
3869 adj = STEP1 - 1;
3870 else
3871 adj = STEP1 + 1;
3872 count1 = (adj + N12 - N11) / STEP1;
3873 count = count1 * count2 * count3;
3874 goto Z1;
3875 Z0:
3876 count = 0;
3877 Z1:
3878 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3879 if (more) goto L0; else goto L3;
3880 L0:
3881 V = istart0;
3882 T = V;
3883 V3 = N31 + (T % count3) * STEP3;
3884 T = T / count3;
3885 V2 = N21 + (T % count2) * STEP2;
3886 T = T / count2;
3887 V1 = N11 + T * STEP1;
3888 iend = iend0;
3889 L1:
3890 BODY;
3891 V += 1;
3892 if (V < iend) goto L10; else goto L2;
3893 L10:
3894 V3 += STEP3;
3895 if (V3 cond3 N32) goto L1; else goto L11;
3896 L11:
3897 V3 = N31;
3898 V2 += STEP2;
3899 if (V2 cond2 N22) goto L1; else goto L12;
3900 L12:
3901 V2 = N21;
3902 V1 += STEP1;
3903 goto L1;
3904 L2:
3905 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3906 L3:
3907
3908 */
3909
3910static void
3911expand_omp_for_generic (struct omp_region *region,
3912 struct omp_for_data *fd,
3913 enum built_in_function start_fn,
3914 enum built_in_function next_fn,
3915 tree sched_arg,
3916 gimple *inner_stmt)
3917{
3918 tree type, istart0, iend0, iend;
3919 tree t, vmain, vback, bias = NULL_TREE;
3920 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3921 basic_block l2_bb = NULL, l3_bb = NULL;
3922 gimple_stmt_iterator gsi;
3923 gassign *assign_stmt;
3924 bool in_combined_parallel = is_combined_parallel (region);
3925 bool broken_loop = region->cont == NULL;
3926 edge e, ne;
3927 tree *counts = NULL;
3928 int i;
3929 bool ordered_lastprivate = false;
3930
3931 gcc_assert (!broken_loop || !in_combined_parallel);
3932 gcc_assert (fd->iter_type == long_integer_type_node
3933 || !in_combined_parallel);
3934
3935 entry_bb = region->entry;
3936 cont_bb = region->cont;
3937 collapse_bb = NULL;
3938 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3939 gcc_assert (broken_loop
3940 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3941 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3942  l1_bb = single_succ (l0_bb);
3943 if (!broken_loop)
3944 {
3945 l2_bb = create_empty_bb (cont_bb);
3946 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3947 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3948 == l1_bb));
3949 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3950 }
3951 else
3952 l2_bb = NULL;
3953 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3954 exit_bb = region->exit;
3955
3956  gsi = gsi_last_nondebug_bb (entry_bb);
3957
3958  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3959  if (fd->ordered
3960      && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3961			  OMP_CLAUSE_LASTPRIVATE))
3962    ordered_lastprivate = true;
3963 tree reductions = NULL_TREE;
3964 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3965 tree memv = NULL_TREE;
3966 if (fd->lastprivate_conditional)
3967 {
3968      tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3969				OMP_CLAUSE__CONDTEMP_);
3970      if (fd->have_pointer_condtemp)
3971	condtemp = OMP_CLAUSE_DECL (c);
3972      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3973 cond_var = OMP_CLAUSE_DECL (c);
3974 }
3975 if (sched_arg)
3976 {
3977 if (fd->have_reductemp)
3978 {
3979	  tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3980				    OMP_CLAUSE__REDUCTEMP_);
3981	  reductions = OMP_CLAUSE_DECL (c);
3982	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3983	  gimple *g = SSA_NAME_DEF_STMT (reductions);
3984	  reductions = gimple_assign_rhs1 (g);
3985	  OMP_CLAUSE_DECL (c) = reductions;
3986	  entry_bb = gimple_bb (g);
3987	  edge e = split_block (entry_bb, g);
3988	  if (region->entry == entry_bb)
3989	    region->entry = e->dest;
3990	  gsi = gsi_last_bb (entry_bb);
3991 }
3992 else
3993 reductions = null_pointer_node;
3994 if (fd->have_pointer_condtemp)
3995 {
3996 tree type = TREE_TYPE (condtemp);
3997 memv = create_tmp_var (type);
3998 TREE_ADDRESSABLE (memv) = 1;
3999 unsigned HOST_WIDE_INT sz
4000 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4001 sz *= fd->lastprivate_conditional;
4002	  expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
4003				   false);
4004 mem = build_fold_addr_expr (memv);
4005 }
4006 else
4007 mem = null_pointer_node;
4008 }
4009 if (fd->collapse > 1 || fd->ordered)
4010 {
4011 int first_zero_iter1 = -1, first_zero_iter2 = -1;
4012 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
4013
4014 counts = XALLOCAVEC (tree, fd->ordered
4015 ? fd->ordered + 2
4016 + (fd->ordered - fd->collapse)
4017 : fd->collapse);
4018      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4019 zero_iter1_bb, first_zero_iter1,
4020 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
4021
4022 if (zero_iter1_bb)
4023 {
4024 /* Some counts[i] vars might be uninitialized if
4025 some loop has zero iterations. But the body shouldn't
4026 be executed in that case, so just avoid uninit warnings. */
4027 for (i = first_zero_iter1;
4028 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
4029 if (SSA_VAR_P (counts[i]))
4030 suppress_warning (counts[i], OPT_Wuninitialized);
4031	  gsi_prev (&gsi);
4032	  e = split_block (entry_bb, gsi_stmt (gsi));
4033	  entry_bb = e->dest;
4034	  make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
4035	  gsi = gsi_last_nondebug_bb (entry_bb);
4036 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4037 get_immediate_dominator (CDI_DOMINATORS,
4038 zero_iter1_bb));
4039 }
4040 if (zero_iter2_bb)
4041 {
4042 /* Some counts[i] vars might be uninitialized if
4043 some loop has zero iterations. But the body shouldn't
4044 be executed in that case, so just avoid uninit warnings. */
4045 for (i = first_zero_iter2; i < fd->ordered; i++)
4046 if (SSA_VAR_P (counts[i]))
4047 suppress_warning (counts[i], OPT_Wuninitialized);
4048 if (zero_iter1_bb)
4049 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
4050 else
4051 {
4052	      gsi_prev (&gsi);
4053	      e = split_block (entry_bb, gsi_stmt (gsi));
4054	      entry_bb = e->dest;
4055	      make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
4056	      gsi = gsi_last_nondebug_bb (entry_bb);
4057 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4058 get_immediate_dominator
4059 (CDI_DOMINATORS, zero_iter2_bb));
4060 }
4061 }
4062 if (fd->collapse == 1)
4063 {
4064 counts[0] = fd->loop.n2;
4065 fd->loop = fd->loops[0];
4066 }
4067 }
4068
4069 type = TREE_TYPE (fd->loop.v);
4070 istart0 = create_tmp_var (fd->iter_type, ".istart0");
4071 iend0 = create_tmp_var (fd->iter_type, ".iend0");
4072 TREE_ADDRESSABLE (istart0) = 1;
4073 TREE_ADDRESSABLE (iend0) = 1;
4074
4075 /* See if we need to bias by LLONG_MIN. */
4076 if (fd->iter_type == long_long_unsigned_type_node
4077 && TREE_CODE (type) == INTEGER_TYPE
4078 && !TYPE_UNSIGNED (type)
4079 && fd->ordered == 0)
4080 {
4081 tree n1, n2;
4082
4083 if (fd->loop.cond_code == LT_EXPR)
4084 {
4085 n1 = fd->loop.n1;
4086 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4087 }
4088 else
4089 {
4090 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4091 n2 = fd->loop.n1;
4092 }
4093 if (TREE_CODE (n1) != INTEGER_CST
4094 || TREE_CODE (n2) != INTEGER_CST
4095 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4096 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4097 }
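  /* Example: a signed long long iterator running from -5 to 5 cannot be
     passed directly in the unsigned long long iteration space used by the
     GOMP_loop_ull_* entry points, so N1 and N2 are biased by LLONG_MIN
     here and the bias is subtracted again when the received istart0/iend0
     values are assigned back to the user's iteration variable below.  */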
4098
4099 gimple_stmt_iterator gsif = gsi;
4100  gsi_prev (&gsif);
4101
4102 tree arr = NULL_TREE;
4103 if (in_combined_parallel)
4104 {
4105 gcc_assert (fd->ordered == 0);
4106 /* In a combined parallel loop, emit a call to
4107 GOMP_loop_foo_next. */
4108      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4109 build_fold_addr_expr (istart0),
4110 build_fold_addr_expr (iend0));
4111 }
4112 else
4113 {
4114 tree t0, t1, t2, t3, t4;
4115 /* If this is not a combined parallel loop, emit a call to
4116 GOMP_loop_foo_start in ENTRY_BB. */
4117 t4 = build_fold_addr_expr (iend0);
4118 t3 = build_fold_addr_expr (istart0);
4119 if (fd->ordered)
4120 {
4121 t0 = build_int_cst (unsigned_type_node,
4122 fd->ordered - fd->collapse + 1);
4123 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
4124 fd->ordered
4125 - fd->collapse + 1),
4126 ".omp_counts");
4127 DECL_NAMELESS (arr) = 1;
4128 TREE_ADDRESSABLE (arr) = 1;
4129 TREE_STATIC (arr) = 1;
4130 vec<constructor_elt, va_gc> *v;
4131	  vec_alloc (v, fd->ordered - fd->collapse + 1);
4132 int idx;
4133
4134 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
4135 {
4136 tree c;
4137 if (idx == 0 && fd->collapse > 1)
4138 c = fd->loop.n2;
4139 else
4140 c = counts[idx + fd->collapse - 1];
4141 tree purpose = size_int (idx);
4142 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
4143 if (TREE_CODE (c) != INTEGER_CST)
4144 TREE_STATIC (arr) = 0;
4145 }
4146
4147 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
4148 if (!TREE_STATIC (arr))
4149 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
4150 void_type_node, arr),
4151 true, NULL_TREE, true, GSI_SAME_STMT);
4152 t1 = build_fold_addr_expr (arr);
4153 t2 = NULL_TREE;
4154 }
4155 else
4156 {
4157 t2 = fold_convert (fd->iter_type, fd->loop.step);
4158 t1 = fd->loop.n2;
4159 t0 = fd->loop.n1;
4160	  if (gimple_omp_for_combined_into_p (fd->for_stmt))
4161 {
4162 tree innerc
4163		= omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4164				   OMP_CLAUSE__LOOPTEMP_);
4165	      gcc_assert (innerc);
4166	      t0 = OMP_CLAUSE_DECL (innerc);
4167	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4168					OMP_CLAUSE__LOOPTEMP_);
4169 gcc_assert (innerc);
4170 t1 = OMP_CLAUSE_DECL (innerc);
4171 }
4172 if (POINTER_TYPE_P (TREE_TYPE (t0))
4173 && TYPE_PRECISION (TREE_TYPE (t0))
4174 != TYPE_PRECISION (fd->iter_type))
4175 {
4176 /* Avoid casting pointers to integer of a different size. */
4177 tree itype = signed_type_for (type);
4178 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4179 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4180 }
4181 else
4182 {
4183 t1 = fold_convert (fd->iter_type, t1);
4184 t0 = fold_convert (fd->iter_type, t0);
4185 }
4186 if (bias)
4187 {
4188 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4189 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4190 }
4191 }
4192 if (fd->iter_type == long_integer_type_node || fd->ordered)
4193 {
4194 if (fd->chunk_size)
4195 {
4196 t = fold_convert (fd->iter_type, fd->chunk_size);
4197	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
4198	      if (sched_arg)
4199		{
4200		  if (fd->ordered)
4201		    t = build_call_expr (builtin_decl_explicit (start_fn),
4202					 8, t0, t1, sched_arg, t, t3, t4,
4203					 reductions, mem);
4204		  else
4205		    t = build_call_expr (builtin_decl_explicit (start_fn),
4206					 9, t0, t1, t2, sched_arg, t, t3, t4,
4207					 reductions, mem);
4208		}
4209	      else if (fd->ordered)
4210		t = build_call_expr (builtin_decl_explicit (start_fn),
4211				     5, t0, t1, t, t3, t4);
4212	      else
4213		t = build_call_expr (builtin_decl_explicit (start_fn),
4214				     6, t0, t1, t2, t, t3, t4);
4215	    }
4216	  else if (fd->ordered)
4217	    t = build_call_expr (builtin_decl_explicit (start_fn),
4218				 4, t0, t1, t3, t4);
4219	  else
4220	    t = build_call_expr (builtin_decl_explicit (start_fn),
4221				 5, t0, t1, t2, t3, t4);
4222 }
4223 else
4224 {
4225 tree t5;
4226 tree c_bool_type;
4227 tree bfn_decl;
4228
4229	  /* The GOMP_loop_ull_*start functions have an additional boolean
4230	     argument, true for < loops and false for > loops.
4231	     In Fortran, the C bool type can be different from
4232	     boolean_type_node.  */
4233	  bfn_decl = builtin_decl_explicit (start_fn);
4234 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4235 t5 = build_int_cst (c_bool_type,
4236 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4237 if (fd->chunk_size)
4238 {
4239	      tree bfn_decl = builtin_decl_explicit (start_fn);
4240 t = fold_convert (fd->iter_type, fd->chunk_size);
4241	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
4242 if (sched_arg)
4243 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4244 t, t3, t4, reductions, mem);
4245 else
4246 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4247 }
4248 else
4249	    t = build_call_expr (builtin_decl_explicit (start_fn),
4250 6, t5, t0, t1, t2, t3, t4);
4251 }
4252 }
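  /* For instance (a sketch), schedule(dynamic,4) on a plain long loop
     ends up here as

       more = GOMP_loop_dynamic_start (N1, N2, STEP, 4, &istart0, &iend0);

     while the unsigned long long variants additionally receive the
     boolean <-versus-> flag built above.  */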
4253 if (TREE_TYPE (t) != boolean_type_node)
4254 t = fold_build2 (NE_EXPR, boolean_type_node,
4255 t, build_int_cst (TREE_TYPE (t), 0));
4256 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4257 true, GSI_SAME_STMT);
4258 if (arr && !TREE_STATIC (arr))
4259 {
4260 tree clobber = build_clobber (TREE_TYPE (arr));
4261 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4262 GSI_SAME_STMT);
4263 }
4264 if (fd->have_pointer_condtemp)
4265    expand_omp_build_assign (&gsi, condtemp, memv, false);
4266 if (fd->have_reductemp)
4267 {
4268      gimple *g = gsi_stmt (gsi);
4269      gsi_remove (&gsi, true);
4270      release_ssa_name (gimple_assign_lhs (g));
4271
4272      entry_bb = region->entry;
4273      gsi = gsi_last_nondebug_bb (entry_bb);
4274
4275      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4276    }
4277  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4278
4279 /* Remove the GIMPLE_OMP_FOR statement. */
4280 gsi_remove (&gsi, true);
4281
4282  if (gsi_end_p (gsif))
4283    gsif = gsi_after_labels (gsi_bb (gsif));
4284  gsi_next (&gsif);
4285
4286 /* Iteration setup for sequential loop goes in L0_BB. */
4287 tree startvar = fd->loop.v;
4288 tree endvar = NULL_TREE;
4289
4290  if (gimple_omp_for_combined_p (fd->for_stmt))
4291 {
4292 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4293 && gimple_omp_for_kind (inner_stmt)
4294 == GF_OMP_FOR_KIND_SIMD);
4295      tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4296				     OMP_CLAUSE__LOOPTEMP_);
4297      gcc_assert (innerc);
4298      startvar = OMP_CLAUSE_DECL (innerc);
4299      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4300				OMP_CLAUSE__LOOPTEMP_);
4301 gcc_assert (innerc);
4302 endvar = OMP_CLAUSE_DECL (innerc);
4303 }
4304
4305  gsi = gsi_start_bb (l0_bb);
4306 t = istart0;
4307 if (fd->ordered && fd->collapse == 1)
4308 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4309 fold_convert (fd->iter_type, fd->loop.step));
4310 else if (bias)
4311 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4312 if (fd->ordered && fd->collapse == 1)
4313 {
4314 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4315 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4316 fd->loop.n1, fold_convert (sizetype, t));
4317 else
4318 {
4319 t = fold_convert (TREE_TYPE (startvar), t);
4320 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4321 fd->loop.n1, t);
4322 }
4323 }
4324 else
4325 {
4326 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4327 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4328 t = fold_convert (TREE_TYPE (startvar), t);
4329 }
4330 t = force_gimple_operand_gsi (&gsi, t,
4331 DECL_P (startvar)
4332 && TREE_ADDRESSABLE (startvar),
4333 NULL_TREE, false, GSI_CONTINUE_LINKING);
4334 assign_stmt = gimple_build_assign (startvar, t);
4335 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4336 if (cond_var)
4337 {
4338 tree itype = TREE_TYPE (cond_var);
4339 /* For lastprivate(conditional:) itervar, we need some iteration
4340 counter that starts at unsigned non-zero and increases.
4341 Prefer as few IVs as possible, so if we can use startvar
4342 itself, use that, or startvar + constant (those would be
4343 incremented with step), and as last resort use the s0 + 1
4344 incremented by 1. */
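      /* E.g. (a sketch): for (V = 3; V < N; V += STEP) the counter can
	 reuse V itself, since it starts positive and only increases; with
	 a non-positive constant N1 it becomes V - N1 + 1; in the general
	 cases below it falls back to istart0 + 1.  */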
4345 if ((fd->ordered && fd->collapse == 1)
4346 || bias
4347 || POINTER_TYPE_P (type)
4348 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4349 || fd->loop.cond_code != LT_EXPR)
4350 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4351 build_int_cst (itype, 1));
4352 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4353 t = fold_convert (itype, t);
4354 else
4355 {
4356 tree c = fold_convert (itype, fd->loop.n1);
4357 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4358 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4359 }
4360 t = force_gimple_operand_gsi (&gsi, t, false,
4361 NULL_TREE, false, GSI_CONTINUE_LINKING);
4362 assign_stmt = gimple_build_assign (cond_var, t);
4363 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4364 }
4365
4366 t = iend0;
4367 if (fd->ordered && fd->collapse == 1)
4368 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4369 fold_convert (fd->iter_type, fd->loop.step));
4370 else if (bias)
4371 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4372 if (fd->ordered && fd->collapse == 1)
4373 {
4374 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4375 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4376 fd->loop.n1, fold_convert (sizetype, t));
4377 else
4378 {
4379 t = fold_convert (TREE_TYPE (startvar), t);
4380 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4381 fd->loop.n1, t);
4382 }
4383 }
4384 else
4385 {
4386 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4387 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4388 t = fold_convert (TREE_TYPE (startvar), t);
4389 }
4390 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4391 false, GSI_CONTINUE_LINKING);
4392 if (endvar)
4393 {
4394 assign_stmt = gimple_build_assign (endvar, iend);
4395 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4396 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4397 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4398 else
4399 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4400 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4401 }
4402 /* Handle linear clause adjustments. */
4403 tree itercnt = NULL_TREE;
4404  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4405    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4406 c; c = OMP_CLAUSE_CHAIN (c))
4407 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4408 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4409 {
4410 tree d = OMP_CLAUSE_DECL (c);
4411 tree t = d, a, dest;
4412	if (omp_privatize_by_reference (t))
4413 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4414 tree type = TREE_TYPE (t);
4415 if (POINTER_TYPE_P (type))
4416 type = sizetype;
4417 dest = unshare_expr (t);
4418 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4419	expand_omp_build_assign (&gsif, v, t);
4420 if (itercnt == NULL_TREE)
4421 {
4422 itercnt = startvar;
4423 tree n1 = fd->loop.n1;
4424 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4425 {
4426 itercnt
4427 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4428 itercnt);
4429 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4430 }
4431 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4432 itercnt, n1);
4433 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4434 itercnt, fd->loop.step);
4435 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4436 NULL_TREE, false,
4437 GSI_CONTINUE_LINKING);
4438 }
4439 a = fold_build2 (MULT_EXPR, type,
4440 fold_convert (type, itercnt),
4441 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4442 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4443 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4444 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4445 false, GSI_CONTINUE_LINKING);
4446	expand_omp_build_assign (&gsi, dest, t, true);
4447 }
4448 if (fd->collapse > 1)
4449    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4450
4451 if (fd->ordered)
4452 {
4453 /* Until now, counts array contained number of iterations or
4454 variable containing it for ith loop. From now on, we usually need
4455 those counts only for collapsed loops, and only for the 2nd
4456 till the last collapsed one. Move those one element earlier,
4457 we'll use counts[fd->collapse - 1] for the first source/sink
4458 iteration counter and so on and counts[fd->ordered]
4459 as the array holding the current counter values for
4460 depend(source). For doacross(sink:omp_cur_iteration - 1) we need
4461 the counts from fd->collapse to fd->ordered - 1; make a copy of
4462 those to counts[fd->ordered + 2] and onwards.
4463 counts[fd->ordered + 1] can be a flag whether it is the first
4464 iteration with a new collapsed counter (used only if
4465 fd->ordered > fd->collapse). */
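      /* Worked example (illustrative): with collapse == 2 and ordered == 4
	 the array has 8 slots; after the moves below counts[0] holds the
	 iteration count of the second collapsed loop, counts[1..3] serve
	 as the source/sink iteration counters (one per doacross dimension,
	 the collapsed nest counting as one), counts[4] is the .orditera
	 array, counts[5] the first-iteration flag and counts[6..7] the
	 saved counts of the two non-collapsed loops.  */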
4466 if (fd->ordered > fd->collapse)
4467	memcpy (counts + fd->ordered + 2, counts + fd->collapse,
4468		(fd->ordered - fd->collapse) * sizeof (counts[0]));
4469      if (fd->collapse > 1)
4470	memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4471 if (broken_loop)
4472 {
4473 int i;
4474 for (i = fd->collapse; i < fd->ordered; i++)
4475 {
4476 tree type = TREE_TYPE (fd->loops[i].v);
4477 tree this_cond
4478 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4479 fold_convert (type, fd->loops[i].n1),
4480 fold_convert (type, fd->loops[i].n2));
4481 if (!integer_onep (this_cond))
4482 break;
4483 }
4484 if (i < fd->ordered)
4485 {
4486 if (entry_bb->loop_father != l0_bb->loop_father)
4487 {
4488 remove_bb_from_loops (l0_bb);
4489 add_bb_to_loop (l0_bb, entry_bb->loop_father);
4490 gcc_assert (single_succ (l0_bb) == l1_bb);
4491 }
4492 cont_bb
4493 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4494 add_bb_to_loop (cont_bb, l0_bb->loop_father);
4495	      gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4496 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4497 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4498 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4499 make_edge (cont_bb, l1_bb, 0);
4500 l2_bb = create_empty_bb (cont_bb);
4501 broken_loop = false;
4502 }
4503 }
4504 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4505      cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4506 l0_bb, ordered_lastprivate);
4507 if (counts[fd->collapse - 1])
4508 {
4509 gcc_assert (fd->collapse == 1);
4510	  gsi = gsi_last_bb (l0_bb);
4511	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4512				   istart0, true);
4513	  if (cont_bb)
4514	    {
4515	      gsi = gsi_last_bb (cont_bb);
4516	      t = fold_build2 (PLUS_EXPR, fd->iter_type,
4517			       counts[fd->collapse - 1],
4518			       build_int_cst (fd->iter_type, 1));
4519	      expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4520	      tree aref = build4 (ARRAY_REF, fd->iter_type,
4521				  counts[fd->ordered], size_zero_node,
4522				  NULL_TREE, NULL_TREE);
4523	      expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4524 }
4525 t = counts[fd->collapse - 1];
4526 }
4527 else if (fd->collapse > 1)
4528 t = fd->loop.v;
4529 else
4530 {
4531 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4532 fd->loops[0].v, fd->loops[0].n1);
4533 t = fold_convert (fd->iter_type, t);
4534 }
4535      gsi = gsi_last_bb (l0_bb);
4536 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4537 size_zero_node, NULL_TREE, NULL_TREE);
4538 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4539 false, GSI_CONTINUE_LINKING);
4540      expand_omp_build_assign (&gsi, aref, t, true);
4541 }
4542
4543 if (!broken_loop)
4544 {
4545 /* Code to control the increment and predicate for the sequential
4546 loop goes in the CONT_BB. */
4547      gsi = gsi_last_nondebug_bb (cont_bb);
4548      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4549 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4550 vmain = gimple_omp_continue_control_use (cont_stmt);
4551 vback = gimple_omp_continue_control_def (cont_stmt);
4552
4553 if (cond_var)
4554 {
4555 tree itype = TREE_TYPE (cond_var);
4556 tree t2;
4557 if ((fd->ordered && fd->collapse == 1)
4558 || bias
4559 || POINTER_TYPE_P (type)
4560 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4561 || fd->loop.cond_code != LT_EXPR)
4562 t2 = build_int_cst (itype, 1);
4563 else
4564 t2 = fold_convert (itype, fd->loop.step);
4565 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4566 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4567 NULL_TREE, true, GSI_SAME_STMT);
4568 assign_stmt = gimple_build_assign (cond_var, t2);
4569 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4570 }
4571
4572      if (!gimple_omp_for_combined_p (fd->for_stmt))
4573 {
4574 if (POINTER_TYPE_P (type))
4575 t = fold_build_pointer_plus (vmain, fd->loop.step);
4576 else
4577 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4578 t = force_gimple_operand_gsi (&gsi, t,
4579 DECL_P (vback)
4580 && TREE_ADDRESSABLE (vback),
4581 NULL_TREE, true, GSI_SAME_STMT);
4582 assign_stmt = gimple_build_assign (vback, t);
4583 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4584
4585 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4586 {
4587 tree tem;
4588 if (fd->collapse > 1)
4589 tem = fd->loop.v;
4590 else
4591 {
4592 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4593 fd->loops[0].v, fd->loops[0].n1);
4594 tem = fold_convert (fd->iter_type, tem);
4595 }
4596 tree aref = build4 (ARRAY_REF, fd->iter_type,
4597 counts[fd->ordered], size_zero_node,
4598 NULL_TREE, NULL_TREE);
4599 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4600 true, GSI_SAME_STMT);
4601	      expand_omp_build_assign (&gsi, aref, tem);
4602 }
4603
4604 t = build2 (fd->loop.cond_code, boolean_type_node,
4605 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4606 iend);
4607	  gcond *cond_stmt = gimple_build_cond_empty (t);
4608 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4609 }
4610
4611 /* Remove GIMPLE_OMP_CONTINUE. */
4612 gsi_remove (&gsi, true);
4613
4614      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4615	collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4616
4617 /* Emit code to get the next parallel iteration in L2_BB. */
4618      gsi = gsi_start_bb (l2_bb);
4619
4620      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4621 build_fold_addr_expr (istart0),
4622 build_fold_addr_expr (iend0));
4623 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4624 false, GSI_CONTINUE_LINKING);
4625 if (TREE_TYPE (t) != boolean_type_node)
4626 t = fold_build2 (NE_EXPR, boolean_type_node,
4627 t, build_int_cst (TREE_TYPE (t), 0));
4628      gcond *cond_stmt = gimple_build_cond_empty (t);
4629 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4630 }
4631
4632 /* Add the loop cleanup function. */
4633  gsi = gsi_last_nondebug_bb (exit_bb);
4634  if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4635    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4636  else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4637    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4638  else
4639    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4640 gcall *call_stmt = gimple_build_call (t, 0);
4641 if (fd->ordered)
4642 {
4643 tree arr = counts[fd->ordered];
4644 tree clobber = build_clobber (TREE_TYPE (arr));
4645 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4646 GSI_SAME_STMT);
4647 }
4648  if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4649    {
4650      gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4651      if (fd->have_reductemp)
4652	{
4653	  gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4654					   gimple_call_lhs (call_stmt));
4655 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4656 }
4657 }
4658 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4659 gsi_remove (&gsi, true);
4660
4661 /* Connect the new blocks. */
4662 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4663 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4664
4665 if (!broken_loop)
4666 {
4667 gimple_seq phis;
4668
4669 e = find_edge (cont_bb, l3_bb);
4670 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4671
4672      phis = phi_nodes (l3_bb);
4673      for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4674	{
4675	  gimple *phi = gsi_stmt (gsi);
4676 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4677 PHI_ARG_DEF_FROM_EDGE (phi, e));
4678 }
4679 remove_edge (e);
4680
4681 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4682 e = find_edge (cont_bb, l1_bb);
4683 if (e == NULL)
4684 {
4685 e = BRANCH_EDGE (cont_bb);
4686 gcc_assert (single_succ (e->dest) == l1_bb);
4687 }
4688      if (gimple_omp_for_combined_p (fd->for_stmt))
4689 {
4690 remove_edge (e);
4691 e = NULL;
4692 }
4693 else if (fd->collapse > 1)
4694 {
4695 remove_edge (e);
4696 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4697 }
4698 else
4699 e->flags = EDGE_TRUE_VALUE;
4700 if (e)
4701 {
4702	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4703 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4704 }
4705 else
4706 {
4707 e = find_edge (cont_bb, l2_bb);
4708 e->flags = EDGE_FALLTHRU;
4709 }
4710 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4711
4712 if (gimple_in_ssa_p (cfun))
4713 {
4714 /* Add phis to the outer loop that connect to the phis in the inner,
4715 original loop, and move the loop entry value of the inner phi to
4716 the loop entry value of the outer phi. */
4717 gphi_iterator psi;
4718	  for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4719 {
4720 location_t locus;
4721 gphi *nphi;
4722 gphi *exit_phi = psi.phi ();
4723
4724	      if (virtual_operand_p (gimple_phi_result (exit_phi)))
4725 continue;
4726
4727 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4728 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4729
4730 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4731 edge latch_to_l1 = find_edge (latch, l1_bb);
4732 gphi *inner_phi
4733 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4734
4735	      tree t = gimple_phi_result (exit_phi);
4736	      tree new_res = copy_ssa_name (t, NULL);
4737 nphi = create_phi_node (new_res, l0_bb);
4738
4739 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4740 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4741	      locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4742 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4743 add_phi_arg (nphi, t, entry_to_l0, locus);
4744
4745 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4746 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4747
4748 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4749 }
4750 }
4751
4752 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4753 recompute_dominator (CDI_DOMINATORS, l2_bb));
4754 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4755 recompute_dominator (CDI_DOMINATORS, l3_bb));
4756 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4757 recompute_dominator (CDI_DOMINATORS, l0_bb));
4758 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4759 recompute_dominator (CDI_DOMINATORS, l1_bb));
4760
4761 /* We enter expand_omp_for_generic with a loop. This original loop may
4762 have its own loop struct, or it may be part of an outer loop struct
4763 (which may be the fake loop). */
4764 class loop *outer_loop = entry_bb->loop_father;
4765 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4766
4767 add_bb_to_loop (l2_bb, outer_loop);
4768
4769 /* We've added a new loop around the original loop. Allocate the
4770 corresponding loop struct. */
4771 class loop *new_loop = alloc_loop ();
4772 new_loop->header = l0_bb;
4773 new_loop->latch = l2_bb;
4774 add_loop (new_loop, outer_loop);
4775
4776 /* Allocate a loop structure for the original loop unless we already
4777 had one. */
4778 if (!orig_loop_has_loop_struct
4779	  && !gimple_omp_for_combined_p (fd->for_stmt))
4780 {
4781 class loop *orig_loop = alloc_loop ();
4782 orig_loop->header = l1_bb;
4783 /* The loop may have multiple latches. */
4784 add_loop (orig_loop, new_loop);
4785 }
4786 }
4787}
4788
4789/* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4790   compute needed allocation size.  If !ALLOC, this is for team
4791   allocations; if ALLOC, for per-thread allocations.  SZ is the initial needed size for
4792 other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes,
4793 CNT number of elements of each array, for !ALLOC this is
4794 omp_get_num_threads (), for ALLOC number of iterations handled by the
4795 current thread. If PTR is non-NULL, it is the start of the allocation
4796 and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_
4797 clauses pointers to the corresponding arrays. */
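/* Layout sketch (for illustration): with two _scantemp_ arrays of element
   types T1 and T2 and CNT elements each, the single allocation looks like

     [ SZ bytes | pad to align of T1 | CNT * sizeof (T1)
		| pad to align of T2 | CNT * sizeof (T2) ]

   the PTR == NULL invocation just sums up that total size, while the
   PTR != NULL invocation carves the block up and stores each array's
   start into the corresponding clause decl.  */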
4798
4799static tree
4800expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4801 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4802 gimple_stmt_iterator *gsi, bool alloc)
4803{
4804 tree eltsz = NULL_TREE;
4805 unsigned HOST_WIDE_INT preval = 0;
4806 if (ptr && sz)
4807 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4808 ptr, size_int (sz));
4809 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4810 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4811 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4812 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4813 {
4814 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4815 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4816 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4817 {
4818 unsigned HOST_WIDE_INT szl
4819 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4820	    szl = least_bit_hwi (szl);
4821 if (szl)
4822 al = MIN (al, szl);
4823 }
4824 if (ptr == NULL_TREE)
4825 {
4826 if (eltsz == NULL_TREE)
4827 eltsz = TYPE_SIZE_UNIT (pointee_type);
4828 else
4829 eltsz = size_binop (PLUS_EXPR, eltsz,
4830 TYPE_SIZE_UNIT (pointee_type));
4831 }
4832 if (preval == 0 && al <= alloc_align)
4833 {
4834 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4835 sz += diff;
4836 if (diff && ptr)
4837 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4838 ptr, size_int (diff));
4839 }
4840 else if (al > preval)
4841 {
4842 if (ptr)
4843 {
4844 ptr = fold_convert (pointer_sized_int_node, ptr);
4845 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4846 build_int_cst (pointer_sized_int_node,
4847 al - 1));
4848 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4849 build_int_cst (pointer_sized_int_node,
4850 -(HOST_WIDE_INT) al));
4851 ptr = fold_convert (ptr_type_node, ptr);
4852 }
4853 else
4854 sz += al - 1;
4855 }
4856 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4857 preval = al;
4858 else
4859 preval = 1;
4860 if (ptr)
4861 {
4862	    expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4863 ptr = OMP_CLAUSE_DECL (c);
4864 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4865 size_binop (MULT_EXPR, cnt,
4866 TYPE_SIZE_UNIT (pointee_type)));
4867 }
4868 }
4869
4870 if (ptr == NULL_TREE)
4871 {
4872 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4873 if (sz)
4874 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4875 return eltsz;
4876 }
4877 else
4878 return ptr;
4879}
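
/* For reference, the pointer adjustment in the AL > PREVAL branch above
   is the usual round-up-to-alignment idiom.  A minimal standalone sketch
   of the same computation (AL assumed to be a power of two, which
   TYPE_ALIGN_UNIT guarantees):

     uintptr_t p = (uintptr_t) ptr;
     p = (p + AL - 1) & -(uintptr_t) AL;
     ptr = (void *) p;

   This corresponds to the PLUS_EXPR/BIT_AND_EXPR pair built on
   pointer_sized_int_node above.  */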

/* Return the last _looptemp_ clause if one has been created for
   lastprivate on distribute parallel for{, simd} or taskloop.
   FD is the loop data and INNERC should be the second _looptemp_
   clause (the one holding the end of the range).
   This is followed by collapse - 1 _looptemp_ clauses for counts[1]
   and up, and for triangular loops by 4 further _looptemp_ clauses
   (one for counts[0], one for first_inner_iterations, one for factor
   and one for adjn1).  After this there is optionally one _looptemp_
   clause that this function returns.  */

static tree
find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
{
  gcc_assert (innerc);
  int count = fd->collapse - 1;
  if (fd->non_rect
      && fd->last_nonrect == fd->first_nonrect + 1
      && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
    count += 4;
  for (int i = 0; i < count; i++)
    {
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
    }
  return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
			  OMP_CLAUSE__LOOPTEMP_);
}
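
/* As an illustration of the clause layout handled above: for a
   rectangular collapse(3) distribute parallel for with lastprivate,
   the _looptemp_ chain is (1) start of the range, (2) end of the range
   (INNERC on entry), (3) counts[1], (4) counts[2], and optionally
   (5) the clause returned here for the lastprivate handling.  */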

/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and no specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	q = n / nthreads;
	tt = n % nthreads;
	if (threadid < tt) goto L3; else goto L4;
    L3:
	tt = 0;
	q = q + 1;
    L4:
	s0 = q * threadid + tt;
	e0 = s0 + q;
	V = s0 * STEP + N1;
	if (s0 >= e0) goto L2; else goto L0;
    L0:
	e = e0 * STEP + N1;
    L1:
	BODY;
	V += STEP;
	if (V cond e) goto L1;
    L2:
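
   For example, with n = 10 iterations and nthreads = 4 this computes
   q = 2 and tt = 2, so the remainder is spread over the first tt
   threads: threads 0 and 1 run q + 1 = 3 iterations each ([0,3) and
   [3,6)) while threads 2 and 3 run 2 each ([6,8) and [8,10)).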
*/

static void
expand_omp_for_static_nochunk (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
  tree type, itype, vmain, vback;
  basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
  basic_block body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
  basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
  gimple_stmt_iterator gsi, gsip;
  edge ep;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;
  tree reductions = NULL_TREE;
  tree cond_var = NULL_TREE, condtemp = NULL_TREE;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  gsip = gsi;
  gsi_prev (&gsip);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    t = integer_one_node;
  else
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
		     fold_convert (type, fd->loop.n1),
		     fold_convert (type, fd->loop.n2));
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
						n1, n2);
      ep = split_block (entry_bb, cond_stmt);
      ep->flags = EDGE_TRUE_VALUE;
      entry_bb = ep->dest;
      ep->probability = profile_probability::very_likely ();
      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
      ep->probability = profile_probability::very_unlikely ();
      if (gimple_in_ssa_p (cfun))
	{
	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
	       !gsi_end_p (gpi); gsi_next (&gpi))
	    {
	      gphi *phi = gpi.phi ();
	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
			   ep, UNKNOWN_LOCATION);
	    }
	}
      gsi = gsi_last_bb (entry_bb);
    }

  if (fd->lastprivate_conditional)
    {
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
      if (fd->have_pointer_condtemp)
	condtemp = OMP_CLAUSE_DECL (c);
      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
      cond_var = OMP_CLAUSE_DECL (c);
    }
  if (fd->have_reductemp
      /* For scan, we don't want to reinitialize condtemp before the
	 second loop.  */
      || (fd->have_pointer_condtemp && !fd->have_scantemp)
      || fd->have_nonctrl_scantemp)
    {
      tree t1 = build_int_cst (long_integer_type_node, 0);
      tree t2 = build_int_cst (long_integer_type_node, 1);
      tree t3 = build_int_cstu (long_integer_type_node,
				(HOST_WIDE_INT_1U << 31) + 1);
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      gimple_stmt_iterator gsi2 = gsi_none ();
      gimple *g = NULL;
      tree mem = null_pointer_node, memv = NULL_TREE;
      unsigned HOST_WIDE_INT condtemp_sz = 0;
      unsigned HOST_WIDE_INT alloc_align = 0;
      if (fd->have_reductemp)
	{
	  gcc_assert (!fd->have_nonctrl_scantemp);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
	  reductions = OMP_CLAUSE_DECL (c);
	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
	  g = SSA_NAME_DEF_STMT (reductions);
	  reductions = gimple_assign_rhs1 (g);
	  OMP_CLAUSE_DECL (c) = reductions;
	  gsi2 = gsi_for_stmt (g);
	}
      else
	{
	  if (gsi_end_p (gsip))
	    gsi2 = gsi_after_labels (region->entry);
	  else
	    gsi2 = gsip;
	  reductions = null_pointer_node;
	}
      if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
	{
	  tree type;
	  if (fd->have_pointer_condtemp)
	    type = TREE_TYPE (condtemp);
	  else
	    type = ptr_type_node;
	  memv = create_tmp_var (type);
	  TREE_ADDRESSABLE (memv) = 1;
	  unsigned HOST_WIDE_INT sz = 0;
	  tree size = NULL_TREE;
	  if (fd->have_pointer_condtemp)
	    {
	      sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
	      sz *= fd->lastprivate_conditional;
	      condtemp_sz = sz;
	    }
	  if (fd->have_nonctrl_scantemp)
	    {
	      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	      gimple *g = gimple_build_call (nthreads, 0);
	      nthreads = create_tmp_var (integer_type_node);
	      gimple_call_set_lhs (g, nthreads);
	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
	      nthreads = fold_convert (sizetype, nthreads);
	      alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
	      size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
						alloc_align, nthreads, NULL,
						false);
	      size = fold_convert (type, size);
	    }
	  else
	    size = build_int_cst (type, sz);
	  expand_omp_build_assign (&gsi2, memv, size, false);
	  mem = build_fold_addr_expr (memv);
	}
      tree t
	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
			   9, t1, t2, t2, t3, t1, null_pointer_node,
			   null_pointer_node, reductions, mem);
      force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
				true, GSI_SAME_STMT);
      if (fd->have_pointer_condtemp)
	expand_omp_build_assign (&gsi2, condtemp, memv, false);
      if (fd->have_nonctrl_scantemp)
	{
	  tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
	  expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
				     alloc_align, nthreads, &gsi2, false);
	}
      if (fd->have_reductemp)
	{
	  gsi_remove (&gsi2, true);
	  release_ssa_name (gimple_assign_lhs (g));
	}
    }
  switch (gimple_omp_for_kind (fd->for_stmt))
    {
    case GF_OMP_FOR_KIND_FOR:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
      break;
    case GF_OMP_FOR_KIND_DISTRIBUTE:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
      break;
    default:
      gcc_unreachable ();
    }
  nthreads = build_call_expr (nthreads, 0);
  nthreads = fold_convert (itype, nthreads);
  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
				       true, GSI_SAME_STMT);
  threadid = build_call_expr (threadid, 0);
  threadid = fold_convert (itype, threadid);
  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
				       true, GSI_SAME_STMT);

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
		     fold_build1 (NEGATE_EXPR, itype, t),
		     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  q = create_tmp_reg (itype, "q");
  t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);

  tt = create_tmp_reg (itype, "tt");
  t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);

  t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
  gcond *cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);

  second_bb = split_block (entry_bb, cond_stmt)->dest;
  gsi = gsi_last_nondebug_bb (second_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
		     GSI_SAME_STMT);
  gassign *assign_stmt
    = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  third_bb = split_block (second_bb, assign_stmt)->dest;
  gsi = gsi_last_nondebug_bb (third_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->have_nonctrl_scantemp)
    {
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree controlp = NULL_TREE, controlb = NULL_TREE;
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	    && OMP_CLAUSE__SCANTEMP__CONTROL (c))
	  {
	    if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
	      controlb = OMP_CLAUSE_DECL (c);
	    else
	      controlp = OMP_CLAUSE_DECL (c);
	    if (controlb && controlp)
	      break;
	  }
      gcc_assert (controlp && controlb);
      tree cnt = create_tmp_var (sizetype);
      gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
      tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
					   alloc_align, cnt, NULL, true);
      tree size = create_tmp_var (sizetype);
      expand_omp_build_assign (&gsi, size, sz, false);
      tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
			      size, size_int (16384));
      expand_omp_build_assign (&gsi, controlb, cmp);
      g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
			     NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      fourth_bb = split_block (third_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (fourth_bb);
      /* FIXME: Once we have allocators, this should use allocator.  */
      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
      gimple_call_set_lhs (g, controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
				 &gsi, true);
      gsi_prev (&gsi);
      g = gsi_stmt (gsi);
      fifth_bb = split_block (fourth_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (fifth_bb);

      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
      gimple_call_set_lhs (g, controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	    && OMP_CLAUSE__SCANTEMP__ALLOC (c))
	  {
	    tree tmp = create_tmp_var (sizetype);
	    tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
	    g = gimple_build_assign (tmp, MULT_EXPR, cnt,
				     TYPE_SIZE_UNIT (pointee_type));
	    gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	    g = gimple_build_call (alloca_decl, 2, tmp,
				   size_int (TYPE_ALIGN (pointee_type)));
	    gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
	    gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	  }

      sixth_bb = split_block (fifth_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (sixth_bb);
    }

  t = build2 (MULT_EXPR, itype, q, threadid);
  t = build2 (PLUS_EXPR, itype, t, tt);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = fold_build2 (PLUS_EXPR, itype, s0, q);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = build2 (GE_EXPR, boolean_type_node, s0, e0);
  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
  gsi = gsi_start_bb (seq_start_bb);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
	? gimple_omp_parallel_clauses (inner_stmt)
	: gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
	{
	  innerc = find_lastprivate_looptemp (fd, innerc);
	  if (innerc)
	    {
	      /* If needed (distribute parallel for with lastprivate),
		 propagate down the total number of iterations.  */
	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
				     fd->loop.n2);
	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	    }
	}
    }
  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
	t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (cond_var)
    {
      tree itype = TREE_TYPE (cond_var);
      /* For lastprivate(conditional:) itervar, we need some iteration
	 counter that starts at unsigned non-zero and increases.
	 Prefer as few IVs as possible, so if we can use startvar
	 itself, use that, or startvar + constant (those would be
	 incremented with step), and as a last resort use s0 + 1,
	 incremented by 1.  */
      if (POINTER_TYPE_P (type)
	  || TREE_CODE (n1) != INTEGER_CST
	  || fd->loop.cond_code != LT_EXPR)
	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
			 build_int_cst (itype, 1));
      else if (tree_int_cst_sgn (n1) == 1)
	t = fold_convert (itype, t);
      else
	{
	  tree c = fold_convert (itype, n1);
	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
	}
      t = force_gimple_operand_gsi (&gsi, t, false,
				    NULL_TREE, false, GSI_CONTINUE_LINKING);
      assign_stmt = gimple_build_assign (cond_var, t);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
	t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE;
  tree *nonrect_bounds = NULL;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  tree t = d, a, dest;
	  if (omp_privatize_by_reference (t))
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  if (itercnt == NULL_TREE)
	    {
	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
		{
		  itercnt = fold_build2 (MINUS_EXPR, itype,
					 fold_convert (itype, n1),
					 fold_convert (itype, fd->loop.n1));
		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						      NULL_TREE, false,
						      GSI_CONTINUE_LINKING);
		}
	      else
		itercnt = s0;
	    }
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  dest = unshare_expr (t);
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  expand_omp_build_assign (&gsi, dest, t, true);
	}
  if (fd->collapse > 1)
    {
      if (fd->non_rect)
	{
	  nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
	  memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
	}
      expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
				startvar);
    }

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (cond_var)
	{
	  tree itype = TREE_TYPE (cond_var);
	  tree t2;
	  if (POINTER_TYPE_P (type)
	      || TREE_CODE (n1) != INTEGER_CST
	      || fd->loop.cond_code != LT_EXPR)
	    t2 = build_int_cst (itype, 1);
	  else
	    t2 = fold_convert (itype, step);
	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
					 NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (cond_var, t2);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
	}

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
						   cont_bb, body_bb);
    }

  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      if (fd->have_reductemp
	  || ((fd->have_pointer_condtemp || fd->have_scantemp)
	      && !fd->have_nonctrl_scantemp))
	{
	  tree fn;
	  if (t)
	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
	  else
	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
	  gcall *g = gimple_build_call (fn, 0);
	  if (t)
	    {
	      gimple_call_set_lhs (g, t);
	      if (fd->have_reductemp)
		gsi_insert_after (&gsi, gimple_build_assign (reductions,
							     NOP_EXPR, t),
				  GSI_SAME_STMT);
	    }
	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
	}
      else
	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  else if ((fd->have_pointer_condtemp || fd->have_scantemp)
	   && !fd->have_nonctrl_scantemp)
    {
      tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
      gcall *g = gimple_build_call (fn, 0);
      gsi_insert_after (&gsi, g, GSI_SAME_STMT);
    }
  if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
    {
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree controlp = NULL_TREE, controlb = NULL_TREE;
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	    && OMP_CLAUSE__SCANTEMP__CONTROL (c))
	  {
	    if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
	      controlb = OMP_CLAUSE_DECL (c);
	    else
	      controlp = OMP_CLAUSE_DECL (c);
	    if (controlb && controlp)
	      break;
	  }
      gcc_assert (controlp && controlb);
      gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
				     NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit1_bb = split_block (exit_bb, g)->dest;
      gsi = gsi_after_labels (exit1_bb);
      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
			     controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit2_bb = split_block (exit1_bb, g)->dest;
      gsi = gsi_after_labels (exit2_bb);
      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
			     controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit3_bb = split_block (exit2_bb, g)->dest;
      gsi = gsi_after_labels (exit3_bb);
    }
  gsi_remove (&gsi, true);

  /* Connect all the blocks.  */
  ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
  ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
  ep = find_edge (entry_bb, second_bb);
  ep->flags = EDGE_TRUE_VALUE;
  ep->probability = profile_probability::guessed_always () / 4;
  if (fourth_bb)
    {
      ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
      ep->probability = profile_probability::guessed_always () / 2;
      ep = find_edge (third_bb, fourth_bb);
      ep->flags = EDGE_TRUE_VALUE;
      ep->probability = profile_probability::guessed_always () / 2;
      ep = find_edge (fourth_bb, fifth_bb);
      redirect_edge_and_branch (ep, sixth_bb);
    }
  else
    sixth_bb = third_bb;
  find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
  find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
  if (exit1_bb)
    {
      ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
      ep->probability = profile_probability::guessed_always () / 2;
      ep = find_edge (exit_bb, exit1_bb);
      ep->flags = EDGE_TRUE_VALUE;
      ep->probability = profile_probability::guessed_always () / 2;
      ep = find_edge (exit1_bb, exit2_bb);
      redirect_edge_and_branch (ep, exit3_bb);
    }

  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (ep == NULL)
	{
	  ep = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (ep->dest) == body_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
  if (fourth_bb)
    {
      set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
      set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
    }
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			   recompute_dominator (CDI_DOMINATORS, fin_bb));
  if (exit1_bb)
    {
      set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
      set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
    }

  class loop *loop = body_bb->loop_father;
  if (loop != entry_bb->loop_father)
    {
      gcc_assert (broken_loop || loop->header == body_bb);
      gcc_assert (broken_loop
		  || loop->latch == region->cont
		  || single_pred (loop->latch) == region->cont);
      return;
    }

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}
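
/* As an illustration (not part of the expansion itself), a minimal
   user-level loop that reaches the routine above is

     #pragma omp parallel for schedule(static)
     for (int i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   i.e. a static schedule with no chunk size, where each thread receives
   one contiguous block of iterations computed from q, tt, s0 and e0 as
   in the pseudocode above.  */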

/* Return phi in E->DEST with ARG on edge E.  */

static gphi *
find_phi_with_arg_on_edge (tree arg, edge e)
{
  basic_block bb = e->dest;

  for (gphi_iterator gpi = gsi_start_phis (bb);
       !gsi_end_p (gpi);
       gsi_next (&gpi))
    {
      gphi *phi = gpi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
	return phi;
    }

  return NULL;
}

/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and a specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	trip = 0;
	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
					      here so that V is defined
					      if the loop is not entered
    L0:
	s0 = (trip * nthreads + threadid) * CHUNK;
	e0 = min (s0 + CHUNK, n);
	if (s0 < n) goto L1; else goto L4;
    L1:
	V = s0 * STEP + N1;
	e = e0 * STEP + N1;
    L2:
	BODY;
	V += STEP;
	if (V cond e) goto L2; else goto L3;
    L3:
	trip += 1;
	goto L0;
    L4:
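
   For example, with n = 10, nthreads = 2 and CHUNK = 2, thread 0
   executes chunks [0,2), [4,6) and [8,10) on trips 0, 1 and 2, while
   thread 1 executes [2,4) and [6,8); each trip deals every thread the
   next CHUNK-sized slice in round-robin order.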
*/

static void
expand_omp_for_static_chunk (struct omp_region *region,
			     struct omp_for_data *fd, gimple *inner_stmt)
{
  tree n, s0, e0, e, t;
  tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
  tree type, itype, vmain, vback, vextra;
  basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
  basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
  gimple_stmt_iterator gsi, gsip;
  edge se;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;
  tree reductions = NULL_TREE;
  tree cond_var = NULL_TREE, condtemp = NULL_TREE;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  entry_bb = region->entry;
  se = split_block (entry_bb, last_nondebug_stmt (entry_bb));
  entry_bb = se->src;
  iter_part_bb = se->dest;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
  gcc_assert (broken_loop
	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
  seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
      trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
    }
  exit_bb = region->exit;

  /* Trip and adjustment setup goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  gsip = gsi;
  gsi_prev (&gsip);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    t = integer_one_node;
  else
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
		     fold_convert (type, fd->loop.n1),
		     fold_convert (type, fd->loop.n2));
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
						n1, n2);
      se = split_block (entry_bb, cond_stmt);
      se->flags = EDGE_TRUE_VALUE;
      entry_bb = se->dest;
      se->probability = profile_probability::very_likely ();
      se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
      se->probability = profile_probability::very_unlikely ();
      if (gimple_in_ssa_p (cfun))
	{
	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
	       !gsi_end_p (gpi); gsi_next (&gpi))
	    {
	      gphi *phi = gpi.phi ();
	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
			   se, UNKNOWN_LOCATION);
	    }
	}
      gsi = gsi_last_bb (entry_bb);
    }

  if (fd->lastprivate_conditional)
    {
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
      if (fd->have_pointer_condtemp)
	condtemp = OMP_CLAUSE_DECL (c);
      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
      cond_var = OMP_CLAUSE_DECL (c);
    }
  if (fd->have_reductemp || fd->have_pointer_condtemp)
    {
      tree t1 = build_int_cst (long_integer_type_node, 0);
      tree t2 = build_int_cst (long_integer_type_node, 1);
      tree t3 = build_int_cstu (long_integer_type_node,
				(HOST_WIDE_INT_1U << 31) + 1);
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      gimple_stmt_iterator gsi2 = gsi_none ();
      gimple *g = NULL;
      tree mem = null_pointer_node, memv = NULL_TREE;
      if (fd->have_reductemp)
	{
	  tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
	  reductions = OMP_CLAUSE_DECL (c);
	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
	  g = SSA_NAME_DEF_STMT (reductions);
	  reductions = gimple_assign_rhs1 (g);
	  OMP_CLAUSE_DECL (c) = reductions;
	  gsi2 = gsi_for_stmt (g);
	}
      else
	{
	  if (gsi_end_p (gsip))
	    gsi2 = gsi_after_labels (region->entry);
	  else
	    gsi2 = gsip;
	  reductions = null_pointer_node;
	}
      if (fd->have_pointer_condtemp)
	{
	  tree type = TREE_TYPE (condtemp);
	  memv = create_tmp_var (type);
	  TREE_ADDRESSABLE (memv) = 1;
	  unsigned HOST_WIDE_INT sz
	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
	  sz *= fd->lastprivate_conditional;
	  expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
				   false);
	  mem = build_fold_addr_expr (memv);
	}
      tree t
	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
			   9, t1, t2, t2, t3, t1, null_pointer_node,
			   null_pointer_node, reductions, mem);
      force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
				true, GSI_SAME_STMT);
      if (fd->have_pointer_condtemp)
	expand_omp_build_assign (&gsi2, condtemp, memv, false);
      if (fd->have_reductemp)
	{
	  gsi_remove (&gsi2, true);
	  release_ssa_name (gimple_assign_lhs (g));
	}
    }
  switch (gimple_omp_for_kind (fd->for_stmt))
    {
    case GF_OMP_FOR_KIND_FOR:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
      break;
    case GF_OMP_FOR_KIND_DISTRIBUTE:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
      break;
    default:
      gcc_unreachable ();
    }
  nthreads = build_call_expr (nthreads, 0);
  nthreads = fold_convert (itype, nthreads);
  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
				       true, GSI_SAME_STMT);
  threadid = build_call_expr (threadid, 0);
  threadid = fold_convert (itype, threadid);
  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
				       true, GSI_SAME_STMT);

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);
  tree chunk_size = fold_convert (itype, fd->chunk_size);
  chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
  chunk_size
    = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
				GSI_SAME_STMT);

  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
		     fold_build1 (NEGATE_EXPR, itype, t),
		     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				true, GSI_SAME_STMT);

  trip_var = create_tmp_reg (itype, ".trip");
  if (gimple_in_ssa_p (cfun))
    {
      trip_init = make_ssa_name (trip_var);
      trip_main = make_ssa_name (trip_var);
      trip_back = make_ssa_name (trip_var);
    }
  else
    {
      trip_init = trip_var;
      trip_main = trip_var;
      trip_back = trip_var;
    }

  gassign *assign_stmt
    = gimple_build_assign (trip_init, build_int_cst (itype, 0));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				     true, GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR.  */
  gsi_remove (&gsi, true);

  gimple_stmt_iterator gsif = gsi;

  /* Iteration space partitioning goes in ITER_PART_BB.  */
  gsi = gsi_last_bb (iter_part_bb);

  t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
  t = fold_build2 (PLUS_EXPR, itype, t, threadid);
  t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				 false, GSI_CONTINUE_LINKING);

  t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
  t = fold_build2 (MIN_EXPR, itype, t, n);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				 false, GSI_CONTINUE_LINKING);

  t = build2 (LT_EXPR, boolean_type_node, s0, n);
  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);

  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
  gsi = gsi_start_bb (seq_start_bb);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
	? gimple_omp_parallel_clauses (inner_stmt)
	: gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
	{
	  innerc = find_lastprivate_looptemp (fd, innerc);
	  if (innerc)
	    {
	      /* If needed (distribute parallel for with lastprivate),
		 propagate down the total number of iterations.  */
	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
				     fd->loop.n2);
	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	    }
	}
    }

  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
	t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (cond_var)
    {
      tree itype = TREE_TYPE (cond_var);
      /* For lastprivate(conditional:) itervar, we need some iteration
	 counter that starts at unsigned non-zero and increases.
	 Prefer as few IVs as possible, so if we can use startvar
	 itself, use that, or startvar + constant (those would be
	 incremented with step), and as a last resort use s0 + 1,
	 incremented by 1.  */
      if (POINTER_TYPE_P (type)
	  || TREE_CODE (n1) != INTEGER_CST
	  || fd->loop.cond_code != LT_EXPR)
	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
			 build_int_cst (itype, 1));
      else if (tree_int_cst_sgn (n1) == 1)
	t = fold_convert (itype, t);
      else
	{
	  tree c = fold_convert (itype, n1);
	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
	}
      t = force_gimple_operand_gsi (&gsi, t, false,
				    NULL_TREE, false, GSI_CONTINUE_LINKING);
      assign_stmt = gimple_build_assign (cond_var, t);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
	t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  tree t = d, a, dest;
	  if (omp_privatize_by_reference (t))
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  dest = unshare_expr (t);
	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
	  expand_omp_build_assign (&gsif, v, t);
	  if (itercnt == NULL_TREE)
	    {
	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
		{
		  itercntbias
		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
				   fold_convert (itype, fd->loop.n1));
		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
					     itercntbias, step);
		  itercntbias
		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
						NULL_TREE, true,
						GSI_SAME_STMT);
		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						      NULL_TREE, false,
						      GSI_CONTINUE_LINKING);
		}
	      else
		itercnt = s0;
	    }
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  expand_omp_build_assign (&gsi, dest, t, true);
	}
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);

  if (!broken_loop)
    {
      /* The code controlling the sequential loop goes in CONT_BB,
	 replacing the GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (cond_var)
	{
	  tree itype = TREE_TYPE (cond_var);
	  tree t2;
	  if (POINTER_TYPE_P (type)
	      || TREE_CODE (n1) != INTEGER_CST
	      || fd->loop.cond_code != LT_EXPR)
	    t2 = build_int_cst (itype, 1);
	  else
	    t2 = fold_convert (itype, step);
	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
					 NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (cond_var, t2);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
	}

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
	    t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					  true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
	    t = build2 (EQ_EXPR, boolean_type_node,
			build_int_cst (itype, 0),
			build_int_cst (itype, 1));
	  else
	    t = build2 (fd->loop.cond_code, boolean_type_node,
			DECL_P (vback) && TREE_ADDRESSABLE (vback)
			? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);

      /* Trip update code goes into TRIP_UPDATE_BB.  */
      gsi = gsi_start_bb (trip_update_bb);

      t = build_int_cst (itype, 1);
      t = build2 (PLUS_EXPR, itype, trip_main, t);
      assign_stmt = gimple_build_assign (trip_back, t);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      if (fd->have_reductemp || fd->have_pointer_condtemp)
	{
	  tree fn;
	  if (t)
	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
	  else
	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
	  gcall *g = gimple_build_call (fn, 0);
	  if (t)
	    {
	      gimple_call_set_lhs (g, t);
	      if (fd->have_reductemp)
		gsi_insert_after (&gsi, gimple_build_assign (reductions,
							     NOP_EXPR, t),
				  GSI_SAME_STMT);
	    }
	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
	}
      else
	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  else if (fd->have_pointer_condtemp)
    {
      tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
      gcall *g = gimple_build_call (fn, 0);
      gsi_insert_after (&gsi, g, GSI_SAME_STMT);
    }
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
  find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;

  if (!broken_loop)
    {
      se = find_edge (cont_bb, body_bb);
      if (se == NULL)
	{
	  se = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (se->dest) == body_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (se);
	  se = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (se);
	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	se->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, trip_update_bb)->flags
	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;

      redirect_edge_and_branch (single_succ_edge (trip_update_bb),
				iter_part_bb);
    }

  if (gimple_in_ssa_p (cfun))
    {
      gphi_iterator psi;
      gphi *phi;
      edge re, ene;
      edge_var_map *vm;
      size_t i;

      gcc_assert (fd->collapse == 1 && !broken_loop);

      /* When we redirect the edge from trip_update_bb to iter_part_bb, we
	 remove arguments of the phi nodes in fin_bb.  We need to create
	 appropriate phi nodes in iter_part_bb instead.  */
      se = find_edge (iter_part_bb, fin_bb);
      re = single_succ_edge (trip_update_bb);
      vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
      ene = single_succ_edge (entry_bb);

      psi = gsi_start_phis (fin_bb);
      for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
	   gsi_next (&psi), ++i)
	{
	  gphi *nphi;
	  location_t locus;

	  phi = psi.phi ();
	  if (operand_equal_p (gimple_phi_arg_def (phi, 0),
			       redirect_edge_var_map_def (vm), 0))
	    continue;

	  t = gimple_phi_result (phi);
	  gcc_assert (t == redirect_edge_var_map_result (vm));

	  if (!single_pred_p (fin_bb))
	    t = copy_ssa_name (t, phi);

	  nphi = create_phi_node (t, iter_part_bb);

	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
	  locus = gimple_phi_arg_location_from_edge (phi, se);

	  /* A special case -- fd->loop.v is not yet computed in
	     iter_part_bb, we need to use vextra instead.  */
	  if (t == fd->loop.v)
	    t = vextra;
	  add_phi_arg (nphi, t, ene, locus);
	  locus = redirect_edge_var_map_location (vm);
	  tree back_arg = redirect_edge_var_map_def (vm);
	  add_phi_arg (nphi, back_arg, re, locus);
	  edge ce = find_edge (cont_bb, body_bb);
	  if (ce == NULL)
	    {
	      ce = BRANCH_EDGE (cont_bb);
	      gcc_assert (single_succ (ce->dest) == body_bb);
	      ce = single_succ_edge (ce->dest);
	    }
	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
	  gcc_assert (inner_loop_phi != NULL);
	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
		       find_edge (seq_start_bb, body_bb), locus);

	  if (!single_pred_p (fin_bb))
	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
	}
      gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
      redirect_edge_var_map_clear (re);
      if (single_pred_p (fin_bb))
	while (1)
	  {
	    psi = gsi_start_phis (fin_bb);
	    if (gsi_end_p (psi))
	      break;
	    remove_phi_node (&psi, false);
	  }

      /* Make phi node for trip.  */
      phi = create_phi_node (trip_main, iter_part_bb);
      add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
		   UNKNOWN_LOCATION);
      add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
		   UNKNOWN_LOCATION);
    }

  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
  set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			   recompute_dominator (CDI_DOMINATORS, fin_bb));
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));

  if (!broken_loop)
    {
      class loop *loop = body_bb->loop_father;
      class loop *trip_loop = alloc_loop ();
      trip_loop->header = iter_part_bb;
      trip_loop->latch = trip_update_bb;
      add_loop (trip_loop, iter_part_bb->loop_father);

      if (loop != entry_bb->loop_father)
	{
	  gcc_assert (loop->header == body_bb);
	  gcc_assert (loop->latch == region->cont
		      || single_pred (loop->latch) == region->cont);
	  trip_loop->inner = loop;
	  return;
	}

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  loop = alloc_loop ();
	  loop->header = body_bb;
	  if (collapse_bb == NULL)
	    loop->latch = cont_bb;
	  add_loop (loop, trip_loop);
	}
    }
}
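
/* Illustration (not part of the expansion itself): a user-level loop
   reaching the routine above is

     #pragma omp parallel for schedule(static, 4)
     for (int i = 0; i < n; i++)
       a[i] += 1;

   where chunks of 4 iterations are dealt out round-robin and the trip
   counter selects each thread's next chunk.  */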

/* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
   loop.  Given parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	V = N1;
	goto L1;
    L0:
	BODY;
	V += STEP;
    L1:
	if (V cond N2) goto L0; else goto L2;
    L2:
6429
6430 For collapsed loops, emit the outer loops as scalar
6431 and only try to vectorize the innermost loop. */
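
/* As a purely illustrative example (not taken from the testsuite): a
   simple

     #pragma omp simd safelen(8)
     for (int i = 0; i < n; i++)
       a[i] += b[i];

   follows the scheme above, producing

	i = 0;
	goto L1;
    L0:
	a[i] += b[i];
	i += 1;
    L1:
	if (i < n) goto L0; else goto L2;
    L2:

   and the resulting CFG loop gets safelen 8, telling the vectorizer it
   may assume no loop-carried dependence spans fewer than 8 iterations.  */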

static void
expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
{
  tree type, t;
  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  gcond *cond_stmt;
  bool broken_loop = region->cont == NULL;
  edge e, ne;
  tree *counts = NULL;
  int i;
  int safelen_int = INT_MAX;
  bool dont_vectorize = false;
  tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE_SAFELEN);
  tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE__SIMDUID_);
  tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
			      OMP_CLAUSE_IF);
  tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE_SIMDLEN);
  tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				   OMP_CLAUSE__CONDTEMP_);
  tree n1, n2;
  tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;

  if (safelen)
    {
      poly_uint64 val;
      safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
      if (!poly_int_tree_p (safelen, &val))
	safelen_int = 0;
      else
	safelen_int = MIN (constant_lower_bound (val), INT_MAX);
      if (safelen_int == 1)
	safelen_int = 0;
    }
  if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
      || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
    {
      safelen_int = 0;
      dont_vectorize = true;
    }
  type = TREE_TYPE (fd->loop.v);
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (broken_loop
	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
      l1_bb = split_block (cont_bb, last_nondebug_stmt (cont_bb))->dest;
      l2_bb = BRANCH_EDGE (entry_bb)->dest;
    }
  else
    {
      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
      l2_bb = single_succ (l1_bb);
    }
  exit_bb = region->exit;
  l2_dom_bb = NULL;

  gsi = gsi_last_nondebug_bb (entry_bb);

  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  /* Not needed in SSA form right now.  */
  gcc_assert (!gimple_in_ssa_p (cfun));
  if (fd->collapse > 1
      && (gimple_omp_for_combined_into_p (fd->for_stmt)
	  || broken_loop))
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
    }
  if (l2_dom_bb == NULL)
    l2_dom_bb = l1_bb;

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  tree step = fd->loop.step;
  tree orig_step = step; /* May be different from step if is_simt.  */

  bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE__SIMT_);
  if (is_simt)
    {
      cfun->curr_properties &= ~PROP_gimple_lomp_dev;
      is_simt = safelen_int > 1;
    }
  tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
  if (is_simt)
    {
      simt_lane = create_tmp_var (unsigned_type_node);
      gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
      gimple_call_set_lhs (g, simt_lane);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
				 fold_convert (TREE_TYPE (step), simt_lane));
      n1 = fold_convert (type, n1);
      if (POINTER_TYPE_P (type))
	n1 = fold_build_pointer_plus (n1, offset);
      else
	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));

      /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
      if (fd->collapse > 1)
	simt_maxlane = build_one_cst (unsigned_type_node);
      else if (safelen_int < omp_max_simt_vf ())
	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
      tree vf
	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
					unsigned_type_node, 0);
      if (simt_maxlane)
	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
      vf = fold_convert (TREE_TYPE (step), vf);
      step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
    }

  tree n2var = NULL_TREE;
  tree n2v = NULL_TREE;
  tree *nonrect_bounds = NULL;
  tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
  if (fd->collapse > 1)
    {
      if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
	{
	  if (fd->non_rect)
	    {
	      nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
	      memset (nonrect_bounds, 0,
		      sizeof (tree) * (fd->last_nonrect + 1));
	    }
	  expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
	  gcc_assert (entry_bb == gsi_bb (gsi));
	  gcc_assert (fd->for_stmt == gsi_stmt (gsi));
	  gsi_prev (&gsi);
	  entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
	  expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
				    NULL, n1);
	  gsi = gsi_for_stmt (fd->for_stmt);
	}
      if (broken_loop)
	;
      else if (gimple_omp_for_combined_into_p (fd->for_stmt))
	{
	  /* Compute in n2var the limit for the first innermost loop,
	     i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt), where cnt is
	     how many iterations the innermost loop would have if all
	     further iterations were assigned to the current task.  */
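	  /* Purely illustrative numbers: if the current task was handed
	     the flattened range [n1, n2) = [17, 43) of a collapse(2)
	     nest whose innermost loop runs 10 iterations, and that
	     innermost iterator currently stands at its 8th iteration
	     (3 remaining), then cnt = 3 and
	     n2var = 17 + MIN (43 - 17, 3) = 20, i.e. the simd loop
	     stops at the end of the current innermost row.  */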
	  n2var = create_tmp_var (type);
	  i = fd->collapse - 1;
	  tree itype = TREE_TYPE (fd->loops[i].v);
	  if (POINTER_TYPE_P (itype))
	    itype = signed_type_for (itype);
	  t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
				     ? -1 : 1));
	  t = fold_build2 (PLUS_EXPR, itype,
			   fold_convert (itype, fd->loops[i].step), t);
	  t = fold_build2 (PLUS_EXPR, itype, t,
			   fold_convert (itype, fd->loops[i].n2));
	  if (fd->loops[i].m2)
	    {
	      tree t2 = fold_convert (itype,
				      fd->loops[i - fd->loops[i].outer].v);
	      tree t3 = fold_convert (itype, fd->loops[i].m2);
	      t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
	      t = fold_build2 (PLUS_EXPR, itype, t, t2);
	    }
	  t = fold_build2 (MINUS_EXPR, itype, t,
			   fold_convert (itype, fd->loops[i].v));
	  if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
			     fold_build1 (NEGATE_EXPR, itype, t),
			     fold_build1 (NEGATE_EXPR, itype,
					  fold_convert (itype,
							fd->loops[i].step)));
	  else
	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
			     fold_convert (itype, fd->loops[i].step));
	  t = fold_convert (type, t);
	  tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
	  min_arg1 = create_tmp_var (type);
	  expand_omp_build_assign (&gsi, min_arg1, t2);
	  min_arg2 = create_tmp_var (type);
	  expand_omp_build_assign (&gsi, min_arg2, t);
	}
      else
	{
	  if (TREE_CODE (n2) == INTEGER_CST)
	    {
	      /* Indicate for lastprivate handling that at least one
		 iteration has been performed, without wasting runtime.  */
	      if (integer_nonzerop (n2))
		expand_omp_build_assign (&gsi, fd->loop.v,
					 fold_convert (type, n2));
	      else
		/* Indicate that no iteration has been performed.  */
		expand_omp_build_assign (&gsi, fd->loop.v,
					 build_one_cst (type));
	    }
	  else
	    {
	      expand_omp_build_assign (&gsi, fd->loop.v,
				       build_zero_cst (type));
	      expand_omp_build_assign (&gsi, n2, build_one_cst (type));
	    }
	  for (i = 0; i < fd->collapse; i++)
	    {
	      t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
	      if (fd->loops[i].m1)
		{
		  tree t2
		    = fold_convert (TREE_TYPE (t),
				    fd->loops[i - fd->loops[i].outer].v);
		  tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
		  t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
		  t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
		}
	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	      /* For normal non-combined collapsed loops just initialize
		 the outermost iterator in the entry_bb.  */
	      if (!broken_loop)
		break;
	    }
	}
    }
  else
    expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
  tree altv = NULL_TREE, altn2 = NULL_TREE;
  if (fd->collapse == 1
      && !broken_loop
      && TREE_CODE (orig_step) != INTEGER_CST)
    {
      /* The vectorizer currently punts on loops with non-constant steps
	 for the main IV (it cannot compute the number of iterations and
	 gives up because of that).  Since for OpenMP loops it is always
	 possible to compute the number of iterations upfront, use an
	 alternate IV as the loop iterator:
	 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
	 for (i = n1, altv = 0; altv < altn2; altv++, i += step)  */
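      /* Illustrative numbers only: with n1 = 0, n2 = 10 and a run-time
	 step of 3, altn2 = (10 - 0 + 3 - 1) / 3 = 4, so the rewritten
	 loop runs for altv = 0, 1, 2, 3 while i takes 0, 3, 6, 9.  */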
      altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
      expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
      tree itype = TREE_TYPE (fd->loop.v);
      if (POINTER_TYPE_P (itype))
	itype = signed_type_for (itype);
      t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype,
		       fold_convert (itype, step), t);
      t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
      t = fold_build2 (MINUS_EXPR, itype, t,
		       fold_convert (itype, fd->loop.v));
      if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype,
				      fold_convert (itype, step)));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
			 fold_convert (itype, step));
      t = fold_convert (TREE_TYPE (altv), t);
      altn2 = create_tmp_var (TREE_TYPE (altv));
      expand_omp_build_assign (&gsi, altn2, t);
      tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
      t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
      t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
					build_zero_cst (TREE_TYPE (altv)));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
    }
  else if (fd->collapse > 1
	   && !broken_loop
	   && !gimple_omp_for_combined_into_p (fd->for_stmt)
	   && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
    {
      altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
      altn2 = create_tmp_var (TREE_TYPE (altv));
    }
  if (cond_var)
    {
      if (POINTER_TYPE_P (type)
	  || TREE_CODE (n1) != INTEGER_CST
	  || fd->loop.cond_code != LT_EXPR
	  || tree_int_cst_sgn (n1) != 1)
	expand_omp_build_assign (&gsi, cond_var,
				 build_one_cst (TREE_TYPE (cond_var)));
      else
	expand_omp_build_assign (&gsi, cond_var,
				 fold_convert (TREE_TYPE (cond_var), n1));
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  if (!broken_loop)
    {
      /* Code to control the increment goes in the CONT_BB.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);

      if (fd->collapse == 1
	  || gimple_omp_for_combined_into_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (fd->loop.v, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
	  expand_omp_build_assign (&gsi, fd->loop.v, t);
	}
      else if (TREE_CODE (n2) != INTEGER_CST)
	expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
      if (altv)
	{
	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
			   build_one_cst (TREE_TYPE (altv)));
	  expand_omp_build_assign (&gsi, altv, t);
	}

      if (fd->collapse > 1)
	{
	  i = fd->collapse - 1;
	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
	    t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
	  else
	    {
	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
				fd->loops[i].step);
	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, t);
	    }
	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	}
      if (cond_var)
	{
	  if (POINTER_TYPE_P (type)
	      || TREE_CODE (n1) != INTEGER_CST
	      || fd->loop.cond_code != LT_EXPR
	      || tree_int_cst_sgn (n1) != 1)
	    t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
			     build_one_cst (TREE_TYPE (cond_var)));
	  else
	    t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
			     fold_convert (TREE_TYPE (cond_var), step));
	  expand_omp_build_assign (&gsi, cond_var, t);
	}

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);
    }

  /* Emit the condition in L1_BB.  */
  gsi = gsi_start_bb (l1_bb);

  if (altv)
    t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
  else if (fd->collapse > 1
	   && !gimple_omp_for_combined_into_p (fd->for_stmt)
	   && !broken_loop)
    {
      i = fd->collapse - 1;
      tree itype = TREE_TYPE (fd->loops[i].v);
      if (fd->loops[i].m2)
	t = n2v = create_tmp_var (itype);
      else
	t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				    false, GSI_CONTINUE_LINKING);
      tree v = fd->loops[i].v;
      if (DECL_P (v) && TREE_ADDRESSABLE (v))
	v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
      t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
    }
  else
    {
      if (fd->collapse > 1 && !broken_loop)
	t = n2var;
      else
	t = fold_convert (type, unshare_expr (n2));
      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				    false, GSI_CONTINUE_LINKING);
      tree v = fd->loop.v;
      if (DECL_P (v) && TREE_ADDRESSABLE (v))
	v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
      t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
    }
  cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
		 NULL, NULL)
      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
		    NULL, NULL))
    {
      gsi = gsi_for_stmt (cond_stmt);
      gimple_regimplify_operands (cond_stmt, &gsi);
    }

  /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
  if (is_simt)
    {
      gsi = gsi_start_bb (l2_bb);
      step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (fd->loop.v, step);
      else
	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
      expand_omp_build_assign (&gsi, fd->loop.v, t);
    }

  /* Remove GIMPLE_OMP_RETURN.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  remove_edge (FALLTHRU_EDGE (entry_bb));

  if (!broken_loop)
    {
      remove_edge (BRANCH_EDGE (entry_bb));
      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);

      e = BRANCH_EDGE (l1_bb);
      ne = FALLTHRU_EDGE (l1_bb);
      e->flags = EDGE_TRUE_VALUE;
    }
  else
    {
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      ne = single_succ_edge (l1_bb);
      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
    }
  ne->flags = EDGE_FALSE_VALUE;
  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
  ne->probability = e->probability.invert ();

  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);

  if (simt_maxlane)
    {
      cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
				     NULL_TREE, NULL_TREE);
      gsi = gsi_last_bb (entry_bb);
      gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
      make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
      FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
      FALLTHRU_EDGE (entry_bb)->probability
	= profile_probability::guessed_always ().apply_scale (7, 8);
      BRANCH_EDGE (entry_bb)->probability
	= FALLTHRU_EDGE (entry_bb)->probability.invert ();
      l2_dom_bb = entry_bb;
    }
  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);

  if (!broken_loop && fd->collapse > 1)
    {
      basic_block last_bb = l1_bb;
      basic_block init_bb = NULL;
      for (i = fd->collapse - 2; i >= 0; i--)
	{
	  tree nextn2v = NULL_TREE;
	  if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
	    e = EDGE_SUCC (last_bb, 0);
	  else
	    e = EDGE_SUCC (last_bb, 1);
	  basic_block bb = split_edge (e);
	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
	    t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
	  else
	    {
	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
				fd->loops[i].step);
	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, t);
	    }
	  gsi = gsi_after_labels (bb);
	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);

	  bb = split_block (bb, last_nondebug_stmt (bb))->dest;
	  gsi = gsi_start_bb (bb);
	  tree itype = TREE_TYPE (fd->loops[i].v);
	  if (fd->loops[i].m2)
	    t = nextn2v = create_tmp_var (itype);
	  else
	    t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  tree v = fd->loops[i].v;
	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
					  false, GSI_CONTINUE_LINKING);
	  t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
	  cond_stmt = gimple_build_cond_empty (t);
	  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
			 expand_omp_regimplify_p, NULL, NULL)
	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			    expand_omp_regimplify_p, NULL, NULL))
	    {
	      gsi = gsi_for_stmt (cond_stmt);
	      gimple_regimplify_operands (cond_stmt, &gsi);
	    }
	  ne = single_succ_edge (bb);
	  ne->flags = EDGE_FALSE_VALUE;

	  init_bb = create_empty_bb (bb);
	  set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
	  add_bb_to_loop (init_bb, bb->loop_father);
	  e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
	  e->probability
	    = profile_probability::guessed_always ().apply_scale (7, 8);
	  ne->probability = e->probability.invert ();

	  gsi = gsi_after_labels (init_bb);
	  if (fd->loops[i + 1].m1)
	    {
	      tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
				      fd->loops[i + 1
						- fd->loops[i + 1].outer].v);
	      if (POINTER_TYPE_P (TREE_TYPE (t2)))
		t = fold_build_pointer_plus (t2, fd->loops[i + 1].n1);
	      else
		{
		  t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
				    fd->loops[i + 1].n1);
		  tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
		  t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
		  t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
		}
	    }
	  else
	    t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
			      fd->loops[i + 1].n1);
	  expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
	  if (fd->loops[i + 1].m2)
	    {
	      if (i + 2 == fd->collapse && (n2var || altv))
		{
		  gcc_assert (n2v == NULL_TREE);
		  n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
		}
	      tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
				      fd->loops[i + 1
						- fd->loops[i + 1].outer].v);
	      if (POINTER_TYPE_P (TREE_TYPE (t2)))
		t = fold_build_pointer_plus (t2, fd->loops[i + 1].n2);
	      else
		{
		  t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
				    fd->loops[i + 1].n2);
		  tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
		  t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
		  t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
		}
	      expand_omp_build_assign (&gsi, n2v, t);
	    }
	  if (i + 2 == fd->collapse && n2var)
	    {
	      /* For composite simd, n2 is the first iteration the current
		 task shouldn't already handle, so we effectively want to use
		 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
		 as the vectorized loop.  Except the vectorizer will not
		 vectorize that, so instead compute N2VAR as
		 N2VAR = V + MIN (N2 - V, COUNTS3) and use
		 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
		 as the loop to vectorize.  */
	      tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
	      if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
		{
		  tree itype = TREE_TYPE (fd->loops[i].v);
		  if (POINTER_TYPE_P (itype))
		    itype = signed_type_for (itype);
		  t = build_int_cst (itype, (fd->loops[i + 1].cond_code
					     == LT_EXPR ? -1 : 1));
		  t = fold_build2 (PLUS_EXPR, itype,
				   fold_convert (itype,
						 fd->loops[i + 1].step), t);
		  if (fd->loops[i + 1].m2 == NULL_TREE)
		    t = fold_build2 (PLUS_EXPR, itype, t,
				     fold_convert (itype,
						   fd->loops[i + 1].n2));
		  else if (POINTER_TYPE_P (TREE_TYPE (n2v)))
		    {
		      t = fold_build_pointer_plus (n2v, t);
		      t = fold_convert (itype, t);
		    }
		  else
		    t = fold_build2 (PLUS_EXPR, itype, t, n2v);
		  t = fold_build2 (MINUS_EXPR, itype, t,
				   fold_convert (itype, fd->loops[i + 1].v));
		  tree step = fold_convert (itype, fd->loops[i + 1].step);
		  if (TYPE_UNSIGNED (itype)
		      && fd->loops[i + 1].cond_code == GT_EXPR)
		    t = fold_build2 (TRUNC_DIV_EXPR, itype,
				     fold_build1 (NEGATE_EXPR, itype, t),
				     fold_build1 (NEGATE_EXPR, itype, step));
		  else
		    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
		  t = fold_convert (type, t);
		}
	      else
		t = counts[i + 1];
	      expand_omp_build_assign (&gsi, min_arg1, t2);
	      expand_omp_build_assign (&gsi, min_arg2, t);
	      e = split_block (init_bb, last_nondebug_stmt (init_bb));
	      gsi = gsi_after_labels (e->dest);
	      init_bb = e->dest;
	      remove_edge (FALLTHRU_EDGE (entry_bb));
	      make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
	      set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
	      set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
	      t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
	      t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
	      expand_omp_build_assign (&gsi, n2var, t);
	    }
	  if (i + 2 == fd->collapse && altv)
	    {
	      /* The vectorizer currently punts on loops with non-constant
		 steps for the main IV (it cannot compute the number of
		 iterations and gives up because of that).  Since for
		 OpenMP loops it is always possible to compute the number
		 of iterations upfront, use an alternate IV as the loop
		 iterator.  */
	      expand_omp_build_assign (&gsi, altv,
				       build_zero_cst (TREE_TYPE (altv)));
	      tree itype = TREE_TYPE (fd->loops[i + 1].v);
	      if (POINTER_TYPE_P (itype))
		itype = signed_type_for (itype);
	      t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
					 ? -1 : 1));
	      t = fold_build2 (PLUS_EXPR, itype,
			       fold_convert (itype, fd->loops[i + 1].step), t);
	      t = fold_build2 (PLUS_EXPR, itype, t,
			       fold_convert (itype,
					     fd->loops[i + 1].m2
					     ? n2v : fd->loops[i + 1].n2));
	      t = fold_build2 (MINUS_EXPR, itype, t,
			       fold_convert (itype, fd->loops[i + 1].v));
	      tree step = fold_convert (itype, fd->loops[i + 1].step);
	      if (TYPE_UNSIGNED (itype)
		  && fd->loops[i + 1].cond_code == GT_EXPR)
		t = fold_build2 (TRUNC_DIV_EXPR, itype,
				 fold_build1 (NEGATE_EXPR, itype, t),
				 fold_build1 (NEGATE_EXPR, itype, step));
	      else
		t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
	      t = fold_convert (TREE_TYPE (altv), t);
	      expand_omp_build_assign (&gsi, altn2, t);
	      tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
				      fd->loops[i + 1].m2
				      ? n2v : fd->loops[i + 1].n2);
	      t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
				fd->loops[i + 1].v, t2);
	      t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
					     true, GSI_SAME_STMT);
	      gassign *g
		= gimple_build_assign (altn2, COND_EXPR, t2, altn2,
				       build_zero_cst (TREE_TYPE (altv)));
	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	    }
	  n2v = nextn2v;

	  make_edge (init_bb, last_bb, EDGE_FALLTHRU);
	  if (!gimple_omp_for_combined_into_p (fd->for_stmt))
	    {
	      e = find_edge (entry_bb, last_bb);
	      redirect_edge_succ (e, bb);
	      set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
	      set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
	    }

	  last_bb = bb;
	}
    }
  if (!broken_loop)
    {
      class loop *loop = alloc_loop ();
      loop->header = l1_bb;
      loop->latch = cont_bb;
      add_loop (loop, l1_bb->loop_father);
      loop->safelen = safelen_int;
      if (simduid)
	{
	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
	  cfun->has_simduid_loops = true;
	}
      /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
	 the loop.  */
      if ((flag_tree_loop_vectorize
	   || !OPTION_SET_P (flag_tree_loop_vectorize))
	  && flag_tree_loop_optimize
	  && loop->safelen > 1)
	{
	  loop->force_vectorize = true;
	  if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
	    {
	      unsigned HOST_WIDE_INT v
		= tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
	      if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
		loop->simdlen = v;
	    }
	  cfun->has_force_vectorize_loops = true;
	}
      else if (dont_vectorize)
	loop->dont_vectorize = true;
    }
  else if (simduid)
    cfun->has_simduid_loops = true;
}

/* The taskloop construct is represented after gimplification with
   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
   in between them.  This routine expands the outer GIMPLE_OMP_FOR,
   which should just compute all the needed loop temporaries
   for GIMPLE_OMP_TASK.  */
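
/* A simplified, illustrative sketch of the shape being expanded: for

     #pragma omp taskloop
     for (i = a; i < b; i += c)
       BODY;

   gimplification produces roughly

     GIMPLE_OMP_FOR (outer)      <-- expanded here; only computes the
       GIMPLE_OMP_TASK               _looptemp_ start/end values
	 GIMPLE_OMP_FOR (inner)  <-- expanded by
	   BODY                      expand_omp_taskloop_for_inner

   and the task setup ultimately calls GOMP_taskloop{,_ull}, which hands
   each generated task one sub-range via the _looptemp_ clauses.  */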

static void
expand_omp_taskloop_for_outer (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree type, bias = NULL_TREE;
  basic_block entry_bb, cont_bb, exit_bb;
  gimple_stmt_iterator gsi;
  gassign *assign_stmt;
  tree *counts = NULL;
  int i;

  gcc_assert (inner_stmt);
  gcc_assert (region->cont);
  gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
	      && gimple_omp_task_taskloop_p (inner_stmt));
  type = TREE_TYPE (fd->loop.v);

  /* See if we need to bias by LLONG_MIN.  */
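  /* Background, with illustrative values: the runtime's unsigned long
     long iterator cannot directly represent a signed iteration space
     that crosses zero (e.g. from -5 to 5), because the comparisons it
     performs are unsigned.  Adding LLONG_MIN as BIAS maps the signed
     space monotonically onto the unsigned one; since the same bias is
     reapplied on the inner side, the two additions cancel modulo 2^64
     and the original values are recovered.  */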
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  exit_bb = region->exit;

  gsi = gsi_last_nondebug_bb (entry_bb);
  gimple *for_stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);

      if (zero_iter_bb)
	{
	  /* Some counts[i] vars might be uninitialized if
	     some loop has zero iterations.  But the body shouldn't
	     be executed in that case, so just avoid uninit warnings.  */
	  for (i = first_zero_iter; i < fd->collapse; i++)
	    if (SSA_VAR_P (counts[i]))
	      suppress_warning (counts[i], OPT_Wuninitialized);
	  gsi_prev (&gsi);
	  edge e = split_block (entry_bb, gsi_stmt (gsi));
	  entry_bb = e->dest;
	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
	  gsi = gsi_last_bb (entry_bb);
	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
				   get_immediate_dominator (CDI_DOMINATORS,
							    zero_iter_bb));
	}
    }

  tree t0, t1;
  t1 = fd->loop.n2;
  t0 = fd->loop.n1;
  if (POINTER_TYPE_P (TREE_TYPE (t0))
      && TYPE_PRECISION (TREE_TYPE (t0))
	 != TYPE_PRECISION (fd->iter_type))
    {
      /* Avoid casting pointers to integer of a different size.  */
      tree itype = signed_type_for (type);
      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
    }
  else
    {
      t1 = fold_convert (fd->iter_type, t1);
      t0 = fold_convert (fd->iter_type, t0);
    }
  if (bias)
    {
      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
    }

  tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree startvar = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree endvar = OMP_CLAUSE_DECL (innerc);
  if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      innerc = find_lastprivate_looptemp (fd, innerc);
      if (innerc)
	{
	  /* If needed (inner taskloop has lastprivate clause), propagate
	     down the total number of iterations.  */
	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
					     NULL_TREE, false,
					     GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
    }

  t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
				 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t0);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
				 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (endvar, t1);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (for_stmt);
  gsi_remove (&gsi, true);

  gsi = gsi_last_nondebug_bb (cont_bb);
  gsi_remove (&gsi, true);

  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
  remove_edge (BRANCH_EDGE (entry_bb));
  FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
  remove_edge (BRANCH_EDGE (cont_bb));
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
  set_immediate_dominator (CDI_DOMINATORS, region->entry,
			   recompute_dominator (CDI_DOMINATORS,
						region->entry));
}

/* The taskloop construct is represented after gimplification with
   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
   in between them.  This routine expands the inner GIMPLE_OMP_FOR.
   The GOMP_taskloop{,_ull} function arranges for each task to be given
   just a single range of iterations.  */
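
/* A simplified, illustrative sketch: each task receives its range
   through the two _looptemp_ clauses, so for

     #pragma omp taskloop
     for (i = a; i < b; i += c)
       BODY;

   the inner GIMPLE_OMP_FOR becomes a plain sequential loop with no
   further runtime calls, roughly

     i = start;				      // first _looptemp_
     do { BODY; i += c; } while (i < end);    // second _looptemp_  */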

static void
expand_omp_taskloop_for_inner (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
  basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb;
  gimple_stmt_iterator gsi;
  edge ep;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  /* See if we need to bias by LLONG_MIN.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  body_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else
    t = integer_one_node;

  step = fd->loop.step;
  tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n1 = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n2 = OMP_CLAUSE_DECL (innerc);
  if (bias)
    {
      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
    }
  t = fold_convert (TREE_TYPE (startvar), n1);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  gimple *assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = fold_convert (TREE_TYPE (startvar), n2);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  tree *nonrect_bounds = NULL;
  if (fd->collapse > 1)
    {
      if (fd->non_rect)
	{
	  nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
	  memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
	}
      gcc_assert (gsi_bb (gsi) == entry_bb);
      expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
				startvar);
      entry_bb = gsi_bb (gsi);
    }

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
						   cont_bb, body_bb);
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (fd->for_stmt);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_RETURN statement.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
  if (!broken_loop)
    remove_edge (BRANCH_EDGE (entry_bb));
  else
    {
      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
      region->outer->cont = NULL;
    }

  /* Connect all the blocks.  */
  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			     recompute_dominator (CDI_DOMINATORS, fin_bb));

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      class loop *loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}

/* A subroutine of expand_omp_for.  Generate code for an OpenACC
   partitioned loop.  The lowering here is abstracted, in that the
   loop parameters are passed through internal functions, which are
   further lowered by oacc_device_lower, once we get to the target
   compiler.  The loop is of the form:

   for (V = B; V LTGT E; V += S) {BODY}

   where LTGT is < or >.  We may have a specified chunking size,
   CHUNK_SIZE (constant 0 for no chunking), and we will have a GWV
   partitioning mask, specifying dimensions over which the loop is to be
   partitioned (see note below).  We generate code that looks like
   (this ignores tiling):

   <entry_bb> [incoming FALL->body, BRANCH->exit]
     typedef signedintify (typeof (V)) T;  // underlying signed integral type
     T range = E - B;
     T chunk_no = 0;
     T DIR = LTGT == '<' ? +1 : -1;
     T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
     T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);

   <head_bb> [created by splitting end of entry_bb]
     T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
     T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
     if (!(offset LTGT bound)) goto bottom_bb;

   <body_bb> [incoming]
     V = B + offset;
     {BODY}

   <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
     offset += step;
     if (offset LTGT bound) goto body_bb; [*]

   <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
     chunk_no++;
     if (chunk_no < chunk_max) goto head_bb;

   <exit_bb> [incoming]
     V = B + ((range -/+ 1) / S +/- 1) * S [*]

   [*] Needed if V live at end of loop.  */
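
/* An illustrative example (details depend on the target lowering): for

     #pragma acc loop gang
     for (i = 0; i < n; i += 2)
       BODY;

   the IFN_GOACC_LOOP calls above are emitted with DIR = +1, range n,
   S = 2, a CHUNK_SIZE argument (0 when not chunking) and GWV set to the
   gang mask; oacc_device_lower later replaces each call with the
   device-specific computation of that gang's offset, bound and step.  */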

static void
expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
{
  bool is_oacc_kernels_parallelized
    = (lookup_attribute ("oacc kernels parallelized",
			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
  {
    bool is_oacc_kernels
      = (lookup_attribute ("oacc kernels",
			   DECL_ATTRIBUTES (current_function_decl)) != NULL);
    if (is_oacc_kernels_parallelized)
      gcc_checking_assert (is_oacc_kernels);
  }
  gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
  /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
     for SSA specifics, and some are for 'parloops' OpenACC
     'kernels'-parallelized specifics.  */

  tree v = fd->loop.v;
  enum tree_code cond_code = fd->loop.cond_code;
  enum tree_code plus_code = PLUS_EXPR;

  tree chunk_size = integer_minus_one_node;
  tree gwv = integer_zero_node;
  tree iter_type = TREE_TYPE (v);
  tree diff_type = iter_type;
  tree plus_type = iter_type;
  struct oacc_collapse *counts = NULL;

  gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
		       == GF_OMP_FOR_KIND_OACC_LOOP);
  gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
  gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);

  if (POINTER_TYPE_P (iter_type))
    {
      plus_code = POINTER_PLUS_EXPR;
      plus_type = sizetype;
    }
  for (int ix = fd->collapse; ix--;)
    {
      tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
	diff_type = diff_type2;
    }
  if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
    diff_type = signed_type_for (diff_type);
  if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
    diff_type = integer_type_node;

  basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
  basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
  basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
  basic_block bottom_bb = NULL;

  /* entry_bb has two successors; the branch edge is to the exit
     block, the fallthrough edge to the body.  */
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);

  /* If cont_bb non-NULL, it has 2 successors.  The branch successor is
     body_bb, or to a block whose only successor is the body_bb.  Its
     fallthrough successor is the final block (same as the branch
     successor of the entry_bb).  */
  if (cont_bb)
    {
      basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
      basic_block bed = BRANCH_EDGE (cont_bb)->dest;

      gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
      gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
    }
  else
    gcc_assert (!gimple_in_ssa_p (cfun));

  /* The exit block only has entry_bb and cont_bb as predecessors.  */
  gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));

  tree chunk_no;
  tree chunk_max = NULL_TREE;
  tree bound, offset;
  tree step = create_tmp_var (diff_type, ".step");
  bool up = cond_code == LT_EXPR;
  tree dir = build_int_cst (diff_type, up ? +1 : -1);
  bool chunking = !gimple_in_ssa_p (cfun);
  bool negating;

  /* Tiling vars.  */
  tree tile_size = NULL_TREE;
  tree element_s = NULL_TREE;
  tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
  basic_block elem_body_bb = NULL;
  basic_block elem_cont_bb = NULL;

  /* SSA instances.  */
  tree offset_incr = NULL_TREE;
  tree offset_init = NULL_TREE;

  gimple_stmt_iterator gsi;
  gassign *ass;
  gcall *call;
  gimple *stmt;
  tree expr;
  location_t loc;
  edge split, be, fte;

  /* Split the end of entry_bb to create head_bb.  */
  split = split_block (entry_bb, last_nondebug_stmt (entry_bb));
  basic_block head_bb = split->dest;
  entry_bb = split->src;

  /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
  loc = gimple_location (for_stmt);

  if (gimple_in_ssa_p (cfun))
    {
      offset_init = gimple_omp_for_index (for_stmt, 0);
      gcc_assert (integer_zerop (fd->loop.n1));
      /* The SSA parallelizer does gang parallelism.  */
      gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
    }

  if (fd->collapse > 1 || fd->tiling)
    {
      gcc_assert (!gimple_in_ssa_p (cfun) && up);
      counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
      tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
					      TREE_TYPE (fd->loop.n2), loc);

      if (SSA_VAR_P (fd->loop.n2))
	{
	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
					    true, GSI_SAME_STMT);
	  ass = gimple_build_assign (fd->loop.n2, total);
	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
	}
    }

  tree b = fd->loop.n1;
  tree e = fd->loop.n2;
  tree s = fd->loop.step;

  b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
  e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);

  /* Convert the step, avoiding possible unsigned->signed overflow.  */
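  /* E.g. (illustrative): a descending char loop may store its step as
     the unsigned value 254, meaning -2.  Converting 254 to the wider
     signed diff_type directly would yield +254, so negate first (2),
     convert, then negate again to obtain -2.  */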
7759 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7760 if (negating)
7761 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7762 s = fold_convert (diff_type, s);
7763 if (negating)
7764 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7765 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7766
7767 if (!chunking)
7768 chunk_size = integer_zero_node;
7769 expr = fold_convert (diff_type, chunk_size);
7770 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7771 NULL_TREE, true, GSI_SAME_STMT);
7772
7773 if (fd->tiling)
7774 {
7775 /* Determine the tile size and element step,
7776 modify the outer loop step size. */
7777 tile_size = create_tmp_var (diff_type, ".tile_size");
7778 expr = build_int_cst (diff_type, 1);
7779 for (int ix = 0; ix < fd->collapse; ix++)
7780 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7781 expr = force_gimple_operand_gsi (&gsi, expr, true,
7782 NULL_TREE, true, GSI_SAME_STMT);
7783 ass = gimple_build_assign (tile_size, expr);
7784 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7785
7786 element_s = create_tmp_var (diff_type, ".element_s");
7787 ass = gimple_build_assign (element_s, s);
7788 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7789
7790 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7791 s = force_gimple_operand_gsi (&gsi, expr, true,
7792 NULL_TREE, true, GSI_SAME_STMT);
7793 }
7794
7795 /* Determine the range, avoiding possible unsigned->signed overflow. */
7796 negating = !up && TYPE_UNSIGNED (iter_type);
7797 expr = fold_build2 (MINUS_EXPR, plus_type,
7798 fold_convert (plus_type, negating ? b : e),
7799 fold_convert (plus_type, negating ? e : b));
7800 expr = fold_convert (diff_type, expr);
7801 if (negating)
7802 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7803 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7804 NULL_TREE, true, GSI_SAME_STMT);
7805
7806 chunk_no = build_int_cst (diff_type, 0);
7807 if (chunking)
7808 {
7809 gcc_assert (!gimple_in_ssa_p (cfun));
7810
7811 expr = chunk_no;
7812 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7813 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7814
7815 ass = gimple_build_assign (chunk_no, expr);
7816 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7817
7818 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7819 build_int_cst (integer_type_node,
7820 IFN_GOACC_LOOP_CHUNKS),
7821 dir, range, s, chunk_size, gwv);
7822 gimple_call_set_lhs (gs: call, lhs: chunk_max);
7823 gimple_set_location (g: call, location: loc);
7824 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7825 }
7826 else
7827 chunk_size = chunk_no;
7828
7829 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7830 build_int_cst (integer_type_node,
7831 IFN_GOACC_LOOP_STEP),
7832 dir, range, s, chunk_size, gwv);
7833 gimple_call_set_lhs (gs: call, lhs: step);
7834 gimple_set_location (g: call, location: loc);
7835 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7836
7837 /* Remove the GIMPLE_OMP_FOR. */
7838 gsi_remove (&gsi, true);
7839
7840 /* Fixup edges from head_bb. */
7841 be = BRANCH_EDGE (head_bb);
7842 fte = FALLTHRU_EDGE (head_bb);
7843 be->flags |= EDGE_FALSE_VALUE;
7844 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7845
7846 basic_block body_bb = fte->dest;
7847
7848 if (gimple_in_ssa_p (cfun))
7849 {
7850 gsi = gsi_last_nondebug_bb (bb: cont_bb);
7851 gomp_continue *cont_stmt = as_a <gomp_continue *> (p: gsi_stmt (i: gsi));
7852
7853 offset = gimple_omp_continue_control_use (cont_stmt);
7854 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7855 }
7856 else
7857 {
7858 offset = create_tmp_var (diff_type, ".offset");
7859 offset_init = offset_incr = offset;
7860 }
7861 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7862
7863 /* Loop offset & bound go into head_bb. */
7864 gsi = gsi_start_bb (bb: head_bb);
7865
7866 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7867 build_int_cst (integer_type_node,
7868 IFN_GOACC_LOOP_OFFSET),
7869 dir, range, s,
7870 chunk_size, gwv, chunk_no);
7871 gimple_call_set_lhs (gs: call, lhs: offset_init);
7872 gimple_set_location (g: call, location: loc);
7873 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7874
7875 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7876 build_int_cst (integer_type_node,
7877 IFN_GOACC_LOOP_BOUND),
7878 dir, range, s,
7879 chunk_size, gwv, offset_init);
7880 gimple_call_set_lhs (gs: call, lhs: bound);
7881 gimple_set_location (g: call, location: loc);
7882 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7883
7884 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7885 gsi_insert_after (&gsi, gimple_build_cond_empty (cond: expr),
7886 GSI_CONTINUE_LINKING);
7887
7888 /* V assignment goes into body_bb. */
7889 if (!gimple_in_ssa_p (cfun))
7890 {
7891 gsi = gsi_start_bb (bb: body_bb);
7892
7893 expr = build2 (plus_code, iter_type, b,
7894 fold_convert (plus_type, offset));
7895 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7896 true, GSI_SAME_STMT);
7897 ass = gimple_build_assign (v, expr);
7898 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7899
7900 if (fd->collapse > 1 || fd->tiling)
7901 expand_oacc_collapse_vars (fd, inner: false, gsi: &gsi, counts, ivar: v, diff_type);
7902
7903 if (fd->tiling)
7904 {
7905 /* Determine the range of the element loop -- usually simply
7906 the tile_size, but could be smaller if the final
7907 iteration of the outer loop is a partial tile. */
7908 tree e_range = create_tmp_var (diff_type, ".e_range");
7909
7910 expr = build2 (MIN_EXPR, diff_type,
7911 build2 (MINUS_EXPR, diff_type, bound, offset),
7912 build2 (MULT_EXPR, diff_type, tile_size,
7913 element_s));
7914 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7915 true, GSI_SAME_STMT);
7916 ass = gimple_build_assign (e_range, expr);
7917 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7918
7919 /* Determine bound, offset & step of inner loop. */
7920 e_bound = create_tmp_var (diff_type, ".e_bound");
7921 e_offset = create_tmp_var (diff_type, ".e_offset");
7922 e_step = create_tmp_var (diff_type, ".e_step");
7923
7924 /* Mark these as element loops. */
7925 tree t, e_gwv = integer_minus_one_node;
7926 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7927
7928 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7929 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7930 element_s, chunk, e_gwv, chunk);
7931 gimple_call_set_lhs (gs: call, lhs: e_offset);
7932 gimple_set_location (g: call, location: loc);
7933 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7934
7935 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7936 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7937 element_s, chunk, e_gwv, e_offset);
7938 gimple_call_set_lhs (gs: call, lhs: e_bound);
7939 gimple_set_location (g: call, location: loc);
7940 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7941
7942 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7943 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7944 element_s, chunk, e_gwv);
7945 gimple_call_set_lhs (gs: call, lhs: e_step);
7946 gimple_set_location (g: call, location: loc);
7947 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7948
7949 /* Add test and split block. */
7950 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7951 stmt = gimple_build_cond_empty (cond: expr);
7952 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7953 split = split_block (body_bb, stmt);
7954 elem_body_bb = split->dest;
7955 if (cont_bb == body_bb)
7956 cont_bb = elem_body_bb;
7957 body_bb = split->src;
7958
7959 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7960
7961 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7962 if (cont_bb == NULL)
7963 {
7964 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7965 e->probability = profile_probability::even ();
7966 split->probability = profile_probability::even ();
7967 }
7968
7969 /* Initialize the user's loop vars. */
7970 gsi = gsi_start_bb (bb: elem_body_bb);
7971 expand_oacc_collapse_vars (fd, inner: true, gsi: &gsi, counts, ivar: e_offset,
7972 diff_type);
7973 }
7974 }
7975
7976 /* Loop increment goes into cont_bb. If this is not a loop, we
7977 will have spawned threads as if it were, and each one will
7978 execute one iteration. The specification is not explicit about
7979 whether such constructs are ill-formed, and they can
7980 occur, especially when noreturn routines are involved. */
7981 if (cont_bb)
7982 {
7983 gsi = gsi_last_nondebug_bb (bb: cont_bb);
7984 gomp_continue *cont_stmt = as_a <gomp_continue *> (p: gsi_stmt (i: gsi));
7985 loc = gimple_location (g: cont_stmt);
7986
7987 if (fd->tiling)
7988 {
7989 /* Insert element loop increment and test. */
7990 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7991 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7992 true, GSI_SAME_STMT);
7993 ass = gimple_build_assign (e_offset, expr);
7994 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7995 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7996
7997 stmt = gimple_build_cond_empty (cond: expr);
7998 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7999 split = split_block (cont_bb, stmt);
8000 elem_cont_bb = split->src;
8001 cont_bb = split->dest;
8002
8003 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
8004 split->probability = profile_probability::unlikely ().guessed ();
8005 edge latch_edge
8006 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
8007 latch_edge->probability = profile_probability::likely ().guessed ();
8008
8009 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
8010 skip_edge->probability = profile_probability::unlikely ().guessed ();
8011 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
8012 loop_entry_edge->probability
8013 = profile_probability::likely ().guessed ();
8014
8015 gsi = gsi_for_stmt (cont_stmt);
8016 }
8017
8018 /* Increment offset. */
8019 if (gimple_in_ssa_p (cfun))
8020 expr = build2 (plus_code, iter_type, offset,
8021 fold_convert (plus_type, step));
8022 else
8023 expr = build2 (PLUS_EXPR, diff_type, offset, step);
8024 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
8025 true, GSI_SAME_STMT);
8026 ass = gimple_build_assign (offset_incr, expr);
8027 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
8028 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
8029 gsi_insert_before (&gsi, gimple_build_cond_empty (cond: expr), GSI_SAME_STMT);
8030
8031 /* Remove the GIMPLE_OMP_CONTINUE. */
8032 gsi_remove (&gsi, true);
8033
8034 /* Fixup edges from cont_bb. */
8035 be = BRANCH_EDGE (cont_bb);
8036 fte = FALLTHRU_EDGE (cont_bb);
8037 be->flags |= EDGE_TRUE_VALUE;
8038 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
8039
8040 if (chunking)
8041 {
8042 /* Split the beginning of exit_bb to make bottom_bb. We
8043 need to insert a nop at the start, because splitting is
8044 after a stmt, not before. */
8045 gsi = gsi_start_bb (bb: exit_bb);
8046 stmt = gimple_build_nop ();
8047 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8048 split = split_block (exit_bb, stmt);
8049 bottom_bb = split->src;
8050 exit_bb = split->dest;
8051 gsi = gsi_last_bb (bb: bottom_bb);
8052
8053 /* Chunk increment and test go into bottom_bb. */
8054 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
8055 build_int_cst (diff_type, 1));
8056 ass = gimple_build_assign (chunk_no, expr);
8057 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
8058
8059 /* Chunk test at end of bottom_bb. */
8060 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
8061 gsi_insert_after (&gsi, gimple_build_cond_empty (cond: expr),
8062 GSI_CONTINUE_LINKING);
8063
8064 /* Fixup edges from bottom_bb. */
8065 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
8066 split->probability = profile_probability::unlikely ().guessed ();
8067 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
8068 latch_edge->probability = profile_probability::likely ().guessed ();
8069 }
8070 }
8071
8072 gsi = gsi_last_nondebug_bb (bb: exit_bb);
8073 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8074 loc = gimple_location (g: gsi_stmt (i: gsi));
8075
8076 if (!gimple_in_ssa_p (cfun))
8077 {
8078 /* Insert the final value of V, in case it is live. This is the
8079 value for the only thread that survives past the join. */
8080 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
8081 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
8082 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
8083 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
8084 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
8085 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
8086 true, GSI_SAME_STMT);
8087 ass = gimple_build_assign (v, expr);
8088 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
8089 }
8090
8091 /* Remove the OMP_RETURN. */
8092 gsi_remove (&gsi, true);
8093
8094 if (cont_bb)
8095 {
8096 /* We now have one, two or three nested loops: the chunk loop, the body
8097 loop, and tiling's element loop, as applicable. Update the loop structures. */
8098 class loop *parent = entry_bb->loop_father;
8099 class loop *body = body_bb->loop_father;
8100
8101 if (chunking)
8102 {
8103 class loop *chunk_loop = alloc_loop ();
8104 chunk_loop->header = head_bb;
8105 chunk_loop->latch = bottom_bb;
8106 add_loop (chunk_loop, parent);
8107 parent = chunk_loop;
8108 }
8109 else if (parent != body)
8110 {
8111 gcc_assert (body->header == body_bb);
8112 gcc_assert (body->latch == cont_bb
8113 || single_pred (body->latch) == cont_bb);
8114 parent = NULL;
8115 }
8116
8117 if (parent)
8118 {
8119 class loop *body_loop = alloc_loop ();
8120 body_loop->header = body_bb;
8121 body_loop->latch = cont_bb;
8122 add_loop (body_loop, parent);
8123
8124 if (fd->tiling)
8125 {
8126 /* Insert tiling's element loop. */
8127 class loop *inner_loop = alloc_loop ();
8128 inner_loop->header = elem_body_bb;
8129 inner_loop->latch = elem_cont_bb;
8130 add_loop (inner_loop, body_loop);
8131 }
8132 }
8133 }
8134}
8135
8136/* Expand the OMP loop defined by REGION. */
8137
8138static void
8139expand_omp_for (struct omp_region *region, gimple *inner_stmt)
8140{
8141 struct omp_for_data fd;
8142 struct omp_for_data_loop *loops;
8143
8144 loops = XALLOCAVEC (struct omp_for_data_loop,
8145 gimple_omp_for_collapse
8146 (last_nondebug_stmt (region->entry)));
8147 omp_extract_for_data (for_stmt: as_a <gomp_for *> (p: last_nondebug_stmt (region->entry)),
8148 fd: &fd, loops);
8149 region->sched_kind = fd.sched_kind;
8150 region->sched_modifiers = fd.sched_modifiers;
8151 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
8152 if (fd.non_rect && !gimple_omp_for_combined_into_p (g: fd.for_stmt))
8153 {
8154 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
8155 if ((loops[i].m1 || loops[i].m2)
8156 && (loops[i].m1 == NULL_TREE
8157 || TREE_CODE (loops[i].m1) == INTEGER_CST)
8158 && (loops[i].m2 == NULL_TREE
8159 || TREE_CODE (loops[i].m2) == INTEGER_CST)
8160 && TREE_CODE (loops[i].step) == INTEGER_CST
8161 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
8162 {
8163 tree t;
8164 tree itype = TREE_TYPE (loops[i].v);
8165 if (loops[i].m1 && loops[i].m2)
8166 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
8167 else if (loops[i].m1)
8168 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
8169 else
8170 t = loops[i].m2;
8171 t = fold_build2 (MULT_EXPR, itype, t,
8172 fold_convert (itype,
8173 loops[i - loops[i].outer].step));
8174 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
8175 t = fold_build2 (TRUNC_MOD_EXPR, itype,
8176 fold_build1 (NEGATE_EXPR, itype, t),
8177 fold_build1 (NEGATE_EXPR, itype,
8178 fold_convert (itype,
8179 loops[i].step)));
8180 else
8181 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
8182 fold_convert (itype, loops[i].step));
8183 if (integer_nonzerop (t))
8184 error_at (gimple_location (g: fd.for_stmt),
8185 "invalid OpenMP non-rectangular loop step; "
8186 "%<(%E - %E) * %E%> is not a multiple of loop %d "
8187 "step %qE",
8188 loops[i].m2 ? loops[i].m2 : integer_zero_node,
8189 loops[i].m1 ? loops[i].m1 : integer_zero_node,
8190 loops[i - loops[i].outer].step, i + 1,
8191 loops[i].step);
8192 }
8193 }
8194
8195 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8196 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8197 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8198 if (region->cont)
8199 {
8200 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8201 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8202 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8203 }
8204 else
8205 /* If there isn't a continue then this is a degenerate case where
8206 the introduction of abnormal edges during lowering will prevent
8207 original loops from being detected. Fix that up. */
8208 loops_state_set (flags: LOOPS_NEED_FIXUP);
8209
8210 if (gimple_omp_for_kind (g: fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8211 expand_omp_simd (region, fd: &fd);
8212 else if (gimple_omp_for_kind (g: fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8213 {
8214 gcc_assert (!inner_stmt && !fd.non_rect);
8215 expand_oacc_for (region, fd: &fd);
8216 }
8217 else if (gimple_omp_for_kind (g: fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8218 {
8219 if (gimple_omp_for_combined_into_p (g: fd.for_stmt))
8220 expand_omp_taskloop_for_inner (region, fd: &fd, inner_stmt);
8221 else
8222 expand_omp_taskloop_for_outer (region, fd: &fd, inner_stmt);
8223 }
8224 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8225 && !fd.have_ordered)
8226 {
8227 if (fd.chunk_size == NULL)
8228 expand_omp_for_static_nochunk (region, fd: &fd, inner_stmt);
8229 else
8230 expand_omp_for_static_chunk (region, fd: &fd, inner_stmt);
8231 }
8232 else
8233 {
8234 int fn_index, start_ix, next_ix;
8235 unsigned HOST_WIDE_INT sched = 0;
8236 tree sched_arg = NULL_TREE;
8237
8238 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8239 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8240 if (fd.chunk_size == NULL
8241 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8242 fd.chunk_size = integer_zero_node;
8243 switch (fd.sched_kind)
8244 {
8245 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8246 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8247 && fd.lastprivate_conditional == 0)
8248 {
8249 gcc_assert (!fd.have_ordered);
8250 fn_index = 6;
8251 sched = 4;
8252 }
8253 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8254 && !fd.have_ordered
8255 && fd.lastprivate_conditional == 0)
8256 fn_index = 7;
8257 else
8258 {
8259 fn_index = 3;
8260 sched = (HOST_WIDE_INT_1U << 31);
8261 }
8262 break;
8263 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8264 case OMP_CLAUSE_SCHEDULE_GUIDED:
8265 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8266 && !fd.have_ordered
8267 && fd.lastprivate_conditional == 0)
8268 {
8269 fn_index = 3 + fd.sched_kind;
8270 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8271 break;
8272 }
8273 fn_index = fd.sched_kind;
8274 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8275 sched += (HOST_WIDE_INT_1U << 31);
8276 break;
8277 case OMP_CLAUSE_SCHEDULE_STATIC:
8278 gcc_assert (fd.have_ordered);
8279 fn_index = 0;
8280 sched = (HOST_WIDE_INT_1U << 31) + 1;
8281 break;
8282 default:
8283 gcc_unreachable ();
8284 }
8285 if (!fd.ordered)
8286 fn_index += fd.have_ordered * 8;
8287 if (fd.ordered)
8288 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8289 else
8290 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8291 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8292 if (fd.have_reductemp || fd.have_pointer_condtemp)
8293 {
8294 if (fd.ordered)
8295 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8296 else if (fd.have_ordered)
8297 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8298 else
8299 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8300 sched_arg = build_int_cstu (long_integer_type_node, sched);
8301 if (!fd.chunk_size)
8302 fd.chunk_size = integer_zero_node;
8303 }
8304 if (fd.iter_type == long_long_unsigned_type_node)
8305 {
8306 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8307 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8308 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8309 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8310 }
8311 expand_omp_for_generic (region, fd: &fd, start_fn: (enum built_in_function) start_ix,
8312 next_fn: (enum built_in_function) next_ix, sched_arg,
8313 inner_stmt);
8314 }
8315}
8316
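/* Illustrative sketch (hypothetical user code): under the dispatch above,
   a worksharing loop such as

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++)
       body (i);

   reaches expand_omp_for_generic with the GOMP_loop_nonmonotonic_dynamic
   start/next entry points (dynamic without a monotonic modifier takes the
   nonmonotonic variants), and the _ull entry points are substituted when
   fd.iter_type is unsigned long long.  */
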
8317/* Expand code for an OpenMP sections directive. In pseudo code, we generate
8318
8319 v = GOMP_sections_start (n);
8320 L0:
8321 switch (v)
8322 {
8323 case 0:
8324 goto L2;
8325 case 1:
8326 section 1;
8327 goto L1;
8328 case 2:
8329 ...
8330 case n:
8331 ...
8332 default:
8333 abort ();
8334 }
8335 L1:
8336 v = GOMP_sections_next ();
8337 goto L0;
8338 L2:
8339 reduction;
8340
8341 If this is a combined parallel+sections region, replace the call to
8342 GOMP_sections_start with a call to GOMP_sections_next. */
8343
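/* For instance (hypothetical user code, not taken from this file),

     #pragma omp sections
     {
       #pragma omp section
         foo ();
       #pragma omp section
         bar ();
     }

   produces a two-case dispatch of the shape above: GOMP_sections_start (2)
   returns the first section to execute (or 0 when none remain), and each
   thread keeps calling GOMP_sections_next () until it receives 0.  */
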
8344static void
8345expand_omp_sections (struct omp_region *region)
8346{
8347 tree t, u, vin = NULL, vmain, vnext, l2;
8348 unsigned len;
8349 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8350 gimple_stmt_iterator si, switch_si;
8351 gomp_sections *sections_stmt;
8352 gimple *stmt;
8353 gomp_continue *cont;
8354 edge_iterator ei;
8355 edge e;
8356 struct omp_region *inner;
8357 unsigned i, casei;
8358 bool exit_reachable = region->cont != NULL;
8359
8360 gcc_assert (region->exit != NULL);
8361 entry_bb = region->entry;
8362 l0_bb = single_succ (bb: entry_bb);
8363 l1_bb = region->cont;
8364 l2_bb = region->exit;
8365 if (single_pred_p (bb: l2_bb) && single_pred (bb: l2_bb) == l0_bb)
8366 l2 = gimple_block_label (l2_bb);
8367 else
8368 {
8369 /* This can happen if there are reductions. */
8370 len = EDGE_COUNT (l0_bb->succs);
8371 gcc_assert (len > 0);
8372 e = EDGE_SUCC (l0_bb, len - 1);
8373 si = gsi_last_nondebug_bb (bb: e->dest);
8374 l2 = NULL_TREE;
8375 if (gsi_end_p (i: si)
8376 || gimple_code (g: gsi_stmt (i: si)) != GIMPLE_OMP_SECTION)
8377 l2 = gimple_block_label (e->dest);
8378 else
8379 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8380 {
8381 si = gsi_last_nondebug_bb (bb: e->dest);
8382 if (gsi_end_p (i: si)
8383 || gimple_code (g: gsi_stmt (i: si)) != GIMPLE_OMP_SECTION)
8384 {
8385 l2 = gimple_block_label (e->dest);
8386 break;
8387 }
8388 }
8389 }
8390 if (exit_reachable)
8391 default_bb = create_empty_bb (l1_bb->prev_bb);
8392 else
8393 default_bb = create_empty_bb (l0_bb);
8394
8395 /* We will build a switch() with enough cases for all the
8396 GIMPLE_OMP_SECTION regions, a '0' case to handle when there is no
8397 more work, and a default case to abort if something goes wrong. */
8398 len = EDGE_COUNT (l0_bb->succs);
8399
8400 /* Use vec::quick_push on label_vec throughout, since we know the size
8401 in advance. */
8402 auto_vec<tree> label_vec (len);
8403
8404 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8405 GIMPLE_OMP_SECTIONS statement. */
8406 si = gsi_last_nondebug_bb (bb: entry_bb);
8407 sections_stmt = as_a <gomp_sections *> (p: gsi_stmt (i: si));
8408 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8409 vin = gimple_omp_sections_control (gs: sections_stmt);
8410 tree clauses = gimple_omp_sections_clauses (gs: sections_stmt);
8411 tree reductmp = omp_find_clause (clauses, kind: OMP_CLAUSE__REDUCTEMP_);
8412 tree condtmp = omp_find_clause (clauses, kind: OMP_CLAUSE__CONDTEMP_);
8413 tree cond_var = NULL_TREE;
8414 if (reductmp || condtmp)
8415 {
8416 tree reductions = null_pointer_node, mem = null_pointer_node;
8417 tree memv = NULL_TREE, condtemp = NULL_TREE;
8418 gimple_stmt_iterator gsi = gsi_none ();
8419 gimple *g = NULL;
8420 if (reductmp)
8421 {
8422 reductions = OMP_CLAUSE_DECL (reductmp);
8423 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8424 g = SSA_NAME_DEF_STMT (reductions);
8425 reductions = gimple_assign_rhs1 (gs: g);
8426 OMP_CLAUSE_DECL (reductmp) = reductions;
8427 gsi = gsi_for_stmt (g);
8428 }
8429 else
8430 gsi = si;
8431 if (condtmp)
8432 {
8433 condtemp = OMP_CLAUSE_DECL (condtmp);
8434 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8435 kind: OMP_CLAUSE__CONDTEMP_);
8436 cond_var = OMP_CLAUSE_DECL (c);
8437 tree type = TREE_TYPE (condtemp);
8438 memv = create_tmp_var (type);
8439 TREE_ADDRESSABLE (memv) = 1;
8440 unsigned cnt = 0;
8441 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8442 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8443 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8444 ++cnt;
8445 unsigned HOST_WIDE_INT sz
8446 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8447 expand_omp_build_assign (gsi_p: &gsi, to: memv, from: build_int_cst (type, sz),
8448 after: false);
8449 mem = build_fold_addr_expr (memv);
8450 }
8451 t = build_int_cst (unsigned_type_node, len - 1);
8452 u = builtin_decl_explicit (fncode: BUILT_IN_GOMP_SECTIONS2_START);
8453 stmt = gimple_build_call (u, 3, t, reductions, mem);
8454 gimple_call_set_lhs (gs: stmt, lhs: vin);
8455 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8456 if (condtmp)
8457 {
8458 expand_omp_build_assign (gsi_p: &gsi, to: condtemp, from: memv, after: false);
8459 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8460 vin, build_one_cst (TREE_TYPE (cond_var)));
8461 expand_omp_build_assign (gsi_p: &gsi, to: cond_var, from: t, after: false);
8462 }
8463 if (reductmp)
8464 {
8465 gsi_remove (&gsi, true);
8466 release_ssa_name (name: gimple_assign_lhs (gs: g));
8467 }
8468 }
8469 else if (!is_combined_parallel (region))
8470 {
8471 /* If we are not inside a combined parallel+sections region,
8472 call GOMP_sections_start. */
8473 t = build_int_cst (unsigned_type_node, len - 1);
8474 u = builtin_decl_explicit (fncode: BUILT_IN_GOMP_SECTIONS_START);
8475 stmt = gimple_build_call (u, 1, t);
8476 }
8477 else
8478 {
8479 /* Otherwise, call GOMP_sections_next. */
8480 u = builtin_decl_explicit (fncode: BUILT_IN_GOMP_SECTIONS_NEXT);
8481 stmt = gimple_build_call (u, 0);
8482 }
8483 if (!reductmp && !condtmp)
8484 {
8485 gimple_call_set_lhs (gs: stmt, lhs: vin);
8486 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8487 }
8488 gsi_remove (&si, true);
8489
8490 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8491 L0_BB. */
8492 switch_si = gsi_last_nondebug_bb (bb: l0_bb);
8493 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8494 if (exit_reachable)
8495 {
8496 cont = as_a <gomp_continue *> (p: last_nondebug_stmt (l1_bb));
8497 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8498 vmain = gimple_omp_continue_control_use (cont_stmt: cont);
8499 vnext = gimple_omp_continue_control_def (cont_stmt: cont);
8500 }
8501 else
8502 {
8503 vmain = vin;
8504 vnext = NULL_TREE;
8505 }
8506
8507 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8508 label_vec.quick_push (obj: t);
8509 i = 1;
8510
8511 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8512 for (inner = region->inner, casei = 1;
8513 inner;
8514 inner = inner->next, i++, casei++)
8515 {
8516 basic_block s_entry_bb, s_exit_bb;
8517
8518 /* Skip optional reduction region. */
8519 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8520 {
8521 --i;
8522 --casei;
8523 continue;
8524 }
8525
8526 s_entry_bb = inner->entry;
8527 s_exit_bb = inner->exit;
8528
8529 t = gimple_block_label (s_entry_bb);
8530 u = build_int_cst (unsigned_type_node, casei);
8531 u = build_case_label (u, NULL, t);
8532 label_vec.quick_push (obj: u);
8533
8534 si = gsi_last_nondebug_bb (bb: s_entry_bb);
8535 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8536 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8537 gsi_remove (&si, true);
8538 single_succ_edge (bb: s_entry_bb)->flags = EDGE_FALLTHRU;
8539
8540 if (s_exit_bb == NULL)
8541 continue;
8542
8543 si = gsi_last_nondebug_bb (bb: s_exit_bb);
8544 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8545 gsi_remove (&si, true);
8546
8547 single_succ_edge (bb: s_exit_bb)->flags = EDGE_FALLTHRU;
8548 }
8549
8550 /* Error handling code goes in DEFAULT_BB. */
8551 t = gimple_block_label (default_bb);
8552 u = build_case_label (NULL, NULL, t);
8553 make_edge (l0_bb, default_bb, 0);
8554 add_bb_to_loop (default_bb, current_loops->tree_root);
8555
8556 stmt = gimple_build_switch (vmain, u, label_vec);
8557 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8558 gsi_remove (&switch_si, true);
8559
8560 si = gsi_start_bb (bb: default_bb);
8561 stmt = gimple_build_call (builtin_decl_explicit (fncode: BUILT_IN_TRAP), 0);
8562 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8563
8564 if (exit_reachable)
8565 {
8566 tree bfn_decl;
8567
8568 /* Code to get the next section goes in L1_BB. */
8569 si = gsi_last_nondebug_bb (bb: l1_bb);
8570 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8571
8572 bfn_decl = builtin_decl_explicit (fncode: BUILT_IN_GOMP_SECTIONS_NEXT);
8573 stmt = gimple_build_call (bfn_decl, 0);
8574 gimple_call_set_lhs (gs: stmt, lhs: vnext);
8575 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8576 if (cond_var)
8577 {
8578 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8579 vnext, build_one_cst (TREE_TYPE (cond_var)));
8580 expand_omp_build_assign (gsi_p: &si, to: cond_var, from: t, after: false);
8581 }
8582 gsi_remove (&si, true);
8583
8584 single_succ_edge (bb: l1_bb)->flags = EDGE_FALLTHRU;
8585 }
8586
8587 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8588 si = gsi_last_nondebug_bb (bb: l2_bb);
8589 if (gimple_omp_return_nowait_p (g: gsi_stmt (i: si)))
8590 t = builtin_decl_explicit (fncode: BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8591 else if (gimple_omp_return_lhs (g: gsi_stmt (i: si)))
8592 t = builtin_decl_explicit (fncode: BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8593 else
8594 t = builtin_decl_explicit (fncode: BUILT_IN_GOMP_SECTIONS_END);
8595 stmt = gimple_build_call (t, 0);
8596 if (gimple_omp_return_lhs (g: gsi_stmt (i: si)))
8597 gimple_call_set_lhs (gs: stmt, lhs: gimple_omp_return_lhs (g: gsi_stmt (i: si)));
8598 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8599 gsi_remove (&si, true);
8600
8601 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8602}
8603
8604/* Expand code for an OpenMP single or scope directive. We've already expanded
8605 much of the code; here we simply place the GOMP_barrier call. */
8606
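/* E.g. (illustrative user code)

     #pragma omp single
       foo ();

   has already been lowered so that only the entry marker and the
   GIMPLE_OMP_RETURN remain; unless nowait was given, the return is
   replaced by a GOMP_barrier call so the other threads wait for the
   single region to complete.  */
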
8607static void
8608expand_omp_single (struct omp_region *region)
8609{
8610 basic_block entry_bb, exit_bb;
8611 gimple_stmt_iterator si;
8612
8613 entry_bb = region->entry;
8614 exit_bb = region->exit;
8615
8616 si = gsi_last_nondebug_bb (bb: entry_bb);
8617 enum gimple_code code = gimple_code (g: gsi_stmt (i: si));
8618 gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
8619 gsi_remove (&si, true);
8620 single_succ_edge (bb: entry_bb)->flags = EDGE_FALLTHRU;
8621
8622 if (exit_bb == NULL)
8623 {
8624 gcc_assert (code == GIMPLE_OMP_SCOPE);
8625 return;
8626 }
8627
8628 si = gsi_last_nondebug_bb (bb: exit_bb);
8629 if (!gimple_omp_return_nowait_p (g: gsi_stmt (i: si)))
8630 {
8631 tree t = gimple_omp_return_lhs (g: gsi_stmt (i: si));
8632 gsi_insert_after (&si, omp_build_barrier (lhs: t), GSI_SAME_STMT);
8633 }
8634 gsi_remove (&si, true);
8635 single_succ_edge (bb: exit_bb)->flags = EDGE_FALLTHRU;
8636}
8637
8638/* Generic expansion for OpenMP synchronization directives: master,
8639 masked, taskgroup, ordered and critical. All we need to do here is
8640 remove the entry and exit markers for REGION. */
8641
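/* E.g. (illustrative user code) for

     #pragma omp critical
       foo ();

   lowering has already emitted the GOMP_critical_start/GOMP_critical_end
   calls inside the region, so all that is left here is deleting the
   GIMPLE_OMP_CRITICAL and GIMPLE_OMP_RETURN markers bracketing it.  */
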
8642static void
8643expand_omp_synch (struct omp_region *region)
8644{
8645 basic_block entry_bb, exit_bb;
8646 gimple_stmt_iterator si;
8647
8648 entry_bb = region->entry;
8649 exit_bb = region->exit;
8650
8651 si = gsi_last_nondebug_bb (bb: entry_bb);
8652 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8653 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8654 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8655 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8656 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8657 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8658 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8659 if (gimple_code (g: gsi_stmt (i: si)) == GIMPLE_OMP_TEAMS
8660 && gimple_omp_teams_host (omp_teams_stmt: as_a <gomp_teams *> (p: gsi_stmt (i: si))))
8661 {
8662 expand_omp_taskreg (region);
8663 return;
8664 }
8665 gsi_remove (&si, true);
8666 single_succ_edge (bb: entry_bb)->flags = EDGE_FALLTHRU;
8667
8668 if (exit_bb)
8669 {
8670 si = gsi_last_nondebug_bb (bb: exit_bb);
8671 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8672 gsi_remove (&si, true);
8673 single_succ_edge (bb: exit_bb)->flags = EDGE_FALLTHRU;
8674 }
8675}
8676
8677/* Translate enum omp_memory_order to enum memmodel for the fail clause
8678 embedded in it. */
8679
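/* Worked example: a directive such as

     #pragma omp atomic compare capture seq_cst fail(relaxed)

   carries OMP_FAIL_MEMORY_ORDER_RELAXED and maps to MEMMODEL_RELAXED
   here. With the fail clause unspecified, the success ordering is
   reused, except that release/acq_rel degrade to relaxed/acquire,
   since a failed compare-and-swap performs no store.  */
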
8680static enum memmodel
8681omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8682{
8683 switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8684 {
8685 case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8686 switch (mo & OMP_MEMORY_ORDER_MASK)
8687 {
8688 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8689 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8690 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8691 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8692 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8693 default: break;
8694 }
8695 gcc_unreachable ();
8696 case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8697 case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8698 case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8699 default: gcc_unreachable ();
8700 }
8701}
8702
8703/* Translate enum omp_memory_order to enum memmodel. The two enums use
8704 different numbering so that OMP_MEMORY_ORDER_UNSPECIFIED is 0, and
8705 omp_memory_order also encodes the fail memory order. */
8706
8707static enum memmodel
8708omp_memory_order_to_memmodel (enum omp_memory_order mo)
8709{
8710 enum memmodel ret, fail_ret;
8711 switch (mo & OMP_MEMORY_ORDER_MASK)
8712 {
8713 case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8714 case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8715 case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8716 case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8717 case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
8718 default: gcc_unreachable ();
8719 }
8720 /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8721 we can just return ret here unconditionally. Otherwise, work around
8722 it here and make sure fail memmodel is not stronger. */
8723 if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8724 return ret;
8725 fail_ret = omp_memory_order_to_fail_memmodel (mo);
8726 if (fail_ret > ret)
8727 return fail_ret;
8728 return ret;
8729}
8730
8731/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8732 operation as a normal volatile load. */
8733
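/* Roughly (illustrative sketch), for a 4-byte int this rewrites

     #pragma omp atomic read
       v = x;

   into the equivalent of

     v = __atomic_load_4 (&x, order);

   where ORDER is the memmodel derived from the directive's clauses.  */
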
8734static bool
8735expand_omp_atomic_load (basic_block load_bb, tree addr,
8736 tree loaded_val, int index)
8737{
8738 enum built_in_function tmpbase;
8739 gimple_stmt_iterator gsi;
8740 basic_block store_bb;
8741 location_t loc;
8742 gimple *stmt;
8743 tree decl, type, itype;
8744
8745 gsi = gsi_last_nondebug_bb (bb: load_bb);
8746 stmt = gsi_stmt (i: gsi);
8747 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8748 loc = gimple_location (g: stmt);
8749
8750 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8751 is smaller than word size, then expand_atomic_load assumes that the load
8752 is atomic. We could avoid the builtin entirely in this case. */
8753
8754 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8755 decl = builtin_decl_explicit (fncode: tmpbase);
8756 if (decl == NULL_TREE)
8757 return false;
8758
8759 type = TREE_TYPE (loaded_val);
8760 itype = TREE_TYPE (TREE_TYPE (decl));
8761
8762 enum omp_memory_order omo = gimple_omp_atomic_memory_order (g: stmt);
8763 tree mo = build_int_cst (integer_type_node,
8764 omp_memory_order_to_memmodel (mo: omo));
8765 gcall *call = gimple_build_call (decl, 2, addr, mo);
8766 gimple_set_location (g: call, location: loc);
8767 gimple_set_vuse (g: call, vuse: gimple_vuse (g: stmt));
8768 gimple *repl;
8769 if (!useless_type_conversion_p (type, itype))
8770 {
8771 tree lhs = make_ssa_name (var: itype);
8772 gimple_call_set_lhs (gs: call, lhs);
8773 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
8774 repl = gimple_build_assign (loaded_val,
8775 build1 (VIEW_CONVERT_EXPR, type, lhs));
8776 gimple_set_location (g: repl, location: loc);
8777 }
8778 else
8779 {
8780 gimple_call_set_lhs (gs: call, lhs: loaded_val);
8781 repl = call;
8782 }
8783 gsi_replace (&gsi, repl, true);
8784
8785 store_bb = single_succ (bb: load_bb);
8786 gsi = gsi_last_nondebug_bb (bb: store_bb);
8787 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8788 gsi_remove (&gsi, true);
8789
8790 return true;
8791}
8792
8793/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8794 operation as a normal volatile store. */
8795
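/* Roughly (illustrative sketch), for a 4-byte int

     #pragma omp atomic write
       x = expr;

   becomes the equivalent of __atomic_store_4 (&x, expr, order), while a
   capture form that needs the old value selects __atomic_exchange_4
   instead (the "exchange" case below).  */
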
8796static bool
8797expand_omp_atomic_store (basic_block load_bb, tree addr,
8798 tree loaded_val, tree stored_val, int index)
8799{
8800 enum built_in_function tmpbase;
8801 gimple_stmt_iterator gsi;
8802 basic_block store_bb = single_succ (bb: load_bb);
8803 location_t loc;
8804 gimple *stmt;
8805 tree decl, type, itype;
8806 machine_mode imode;
8807 bool exchange;
8808
8809 gsi = gsi_last_nondebug_bb (bb: load_bb);
8810 stmt = gsi_stmt (i: gsi);
8811 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8812
8813 /* If the load value is needed, then this isn't a store but an exchange. */
8814 exchange = gimple_omp_atomic_need_value_p (g: stmt);
8815
8816 gsi = gsi_last_nondebug_bb (bb: store_bb);
8817 stmt = gsi_stmt (i: gsi);
8818 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8819 loc = gimple_location (g: stmt);
8820
8821 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8822 is smaller than word size, then expand_atomic_store assumes that the store
8823 is atomic. We could avoid the builtin entirely in this case. */
8824
8825 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8826 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8827 decl = builtin_decl_explicit (fncode: tmpbase);
8828 if (decl == NULL_TREE)
8829 return false;
8830
8831 type = TREE_TYPE (stored_val);
8832
8833 /* Dig out the type of the function's second argument. */
8834 itype = TREE_TYPE (decl);
8835 itype = TYPE_ARG_TYPES (itype);
8836 itype = TREE_CHAIN (itype);
8837 itype = TREE_VALUE (itype);
8838 imode = TYPE_MODE (itype);
8839
8840 if (exchange && !can_atomic_exchange_p (imode, true))
8841 return false;
8842
8843 if (!useless_type_conversion_p (itype, type))
8844 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8845 enum omp_memory_order omo = gimple_omp_atomic_memory_order (g: stmt);
8846 tree mo = build_int_cst (integer_type_node,
8847 omp_memory_order_to_memmodel (mo: omo));
8848 stored_val = force_gimple_operand_gsi (&gsi, stored_val, true, NULL_TREE,
8849 true, GSI_SAME_STMT);
8850 gcall *call = gimple_build_call (decl, 3, addr, stored_val, mo);
8851 gimple_set_location (g: call, location: loc);
8852 gimple_set_vuse (g: call, vuse: gimple_vuse (g: stmt));
8853 gimple_set_vdef (g: call, vdef: gimple_vdef (g: stmt));
8854
8855 gimple *repl = call;
8856 if (exchange)
8857 {
8858 if (!useless_type_conversion_p (type, itype))
8859 {
8860 tree lhs = make_ssa_name (var: itype);
8861 gimple_call_set_lhs (gs: call, lhs);
8862 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
8863 repl = gimple_build_assign (loaded_val,
8864 build1 (VIEW_CONVERT_EXPR, type, lhs));
8865 gimple_set_location (g: repl, location: loc);
8866 }
8867 else
8868 gimple_call_set_lhs (gs: call, lhs: loaded_val);
8869 }
8870 gsi_replace (&gsi, repl, true);
8871
8872 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8873 gsi = gsi_last_nondebug_bb (bb: load_bb);
8874 gsi_remove (&gsi, true);
8875
8876 return true;
8877}
8878
8879/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8880 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8881 size of the data type, and thus usable to find the index of the builtin
8882 decl. Returns false if the expression is not of the proper form. */
8883
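/* Illustrative sketch: for a 4-byte int, an update such as

     #pragma omp atomic update
       x += n;

   matches the PLUS_EXPR case below and is emitted as the equivalent of

     (void) __atomic_fetch_add_4 (&x, n, order);

   with __atomic_add_fetch_4 chosen instead when the new value is
   needed.  */
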
8884static bool
8885expand_omp_atomic_fetch_op (basic_block load_bb,
8886 tree addr, tree loaded_val,
8887 tree stored_val, int index)
8888{
8889 enum built_in_function oldbase, newbase, tmpbase;
8890 tree decl, itype, call;
8891 tree lhs, rhs;
8892 basic_block store_bb = single_succ (bb: load_bb);
8893 gimple_stmt_iterator gsi;
8894 gimple *stmt;
8895 location_t loc;
8896 enum tree_code code;
8897 bool need_old, need_new;
8898 machine_mode imode;
8899
8900 /* We expect to find the following sequences:
8901
8902 load_bb:
8903 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8904
8905 store_bb:
8906 val = tmp OP something; (or: something OP tmp)
8907 GIMPLE_OMP_ATOMIC_STORE (val)
8908
8909 ???FIXME: Allow a more flexible sequence.
8910 Perhaps use data flow to pick the statements.
8911
8912 */
8913
8914 gsi = gsi_after_labels (bb: store_bb);
8915 stmt = gsi_stmt (i: gsi);
8916 if (is_gimple_debug (gs: stmt))
8917 {
8918 gsi_next_nondebug (i: &gsi);
8919 if (gsi_end_p (i: gsi))
8920 return false;
8921 stmt = gsi_stmt (i: gsi);
8922 }
8923 loc = gimple_location (g: stmt);
8924 if (!is_gimple_assign (gs: stmt))
8925 return false;
8926 gsi_next_nondebug (i: &gsi);
8927 if (gimple_code (g: gsi_stmt (i: gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8928 return false;
8929 need_new = gimple_omp_atomic_need_value_p (g: gsi_stmt (i: gsi));
8930 need_old = gimple_omp_atomic_need_value_p (g: last_nondebug_stmt (load_bb));
8931 enum omp_memory_order omo
8932 = gimple_omp_atomic_memory_order (g: last_nondebug_stmt (load_bb));
8933 enum memmodel mo = omp_memory_order_to_memmodel (mo: omo);
8934 gcc_checking_assert (!need_old || !need_new);
8935
8936 if (!operand_equal_p (gimple_assign_lhs (gs: stmt), stored_val, flags: 0))
8937 return false;
8938
8939 /* Check for one of the supported fetch-op operations. */
8940 code = gimple_assign_rhs_code (gs: stmt);
8941 switch (code)
8942 {
8943 case PLUS_EXPR:
8944 case POINTER_PLUS_EXPR:
8945 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8946 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8947 break;
8948 case MINUS_EXPR:
8949 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8950 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8951 break;
8952 case BIT_AND_EXPR:
8953 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8954 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8955 break;
8956 case BIT_IOR_EXPR:
8957 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8958 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8959 break;
8960 case BIT_XOR_EXPR:
8961 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8962 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8963 break;
8964 default:
8965 return false;
8966 }
8967
8968 /* Make sure the expression is of the proper form. */
8969 if (operand_equal_p (gimple_assign_rhs1 (gs: stmt), loaded_val, flags: 0))
8970 rhs = gimple_assign_rhs2 (gs: stmt);
8971 else if (commutative_tree_code (gimple_assign_rhs_code (gs: stmt))
8972 && operand_equal_p (gimple_assign_rhs2 (gs: stmt), loaded_val, flags: 0))
8973 rhs = gimple_assign_rhs1 (gs: stmt);
8974 else
8975 return false;
8976
8977 tmpbase = ((enum built_in_function)
8978 ((need_new ? newbase : oldbase) + index + 1));
8979 decl = builtin_decl_explicit (fncode: tmpbase);
8980 if (decl == NULL_TREE)
8981 return false;
8982 itype = TREE_TYPE (TREE_TYPE (decl));
8983 imode = TYPE_MODE (itype);
8984
8985 /* We could test all of the various optabs involved, but the fact of the
8986 matter is that (with the exception of i486 vs i586 and xadd) all targets
8987 that support any atomic operation optab also implement compare-and-swap.
8988 Let optabs.cc take care of expanding any compare-and-swap loop. */
8989 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8990 return false;
8991
8992 gsi = gsi_last_nondebug_bb (bb: load_bb);
8993 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8994
8995 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8996 It only requires that the operation happen atomically, so the memory
8997 order computed from the directive's clauses (often relaxed) suffices. */
8998 call = build_call_expr_loc (loc, decl, 3, addr,
8999 fold_convert_loc (loc, itype, rhs),
9000 build_int_cst (NULL, mo));
9001
9002 if (need_old || need_new)
9003 {
9004 lhs = need_old ? loaded_val : stored_val;
9005 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
9006 call = build2_loc (loc, code: MODIFY_EXPR, void_type_node, arg0: lhs, arg1: call);
9007 }
9008 else
9009 call = fold_convert_loc (loc, void_type_node, call);
9010 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
9011 gsi_remove (&gsi, true);
9012
9013 gsi = gsi_last_nondebug_bb (bb: store_bb);
9014 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
9015 gsi_remove (&gsi, true);
9016 gsi = gsi_last_nondebug_bb (bb: store_bb);
9017 stmt = gsi_stmt (i: gsi);
9018 gsi_remove (&gsi, true);
9019
9020 if (gimple_in_ssa_p (cfun))
9021 release_defs (stmt);
9022
9023 return true;
9024}
9025
9026/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
9027 compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
9028 Returns false if the expression is not of the proper form. */
9029
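/* Illustrative sketch: user code along the lines of

     #pragma omp atomic compare
       if (x == e) { x = d; }

   is lowered into one of the sequences documented below and is expanded
   here into a single IFN_ATOMIC_COMPARE_EXCHANGE call, avoiding the
   generic compare-and-swap loop of expand_omp_atomic_pipeline.  */
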
9030static bool
9031expand_omp_atomic_cas (basic_block load_bb, tree addr,
9032 tree loaded_val, tree stored_val, int index)
9033{
9034 /* We expect to find the following sequences:
9035
9036 load_bb:
9037 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
9038
9039 store_bb:
9040 val = tmp == e ? d : tmp;
9041 GIMPLE_OMP_ATOMIC_STORE (val)
9042
9043 or in store_bb instead:
9044 tmp2 = tmp == e;
9045 val = tmp2 ? d : tmp;
9046 GIMPLE_OMP_ATOMIC_STORE (val)
9047
9048 or:
9049 tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
9050 val = e == tmp3 ? d : tmp;
9051 GIMPLE_OMP_ATOMIC_STORE (val)
9052
9053 etc. */
9054
9055
9056 basic_block store_bb = single_succ (bb: load_bb);
9057 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb: store_bb);
9058 gimple *store_stmt = gsi_stmt (i: gsi);
9059 if (!store_stmt || gimple_code (g: store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
9060 return false;
9061 gsi_prev_nondebug (i: &gsi);
9062 if (gsi_end_p (i: gsi))
9063 return false;
9064 gimple *condexpr_stmt = gsi_stmt (i: gsi);
9065 if (!is_gimple_assign (gs: condexpr_stmt)
9066 || gimple_assign_rhs_code (gs: condexpr_stmt) != COND_EXPR)
9067 return false;
9068 if (!operand_equal_p (gimple_assign_lhs (gs: condexpr_stmt), stored_val, flags: 0))
9069 return false;
9070 gimple *cond_stmt = NULL;
9071 gimple *vce_stmt = NULL;
9072 gsi_prev_nondebug (i: &gsi);
9073 if (!gsi_end_p (i: gsi))
9074 {
9075 cond_stmt = gsi_stmt (i: gsi);
9076 if (!is_gimple_assign (gs: cond_stmt))
9077 return false;
9078 if (gimple_assign_rhs_code (gs: cond_stmt) == EQ_EXPR)
9079 {
9080 gsi_prev_nondebug (i: &gsi);
9081 if (!gsi_end_p (i: gsi))
9082 {
9083 vce_stmt = gsi_stmt (i: gsi);
9084 if (!is_gimple_assign (gs: vce_stmt)
9085 || gimple_assign_rhs_code (gs: vce_stmt) != VIEW_CONVERT_EXPR)
9086 return false;
9087 }
9088 }
9089 else if (gimple_assign_rhs_code (gs: cond_stmt) == VIEW_CONVERT_EXPR)
9090 std::swap (a&: vce_stmt, b&: cond_stmt);
9091 else
9092 return false;
9093 if (vce_stmt)
9094 {
9095 tree vce_rhs = gimple_assign_rhs1 (gs: vce_stmt);
9096 if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
9097 || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
9098 return false;
9099 if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
9100 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
9101 || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
9102 TYPE_SIZE (TREE_TYPE (loaded_val))))
9103 return false;
9104 gsi_prev_nondebug (i: &gsi);
9105 if (!gsi_end_p (i: gsi))
9106 return false;
9107 }
9108 }
9109 tree cond = gimple_assign_rhs1 (gs: condexpr_stmt);
9110 tree cond_op1, cond_op2;
9111 if (cond_stmt)
9112 {
9113 /* We should now always get a separate cond_stmt. */
9114 if (!operand_equal_p (cond, gimple_assign_lhs (gs: cond_stmt)))
9115 return false;
9116 cond_op1 = gimple_assign_rhs1 (gs: cond_stmt);
9117 cond_op2 = gimple_assign_rhs2 (gs: cond_stmt);
9118 }
9119 else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
9120 return false;
9121 else
9122 {
9123 cond_op1 = TREE_OPERAND (cond, 0);
9124 cond_op2 = TREE_OPERAND (cond, 1);
9125 }
9126 tree d;
9127 if (TREE_CODE (cond) == NE_EXPR)
9128 {
9129 if (!operand_equal_p (gimple_assign_rhs2 (gs: condexpr_stmt), loaded_val))
9130 return false;
9131 d = gimple_assign_rhs3 (gs: condexpr_stmt);
9132 }
9133 else if (!operand_equal_p (gimple_assign_rhs3 (gs: condexpr_stmt), loaded_val))
9134 return false;
9135 else
9136 d = gimple_assign_rhs2 (gs: condexpr_stmt);
9137 tree e = vce_stmt ? gimple_assign_lhs (gs: vce_stmt) : loaded_val;
9138 if (operand_equal_p (e, cond_op1))
9139 e = cond_op2;
9140 else if (operand_equal_p (e, cond_op2))
9141 e = cond_op1;
9142 else
9143 return false;
9144
9145 location_t loc = gimple_location (g: store_stmt);
9146 gimple *load_stmt = last_nondebug_stmt (load_bb);
9147 bool need_new = gimple_omp_atomic_need_value_p (g: store_stmt);
9148 bool need_old = gimple_omp_atomic_need_value_p (g: load_stmt);
9149 bool weak = gimple_omp_atomic_weak_p (g: load_stmt);
9150 enum omp_memory_order omo = gimple_omp_atomic_memory_order (g: load_stmt);
9151 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (mo: omo));
9152 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (mo: omo));
9153 gcc_checking_assert (!need_old || !need_new);
9154
9155 enum built_in_function fncode
9156 = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9157 + index + 1);
9158 tree cmpxchg = builtin_decl_explicit (fncode);
9159 if (cmpxchg == NULL_TREE)
9160 return false;
9161 tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9162
9163 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9164 || !can_atomic_load_p (TYPE_MODE (itype)))
9165 return false;
9166
9167 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9168 if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
9169 return false;
9170
9171 gsi = gsi_for_stmt (store_stmt);
9172 if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
9173 {
9174 tree ne = create_tmp_reg (itype);
9175 gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
9176 gimple_set_location (g, location: loc);
9177 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9178 e = ne;
9179 }
9180 if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
9181 {
9182 tree nd = create_tmp_reg (itype);
9183 enum tree_code code;
9184 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
9185 {
9186 code = VIEW_CONVERT_EXPR;
9187 d = build1 (VIEW_CONVERT_EXPR, itype, d);
9188 }
9189 else
9190 code = NOP_EXPR;
9191 gimple *g = gimple_build_assign (nd, code, d);
9192 gimple_set_location (g, location: loc);
9193 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9194 d = nd;
9195 }
9196
9197 tree ctype = build_complex_type (itype);
9198 int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
9199 gimple *g
9200 = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
9201 build_int_cst (integer_type_node, flag),
9202 mo, fmo);
9203 tree cres = create_tmp_reg (ctype);
9204 gimple_call_set_lhs (gs: g, lhs: cres);
9205 gimple_set_location (g, location: loc);
9206 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9207
9208 if (cond_stmt || need_old || need_new)
9209 {
9210 tree im = create_tmp_reg (itype);
9211 g = gimple_build_assign (im, IMAGPART_EXPR,
9212 build1 (IMAGPART_EXPR, itype, cres));
9213 gimple_set_location (g, location: loc);
9214 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9215
9216 tree re = NULL_TREE;
9217 if (need_old || need_new)
9218 {
9219 re = create_tmp_reg (itype);
9220 g = gimple_build_assign (re, REALPART_EXPR,
9221 build1 (REALPART_EXPR, itype, cres));
9222 gimple_set_location (g, location: loc);
9223 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9224 }
9225
9226 if (cond_stmt)
9227 {
9228 g = gimple_build_assign (cond, NOP_EXPR, im);
9229 gimple_set_location (g, location: loc);
9230 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9231 }
9232
9233 if (need_new)
9234 {
9235 g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
9236 cond_stmt
9237 ? cond : build2 (NE_EXPR, boolean_type_node,
9238 im, build_zero_cst (itype)),
9239 d, re);
9240 gimple_set_location (g, location: loc);
9241 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9242 re = gimple_assign_lhs (gs: g);
9243 }
9244
9245 if (need_old || need_new)
9246 {
9247 tree v = need_old ? loaded_val : stored_val;
9248 enum tree_code code;
9249 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
9250 {
9251 code = VIEW_CONVERT_EXPR;
9252 re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
9253 }
9254 else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
9255 code = NOP_EXPR;
9256 else
9257 code = TREE_CODE (re);
9258 g = gimple_build_assign (v, code, re);
9259 gimple_set_location (g, location: loc);
9260 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9261 }
9262 }
9263
9264 gsi_remove (&gsi, true);
9265 gsi = gsi_for_stmt (load_stmt);
9266 gsi_remove (&gsi, true);
9267 gsi = gsi_for_stmt (condexpr_stmt);
9268 gsi_remove (&gsi, true);
9269 if (cond_stmt)
9270 {
9271 gsi = gsi_for_stmt (cond_stmt);
9272 gsi_remove (&gsi, true);
9273 }
9274 if (vce_stmt)
9275 {
9276 gsi = gsi_for_stmt (vce_stmt);
9277 gsi_remove (&gsi, true);
9278 }
9279
9280 return true;
9281}
9282
9283/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9284
9285 oldval = *addr;
9286 repeat:
9287 newval = rhs; // with oldval replacing *addr in rhs
9288 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
9289 if (oldval != newval)
9290 goto repeat;
9291
9292 INDEX is log2 of the size of the data type, and thus usable to find the
9293 index of the builtin decl. */
9294
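/* In user-level terms the emitted loop corresponds roughly to the
   following sketch (illustrative only):

     old = __atomic_load_n (addr, __ATOMIC_RELAXED);
     do
       newv = OP (old);
     while (!__atomic_compare_exchange_n (addr, &old, newv, false,
                                          order, fail_order));

   except that IFN_ATOMIC_COMPARE_EXCHANGE is emitted directly and the
   CFG loop is constructed by hand below.  */
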
9295static bool
9296expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
9297 tree addr, tree loaded_val, tree stored_val,
9298 int index)
9299{
9300 tree loadedi, storedi, initial, new_storedi, old_vali;
9301 tree type, itype, cmpxchg, iaddr, atype;
9302 gimple_stmt_iterator si;
9303 basic_block loop_header = single_succ (bb: load_bb);
9304 gimple *phi, *stmt;
9305 edge e;
9306 enum built_in_function fncode;
9307
9308 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9309 + index + 1);
9310 cmpxchg = builtin_decl_explicit (fncode);
9311 if (cmpxchg == NULL_TREE)
9312 return false;
9313 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9314 atype = type;
9315 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9316
9317 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9318 || !can_atomic_load_p (TYPE_MODE (itype)))
9319 return false;
9320
9321 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
9322 si = gsi_last_nondebug_bb (bb: load_bb);
9323 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9324 location_t loc = gimple_location (g: gsi_stmt (i: si));
9325 enum omp_memory_order omo = gimple_omp_atomic_memory_order (g: gsi_stmt (i: si));
9326 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (mo: omo));
9327 tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (mo: omo));
9328
9329 /* For floating-point values, we'll need to view-convert them to integers
9330 so that we can perform the atomic compare and swap. Simplify the
9331 following code by always setting up the "i"-prefixed integral variables. */
9332 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
9333 {
9334 tree iaddr_val;
9335
9336 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
9337 true));
9338 atype = itype;
9339 iaddr_val
9340 = force_gimple_operand_gsi (&si,
9341 fold_convert (TREE_TYPE (iaddr), addr),
9342 false, NULL_TREE, true, GSI_SAME_STMT);
9343 stmt = gimple_build_assign (iaddr, iaddr_val);
9344 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9345 loadedi = create_tmp_var (itype);
9346 if (gimple_in_ssa_p (cfun))
9347 loadedi = make_ssa_name (var: loadedi);
9348 }
9349 else
9350 {
9351 iaddr = addr;
9352 loadedi = loaded_val;
9353 }
9354
9355 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
9356 tree loaddecl = builtin_decl_explicit (fncode);
9357 if (loaddecl)
9358 initial
9359 = fold_convert (atype,
9360 build_call_expr (loaddecl, 2, iaddr,
9361 build_int_cst (NULL_TREE,
9362 MEMMODEL_RELAXED)));
9363 else
9364 {
9365 tree off
9366 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
9367 true), 0);
9368 initial = build2 (MEM_REF, atype, iaddr, off);
9369 }
9370
9371 initial
9372 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
9373 GSI_SAME_STMT);
9374
9375 /* Move the value to the LOADEDI temporary. */
9376 if (gimple_in_ssa_p (cfun))
9377 {
9378 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
9379 phi = create_phi_node (loadedi, loop_header);
9380 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
9381 initial);
9382 }
9383 else
9384 gsi_insert_before (&si,
9385 gimple_build_assign (loadedi, initial),
9386 GSI_SAME_STMT);
9387 if (loadedi != loaded_val)
9388 {
9389 gimple_stmt_iterator gsi2;
9390 tree x;
9391
9392 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
9393 gsi2 = gsi_start_bb (bb: loop_header);
9394 if (gimple_in_ssa_p (cfun))
9395 {
9396 gassign *stmt;
9397 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9398 true, GSI_SAME_STMT);
9399 stmt = gimple_build_assign (loaded_val, x);
9400 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
9401 }
9402 else
9403 {
9404 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
9405 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9406 true, GSI_SAME_STMT);
9407 }
9408 }
9409 gsi_remove (&si, true);
9410
9411 si = gsi_last_nondebug_bb (bb: store_bb);
9412 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9413
9414 if (iaddr == addr)
9415 storedi = stored_val;
9416 else
9417 storedi
9418 = force_gimple_operand_gsi (&si,
9419 build1 (VIEW_CONVERT_EXPR, itype,
9420 stored_val), true, NULL_TREE, true,
9421 GSI_SAME_STMT);
9422
9423 /* Build the compare&swap statement. */
9424 tree ctype = build_complex_type (itype);
9425 int flag = int_size_in_bytes (itype);
9426 new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
9427 ctype, 6, iaddr, loadedi,
9428 storedi,
9429 build_int_cst (integer_type_node,
9430 flag),
9431 mo, fmo);
9432 new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
9433 new_storedi = force_gimple_operand_gsi (&si,
9434 fold_convert (TREE_TYPE (loadedi),
9435 new_storedi),
9436 true, NULL_TREE,
9437 true, GSI_SAME_STMT);
9438
9439 if (gimple_in_ssa_p (cfun))
9440 old_vali = loadedi;
9441 else
9442 {
9443 old_vali = create_tmp_var (TREE_TYPE (loadedi));
9444 stmt = gimple_build_assign (old_vali, loadedi);
9445 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9446
9447 stmt = gimple_build_assign (loadedi, new_storedi);
9448 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9449 }
9450
9451 /* Note that we always perform the comparison as an integer, even for
9452 floating point. This allows the atomic operation to properly
9453 succeed even with NaNs and -0.0. */
9454 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
9455 stmt = gimple_build_cond_empty (cond: ne);
9456 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9457
9458 /* Update cfg. */
9459 e = single_succ_edge (bb: store_bb);
9460 e->flags &= ~EDGE_FALLTHRU;
9461 e->flags |= EDGE_FALSE_VALUE;
9462 /* Expect no looping. */
9463 e->probability = profile_probability::guessed_always ();
9464
9465 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
9466 e->probability = profile_probability::guessed_never ();
9467
9468 /* Copy the new value to loadedi (we already did that before the condition
9469 if we are not in SSA). */
9470 if (gimple_in_ssa_p (cfun))
9471 {
9472 phi = gimple_seq_first_stmt (s: phi_nodes (bb: loop_header));
9473 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
9474 }
9475
9476 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
9477 stmt = gsi_stmt (i: si);
9478 gsi_remove (&si, true);
9479 if (gimple_in_ssa_p (cfun))
9480 release_defs (stmt);
9481
9482 class loop *loop = alloc_loop ();
9483 loop->header = loop_header;
9484 loop->latch = store_bb;
9485 add_loop (loop, loop_header->loop_father);
9486
9487 return true;
9488}
9489
9490/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
9491
9492 GOMP_atomic_start ();
9493 *addr = rhs;
9494 GOMP_atomic_end ();
9495
9496 The result is not globally atomic, but works so long as all parallel
9497 references are within #pragma omp atomic directives. According to
9498 responses received from omp@openmp.org, this appears to be within
9499 spec. That makes sense, since several other compilers handle this
9500 situation the same way.
9501 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
9502 expanding. STORED_VAL is the operand of the matching
9503 GIMPLE_OMP_ATOMIC_STORE.
9504
9505 We replace
9506 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
9507 loaded_val = *addr;
9508
9509 and replace
9510 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
9511 *addr = stored_val;
9512*/
9513
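/* This fallback is what remains for operands with no usable
   compare-and-swap support, e.g. (illustrative) a 16-byte long double

     #pragma omp atomic update
       ld *= 2.0L;

   on a target without a 16-byte CAS.  */
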
9514static bool
9515expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9516 tree addr, tree loaded_val, tree stored_val)
9517{
9518 gimple_stmt_iterator si;
9519 gassign *stmt;
9520 tree t;
9521
9522 si = gsi_last_nondebug_bb (bb: load_bb);
9523 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9524
9525 t = builtin_decl_explicit (fncode: BUILT_IN_GOMP_ATOMIC_START);
9526 t = build_call_expr (t, 0);
9527 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9528
9529 tree mem = build_simple_mem_ref (addr);
9530 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9531 TREE_OPERAND (mem, 1)
9532 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9533 true),
9534 TREE_OPERAND (mem, 1));
9535 stmt = gimple_build_assign (loaded_val, mem);
9536 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9537 gsi_remove (&si, true);
9538
9539 si = gsi_last_nondebug_bb (bb: store_bb);
9540 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9541
9542 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9543 gimple_set_vuse (g: stmt, vuse: gimple_vuse (g: gsi_stmt (i: si)));
9544 gimple_set_vdef (g: stmt, vdef: gimple_vdef (g: gsi_stmt (i: si)));
9545 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9546
9547 t = builtin_decl_explicit (fncode: BUILT_IN_GOMP_ATOMIC_END);
9548 t = build_call_expr (t, 0);
9549 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9550 gsi_remove (&si, true);
9551 return true;
9552}
9553
9554/* Expand a GIMPLE_OMP_ATOMIC statement. We first try the specialized
9555 expanders (plain atomic load/store, fetch-op, compare-and-swap); if
9556 those fail, we try expand_omp_atomic_pipeline, and if that fails too,
9557 the ultimate fallback is wrapping the operation in a mutex
9558 (expand_omp_atomic_mutex). REGION is the atomic region built
9559 by build_omp_regions_1(). */
9560
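/* Illustrative note: for a 4-byte int the computation below yields
   index == exact_log2 (4) == 2, and the helpers above then address the
   *_4 builtin variants, e.g. BUILT_IN_ATOMIC_LOAD_N + 2 + 1
   == BUILT_IN_ATOMIC_LOAD_4, assuming the enum values are laid out
   consecutively by access size as in builtins.def.  */
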
9561static void
9562expand_omp_atomic (struct omp_region *region)
9563{
9564 basic_block load_bb = region->entry, store_bb = region->exit;
9565 gomp_atomic_load *load
9566 = as_a <gomp_atomic_load *> (p: last_nondebug_stmt (load_bb));
9567 gomp_atomic_store *store
9568 = as_a <gomp_atomic_store *> (p: last_nondebug_stmt (store_bb));
9569 tree loaded_val = gimple_omp_atomic_load_lhs (load_stmt: load);
9570 tree addr = gimple_omp_atomic_load_rhs (load_stmt: load);
9571 tree stored_val = gimple_omp_atomic_store_val (store_stmt: store);
9572 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9573 HOST_WIDE_INT index;
9574
9575 /* Make sure the type is one of the supported sizes. */
9576 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9577 index = exact_log2 (x: index);
9578 if (index >= 0 && index <= 4)
9579 {
9580 unsigned int align = TYPE_ALIGN_UNIT (type);
9581
9582 /* __sync builtins require strict data alignment. */
9583 if (exact_log2 (x: align) >= index)
9584 {
9585 /* Atomic load. */
9586 scalar_mode smode;
9587 if (loaded_val == stored_val
9588 && (is_int_mode (TYPE_MODE (type), int_mode: &smode)
9589 || is_float_mode (TYPE_MODE (type), float_mode: &smode))
9590 && GET_MODE_BITSIZE (mode: smode) <= BITS_PER_WORD
9591 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9592 return;
9593
9594 /* Atomic store. */
9595 if ((is_int_mode (TYPE_MODE (type), int_mode: &smode)
9596 || is_float_mode (TYPE_MODE (type), float_mode: &smode))
9597 && GET_MODE_BITSIZE (mode: smode) <= BITS_PER_WORD
9598 && store_bb == single_succ (bb: load_bb)
9599 && first_stmt (store_bb) == store
9600 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9601 stored_val, index))
9602 return;
9603
9604 /* When possible, use specialized atomic update functions. */
9605 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9606 && store_bb == single_succ (bb: load_bb)
9607 && expand_omp_atomic_fetch_op (load_bb, addr,
9608 loaded_val, stored_val, index))
9609 return;
9610
9611 /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop. */
9612 if (store_bb == single_succ (bb: load_bb)
9613 && !gimple_in_ssa_p (cfun)
9614 && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
9615 index))
9616 return;
9617
9618 /* If we don't have specialized __sync builtins, try and implement
9619 as a compare and swap loop. */
9620 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9621 loaded_val, stored_val, index))
9622 return;
9623 }
9624 }
9625
9626 /* The ultimate fallback is wrapping the operation in a mutex. */
9627 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9628}
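
/* A sketch of the fallback ladder above for a simple integer update
   (illustrative; the exact builtins chosen depend on the target's
   atomic support):

     #pragma omp atomic
     x += v;

   1. expand_omp_atomic_fetch_op: a single call along the lines of
      __atomic_fetch_add (&x, v, ...);
   2. expand_omp_atomic_pipeline: a loop around the target's
      compare-exchange builtin, retrying until the update wins;
   3. expand_omp_atomic_mutex: GOMP_atomic_start/end around a plain
      load/store pair.  */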

/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
   at REGION_EXIT.  */

static void
mark_loops_in_oacc_kernels_region (basic_block region_entry,
				   basic_block region_exit)
{
  class loop *outer = region_entry->loop_father;
  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);

  /* Don't parallelize the kernels region if it contains more than one outer
     loop.  */
  unsigned int nr_outer_loops = 0;
  class loop *single_outer = NULL;
  for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
    {
      gcc_assert (loop_outer (loop) == outer);

      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
	continue;

      if (region_exit != NULL
	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
	continue;

      nr_outer_loops++;
      single_outer = loop;
    }
  if (nr_outer_loops != 1)
    return;

  for (class loop *loop = single_outer->inner;
       loop != NULL;
       loop = loop->inner)
    if (loop->next)
      return;

  /* Mark the loops in the region.  */
  for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
    loop->in_oacc_kernels_region = true;
}
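
/* Illustration (not part of the compiler): given

     #pragma acc kernels
     for (i = 0; i < n; i++)	/* the single outer loop */
       for (j = 0; j < m; j++)	/* its single inner loop */
	 ...

   both loops get in_oacc_kernels_region set.  Had the region contained
   two sibling outer loops, or had any loop in the nest had a sibling,
   nothing would be marked.  */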

/* Build a target argument identifier from the DEVICE identifier, value
   identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */

static tree
get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
{
  tree t = build_int_cst (integer_type_node, device);
  if (subsequent_param)
    t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		     build_int_cst (integer_type_node,
				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		   build_int_cst (integer_type_node, id));
  return t;
}

/* Like above but return it in a type that can be directly stored as an
   element of the argument array.  */

static tree
get_target_argument_identifier (int device, bool subsequent_param, int id)
{
  tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
  return fold_convert (ptr_type_node, t);
}
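
/* Layout sketch of such an identifier, as implied by the code above and
   the GOMP_TARGET_ARG_* constants in gomp-constants.h: the device
   selector sits in the low bits, GOMP_TARGET_ARG_SUBSEQUENT_PARAM is a
   flag bit meaning "the actual value follows as the next array element",
   and ID names the parameter being passed (num_teams, thread_limit, ...).
   Small values can instead be packed into the same element above
   GOMP_TARGET_ARG_VALUE_SHIFT; see get_target_argument_value below.  */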

/* Return a target argument consisting of DEVICE identifier, value identifier
   ID, and the actual VALUE.  */

static tree
get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
			   tree value)
{
  tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
			fold_convert (integer_type_node, value),
			build_int_cst (unsigned_type_node,
				       GOMP_TARGET_ARG_VALUE_SHIFT));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		   get_target_argument_identifier_1 (device, false, id));
  t = fold_convert (ptr_type_node, t);
  return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
}

/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
   push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
   otherwise push an identifier (with DEVICE and ID) and the VALUE in two
   separate arguments.  */

static void
push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
					 int id, tree value, vec <tree> *args)
{
  if (tree_fits_shwi_p (value)
      && tree_to_shwi (value) > -(1 << 15)
      && tree_to_shwi (value) < (1 << 15))
    args->quick_push (get_target_argument_value (gsi, device, id, value));
  else
    {
      args->quick_push (get_target_argument_identifier (device, true, id));
      value = fold_convert (ptr_type_node, value);
      value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
					GSI_SAME_STMT);
      args->quick_push (value);
    }
}
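
/* Worked example (illustrative): for a constant num_teams(4) applying to
   all devices, a single element with value

     (4 << GOMP_TARGET_ARG_VALUE_SHIFT)
       | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS

   cast to void * is pushed.  For a num_teams expression only known at
   run time, two elements are pushed instead: the identifier with
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, then the value converted to
   void *.  */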

/* Create an array of arguments that is then passed to GOMP_target.  */

static tree
get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
{
  auto_vec <tree, 6> args;
  tree clauses = gimple_omp_target_clauses (tgt_stmt);
  tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (c)
    t = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);

  c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (c)
    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
					   &args);

  /* Produce more, perhaps device specific, arguments here.  */

  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
							  args.length () + 1),
				  ".omp_target_args");
  for (unsigned i = 0; i < args.length (); i++)
    {
      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
			 build_int_cst (integer_type_node, i),
			 NULL_TREE, NULL_TREE);
      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
			 GSI_SAME_STMT);
    }
  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
		     build_int_cst (integer_type_node, args.length ()),
		     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
		     GSI_SAME_STMT);
  TREE_ADDRESSABLE (argarray) = 1;
  return build_fold_addr_expr (argarray);
}
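
/* The array is NULL-terminated rather than length-prefixed, so a typical
   instance looks like (sketch):

     void *.omp_target_args[] = { <encoded num_teams>,
				  <encoded thread_limit>, NULL };

   Its address becomes the trailing argument of the GOMP_target launch
   call emitted by expand_omp_target below.  */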

/* Expand the GIMPLE_OMP_TARGET starting at REGION.  */

static void
expand_omp_target (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, child_fn2, block, t, c;
  gimple_stmt_iterator gsi;
  gomp_target *entry_stmt;
  gimple *stmt;
  edge e;
  bool offloaded;
  int target_kind;

  entry_stmt = as_a <gomp_target *> (last_nondebug_stmt (region->entry));
  target_kind = gimple_omp_target_kind (entry_stmt);
  new_bb = region->entry;

  offloaded = is_gimple_omp_offloaded (entry_stmt);
  switch (target_kind)
    {
    case GF_OMP_TARGET_KIND_REGION:
    case GF_OMP_TARGET_KIND_UPDATE:
    case GF_OMP_TARGET_KIND_ENTER_DATA:
    case GF_OMP_TARGET_KIND_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
    case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
    case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
    case GF_OMP_TARGET_KIND_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
      break;
    default:
      gcc_unreachable ();
    }

  tree clauses = gimple_omp_target_clauses (entry_stmt);

  bool is_ancestor = false;
  child_fn = child_fn2 = NULL_TREE;
  child_cfun = NULL;
  if (offloaded)
    {
      c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
      if (ENABLE_OFFLOADING && c)
	is_ancestor = OMP_CLAUSE_DEVICE_ANCESTOR (c);
      child_fn = gimple_omp_target_child_fn (entry_stmt);
      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
    }

  /* Supported by expand_omp_taskreg, but not here.  */
  if (child_cfun != NULL)
    gcc_checking_assert (!child_cfun->cfg);
  gcc_checking_assert (!gimple_in_ssa_p (cfun));

  entry_bb = region->entry;
  exit_bb = region->exit;

  if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
    mark_loops_in_oacc_kernels_region (region->entry, region->exit);

  /* From here on, all OpenACC compute constructs are mapped to
     'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
     To distinguish between them, we attach attributes.  */
  switch (target_kind)
    {
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc parallel"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc kernels"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc serial"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    default:
      /* Make sure we don't miss any.  */
      gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
			     && is_gimple_omp_offloaded (entry_stmt)));
      break;
    }

  if (offloaded)
    {
      unsigned srcidx, dstidx, num;

      /* If the offloading region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the offloading body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable, in which case we need to keep the assignment.  */
      tree data_arg = gimple_omp_target_data_arg (entry_stmt);
      if (data_arg)
	{
	  basic_block entry_succ_bb = single_succ (entry_bb);
	  gimple_stmt_iterator gsi;
	  tree arg;
	  gimple *tgtcopy_stmt = NULL;
	  tree sender = TREE_VEC_ELT (data_arg, 0);

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && TREE_OPERAND (arg, 0) == sender)
		    {
		      tgtcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (tgtcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
	  gsi_remove (&gsi, true);
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in the offloading block
	 rather than in the containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
	 so that the region can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt
		  && gimple_code (stmt) == gimple_code (entry_stmt));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the offloading region into CHILD_CFUN.  */

      block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree, arrange for the child function to fix up loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      /* Add the new function to the offload table.  */
      if (ENABLE_OFFLOADING)
	{
	  if (in_lto_p)
	    DECL_PRESERVE_P (child_fn) = 1;
	  if (!is_ancestor)
	    vec_safe_push (offload_funcs, child_fn);
	}

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}

      adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

      /* Handle the case where an inner ancestor:1 target region is
	 called from an outer target region.  */
      if (is_ancestor)
	{
	  cgraph_node *fn2_node;
	  child_fn2 = build_decl (DECL_SOURCE_LOCATION (child_fn),
				  FUNCTION_DECL,
				  clone_function_name (child_fn, "nohost"),
				  TREE_TYPE (child_fn));
	  if (in_lto_p)
	    DECL_PRESERVE_P (child_fn2) = 1;
	  TREE_STATIC (child_fn2) = 1;
	  DECL_ARTIFICIAL (child_fn2) = 1;
	  DECL_IGNORED_P (child_fn2) = 0;
	  TREE_PUBLIC (child_fn2) = 0;
	  DECL_UNINLINABLE (child_fn2) = 1;
	  DECL_EXTERNAL (child_fn2) = 0;
	  DECL_CONTEXT (child_fn2) = DECL_CONTEXT (child_fn);
	  DECL_INITIAL (child_fn2) = make_node (BLOCK);
	  BLOCK_SUPERCONTEXT (DECL_INITIAL (child_fn2)) = child_fn2;
	  DECL_ATTRIBUTES (child_fn)
	    = remove_attribute ("omp target entrypoint",
				DECL_ATTRIBUTES (child_fn));
	  DECL_ATTRIBUTES (child_fn2)
	    = tree_cons (get_identifier ("omp target device_ancestor_nohost"),
			 NULL_TREE, copy_list (DECL_ATTRIBUTES (child_fn)));
	  DECL_ATTRIBUTES (child_fn)
	    = tree_cons (get_identifier ("omp target device_ancestor_host"),
			 NULL_TREE, DECL_ATTRIBUTES (child_fn));
	  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (child_fn2)
	    = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (current_function_decl);
	  DECL_FUNCTION_SPECIFIC_TARGET (child_fn2)
	    = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
	  DECL_FUNCTION_VERSIONED (child_fn2)
	    = DECL_FUNCTION_VERSIONED (current_function_decl);

	  fn2_node = cgraph_node::get_create (child_fn2);
	  fn2_node->offloadable = 1;
	  fn2_node->force_output = 1;
	  node->offloadable = 0;

	  /* Enable the pass_omp_device_lower pass.  */
	  fn2_node = cgraph_node::get (DECL_CONTEXT (child_fn));
	  fn2_node->calls_declare_variant_alt = 1;

	  t = build_decl (DECL_SOURCE_LOCATION (child_fn),
			  RESULT_DECL, NULL_TREE, void_type_node);
	  DECL_ARTIFICIAL (t) = 1;
	  DECL_IGNORED_P (t) = 1;
	  DECL_CONTEXT (t) = child_fn2;
	  DECL_RESULT (child_fn2) = t;
	  DECL_SAVED_TREE (child_fn2) = build1 (RETURN_EXPR,
						void_type_node, NULL);
	  tree tmp = DECL_ARGUMENTS (child_fn);
	  t = build_decl (DECL_SOURCE_LOCATION (child_fn), PARM_DECL,
			  DECL_NAME (tmp), TREE_TYPE (tmp));
	  DECL_ARTIFICIAL (t) = 1;
	  DECL_NAMELESS (t) = 1;
	  DECL_ARG_TYPE (t) = ptr_type_node;
	  DECL_CONTEXT (t) = current_function_decl;
	  TREE_USED (t) = 1;
	  TREE_READONLY (t) = 1;
	  DECL_ARGUMENTS (child_fn2) = t;
	  gcc_assert (TREE_CHAIN (tmp) == NULL_TREE);

	  gimplify_function_tree (child_fn2);
	  cgraph_node::add_new_function (child_fn2, true);

	  vec_safe_push (offload_funcs, child_fn2);
	  if (dump_file && !gimple_in_ssa_p (cfun))
	    {
	      dump_function_header (dump_file, child_fn2, dump_flags);
	      dump_function_to_file (child_fn2, dump_file, dump_flags);
	    }
	}
    }

  /* Emit a library call to launch the offloading region, or do data
     transfers.  */
  tree t1, t2, t3, t4, depend;
  enum built_in_function start_ix;
  unsigned int flags_i = 0;

  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
      start_ix = BUILT_IN_GOMP_TARGET;
      break;
    case GF_OMP_TARGET_KIND_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_DATA;
      break;
    case GF_OMP_TARGET_KIND_UPDATE:
      start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_ENTER_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_EXIT_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
      start_ix = BUILT_IN_GOACC_PARALLEL;
      break;
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
      start_ix = BUILT_IN_GOACC_DATA_START;
      break;
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
      start_ix = BUILT_IN_GOACC_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
      start_ix = BUILT_IN_GOACC_ENTER_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
      start_ix = BUILT_IN_GOACC_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      start_ix = BUILT_IN_GOACC_DECLARE;
      break;
    default:
      gcc_unreachable ();
    }

  tree device = NULL_TREE;
  location_t device_loc = UNKNOWN_LOCATION;
  tree goacc_flags = NULL_TREE;
  bool need_device_adjustment = false;
  gimple_stmt_iterator adj_gsi;
  if (is_gimple_omp_oacc (entry_stmt))
    {
      /* By default, no GOACC_FLAGs are set.  */
      goacc_flags = integer_zero_node;
    }
  else
    {
      c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
      if (c)
	{
	  device = OMP_CLAUSE_DEVICE_ID (c);
	  /* Ensure 'device' is of the correct type.  */
	  device = fold_convert_loc (device_loc, integer_type_node, device);
	  if (TREE_CODE (device) == INTEGER_CST)
	    {
	      if (wi::to_wide (device) == GOMP_DEVICE_ICV)
		device = build_int_cst (integer_type_node,
					GOMP_DEVICE_HOST_FALLBACK);
	      else if (wi::to_wide (device) == GOMP_DEVICE_HOST_FALLBACK)
		device = build_int_cst (integer_type_node,
					GOMP_DEVICE_HOST_FALLBACK - 1);
	    }
	  else
	    need_device_adjustment = true;
	  device_loc = OMP_CLAUSE_LOCATION (c);
	  if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
	    device = build_int_cst (integer_type_node,
				    GOMP_DEVICE_HOST_FALLBACK);
	}
      else
	{
	  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the
	     runtime library choose).  */
	  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
	  device_loc = gimple_location (entry_stmt);
	}

      c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
      /* FIXME: in_reduction(...) nowait is not implemented yet; pretend
	 nowait doesn't appear.  */
      if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
	c = NULL;
      if (c)
	flags_i |= GOMP_TARGET_FLAG_NOWAIT;
    }

  /* By default, there is no conditional.  */
  tree cond = NULL_TREE;
  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);
  /* If we found the clause 'if (cond)', build:
     OpenACC: goacc_flags = (cond ? goacc_flags
			     : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
     OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK)  */
  if (cond)
    {
      tree *tp;
      if (is_gimple_omp_oacc (entry_stmt))
	tp = &goacc_flags;
      else
	tp = &device;

      cond = gimple_boolify (cond);

      basic_block cond_bb, then_bb, else_bb;
      edge e;
      tree tmp_var = create_tmp_var (TREE_TYPE (*tp));
      if (offloaded)
	e = split_block_after_labels (new_bb);
      else
	{
	  gsi = gsi_last_nondebug_bb (new_bb);
	  gsi_prev (&gsi);
	  e = split_block (new_bb, gsi_stmt (gsi));
	}
      cond_bb = e->src;
      new_bb = e->dest;
      remove_edge (e);

      then_bb = create_empty_bb (cond_bb);
      else_bb = create_empty_bb (then_bb);
      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

      stmt = gimple_build_cond_empty (cond);
      gsi = gsi_last_bb (cond_bb);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (then_bb);
      stmt = gimple_build_assign (tmp_var, *tp);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
      adj_gsi = gsi;

      gsi = gsi_start_bb (else_bb);
      if (is_gimple_omp_oacc (entry_stmt))
	stmt = gimple_build_assign (tmp_var,
				    BIT_IOR_EXPR,
				    *tp,
				    build_int_cst (integer_type_node,
						   GOACC_FLAG_HOST_FALLBACK));
      else
	stmt = gimple_build_assign (tmp_var,
				    build_int_cst (integer_type_node,
						   GOMP_DEVICE_HOST_FALLBACK));
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
      add_bb_to_loop (then_bb, cond_bb->loop_father);
      add_bb_to_loop (else_bb, cond_bb->loop_father);
      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
      make_edge (else_bb, new_bb, EDGE_FALLTHRU);

      *tp = tmp_var;

      gsi = gsi_last_nondebug_bb (new_bb);
    }
  else
    {
      gsi = gsi_last_nondebug_bb (new_bb);

      if (device != NULL_TREE)
	device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
					   true, GSI_SAME_STMT);
      if (need_device_adjustment)
	{
	  tree tmp_var = create_tmp_var (TREE_TYPE (device));
	  stmt = gimple_build_assign (tmp_var, device);
	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  adj_gsi = gsi_for_stmt (stmt);
	  device = tmp_var;
	}
    }

  if ((c = omp_find_clause (clauses, OMP_CLAUSE_SELF)) != NULL_TREE)
    {
      gcc_assert ((is_gimple_omp_oacc (entry_stmt) && offloaded)
		  || (gimple_omp_target_kind (entry_stmt)
		      == GF_OMP_TARGET_KIND_OACC_DATA_KERNELS));

      edge e;
      if (offloaded)
	e = split_block_after_labels (new_bb);
      else
	{
	  gsi = gsi_last_nondebug_bb (new_bb);
	  gsi_prev (&gsi);
	  e = split_block (new_bb, gsi_stmt (gsi));
	}
      basic_block cond_bb = e->src;
      new_bb = e->dest;
      remove_edge (e);

      basic_block then_bb = create_empty_bb (cond_bb);
      basic_block else_bb = create_empty_bb (then_bb);
      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

      tree self_cond = gimple_boolify (OMP_CLAUSE_SELF_EXPR (c));
      stmt = gimple_build_cond_empty (self_cond);
      gsi = gsi_last_bb (cond_bb);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      tree tmp_var = create_tmp_var (TREE_TYPE (goacc_flags));
      stmt = gimple_build_assign (tmp_var, BIT_IOR_EXPR, goacc_flags,
				  build_int_cst (integer_type_node,
						 GOACC_FLAG_LOCAL_DEVICE));
      gsi = gsi_start_bb (then_bb);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (else_bb);
      stmt = gimple_build_assign (tmp_var, goacc_flags);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
      add_bb_to_loop (then_bb, cond_bb->loop_father);
      add_bb_to_loop (else_bb, cond_bb->loop_father);
      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
      make_edge (else_bb, new_bb, EDGE_FALLTHRU);

      goacc_flags = tmp_var;
      gsi = gsi_last_nondebug_bb (new_bb);
    }

  if (need_device_adjustment)
    {
      tree uns = fold_convert (unsigned_type_node, device);
      uns = force_gimple_operand_gsi (&adj_gsi, uns, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
      edge e = split_block (gsi_bb (adj_gsi), gsi_stmt (adj_gsi));
      basic_block cond_bb = e->src;
      basic_block else_bb = e->dest;
      if (gsi_bb (adj_gsi) == new_bb)
	{
	  new_bb = else_bb;
	  gsi = gsi_last_nondebug_bb (new_bb);
	}

      basic_block then_bb = create_empty_bb (cond_bb);
      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);

      cond = build2 (GT_EXPR, boolean_type_node, uns,
		     build_int_cst (unsigned_type_node,
				    GOMP_DEVICE_HOST_FALLBACK - 1));
      stmt = gimple_build_cond_empty (cond);
      adj_gsi = gsi_last_bb (cond_bb);
      gsi_insert_after (&adj_gsi, stmt, GSI_CONTINUE_LINKING);

      adj_gsi = gsi_start_bb (then_bb);
      tree add = build2 (PLUS_EXPR, integer_type_node, device,
			 build_int_cst (integer_type_node, -1));
      stmt = gimple_build_assign (device, add);
      gsi_insert_after (&adj_gsi, stmt, GSI_CONTINUE_LINKING);

      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
      e->flags = EDGE_FALSE_VALUE;
      add_bb_to_loop (then_bb, cond_bb->loop_father);
      make_edge (then_bb, else_bb, EDGE_FALLTHRU);
    }

  t = gimple_omp_target_data_arg (entry_stmt);
  if (t == NULL)
    {
      t1 = size_zero_node;
      t2 = build_zero_cst (ptr_type_node);
      t3 = t2;
      t4 = t2;
    }
  else
    {
      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
      t1 = size_binop (PLUS_EXPR, t1, size_int (1));
      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
    }

  gimple *g;
  bool tagging = false;
  /* The maximum number of arguments used by any start_ix,
     without varargs.  */
  auto_vec<tree, 11> args;
  if (is_gimple_omp_oacc (entry_stmt))
    {
      tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
					TREE_TYPE (goacc_flags), goacc_flags);
      goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
						NULL_TREE, true,
						GSI_SAME_STMT);
      args.quick_push (goacc_flags_m);
    }
  else
    args.quick_push (device);
  if (offloaded)
    args.quick_push (build_fold_addr_expr (child_fn2 ? child_fn2 : child_fn));
  args.quick_push (t1);
  args.quick_push (t2);
  args.quick_push (t3);
  args.quick_push (t4);
  switch (start_ix)
    {
    case BUILT_IN_GOACC_DATA_START:
    case BUILT_IN_GOACC_DECLARE:
    case BUILT_IN_GOMP_TARGET_DATA:
      break;
    case BUILT_IN_GOMP_TARGET:
    case BUILT_IN_GOMP_TARGET_UPDATE:
    case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
      args.quick_push (build_int_cst (unsigned_type_node, flags_i));
      c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
      if (c)
	depend = OMP_CLAUSE_DECL (c);
      else
	depend = build_int_cst (ptr_type_node, 0);
      args.quick_push (depend);
      if (start_ix == BUILT_IN_GOMP_TARGET)
	args.quick_push (get_target_arguments (&gsi, entry_stmt));
      break;
    case BUILT_IN_GOACC_PARALLEL:
      if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
	{
	  tree dims = NULL_TREE;
	  unsigned int ix;

	  /* For serial constructs we set all dimensions to 1.  */
	  for (ix = GOMP_DIM_MAX; ix--;)
	    dims = tree_cons (NULL_TREE, integer_one_node, dims);
	  oacc_replace_fn_attrib (child_fn, dims);
	}
      else
	oacc_set_fn_attrib (child_fn, clauses, &args);
      tagging = true;
      /* FALLTHRU */
    case BUILT_IN_GOACC_ENTER_DATA:
    case BUILT_IN_GOACC_EXIT_DATA:
    case BUILT_IN_GOACC_UPDATE:
      {
	tree t_async = NULL_TREE;

	/* If present, use the value specified by the respective
	   clause, making sure it is of the correct type.  */
	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
	if (c)
	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
				      integer_type_node,
				      OMP_CLAUSE_ASYNC_EXPR (c));
	else if (!tagging)
	  /* Default values for t_async.  */
	  t_async = fold_convert_loc (gimple_location (entry_stmt),
				      integer_type_node,
				      build_int_cst (integer_type_node,
						     GOMP_ASYNC_SYNC));
	if (tagging && t_async)
	  {
	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;

	    if (TREE_CODE (t_async) == INTEGER_CST)
	      {
		/* See if we can pack the async arg into the tag's
		   operand.  */
		i_async = TREE_INT_CST_LOW (t_async);
		if (i_async < GOMP_LAUNCH_OP_MAX)
		  t_async = NULL_TREE;
		else
		  i_async = GOMP_LAUNCH_OP_MAX;
	      }
	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
					      i_async));
	  }
	if (t_async)
	  args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
						    NULL_TREE, true,
						    GSI_SAME_STMT));

	/* Save the argument index, and ...  */
	unsigned t_wait_idx = args.length ();
	unsigned num_waits = 0;
	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
	if (!tagging || c)
	  /* ... push a placeholder.  */
	  args.safe_push (integer_zero_node);

	for (; c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
	    {
	      tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
					   integer_type_node,
					   OMP_CLAUSE_WAIT_EXPR (c));
	      arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
					      GSI_SAME_STMT);
	      args.safe_push (arg);
	      num_waits++;
	    }

	if (!tagging || num_waits)
	  {
	    tree len;

	    /* Now that we know the number, update the placeholder.  */
	    if (tagging)
	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
	    else
	      len = build_int_cst (integer_type_node, num_waits);
	    len = fold_convert_loc (gimple_location (entry_stmt),
				    unsigned_type_node, len);
	    args[t_wait_idx] = len;
	  }
      }
      break;
    default:
      gcc_unreachable ();
    }
  if (tagging)
    /* Push the terminal marker - zero.  */
    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));

  if (child_fn2)
    {
      g = gimple_build_call_internal (IFN_GOMP_TARGET_REV, 1,
				      build_fold_addr_expr (child_fn));
      gimple_set_location (g, gimple_location (entry_stmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
    }

  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
  gimple_set_location (g, gimple_location (entry_stmt));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  if (!offloaded)
    {
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
      gsi_remove (&gsi, true);
    }
}
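
/* A sketch of the emitted launch for a plain '#pragma omp target' region,
   assuming libgomp's current GOMP_target_ext prototype (the .omp_* names
   are illustrative):

     GOMP_target_ext (device, __omp_fn.0, mapnum,
		      &.omp_data_arr, &.omp_data_sizes, &.omp_data_kinds,
		      flags, depend, .omp_target_args);

   where __omp_fn.0 is the outlined child function, the three arrays
   describe the mapped data (T2/T3/T4 above), and the last argument is
   the NULL-terminated array built by get_target_arguments.  */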

/* Expand the parallel region tree rooted at REGION.  Expansion
   proceeds in depth-first order.  Innermost regions are expanded
   first.  This way, parallel regions that require a new function to
   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
   internal dependencies in their body.  */

static void
expand_omp (struct omp_region *region)
{
  omp_any_child_fn_dumped = false;
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
	 region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
	determine_parallel_type (region);

      if (region->type == GIMPLE_OMP_FOR
	  && gimple_omp_for_combined_p (last_nondebug_stmt (region->entry)))
	inner_stmt = last_nondebug_stmt (region->inner->entry);

      if (region->inner)
	expand_omp (region->inner);

      saved_location = input_location;
      if (gimple_has_location (last_nondebug_stmt (region->entry)))
	input_location = gimple_location (last_nondebug_stmt (region->entry));

      switch (region->type)
	{
	case GIMPLE_OMP_PARALLEL:
	case GIMPLE_OMP_TASK:
	  expand_omp_taskreg (region);
	  break;

	case GIMPLE_OMP_FOR:
	  expand_omp_for (region, inner_stmt);
	  break;

	case GIMPLE_OMP_SECTIONS:
	  expand_omp_sections (region);
	  break;

	case GIMPLE_OMP_SECTION:
	  /* Individual omp sections are handled together with their
	     parent GIMPLE_OMP_SECTIONS region.  */
	  break;

	case GIMPLE_OMP_STRUCTURED_BLOCK:
	  /* We should have gotten rid of these in gimple lowering.  */
	  gcc_unreachable ();

	case GIMPLE_OMP_SINGLE:
	case GIMPLE_OMP_SCOPE:
	  expand_omp_single (region);
	  break;

	case GIMPLE_OMP_ORDERED:
	  {
	    gomp_ordered *ord_stmt
	      = as_a <gomp_ordered *> (last_nondebug_stmt (region->entry));
	    if (gimple_omp_ordered_standalone_p (ord_stmt))
	      {
		/* We'll expand these when expanding the corresponding
		   worksharing region with an ordered(n) clause.  */
		gcc_assert (region->outer
			    && region->outer->type == GIMPLE_OMP_FOR);
		region->ord_stmt = ord_stmt;
		break;
	      }
	  }
	  /* FALLTHRU */
	case GIMPLE_OMP_MASTER:
	case GIMPLE_OMP_MASKED:
	case GIMPLE_OMP_TASKGROUP:
	case GIMPLE_OMP_CRITICAL:
	case GIMPLE_OMP_TEAMS:
	  expand_omp_synch (region);
	  break;

	case GIMPLE_OMP_ATOMIC_LOAD:
	  expand_omp_atomic (region);
	  break;

	case GIMPLE_OMP_TARGET:
	  expand_omp_target (region);
	  break;

	default:
	  gcc_unreachable ();
	}

      input_location = saved_location;
      region = region->next;
    }
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
	dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}

/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, the
   whole forest of OMP constructs may be built).  */

static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
		     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  gsi = gsi_last_nondebug_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
	{
	  /* STMT is the return point out of region PARENT.  Mark it
	     as the exit point and make PARENT the immediately
	     enclosing region.  */
	  gcc_assert (parent);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
	{
	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
	     GIMPLE_OMP_RETURN, but matches with
	     GIMPLE_OMP_ATOMIC_LOAD.  */
	  gcc_assert (parent);
	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_CONTINUE)
	{
	  gcc_assert (parent);
	  parent->cont = bb;
	}
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
	{
	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
	}
      else
	{
	  region = new_omp_region (bb, code, parent);
	  /* Otherwise...  */
	  if (code == GIMPLE_OMP_TARGET)
	    {
	      switch (gimple_omp_target_kind (stmt))
		{
		case GF_OMP_TARGET_KIND_REGION:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
		case GF_OMP_TARGET_KIND_OACC_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_SERIAL:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
		  break;
		case GF_OMP_TARGET_KIND_UPDATE:
		case GF_OMP_TARGET_KIND_ENTER_DATA:
		case GF_OMP_TARGET_KIND_EXIT_DATA:
		case GF_OMP_TARGET_KIND_DATA:
		case GF_OMP_TARGET_KIND_OACC_DATA:
		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
		case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_UPDATE:
		case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
		case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_DECLARE:
		  /* ..., other than for those stand-alone directives...
		     To be precise, target data isn't stand-alone, but the
		     gimplifier put the end API call into a try/finally
		     block for it, so OMP expansion can treat it as such.  */
		  region = NULL;
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  else if (code == GIMPLE_OMP_ORDERED
		   && gimple_omp_ordered_standalone_p (stmt))
	    /* #pragma omp ordered depend is also just a stand-alone
	       directive.  */
	    region = NULL;
	  else if (code == GIMPLE_OMP_TASK
		   && gimple_omp_task_taskwait_p (stmt))
	    /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
	    region = NULL;
	  else if (code == GIMPLE_OMP_TASKGROUP)
	    /* #pragma omp taskgroup isn't a stand-alone directive, but the
	       gimplifier put the end API call into a try/finally block
	       for it, so OMP expansion can treat it as such.  */
	    region = NULL;
	  /* ..., this directive becomes the parent for a new region.  */
	  if (region)
	    parent = region;
	}
    }

  if (single_tree && !parent)
    return;

  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}
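
/* Illustration (not produced by the compiler): for

     #pragma omp parallel
     {
       #pragma omp for
       for (...) ...
     }

   the region tree built here is

     GIMPLE_OMP_PARALLEL  (entry/exit basic blocks)
       GIMPLE_OMP_FOR     (entry/cont/exit basic blocks)

   with the for region reached through the parallel region's inner link;
   sibling regions would be chained through next.  */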

/* Builds the tree of OMP regions rooted at ROOT, storing it in
   root_omp_region.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}

/* Expands the OMP construct (and its subconstructs) starting in HEAD.  */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}

/* Scan the CFG and build a tree of OMP regions, storing the root of
   the OMP region tree in root_omp_region.  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}

/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  omp_free_regions ();

  return (TODO_cleanup_cfg
	  | (gimple_in_ssa_p (cfun) ? TODO_update_ssa_only_virtuals : 0));
}

/* OMP expansion -- the default pass, run before creation of SSA form.  */

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  unsigned int execute (function *) final override
    {
      bool gate = ((flag_openacc != 0 || flag_openmp != 0
		    || flag_openmp_simd != 0)
		   && !seen_error ());

      /* This pass always runs, to provide PROP_gimple_eomp.
	 But often, there is nothing to do.  */
      if (!gate)
	return 0;

      return execute_expand_omp ();
    }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}

namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *fun) final override
    {
      return !(fun->curr_properties & PROP_gimple_eomp);
    }
  unsigned int execute (function *) final override
    {
      return execute_expand_omp ();
    }
  opt_pass * clone () final override
    {
      return new pass_expand_omp_ssa (m_ctxt);
    }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}

/* Called from tree-cfg.cc::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.  */

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
		       int *region_idx)
{
  gimple *last = last_nondebug_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_MASKED:
    case GIMPLE_OMP_SCOPE:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_TASKGROUP:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TASK:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (gimple_omp_task_taskwait_p (last))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      if (gimple_omp_ordered_standalone_p (last))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
	{
	case GF_OMP_TARGET_KIND_REGION:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
	case GF_OMP_TARGET_KIND_OACC_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_SERIAL:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
	  break;
	case GF_OMP_TARGET_KIND_UPDATE:
	case GF_OMP_TARGET_KIND_ENTER_DATA:
	case GF_OMP_TARGET_KIND_EXIT_DATA:
	case GF_OMP_TARGET_KIND_DATA:
	case GF_OMP_TARGET_KIND_OACC_DATA:
	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
	case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_UPDATE:
	case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
	case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_DECLARE:
	  cur_region = cur_region->outer;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
	 somewhere other than the next block.  This will be
	 created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
	/* Add an edge corresponding to not scheduling the task
	   immediately.  */
	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
	{
	case GIMPLE_OMP_FOR:
	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
	     succs edges as abnormal to prevent splitting
	     them.  */
	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
	  /* Make the loopback edge.  */
	  make_edge (bb, single_succ (cur_region->entry),
		     EDGE_ABNORMAL);

	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
	     corresponds to the case that the body of the loop
	     is not executed at all.  */
	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
	  fallthru = false;
	  break;

	case GIMPLE_OMP_SECTIONS:
	  /* Wire up the edges into and out of the nested sections.  */
	  {
	    basic_block switch_bb = single_succ (cur_region->entry);

	    struct omp_region *i;
	    for (i = cur_region->inner; i ; i = i->next)
	      {
		gcc_assert (i->type == GIMPLE_OMP_SECTION);
		make_edge (switch_bb, i->entry, 0);
		make_edge (i->exit, bb, EDGE_FALLTHRU);
	      }

	    /* Make the loopback edge to the block with
	       GIMPLE_OMP_SECTIONS_SWITCH.  */
	    make_edge (bb, switch_bb, 0);

	    /* Make the edge from the switch to exit.  */
	    make_edge (switch_bb, bb->next_bb, 0);
	    fallthru = false;
	  }
	  break;

	case GIMPLE_OMP_TASK:
	  fallthru = true;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
	*region_idx = cur_region->entry->index;
      else
	*region_idx = 0;
    }

  return fallthru;
}
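
/* For a GIMPLE_OMP_FOR region the edges created above are, schematically:

     entry --[abnormal]--> body      (single successor of GIMPLE_OMP_FOR)
     cont  --[abnormal]--> body      (the loopback edge)
     entry --[abnormal]--> next_bb   (the body is not executed at all)
     cont  --[fallthru|abnormal]--> next_bb

   Marking them abnormal keeps later CFG transformations from splitting
   them before OMP expansion consumes the region.  */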