/* OpenACC worker partitioning via middle end neutering/broadcasting scheme

   Copyright (C) 2015-2023 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "omp-general.h"
#include "omp-low.h"
#include "gimple-pretty-print.h"
#include "cfghooks.h"
#include "insn-config.h"
#include "recog.h"
#include "internal-fn.h"
#include "bitmap.h"
#include "tree-nested.h"
#include "stor-layout.h"
#include "tree-ssa-threadupdate.h"
#include "tree-into-ssa.h"
#include "splay-tree.h"
#include "target.h"
#include "cfgloop.h"
#include "tree-cfg.h"
#include "omp-offload.h"
#include "attribs.h"
#include "targhooks.h"
#include "diagnostic-core.h"

/* Loop structure of the function.  The entire function is described as
   a NULL loop.  */
/* Adapted from 'gcc/config/nvptx/nvptx.cc:struct parallel'.  */

struct parallel_g
{
  /* Parent parallel.  */
  parallel_g *parent;

  /* Next sibling parallel.  */
  parallel_g *next;

  /* First child parallel.  */
  parallel_g *inner;

  /* Partitioning mask of the parallel.  */
  unsigned mask;

  /* Partitioning used within inner parallels.  */
  unsigned inner_mask;

  /* Location of parallel forked and join.  The forked is the first
     block in the parallel and the join is the first block after
     the partition.  */
  basic_block forked_block;
  basic_block join_block;

  gimple *forked_stmt;
  gimple *join_stmt;

  gimple *fork_stmt;
  gimple *joining_stmt;

  /* Basic blocks in this parallel, but not in child parallels.  The
     FORKED and JOINING blocks are in the partition.  The FORK and JOIN
     blocks are not.  */
  auto_vec<basic_block> blocks;

  tree record_type;
  tree sender_decl;
  tree receiver_decl;

public:
  parallel_g (parallel_g *parent, unsigned mode);
  ~parallel_g ();
};

/* Constructor links the new parallel into its parent's chain of
   children.  */

parallel_g::parallel_g (parallel_g *parent_, unsigned mask_)
  :parent (parent_), next (0), inner (0), mask (mask_), inner_mask (0)
{
  forked_block = join_block = 0;
  forked_stmt = join_stmt = NULL;
  fork_stmt = joining_stmt = NULL;

  record_type = NULL_TREE;
  sender_decl = NULL_TREE;
  receiver_decl = NULL_TREE;

  if (parent)
    {
      next = parent->inner;
      parent->inner = this;
    }
}

parallel_g::~parallel_g ()
{
  delete inner;
  delete next;
}

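/* Return TRUE if DECL is a VAR_DECL, or a component, bit-field or array
   reference ultimately based on one, that is local to the current
   function, i.e. not a global variable.  */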
static bool
local_var_based_p (tree decl)
{
  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      return !is_global_var (decl);

    case COMPONENT_REF:
    case BIT_FIELD_REF:
    case ARRAY_REF:
      return local_var_based_p (TREE_OPERAND (decl, 0));

    default:
      return false;
    }
}

/* Map of basic blocks to gimple stmts.  */
typedef hash_map<basic_block, gimple *> bb_stmt_map_t;

/* Calls to OpenACC routines are made by all workers/wavefronts/warps, since
   the routine likely contains partitioned loops (else it will do its own
   neutering and variable propagation).  Return TRUE if the function call
   CALL should instead be made in (worker) single mode, rather than
   redundant mode.  */

static bool
omp_sese_active_worker_call (gcall *call)
{
#define GOMP_DIM_SEQ GOMP_DIM_MAX
  tree fndecl = gimple_call_fndecl (call);

  if (!fndecl)
    return true;

  tree attrs = oacc_get_fn_attrib (fndecl);

  if (!attrs)
    return true;

  int level = oacc_fn_attrib_level (attrs);

  /* Neither regular functions nor "seq" routines should be run by all threads
     in worker-single mode.  */
  return level == -1 || level == GOMP_DIM_SEQ;
#undef GOMP_DIM_SEQ
}

/* Split basic blocks so that each forked and join marker is at the
   start of its basic block.  Thus afterwards each block will have a
   single partitioning mode.  We also do the same for return statements,
   as they are executed by every thread.  Populate MAP with the statement
   at the head of each block so created.  We also clear the BB visited
   flag, which is used when finding partitions.  */
/* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_split_blocks'.  */

191static void
192omp_sese_split_blocks (bb_stmt_map_t *map)
193{
194 auto_vec<gimple *> worklist;
195 basic_block block;
196
197 /* Locate all the reorg instructions of interest. */
198 FOR_ALL_BB_FN (block, cfun)
199 {
200 /* Clear visited flag, for use by parallel locator */
201 block->flags &= ~BB_VISITED;
202
203 for (gimple_stmt_iterator gsi = gsi_start_bb (bb: block);
204 !gsi_end_p (i: gsi);
205 gsi_next (i: &gsi))
206 {
207 gimple *stmt = gsi_stmt (i: gsi);
208
209 if (gimple_call_internal_p (gs: stmt, fn: IFN_UNIQUE))
210 {
211 enum ifn_unique_kind k = ((enum ifn_unique_kind)
212 TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
213
214 if (k == IFN_UNIQUE_OACC_JOIN)
215 worklist.safe_push (obj: stmt);
216 else if (k == IFN_UNIQUE_OACC_FORK)
217 {
218 gcc_assert (gsi_one_before_end_p (gsi));
219 basic_block forked_block = single_succ (bb: block);
220 gimple_stmt_iterator gsi2 = gsi_start_bb (bb: forked_block);
221
222 /* We push a NOP as a placeholder for the "forked" stmt.
223 This is then recognized in omp_sese_find_par. */
224 gimple *nop = gimple_build_nop ();
225 gsi_insert_before (&gsi2, nop, GSI_SAME_STMT);
226
227 worklist.safe_push (obj: nop);
228 }
229 }
230 else if (gimple_code (g: stmt) == GIMPLE_RETURN
231 || gimple_code (g: stmt) == GIMPLE_COND
232 || gimple_code (g: stmt) == GIMPLE_SWITCH
233 || (gimple_code (g: stmt) == GIMPLE_CALL
234 && !gimple_call_internal_p (gs: stmt)
235 && !omp_sese_active_worker_call (call: as_a <gcall *> (p: stmt))))
236 worklist.safe_push (obj: stmt);
237 else if (is_gimple_assign (gs: stmt))
238 {
239 tree lhs = gimple_assign_lhs (gs: stmt);
240
241 /* Force assignments to components/fields/elements of local
242 aggregates into fully-partitioned (redundant) mode. This
243 avoids having to broadcast the whole aggregate. The RHS of
244 the assignment will be propagated using the normal
245 mechanism. */
246
247 switch (TREE_CODE (lhs))
248 {
249 case COMPONENT_REF:
250 case BIT_FIELD_REF:
251 case ARRAY_REF:
252 {
253 tree aggr = TREE_OPERAND (lhs, 0);
254
255 if (local_var_based_p (decl: aggr))
256 worklist.safe_push (obj: stmt);
257 }
258 break;
259
260 default:
261 ;
262 }
263 }
264 }
265 }
266
267 /* Split blocks on the worklist. */
268 unsigned ix;
269 gimple *stmt;
270
271 for (ix = 0; worklist.iterate (ix, ptr: &stmt); ix++)
272 {
273 basic_block block = gimple_bb (g: stmt);
274
275 if (gimple_code (g: stmt) == GIMPLE_COND)
276 {
277 gcond *orig_cond = as_a <gcond *> (p: stmt);
278 tree_code code = gimple_expr_code (stmt: orig_cond);
279 tree pred = make_ssa_name (boolean_type_node);
280 gimple *asgn = gimple_build_assign (pred, code,
281 gimple_cond_lhs (gs: orig_cond),
282 gimple_cond_rhs (gs: orig_cond));
283 gcond *new_cond
284 = gimple_build_cond (NE_EXPR, pred, boolean_false_node,
285 gimple_cond_true_label (gs: orig_cond),
286 gimple_cond_false_label (gs: orig_cond));
287
288 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
289 gsi_insert_before (&gsi, asgn, GSI_SAME_STMT);
290 gsi_replace (&gsi, new_cond, true);
291
292 edge e = split_block (block, asgn);
293 block = e->dest;
294 map->get_or_insert (k: block) = new_cond;
295 }
296 else if ((gimple_code (g: stmt) == GIMPLE_CALL
297 && !gimple_call_internal_p (gs: stmt))
298 || is_gimple_assign (gs: stmt))
299 {
300 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
301 gsi_prev (i: &gsi);
302
303 edge call = split_block (block, gsi_stmt (i: gsi));
304
305 gimple *call_stmt = gsi_stmt (i: gsi_start_bb (bb: call->dest));
306
307 edge call_to_ret = split_block (call->dest, call_stmt);
308
309 map->get_or_insert (k: call_to_ret->src) = call_stmt;
310 }
311 else
312 {
313 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
314 gsi_prev (i: &gsi);
315
316 if (gsi_end_p (i: gsi))
317 map->get_or_insert (k: block) = stmt;
318 else
319 {
320 /* Split block before insn. The insn is in the new block. */
321 edge e = split_block (block, gsi_stmt (i: gsi));
322
323 block = e->dest;
324 map->get_or_insert (k: block) = stmt;
325 }
326 }
327 }
328}
329
static const char *
mask_name (unsigned mask)
{
  switch (mask)
    {
    case 0: return "gang redundant";
    case 1: return "gang partitioned";
    case 2: return "worker partitioned";
    case 3: return "gang+worker partitioned";
    case 4: return "vector partitioned";
    case 5: return "gang+vector partitioned";
    case 6: return "worker+vector partitioned";
    case 7: return "fully partitioned";
    default: return "<illegal>";
    }
}

/* Dump this parallel and all its inner parallels.  */
/* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_dump_pars'.  */

static void
omp_sese_dump_pars (parallel_g *par, unsigned depth)
{
  fprintf (dump_file, "%u: mask %d (%s) head=%d, tail=%d\n",
	   depth, par->mask, mask_name (par->mask),
	   par->forked_block ? par->forked_block->index : -1,
	   par->join_block ? par->join_block->index : -1);

  fprintf (dump_file, "  blocks:");

  basic_block block;
  for (unsigned ix = 0; par->blocks.iterate (ix, &block); ix++)
    fprintf (dump_file, " %d", block->index);
  fprintf (dump_file, "\n");
  if (par->inner)
    omp_sese_dump_pars (par->inner, depth + 1);

  if (par->next)
    omp_sese_dump_pars (par->next, depth);
}
370
371/* If BLOCK contains a fork/join marker, process it to create or
372 terminate a loop structure. Add this block to the current loop,
373 and then walk successor blocks. */
374/* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_find_par'. */
375
376static parallel_g *
377omp_sese_find_par (bb_stmt_map_t *map, parallel_g *par, basic_block block)
378{
379 if (block->flags & BB_VISITED)
380 return par;
381 block->flags |= BB_VISITED;
382
383 if (gimple **stmtp = map->get (k: block))
384 {
385 gimple *stmt = *stmtp;
386
387 if (gimple_code (g: stmt) == GIMPLE_COND
388 || gimple_code (g: stmt) == GIMPLE_SWITCH
389 || gimple_code (g: stmt) == GIMPLE_RETURN
390 || (gimple_code (g: stmt) == GIMPLE_CALL
391 && !gimple_call_internal_p (gs: stmt))
392 || is_gimple_assign (gs: stmt))
393 {
394 /* A single block that is forced to be at the maximum partition
395 level. Make a singleton par for it. */
396 par = new parallel_g (par, GOMP_DIM_MASK (GOMP_DIM_GANG)
397 | GOMP_DIM_MASK (GOMP_DIM_WORKER)
398 | GOMP_DIM_MASK (GOMP_DIM_VECTOR));
399 par->forked_block = block;
400 par->forked_stmt = stmt;
401 par->blocks.safe_push (obj: block);
402 par = par->parent;
403 goto walk_successors;
404 }
405 else if (gimple_nop_p (g: stmt))
406 {
407 basic_block pred = single_pred (bb: block);
408 gcc_assert (pred);
409 gimple_stmt_iterator gsi = gsi_last_bb (bb: pred);
410 gimple *final_stmt = gsi_stmt (i: gsi);
411
412 if (gimple_call_internal_p (gs: final_stmt, fn: IFN_UNIQUE))
413 {
414 gcall *call = as_a <gcall *> (p: final_stmt);
415 enum ifn_unique_kind k = ((enum ifn_unique_kind)
416 TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
417
418 if (k == IFN_UNIQUE_OACC_FORK)
419 {
420 HOST_WIDE_INT dim
421 = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
422 unsigned mask = (dim >= 0) ? GOMP_DIM_MASK (dim) : 0;
423
424 par = new parallel_g (par, mask);
425 par->forked_block = block;
426 par->forked_stmt = final_stmt;
427 par->fork_stmt = stmt;
428 }
429 else
430 gcc_unreachable ();
431 }
432 else
433 gcc_unreachable ();
434 }
435 else if (gimple_call_internal_p (gs: stmt, fn: IFN_UNIQUE))
436 {
437 gcall *call = as_a <gcall *> (p: stmt);
438 enum ifn_unique_kind k = ((enum ifn_unique_kind)
439 TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
440 if (k == IFN_UNIQUE_OACC_JOIN)
441 {
442 HOST_WIDE_INT dim = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
443 unsigned mask = (dim >= 0) ? GOMP_DIM_MASK (dim) : 0;
444
445 gcc_assert (par->mask == mask);
446 par->join_block = block;
447 par->join_stmt = stmt;
448 par = par->parent;
449 }
450 else
451 gcc_unreachable ();
452 }
453 else
454 gcc_unreachable ();
455 }
456
457 if (par)
458 /* Add this block onto the current loop's list of blocks. */
459 par->blocks.safe_push (obj: block);
460 else
461 /* This must be the entry block. Create a NULL parallel. */
462 par = new parallel_g (0, 0);
463
464walk_successors:
465 /* Walk successor blocks. */
466 edge e;
467 edge_iterator ei;
468
469 FOR_EACH_EDGE (e, ei, block->succs)
470 omp_sese_find_par (map, par, block: e->dest);
471
472 return par;
473}
474
475/* DFS walk the CFG looking for fork & join markers. Construct
476 loop structures as we go. MAP is a mapping of basic blocks
477 to head & tail markers, discovered when splitting blocks. This
478 speeds up the discovery. We rely on the BB visited flag having
479 been cleared when splitting blocks. */
480/* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_discover_pars'. */
481
482static parallel_g *
483omp_sese_discover_pars (bb_stmt_map_t *map)
484{
485 basic_block block;
486
487 /* Mark exit blocks as visited. */
488 block = EXIT_BLOCK_PTR_FOR_FN (cfun);
489 block->flags |= BB_VISITED;
490
491 /* And entry block as not. */
492 block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
493 block->flags &= ~BB_VISITED;
494
495 parallel_g *par = omp_sese_find_par (map, par: 0, block);
496
497 if (dump_file)
498 {
499 fprintf (stream: dump_file, format: "\nLoops\n");
500 omp_sese_dump_pars (par, depth: 0);
501 fprintf (stream: dump_file, format: "\n");
502 }
503
504 return par;
505}
506
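/* Walk the parallel structure PAR (and its children and siblings) and
   record in WORKER_SINGLE and VECTOR_SINGLE the basic blocks that execute
   in worker-single and vector-single mode respectively, i.e. those whose
   accumulated partitioning mask (OUTER_MASK plus the parallel's own mask)
   does not include the worker or vector dimension.  */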
507static void
508populate_single_mode_bitmaps (parallel_g *par, bitmap worker_single,
509 bitmap vector_single, unsigned outer_mask,
510 int depth)
511{
512 unsigned mask = outer_mask | par->mask;
513
514 basic_block block;
515
516 for (unsigned i = 0; par->blocks.iterate (ix: i, ptr: &block); i++)
517 {
518 if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
519 bitmap_set_bit (worker_single, block->index);
520
521 if ((mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)) == 0)
522 bitmap_set_bit (vector_single, block->index);
523 }
524
525 if (par->inner)
526 populate_single_mode_bitmaps (par: par->inner, worker_single, vector_single,
527 outer_mask: mask, depth: depth + 1);
528 if (par->next)
529 populate_single_mode_bitmaps (par: par->next, worker_single, vector_single,
530 outer_mask, depth);
531}
532
/* A map from SSA names or var decls to record fields.  */

typedef hash_map<tree, tree> field_map_t;

/* For each propagation record type, this is a map from SSA names or var decls
   to propagate, to the field in the record type that should be used for
   transmission and reception.  */

typedef hash_map<tree, field_map_t> record_field_map_t;

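/* Create a FIELD_DECL for VAR (an SSA name or VAR_DECL) in RECORD_TYPE,
   the record type used to broadcast values between workers, and record
   the mapping from VAR to the new field in FIELDS.  */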
static void
install_var_field (tree var, tree record_type, field_map_t *fields)
{
  tree name;
  char tmp[20];

  if (TREE_CODE (var) == SSA_NAME)
    {
      name = SSA_NAME_IDENTIFIER (var);
      if (!name)
	{
	  sprintf (tmp, "_%u", (unsigned) SSA_NAME_VERSION (var));
	  name = get_identifier (tmp);
	}
    }
  else if (VAR_P (var))
    {
      name = DECL_NAME (var);
      if (!name)
	{
	  sprintf (tmp, "D_%u", (unsigned) DECL_UID (var));
	  name = get_identifier (tmp);
	}
    }
  else
    gcc_unreachable ();

  gcc_assert (!fields->get (var));

  tree type = TREE_TYPE (var);

  if (POINTER_TYPE_P (type)
      && TYPE_RESTRICT (type))
    type = build_qualified_type (type, TYPE_QUALS (type) & ~TYPE_QUAL_RESTRICT);

  tree field = build_decl (BUILTINS_LOCATION, FIELD_DECL, name, type);

  if (VAR_P (var) && type == TREE_TYPE (var))
    {
      SET_DECL_ALIGN (field, DECL_ALIGN (var));
      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
    }
  else
    SET_DECL_ALIGN (field, TYPE_ALIGN (type));

  fields->put (var, field);

  insert_field_into_struct (record_type, field);
}
593
594/* Sets of SSA_NAMES or VAR_DECLs to propagate. */
595typedef hash_set<tree> propagation_set;
596
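/* Walk the parallel structure PAR looking for SSA names that are used in
   worker-partitioned blocks but defined in worker-single blocks (as
   recorded in WORKER_SINGLE).  Such names must be broadcast from the
   single active worker: add each one to the propagation set of its
   defining block in PROP_SET.  */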
597static void
598find_ssa_names_to_propagate (parallel_g *par, unsigned outer_mask,
599 bitmap worker_single, bitmap vector_single,
600 vec<propagation_set *> *prop_set)
601{
602 unsigned mask = outer_mask | par->mask;
603
604 if (par->inner)
605 find_ssa_names_to_propagate (par: par->inner, outer_mask: mask, worker_single,
606 vector_single, prop_set);
607 if (par->next)
608 find_ssa_names_to_propagate (par: par->next, outer_mask, worker_single,
609 vector_single, prop_set);
610
611 if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
612 {
613 basic_block block;
614 int ix;
615
616 for (ix = 0; par->blocks.iterate (ix, ptr: &block); ix++)
617 {
618 for (gphi_iterator psi = gsi_start_phis (block);
619 !gsi_end_p (i: psi); gsi_next (i: &psi))
620 {
621 gphi *phi = psi.phi ();
622 use_operand_p use;
623 ssa_op_iter iter;
624
625 FOR_EACH_PHI_ARG (use, phi, iter, SSA_OP_USE)
626 {
627 tree var = USE_FROM_PTR (use);
628
629 if (TREE_CODE (var) != SSA_NAME)
630 continue;
631
632 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
633
634 if (gimple_nop_p (g: def_stmt))
635 continue;
636
637 basic_block def_bb = gimple_bb (g: def_stmt);
638
639 if (bitmap_bit_p (worker_single, def_bb->index))
640 {
641 if (!(*prop_set)[def_bb->index])
642 (*prop_set)[def_bb->index] = new propagation_set;
643
644 propagation_set *ws_prop = (*prop_set)[def_bb->index];
645
646 ws_prop->add (k: var);
647 }
648 }
649 }
650
651 for (gimple_stmt_iterator gsi = gsi_start_bb (bb: block);
652 !gsi_end_p (i: gsi); gsi_next (i: &gsi))
653 {
654 use_operand_p use;
655 ssa_op_iter iter;
656 gimple *stmt = gsi_stmt (i: gsi);
657
658 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
659 {
660 tree var = USE_FROM_PTR (use);
661
662 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
663
664 if (gimple_nop_p (g: def_stmt))
665 continue;
666
667 basic_block def_bb = gimple_bb (g: def_stmt);
668
669 if (bitmap_bit_p (worker_single, def_bb->index))
670 {
671 if (!(*prop_set)[def_bb->index])
672 (*prop_set)[def_bb->index] = new propagation_set;
673
674 propagation_set *ws_prop = (*prop_set)[def_bb->index];
675
676 ws_prop->add (k: var);
677 }
678 }
679 }
680 }
681 }
682}
683
684/* Callback for walk_gimple_stmt to find RHS VAR_DECLs (uses) in a
685 statement. */
686
687static tree
688find_partitioned_var_uses_1 (tree *node, int *, void *data)
689{
690 walk_stmt_info *wi = (walk_stmt_info *) data;
691 hash_set<tree> *partitioned_var_uses = (hash_set<tree> *) wi->info;
692
693 if (!wi->is_lhs && VAR_P (*node))
694 partitioned_var_uses->add (k: *node);
695
696 return NULL_TREE;
697}
698
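/* Walk the parallel structure PAR and record in PARTITIONED_VAR_USES every
   VAR_DECL that is used (read) in a worker-partitioned block.  */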
699static void
700find_partitioned_var_uses (parallel_g *par, unsigned outer_mask,
701 hash_set<tree> *partitioned_var_uses)
702{
703 unsigned mask = outer_mask | par->mask;
704
705 if (par->inner)
706 find_partitioned_var_uses (par: par->inner, outer_mask: mask, partitioned_var_uses);
707 if (par->next)
708 find_partitioned_var_uses (par: par->next, outer_mask, partitioned_var_uses);
709
710 if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
711 {
712 basic_block block;
713 int ix;
714
715 for (ix = 0; par->blocks.iterate (ix, ptr: &block); ix++)
716 for (gimple_stmt_iterator gsi = gsi_start_bb (bb: block);
717 !gsi_end_p (i: gsi); gsi_next (i: &gsi))
718 {
719 walk_stmt_info wi;
720 memset (s: &wi, c: 0, n: sizeof (wi));
721 wi.info = (void *) partitioned_var_uses;
722 walk_gimple_stmt (&gsi, NULL, find_partitioned_var_uses_1, &wi);
723 }
724 }
725}
726
727/* Gang-private variables (typically placed in a GPU's shared memory) do not
728 need to be processed by the worker-propagation mechanism. Populate the
729 GANG_PRIVATE_VARS set with any such variables found in the current
730 function. */
731
732static void
733find_gang_private_vars (hash_set<tree> *gang_private_vars)
734{
735 basic_block block;
736
737 FOR_EACH_BB_FN (block, cfun)
738 {
739 for (gimple_stmt_iterator gsi = gsi_start_bb (bb: block);
740 !gsi_end_p (i: gsi);
741 gsi_next (i: &gsi))
742 {
743 gimple *stmt = gsi_stmt (i: gsi);
744
745 if (gimple_call_internal_p (gs: stmt, fn: IFN_UNIQUE))
746 {
747 enum ifn_unique_kind k = ((enum ifn_unique_kind)
748 TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
749 if (k == IFN_UNIQUE_OACC_PRIVATE)
750 {
751 HOST_WIDE_INT level
752 = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
753 if (level != GOMP_DIM_GANG)
754 continue;
755 for (unsigned i = 3; i < gimple_call_num_args (gs: stmt); i++)
756 {
757 tree arg = gimple_call_arg (gs: stmt, index: i);
758 gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
759 tree decl = TREE_OPERAND (arg, 0);
760 gang_private_vars->add (k: decl);
761 }
762 }
763 }
764 }
765 }
766}
767
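/* Walk the parallel structure PAR looking for local variables that may be
   written in worker-single blocks but are used in worker-partitioned blocks
   (per PARTITIONED_VAR_USES), and so must be broadcast: add them to the
   block's propagation set in PROP_SET.  Writes to gang-private variables
   (GANG_PRIVATE_VARS) need no broadcasting, but are noted in
   WRITES_GANG_PRIVATE so that a barrier can be placed after the block.  */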
768static void
769find_local_vars_to_propagate (parallel_g *par, unsigned outer_mask,
770 hash_set<tree> *partitioned_var_uses,
771 hash_set<tree> *gang_private_vars,
772 bitmap writes_gang_private,
773 vec<propagation_set *> *prop_set)
774{
775 unsigned mask = outer_mask | par->mask;
776
777 if (par->inner)
778 find_local_vars_to_propagate (par: par->inner, outer_mask: mask, partitioned_var_uses,
779 gang_private_vars, writes_gang_private,
780 prop_set);
781 if (par->next)
782 find_local_vars_to_propagate (par: par->next, outer_mask, partitioned_var_uses,
783 gang_private_vars, writes_gang_private,
784 prop_set);
785
786 if (!(mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)))
787 {
788 basic_block block;
789 int ix;
790
791 for (ix = 0; par->blocks.iterate (ix, ptr: &block); ix++)
792 {
793 for (gimple_stmt_iterator gsi = gsi_start_bb (bb: block);
794 !gsi_end_p (i: gsi); gsi_next (i: &gsi))
795 {
796 gimple *stmt = gsi_stmt (i: gsi);
797 tree var;
798 unsigned i;
799
800 FOR_EACH_LOCAL_DECL (cfun, i, var)
801 {
802 if (!VAR_P (var)
803 || is_global_var (t: var)
804 || AGGREGATE_TYPE_P (TREE_TYPE (var))
805 || !partitioned_var_uses->contains (k: var))
806 continue;
807
808 if (stmt_may_clobber_ref_p (stmt, var))
809 {
810 if (dump_file)
811 {
812 fprintf (stream: dump_file, format: "bb %u: local variable may be "
813 "clobbered in %s mode: ", block->index,
814 mask_name (mask));
815 print_generic_expr (dump_file, var, TDF_SLIM);
816 fprintf (stream: dump_file, format: "\n");
817 }
818
819 if (gang_private_vars->contains (k: var))
820 {
821 /* If we write a gang-private variable, we want a
822 barrier at the end of the block. */
823 bitmap_set_bit (writes_gang_private, block->index);
824 continue;
825 }
826
827 if (!(*prop_set)[block->index])
828 (*prop_set)[block->index] = new propagation_set;
829
830 propagation_set *ws_prop
831 = (*prop_set)[block->index];
832
833 ws_prop->add (k: var);
834 }
835 }
836 }
837 }
838 }
839}
840
/* Transform basic blocks FROM, TO (which may be the same block) into:
   if (GOACC_single_start ())
     BLOCK;
   GOACC_barrier ();
                              \  |  /
                              +----+
                              |    |        (new) predicate block
                              +----+--
   \  |  /   \  |  /            |t \
   +----+    +----+           +----+  |
   |    |    |    |    ===>   |    |  | f   (old) from block
   +----+    +----+           +----+  |
     |       t/  \f             |    /
                              +----+/
  (split    (split before     |    |        skip block
  at end)    condition)       +----+
                                t/  \f
*/
859
860static void
861worker_single_simple (basic_block from, basic_block to,
862 hash_set<tree> *def_escapes_block)
863{
864 gimple *call, *cond;
865 tree lhs, decl;
866 basic_block skip_block;
867
868 gimple_stmt_iterator gsi = gsi_last_bb (bb: to);
869 if (EDGE_COUNT (to->succs) > 1)
870 {
871 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND);
872 gsi_prev (i: &gsi);
873 }
874 edge e = split_block (to, gsi_stmt (i: gsi));
875 skip_block = e->dest;
876
877 gimple_stmt_iterator start = gsi_after_labels (bb: from);
878
879 decl = builtin_decl_explicit (fncode: BUILT_IN_GOACC_SINGLE_START);
880 lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl)));
881 call = gimple_build_call (decl, 0);
882 gimple_call_set_lhs (gs: call, lhs);
883 gsi_insert_before (&start, call, GSI_NEW_STMT);
884 update_stmt (s: call);
885
886 cond = gimple_build_cond (EQ_EXPR, lhs,
887 fold_convert_loc (UNKNOWN_LOCATION,
888 TREE_TYPE (lhs),
889 boolean_true_node),
890 NULL_TREE, NULL_TREE);
891 gsi_insert_after (&start, cond, GSI_NEW_STMT);
892 update_stmt (s: cond);
893
894 edge et = split_block (from, cond);
895 et->flags &= ~EDGE_FALLTHRU;
896 et->flags |= EDGE_TRUE_VALUE;
897 /* Make the active worker the more probable path so we prefer fallthrough
898 (letting the idle workers jump around more). */
899 et->probability = profile_probability::likely ();
900
901 edge ef = make_edge (from, skip_block, EDGE_FALSE_VALUE);
902 ef->probability = et->probability.invert ();
903
904 basic_block neutered = split_edge (ef);
905 gimple_stmt_iterator neut_gsi = gsi_last_bb (bb: neutered);
906
907 for (gsi = gsi_start_bb (bb: et->dest); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
908 {
909 gimple *stmt = gsi_stmt (i: gsi);
910 ssa_op_iter iter;
911 tree var;
912
913 FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_DEF)
914 {
915 if (def_escapes_block->contains (k: var))
916 {
917 gphi *join_phi = create_phi_node (NULL_TREE, skip_block);
918 create_new_def_for (var, join_phi,
919 gimple_phi_result_ptr (gs: join_phi));
920 add_phi_arg (join_phi, var, e, UNKNOWN_LOCATION);
921
922 tree neutered_def = copy_ssa_name (var, NULL);
923 /* We really want "don't care" or some value representing
924 undefined here, but optimizers will probably get rid of the
925 zero-assignments anyway. */
926 gassign *zero = gimple_build_assign (neutered_def,
927 build_zero_cst (TREE_TYPE (neutered_def)));
928
929 gsi_insert_after (&neut_gsi, zero, GSI_CONTINUE_LINKING);
930 update_stmt (s: zero);
931
932 add_phi_arg (join_phi, neutered_def, single_succ_edge (bb: neutered),
933 UNKNOWN_LOCATION);
934 update_stmt (s: join_phi);
935 }
936 }
937 }
938}
939
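/* Build a reference to the field corresponding to VAR in the broadcast
   record pointed to by RECEIVER_DECL, for reading a propagated value.  */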
static tree
build_receiver_ref (tree var, tree receiver_decl, field_map_t *fields)
{
  tree x = build_simple_mem_ref (receiver_decl);
  tree field = *fields->get (var);
  TREE_THIS_NOTRAP (x) = 1;
  x = omp_build_component_ref (x, field);
  return x;
}

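/* Build a reference to the field corresponding to VAR in the broadcast
   record SENDER_DECL (or the record it points to), for writing a value
   that is to be propagated.  */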
static tree
build_sender_ref (tree var, tree sender_decl, field_map_t *fields)
{
  if (POINTER_TYPE_P (TREE_TYPE (sender_decl)))
    sender_decl = build_simple_mem_ref (sender_decl);
  tree field = *fields->get (var);
  return omp_build_component_ref (sender_decl, field);
}

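/* qsort comparator that orders SSA names before VAR_DECLs, SSA names by
   version number and VAR_DECLs by DECL_UID, giving a stable iteration
   order.  */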
static int
sort_by_ssa_version_or_uid (const void *p1, const void *p2)
{
  const tree t1 = *(const tree *)p1;
  const tree t2 = *(const tree *)p2;

  if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) == SSA_NAME)
    return SSA_NAME_VERSION (t1) - SSA_NAME_VERSION (t2);
  else if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) != SSA_NAME)
    return -1;
  else if (TREE_CODE (t1) != SSA_NAME && TREE_CODE (t2) == SSA_NAME)
    return 1;
  else
    return DECL_UID (t1) - DECL_UID (t2);
}

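/* qsort comparator that orders trees by decreasing type size, breaking
   ties with sort_by_ssa_version_or_uid.  */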
static int
sort_by_size_then_ssa_version_or_uid (const void *p1, const void *p2)
{
  const tree t1 = *(const tree *)p1;
  const tree t2 = *(const tree *)p2;
  unsigned HOST_WIDE_INT s1 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t1)));
  unsigned HOST_WIDE_INT s2 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t2)));
  if (s1 != s2)
    return s2 - s1;
  else
    return sort_by_ssa_version_or_uid (p1, p2);
}
987
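/* Transform basic blocks FROM..TO (which may be the same block) so that the
   work is performed by a single worker, and the values needed by the other
   workers (DEF_ESCAPES_BLOCK, WORKER_PARTITIONED_USES) are broadcast to
   them through a shared-memory record of type RECORD_TYPE placed at offset
   PLACEMENT, bracketed by GOACC_single_copy_start/GOACC_single_copy_end and
   barriers.  ISOLATE_BROADCASTS adds extra barriers to protect a reused
   shared-memory region; HAS_GANG_PRIVATE_WRITE forces a barrier after the
   block even when there is nothing to copy out.  */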
988static void
989worker_single_copy (basic_block from, basic_block to,
990 hash_set<tree> *def_escapes_block,
991 hash_set<tree> *worker_partitioned_uses,
992 tree record_type, record_field_map_t *record_field_map,
993 unsigned HOST_WIDE_INT placement,
994 bool isolate_broadcasts, bool has_gang_private_write)
995{
996 /* If we only have virtual defs, we'll have no record type, but we still want
997 to emit single_copy_start and (particularly) single_copy_end to act as
998 a vdef source on the neutered edge representing memory writes on the
999 non-neutered edge. */
1000 if (!record_type)
1001 record_type = char_type_node;
1002
1003 tree sender_decl
1004 = targetm.goacc.create_worker_broadcast_record (record_type, true,
1005 ".oacc_worker_o",
1006 placement);
1007 tree receiver_decl
1008 = targetm.goacc.create_worker_broadcast_record (record_type, false,
1009 ".oacc_worker_i",
1010 placement);
1011
1012 gimple_stmt_iterator gsi = gsi_last_bb (bb: to);
1013 if (EDGE_COUNT (to->succs) > 1)
1014 gsi_prev (i: &gsi);
1015 edge e = split_block (to, gsi_stmt (i: gsi));
1016 basic_block barrier_block = e->dest;
1017
1018 gimple_stmt_iterator start = gsi_after_labels (bb: from);
1019
1020 tree decl = builtin_decl_explicit (fncode: BUILT_IN_GOACC_SINGLE_COPY_START);
1021
1022 tree lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl)));
1023
1024 gimple *call
1025 = gimple_build_call (decl, 1,
1026 POINTER_TYPE_P (TREE_TYPE (sender_decl))
1027 ? sender_decl : build_fold_addr_expr (sender_decl));
1028 gimple_call_set_lhs (gs: call, lhs);
1029 gsi_insert_before (&start, call, GSI_NEW_STMT);
1030 update_stmt (s: call);
1031
1032 /* The shared-memory range for this block overflowed. Add a barrier before
1033 the GOACC_single_copy_start call. */
1034 if (isolate_broadcasts)
1035 {
1036 decl = builtin_decl_explicit (fncode: BUILT_IN_GOACC_BARRIER);
1037 gimple *acc_bar = gimple_build_call (decl, 0);
1038 gsi_insert_before (&start, acc_bar, GSI_SAME_STMT);
1039 }
1040
1041 tree conv_tmp = make_ssa_name (TREE_TYPE (receiver_decl));
1042
1043 gimple *conv = gimple_build_assign (conv_tmp,
1044 fold_convert (TREE_TYPE (receiver_decl),
1045 lhs));
1046 update_stmt (s: conv);
1047 gsi_insert_after (&start, conv, GSI_NEW_STMT);
1048 gimple *asgn = gimple_build_assign (receiver_decl, conv_tmp);
1049 gsi_insert_after (&start, asgn, GSI_NEW_STMT);
1050 update_stmt (s: asgn);
1051
1052 tree zero_ptr = build_int_cst (TREE_TYPE (receiver_decl), 0);
1053
1054 tree recv_tmp = make_ssa_name (TREE_TYPE (receiver_decl));
1055 asgn = gimple_build_assign (recv_tmp, receiver_decl);
1056 gsi_insert_after (&start, asgn, GSI_NEW_STMT);
1057 update_stmt (s: asgn);
1058
1059 gimple *cond = gimple_build_cond (EQ_EXPR, recv_tmp, zero_ptr, NULL_TREE,
1060 NULL_TREE);
1061 update_stmt (s: cond);
1062
1063 gsi_insert_after (&start, cond, GSI_NEW_STMT);
1064
1065 edge et = split_block (from, cond);
1066 et->flags &= ~EDGE_FALLTHRU;
1067 et->flags |= EDGE_TRUE_VALUE;
1068 /* Make the active worker the more probable path so we prefer fallthrough
1069 (letting the idle workers jump around more). */
1070 et->probability = profile_probability::likely ();
1071
1072 basic_block body = et->dest;
1073
1074 edge ef = make_edge (from, barrier_block, EDGE_FALSE_VALUE);
1075 ef->probability = et->probability.invert ();
1076
1077 gimple_stmt_iterator bar_gsi = gsi_start_bb (bb: barrier_block);
1078 cond = gimple_build_cond (NE_EXPR, recv_tmp, zero_ptr, NULL_TREE, NULL_TREE);
1079
1080 if (record_type != char_type_node || has_gang_private_write)
1081 {
1082 decl = builtin_decl_explicit (fncode: BUILT_IN_GOACC_BARRIER);
1083 gimple *acc_bar = gimple_build_call (decl, 0);
1084
1085 gsi_insert_before (&bar_gsi, acc_bar, GSI_NEW_STMT);
1086 gsi_insert_after (&bar_gsi, cond, GSI_NEW_STMT);
1087 }
1088 else
1089 gsi_insert_before (&bar_gsi, cond, GSI_NEW_STMT);
1090
1091 edge et2 = split_block (barrier_block, cond);
1092 et2->flags &= ~EDGE_FALLTHRU;
1093 et2->flags |= EDGE_TRUE_VALUE;
1094 et2->probability = profile_probability::unlikely ();
1095
1096 basic_block exit_block = et2->dest;
1097
1098 basic_block copyout_block = split_edge (et2);
1099 edge ef2 = make_edge (barrier_block, exit_block, EDGE_FALSE_VALUE);
1100 ef2->probability = et2->probability.invert ();
1101
1102 gimple_stmt_iterator copyout_gsi = gsi_start_bb (bb: copyout_block);
1103
1104 edge copyout_to_exit = single_succ_edge (bb: copyout_block);
1105
1106 gimple_seq sender_seq = NULL;
1107
1108 /* Make sure we iterate over definitions in a stable order. */
1109 auto_vec<tree> escape_vec (def_escapes_block->elements ());
1110 for (hash_set<tree>::iterator it = def_escapes_block->begin ();
1111 it != def_escapes_block->end (); ++it)
1112 escape_vec.quick_push (obj: *it);
1113 escape_vec.qsort (sort_by_ssa_version_or_uid);
1114
1115 for (unsigned i = 0; i < escape_vec.length (); i++)
1116 {
1117 tree var = escape_vec[i];
1118
1119 if (TREE_CODE (var) == SSA_NAME && SSA_NAME_IS_VIRTUAL_OPERAND (var))
1120 continue;
1121
1122 tree barrier_def = 0;
1123
1124 if (TREE_CODE (var) == SSA_NAME)
1125 {
1126 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
1127
1128 if (gimple_nop_p (g: def_stmt))
1129 continue;
1130
1131 /* The barrier phi takes one result from the actual work of the
1132 block we're neutering, and the other result is constant zero of
1133 the same type. */
1134
1135 gphi *barrier_phi = create_phi_node (NULL_TREE, barrier_block);
1136 barrier_def = create_new_def_for (var, barrier_phi,
1137 gimple_phi_result_ptr (gs: barrier_phi));
1138
1139 add_phi_arg (barrier_phi, var, e, UNKNOWN_LOCATION);
1140 add_phi_arg (barrier_phi, build_zero_cst (TREE_TYPE (var)), ef,
1141 UNKNOWN_LOCATION);
1142
1143 update_stmt (s: barrier_phi);
1144 }
1145 else
1146 gcc_assert (VAR_P (var));
1147
1148 /* If we had no record type, we will have no fields map. */
1149 field_map_t *fields = record_field_map->get (k: record_type);
1150
1151 if (worker_partitioned_uses->contains (k: var)
1152 && fields
1153 && fields->get (k: var))
1154 {
1155 tree neutered_def = make_ssa_name (TREE_TYPE (var));
1156
1157 /* Receive definition from shared memory block. */
1158
1159 tree receiver_ref = build_receiver_ref (var, receiver_decl, fields);
1160 gassign *recv = gimple_build_assign (neutered_def,
1161 receiver_ref);
1162 gsi_insert_after (&copyout_gsi, recv, GSI_CONTINUE_LINKING);
1163 update_stmt (s: recv);
1164
1165 if (VAR_P (var))
1166 {
1167 /* If it's a VAR_DECL, we only copied to an SSA temporary. Copy
1168 to the final location now. */
1169 gassign *asgn = gimple_build_assign (var, neutered_def);
1170 gsi_insert_after (&copyout_gsi, asgn, GSI_CONTINUE_LINKING);
1171 update_stmt (s: asgn);
1172 }
1173 else
1174 {
1175 /* If it's an SSA name, create a new phi at the join node to
1176 represent either the output from the active worker (the
1177 barrier) or the inactive workers (the copyout block). */
1178 gphi *join_phi = create_phi_node (NULL_TREE, exit_block);
1179 create_new_def_for (barrier_def, join_phi,
1180 gimple_phi_result_ptr (gs: join_phi));
1181 add_phi_arg (join_phi, barrier_def, ef2, UNKNOWN_LOCATION);
1182 add_phi_arg (join_phi, neutered_def, copyout_to_exit,
1183 UNKNOWN_LOCATION);
1184 update_stmt (s: join_phi);
1185 }
1186
1187 /* Send definition to shared memory block. */
1188
1189 tree sender_ref = build_sender_ref (var, sender_decl, fields);
1190
1191 if (TREE_CODE (var) == SSA_NAME)
1192 {
1193 gassign *send = gimple_build_assign (sender_ref, var);
1194 gimple_seq_add_stmt (&sender_seq, send);
1195 update_stmt (s: send);
1196 }
1197 else if (VAR_P (var))
1198 {
1199 tree tmp = make_ssa_name (TREE_TYPE (var));
1200 gassign *send = gimple_build_assign (tmp, var);
1201 gimple_seq_add_stmt (&sender_seq, send);
1202 update_stmt (s: send);
1203 send = gimple_build_assign (sender_ref, tmp);
1204 gimple_seq_add_stmt (&sender_seq, send);
1205 update_stmt (s: send);
1206 }
1207 else
1208 gcc_unreachable ();
1209 }
1210 }
1211
1212 /* The shared-memory range for this block overflowed. Add a barrier at the
1213 end. */
1214 if (isolate_broadcasts)
1215 {
1216 gsi = gsi_start_bb (bb: exit_block);
1217 decl = builtin_decl_explicit (fncode: BUILT_IN_GOACC_BARRIER);
1218 gimple *acc_bar = gimple_build_call (decl, 0);
1219 gsi_insert_before (&gsi, acc_bar, GSI_SAME_STMT);
1220 }
1221
1222 /* It's possible for the ET->DEST block (the work done by the active thread)
1223 to finish with a control-flow insn, e.g. a UNIQUE function call. Split
1224 the block and add SENDER_SEQ in the latter part to avoid having control
1225 flow in the middle of a BB. */
1226
1227 decl = builtin_decl_explicit (fncode: BUILT_IN_GOACC_SINGLE_COPY_END);
1228 call = gimple_build_call (decl, 1,
1229 POINTER_TYPE_P (TREE_TYPE (sender_decl))
1230 ? sender_decl
1231 : build_fold_addr_expr (sender_decl));
1232 gimple_seq_add_stmt (&sender_seq, call);
1233
1234 gsi = gsi_last_bb (bb: body);
1235 gimple *last = gsi_stmt (i: gsi);
1236 basic_block sender_block = split_block (body, last)->dest;
1237 gsi = gsi_last_bb (bb: sender_block);
1238 gsi_insert_seq_after (&gsi, sender_seq, GSI_CONTINUE_LINKING);
1239}
1240
1241typedef hash_map<basic_block, std::pair<unsigned HOST_WIDE_INT, bool> >
1242 blk_offset_map_t;
1243
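/* Predicate ("neuter") the worker-single blocks of the parallel region PAR
   so that only one worker executes them, broadcasting any values that
   escape a block via worker_single_copy, or using the simpler
   worker_single_simple scheme when nothing needs to be propagated.  Also
   place barriers around calls to OpenACC routines made in worker-single
   mode.  */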
1244static void
1245neuter_worker_single (parallel_g *par, unsigned outer_mask,
1246 bitmap worker_single, bitmap vector_single,
1247 vec<propagation_set *> *prop_set,
1248 hash_set<tree> *partitioned_var_uses,
1249 record_field_map_t *record_field_map,
1250 blk_offset_map_t *blk_offset_map,
1251 bitmap writes_gang_private)
1252{
1253 unsigned mask = outer_mask | par->mask;
1254
1255 if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
1256 {
1257 basic_block block;
1258
1259 for (unsigned i = 0; par->blocks.iterate (ix: i, ptr: &block); i++)
1260 {
1261 bool has_defs = false;
1262 hash_set<tree> def_escapes_block;
1263 hash_set<tree> worker_partitioned_uses;
1264 unsigned j;
1265 tree var;
1266
1267 FOR_EACH_SSA_NAME (j, var, cfun)
1268 {
1269 if (SSA_NAME_IS_VIRTUAL_OPERAND (var))
1270 {
1271 has_defs = true;
1272 continue;
1273 }
1274
1275 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
1276
1277 if (gimple_nop_p (g: def_stmt))
1278 continue;
1279
1280 if (gimple_bb (g: def_stmt)->index != block->index)
1281 continue;
1282
1283 gimple *use_stmt;
1284 imm_use_iterator use_iter;
1285 bool uses_outside_block = false;
1286 bool worker_partitioned_use = false;
1287
1288 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, var)
1289 {
1290 int blocknum = gimple_bb (g: use_stmt)->index;
1291
1292 /* Don't propagate SSA names that are only used in the
1293 current block, unless the usage is in a phi node: that
1294 means the name left the block, then came back in at the
1295 top. */
1296 if (blocknum != block->index
1297 || gimple_code (g: use_stmt) == GIMPLE_PHI)
1298 uses_outside_block = true;
1299 if (!bitmap_bit_p (worker_single, blocknum))
1300 worker_partitioned_use = true;
1301 }
1302
1303 if (uses_outside_block)
1304 def_escapes_block.add (k: var);
1305
1306 if (worker_partitioned_use)
1307 {
1308 worker_partitioned_uses.add (k: var);
1309 has_defs = true;
1310 }
1311 }
1312
1313 propagation_set *ws_prop = (*prop_set)[block->index];
1314
1315 if (ws_prop)
1316 {
1317 for (propagation_set::iterator it = ws_prop->begin ();
1318 it != ws_prop->end ();
1319 ++it)
1320 {
1321 tree var = *it;
1322 if (TREE_CODE (var) == VAR_DECL)
1323 {
1324 def_escapes_block.add (k: var);
1325 if (partitioned_var_uses->contains (k: var))
1326 {
1327 worker_partitioned_uses.add (k: var);
1328 has_defs = true;
1329 }
1330 }
1331 }
1332
1333 delete ws_prop;
1334 (*prop_set)[block->index] = 0;
1335 }
1336
1337 bool only_marker_fns = true;
1338 bool join_block = false;
1339
1340 for (gimple_stmt_iterator gsi = gsi_start_bb (bb: block);
1341 !gsi_end_p (i: gsi);
1342 gsi_next (i: &gsi))
1343 {
1344 gimple *stmt = gsi_stmt (i: gsi);
1345 if (gimple_code (g: stmt) == GIMPLE_CALL
1346 && gimple_call_internal_p (gs: stmt, fn: IFN_UNIQUE))
1347 {
1348 enum ifn_unique_kind k = ((enum ifn_unique_kind)
1349 TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
1350 if (k != IFN_UNIQUE_OACC_PRIVATE
1351 && k != IFN_UNIQUE_OACC_JOIN
1352 && k != IFN_UNIQUE_OACC_FORK
1353 && k != IFN_UNIQUE_OACC_HEAD_MARK
1354 && k != IFN_UNIQUE_OACC_TAIL_MARK)
1355 only_marker_fns = false;
1356 else if (k == IFN_UNIQUE_OACC_JOIN)
1357 /* The JOIN marker is special in that it *cannot* be
1358 predicated for worker zero, because it may be lowered
1359 to a barrier instruction and all workers must typically
1360 execute that barrier. We shouldn't be doing any
1361 broadcasts from the join block anyway. */
1362 join_block = true;
1363 }
1364 else if (gimple_code (g: stmt) == GIMPLE_CALL
1365 && gimple_call_internal_p (gs: stmt, fn: IFN_GOACC_LOOP))
1366 /* Empty. */;
1367 else if (gimple_nop_p (g: stmt))
1368 /* Empty. */;
1369 else
1370 only_marker_fns = false;
1371 }
1372
1373 /* We can skip predicating this block for worker zero if the only
1374 thing it contains is marker functions that will be removed in the
1375 oaccdevlow pass anyway.
1376 Don't do this if the block has (any) phi nodes, because those
1377 might define SSA names that need broadcasting.
1378 TODO: We might be able to skip transforming blocks that only
1379 contain some other trivial statements too. */
1380 if (only_marker_fns && !phi_nodes (bb: block))
1381 continue;
1382
1383 gcc_assert (!join_block);
1384
1385 if (has_defs)
1386 {
1387 tree record_type = (tree) block->aux;
1388 std::pair<unsigned HOST_WIDE_INT, bool> *off_rngalloc
1389 = blk_offset_map->get (k: block);
1390 gcc_assert (!record_type || off_rngalloc);
1391 unsigned HOST_WIDE_INT offset
1392 = off_rngalloc ? off_rngalloc->first : 0;
1393 bool range_allocated
1394 = off_rngalloc ? off_rngalloc->second : true;
1395 bool has_gang_private_write
1396 = bitmap_bit_p (writes_gang_private, block->index);
1397 worker_single_copy (from: block, to: block, def_escapes_block: &def_escapes_block,
1398 worker_partitioned_uses: &worker_partitioned_uses, record_type,
1399 record_field_map,
1400 placement: offset, isolate_broadcasts: !range_allocated,
1401 has_gang_private_write);
1402 }
1403 else
1404 worker_single_simple (from: block, to: block, def_escapes_block: &def_escapes_block);
1405 }
1406 }
1407
1408 if ((outer_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
1409 {
1410 basic_block block;
1411
1412 for (unsigned i = 0; par->blocks.iterate (ix: i, ptr: &block); i++)
1413 for (gimple_stmt_iterator gsi = gsi_start_bb (bb: block);
1414 !gsi_end_p (i: gsi);
1415 gsi_next (i: &gsi))
1416 {
1417 gimple *stmt = gsi_stmt (i: gsi);
1418
1419 if (gimple_code (g: stmt) == GIMPLE_CALL
1420 && !gimple_call_internal_p (gs: stmt)
1421 && !omp_sese_active_worker_call (call: as_a <gcall *> (p: stmt)))
1422 {
1423 /* If we have an OpenACC routine call in worker-single mode,
1424 place barriers before and afterwards to prevent
1425 clobbering re-used shared memory regions (as are used
1426 for AMDGCN at present, for example). */
1427 tree decl = builtin_decl_explicit (fncode: BUILT_IN_GOACC_BARRIER);
1428 gsi_insert_before (&gsi, gimple_build_call (decl, 0),
1429 GSI_SAME_STMT);
1430 gsi_insert_after (&gsi, gimple_build_call (decl, 0),
1431 GSI_NEW_STMT);
1432 }
1433 }
1434 }
1435
1436 if (par->inner)
1437 neuter_worker_single (par: par->inner, outer_mask: mask, worker_single, vector_single,
1438 prop_set, partitioned_var_uses, record_field_map,
1439 blk_offset_map, writes_gang_private);
1440 if (par->next)
1441 neuter_worker_single (par: par->next, outer_mask, worker_single, vector_single,
1442 prop_set, partitioned_var_uses, record_field_map,
1443 blk_offset_map, writes_gang_private);
1444}
1445
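/* Depth-first search from BB, recording in REACHABLE the blocks that
   perform broadcasts (those with a record type in their ->aux field) and
   that can be reached from BB without passing through another such
   block.  */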
1446static void
1447dfs_broadcast_reachable_1 (basic_block bb, sbitmap reachable)
1448{
1449 if (bb->flags & BB_VISITED)
1450 return;
1451
1452 bb->flags |= BB_VISITED;
1453
1454 if (bb->succs)
1455 {
1456 edge e;
1457 edge_iterator ei;
1458 FOR_EACH_EDGE (e, ei, bb->succs)
1459 {
1460 basic_block dest = e->dest;
1461 if (dest->aux)
1462 bitmap_set_bit (map: reachable, bitno: dest->index);
1463 else
1464 dfs_broadcast_reachable_1 (bb: dest, reachable);
1465 }
1466 }
1467}
1468
1469typedef std::pair<int, tree> idx_decl_pair_t;
1470
1471typedef auto_vec<splay_tree> used_range_vec_t;
1472
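/* qsort comparator: order (block index, record type) pairs by decreasing
   size of the record type.  */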
1473static int
1474sort_size_descending (const void *a, const void *b)
1475{
1476 const idx_decl_pair_t *pa = (const idx_decl_pair_t *) a;
1477 const idx_decl_pair_t *pb = (const idx_decl_pair_t *) b;
1478 unsigned HOST_WIDE_INT asize = tree_to_uhwi (TYPE_SIZE_UNIT (pa->second));
1479 unsigned HOST_WIDE_INT bsize = tree_to_uhwi (TYPE_SIZE_UNIT (pb->second));
1480 return bsize - asize;
1481}
1482
1483class addr_range
1484{
1485public:
1486 addr_range (unsigned HOST_WIDE_INT addr_lo, unsigned HOST_WIDE_INT addr_hi)
1487 : lo (addr_lo), hi (addr_hi)
1488 { }
1489 addr_range (const addr_range &ar) : lo (ar.lo), hi (ar.hi)
1490 { }
1491 addr_range () : lo (0), hi (0)
1492 { }
1493
1494 bool invalid () { return lo == 0 && hi == 0; }
1495
1496 unsigned HOST_WIDE_INT lo;
1497 unsigned HOST_WIDE_INT hi;
1498};
1499
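/* Splay-tree comparison function for address ranges: two ranges compare
   equal if they overlap, otherwise they are ordered by address.  */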
1500static int
1501splay_tree_compare_addr_range (splay_tree_key a, splay_tree_key b)
1502{
1503 addr_range *ar = (addr_range *) a;
1504 addr_range *br = (addr_range *) b;
1505 if (ar->lo == br->lo && ar->hi == br->hi)
1506 return 0;
1507 if (ar->hi <= br->lo)
1508 return -1;
1509 else if (ar->lo >= br->hi)
1510 return 1;
1511 return 0;
1512}
1513
1514static void
1515splay_tree_free_key (splay_tree_key k)
1516{
1517 addr_range *ar = (addr_range *) k;
1518 delete ar;
1519}
1520
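/* Find the lowest-addressed gap between the already-allocated ranges in the
   splay tree S that can hold an object of SIZE bytes aligned to ALIGN,
   within BOUNDS.  Return the chosen range, or an invalid (empty) range if
   there is no room.  */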
1521static addr_range
1522first_fit_range (splay_tree s, unsigned HOST_WIDE_INT size,
1523 unsigned HOST_WIDE_INT align, addr_range *bounds)
1524{
1525 splay_tree_node min = splay_tree_min (s);
1526 if (min)
1527 {
1528 splay_tree_node next;
1529 while ((next = splay_tree_successor (s, min->key)))
1530 {
1531 unsigned HOST_WIDE_INT lo = ((addr_range *) min->key)->hi;
1532 unsigned HOST_WIDE_INT hi = ((addr_range *) next->key)->lo;
1533 unsigned HOST_WIDE_INT base = (lo + align - 1) & ~(align - 1);
1534 if (base + size <= hi)
1535 return addr_range (base, base + size);
1536 min = next;
1537 }
1538
1539 unsigned HOST_WIDE_INT base = ((addr_range *)min->key)->hi;
1540 base = (base + align - 1) & ~(align - 1);
1541 if (base + size <= bounds->hi)
1542 return addr_range (base, base + size);
1543 else
1544 return addr_range ();
1545 }
1546 else
1547 {
1548 unsigned HOST_WIDE_INT lo = bounds->lo;
1549 lo = (lo + align - 1) & ~(align - 1);
1550 if (lo + size <= bounds->hi)
1551 return addr_range (lo, lo + size);
1552 else
1553 return addr_range ();
1554 }
1555}
1556
1557static int
1558merge_ranges_1 (splay_tree_node n, void *ptr)
1559{
1560 splay_tree accum = (splay_tree) ptr;
1561 addr_range ar = *(addr_range *) n->key;
1562
1563 splay_tree_node old = splay_tree_lookup (accum, n->key);
1564
1565 /* We might have an overlap. Create a new range covering the
1566 overlapping parts. */
1567 if (old)
1568 {
1569 addr_range *old_ar = (addr_range *) old->key;
1570 ar.lo = MIN (old_ar->lo, ar.lo);
1571 ar.hi = MAX (old_ar->hi, ar.hi);
1572 splay_tree_remove (accum, old->key);
1573 }
1574
1575 addr_range *new_ar = new addr_range (ar);
1576
1577 splay_tree_insert (accum, (splay_tree_key) new_ar, n->value);
1578
1579 return 0;
1580}
1581
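/* Merge all the address ranges recorded in SP into ACCUM, combining
   overlapping ranges into single entries.  */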
1582static void
1583merge_ranges (splay_tree accum, splay_tree sp)
1584{
1585 splay_tree_foreach (sp, merge_ranges_1, (void *) accum);
1586}
1587
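/* Perform the worker-neutering/broadcasting transformation on the current
   function: discover the parallel structure, work out which SSA names and
   local variables must be broadcast from worker-single to
   worker-partitioned blocks, lay out the broadcast records in the
   shared-memory region [BOUNDS_LO, BOUNDS_HI), and rewrite the
   worker-single blocks so that only one worker executes them.  */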
1588static void
1589oacc_do_neutering (unsigned HOST_WIDE_INT bounds_lo,
1590 unsigned HOST_WIDE_INT bounds_hi)
1591{
1592 bb_stmt_map_t bb_stmt_map;
1593 auto_bitmap worker_single, vector_single;
1594
1595 omp_sese_split_blocks (map: &bb_stmt_map);
1596
1597 if (dump_file)
1598 {
1599 fprintf (stream: dump_file, format: "\n\nAfter splitting:\n\n");
1600 dump_function_to_file (current_function_decl, dump_file, dump_flags);
1601 }
1602
1603 unsigned mask = 0;
1604
1605 /* If this is a routine, calculate MASK as if the outer levels are already
1606 partitioned. */
1607 {
1608 tree attr = oacc_get_fn_attrib (fn: current_function_decl);
1609 tree dims = TREE_VALUE (attr);
1610 unsigned ix;
1611 for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims))
1612 {
1613 tree allowed = TREE_PURPOSE (dims);
1614 if (allowed && integer_zerop (allowed))
1615 mask |= GOMP_DIM_MASK (ix);
1616 }
1617 }
1618
1619 parallel_g *par = omp_sese_discover_pars (map: &bb_stmt_map);
1620 populate_single_mode_bitmaps (par, worker_single, vector_single, outer_mask: mask, depth: 0);
1621
1622 basic_block bb;
1623 FOR_ALL_BB_FN (bb, cfun)
1624 bb->aux = NULL;
1625
1626 vec<propagation_set *> prop_set (vNULL);
1627 prop_set.safe_grow_cleared (last_basic_block_for_fn (cfun), exact: true);
1628
1629 find_ssa_names_to_propagate (par, outer_mask: mask, worker_single, vector_single,
1630 prop_set: &prop_set);
1631
1632 hash_set<tree> partitioned_var_uses;
1633 hash_set<tree> gang_private_vars;
1634 auto_bitmap writes_gang_private;
1635
1636 find_gang_private_vars (gang_private_vars: &gang_private_vars);
1637 find_partitioned_var_uses (par, outer_mask: mask, partitioned_var_uses: &partitioned_var_uses);
1638 find_local_vars_to_propagate (par, outer_mask: mask, partitioned_var_uses: &partitioned_var_uses,
1639 gang_private_vars: &gang_private_vars, writes_gang_private,
1640 prop_set: &prop_set);
1641
1642 record_field_map_t record_field_map;
1643
1644 FOR_ALL_BB_FN (bb, cfun)
1645 {
1646 propagation_set *ws_prop = prop_set[bb->index];
1647 if (ws_prop)
1648 {
1649 tree record_type = lang_hooks.types.make_type (RECORD_TYPE);
1650 tree name = create_tmp_var_name (".oacc_ws_data_s");
1651 name = build_decl (UNKNOWN_LOCATION, TYPE_DECL, name, record_type);
1652 DECL_ARTIFICIAL (name) = 1;
1653 DECL_NAMELESS (name) = 1;
1654 TYPE_NAME (record_type) = name;
1655 TYPE_ARTIFICIAL (record_type) = 1;
1656
1657 auto_vec<tree> field_vec (ws_prop->elements ());
1658 for (hash_set<tree>::iterator it = ws_prop->begin ();
1659 it != ws_prop->end (); ++it)
1660 field_vec.quick_push (obj: *it);
1661
1662 field_vec.qsort (sort_by_size_then_ssa_version_or_uid);
1663
1664 bool existed;
1665 field_map_t *fields
1666 = &record_field_map.get_or_insert (k: record_type, existed: &existed);
1667 gcc_checking_assert (!existed);
1668
1669 /* Insert var fields in reverse order, so the last inserted element
1670 is the first in the structure. */
1671 for (int i = field_vec.length () - 1; i >= 0; i--)
1672 install_var_field (var: field_vec[i], record_type, fields);
1673
1674 layout_type (record_type);
1675
1676 bb->aux = (tree) record_type;
1677 }
1678 }
1679
1680 sbitmap *reachable
1681 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
1682 last_basic_block_for_fn (cfun));
1683
1684 bitmap_vector_clear (reachable, last_basic_block_for_fn (cfun));
1685
1686 auto_vec<std::pair<int, tree> > priority;
1687
1688 FOR_ALL_BB_FN (bb, cfun)
1689 {
1690 if (bb->aux)
1691 {
1692 tree record_type = (tree) bb->aux;
1693
1694 basic_block bb2;
1695 FOR_ALL_BB_FN (bb2, cfun)
1696 bb2->flags &= ~BB_VISITED;
1697
1698 priority.safe_push (obj: std::make_pair (x&: bb->index, y&: record_type));
1699 dfs_broadcast_reachable_1 (bb, reachable: reachable[bb->index]);
1700 }
1701 }
1702
1703 sbitmap *inverted
1704 = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
1705 last_basic_block_for_fn (cfun));
1706
1707 bitmap_vector_clear (inverted, last_basic_block_for_fn (cfun));
1708
1709 for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
1710 {
1711 sbitmap_iterator bi;
1712 unsigned int j;
1713 EXECUTE_IF_SET_IN_BITMAP (reachable[i], 0, j, bi)
1714 bitmap_set_bit (map: inverted[j], bitno: i);
1715 }
1716
1717 for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
1718 bitmap_ior (reachable[i], reachable[i], inverted[i]);
1719
1720 sbitmap_vector_free (vec: inverted);
1721
1722 used_range_vec_t used_ranges;
1723
1724 used_ranges.safe_grow_cleared (last_basic_block_for_fn (cfun));
1725
1726 blk_offset_map_t blk_offset_map;
1727
1728 addr_range worker_shm_bounds (bounds_lo, bounds_hi);
1729
1730 priority.qsort (sort_size_descending);
1731 for (unsigned int i = 0; i < priority.length (); i++)
1732 {
1733 idx_decl_pair_t p = priority[i];
1734 int blkno = p.first;
1735 tree record_type = p.second;
1736 HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (record_type));
1737 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (record_type);
1738
1739 splay_tree conflicts = splay_tree_new (splay_tree_compare_addr_range,
1740 splay_tree_free_key, NULL);
1741
1742 if (!used_ranges[blkno])
1743 used_ranges[blkno] = splay_tree_new (splay_tree_compare_addr_range,
1744 splay_tree_free_key, NULL);
1745 else
1746 merge_ranges (accum: conflicts, sp: used_ranges[blkno]);
1747
1748 sbitmap_iterator bi;
1749 unsigned int j;
1750 EXECUTE_IF_SET_IN_BITMAP (reachable[blkno], 0, j, bi)
1751 if (used_ranges[j])
1752 merge_ranges (accum: conflicts, sp: used_ranges[j]);
1753
1754 addr_range ar
1755 = first_fit_range (s: conflicts, size, align, bounds: &worker_shm_bounds);
1756
1757 splay_tree_delete (conflicts);
1758
1759 if (ar.invalid ())
1760 {
1761 unsigned HOST_WIDE_INT base
1762 = (bounds_lo + align - 1) & ~(align - 1);
1763 if (base + size > bounds_hi)
1764 error_at (UNKNOWN_LOCATION, "shared-memory region overflow");
1765 std::pair<unsigned HOST_WIDE_INT, bool> base_inrng
1766 = std::make_pair (x&: base, y: false);
1767 blk_offset_map.put (BASIC_BLOCK_FOR_FN (cfun, blkno), v: base_inrng);
1768 }
1769 else
1770 {
1771 splay_tree_node old = splay_tree_lookup (used_ranges[blkno],
1772 (splay_tree_key) &ar);
1773 if (old)
1774 {
1775 fprintf (stderr, format: "trying to map [%d..%d] but [%d..%d] is "
1776 "already mapped in block %d\n", (int) ar.lo,
1777 (int) ar.hi, (int) ((addr_range *) old->key)->lo,
1778 (int) ((addr_range *) old->key)->hi, blkno);
1779 abort ();
1780 }
1781
1782 addr_range *arp = new addr_range (ar);
1783 splay_tree_insert (used_ranges[blkno], (splay_tree_key) arp,
1784 (splay_tree_value) blkno);
1785 std::pair<unsigned HOST_WIDE_INT, bool> base_inrng
1786 = std::make_pair (x&: ar.lo, y: true);
1787 blk_offset_map.put (BASIC_BLOCK_FOR_FN (cfun, blkno), v: base_inrng);
1788 }
1789 }
1790
1791 sbitmap_vector_free (vec: reachable);
1792
1793 neuter_worker_single (par, outer_mask: mask, worker_single, vector_single, prop_set: &prop_set,
1794 partitioned_var_uses: &partitioned_var_uses, record_field_map: &record_field_map,
1795 blk_offset_map: &blk_offset_map, writes_gang_private);
1796
1797 record_field_map.empty ();
1798
1799 /* These are supposed to have been 'delete'd by 'neuter_worker_single'. */
1800 for (auto it : prop_set)
1801 gcc_checking_assert (!it);
1802 prop_set.release ();
1803
1804 delete par;
1805
1806 /* This doesn't seem to make a difference. */
1807 loops_state_clear (flags: LOOP_CLOSED_SSA);
1808
1809 /* Neutering worker-single neutered blocks will invalidate dominance info.
1810 It may be possible to incrementally update just the affected blocks, but
1811 obliterate everything for now. */
1812 free_dominance_info (CDI_DOMINATORS);
1813 free_dominance_info (CDI_POST_DOMINATORS);
1814
1815 if (dump_file)
1816 {
1817 fprintf (stream: dump_file, format: "\n\nAfter neutering:\n\n");
1818 dump_function_to_file (current_function_decl, dump_file, dump_flags);
1819 }
1820}
1821
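/* Execute the omp_oacc_neuter_broadcast pass: compute the shared memory
   already needed for reduction variables and gang-private variables, query
   the target for the remaining shared-memory bounds, and perform worker
   neutering/broadcasting unless the function is known to use only a single
   worker.  */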
1822static int
1823execute_omp_oacc_neuter_broadcast ()
1824{
1825 unsigned HOST_WIDE_INT reduction_size[GOMP_DIM_MAX];
1826 unsigned HOST_WIDE_INT private_size[GOMP_DIM_MAX];
1827
1828 for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
1829 {
1830 reduction_size[i] = 0;
1831 private_size[i] = 0;
1832 }
1833
1834 /* Calculate shared memory size required for reduction variables and
1835 gang-private memory for this offloaded function. */
1836 basic_block bb;
1837 FOR_ALL_BB_FN (bb, cfun)
1838 {
1839 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
1840 !gsi_end_p (i: gsi);
1841 gsi_next (i: &gsi))
1842 {
1843 gimple *stmt = gsi_stmt (i: gsi);
1844 if (!is_gimple_call (gs: stmt))
1845 continue;
1846 gcall *call = as_a <gcall *> (p: stmt);
1847 if (!gimple_call_internal_p (gs: call))
1848 continue;
1849 enum internal_fn ifn_code = gimple_call_internal_fn (gs: call);
1850 switch (ifn_code)
1851 {
1852 default: break;
1853 case IFN_GOACC_REDUCTION:
1854 if (integer_minus_onep (gimple_call_arg (gs: call, index: 3)))
1855 continue;
1856 else
1857 {
1858 unsigned code = TREE_INT_CST_LOW (gimple_call_arg (call, 0));
1859 /* Only count reduction variables once: the choice to pick
1860 the setup call is fairly arbitrary. */
1861 if (code == IFN_GOACC_REDUCTION_SETUP)
1862 {
1863 int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
1864 tree var = gimple_call_arg (gs: call, index: 2);
1865 tree offset = gimple_call_arg (gs: call, index: 5);
1866 tree var_type = TREE_TYPE (var);
1867 unsigned HOST_WIDE_INT limit
1868 = (tree_to_uhwi (offset)
1869 + tree_to_uhwi (TYPE_SIZE_UNIT (var_type)));
1870 reduction_size[level]
1871 = MAX (reduction_size[level], limit);
1872 }
1873 }
1874 break;
1875 case IFN_UNIQUE:
1876 {
1877 enum ifn_unique_kind kind
1878 = ((enum ifn_unique_kind)
1879 TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
1880
1881 if (kind == IFN_UNIQUE_OACC_PRIVATE)
1882 {
1883 HOST_WIDE_INT level
1884 = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
1885 if (level == -1)
1886 break;
1887 for (unsigned i = 3;
1888 i < gimple_call_num_args (gs: call);
1889 i++)
1890 {
1891 tree arg = gimple_call_arg (gs: call, index: i);
1892 gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
1893 tree decl = TREE_OPERAND (arg, 0);
1894 unsigned HOST_WIDE_INT align = DECL_ALIGN_UNIT (decl);
1895 private_size[level] = ((private_size[level] + align - 1)
1896 & ~(align - 1));
1897 unsigned HOST_WIDE_INT decl_size
1898 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (decl)));
1899 private_size[level] += decl_size;
1900 }
1901 }
1902 }
1903 break;
1904 }
1905 }
1906 }
1907
1908 int dims[GOMP_DIM_MAX];
1909 for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
1910 dims[i] = oacc_get_fn_dim_size (fn: current_function_decl, axis: i);
1911
1912 /* Find bounds of shared-memory buffer space we can use. */
1913 unsigned HOST_WIDE_INT bounds_lo = 0, bounds_hi = 0;
1914 if (targetm.goacc.shared_mem_layout)
1915 targetm.goacc.shared_mem_layout (&bounds_lo, &bounds_hi, dims,
1916 private_size, reduction_size);
1917
1918 /* Perform worker partitioning unless we know 'num_workers(1)'. */
1919 if (dims[GOMP_DIM_WORKER] != 1)
1920 oacc_do_neutering (bounds_lo, bounds_hi);
1921
1922 return 0;
1923}
1924
namespace {

const pass_data pass_data_omp_oacc_neuter_broadcast =
{
  GIMPLE_PASS, /* type */
  "omp_oacc_neuter_broadcast", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
};

class pass_omp_oacc_neuter_broadcast : public gimple_opt_pass
{
public:
  pass_omp_oacc_neuter_broadcast (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_oacc_neuter_broadcast, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *fun) final override
  {
    if (!flag_openacc)
      return false;

    if (!targetm.goacc.create_worker_broadcast_record)
      return false;

    /* Only relevant for OpenACC offloaded functions.  */
    tree attr = oacc_get_fn_attrib (fun->decl);
    if (!attr)
      return false;

    return true;
  }

  unsigned int execute (function *) final override
  {
    return execute_omp_oacc_neuter_broadcast ();
  }

}; // class pass_omp_oacc_neuter_broadcast

} // anon namespace

gimple_opt_pass *
make_pass_omp_oacc_neuter_broadcast (gcc::context *ctxt)
{
  return new pass_omp_oacc_neuter_broadcast (ctxt);
}