/* General types and functions that are useful for processing of OpenMP,
   OpenACC and similar directives at various stages of compilation.
3 | |
4 | Copyright (C) 2005-2023 Free Software Foundation, Inc. |
5 | |
6 | This file is part of GCC. |
7 | |
8 | GCC is free software; you can redistribute it and/or modify it under |
9 | the terms of the GNU General Public License as published by the Free |
10 | Software Foundation; either version 3, or (at your option) any later |
11 | version. |
12 | |
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
16 | for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ |
21 | |
22 | /* Find an OMP clause of type KIND within CLAUSES. */ |
23 | |
24 | #include "config.h" |
25 | #include "system.h" |
26 | #include "coretypes.h" |
27 | #include "backend.h" |
28 | #include "target.h" |
29 | #include "tree.h" |
30 | #include "gimple.h" |
31 | #include "ssa.h" |
32 | #include "diagnostic-core.h" |
33 | #include "fold-const.h" |
34 | #include "langhooks.h" |
35 | #include "omp-general.h" |
36 | #include "stringpool.h" |
37 | #include "attribs.h" |
38 | #include "gimplify.h" |
39 | #include "cgraph.h" |
40 | #include "alloc-pool.h" |
41 | #include "symbol-summary.h" |
42 | #include "tree-pass.h" |
43 | #include "omp-device-properties.h" |
44 | #include "tree-iterator.h" |
45 | #include "data-streamer.h" |
46 | #include "streamer-hooks.h" |
47 | #include "opts.h" |
48 | |
/* Accumulated OpenMP "requires" directive requirements; presumably a
   translation-unit-wide bitmask updated as requires clauses are parsed
   — verify against the code that sets it.  */
enum omp_requires omp_requires_mask;
50 | |
51 | tree |
52 | omp_find_clause (tree clauses, enum omp_clause_code kind) |
53 | { |
54 | for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses)) |
55 | if (OMP_CLAUSE_CODE (clauses) == kind) |
56 | return clauses; |
57 | |
58 | return NULL_TREE; |
59 | } |
60 | |
61 | /* True if OpenMP should regard this DECL as being a scalar which has Fortran's |
62 | allocatable or pointer attribute. */ |
63 | bool |
64 | omp_is_allocatable_or_ptr (tree decl) |
65 | { |
66 | return lang_hooks.decls.omp_is_allocatable_or_ptr (decl); |
67 | } |
68 | |
69 | /* Check whether this DECL belongs to a Fortran optional argument. |
70 | With 'for_present_check' set to false, decls which are optional parameters |
71 | themselve are returned as tree - or a NULL_TREE otherwise. Those decls are |
72 | always pointers. With 'for_present_check' set to true, the decl for checking |
73 | whether an argument is present is returned; for arguments with value |
74 | attribute this is the hidden argument and of BOOLEAN_TYPE. If the decl is |
75 | unrelated to optional arguments, NULL_TREE is returned. */ |
76 | |
77 | tree |
78 | omp_check_optional_argument (tree decl, bool for_present_check) |
79 | { |
80 | return lang_hooks.decls.omp_check_optional_argument (decl, for_present_check); |
81 | } |
82 | |
83 | /* Return true if TYPE is an OpenMP mappable type. */ |
84 | |
85 | bool |
86 | omp_mappable_type (tree type) |
87 | { |
88 | /* Mappable type has to be complete. */ |
89 | if (type == error_mark_node || !COMPLETE_TYPE_P (type)) |
90 | return false; |
91 | return true; |
92 | } |
93 | |
94 | /* True if OpenMP should privatize what this DECL points to rather |
95 | than the DECL itself. */ |
96 | |
97 | bool |
98 | omp_privatize_by_reference (tree decl) |
99 | { |
100 | return lang_hooks.decls.omp_privatize_by_reference (decl); |
101 | } |
102 | |
103 | /* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or GT_EXPR, |
104 | given that V is the loop index variable and STEP is loop step. */ |
105 | |
106 | void |
107 | omp_adjust_for_condition (location_t loc, enum tree_code *cond_code, tree *n2, |
108 | tree v, tree step) |
109 | { |
110 | switch (*cond_code) |
111 | { |
112 | case LT_EXPR: |
113 | case GT_EXPR: |
114 | break; |
115 | |
116 | case NE_EXPR: |
117 | gcc_assert (TREE_CODE (step) == INTEGER_CST); |
118 | if (TREE_CODE (TREE_TYPE (v)) == INTEGER_TYPE) |
119 | { |
120 | if (integer_onep (step)) |
121 | *cond_code = LT_EXPR; |
122 | else |
123 | { |
124 | gcc_assert (integer_minus_onep (step)); |
125 | *cond_code = GT_EXPR; |
126 | } |
127 | } |
128 | else |
129 | { |
130 | tree unit = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (v))); |
131 | gcc_assert (TREE_CODE (unit) == INTEGER_CST); |
132 | if (tree_int_cst_equal (unit, step)) |
133 | *cond_code = LT_EXPR; |
134 | else |
135 | { |
136 | gcc_assert (wi::neg (wi::to_widest (unit)) |
137 | == wi::to_widest (step)); |
138 | *cond_code = GT_EXPR; |
139 | } |
140 | } |
141 | |
142 | break; |
143 | |
144 | case LE_EXPR: |
145 | if (POINTER_TYPE_P (TREE_TYPE (*n2))) |
146 | *n2 = fold_build_pointer_plus_hwi_loc (loc, ptr: *n2, off: 1); |
147 | else |
148 | *n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (*n2), *n2, |
149 | build_int_cst (TREE_TYPE (*n2), 1)); |
150 | *cond_code = LT_EXPR; |
151 | break; |
152 | case GE_EXPR: |
153 | if (POINTER_TYPE_P (TREE_TYPE (*n2))) |
154 | *n2 = fold_build_pointer_plus_hwi_loc (loc, ptr: *n2, off: -1); |
155 | else |
156 | *n2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (*n2), *n2, |
157 | build_int_cst (TREE_TYPE (*n2), 1)); |
158 | *cond_code = GT_EXPR; |
159 | break; |
160 | default: |
161 | gcc_unreachable (); |
162 | } |
163 | } |
164 | |
165 | /* Return the looping step from INCR, extracted from the step of a gimple omp |
166 | for statement. */ |
167 | |
168 | tree |
169 | omp_get_for_step_from_incr (location_t loc, tree incr) |
170 | { |
171 | tree step; |
172 | switch (TREE_CODE (incr)) |
173 | { |
174 | case PLUS_EXPR: |
175 | step = TREE_OPERAND (incr, 1); |
176 | break; |
177 | case POINTER_PLUS_EXPR: |
178 | step = fold_convert (ssizetype, TREE_OPERAND (incr, 1)); |
179 | break; |
180 | case MINUS_EXPR: |
181 | step = TREE_OPERAND (incr, 1); |
182 | step = fold_build1_loc (loc, NEGATE_EXPR, TREE_TYPE (step), step); |
183 | break; |
184 | default: |
185 | gcc_unreachable (); |
186 | } |
187 | return step; |
188 | } |
189 | |
190 | /* Extract the header elements of parallel loop FOR_STMT and store |
191 | them into *FD. */ |
192 | |
193 | void |
194 | (gomp_for *for_stmt, struct omp_for_data *fd, |
195 | struct omp_for_data_loop *loops) |
196 | { |
197 | tree t, var, *collapse_iter, *collapse_count; |
198 | tree count = NULL_TREE, iter_type = long_integer_type_node; |
199 | struct omp_for_data_loop *loop; |
200 | int i; |
201 | struct omp_for_data_loop dummy_loop; |
202 | location_t loc = gimple_location (g: for_stmt); |
203 | bool simd = gimple_omp_for_kind (g: for_stmt) == GF_OMP_FOR_KIND_SIMD; |
204 | bool distribute = gimple_omp_for_kind (g: for_stmt) |
205 | == GF_OMP_FOR_KIND_DISTRIBUTE; |
206 | bool taskloop = gimple_omp_for_kind (g: for_stmt) |
207 | == GF_OMP_FOR_KIND_TASKLOOP; |
208 | bool order_reproducible = false; |
209 | tree iterv, countv; |
210 | |
211 | fd->for_stmt = for_stmt; |
212 | fd->pre = NULL; |
213 | fd->have_nowait = distribute || simd; |
214 | fd->have_ordered = false; |
215 | fd->have_reductemp = false; |
216 | fd->have_pointer_condtemp = false; |
217 | fd->have_scantemp = false; |
218 | fd->have_nonctrl_scantemp = false; |
219 | fd->non_rect = false; |
220 | fd->lastprivate_conditional = 0; |
221 | fd->tiling = NULL_TREE; |
222 | fd->collapse = 1; |
223 | fd->ordered = 0; |
224 | fd->first_nonrect = -1; |
225 | fd->last_nonrect = -1; |
226 | fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; |
227 | fd->sched_modifiers = 0; |
228 | fd->chunk_size = NULL_TREE; |
229 | fd->simd_schedule = false; |
230 | fd->first_inner_iterations = NULL_TREE; |
231 | fd->factor = NULL_TREE; |
232 | fd->adjn1 = NULL_TREE; |
233 | collapse_iter = NULL; |
234 | collapse_count = NULL; |
235 | |
236 | for (t = gimple_omp_for_clauses (gs: for_stmt); t ; t = OMP_CLAUSE_CHAIN (t)) |
237 | switch (OMP_CLAUSE_CODE (t)) |
238 | { |
239 | case OMP_CLAUSE_NOWAIT: |
240 | fd->have_nowait = true; |
241 | break; |
242 | case OMP_CLAUSE_ORDERED: |
243 | fd->have_ordered = true; |
244 | if (OMP_CLAUSE_ORDERED_DOACROSS (t)) |
245 | { |
246 | if (OMP_CLAUSE_ORDERED_EXPR (t)) |
247 | fd->ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (t)); |
248 | else |
249 | fd->ordered = -1; |
250 | } |
251 | break; |
252 | case OMP_CLAUSE_SCHEDULE: |
253 | gcc_assert (!distribute && !taskloop); |
254 | fd->sched_kind |
255 | = (enum omp_clause_schedule_kind) |
256 | (OMP_CLAUSE_SCHEDULE_KIND (t) & OMP_CLAUSE_SCHEDULE_MASK); |
257 | fd->sched_modifiers = (OMP_CLAUSE_SCHEDULE_KIND (t) |
258 | & ~OMP_CLAUSE_SCHEDULE_MASK); |
259 | fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t); |
260 | fd->simd_schedule = OMP_CLAUSE_SCHEDULE_SIMD (t); |
261 | break; |
262 | case OMP_CLAUSE_DIST_SCHEDULE: |
263 | gcc_assert (distribute); |
264 | fd->chunk_size = OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR (t); |
265 | break; |
266 | case OMP_CLAUSE_COLLAPSE: |
267 | fd->collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (t)); |
268 | if (fd->collapse > 1) |
269 | { |
270 | collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t); |
271 | collapse_count = &OMP_CLAUSE_COLLAPSE_COUNT (t); |
272 | } |
273 | break; |
274 | case OMP_CLAUSE_TILE: |
275 | fd->tiling = OMP_CLAUSE_TILE_LIST (t); |
276 | fd->collapse = list_length (fd->tiling); |
277 | gcc_assert (fd->collapse); |
278 | collapse_iter = &OMP_CLAUSE_TILE_ITERVAR (t); |
279 | collapse_count = &OMP_CLAUSE_TILE_COUNT (t); |
280 | break; |
281 | case OMP_CLAUSE__REDUCTEMP_: |
282 | fd->have_reductemp = true; |
283 | break; |
284 | case OMP_CLAUSE_LASTPRIVATE: |
285 | if (OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (t)) |
286 | fd->lastprivate_conditional++; |
287 | break; |
288 | case OMP_CLAUSE__CONDTEMP_: |
289 | if (POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (t)))) |
290 | fd->have_pointer_condtemp = true; |
291 | break; |
292 | case OMP_CLAUSE__SCANTEMP_: |
293 | fd->have_scantemp = true; |
294 | if (!OMP_CLAUSE__SCANTEMP__ALLOC (t) |
295 | && !OMP_CLAUSE__SCANTEMP__CONTROL (t)) |
296 | fd->have_nonctrl_scantemp = true; |
297 | break; |
298 | case OMP_CLAUSE_ORDER: |
299 | /* FIXME: For OpenMP 5.2 this should change to |
300 | if (OMP_CLAUSE_ORDER_REPRODUCIBLE (t)) |
301 | (with the exception of loop construct but that lowers to |
302 | no schedule/dist_schedule clauses currently). */ |
303 | if (!OMP_CLAUSE_ORDER_UNCONSTRAINED (t)) |
304 | order_reproducible = true; |
305 | default: |
306 | break; |
307 | } |
308 | |
309 | if (fd->ordered == -1) |
310 | fd->ordered = fd->collapse; |
311 | |
312 | /* For order(reproducible:concurrent) schedule ({dynamic,guided,runtime}) |
313 | we have either the option to expensively remember at runtime how we've |
314 | distributed work from first loop and reuse that in following loops with |
315 | the same number of iterations and schedule, or just force static schedule. |
316 | OpenMP API calls etc. aren't allowed in order(concurrent) bodies so |
317 | users can't observe it easily anyway. */ |
318 | if (order_reproducible) |
319 | fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; |
320 | if (fd->collapse > 1 || fd->tiling) |
321 | fd->loops = loops; |
322 | else |
323 | fd->loops = &fd->loop; |
324 | |
325 | if (fd->ordered && fd->collapse == 1 && loops != NULL) |
326 | { |
327 | fd->loops = loops; |
328 | iterv = NULL_TREE; |
329 | countv = NULL_TREE; |
330 | collapse_iter = &iterv; |
331 | collapse_count = &countv; |
332 | } |
333 | |
334 | /* FIXME: for now map schedule(auto) to schedule(static). |
335 | There should be analysis to determine whether all iterations |
336 | are approximately the same amount of work (then schedule(static) |
337 | is best) or if it varies (then schedule(dynamic,N) is better). */ |
338 | if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_AUTO) |
339 | { |
340 | fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; |
341 | gcc_assert (fd->chunk_size == NULL); |
342 | } |
343 | gcc_assert ((fd->collapse == 1 && !fd->tiling) || collapse_iter != NULL); |
344 | if (taskloop) |
345 | fd->sched_kind = OMP_CLAUSE_SCHEDULE_RUNTIME; |
346 | if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME) |
347 | gcc_assert (fd->chunk_size == NULL); |
348 | else if (fd->chunk_size == NULL) |
349 | { |
350 | /* We only need to compute a default chunk size for ordered |
351 | static loops and dynamic loops. */ |
352 | if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC |
353 | || fd->have_ordered) |
354 | fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) |
355 | ? integer_zero_node : integer_one_node; |
356 | } |
357 | |
358 | int cnt = fd->ordered ? fd->ordered : fd->collapse; |
359 | int single_nonrect = -1; |
360 | tree single_nonrect_count = NULL_TREE; |
361 | enum tree_code single_nonrect_cond_code = ERROR_MARK; |
362 | for (i = 1; i < cnt; i++) |
363 | { |
364 | tree n1 = gimple_omp_for_initial (gs: for_stmt, i); |
365 | tree n2 = gimple_omp_for_final (gs: for_stmt, i); |
366 | if (TREE_CODE (n1) == TREE_VEC) |
367 | { |
368 | if (fd->non_rect) |
369 | { |
370 | single_nonrect = -1; |
371 | break; |
372 | } |
373 | for (int j = i - 1; j >= 0; j--) |
374 | if (TREE_VEC_ELT (n1, 0) == gimple_omp_for_index (gs: for_stmt, i: j)) |
375 | { |
376 | single_nonrect = j; |
377 | break; |
378 | } |
379 | fd->non_rect = true; |
380 | } |
381 | else if (TREE_CODE (n2) == TREE_VEC) |
382 | { |
383 | if (fd->non_rect) |
384 | { |
385 | single_nonrect = -1; |
386 | break; |
387 | } |
388 | for (int j = i - 1; j >= 0; j--) |
389 | if (TREE_VEC_ELT (n2, 0) == gimple_omp_for_index (gs: for_stmt, i: j)) |
390 | { |
391 | single_nonrect = j; |
392 | break; |
393 | } |
394 | fd->non_rect = true; |
395 | } |
396 | } |
397 | for (i = 0; i < cnt; i++) |
398 | { |
399 | if (i == 0 |
400 | && fd->collapse == 1 |
401 | && !fd->tiling |
402 | && (fd->ordered == 0 || loops == NULL)) |
403 | loop = &fd->loop; |
404 | else if (loops != NULL) |
405 | loop = loops + i; |
406 | else |
407 | loop = &dummy_loop; |
408 | |
409 | loop->v = gimple_omp_for_index (gs: for_stmt, i); |
410 | gcc_assert (SSA_VAR_P (loop->v)); |
411 | gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE |
412 | || TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE); |
413 | var = TREE_CODE (loop->v) == SSA_NAME ? SSA_NAME_VAR (loop->v) : loop->v; |
414 | loop->n1 = gimple_omp_for_initial (gs: for_stmt, i); |
415 | loop->m1 = NULL_TREE; |
416 | loop->m2 = NULL_TREE; |
417 | loop->outer = 0; |
418 | loop->non_rect_referenced = false; |
419 | if (TREE_CODE (loop->n1) == TREE_VEC) |
420 | { |
421 | for (int j = i - 1; j >= 0; j--) |
422 | if (TREE_VEC_ELT (loop->n1, 0) == gimple_omp_for_index (gs: for_stmt, i: j)) |
423 | { |
424 | loop->outer = i - j; |
425 | if (loops != NULL) |
426 | loops[j].non_rect_referenced = true; |
427 | if (fd->first_nonrect == -1 || fd->first_nonrect > j) |
428 | fd->first_nonrect = j; |
429 | break; |
430 | } |
431 | gcc_assert (loop->outer); |
432 | loop->m1 = TREE_VEC_ELT (loop->n1, 1); |
433 | loop->n1 = TREE_VEC_ELT (loop->n1, 2); |
434 | fd->non_rect = true; |
435 | fd->last_nonrect = i; |
436 | } |
437 | |
438 | loop->cond_code = gimple_omp_for_cond (gs: for_stmt, i); |
439 | loop->n2 = gimple_omp_for_final (gs: for_stmt, i); |
440 | gcc_assert (loop->cond_code != NE_EXPR |
441 | || (gimple_omp_for_kind (for_stmt) |
442 | != GF_OMP_FOR_KIND_OACC_LOOP)); |
443 | if (TREE_CODE (loop->n2) == TREE_VEC) |
444 | { |
445 | if (loop->outer) |
446 | gcc_assert (TREE_VEC_ELT (loop->n2, 0) |
447 | == gimple_omp_for_index (for_stmt, i - loop->outer)); |
448 | else |
449 | for (int j = i - 1; j >= 0; j--) |
450 | if (TREE_VEC_ELT (loop->n2, 0) == gimple_omp_for_index (gs: for_stmt, i: j)) |
451 | { |
452 | loop->outer = i - j; |
453 | if (loops != NULL) |
454 | loops[j].non_rect_referenced = true; |
455 | if (fd->first_nonrect == -1 || fd->first_nonrect > j) |
456 | fd->first_nonrect = j; |
457 | break; |
458 | } |
459 | gcc_assert (loop->outer); |
460 | loop->m2 = TREE_VEC_ELT (loop->n2, 1); |
461 | loop->n2 = TREE_VEC_ELT (loop->n2, 2); |
462 | fd->non_rect = true; |
463 | fd->last_nonrect = i; |
464 | } |
465 | |
466 | t = gimple_omp_for_incr (gs: for_stmt, i); |
467 | gcc_assert (TREE_OPERAND (t, 0) == var); |
468 | loop->step = omp_get_for_step_from_incr (loc, incr: t); |
469 | |
470 | omp_adjust_for_condition (loc, cond_code: &loop->cond_code, n2: &loop->n2, v: loop->v, |
471 | step: loop->step); |
472 | |
473 | if (simd |
474 | || (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC |
475 | && !fd->have_ordered)) |
476 | { |
477 | if (fd->collapse == 1 && !fd->tiling) |
478 | iter_type = TREE_TYPE (loop->v); |
479 | else if (i == 0 |
480 | || TYPE_PRECISION (iter_type) |
481 | < TYPE_PRECISION (TREE_TYPE (loop->v))) |
482 | iter_type |
483 | = build_nonstandard_integer_type |
484 | (TYPE_PRECISION (TREE_TYPE (loop->v)), 1); |
485 | } |
486 | else if (iter_type != long_long_unsigned_type_node) |
487 | { |
488 | if (POINTER_TYPE_P (TREE_TYPE (loop->v))) |
489 | iter_type = long_long_unsigned_type_node; |
490 | else if (TYPE_UNSIGNED (TREE_TYPE (loop->v)) |
491 | && TYPE_PRECISION (TREE_TYPE (loop->v)) |
492 | >= TYPE_PRECISION (iter_type)) |
493 | { |
494 | tree n; |
495 | |
496 | if (loop->cond_code == LT_EXPR) |
497 | n = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (loop->v), |
498 | loop->n2, loop->step); |
499 | else |
500 | n = loop->n1; |
501 | if (loop->m1 |
502 | || loop->m2 |
503 | || TREE_CODE (n) != INTEGER_CST |
504 | || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), t2: n)) |
505 | iter_type = long_long_unsigned_type_node; |
506 | } |
507 | else if (TYPE_PRECISION (TREE_TYPE (loop->v)) |
508 | > TYPE_PRECISION (iter_type)) |
509 | { |
510 | tree n1, n2; |
511 | |
512 | if (loop->cond_code == LT_EXPR) |
513 | { |
514 | n1 = loop->n1; |
515 | n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (loop->v), |
516 | loop->n2, loop->step); |
517 | } |
518 | else |
519 | { |
520 | n1 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (loop->v), |
521 | loop->n2, loop->step); |
522 | n2 = loop->n1; |
523 | } |
524 | if (loop->m1 |
525 | || loop->m2 |
526 | || TREE_CODE (n1) != INTEGER_CST |
527 | || TREE_CODE (n2) != INTEGER_CST |
528 | || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), t2: n1) |
529 | || !tree_int_cst_lt (t1: n2, TYPE_MAX_VALUE (iter_type))) |
530 | iter_type = long_long_unsigned_type_node; |
531 | } |
532 | } |
533 | |
534 | if (i >= fd->collapse) |
535 | continue; |
536 | |
537 | if (collapse_count && *collapse_count == NULL) |
538 | { |
539 | if (count && integer_zerop (count)) |
540 | continue; |
541 | tree n1first = NULL_TREE, n2first = NULL_TREE; |
542 | tree n1last = NULL_TREE, n2last = NULL_TREE; |
543 | tree ostep = NULL_TREE; |
544 | if (loop->m1 || loop->m2) |
545 | { |
546 | if (count == NULL_TREE) |
547 | continue; |
548 | if (single_nonrect == -1 |
549 | || (loop->m1 && TREE_CODE (loop->m1) != INTEGER_CST) |
550 | || (loop->m2 && TREE_CODE (loop->m2) != INTEGER_CST) |
551 | || TREE_CODE (loop->n1) != INTEGER_CST |
552 | || TREE_CODE (loop->n2) != INTEGER_CST |
553 | || TREE_CODE (loop->step) != INTEGER_CST) |
554 | { |
555 | count = NULL_TREE; |
556 | continue; |
557 | } |
558 | tree var = gimple_omp_for_initial (gs: for_stmt, i: single_nonrect); |
559 | tree itype = TREE_TYPE (var); |
560 | tree first = gimple_omp_for_initial (gs: for_stmt, i: single_nonrect); |
561 | t = gimple_omp_for_incr (gs: for_stmt, i: single_nonrect); |
562 | ostep = omp_get_for_step_from_incr (loc, incr: t); |
563 | t = fold_binary (MINUS_EXPR, long_long_unsigned_type_node, |
564 | single_nonrect_count, |
565 | build_one_cst (long_long_unsigned_type_node)); |
566 | t = fold_convert (itype, t); |
567 | first = fold_convert (itype, first); |
568 | ostep = fold_convert (itype, ostep); |
569 | tree last = fold_binary (PLUS_EXPR, itype, first, |
570 | fold_binary (MULT_EXPR, itype, t, |
571 | ostep)); |
572 | if (TREE_CODE (first) != INTEGER_CST |
573 | || TREE_CODE (last) != INTEGER_CST) |
574 | { |
575 | count = NULL_TREE; |
576 | continue; |
577 | } |
578 | if (loop->m1) |
579 | { |
580 | tree m1 = fold_convert (itype, loop->m1); |
581 | tree n1 = fold_convert (itype, loop->n1); |
582 | n1first = fold_binary (PLUS_EXPR, itype, |
583 | fold_binary (MULT_EXPR, itype, |
584 | first, m1), n1); |
585 | n1last = fold_binary (PLUS_EXPR, itype, |
586 | fold_binary (MULT_EXPR, itype, |
587 | last, m1), n1); |
588 | } |
589 | else |
590 | n1first = n1last = loop->n1; |
591 | if (loop->m2) |
592 | { |
593 | tree n2 = fold_convert (itype, loop->n2); |
594 | tree m2 = fold_convert (itype, loop->m2); |
595 | n2first = fold_binary (PLUS_EXPR, itype, |
596 | fold_binary (MULT_EXPR, itype, |
597 | first, m2), n2); |
598 | n2last = fold_binary (PLUS_EXPR, itype, |
599 | fold_binary (MULT_EXPR, itype, |
600 | last, m2), n2); |
601 | } |
602 | else |
603 | n2first = n2last = loop->n2; |
604 | n1first = fold_convert (TREE_TYPE (loop->v), n1first); |
605 | n2first = fold_convert (TREE_TYPE (loop->v), n2first); |
606 | n1last = fold_convert (TREE_TYPE (loop->v), n1last); |
607 | n2last = fold_convert (TREE_TYPE (loop->v), n2last); |
608 | t = fold_binary (loop->cond_code, boolean_type_node, |
609 | n1first, n2first); |
610 | tree t2 = fold_binary (loop->cond_code, boolean_type_node, |
611 | n1last, n2last); |
612 | if (t && t2 && integer_nonzerop (t) && integer_nonzerop (t2)) |
613 | /* All outer loop iterators have at least one inner loop |
614 | iteration. Try to compute the count at compile time. */ |
615 | t = NULL_TREE; |
616 | else if (t && t2 && integer_zerop (t) && integer_zerop (t2)) |
617 | /* No iterations of the inner loop. count will be set to |
618 | zero cst below. */; |
619 | else if (TYPE_UNSIGNED (itype) |
620 | || t == NULL_TREE |
621 | || t2 == NULL_TREE |
622 | || TREE_CODE (t) != INTEGER_CST |
623 | || TREE_CODE (t2) != INTEGER_CST) |
624 | { |
625 | /* Punt (for now). */ |
626 | count = NULL_TREE; |
627 | continue; |
628 | } |
629 | else |
630 | { |
631 | /* Some iterations of the outer loop have zero iterations |
632 | of the inner loop, while others have at least one. |
633 | In this case, we need to adjust one of those outer |
634 | loop bounds. If ADJ_FIRST, we need to adjust outer n1 |
635 | (first), otherwise outer n2 (last). */ |
636 | bool adj_first = integer_zerop (t); |
637 | tree n1 = fold_convert (itype, loop->n1); |
638 | tree n2 = fold_convert (itype, loop->n2); |
639 | tree m1 = loop->m1 ? fold_convert (itype, loop->m1) |
640 | : build_zero_cst (itype); |
641 | tree m2 = loop->m2 ? fold_convert (itype, loop->m2) |
642 | : build_zero_cst (itype); |
643 | t = fold_binary (MINUS_EXPR, itype, n1, n2); |
644 | t2 = fold_binary (MINUS_EXPR, itype, m2, m1); |
645 | t = fold_binary (TRUNC_DIV_EXPR, itype, t, t2); |
646 | t2 = fold_binary (MINUS_EXPR, itype, t, first); |
647 | t2 = fold_binary (TRUNC_MOD_EXPR, itype, t2, ostep); |
648 | t = fold_binary (MINUS_EXPR, itype, t, t2); |
649 | tree n1cur |
650 | = fold_binary (PLUS_EXPR, itype, n1, |
651 | fold_binary (MULT_EXPR, itype, m1, t)); |
652 | tree n2cur |
653 | = fold_binary (PLUS_EXPR, itype, n2, |
654 | fold_binary (MULT_EXPR, itype, m2, t)); |
655 | t2 = fold_binary (loop->cond_code, boolean_type_node, |
656 | n1cur, n2cur); |
657 | tree t3 = fold_binary (MULT_EXPR, itype, m1, ostep); |
658 | tree t4 = fold_binary (MULT_EXPR, itype, m2, ostep); |
659 | tree diff; |
660 | if (adj_first) |
661 | { |
662 | tree new_first; |
663 | if (integer_nonzerop (t2)) |
664 | { |
665 | new_first = t; |
666 | n1first = n1cur; |
667 | n2first = n2cur; |
668 | if (flag_checking) |
669 | { |
670 | t3 = fold_binary (MINUS_EXPR, itype, n1cur, t3); |
671 | t4 = fold_binary (MINUS_EXPR, itype, n2cur, t4); |
672 | t3 = fold_binary (loop->cond_code, |
673 | boolean_type_node, t3, t4); |
674 | gcc_assert (integer_zerop (t3)); |
675 | } |
676 | } |
677 | else |
678 | { |
679 | t3 = fold_binary (PLUS_EXPR, itype, n1cur, t3); |
680 | t4 = fold_binary (PLUS_EXPR, itype, n2cur, t4); |
681 | new_first = fold_binary (PLUS_EXPR, itype, t, ostep); |
682 | n1first = t3; |
683 | n2first = t4; |
684 | if (flag_checking) |
685 | { |
686 | t3 = fold_binary (loop->cond_code, |
687 | boolean_type_node, t3, t4); |
688 | gcc_assert (integer_nonzerop (t3)); |
689 | } |
690 | } |
691 | diff = fold_binary (MINUS_EXPR, itype, new_first, first); |
692 | first = new_first; |
693 | fd->adjn1 = first; |
694 | } |
695 | else |
696 | { |
697 | tree new_last; |
698 | if (integer_zerop (t2)) |
699 | { |
700 | t3 = fold_binary (MINUS_EXPR, itype, n1cur, t3); |
701 | t4 = fold_binary (MINUS_EXPR, itype, n2cur, t4); |
702 | new_last = fold_binary (MINUS_EXPR, itype, t, ostep); |
703 | n1last = t3; |
704 | n2last = t4; |
705 | if (flag_checking) |
706 | { |
707 | t3 = fold_binary (loop->cond_code, |
708 | boolean_type_node, t3, t4); |
709 | gcc_assert (integer_nonzerop (t3)); |
710 | } |
711 | } |
712 | else |
713 | { |
714 | new_last = t; |
715 | n1last = n1cur; |
716 | n2last = n2cur; |
717 | if (flag_checking) |
718 | { |
719 | t3 = fold_binary (PLUS_EXPR, itype, n1cur, t3); |
720 | t4 = fold_binary (PLUS_EXPR, itype, n2cur, t4); |
721 | t3 = fold_binary (loop->cond_code, |
722 | boolean_type_node, t3, t4); |
723 | gcc_assert (integer_zerop (t3)); |
724 | } |
725 | } |
726 | diff = fold_binary (MINUS_EXPR, itype, last, new_last); |
727 | } |
728 | if (TYPE_UNSIGNED (itype) |
729 | && single_nonrect_cond_code == GT_EXPR) |
730 | diff = fold_binary (TRUNC_DIV_EXPR, itype, |
731 | fold_unary (NEGATE_EXPR, itype, diff), |
732 | fold_unary (NEGATE_EXPR, itype, |
733 | ostep)); |
734 | else |
735 | diff = fold_binary (TRUNC_DIV_EXPR, itype, diff, ostep); |
736 | diff = fold_convert (long_long_unsigned_type_node, diff); |
737 | single_nonrect_count |
738 | = fold_binary (MINUS_EXPR, long_long_unsigned_type_node, |
739 | single_nonrect_count, diff); |
740 | t = NULL_TREE; |
741 | } |
742 | } |
743 | else |
744 | t = fold_binary (loop->cond_code, boolean_type_node, |
745 | fold_convert (TREE_TYPE (loop->v), loop->n1), |
746 | fold_convert (TREE_TYPE (loop->v), loop->n2)); |
747 | if (t && integer_zerop (t)) |
748 | count = build_zero_cst (long_long_unsigned_type_node); |
749 | else if ((i == 0 || count != NULL_TREE) |
750 | && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE |
751 | && TREE_CONSTANT (loop->n1) |
752 | && TREE_CONSTANT (loop->n2) |
753 | && TREE_CODE (loop->step) == INTEGER_CST) |
754 | { |
755 | tree itype = TREE_TYPE (loop->v); |
756 | |
757 | if (POINTER_TYPE_P (itype)) |
758 | itype = signed_type_for (itype); |
759 | t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1)); |
760 | t = fold_build2 (PLUS_EXPR, itype, |
761 | fold_convert (itype, loop->step), t); |
762 | tree n1 = loop->n1; |
763 | tree n2 = loop->n2; |
764 | if (loop->m1 || loop->m2) |
765 | { |
766 | gcc_assert (single_nonrect != -1); |
767 | n1 = n1first; |
768 | n2 = n2first; |
769 | } |
770 | t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2)); |
771 | t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); |
772 | tree step = fold_convert_loc (loc, itype, loop->step); |
773 | if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR) |
774 | t = fold_build2 (TRUNC_DIV_EXPR, itype, |
775 | fold_build1 (NEGATE_EXPR, itype, t), |
776 | fold_build1 (NEGATE_EXPR, itype, step)); |
777 | else |
778 | t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); |
779 | tree llutype = long_long_unsigned_type_node; |
780 | t = fold_convert (llutype, t); |
781 | if (loop->m1 || loop->m2) |
782 | { |
783 | /* t is number of iterations of inner loop at either first |
784 | or last value of the outer iterator (the one with fewer |
785 | iterations). |
786 | Compute t2 = ((m2 - m1) * ostep) / step |
787 | and niters = outer_count * t |
788 | + t2 * ((outer_count - 1) * outer_count / 2) |
789 | */ |
790 | tree m1 = loop->m1 ? loop->m1 : integer_zero_node; |
791 | tree m2 = loop->m2 ? loop->m2 : integer_zero_node; |
792 | m1 = fold_convert (itype, m1); |
793 | m2 = fold_convert (itype, m2); |
794 | tree t2 = fold_build2 (MINUS_EXPR, itype, m2, m1); |
795 | t2 = fold_build2 (MULT_EXPR, itype, t2, ostep); |
796 | if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR) |
797 | t2 = fold_build2 (TRUNC_DIV_EXPR, itype, |
798 | fold_build1 (NEGATE_EXPR, itype, t2), |
799 | fold_build1 (NEGATE_EXPR, itype, step)); |
800 | else |
801 | t2 = fold_build2 (TRUNC_DIV_EXPR, itype, t2, step); |
802 | t2 = fold_convert (llutype, t2); |
803 | fd->first_inner_iterations = t; |
804 | fd->factor = t2; |
805 | t = fold_build2 (MULT_EXPR, llutype, t, |
806 | single_nonrect_count); |
807 | tree t3 = fold_build2 (MINUS_EXPR, llutype, |
808 | single_nonrect_count, |
809 | build_one_cst (llutype)); |
810 | t3 = fold_build2 (MULT_EXPR, llutype, t3, |
811 | single_nonrect_count); |
812 | t3 = fold_build2 (TRUNC_DIV_EXPR, llutype, t3, |
813 | build_int_cst (llutype, 2)); |
814 | t2 = fold_build2 (MULT_EXPR, llutype, t2, t3); |
815 | t = fold_build2 (PLUS_EXPR, llutype, t, t2); |
816 | } |
817 | if (i == single_nonrect) |
818 | { |
819 | if (integer_zerop (t) || TREE_CODE (t) != INTEGER_CST) |
820 | count = t; |
821 | else |
822 | { |
823 | single_nonrect_count = t; |
824 | single_nonrect_cond_code = loop->cond_code; |
825 | if (count == NULL_TREE) |
826 | count = build_one_cst (llutype); |
827 | } |
828 | } |
829 | else if (count != NULL_TREE) |
830 | count = fold_build2 (MULT_EXPR, llutype, count, t); |
831 | else |
832 | count = t; |
833 | if (TREE_CODE (count) != INTEGER_CST) |
834 | count = NULL_TREE; |
835 | } |
836 | else if (count && !integer_zerop (count)) |
837 | count = NULL_TREE; |
838 | } |
839 | } |
840 | |
841 | if (count |
842 | && !simd |
843 | && (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC |
844 | || fd->have_ordered)) |
845 | { |
846 | if (!tree_int_cst_lt (t1: count, TYPE_MAX_VALUE (long_integer_type_node))) |
847 | iter_type = long_long_unsigned_type_node; |
848 | else |
849 | iter_type = long_integer_type_node; |
850 | } |
851 | else if (collapse_iter && *collapse_iter != NULL) |
852 | iter_type = TREE_TYPE (*collapse_iter); |
853 | fd->iter_type = iter_type; |
854 | if (collapse_iter && *collapse_iter == NULL) |
855 | *collapse_iter = create_tmp_var (iter_type, ".iter" ); |
856 | if (collapse_count && *collapse_count == NULL) |
857 | { |
858 | if (count) |
859 | { |
860 | *collapse_count = fold_convert_loc (loc, iter_type, count); |
861 | if (fd->first_inner_iterations && fd->factor) |
862 | { |
863 | t = make_tree_vec (4); |
864 | TREE_VEC_ELT (t, 0) = *collapse_count; |
865 | TREE_VEC_ELT (t, 1) = fd->first_inner_iterations; |
866 | TREE_VEC_ELT (t, 2) = fd->factor; |
867 | TREE_VEC_ELT (t, 3) = fd->adjn1; |
868 | *collapse_count = t; |
869 | } |
870 | } |
871 | else |
872 | *collapse_count = create_tmp_var (iter_type, ".count" ); |
873 | } |
874 | |
875 | if (fd->collapse > 1 || fd->tiling || (fd->ordered && loops)) |
876 | { |
877 | fd->loop.v = *collapse_iter; |
878 | fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0); |
879 | fd->loop.n2 = *collapse_count; |
880 | if (TREE_CODE (fd->loop.n2) == TREE_VEC) |
881 | { |
882 | gcc_assert (fd->non_rect); |
883 | fd->first_inner_iterations = TREE_VEC_ELT (fd->loop.n2, 1); |
884 | fd->factor = TREE_VEC_ELT (fd->loop.n2, 2); |
885 | fd->adjn1 = TREE_VEC_ELT (fd->loop.n2, 3); |
886 | fd->loop.n2 = TREE_VEC_ELT (fd->loop.n2, 0); |
887 | } |
888 | fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1); |
889 | fd->loop.m1 = NULL_TREE; |
890 | fd->loop.m2 = NULL_TREE; |
891 | fd->loop.outer = 0; |
892 | fd->loop.cond_code = LT_EXPR; |
893 | } |
894 | else if (loops) |
895 | loops[0] = fd->loop; |
896 | } |
897 | |
898 | /* Build a call to GOMP_barrier. */ |
899 | |
900 | gimple * |
901 | omp_build_barrier (tree lhs) |
902 | { |
903 | tree fndecl = builtin_decl_explicit (fncode: lhs ? BUILT_IN_GOMP_BARRIER_CANCEL |
904 | : BUILT_IN_GOMP_BARRIER); |
905 | gcall *g = gimple_build_call (fndecl, 0); |
906 | if (lhs) |
907 | gimple_call_set_lhs (gs: g, lhs); |
908 | return g; |
909 | } |
910 | |
911 | /* Find OMP_FOR resp. OMP_SIMD with non-NULL OMP_FOR_INIT. Also, fill in pdata |
912 | array, pdata[0] non-NULL if there is anything non-trivial in between, |
913 | pdata[1] is address of OMP_PARALLEL in between if any, pdata[2] is address |
914 | of OMP_FOR in between if any and pdata[3] is address of the inner |
915 | OMP_FOR/OMP_SIMD. */ |
916 | |
tree
find_combined_omp_for (tree *tp, int *walk_subtrees, void *data)
{
  /* walk_tree callback.  DATA points to an array of four tree * slots
     filled in as described in the comment above; walking stops (by
     returning *TP) once the innermost OMP_FOR/OMP_SIMD with a non-NULL
     OMP_FOR_INIT is located.  */
  tree **pdata = (tree **) data;
  /* By default do not descend; only the cases below that may wrap the
     loop re-enable subtree walking.  */
  *walk_subtrees = 0;
  switch (TREE_CODE (*tp))
    {
    case OMP_FOR:
      if (OMP_FOR_INIT (*tp) != NULL_TREE)
	{
	  pdata[3] = tp;
	  return *tp;
	}
      /* An OMP_FOR without OMP_FOR_INIT is an intermediate combined
	 construct; remember it and keep looking inside.  */
      pdata[2] = tp;
      *walk_subtrees = 1;
      break;
    case OMP_SIMD:
      if (OMP_FOR_INIT (*tp) != NULL_TREE)
	{
	  pdata[3] = tp;
	  return *tp;
	}
      break;
    case BIND_EXPR:
      /* A BIND_EXPR only counts as "something non-trivial in between"
	 if it actually declares variables.  */
      if (BIND_EXPR_VARS (*tp)
	  || (BIND_EXPR_BLOCK (*tp)
	      && BLOCK_VARS (BIND_EXPR_BLOCK (*tp))))
	pdata[0] = tp;
      *walk_subtrees = 1;
      break;
    case STATEMENT_LIST:
      /* More than one statement in the list is non-trivial.  */
      if (!tsi_one_before_end_p (i: tsi_start (t: *tp)))
	pdata[0] = tp;
      *walk_subtrees = 1;
      break;
    case TRY_FINALLY_EXPR:
      pdata[0] = tp;
      *walk_subtrees = 1;
      break;
    case OMP_PARALLEL:
      pdata[1] = tp;
      *walk_subtrees = 1;
      break;
    default:
      break;
    }
  return NULL_TREE;
}
965 | |
966 | /* Return maximum possible vectorization factor for the target. */ |
967 | |
poly_uint64
omp_max_vf (void)
{
  /* Vectorization is not performed without optimization, with -Og,
     without loop optimizations, or when the user explicitly disabled
     loop vectorization; report a VF of 1 in those cases.  */
  if (!optimize
      || optimize_debug
      || !flag_tree_loop_optimize
      || (!flag_tree_loop_vectorize
	  && OPTION_SET_P (flag_tree_loop_vectorize)))
    return 1;

  /* Ask the target for all vector modes it would consider for
     autovectorization and take the maximum element count.  */
  auto_vector_modes modes;
  targetm.vectorize.autovectorize_vector_modes (&modes, true);
  if (!modes.is_empty ())
    {
      poly_uint64 vf = 0;
      for (unsigned int i = 0; i < modes.length (); ++i)
	/* The returned modes use the smallest element size (and thus
	   the largest nunits) for the vectorization approach that they
	   represent.  */
	vf = ordered_max (a: vf, b: GET_MODE_NUNITS (mode: modes[i]));
      return vf;
    }

  /* Fallback: query the preferred SIMD mode for QImode elements,
     which gives the widest byte-element vector.  */
  machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode);
  if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT)
    return GET_MODE_NUNITS (mode: vqimode);

  return 1;
}
997 | |
998 | /* Return maximum SIMT width if offloading may target SIMT hardware. */ |
999 | |
1000 | int |
1001 | omp_max_simt_vf (void) |
1002 | { |
1003 | if (!optimize) |
1004 | return 0; |
1005 | if (ENABLE_OFFLOADING) |
1006 | for (const char *c = getenv (name: "OFFLOAD_TARGET_NAMES" ); c;) |
1007 | { |
1008 | if (startswith (str: c, prefix: "nvptx" )) |
1009 | return 32; |
1010 | else if ((c = strchr (s: c, c: ':'))) |
1011 | c++; |
1012 | } |
1013 | return 0; |
1014 | } |
1015 | |
1016 | /* Store the construct selectors as tree codes from last to first, |
1017 | return their number. */ |
1018 | |
1019 | int |
1020 | omp_constructor_traits_to_codes (tree ctx, enum tree_code *constructs) |
1021 | { |
1022 | int nconstructs = list_length (ctx); |
1023 | int i = nconstructs - 1; |
1024 | for (tree t2 = ctx; t2; t2 = TREE_CHAIN (t2), i--) |
1025 | { |
1026 | const char *sel = IDENTIFIER_POINTER (TREE_PURPOSE (t2)); |
1027 | if (!strcmp (s1: sel, s2: "target" )) |
1028 | constructs[i] = OMP_TARGET; |
1029 | else if (!strcmp (s1: sel, s2: "teams" )) |
1030 | constructs[i] = OMP_TEAMS; |
1031 | else if (!strcmp (s1: sel, s2: "parallel" )) |
1032 | constructs[i] = OMP_PARALLEL; |
1033 | else if (!strcmp (s1: sel, s2: "for" ) || !strcmp (s1: sel, s2: "do" )) |
1034 | constructs[i] = OMP_FOR; |
1035 | else if (!strcmp (s1: sel, s2: "simd" )) |
1036 | constructs[i] = OMP_SIMD; |
1037 | else |
1038 | gcc_unreachable (); |
1039 | } |
1040 | gcc_assert (i == -1); |
1041 | return nconstructs; |
1042 | } |
1043 | |
1044 | /* Return true if PROP is possibly present in one of the offloading target's |
1045 | OpenMP contexts. The format of PROPS string is always offloading target's |
1046 | name terminated by '\0', followed by properties for that offloading |
1047 | target separated by '\0' and terminated by another '\0'. The strings |
1048 | are created from omp-device-properties installed files of all configured |
1049 | offloading targets. */ |
1050 | |
static bool
omp_offload_device_kind_arch_isa (const char *props, const char *prop)
{
  /* Without any configured offloading targets there is no offloading
     context PROP could appear in.  */
  const char *names = getenv (name: "OFFLOAD_TARGET_NAMES" );
  if (names == NULL || *names == '\0')
    return false;
  /* PROPS is a sequence of records, one per offloading target: the
     target name, then each of its properties, all '\0'-terminated,
     with an extra '\0' closing the record (see function comment).  */
  while (*props != '\0')
    {
      size_t name_len = strlen (s: props);
      bool matches = false;
      /* Check whether this record's target name is among the enabled
	 targets in the ':'-separated OFFLOAD_TARGET_NAMES list
	 ('=' introduces an optional path suffix after a name).  */
      for (const char *c = names; c; )
	{
	  if (strncmp (s1: props, s2: c, n: name_len) == 0
	      && (c[name_len] == '\0'
		  || c[name_len] == ':'
		  || c[name_len] == '='))
	    {
	      matches = true;
	      break;
	    }
	  else if ((c = strchr (s: c, c: ':')))
	    c++;
	}
      /* Walk this record's properties; only report a hit when the
	 target itself is enabled.  The walk always runs so PROPS
	 advances to the next record even on a disabled target.  */
      props = props + name_len + 1;
      while (*props != '\0')
	{
	  if (matches && strcmp (s1: props, s2: prop) == 0)
	    return true;
	  props = strchr (s: props, c: '\0') + 1;
	}
      /* Skip the record-terminating '\0'.  */
      props++;
    }
  return false;
}
1085 | |
1086 | /* Return true if the current code location is or might be offloaded. |
1087 | Return true in declare target functions, or when nested in a target |
1088 | region or when unsure, return false otherwise. */ |
1089 | |
static bool
omp_maybe_offloaded (void)
{
  /* No offloading support compiled in, or no offload targets enabled
     via the environment, means nothing can be offloaded.  */
  if (!ENABLE_OFFLOADING)
    return false;
  const char *names = getenv (name: "OFFLOAD_TARGET_NAMES" );
  if (names == NULL || *names == '\0')
    return false;

  if (symtab->state == PARSING)
    /* Maybe. */
    return true;
  /* NOTE(review): after inlining this conservatively answers "no";
     presumably offloaded bodies have been split off by then — confirm
     against the pass pipeline.  */
  if (cfun && cfun->after_inlining)
    return false;
  /* Declare target functions may be compiled for the device.  */
  if (current_function_decl
      && lookup_attribute (attr_name: "omp declare target" ,
			   DECL_ATTRIBUTES (current_function_decl)))
    return true;
  /* Before gimplification, check whether we are lexically nested in a
     target construct.  */
  if (cfun && (cfun->curr_properties & PROP_gimple_any) == 0)
    {
      enum tree_code construct = OMP_TARGET;
      if (omp_construct_selector_matches (&construct, 1, NULL))
	return true;
    }
  return false;
}
1116 | |
1117 | |
1118 | /* Diagnose errors in an OpenMP context selector, return CTX if |
1119 | it is correct or error_mark_node otherwise. */ |
1120 | |
tree
omp_check_context_selector (location_t loc, tree ctx)
{
  /* Each trait-set-selector-name can only be specified once.
     There are just 4 set names. */
  for (tree t1 = ctx; t1; t1 = TREE_CHAIN (t1))
    for (tree t2 = TREE_CHAIN (t1); t2; t2 = TREE_CHAIN (t2))
      if (TREE_PURPOSE (t1) == TREE_PURPOSE (t2))
	{
	  error_at (loc, "selector set %qs specified more than once" ,
		    IDENTIFIER_POINTER (TREE_PURPOSE (t1)));
	  return error_mark_node;
	}
  for (tree t = ctx; t; t = TREE_CHAIN (t))
    {
      /* Each trait-selector-name can only be specified once. */
      if (list_length (TREE_VALUE (t)) < 5)
	{
	  /* For short lists a quadratic pairwise scan is cheaper than
	     building a hash set.  */
	  for (tree t1 = TREE_VALUE (t); t1; t1 = TREE_CHAIN (t1))
	    for (tree t2 = TREE_CHAIN (t1); t2; t2 = TREE_CHAIN (t2))
	      if (TREE_PURPOSE (t1) == TREE_PURPOSE (t2))
		{
		  error_at (loc,
			    "selector %qs specified more than once in set %qs" ,
			    IDENTIFIER_POINTER (TREE_PURPOSE (t1)),
			    IDENTIFIER_POINTER (TREE_PURPOSE (t)));
		  return error_mark_node;
		}
	}
      else
	{
	  hash_set<tree> pset;
	  for (tree t1 = TREE_VALUE (t); t1; t1 = TREE_CHAIN (t1))
	    if (pset.add (TREE_PURPOSE (t1)))
	      {
		error_at (loc,
			  "selector %qs specified more than once in set %qs" ,
			  IDENTIFIER_POINTER (TREE_PURPOSE (t1)),
			  IDENTIFIER_POINTER (TREE_PURPOSE (t)));
		return error_mark_node;
	      }
	}

      /* NULL-terminated tables of the property names recognized for the
	 selectors that take a fixed name set.  */
      static const char *const kind[] = {
	"host" , "nohost" , "cpu" , "gpu" , "fpga" , "any" , NULL };
      static const char *const vendor[] = {
	"amd" , "arm" , "bsc" , "cray" , "fujitsu" , "gnu" , "ibm" , "intel" ,
	"llvm" , "nvidia" , "pgi" , "ti" , "unknown" , NULL };
      static const char *const extension[] = { NULL };
      static const char *const atomic_default_mem_order[] = {
	"seq_cst" , "relaxed" , "acq_rel" , NULL };
      struct known_properties { const char *set; const char *selector;
				const char *const *props; };
      known_properties props[] = {
	{ .set: "device" , .selector: "kind" , .props: kind },
	{ .set: "implementation" , .selector: "vendor" , .props: vendor },
	{ .set: "implementation" , .selector: "extension" , .props: extension },
	{ .set: "implementation" , .selector: "atomic_default_mem_order" ,
	  .props: atomic_default_mem_order } };
      /* Check every property of the known selectors above against its
	 table; unknown atomic_default_mem_order values are hard errors,
	 other unknown names only warn (they may be valid elsewhere).  */
      for (tree t1 = TREE_VALUE (t); t1; t1 = TREE_CHAIN (t1))
	for (unsigned i = 0; i < ARRAY_SIZE (props); i++)
	  if (!strcmp (IDENTIFIER_POINTER (TREE_PURPOSE (t1)),
					   s2: props[i].selector)
	      && !strcmp (IDENTIFIER_POINTER (TREE_PURPOSE (t)),
					      s2: props[i].set))
	    for (tree t2 = TREE_VALUE (t1); t2; t2 = TREE_CHAIN (t2))
	      for (unsigned j = 0; ; j++)
		{
		  if (props[i].props[j] == NULL)
		    {
		      /* " score" entries carry the score clause, not a
			 property name; skip them silently.  */
		      if (TREE_PURPOSE (t2)
			  && !strcmp (IDENTIFIER_POINTER (TREE_PURPOSE (t2)),
				      s2: " score" ))
			break;
		      if (props[i].props == atomic_default_mem_order)
			{
			  error_at (loc,
				    "incorrect property %qs of %qs selector" ,
				    IDENTIFIER_POINTER (TREE_PURPOSE (t2)),
				    "atomic_default_mem_order" );
			  return error_mark_node;
			}
		      else if (TREE_PURPOSE (t2))
			warning_at (loc, 0,
				    "unknown property %qs of %qs selector" ,
				    IDENTIFIER_POINTER (TREE_PURPOSE (t2)),
				    props[i].selector);
		      else
			warning_at (loc, 0,
				    "unknown property %qE of %qs selector" ,
				    TREE_VALUE (t2), props[i].selector);
		      break;
		    }
		  else if (TREE_PURPOSE (t2) == NULL_TREE)
		    {
		      /* String-literal property; the length check rejects
			 strings with embedded NULs (Fortran literals lack
			 the trailing NUL counted for C ones).  */
		      const char *str = TREE_STRING_POINTER (TREE_VALUE (t2));
		      if (!strcmp (s1: str, s2: props[i].props[j])
			  && ((size_t) TREE_STRING_LENGTH (TREE_VALUE (t2))
			      == strlen (s: str) + (lang_GNU_Fortran () ? 0 : 1)))
			break;
		    }
		  else if (!strcmp (IDENTIFIER_POINTER (TREE_PURPOSE (t2)),
				    s2: props[i].props[j]))
		    break;
		}
    }
  return ctx;
}
1229 | |
1230 | |
1231 | /* Register VARIANT as variant of some base function marked with |
1232 | #pragma omp declare variant. CONSTRUCT is corresponding construct |
1233 | selector set. */ |
1234 | |
void
omp_mark_declare_variant (location_t loc, tree variant, tree construct)
{
  tree attr = lookup_attribute (attr_name: "omp declare variant variant" ,
				DECL_ATTRIBUTES (variant));
  /* First time VARIANT is used as a variant: record its construct
     selector set in an attribute for later consistency checks.  */
  if (attr == NULL_TREE)
    {
      attr = tree_cons (get_identifier ("omp declare variant variant" ),
			unshare_expr (construct),
			DECL_ATTRIBUTES (variant));
      DECL_ATTRIBUTES (variant) = attr;
      return;
    }
  /* Subsequent uses must agree with the recorded construct set: either
     both have no construct selectors, or the sets compare equal
     (omp_context_selector_set_compare returns 0 for equality).  */
  if ((TREE_VALUE (attr) != NULL_TREE) != (construct != NULL_TREE)
      || (construct != NULL_TREE
	  && omp_context_selector_set_compare ("construct" , TREE_VALUE (attr),
					       construct)))
    error_at (loc, "%qD used as a variant with incompatible %<construct%> "
		   "selector sets" , variant);
}
1255 | |
1256 | |
1257 | /* Return a name from PROP, a property in selectors accepting |
1258 | name lists. */ |
1259 | |
1260 | static const char * |
1261 | omp_context_name_list_prop (tree prop) |
1262 | { |
1263 | if (TREE_PURPOSE (prop)) |
1264 | return IDENTIFIER_POINTER (TREE_PURPOSE (prop)); |
1265 | else |
1266 | { |
1267 | const char *ret = TREE_STRING_POINTER (TREE_VALUE (prop)); |
1268 | if ((size_t) TREE_STRING_LENGTH (TREE_VALUE (prop)) |
1269 | == strlen (s: ret) + (lang_GNU_Fortran () ? 0 : 1)) |
1270 | return ret; |
1271 | return NULL; |
1272 | } |
1273 | } |
1274 | |
1275 | /* Return 1 if context selector matches the current OpenMP context, 0 |
1276 | if it does not and -1 if it is unknown and need to be determined later. |
1277 | Some properties can be checked right away during parsing (this routine), |
1278 | others need to wait until the whole TU is parsed, others need to wait until |
1279 | IPA, others until vectorization. */ |
1280 | |
int
omp_context_selector_matches (tree ctx)
{
  /* RET starts at 1 (match); it is lowered to -1 when the answer is
     not known yet, and 0 is returned as soon as any selector is known
     not to match.  */
  int ret = 1;
  for (tree t1 = ctx; t1; t1 = TREE_CHAIN (t1))
    {
      /* The four set names have distinct first characters, so a single
	 character suffices to identify the set.  */
      char set = IDENTIFIER_POINTER (TREE_PURPOSE (t1))[0];
      if (set == 'c')
	{
	  /* For now, ignore the construct set. While something can be
	     determined already during parsing, we don't know until end of TU
	     whether additional constructs aren't added through declare variant
	     unless "omp declare variant variant" attribute exists already
	     (so in most of the cases), and we'd need to maintain set of
	     surrounding OpenMP constructs, which is better handled during
	     gimplification. */
	  if (symtab->state == PARSING)
	    {
	      ret = -1;
	      continue;
	    }

	  enum tree_code constructs[5];
	  int nconstructs
	    = omp_constructor_traits_to_codes (TREE_VALUE (t1), constructs);

	  if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0)
	    {
	      if (!cfun->after_inlining)
		{
		  ret = -1;
		  continue;
		}
	      int i;
	      for (i = 0; i < nconstructs; ++i)
		if (constructs[i] == OMP_SIMD)
		  break;
	      if (i < nconstructs)
		{
		  ret = -1;
		  continue;
		}
	      /* If there is no simd, assume it is ok after IPA,
		 constructs should have been checked before. */
	      continue;
	    }

	  int r = omp_construct_selector_matches (constructs, nconstructs,
						  NULL);
	  if (r == 0)
	    return 0;
	  if (r == -1)
	    ret = -1;
	  continue;
	}
      /* Non-construct set: check each selector in it, dispatching on the
	 first character of the selector name.  */
      for (tree t2 = TREE_VALUE (t1); t2; t2 = TREE_CHAIN (t2))
	{
	  const char *sel = IDENTIFIER_POINTER (TREE_PURPOSE (t2));
	  switch (*sel)
	    {
	    case 'v':
	      /* implementation={vendor(...)}: only "gnu" matches.  */
	      if (set == 'i' && !strcmp (s1: sel, s2: "vendor" ))
		for (tree t3 = TREE_VALUE (t2); t3; t3 = TREE_CHAIN (t3))
		  {
		    const char *prop = omp_context_name_list_prop (prop: t3);
		    if (prop == NULL)
		      return 0;
		    if ((!strcmp (s1: prop, s2: " score" ) && TREE_PURPOSE (t3))
			|| !strcmp (s1: prop, s2: "gnu" ))
		      continue;
		    return 0;
		  }
	      break;
	    case 'e':
	      if (set == 'i' && !strcmp (s1: sel, s2: "extension" ))
		/* We don't support any extensions right now. */
		return 0;
	      break;
	    case 'a':
	      if (set == 'i' && !strcmp (s1: sel, s2: "atomic_default_mem_order" ))
		{
		  if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0)
		    break;

		  enum omp_memory_order omo
		    = ((enum omp_memory_order)
		       (omp_requires_mask
			& OMP_REQUIRES_ATOMIC_DEFAULT_MEM_ORDER));
		  if (omo == OMP_MEMORY_ORDER_UNSPECIFIED)
		    {
		      /* We don't know yet, until end of TU. */
		      if (symtab->state == PARSING)
			{
			  ret = -1;
			  break;
			}
		      else
			omo = OMP_MEMORY_ORDER_RELAXED;
		    }
		  /* Skip a leading " score" entry if present.  */
		  tree t3 = TREE_VALUE (t2);
		  const char *prop = IDENTIFIER_POINTER (TREE_PURPOSE (t3));
		  if (!strcmp (s1: prop, s2: " score" ))
		    {
		      t3 = TREE_CHAIN (t3);
		      prop = IDENTIFIER_POINTER (TREE_PURPOSE (t3));
		    }
		  if (!strcmp (s1: prop, s2: "relaxed" )
		      && omo != OMP_MEMORY_ORDER_RELAXED)
		    return 0;
		  else if (!strcmp (s1: prop, s2: "seq_cst" )
			   && omo != OMP_MEMORY_ORDER_SEQ_CST)
		    return 0;
		  else if (!strcmp (s1: prop, s2: "acq_rel" )
			   && omo != OMP_MEMORY_ORDER_ACQ_REL)
		    return 0;
		}
	      /* device={arch(...)}: ask the target hook, then also the
		 offloading targets' properties when offloading may be
		 involved.  */
	      if (set == 'd' && !strcmp (s1: sel, s2: "arch" ))
		for (tree t3 = TREE_VALUE (t2); t3; t3 = TREE_CHAIN (t3))
		  {
		    const char *arch = omp_context_name_list_prop (prop: t3);
		    if (arch == NULL)
		      return 0;
		    int r = 0;
		    if (targetm.omp.device_kind_arch_isa != NULL)
		      r = targetm.omp.device_kind_arch_isa (omp_device_arch,
							    arch);
		    if (r == 0 || (r == -1 && symtab->state != PARSING))
		      {
			/* If we are or might be in a target region or
			   declare target function, need to take into account
			   also offloading values. */
			if (!omp_maybe_offloaded ())
			  return 0;
			if (ENABLE_OFFLOADING)
			  {
			    const char *arches = omp_offload_device_arch;
			    if (omp_offload_device_kind_arch_isa (props: arches,
								  prop: arch))
			      {
				ret = -1;
				continue;
			      }
			  }
			return 0;
		      }
		    else if (r == -1)
		      ret = -1;
		    /* If arch matches on the host, it still might not match
		       in the offloading region. */
		    else if (omp_maybe_offloaded ())
		      ret = -1;
		  }
	      break;
	    case 'u':
	      if (set == 'i' && !strcmp (s1: sel, s2: "unified_address" ))
		{
		  if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0)
		    break;

		  if ((omp_requires_mask & OMP_REQUIRES_UNIFIED_ADDRESS) == 0)
		    {
		      if (symtab->state == PARSING)
			ret = -1;
		      else
			return 0;
		    }
		  break;
		}
	      if (set == 'i' && !strcmp (s1: sel, s2: "unified_shared_memory" ))
		{
		  if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0)
		    break;

		  if ((omp_requires_mask
		       & OMP_REQUIRES_UNIFIED_SHARED_MEMORY) == 0)
		    {
		      if (symtab->state == PARSING)
			ret = -1;
		      else
			return 0;
		    }
		  break;
		}
	      break;
	    case 'd':
	      if (set == 'i' && !strcmp (s1: sel, s2: "dynamic_allocators" ))
		{
		  if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0)
		    break;

		  if ((omp_requires_mask
		       & OMP_REQUIRES_DYNAMIC_ALLOCATORS) == 0)
		    {
		      if (symtab->state == PARSING)
			ret = -1;
		      else
			return 0;
		    }
		  break;
		}
	      break;
	    case 'r':
	      if (set == 'i' && !strcmp (s1: sel, s2: "reverse_offload" ))
		{
		  if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0)
		    break;

		  if ((omp_requires_mask & OMP_REQUIRES_REVERSE_OFFLOAD) == 0)
		    {
		      if (symtab->state == PARSING)
			ret = -1;
		      else
			return 0;
		    }
		  break;
		}
	      break;
	    case 'k':
	      /* device={kind(...)}: "any" always matches; host/nohost
		 depend on whether this is the accel compiler and on
		 possible offloading; other kinds go via the target hook
		 (default: only "cpu" matches).  */
	      if (set == 'd' && !strcmp (s1: sel, s2: "kind" ))
		for (tree t3 = TREE_VALUE (t2); t3; t3 = TREE_CHAIN (t3))
		  {
		    const char *prop = omp_context_name_list_prop (prop: t3);
		    if (prop == NULL)
		      return 0;
		    if (!strcmp (s1: prop, s2: "any" ))
		      continue;
		    if (!strcmp (s1: prop, s2: "host" ))
		      {
#ifdef ACCEL_COMPILER
			return 0;
#else
			if (omp_maybe_offloaded ())
			  ret = -1;
			continue;
#endif
		      }
		    if (!strcmp (s1: prop, s2: "nohost" ))
		      {
#ifndef ACCEL_COMPILER
			if (omp_maybe_offloaded ())
			  ret = -1;
			else
			  return 0;
#endif
			continue;
		      }
		    int r = 0;
		    if (targetm.omp.device_kind_arch_isa != NULL)
		      r = targetm.omp.device_kind_arch_isa (omp_device_kind,
							    prop);
		    else
		      r = strcmp (s1: prop, s2: "cpu" ) == 0;
		    if (r == 0 || (r == -1 && symtab->state != PARSING))
		      {
			/* If we are or might be in a target region or
			   declare target function, need to take into account
			   also offloading values. */
			if (!omp_maybe_offloaded ())
			  return 0;
			if (ENABLE_OFFLOADING)
			  {
			    const char *kinds = omp_offload_device_kind;
			    if (omp_offload_device_kind_arch_isa (props: kinds, prop))
			      {
				ret = -1;
				continue;
			      }
			  }
			return 0;
		      }
		    else if (r == -1)
		      ret = -1;
		    /* If kind matches on the host, it still might not match
		       in the offloading region. */
		    else if (omp_maybe_offloaded ())
		      ret = -1;
		  }
	      break;
	    case 'i':
	      /* device={isa(...)}: like arch, but additionally simd
		 clones of declare simd functions may gain ISAs later.  */
	      if (set == 'd' && !strcmp (s1: sel, s2: "isa" ))
		for (tree t3 = TREE_VALUE (t2); t3; t3 = TREE_CHAIN (t3))
		  {
		    const char *isa = omp_context_name_list_prop (prop: t3);
		    if (isa == NULL)
		      return 0;
		    int r = 0;
		    if (targetm.omp.device_kind_arch_isa != NULL)
		      r = targetm.omp.device_kind_arch_isa (omp_device_isa,
							    isa);
		    if (r == 0 || (r == -1 && symtab->state != PARSING))
		      {
			/* If isa is valid on the target, but not in the
			   current function and current function has
			   #pragma omp declare simd on it, some simd clones
			   might have the isa added later on. */
			if (r == -1
			    && targetm.simd_clone.compute_vecsize_and_simdlen
			    && (cfun == NULL || !cfun->after_inlining))
			  {
			    tree attrs
			      = DECL_ATTRIBUTES (current_function_decl);
			    if (lookup_attribute (attr_name: "omp declare simd" , list: attrs))
			      {
				ret = -1;
				continue;
			      }
			  }
			/* If we are or might be in a target region or
			   declare target function, need to take into account
			   also offloading values. */
			if (!omp_maybe_offloaded ())
			  return 0;
			if (ENABLE_OFFLOADING)
			  {
			    const char *isas = omp_offload_device_isa;
			    if (omp_offload_device_kind_arch_isa (props: isas, prop: isa))
			      {
				ret = -1;
				continue;
			      }
			  }
			return 0;
		      }
		    else if (r == -1)
		      ret = -1;
		    /* If isa matches on the host, it still might not match
		       in the offloading region. */
		    else if (omp_maybe_offloaded ())
		      ret = -1;
		  }
	      break;
	    case 'c':
	      /* user={condition(...)}: constant conditions can be
		 decided now, otherwise defer.  */
	      if (set == 'u' && !strcmp (s1: sel, s2: "condition" ))
		for (tree t3 = TREE_VALUE (t2); t3; t3 = TREE_CHAIN (t3))
		  if (TREE_PURPOSE (t3) == NULL_TREE)
		    {
		      if (integer_zerop (TREE_VALUE (t3)))
			return 0;
		      if (integer_nonzerop (TREE_VALUE (t3)))
			break;
		      ret = -1;
		    }
	      break;
	    default:
	      break;
	    }
	}
    }
  return ret;
}
1631 | |
1632 | /* Compare construct={simd} CLAUSES1 with CLAUSES2, return 0/-1/1/2 as |
1633 | in omp_context_selector_set_compare. */ |
1634 | |
static int
omp_construct_simd_compare (tree clauses1, tree clauses2)
{
  /* An absent clause list is a subset of any clause list.  */
  if (clauses1 == NULL_TREE)
    return clauses2 == NULL_TREE ? 0 : -1;
  if (clauses2 == NULL_TREE)
    return 1;

  int r = 0;
  /* Normalized view of one declare-variant simd clause list, indexed
     per argument number for the per-argument clauses.  */
  struct declare_variant_simd_data {
    bool inbranch, notinbranch;
    tree simdlen;
    auto_vec<tree,16> data_sharing;
    auto_vec<tree,16> aligned;
    declare_variant_simd_data ()
      : inbranch(false), notinbranch(false), simdlen(NULL_TREE) {}
  } data[2];
  unsigned int i;
  for (i = 0; i < 2; i++)
    for (tree c = i ? clauses2 : clauses1; c; c = OMP_CLAUSE_CHAIN (c))
      {
	vec<tree> *v;
	switch (OMP_CLAUSE_CODE (c))
	  {
	  case OMP_CLAUSE_INBRANCH:
	    data[i].inbranch = true;
	    continue;
	  case OMP_CLAUSE_NOTINBRANCH:
	    data[i].notinbranch = true;
	    continue;
	  case OMP_CLAUSE_SIMDLEN:
	    data[i].simdlen = OMP_CLAUSE_SIMDLEN_EXPR (c);
	    continue;
	  case OMP_CLAUSE_UNIFORM:
	  case OMP_CLAUSE_LINEAR:
	    v = &data[i].data_sharing;
	    break;
	  case OMP_CLAUSE_ALIGNED:
	    v = &data[i].aligned;
	    break;
	  default:
	    gcc_unreachable ();
	  }
	/* Store the clause at its argument's index; missing slots stay
	   NULL_TREE.  */
	unsigned HOST_WIDE_INT argno = tree_to_uhwi (OMP_CLAUSE_DECL (c));
	if (argno >= v->length ())
	  v->safe_grow_cleared (len: argno + 1, exact: true);
	(*v)[argno] = c;
      }
  /* Here, r is used as a bitmask, 2 is set if CLAUSES1 has something
     CLAUSES2 doesn't, 1 is set if CLAUSES2 has something CLAUSES1
     doesn't. Thus, r == 3 implies return value 2, r == 1 implies
     -1, r == 2 implies 1 and r == 0 implies 0. */
  if (data[0].inbranch != data[1].inbranch)
    r |= data[0].inbranch ? 2 : 1;
  if (data[0].notinbranch != data[1].notinbranch)
    r |= data[0].notinbranch ? 2 : 1;
  if (!simple_cst_equal (data[0].simdlen, data[1].simdlen))
    {
      /* Two different explicit simdlens are simply incomparable.  */
      if (data[0].simdlen && data[1].simdlen)
	return 2;
      r |= data[0].simdlen ? 2 : 1;
    }
  if (data[0].data_sharing.length () < data[1].data_sharing.length ()
      || data[0].aligned.length () < data[1].aligned.length ())
    r |= 1;
  tree c1, c2;
  FOR_EACH_VEC_ELT (data[0].data_sharing, i, c1)
    {
      c2 = (i < data[1].data_sharing.length ()
	    ? data[1].data_sharing[i] : NULL_TREE);
      if ((c1 == NULL_TREE) != (c2 == NULL_TREE))
	{
	  r |= c1 != NULL_TREE ? 2 : 1;
	  continue;
	}
      if (c1 == NULL_TREE)
	continue;
      /* Clauses present on both sides must agree exactly, otherwise
	 neither list subsumes the other.  */
      if (OMP_CLAUSE_CODE (c1) != OMP_CLAUSE_CODE (c2))
	return 2;
      if (OMP_CLAUSE_CODE (c1) != OMP_CLAUSE_LINEAR)
	continue;
      if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (c1)
	  != OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (c2))
	return 2;
      if (OMP_CLAUSE_LINEAR_KIND (c1) != OMP_CLAUSE_LINEAR_KIND (c2))
	return 2;
      if (!simple_cst_equal (OMP_CLAUSE_LINEAR_STEP (c1),
			     OMP_CLAUSE_LINEAR_STEP (c2)))
	return 2;
    }
  FOR_EACH_VEC_ELT (data[0].aligned, i, c1)
    {
      c2 = i < data[1].aligned.length () ? data[1].aligned[i] : NULL_TREE;
      if ((c1 == NULL_TREE) != (c2 == NULL_TREE))
	{
	  r |= c1 != NULL_TREE ? 2 : 1;
	  continue;
	}
      if (c1 == NULL_TREE)
	continue;
      if (!simple_cst_equal (OMP_CLAUSE_ALIGNED_ALIGNMENT (c1),
			     OMP_CLAUSE_ALIGNED_ALIGNMENT (c2)))
	return 2;
    }
  /* Translate the bitmask into the -1/0/1/2 comparison result.  */
  switch (r)
    {
    case 0: return 0;
    case 1: return -1;
    case 2: return 1;
    case 3: return 2;
    default: gcc_unreachable ();
    }
}
1748 | |
1749 | /* Compare properties of selectors SEL from SET other than construct. |
1750 | Return 0/-1/1/2 as in omp_context_selector_set_compare. |
1751 | Unlike set names or selector names, properties can have duplicates. */ |
1752 | |
static int
omp_context_selector_props_compare (const char *set, const char *sel,
				    tree ctx1, tree ctx2)
{
  int ret = 0;
  /* Two passes: pass 0 looks for properties of CTX1 missing in CTX2,
     pass 1 for properties of CTX2 missing in CTX1.  */
  for (int pass = 0; pass < 2; pass++)
    for (tree t1 = pass ? ctx2 : ctx1; t1; t1 = TREE_CHAIN (t1))
      {
	tree t2;
	for (t2 = pass ? ctx1 : ctx2; t2; t2 = TREE_CHAIN (t2))
	  if (TREE_PURPOSE (t1) == TREE_PURPOSE (t2))
	    {
	      if (TREE_PURPOSE (t1) == NULL_TREE)
		{
		  /* user={condition(...)} properties compare by truth
		     value, not by exact expression.  */
		  if (set[0] == 'u' && strcmp (s1: sel, s2: "condition" ) == 0)
		    {
		      if (integer_zerop (TREE_VALUE (t1))
			  != integer_zerop (TREE_VALUE (t2)))
			return 2;
		      break;
		    }
		  if (simple_cst_equal (TREE_VALUE (t1), TREE_VALUE (t2)))
		    break;
		}
	      /* Scores must be identical, otherwise the selectors are
		 incomparable.  */
	      else if (strcmp (IDENTIFIER_POINTER (TREE_PURPOSE (t1)),
			       s2: " score" ) == 0)
		{
		  if (!simple_cst_equal (TREE_VALUE (t1), TREE_VALUE (t2)))
		    return 2;
		  break;
		}
	      else
		break;
	    }
	  /* A name can be spelled as an identifier on one side and as a
	     string literal on the other; compare the spelled-out names.  */
	  else if (TREE_PURPOSE (t1)
		   && TREE_PURPOSE (t2) == NULL_TREE
		   && TREE_CODE (TREE_VALUE (t2)) == STRING_CST)
	    {
	      const char *p1 = omp_context_name_list_prop (prop: t1);
	      const char *p2 = omp_context_name_list_prop (prop: t2);
	      if (p2
		  && strcmp (s1: p1, s2: p2) == 0
		  && strcmp (s1: p1, s2: " score" ))
		break;
	    }
	  else if (TREE_PURPOSE (t1) == NULL_TREE
		   && TREE_PURPOSE (t2)
		   && TREE_CODE (TREE_VALUE (t1)) == STRING_CST)
	    {
	      const char *p1 = omp_context_name_list_prop (prop: t1);
	      const char *p2 = omp_context_name_list_prop (prop: t2);
	      if (p1
		  && strcmp (s1: p1, s2: p2) == 0
		  && strcmp (s1: p1, s2: " score" ))
		break;
	    }
	if (t2 == NULL_TREE)
	  {
	    /* Property only present on one side.  If both directions
	       have extras the sets are incomparable (2); otherwise the
	       side with extras is the superset.  */
	    int r = pass ? -1 : 1;
	    if (ret && ret != r)
	      return 2;
	    else if (pass)
	      return r;
	    else
	      {
		ret = r;
		break;
	      }
	  }
      }
  return ret;
}
1825 | |
1826 | /* Compare single context selector sets CTX1 and CTX2 with SET name. |
1827 | Return 0 if CTX1 is equal to CTX2, |
1828 | -1 if CTX1 is a strict subset of CTX2, |
1829 | 1 if CTX2 is a strict subset of CTX1, or |
1830 | 2 if neither context is a subset of another one. */ |
1831 | |
int
omp_context_selector_set_compare (const char *set, tree ctx1, tree ctx2)
{
  bool swapped = false;
  int ret = 0;
  int len1 = list_length (ctx1);
  int len2 = list_length (ctx2);
  int cnt = 0;
  /* Normalize so CTX1 is the longer list; SWAPPED records that the
     final result must be negated.  */
  if (len1 < len2)
    {
      swapped = true;
      std::swap (a&: ctx1, b&: ctx2);
      std::swap (a&: len1, b&: len2);
    }
  if (set[0] == 'c')
    {
      tree t1;
      tree t2 = ctx2;
      tree simd = get_identifier ("simd" );
      /* Handle construct set specially. In this case the order
	 of the selector matters too. */
      for (t1 = ctx1; t1; t1 = TREE_CHAIN (t1))
	if (TREE_PURPOSE (t1) == TREE_PURPOSE (t2))
	  {
	    int r = 0;
	    /* Only simd carries clauses that need their own
	       subset comparison.  */
	    if (TREE_PURPOSE (t1) == simd)
	      r = omp_construct_simd_compare (TREE_VALUE (t1),
					      TREE_VALUE (t2));
	    if (r == 2 || (ret && r && (ret < 0) != (r < 0)))
	      return 2;
	    if (ret == 0)
	      ret = r;
	    t2 = TREE_CHAIN (t2);
	    if (t2 == NULL_TREE)
	      {
		t1 = TREE_CHAIN (t1);
		break;
	      }
	  }
	else if (ret < 0)
	  return 2;
	else
	  ret = 1;
      /* CTX2 selectors must all appear, in order, within CTX1.  */
      if (t2 != NULL_TREE)
	return 2;
      if (t1 != NULL_TREE)
	{
	  if (ret < 0)
	    return 2;
	  ret = 1;
	}
      if (ret == 0)
	return 0;
      return swapped ? -ret : ret;
    }
  /* Non-construct sets: order does not matter; compare each selector
     present in both via its properties.  */
  for (tree t1 = ctx1; t1; t1 = TREE_CHAIN (t1))
    {
      tree t2;
      for (t2 = ctx2; t2; t2 = TREE_CHAIN (t2))
	if (TREE_PURPOSE (t1) == TREE_PURPOSE (t2))
	  {
	    const char *sel = IDENTIFIER_POINTER (TREE_PURPOSE (t1));
	    int r = omp_context_selector_props_compare (set, sel,
							TREE_VALUE (t1),
							TREE_VALUE (t2));
	    if (r == 2 || (ret && r && (ret < 0) != (r < 0)))
	      return 2;
	    if (ret == 0)
	      ret = r;
	    cnt++;
	    break;
	  }
      if (t2 == NULL_TREE)
	{
	  if (ret == -1)
	    return 2;
	  ret = 1;
	}
    }
  /* Every selector of the shorter list must have been matched.  */
  if (cnt < len2)
    return 2;
  if (ret == 0)
    return 0;
  return swapped ? -ret : ret;
}
1917 | |
1918 | /* Compare whole context selector specification CTX1 and CTX2. |
1919 | Return 0 if CTX1 is equal to CTX2, |
1920 | -1 if CTX1 is a strict subset of CTX2, |
1921 | 1 if CTX2 is a strict subset of CTX1, or |
1922 | 2 if neither context is a subset of another one. */ |
1923 | |
1924 | static int |
1925 | omp_context_selector_compare (tree ctx1, tree ctx2) |
1926 | { |
1927 | bool swapped = false; |
1928 | int ret = 0; |
1929 | int len1 = list_length (ctx1); |
1930 | int len2 = list_length (ctx2); |
1931 | int cnt = 0; |
1932 | if (len1 < len2) |
1933 | { |
1934 | swapped = true; |
1935 | std::swap (a&: ctx1, b&: ctx2); |
1936 | std::swap (a&: len1, b&: len2); |
1937 | } |
1938 | for (tree t1 = ctx1; t1; t1 = TREE_CHAIN (t1)) |
1939 | { |
1940 | tree t2; |
1941 | for (t2 = ctx2; t2; t2 = TREE_CHAIN (t2)) |
1942 | if (TREE_PURPOSE (t1) == TREE_PURPOSE (t2)) |
1943 | { |
1944 | const char *set = IDENTIFIER_POINTER (TREE_PURPOSE (t1)); |
1945 | int r = omp_context_selector_set_compare (set, TREE_VALUE (t1), |
1946 | TREE_VALUE (t2)); |
1947 | if (r == 2 || (ret && r && (ret < 0) != (r < 0))) |
1948 | return 2; |
1949 | if (ret == 0) |
1950 | ret = r; |
1951 | cnt++; |
1952 | break; |
1953 | } |
1954 | if (t2 == NULL_TREE) |
1955 | { |
1956 | if (ret == -1) |
1957 | return 2; |
1958 | ret = 1; |
1959 | } |
1960 | } |
1961 | if (cnt < len2) |
1962 | return 2; |
1963 | if (ret == 0) |
1964 | return 0; |
1965 | return swapped ? -ret : ret; |
1966 | } |
1967 | |
1968 | /* From context selector CTX, return trait-selector with name SEL in |
1969 | trait-selector-set with name SET if any, or NULL_TREE if not found. |
1970 | If SEL is NULL, return the list of trait-selectors in SET. */ |
1971 | |
1972 | tree |
1973 | omp_get_context_selector (tree ctx, const char *set, const char *sel) |
1974 | { |
1975 | tree setid = get_identifier (set); |
1976 | tree selid = sel ? get_identifier (sel) : NULL_TREE; |
1977 | for (tree t1 = ctx; t1; t1 = TREE_CHAIN (t1)) |
1978 | if (TREE_PURPOSE (t1) == setid) |
1979 | { |
1980 | if (sel == NULL) |
1981 | return TREE_VALUE (t1); |
1982 | for (tree t2 = TREE_VALUE (t1); t2; t2 = TREE_CHAIN (t2)) |
1983 | if (TREE_PURPOSE (t2) == selid) |
1984 | return t2; |
1985 | } |
1986 | return NULL_TREE; |
1987 | } |
1988 | |
/* Needs to be a GC-friendly widest_int variant, but precision is
   desirable to be the same on all targets.  Used to accumulate
   declare variant context selector scores.  */
typedef generic_wide_int <fixed_wide_int_storage <1024> > score_wide_int;
1992 | |
1993 | /* Compute *SCORE for context selector CTX. Return true if the score |
1994 | would be different depending on whether it is a declare simd clone or |
1995 | not. DECLARE_SIMD should be true for the case when it would be |
1996 | a declare simd clone. */ |
1997 | |
1998 | static bool |
1999 | omp_context_compute_score (tree ctx, score_wide_int *score, bool declare_simd) |
2000 | { |
2001 | tree construct = omp_get_context_selector (ctx, set: "construct" , NULL); |
2002 | bool has_kind = omp_get_context_selector (ctx, set: "device" , sel: "kind" ); |
2003 | bool has_arch = omp_get_context_selector (ctx, set: "device" , sel: "arch" ); |
2004 | bool has_isa = omp_get_context_selector (ctx, set: "device" , sel: "isa" ); |
2005 | bool ret = false; |
2006 | *score = 1; |
2007 | for (tree t1 = ctx; t1; t1 = TREE_CHAIN (t1)) |
2008 | if (TREE_VALUE (t1) != construct) |
2009 | for (tree t2 = TREE_VALUE (t1); t2; t2 = TREE_CHAIN (t2)) |
2010 | if (tree t3 = TREE_VALUE (t2)) |
2011 | if (TREE_PURPOSE (t3) |
2012 | && strcmp (IDENTIFIER_POINTER (TREE_PURPOSE (t3)), s2: " score" ) == 0 |
2013 | && TREE_CODE (TREE_VALUE (t3)) == INTEGER_CST) |
2014 | { |
2015 | tree t4 = TREE_VALUE (t3); |
2016 | *score += score_wide_int::from (x: wi::to_wide (t: t4), |
2017 | TYPE_SIGN (TREE_TYPE (t4))); |
2018 | } |
2019 | if (construct || has_kind || has_arch || has_isa) |
2020 | { |
2021 | int scores[12]; |
2022 | enum tree_code constructs[5]; |
2023 | int nconstructs = 0; |
2024 | if (construct) |
2025 | nconstructs = omp_constructor_traits_to_codes (ctx: construct, constructs); |
2026 | if (omp_construct_selector_matches (constructs, nconstructs, scores) |
2027 | == 2) |
2028 | ret = true; |
2029 | int b = declare_simd ? nconstructs + 1 : 0; |
2030 | if (scores[b + nconstructs] + 4U < score->get_precision ()) |
2031 | { |
2032 | for (int n = 0; n < nconstructs; ++n) |
2033 | { |
2034 | if (scores[b + n] < 0) |
2035 | { |
2036 | *score = -1; |
2037 | return ret; |
2038 | } |
2039 | *score += wi::shifted_mask <score_wide_int> (start: scores[b + n], width: 1, negate_p: false); |
2040 | } |
2041 | if (has_kind) |
2042 | *score += wi::shifted_mask <score_wide_int> (start: scores[b + nconstructs], |
2043 | width: 1, negate_p: false); |
2044 | if (has_arch) |
2045 | *score += wi::shifted_mask <score_wide_int> (start: scores[b + nconstructs] + 1, |
2046 | width: 1, negate_p: false); |
2047 | if (has_isa) |
2048 | *score += wi::shifted_mask <score_wide_int> (start: scores[b + nconstructs] + 2, |
2049 | width: 1, negate_p: false); |
2050 | } |
2051 | else /* FIXME: Implement this. */ |
2052 | gcc_unreachable (); |
2053 | } |
2054 | return ret; |
2055 | } |
2056 | |
/* Class describing a single variant candidate considered during
   declare variant resolution.  */
struct GTY(()) omp_declare_variant_entry {
  /* NODE of the variant.  */
  cgraph_node *variant;
  /* Score if not in declare simd clone.  */
  score_wide_int score;
  /* Score if in declare simd clone.  */
  score_wide_int score_in_declare_simd_clone;
  /* Context selector for the variant.  */
  tree ctx;
  /* True if the context selector is known to match already.  */
  bool matches;
};
2070 | |
/* Class describing a function with variants: the base function, the
   artificial resolver node and the candidate variants.  */
struct GTY((for_user)) omp_declare_variant_base_entry {
  /* NODE of the base function.  */
  cgraph_node *base;
  /* NODE of the artificial function created for the deferred variant
     resolution.  */
  cgraph_node *node;
  /* Vector of the variants.  */
  vec<omp_declare_variant_entry, va_gc> *variants;
};
2081 | |
/* Hash traits for the omp_declare_variants table; entries are hashed
   and compared on the base decl plus the full variant vector contents.  */
struct omp_declare_variant_hasher
  : ggc_ptr_hash<omp_declare_variant_base_entry> {
  static hashval_t hash (omp_declare_variant_base_entry *);
  static bool equal (omp_declare_variant_base_entry *,
		     omp_declare_variant_base_entry *);
};
2088 | |
2089 | hashval_t |
2090 | omp_declare_variant_hasher::hash (omp_declare_variant_base_entry *x) |
2091 | { |
2092 | inchash::hash hstate; |
2093 | hstate.add_int (DECL_UID (x->base->decl)); |
2094 | hstate.add_int (v: x->variants->length ()); |
2095 | omp_declare_variant_entry *variant; |
2096 | unsigned int i; |
2097 | FOR_EACH_VEC_SAFE_ELT (x->variants, i, variant) |
2098 | { |
2099 | hstate.add_int (DECL_UID (variant->variant->decl)); |
2100 | hstate.add_wide_int (x: variant->score); |
2101 | hstate.add_wide_int (x: variant->score_in_declare_simd_clone); |
2102 | hstate.add_ptr (ptr: variant->ctx); |
2103 | hstate.add_int (v: variant->matches); |
2104 | } |
2105 | return hstate.end (); |
2106 | } |
2107 | |
2108 | bool |
2109 | omp_declare_variant_hasher::equal (omp_declare_variant_base_entry *x, |
2110 | omp_declare_variant_base_entry *y) |
2111 | { |
2112 | if (x->base != y->base |
2113 | || x->variants->length () != y->variants->length ()) |
2114 | return false; |
2115 | omp_declare_variant_entry *variant; |
2116 | unsigned int i; |
2117 | FOR_EACH_VEC_SAFE_ELT (x->variants, i, variant) |
2118 | if (variant->variant != (*y->variants)[i].variant |
2119 | || variant->score != (*y->variants)[i].score |
2120 | || (variant->score_in_declare_simd_clone |
2121 | != (*y->variants)[i].score_in_declare_simd_clone) |
2122 | || variant->ctx != (*y->variants)[i].ctx |
2123 | || variant->matches != (*y->variants)[i].matches) |
2124 | return false; |
2125 | return true; |
2126 | } |
2127 | |
/* Table of deferred declare variant resolutions, keyed by base function
   and the set of candidate variants.  */
static GTY(()) hash_table<omp_declare_variant_hasher> *omp_declare_variants;
2129 | |
/* Hash traits for the omp_declare_variant_alt table; entries are keyed
   solely on the artificial resolver NODE.  */
struct omp_declare_variant_alt_hasher
  : ggc_ptr_hash<omp_declare_variant_base_entry> {
  static hashval_t hash (omp_declare_variant_base_entry *);
  static bool equal (omp_declare_variant_base_entry *,
		     omp_declare_variant_base_entry *);
};
2136 | |
/* Hash on the DECL_UID of the artificial resolver node.  */

hashval_t
omp_declare_variant_alt_hasher::hash (omp_declare_variant_base_entry *x)
{
  return DECL_UID (x->node->decl);
}
2142 | |
/* Entries are equal iff they describe the same resolver node.  */

bool
omp_declare_variant_alt_hasher::equal (omp_declare_variant_base_entry *x,
				       omp_declare_variant_base_entry *y)
{
  return x->node == y->node;
}
2149 | |
/* Table mapping an artificial resolver decl to its
   omp_declare_variant_base_entry, keyed on the resolver's DECL_UID.  */
static GTY(()) hash_table<omp_declare_variant_alt_hasher>
  *omp_declare_variant_alt;
2152 | |
2153 | /* Try to resolve declare variant after gimplification. */ |
2154 | |
2155 | static tree |
2156 | omp_resolve_late_declare_variant (tree alt) |
2157 | { |
2158 | cgraph_node *node = cgraph_node::get (decl: alt); |
2159 | cgraph_node *cur_node = cgraph_node::get (cfun->decl); |
2160 | if (node == NULL |
2161 | || !node->declare_variant_alt |
2162 | || !cfun->after_inlining) |
2163 | return alt; |
2164 | |
2165 | omp_declare_variant_base_entry entry; |
2166 | entry.base = NULL; |
2167 | entry.node = node; |
2168 | entry.variants = NULL; |
2169 | omp_declare_variant_base_entry *entryp |
2170 | = omp_declare_variant_alt->find_with_hash (comparable: &entry, DECL_UID (alt)); |
2171 | |
2172 | unsigned int i, j; |
2173 | omp_declare_variant_entry *varentry1, *varentry2; |
2174 | auto_vec <bool, 16> matches; |
2175 | unsigned int nmatches = 0; |
2176 | FOR_EACH_VEC_SAFE_ELT (entryp->variants, i, varentry1) |
2177 | { |
2178 | if (varentry1->matches) |
2179 | { |
2180 | /* This has been checked to be ok already. */ |
2181 | matches.safe_push (obj: true); |
2182 | nmatches++; |
2183 | continue; |
2184 | } |
2185 | switch (omp_context_selector_matches (ctx: varentry1->ctx)) |
2186 | { |
2187 | case 0: |
2188 | matches.safe_push (obj: false); |
2189 | break; |
2190 | case -1: |
2191 | return alt; |
2192 | default: |
2193 | matches.safe_push (obj: true); |
2194 | nmatches++; |
2195 | break; |
2196 | } |
2197 | } |
2198 | |
2199 | if (nmatches == 0) |
2200 | return entryp->base->decl; |
2201 | |
2202 | /* A context selector that is a strict subset of another context selector |
2203 | has a score of zero. */ |
2204 | FOR_EACH_VEC_SAFE_ELT (entryp->variants, i, varentry1) |
2205 | if (matches[i]) |
2206 | { |
2207 | for (j = i + 1; |
2208 | vec_safe_iterate (v: entryp->variants, ix: j, ptr: &varentry2); ++j) |
2209 | if (matches[j]) |
2210 | { |
2211 | int r = omp_context_selector_compare (ctx1: varentry1->ctx, |
2212 | ctx2: varentry2->ctx); |
2213 | if (r == -1) |
2214 | { |
2215 | /* ctx1 is a strict subset of ctx2, ignore ctx1. */ |
2216 | matches[i] = false; |
2217 | break; |
2218 | } |
2219 | else if (r == 1) |
2220 | /* ctx2 is a strict subset of ctx1, remove ctx2. */ |
2221 | matches[j] = false; |
2222 | } |
2223 | } |
2224 | |
2225 | score_wide_int max_score = -1; |
2226 | varentry2 = NULL; |
2227 | FOR_EACH_VEC_SAFE_ELT (entryp->variants, i, varentry1) |
2228 | if (matches[i]) |
2229 | { |
2230 | score_wide_int score |
2231 | = (cur_node->simdclone ? varentry1->score_in_declare_simd_clone |
2232 | : varentry1->score); |
2233 | if (score > max_score) |
2234 | { |
2235 | max_score = score; |
2236 | varentry2 = varentry1; |
2237 | } |
2238 | } |
2239 | return varentry2->variant->decl; |
2240 | } |
2241 | |
2242 | /* Hook to adjust hash tables on cgraph_node removal. */ |
2243 | |
2244 | static void |
2245 | omp_declare_variant_remove_hook (struct cgraph_node *node, void *) |
2246 | { |
2247 | if (!node->declare_variant_alt) |
2248 | return; |
2249 | |
2250 | /* Drop this hash table completely. */ |
2251 | omp_declare_variants = NULL; |
2252 | /* And remove node from the other hash table. */ |
2253 | if (omp_declare_variant_alt) |
2254 | { |
2255 | omp_declare_variant_base_entry entry; |
2256 | entry.base = NULL; |
2257 | entry.node = node; |
2258 | entry.variants = NULL; |
2259 | omp_declare_variant_alt->remove_elt_with_hash (comparable: &entry, |
2260 | DECL_UID (node->decl)); |
2261 | } |
2262 | } |
2263 | |
2264 | /* Try to resolve declare variant, return the variant decl if it should |
2265 | be used instead of base, or base otherwise. */ |
2266 | |
2267 | tree |
2268 | omp_resolve_declare_variant (tree base) |
2269 | { |
2270 | tree variant1 = NULL_TREE, variant2 = NULL_TREE; |
2271 | if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) |
2272 | return omp_resolve_late_declare_variant (alt: base); |
2273 | |
2274 | auto_vec <tree, 16> variants; |
2275 | auto_vec <bool, 16> defer; |
2276 | bool any_deferred = false; |
2277 | for (tree attr = DECL_ATTRIBUTES (base); attr; attr = TREE_CHAIN (attr)) |
2278 | { |
2279 | attr = lookup_attribute (attr_name: "omp declare variant base" , list: attr); |
2280 | if (attr == NULL_TREE) |
2281 | break; |
2282 | if (TREE_CODE (TREE_PURPOSE (TREE_VALUE (attr))) != FUNCTION_DECL) |
2283 | continue; |
2284 | cgraph_node *node = cgraph_node::get (decl: base); |
2285 | /* If this is already a magic decl created by this function, |
2286 | don't process it again. */ |
2287 | if (node && node->declare_variant_alt) |
2288 | return base; |
2289 | switch (omp_context_selector_matches (TREE_VALUE (TREE_VALUE (attr)))) |
2290 | { |
2291 | case 0: |
2292 | /* No match, ignore. */ |
2293 | break; |
2294 | case -1: |
2295 | /* Needs to be deferred. */ |
2296 | any_deferred = true; |
2297 | variants.safe_push (obj: attr); |
2298 | defer.safe_push (obj: true); |
2299 | break; |
2300 | default: |
2301 | variants.safe_push (obj: attr); |
2302 | defer.safe_push (obj: false); |
2303 | break; |
2304 | } |
2305 | } |
2306 | if (variants.length () == 0) |
2307 | return base; |
2308 | |
2309 | if (any_deferred) |
2310 | { |
2311 | score_wide_int max_score1 = 0; |
2312 | score_wide_int max_score2 = 0; |
2313 | bool first = true; |
2314 | unsigned int i; |
2315 | tree attr1, attr2; |
2316 | omp_declare_variant_base_entry entry; |
2317 | entry.base = cgraph_node::get_create (base); |
2318 | entry.node = NULL; |
2319 | vec_alloc (v&: entry.variants, nelems: variants.length ()); |
2320 | FOR_EACH_VEC_ELT (variants, i, attr1) |
2321 | { |
2322 | score_wide_int score1; |
2323 | score_wide_int score2; |
2324 | bool need_two; |
2325 | tree ctx = TREE_VALUE (TREE_VALUE (attr1)); |
2326 | need_two = omp_context_compute_score (ctx, score: &score1, declare_simd: false); |
2327 | if (need_two) |
2328 | omp_context_compute_score (ctx, score: &score2, declare_simd: true); |
2329 | else |
2330 | score2 = score1; |
2331 | if (first) |
2332 | { |
2333 | first = false; |
2334 | max_score1 = score1; |
2335 | max_score2 = score2; |
2336 | if (!defer[i]) |
2337 | { |
2338 | variant1 = attr1; |
2339 | variant2 = attr1; |
2340 | } |
2341 | } |
2342 | else |
2343 | { |
2344 | if (max_score1 == score1) |
2345 | variant1 = NULL_TREE; |
2346 | else if (score1 > max_score1) |
2347 | { |
2348 | max_score1 = score1; |
2349 | variant1 = defer[i] ? NULL_TREE : attr1; |
2350 | } |
2351 | if (max_score2 == score2) |
2352 | variant2 = NULL_TREE; |
2353 | else if (score2 > max_score2) |
2354 | { |
2355 | max_score2 = score2; |
2356 | variant2 = defer[i] ? NULL_TREE : attr1; |
2357 | } |
2358 | } |
2359 | omp_declare_variant_entry varentry; |
2360 | varentry.variant |
2361 | = cgraph_node::get_create (TREE_PURPOSE (TREE_VALUE (attr1))); |
2362 | varentry.score = score1; |
2363 | varentry.score_in_declare_simd_clone = score2; |
2364 | varentry.ctx = ctx; |
2365 | varentry.matches = !defer[i]; |
2366 | entry.variants->quick_push (obj: varentry); |
2367 | } |
2368 | |
2369 | /* If there is a clear winner variant with the score which is not |
2370 | deferred, verify it is not a strict subset of any other context |
2371 | selector and if it is not, it is the best alternative no matter |
2372 | whether the others do or don't match. */ |
2373 | if (variant1 && variant1 == variant2) |
2374 | { |
2375 | tree ctx1 = TREE_VALUE (TREE_VALUE (variant1)); |
2376 | FOR_EACH_VEC_ELT (variants, i, attr2) |
2377 | { |
2378 | if (attr2 == variant1) |
2379 | continue; |
2380 | tree ctx2 = TREE_VALUE (TREE_VALUE (attr2)); |
2381 | int r = omp_context_selector_compare (ctx1, ctx2); |
2382 | if (r == -1) |
2383 | { |
2384 | /* The winner is a strict subset of ctx2, can't |
2385 | decide now. */ |
2386 | variant1 = NULL_TREE; |
2387 | break; |
2388 | } |
2389 | } |
2390 | if (variant1) |
2391 | { |
2392 | vec_free (v&: entry.variants); |
2393 | return TREE_PURPOSE (TREE_VALUE (variant1)); |
2394 | } |
2395 | } |
2396 | |
2397 | static struct cgraph_node_hook_list *node_removal_hook_holder; |
2398 | if (!node_removal_hook_holder) |
2399 | node_removal_hook_holder |
2400 | = symtab->add_cgraph_removal_hook (hook: omp_declare_variant_remove_hook, |
2401 | NULL); |
2402 | |
2403 | if (omp_declare_variants == NULL) |
2404 | omp_declare_variants |
2405 | = hash_table<omp_declare_variant_hasher>::create_ggc (n: 64); |
2406 | omp_declare_variant_base_entry **slot |
2407 | = omp_declare_variants->find_slot (value: &entry, insert: INSERT); |
2408 | if (*slot != NULL) |
2409 | { |
2410 | vec_free (v&: entry.variants); |
2411 | return (*slot)->node->decl; |
2412 | } |
2413 | |
2414 | *slot = ggc_cleared_alloc<omp_declare_variant_base_entry> (); |
2415 | (*slot)->base = entry.base; |
2416 | (*slot)->node = entry.base; |
2417 | (*slot)->variants = entry.variants; |
2418 | tree alt = build_decl (DECL_SOURCE_LOCATION (base), FUNCTION_DECL, |
2419 | DECL_NAME (base), TREE_TYPE (base)); |
2420 | DECL_ARTIFICIAL (alt) = 1; |
2421 | DECL_IGNORED_P (alt) = 1; |
2422 | TREE_STATIC (alt) = 1; |
2423 | tree attributes = DECL_ATTRIBUTES (base); |
2424 | if (lookup_attribute (attr_name: "noipa" , list: attributes) == NULL) |
2425 | { |
2426 | attributes = tree_cons (get_identifier ("noipa" ), NULL, attributes); |
2427 | if (lookup_attribute (attr_name: "noinline" , list: attributes) == NULL) |
2428 | attributes = tree_cons (get_identifier ("noinline" ), NULL, |
2429 | attributes); |
2430 | if (lookup_attribute (attr_name: "noclone" , list: attributes) == NULL) |
2431 | attributes = tree_cons (get_identifier ("noclone" ), NULL, |
2432 | attributes); |
2433 | if (lookup_attribute (attr_name: "no_icf" , list: attributes) == NULL) |
2434 | attributes = tree_cons (get_identifier ("no_icf" ), NULL, |
2435 | attributes); |
2436 | } |
2437 | DECL_ATTRIBUTES (alt) = attributes; |
2438 | DECL_INITIAL (alt) = error_mark_node; |
2439 | (*slot)->node = cgraph_node::create (decl: alt); |
2440 | (*slot)->node->declare_variant_alt = 1; |
2441 | (*slot)->node->create_reference (referred_node: entry.base, use_type: IPA_REF_ADDR); |
2442 | omp_declare_variant_entry *varentry; |
2443 | FOR_EACH_VEC_SAFE_ELT (entry.variants, i, varentry) |
2444 | (*slot)->node->create_reference (referred_node: varentry->variant, use_type: IPA_REF_ADDR); |
2445 | if (omp_declare_variant_alt == NULL) |
2446 | omp_declare_variant_alt |
2447 | = hash_table<omp_declare_variant_alt_hasher>::create_ggc (n: 64); |
2448 | *omp_declare_variant_alt->find_slot_with_hash (comparable: *slot, DECL_UID (alt), |
2449 | insert: INSERT) = *slot; |
2450 | return alt; |
2451 | } |
2452 | |
2453 | if (variants.length () == 1) |
2454 | return TREE_PURPOSE (TREE_VALUE (variants[0])); |
2455 | |
2456 | /* A context selector that is a strict subset of another context selector |
2457 | has a score of zero. */ |
2458 | tree attr1, attr2; |
2459 | unsigned int i, j; |
2460 | FOR_EACH_VEC_ELT (variants, i, attr1) |
2461 | if (attr1) |
2462 | { |
2463 | tree ctx1 = TREE_VALUE (TREE_VALUE (attr1)); |
2464 | FOR_EACH_VEC_ELT_FROM (variants, j, attr2, i + 1) |
2465 | if (attr2) |
2466 | { |
2467 | tree ctx2 = TREE_VALUE (TREE_VALUE (attr2)); |
2468 | int r = omp_context_selector_compare (ctx1, ctx2); |
2469 | if (r == -1) |
2470 | { |
2471 | /* ctx1 is a strict subset of ctx2, remove |
2472 | attr1 from the vector. */ |
2473 | variants[i] = NULL_TREE; |
2474 | break; |
2475 | } |
2476 | else if (r == 1) |
2477 | /* ctx2 is a strict subset of ctx1, remove attr2 |
2478 | from the vector. */ |
2479 | variants[j] = NULL_TREE; |
2480 | } |
2481 | } |
2482 | score_wide_int max_score1 = 0; |
2483 | score_wide_int max_score2 = 0; |
2484 | bool first = true; |
2485 | FOR_EACH_VEC_ELT (variants, i, attr1) |
2486 | if (attr1) |
2487 | { |
2488 | if (variant1) |
2489 | { |
2490 | score_wide_int score1; |
2491 | score_wide_int score2; |
2492 | bool need_two; |
2493 | tree ctx; |
2494 | if (first) |
2495 | { |
2496 | first = false; |
2497 | ctx = TREE_VALUE (TREE_VALUE (variant1)); |
2498 | need_two = omp_context_compute_score (ctx, score: &max_score1, declare_simd: false); |
2499 | if (need_two) |
2500 | omp_context_compute_score (ctx, score: &max_score2, declare_simd: true); |
2501 | else |
2502 | max_score2 = max_score1; |
2503 | } |
2504 | ctx = TREE_VALUE (TREE_VALUE (attr1)); |
2505 | need_two = omp_context_compute_score (ctx, score: &score1, declare_simd: false); |
2506 | if (need_two) |
2507 | omp_context_compute_score (ctx, score: &score2, declare_simd: true); |
2508 | else |
2509 | score2 = score1; |
2510 | if (score1 > max_score1) |
2511 | { |
2512 | max_score1 = score1; |
2513 | variant1 = attr1; |
2514 | } |
2515 | if (score2 > max_score2) |
2516 | { |
2517 | max_score2 = score2; |
2518 | variant2 = attr1; |
2519 | } |
2520 | } |
2521 | else |
2522 | { |
2523 | variant1 = attr1; |
2524 | variant2 = attr1; |
2525 | } |
2526 | } |
2527 | /* If there is a disagreement on which variant has the highest score |
2528 | depending on whether it will be in a declare simd clone or not, |
2529 | punt for now and defer until after IPA where we will know that. */ |
2530 | return ((variant1 && variant1 == variant2) |
2531 | ? TREE_PURPOSE (TREE_VALUE (variant1)) : base); |
2532 | } |
2533 | |
2534 | void |
2535 | omp_lto_output_declare_variant_alt (lto_simple_output_block *ob, |
2536 | cgraph_node *node, |
2537 | lto_symtab_encoder_t encoder) |
2538 | { |
2539 | gcc_assert (node->declare_variant_alt); |
2540 | |
2541 | omp_declare_variant_base_entry entry; |
2542 | entry.base = NULL; |
2543 | entry.node = node; |
2544 | entry.variants = NULL; |
2545 | omp_declare_variant_base_entry *entryp |
2546 | = omp_declare_variant_alt->find_with_hash (comparable: &entry, DECL_UID (node->decl)); |
2547 | gcc_assert (entryp); |
2548 | |
2549 | int nbase = lto_symtab_encoder_lookup (encoder, node: entryp->base); |
2550 | gcc_assert (nbase != LCC_NOT_FOUND); |
2551 | streamer_write_hwi_stream (ob->main_stream, nbase); |
2552 | |
2553 | streamer_write_hwi_stream (ob->main_stream, entryp->variants->length ()); |
2554 | |
2555 | unsigned int i; |
2556 | omp_declare_variant_entry *varentry; |
2557 | FOR_EACH_VEC_SAFE_ELT (entryp->variants, i, varentry) |
2558 | { |
2559 | int nvar = lto_symtab_encoder_lookup (encoder, node: varentry->variant); |
2560 | gcc_assert (nvar != LCC_NOT_FOUND); |
2561 | streamer_write_hwi_stream (ob->main_stream, nvar); |
2562 | |
2563 | for (score_wide_int *w = &varentry->score; ; |
2564 | w = &varentry->score_in_declare_simd_clone) |
2565 | { |
2566 | unsigned len = w->get_len (); |
2567 | streamer_write_hwi_stream (ob->main_stream, len); |
2568 | const HOST_WIDE_INT *val = w->get_val (); |
2569 | for (unsigned j = 0; j < len; j++) |
2570 | streamer_write_hwi_stream (ob->main_stream, val[j]); |
2571 | if (w == &varentry->score_in_declare_simd_clone) |
2572 | break; |
2573 | } |
2574 | |
2575 | HOST_WIDE_INT cnt = -1; |
2576 | HOST_WIDE_INT i = varentry->matches ? 1 : 0; |
2577 | for (tree attr = DECL_ATTRIBUTES (entryp->base->decl); |
2578 | attr; attr = TREE_CHAIN (attr), i += 2) |
2579 | { |
2580 | attr = lookup_attribute (attr_name: "omp declare variant base" , list: attr); |
2581 | if (attr == NULL_TREE) |
2582 | break; |
2583 | |
2584 | if (varentry->ctx == TREE_VALUE (TREE_VALUE (attr))) |
2585 | { |
2586 | cnt = i; |
2587 | break; |
2588 | } |
2589 | } |
2590 | |
2591 | gcc_assert (cnt != -1); |
2592 | streamer_write_hwi_stream (ob->main_stream, cnt); |
2593 | } |
2594 | } |
2595 | |
2596 | void |
2597 | omp_lto_input_declare_variant_alt (lto_input_block *ib, cgraph_node *node, |
2598 | vec<symtab_node *> nodes) |
2599 | { |
2600 | gcc_assert (node->declare_variant_alt); |
2601 | omp_declare_variant_base_entry *entryp |
2602 | = ggc_cleared_alloc<omp_declare_variant_base_entry> (); |
2603 | entryp->base = dyn_cast<cgraph_node *> (p: nodes[streamer_read_hwi (ib)]); |
2604 | entryp->node = node; |
2605 | unsigned int len = streamer_read_hwi (ib); |
2606 | vec_alloc (v&: entryp->variants, nelems: len); |
2607 | |
2608 | for (unsigned int i = 0; i < len; i++) |
2609 | { |
2610 | omp_declare_variant_entry varentry; |
2611 | varentry.variant |
2612 | = dyn_cast<cgraph_node *> (p: nodes[streamer_read_hwi (ib)]); |
2613 | for (score_wide_int *w = &varentry.score; ; |
2614 | w = &varentry.score_in_declare_simd_clone) |
2615 | { |
2616 | unsigned len2 = streamer_read_hwi (ib); |
2617 | HOST_WIDE_INT arr[WIDE_INT_MAX_HWIS (1024)]; |
2618 | gcc_assert (len2 <= WIDE_INT_MAX_HWIS (1024)); |
2619 | for (unsigned int j = 0; j < len2; j++) |
2620 | arr[j] = streamer_read_hwi (ib); |
2621 | *w = score_wide_int::from_array (val: arr, len: len2, need_canon_p: true); |
2622 | if (w == &varentry.score_in_declare_simd_clone) |
2623 | break; |
2624 | } |
2625 | |
2626 | HOST_WIDE_INT cnt = streamer_read_hwi (ib); |
2627 | HOST_WIDE_INT j = 0; |
2628 | varentry.ctx = NULL_TREE; |
2629 | varentry.matches = (cnt & 1) ? true : false; |
2630 | cnt &= ~HOST_WIDE_INT_1; |
2631 | for (tree attr = DECL_ATTRIBUTES (entryp->base->decl); |
2632 | attr; attr = TREE_CHAIN (attr), j += 2) |
2633 | { |
2634 | attr = lookup_attribute (attr_name: "omp declare variant base" , list: attr); |
2635 | if (attr == NULL_TREE) |
2636 | break; |
2637 | |
2638 | if (cnt == j) |
2639 | { |
2640 | varentry.ctx = TREE_VALUE (TREE_VALUE (attr)); |
2641 | break; |
2642 | } |
2643 | } |
2644 | gcc_assert (varentry.ctx != NULL_TREE); |
2645 | entryp->variants->quick_push (obj: varentry); |
2646 | } |
2647 | if (omp_declare_variant_alt == NULL) |
2648 | omp_declare_variant_alt |
2649 | = hash_table<omp_declare_variant_alt_hasher>::create_ggc (n: 64); |
2650 | *omp_declare_variant_alt->find_slot_with_hash (comparable: entryp, DECL_UID (node->decl), |
2651 | insert: INSERT) = entryp; |
2652 | } |
2653 | |
2654 | /* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK |
2655 | macro on gomp-constants.h. We do not check for overflow. */ |
2656 | |
2657 | tree |
2658 | oacc_launch_pack (unsigned code, tree device, unsigned op) |
2659 | { |
2660 | tree res; |
2661 | |
2662 | res = build_int_cst (unsigned_type_node, GOMP_LAUNCH_PACK (code, 0, op)); |
2663 | if (device) |
2664 | { |
2665 | device = fold_build2 (LSHIFT_EXPR, unsigned_type_node, |
2666 | device, build_int_cst (unsigned_type_node, |
2667 | GOMP_LAUNCH_DEVICE_SHIFT)); |
2668 | res = fold_build2 (BIT_IOR_EXPR, unsigned_type_node, res, device); |
2669 | } |
2670 | return res; |
2671 | } |
2672 | |
2673 | /* FIXME: What is the following comment for? */ |
2674 | /* Look for compute grid dimension clauses and convert to an attribute |
2675 | attached to FN. This permits the target-side code to (a) massage |
2676 | the dimensions, (b) emit that data and (c) optimize. Non-constant |
2677 | dimensions are pushed onto ARGS. |
2678 | |
2679 | The attribute value is a TREE_LIST. A set of dimensions is |
2680 | represented as a list of INTEGER_CST. Those that are runtime |
2681 | exprs are represented as an INTEGER_CST of zero. |
2682 | |
2683 | TODO: Normally the attribute will just contain a single such list. If |
2684 | however it contains a list of lists, this will represent the use of |
2685 | device_type. Each member of the outer list is an assoc list of |
2686 | dimensions, keyed by the device type. The first entry will be the |
2687 | default. Well, that's the plan. */ |
2688 | |
2689 | /* Replace any existing oacc fn attribute with updated dimensions. */ |
2690 | |
2691 | /* Variant working on a list of attributes. */ |
2692 | |
2693 | tree |
2694 | oacc_replace_fn_attrib_attr (tree attribs, tree dims) |
2695 | { |
2696 | tree ident = get_identifier (OACC_FN_ATTRIB); |
2697 | |
2698 | /* If we happen to be present as the first attrib, drop it. */ |
2699 | if (attribs && TREE_PURPOSE (attribs) == ident) |
2700 | attribs = TREE_CHAIN (attribs); |
2701 | return tree_cons (ident, dims, attribs); |
2702 | } |
2703 | |
2704 | /* Variant working on a function decl. */ |
2705 | |
2706 | void |
2707 | oacc_replace_fn_attrib (tree fn, tree dims) |
2708 | { |
2709 | DECL_ATTRIBUTES (fn) |
2710 | = oacc_replace_fn_attrib_attr (DECL_ATTRIBUTES (fn), dims); |
2711 | } |
2712 | |
2713 | /* Scan CLAUSES for launch dimensions and attach them to the oacc |
2714 | function attribute. Push any that are non-constant onto the ARGS |
2715 | list, along with an appropriate GOMP_LAUNCH_DIM tag. */ |
2716 | |
2717 | void |
2718 | oacc_set_fn_attrib (tree fn, tree clauses, vec<tree> *args) |
2719 | { |
2720 | /* Must match GOMP_DIM ordering. */ |
2721 | static const omp_clause_code ids[] |
2722 | = { OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS, |
2723 | OMP_CLAUSE_VECTOR_LENGTH }; |
2724 | unsigned ix; |
2725 | tree dims[GOMP_DIM_MAX]; |
2726 | |
2727 | tree attr = NULL_TREE; |
2728 | unsigned non_const = 0; |
2729 | |
2730 | for (ix = GOMP_DIM_MAX; ix--;) |
2731 | { |
2732 | tree clause = omp_find_clause (clauses, kind: ids[ix]); |
2733 | tree dim = NULL_TREE; |
2734 | |
2735 | if (clause) |
2736 | dim = OMP_CLAUSE_EXPR (clause, ids[ix]); |
2737 | dims[ix] = dim; |
2738 | if (dim && TREE_CODE (dim) != INTEGER_CST) |
2739 | { |
2740 | dim = integer_zero_node; |
2741 | non_const |= GOMP_DIM_MASK (ix); |
2742 | } |
2743 | attr = tree_cons (NULL_TREE, dim, attr); |
2744 | } |
2745 | |
2746 | oacc_replace_fn_attrib (fn, dims: attr); |
2747 | |
2748 | if (non_const) |
2749 | { |
2750 | /* Push a dynamic argument set. */ |
2751 | args->safe_push (obj: oacc_launch_pack (GOMP_LAUNCH_DIM, |
2752 | NULL_TREE, op: non_const)); |
2753 | for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++) |
2754 | if (non_const & GOMP_DIM_MASK (ix)) |
2755 | args->safe_push (obj: dims[ix]); |
2756 | } |
2757 | } |
2758 | |
2759 | /* Verify OpenACC routine clauses. |
2760 | |
2761 | Returns 0 if FNDECL should be marked with an OpenACC 'routine' directive, 1 |
2762 | if it has already been marked in compatible way, and -1 if incompatible. |
2763 | Upon returning, the chain of clauses will contain exactly one clause |
2764 | specifying the level of parallelism. */ |
2765 | |
2766 | int |
2767 | oacc_verify_routine_clauses (tree fndecl, tree *clauses, location_t loc, |
2768 | const char *routine_str) |
2769 | { |
2770 | tree c_level = NULL_TREE; |
2771 | tree c_nohost = NULL_TREE; |
2772 | tree c_p = NULL_TREE; |
2773 | for (tree c = *clauses; c; c_p = c, c = OMP_CLAUSE_CHAIN (c)) |
2774 | switch (OMP_CLAUSE_CODE (c)) |
2775 | { |
2776 | case OMP_CLAUSE_GANG: |
2777 | case OMP_CLAUSE_WORKER: |
2778 | case OMP_CLAUSE_VECTOR: |
2779 | case OMP_CLAUSE_SEQ: |
2780 | if (c_level == NULL_TREE) |
2781 | c_level = c; |
2782 | else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_CODE (c_level)) |
2783 | { |
2784 | /* This has already been diagnosed in the front ends. */ |
2785 | /* Drop the duplicate clause. */ |
2786 | gcc_checking_assert (c_p != NULL_TREE); |
2787 | OMP_CLAUSE_CHAIN (c_p) = OMP_CLAUSE_CHAIN (c); |
2788 | c = c_p; |
2789 | } |
2790 | else |
2791 | { |
2792 | error_at (OMP_CLAUSE_LOCATION (c), |
2793 | "%qs specifies a conflicting level of parallelism" , |
2794 | omp_clause_code_name[OMP_CLAUSE_CODE (c)]); |
2795 | inform (OMP_CLAUSE_LOCATION (c_level), |
2796 | "... to the previous %qs clause here" , |
2797 | omp_clause_code_name[OMP_CLAUSE_CODE (c_level)]); |
2798 | /* Drop the conflicting clause. */ |
2799 | gcc_checking_assert (c_p != NULL_TREE); |
2800 | OMP_CLAUSE_CHAIN (c_p) = OMP_CLAUSE_CHAIN (c); |
2801 | c = c_p; |
2802 | } |
2803 | break; |
2804 | case OMP_CLAUSE_NOHOST: |
2805 | /* Don't worry about duplicate clauses here. */ |
2806 | c_nohost = c; |
2807 | break; |
2808 | default: |
2809 | gcc_unreachable (); |
2810 | } |
2811 | if (c_level == NULL_TREE) |
2812 | { |
2813 | /* Default to an implicit 'seq' clause. */ |
2814 | c_level = build_omp_clause (loc, OMP_CLAUSE_SEQ); |
2815 | OMP_CLAUSE_CHAIN (c_level) = *clauses; |
2816 | *clauses = c_level; |
2817 | } |
2818 | /* In *clauses, we now have exactly one clause specifying the level of |
2819 | parallelism. */ |
2820 | |
2821 | tree attr |
2822 | = lookup_attribute (attr_name: "omp declare target" , DECL_ATTRIBUTES (fndecl)); |
2823 | if (attr != NULL_TREE) |
2824 | { |
2825 | /* Diagnose if "#pragma omp declare target" has also been applied. */ |
2826 | if (TREE_VALUE (attr) == NULL_TREE) |
2827 | { |
2828 | /* See <https://gcc.gnu.org/PR93465>; the semantics of combining |
2829 | OpenACC and OpenMP 'target' are not clear. */ |
2830 | error_at (loc, |
2831 | "cannot apply %<%s%> to %qD, which has also been" |
2832 | " marked with an OpenMP 'declare target' directive" , |
2833 | routine_str, fndecl); |
2834 | /* Incompatible. */ |
2835 | return -1; |
2836 | } |
2837 | |
2838 | /* If a "#pragma acc routine" has already been applied, just verify |
2839 | this one for compatibility. */ |
2840 | /* Collect previous directive's clauses. */ |
2841 | tree c_level_p = NULL_TREE; |
2842 | tree c_nohost_p = NULL_TREE; |
2843 | for (tree c = TREE_VALUE (attr); c; c = OMP_CLAUSE_CHAIN (c)) |
2844 | switch (OMP_CLAUSE_CODE (c)) |
2845 | { |
2846 | case OMP_CLAUSE_GANG: |
2847 | case OMP_CLAUSE_WORKER: |
2848 | case OMP_CLAUSE_VECTOR: |
2849 | case OMP_CLAUSE_SEQ: |
2850 | gcc_checking_assert (c_level_p == NULL_TREE); |
2851 | c_level_p = c; |
2852 | break; |
2853 | case OMP_CLAUSE_NOHOST: |
2854 | gcc_checking_assert (c_nohost_p == NULL_TREE); |
2855 | c_nohost_p = c; |
2856 | break; |
2857 | default: |
2858 | gcc_unreachable (); |
2859 | } |
2860 | gcc_checking_assert (c_level_p != NULL_TREE); |
2861 | /* ..., and compare to current directive's, which we've already collected |
2862 | above. */ |
2863 | tree c_diag; |
2864 | tree c_diag_p; |
2865 | /* Matching level of parallelism? */ |
2866 | if (OMP_CLAUSE_CODE (c_level) != OMP_CLAUSE_CODE (c_level_p)) |
2867 | { |
2868 | c_diag = c_level; |
2869 | c_diag_p = c_level_p; |
2870 | goto incompatible; |
2871 | } |
2872 | /* Matching 'nohost' clauses? */ |
2873 | if ((c_nohost == NULL_TREE) != (c_nohost_p == NULL_TREE)) |
2874 | { |
2875 | c_diag = c_nohost; |
2876 | c_diag_p = c_nohost_p; |
2877 | goto incompatible; |
2878 | } |
2879 | /* Compatible. */ |
2880 | return 1; |
2881 | |
2882 | incompatible: |
2883 | if (c_diag != NULL_TREE) |
2884 | error_at (OMP_CLAUSE_LOCATION (c_diag), |
2885 | "incompatible %qs clause when applying" |
2886 | " %<%s%> to %qD, which has already been" |
2887 | " marked with an OpenACC 'routine' directive" , |
2888 | omp_clause_code_name[OMP_CLAUSE_CODE (c_diag)], |
2889 | routine_str, fndecl); |
2890 | else if (c_diag_p != NULL_TREE) |
2891 | error_at (loc, |
2892 | "missing %qs clause when applying" |
2893 | " %<%s%> to %qD, which has already been" |
2894 | " marked with an OpenACC 'routine' directive" , |
2895 | omp_clause_code_name[OMP_CLAUSE_CODE (c_diag_p)], |
2896 | routine_str, fndecl); |
2897 | else |
2898 | gcc_unreachable (); |
2899 | if (c_diag_p != NULL_TREE) |
2900 | inform (OMP_CLAUSE_LOCATION (c_diag_p), |
2901 | "... with %qs clause here" , |
2902 | omp_clause_code_name[OMP_CLAUSE_CODE (c_diag_p)]); |
2903 | else |
2904 | { |
2905 | /* In the front ends, we don't preserve location information for the |
2906 | OpenACC routine directive itself. However, that of c_level_p |
2907 | should be close. */ |
2908 | location_t loc_routine = OMP_CLAUSE_LOCATION (c_level_p); |
2909 | inform (loc_routine, "... without %qs clause near to here" , |
2910 | omp_clause_code_name[OMP_CLAUSE_CODE (c_diag)]); |
2911 | } |
2912 | /* Incompatible. */ |
2913 | return -1; |
2914 | } |
2915 | |
2916 | return 0; |
2917 | } |
2918 | |
2919 | /* Process the OpenACC 'routine' directive clauses to generate an attribute |
2920 | for the level of parallelism. All dimensions have a size of zero |
2921 | (dynamic). TREE_PURPOSE is set to indicate whether that dimension |
2922 | can have a loop partitioned on it. non-zero indicates |
2923 | yes, zero indicates no. By construction once a non-zero has been |
2924 | reached, further inner dimensions must also be non-zero. We set |
2925 | TREE_VALUE to zero for the dimensions that may be partitioned and |
2926 | 1 for the other ones -- if a loop is (erroneously) spawned at |
2927 | an outer level, we don't want to try and partition it. */ |
2928 | |
2929 | tree |
2930 | oacc_build_routine_dims (tree clauses) |
2931 | { |
2932 | /* Must match GOMP_DIM ordering. */ |
2933 | static const omp_clause_code ids[] |
2934 | = {OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_SEQ}; |
2935 | int ix; |
2936 | int level = -1; |
2937 | |
2938 | for (; clauses; clauses = OMP_CLAUSE_CHAIN (clauses)) |
2939 | for (ix = GOMP_DIM_MAX + 1; ix--;) |
2940 | if (OMP_CLAUSE_CODE (clauses) == ids[ix]) |
2941 | { |
2942 | level = ix; |
2943 | break; |
2944 | } |
2945 | gcc_checking_assert (level >= 0); |
2946 | |
2947 | tree dims = NULL_TREE; |
2948 | |
2949 | for (ix = GOMP_DIM_MAX; ix--;) |
2950 | dims = tree_cons (build_int_cst (boolean_type_node, ix >= level), |
2951 | build_int_cst (integer_type_node, ix < level), dims); |
2952 | |
2953 | return dims; |
2954 | } |
2955 | |
2956 | /* Retrieve the oacc function attrib and return it. Non-oacc |
2957 | functions will return NULL. */ |
2958 | |
2959 | tree |
2960 | oacc_get_fn_attrib (tree fn) |
2961 | { |
2962 | return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn)); |
2963 | } |
2964 | |
2965 | /* Return true if FN is an OpenMP or OpenACC offloading function. */ |
2966 | |
2967 | bool |
2968 | offloading_function_p (tree fn) |
2969 | { |
2970 | tree attrs = DECL_ATTRIBUTES (fn); |
2971 | return (lookup_attribute (attr_name: "omp declare target" , list: attrs) |
2972 | || lookup_attribute (attr_name: "omp target entrypoint" , list: attrs)); |
2973 | } |
2974 | |
2975 | /* Extract an oacc execution dimension from FN. FN must be an |
2976 | offloaded function or routine that has already had its execution |
2977 | dimensions lowered to the target-specific values. */ |
2978 | |
2979 | int |
2980 | oacc_get_fn_dim_size (tree fn, int axis) |
2981 | { |
2982 | tree attrs = oacc_get_fn_attrib (fn); |
2983 | |
2984 | gcc_assert (axis < GOMP_DIM_MAX); |
2985 | |
2986 | tree dims = TREE_VALUE (attrs); |
2987 | while (axis--) |
2988 | dims = TREE_CHAIN (dims); |
2989 | |
2990 | int size = TREE_INT_CST_LOW (TREE_VALUE (dims)); |
2991 | |
2992 | return size; |
2993 | } |
2994 | |
2995 | /* Extract the dimension axis from an IFN_GOACC_DIM_POS or |
2996 | IFN_GOACC_DIM_SIZE call. */ |
2997 | |
2998 | int |
2999 | oacc_get_ifn_dim_arg (const gimple *stmt) |
3000 | { |
3001 | gcc_checking_assert (gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_SIZE |
3002 | || gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_POS); |
3003 | tree arg = gimple_call_arg (gs: stmt, index: 0); |
3004 | HOST_WIDE_INT axis = TREE_INT_CST_LOW (arg); |
3005 | |
3006 | gcc_checking_assert (axis >= 0 && axis < GOMP_DIM_MAX); |
3007 | return (int) axis; |
3008 | } |
3009 | |
3010 | /* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it |
3011 | as appropriate. */ |
3012 | |
3013 | tree |
3014 | omp_build_component_ref (tree obj, tree field) |
3015 | { |
3016 | tree ret = build3 (COMPONENT_REF, TREE_TYPE (field), obj, field, NULL); |
3017 | if (TREE_THIS_VOLATILE (field)) |
3018 | TREE_THIS_VOLATILE (ret) |= 1; |
3019 | if (TREE_READONLY (field)) |
3020 | TREE_READONLY (ret) |= 1; |
3021 | return ret; |
3022 | } |
3023 | |
3024 | /* Return true if NAME is the name of an omp_* runtime API call. */ |
3025 | bool |
3026 | omp_runtime_api_procname (const char *name) |
3027 | { |
3028 | if (!startswith (str: name, prefix: "omp_" )) |
3029 | return false; |
3030 | |
3031 | static const char *omp_runtime_apis[] = |
3032 | { |
3033 | /* This array has 3 sections. First omp_* calls that don't |
3034 | have any suffixes. */ |
3035 | "aligned_alloc" , |
3036 | "aligned_calloc" , |
3037 | "alloc" , |
3038 | "calloc" , |
3039 | "free" , |
3040 | "get_mapped_ptr" , |
3041 | "realloc" , |
3042 | "target_alloc" , |
3043 | "target_associate_ptr" , |
3044 | "target_disassociate_ptr" , |
3045 | "target_free" , |
3046 | "target_is_accessible" , |
3047 | "target_is_present" , |
3048 | "target_memcpy" , |
3049 | "target_memcpy_async" , |
3050 | "target_memcpy_rect" , |
3051 | "target_memcpy_rect_async" , |
3052 | NULL, |
3053 | /* Now omp_* calls that are available as omp_* and omp_*_; however, the |
3054 | DECL_NAME is always omp_* without tailing underscore. */ |
3055 | "capture_affinity" , |
3056 | "destroy_allocator" , |
3057 | "destroy_lock" , |
3058 | "destroy_nest_lock" , |
3059 | "display_affinity" , |
3060 | "fulfill_event" , |
3061 | "get_active_level" , |
3062 | "get_affinity_format" , |
3063 | "get_cancellation" , |
3064 | "get_default_allocator" , |
3065 | "get_default_device" , |
3066 | "get_device_num" , |
3067 | "get_dynamic" , |
3068 | "get_initial_device" , |
3069 | "get_level" , |
3070 | "get_max_active_levels" , |
3071 | "get_max_task_priority" , |
3072 | "get_max_teams" , |
3073 | "get_max_threads" , |
3074 | "get_nested" , |
3075 | "get_num_devices" , |
3076 | "get_num_places" , |
3077 | "get_num_procs" , |
3078 | "get_num_teams" , |
3079 | "get_num_threads" , |
3080 | "get_partition_num_places" , |
3081 | "get_place_num" , |
3082 | "get_proc_bind" , |
3083 | "get_supported_active_levels" , |
3084 | "get_team_num" , |
3085 | "get_teams_thread_limit" , |
3086 | "get_thread_limit" , |
3087 | "get_thread_num" , |
3088 | "get_wtick" , |
3089 | "get_wtime" , |
3090 | "in_explicit_task" , |
3091 | "in_final" , |
3092 | "in_parallel" , |
3093 | "init_lock" , |
3094 | "init_nest_lock" , |
3095 | "is_initial_device" , |
3096 | "pause_resource" , |
3097 | "pause_resource_all" , |
3098 | "set_affinity_format" , |
3099 | "set_default_allocator" , |
3100 | "set_lock" , |
3101 | "set_nest_lock" , |
3102 | "test_lock" , |
3103 | "test_nest_lock" , |
3104 | "unset_lock" , |
3105 | "unset_nest_lock" , |
3106 | NULL, |
3107 | /* And finally calls available as omp_*, omp_*_ and omp_*_8_; however, |
3108 | as DECL_NAME only omp_* and omp_*_8 appear. */ |
3109 | "display_env" , |
3110 | "get_ancestor_thread_num" , |
3111 | "init_allocator" , |
3112 | "get_partition_place_nums" , |
3113 | "get_place_num_procs" , |
3114 | "get_place_proc_ids" , |
3115 | "get_schedule" , |
3116 | "get_team_size" , |
3117 | "set_default_device" , |
3118 | "set_dynamic" , |
3119 | "set_max_active_levels" , |
3120 | "set_nested" , |
3121 | "set_num_teams" , |
3122 | "set_num_threads" , |
3123 | "set_schedule" , |
3124 | "set_teams_thread_limit" |
3125 | }; |
3126 | |
3127 | int mode = 0; |
3128 | for (unsigned i = 0; i < ARRAY_SIZE (omp_runtime_apis); i++) |
3129 | { |
3130 | if (omp_runtime_apis[i] == NULL) |
3131 | { |
3132 | mode++; |
3133 | continue; |
3134 | } |
3135 | size_t len = strlen (s: omp_runtime_apis[i]); |
3136 | if (strncmp (s1: name + 4, s2: omp_runtime_apis[i], n: len) == 0 |
3137 | && (name[4 + len] == '\0' |
3138 | || (mode > 1 && strcmp (s1: name + 4 + len, s2: "_8" ) == 0))) |
3139 | return true; |
3140 | } |
3141 | return false; |
3142 | } |
3143 | |
3144 | /* Return true if FNDECL is an omp_* runtime API call. */ |
3145 | |
3146 | bool |
3147 | omp_runtime_api_call (const_tree fndecl) |
3148 | { |
3149 | tree declname = DECL_NAME (fndecl); |
3150 | if (!declname |
3151 | || (DECL_CONTEXT (fndecl) != NULL_TREE |
3152 | && TREE_CODE (DECL_CONTEXT (fndecl)) != TRANSLATION_UNIT_DECL) |
3153 | || !TREE_PUBLIC (fndecl)) |
3154 | return false; |
3155 | return omp_runtime_api_procname (IDENTIFIER_POINTER (declname)); |
3156 | } |
3157 | |
3158 | #include "gt-omp-general.h" |
3159 | |