1 | /* OMP constructs' SIMD clone supporting code. |
2 | |
3 | Copyright (C) 2005-2023 Free Software Foundation, Inc. |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free |
9 | Software Foundation; either version 3, or (at your option) any later |
10 | version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
15 | for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | #include "config.h" |
22 | #include "system.h" |
23 | #include "coretypes.h" |
24 | #include "backend.h" |
25 | #include "target.h" |
26 | #include "tree.h" |
27 | #include "gimple.h" |
28 | #include "cfghooks.h" |
29 | #include "alloc-pool.h" |
30 | #include "tree-pass.h" |
31 | #include "ssa.h" |
32 | #include "cgraph.h" |
33 | #include "pretty-print.h" |
34 | #include "diagnostic-core.h" |
35 | #include "fold-const.h" |
36 | #include "stor-layout.h" |
37 | #include "cfganal.h" |
38 | #include "gimplify.h" |
39 | #include "gimple-iterator.h" |
40 | #include "gimplify-me.h" |
41 | #include "gimple-walk.h" |
42 | #include "langhooks.h" |
43 | #include "tree-cfg.h" |
44 | #include "tree-into-ssa.h" |
45 | #include "tree-dfa.h" |
46 | #include "cfgloop.h" |
47 | #include "symbol-summary.h" |
48 | #include "ipa-param-manipulation.h" |
49 | #include "tree-eh.h" |
50 | #include "varasm.h" |
51 | #include "stringpool.h" |
52 | #include "attribs.h" |
53 | #include "omp-simd-clone.h" |
54 | #include "omp-low.h" |
55 | #include "omp-general.h" |
56 | |
57 | /* Print debug info for ok_for_auto_simd_clone to the dump file, logging |
58 | failure reason EXCUSE for function DECL. Always returns false. */ |
59 | static bool |
60 | auto_simd_fail (tree decl, const char *excuse) |
61 | { |
62 | if (dump_file && (dump_flags & TDF_DETAILS)) |
63 | fprintf (stream: dump_file, format: "\nNot auto-cloning %s because %s\n" , |
64 | IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)), |
65 | excuse); |
66 | return false; |
67 | } |
68 | |
69 | /* Helper function for ok_for_auto_simd_clone; return false if the statement |
70 | violates restrictions for an "omp declare simd" function. Specifically, |
71 | the function must not |
72 | - throw or call setjmp/longjmp |
73 | - write memory that could alias parallel calls |
74 | - read volatile memory |
75 | - include openmp directives or calls |
76 | - call functions that might do those things */ |
77 | |
78 | static bool |
79 | auto_simd_check_stmt (gimple *stmt, tree outer) |
80 | { |
81 | tree decl; |
82 | |
83 | switch (gimple_code (g: stmt)) |
84 | { |
85 | case GIMPLE_CALL: |
86 | |
87 | /* Calls to functions that are CONST or PURE are ok, even if they |
88 | are internal functions without a decl. Reject other internal |
89 | functions. */ |
90 | if (gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)) |
91 | break; |
92 | if (gimple_call_internal_p (gs: stmt)) |
93 | return auto_simd_fail (decl: outer, |
94 | excuse: "body contains internal function call" ); |
95 | |
96 | decl = gimple_call_fndecl (gs: stmt); |
97 | |
98 | /* We can't know whether indirect calls are safe. */ |
99 | if (decl == NULL_TREE) |
100 | return auto_simd_fail (decl: outer, excuse: "body contains indirect call" ); |
101 | |
102 | /* Calls to functions that are already marked "omp declare simd" are |
103 | OK. */ |
104 | if (lookup_attribute (attr_name: "omp declare simd" , DECL_ATTRIBUTES (decl))) |
105 | break; |
106 | |
107 | /* Let recursive calls to the current function through. */ |
108 | if (decl == outer) |
109 | break; |
110 | |
111 | /* Other function calls are not permitted. This covers all calls to |
112 | the libgomp API and setjmp/longjmp, too, as well as things like |
113 | __cxa_throw_ related to exception handling. */ |
114 | return auto_simd_fail (decl: outer, excuse: "body contains unsafe function call" ); |
115 | |
116 | /* Reject EH-related constructs. Most of the EH gimple codes are |
117 | already lowered by the time this pass runs during IPA. |
118 | GIMPLE_EH_DISPATCH and GIMPLE_RESX remain and are lowered by |
119 | pass_lower_eh_dispatch and pass_lower_resx, respectively; those |
120 | passes run later. */ |
121 | case GIMPLE_EH_DISPATCH: |
122 | case GIMPLE_RESX: |
123 | return auto_simd_fail (decl: outer, excuse: "body contains EH constructs" ); |
124 | |
125 | /* Asms are not permitted since we don't know what they do. */ |
126 | case GIMPLE_ASM: |
127 | return auto_simd_fail (decl: outer, excuse: "body contains inline asm" ); |
128 | |
129 | default: |
130 | break; |
131 | } |
132 | |
133 | /* Memory writes are not permitted. |
134 | FIXME: this could be relaxed a little to permit writes to |
135 | function-local variables that could not alias other instances |
136 | of the function running in parallel. */ |
137 | if (gimple_store_p (gs: stmt)) |
138 | return auto_simd_fail (decl: outer, excuse: "body includes memory write" ); |
139 | |
140 | /* Volatile reads are not permitted. */ |
141 | if (gimple_has_volatile_ops (stmt)) |
142 | return auto_simd_fail (decl: outer, excuse: "body includes volatile op" ); |
143 | |
144 | /* Otherwise OK. */ |
145 | return true; |
146 | } |
147 | |
148 | /* Helper function for ok_for_auto_simd_clone: return true if type T is |
149 | plausible for a cloneable function argument or return type. */ |
150 | static bool |
151 | plausible_type_for_simd_clone (tree t) |
152 | { |
153 | if (VOID_TYPE_P (t)) |
154 | return true; |
155 | else if (RECORD_OR_UNION_TYPE_P (t) || !is_a <scalar_mode> (TYPE_MODE (t))) |
156 | /* Small record/union types may fit into a scalar mode, but are |
157 | still not suitable. */ |
158 | return false; |
159 | else if (TYPE_ATOMIC (t)) |
160 | /* Atomic types trigger warnings in simd_clone_clauses_extract. */ |
161 | return false; |
162 | else |
163 | return true; |
164 | } |
165 | |
166 | /* Check if the function NODE appears suitable for auto-annotation |
167 | with "declare simd". */ |
168 | |
169 | static bool |
170 | ok_for_auto_simd_clone (struct cgraph_node *node) |
171 | { |
172 | tree decl = node->decl; |
173 | tree t; |
174 | basic_block bb; |
175 | |
176 | /* Nothing to do if the function isn't a definition or doesn't |
177 | have a body. */ |
178 | if (!node->definition || !node->has_gimple_body_p ()) |
179 | return auto_simd_fail (decl, excuse: "no definition or body" ); |
180 | |
181 | /* No point in trying to generate implicit clones if the function |
182 | isn't used in the compilation unit. */ |
183 | if (!node->callers) |
184 | return auto_simd_fail (decl, excuse: "function is not used" ); |
185 | |
186 | /* Nothing to do if the function already has the "omp declare simd" |
187 | attribute, is marked noclone, or is not "omp declare target". */ |
188 | if (lookup_attribute (attr_name: "omp declare simd" , DECL_ATTRIBUTES (decl)) |
189 | || lookup_attribute (attr_name: "noclone" , DECL_ATTRIBUTES (decl)) |
190 | || !lookup_attribute (attr_name: "omp declare target" , DECL_ATTRIBUTES (decl))) |
191 | return auto_simd_fail (decl, excuse: "incompatible attributes" ); |
192 | |
193 | /* Check whether the function is restricted host/nohost via the |
194 | "omp declare target device_type" clause, and that doesn't match |
195 | what we're compiling for. Internally, these translate into |
196 | "omp declare target [no]host" attributes on the decl; "any" |
197 | translates into both attributes, but the default (which is supposed |
198 | to be equivalent to "any") is neither. */ |
199 | tree host = lookup_attribute (attr_name: "omp declare target host" , |
200 | DECL_ATTRIBUTES (decl)); |
201 | tree nohost = lookup_attribute (attr_name: "omp declare target nohost" , |
202 | DECL_ATTRIBUTES (decl)); |
203 | #ifdef ACCEL_COMPILER |
204 | if (host && !nohost) |
205 | return auto_simd_fail (decl, "device doesn't match for accel compiler" ); |
206 | #else |
207 | if (nohost && !host) |
208 | return auto_simd_fail (decl, excuse: "device doesn't match for host compiler" ); |
209 | #endif |
210 | |
211 | /* Backends will check for vectorizable arguments/return types in a |
212 | target-specific way, but we can immediately filter out functions |
213 | that have implausible argument/return types. */ |
214 | t = TREE_TYPE (TREE_TYPE (decl)); |
215 | if (!plausible_type_for_simd_clone (t)) |
216 | return auto_simd_fail (decl, excuse: "return type fails sniff test" ); |
217 | |
218 | if (TYPE_ARG_TYPES (TREE_TYPE (decl))) |
219 | { |
220 | for (tree temp = TYPE_ARG_TYPES (TREE_TYPE (decl)); |
221 | temp; temp = TREE_CHAIN (temp)) |
222 | { |
223 | t = TREE_VALUE (temp); |
224 | if (!plausible_type_for_simd_clone (t)) |
225 | return auto_simd_fail (decl, excuse: "argument type fails sniff test" ); |
226 | } |
227 | } |
228 | else if (DECL_ARGUMENTS (decl)) |
229 | { |
230 | for (tree temp = DECL_ARGUMENTS (decl); temp; temp = DECL_CHAIN (temp)) |
231 | { |
232 | t = TREE_TYPE (temp); |
233 | if (!plausible_type_for_simd_clone (t)) |
234 | return auto_simd_fail (decl, excuse: "argument type fails sniff test" ); |
235 | } |
236 | } |
237 | else |
238 | return auto_simd_fail (decl, excuse: "function has no arguments" ); |
239 | |
240 | /* Scan the function body to see if it is suitable for SIMD-ization. */ |
241 | node->get_body (); |
242 | |
243 | FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (decl)) |
244 | { |
245 | for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); |
246 | gsi_next (i: &gsi)) |
247 | if (!auto_simd_check_stmt (stmt: gsi_stmt (i: gsi), outer: decl)) |
248 | return false; |
249 | } |
250 | |
251 | /* All is good. */ |
252 | if (dump_file) |
253 | fprintf (stream: dump_file, format: "\nMarking %s for auto-cloning\n" , |
254 | IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))); |
255 | return true; |
256 | } |
257 | |
258 | /* Allocate a fresh `simd_clone' and return it. NARGS is the number |
259 | of arguments to reserve space for. */ |
260 | |
261 | static struct cgraph_simd_clone * |
262 | simd_clone_struct_alloc (int nargs) |
263 | { |
264 | struct cgraph_simd_clone *clone_info; |
265 | size_t len = (sizeof (struct cgraph_simd_clone) |
266 | + nargs * sizeof (struct cgraph_simd_clone_arg)); |
267 | clone_info = (struct cgraph_simd_clone *) |
268 | ggc_internal_cleared_alloc (s: len); |
269 | return clone_info; |
270 | } |
271 | |
272 | /* Make a copy of the `struct cgraph_simd_clone' in FROM to TO. */ |
273 | |
274 | static inline void |
275 | simd_clone_struct_copy (struct cgraph_simd_clone *to, |
276 | struct cgraph_simd_clone *from) |
277 | { |
278 | memcpy (dest: to, src: from, n: (sizeof (struct cgraph_simd_clone) |
279 | + ((from->nargs - from->inbranch) |
280 | * sizeof (struct cgraph_simd_clone_arg)))); |
281 | } |
282 | |
283 | /* Fill an empty vector ARGS with parameter types of function FNDECL. This |
284 | uses TYPE_ARG_TYPES if available, otherwise falls back to types of |
285 | DECL_ARGUMENTS types. */ |
286 | |
287 | static void |
288 | simd_clone_vector_of_formal_parm_types (vec<tree> *args, tree fndecl) |
289 | { |
290 | if (TYPE_ARG_TYPES (TREE_TYPE (fndecl))) |
291 | { |
292 | push_function_arg_types (types: args, TREE_TYPE (fndecl)); |
293 | return; |
294 | } |
295 | push_function_arg_decls (args, fndecl); |
296 | unsigned int i; |
297 | tree arg; |
298 | FOR_EACH_VEC_ELT (*args, i, arg) |
299 | (*args)[i] = TREE_TYPE ((*args)[i]); |
300 | } |
301 | |
302 | /* Given a simd function in NODE, extract the simd specific |
303 | information from the OMP clauses passed in CLAUSES, and return |
304 | the struct cgraph_simd_clone * if it should be cloned. *INBRANCH_SPECIFIED |
305 | is set to TRUE if the `inbranch' or `notinbranch' clause specified, |
306 | otherwise set to FALSE. */ |
307 | |
308 | static struct cgraph_simd_clone * |
309 | (struct cgraph_node *node, tree clauses, |
310 | bool *inbranch_specified) |
311 | { |
312 | auto_vec<tree> args; |
313 | simd_clone_vector_of_formal_parm_types (args: &args, fndecl: node->decl); |
314 | tree t; |
315 | int n; |
316 | *inbranch_specified = false; |
317 | |
318 | n = args.length (); |
319 | if (n > 0 && args.last () == void_type_node) |
320 | n--; |
321 | |
322 | /* Allocate one more than needed just in case this is an in-branch |
323 | clone which will require a mask argument. */ |
324 | struct cgraph_simd_clone *clone_info = simd_clone_struct_alloc (nargs: n + 1); |
325 | clone_info->nargs = n; |
326 | |
327 | if (!clauses) |
328 | goto out; |
329 | |
330 | clauses = TREE_VALUE (clauses); |
331 | if (!clauses || TREE_CODE (clauses) != OMP_CLAUSE) |
332 | goto out; |
333 | |
334 | for (t = clauses; t; t = OMP_CLAUSE_CHAIN (t)) |
335 | { |
336 | switch (OMP_CLAUSE_CODE (t)) |
337 | { |
338 | case OMP_CLAUSE_INBRANCH: |
339 | clone_info->inbranch = 1; |
340 | *inbranch_specified = true; |
341 | break; |
342 | case OMP_CLAUSE_NOTINBRANCH: |
343 | clone_info->inbranch = 0; |
344 | *inbranch_specified = true; |
345 | break; |
346 | case OMP_CLAUSE_SIMDLEN: |
347 | clone_info->simdlen |
348 | = TREE_INT_CST_LOW (OMP_CLAUSE_SIMDLEN_EXPR (t)); |
349 | break; |
350 | case OMP_CLAUSE_LINEAR: |
351 | { |
352 | tree decl = OMP_CLAUSE_DECL (t); |
353 | tree step = OMP_CLAUSE_LINEAR_STEP (t); |
354 | int argno = TREE_INT_CST_LOW (decl); |
355 | if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (t)) |
356 | { |
357 | enum cgraph_simd_clone_arg_type arg_type; |
358 | if (TREE_CODE (args[argno]) == REFERENCE_TYPE) |
359 | switch (OMP_CLAUSE_LINEAR_KIND (t)) |
360 | { |
361 | case OMP_CLAUSE_LINEAR_REF: |
362 | arg_type |
363 | = SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP; |
364 | break; |
365 | case OMP_CLAUSE_LINEAR_UVAL: |
366 | arg_type |
367 | = SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP; |
368 | break; |
369 | case OMP_CLAUSE_LINEAR_VAL: |
370 | case OMP_CLAUSE_LINEAR_DEFAULT: |
371 | arg_type |
372 | = SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP; |
373 | break; |
374 | default: |
375 | gcc_unreachable (); |
376 | } |
377 | else |
378 | arg_type = SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP; |
379 | clone_info->args[argno].arg_type = arg_type; |
380 | clone_info->args[argno].linear_step = tree_to_shwi (step); |
381 | gcc_assert (clone_info->args[argno].linear_step >= 0 |
382 | && clone_info->args[argno].linear_step < n); |
383 | } |
384 | else |
385 | { |
386 | if (POINTER_TYPE_P (args[argno])) |
387 | step = fold_convert (ssizetype, step); |
388 | if (!tree_fits_shwi_p (step)) |
389 | { |
390 | warning_at (OMP_CLAUSE_LOCATION (t), 0, |
391 | "ignoring large linear step" ); |
392 | return NULL; |
393 | } |
394 | else if (integer_zerop (step)) |
395 | { |
396 | warning_at (OMP_CLAUSE_LOCATION (t), 0, |
397 | "ignoring zero linear step" ); |
398 | return NULL; |
399 | } |
400 | else |
401 | { |
402 | enum cgraph_simd_clone_arg_type arg_type; |
403 | if (TREE_CODE (args[argno]) == REFERENCE_TYPE) |
404 | switch (OMP_CLAUSE_LINEAR_KIND (t)) |
405 | { |
406 | case OMP_CLAUSE_LINEAR_REF: |
407 | arg_type |
408 | = SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP; |
409 | break; |
410 | case OMP_CLAUSE_LINEAR_UVAL: |
411 | arg_type |
412 | = SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP; |
413 | break; |
414 | case OMP_CLAUSE_LINEAR_VAL: |
415 | case OMP_CLAUSE_LINEAR_DEFAULT: |
416 | arg_type |
417 | = SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP; |
418 | break; |
419 | default: |
420 | gcc_unreachable (); |
421 | } |
422 | else |
423 | arg_type = SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP; |
424 | clone_info->args[argno].arg_type = arg_type; |
425 | clone_info->args[argno].linear_step = tree_to_shwi (step); |
426 | } |
427 | } |
428 | break; |
429 | } |
430 | case OMP_CLAUSE_UNIFORM: |
431 | { |
432 | tree decl = OMP_CLAUSE_DECL (t); |
433 | int argno = tree_to_uhwi (decl); |
434 | clone_info->args[argno].arg_type |
435 | = SIMD_CLONE_ARG_TYPE_UNIFORM; |
436 | break; |
437 | } |
438 | case OMP_CLAUSE_ALIGNED: |
439 | { |
440 | /* Ignore aligned (x) for declare simd, for the ABI we really |
441 | need an alignment specified. */ |
442 | if (OMP_CLAUSE_ALIGNED_ALIGNMENT (t) == NULL_TREE) |
443 | break; |
444 | tree decl = OMP_CLAUSE_DECL (t); |
445 | int argno = tree_to_uhwi (decl); |
446 | clone_info->args[argno].alignment |
447 | = TREE_INT_CST_LOW (OMP_CLAUSE_ALIGNED_ALIGNMENT (t)); |
448 | break; |
449 | } |
450 | default: |
451 | break; |
452 | } |
453 | } |
454 | |
455 | out: |
456 | if (TYPE_ATOMIC (TREE_TYPE (TREE_TYPE (node->decl)))) |
457 | { |
458 | warning_at (DECL_SOURCE_LOCATION (node->decl), 0, |
459 | "ignoring %<#pragma omp declare simd%> on function " |
460 | "with %<_Atomic%> qualified return type" ); |
461 | return NULL; |
462 | } |
463 | |
464 | for (unsigned int argno = 0; argno < clone_info->nargs; argno++) |
465 | if (TYPE_ATOMIC (args[argno]) |
466 | && clone_info->args[argno].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM) |
467 | { |
468 | warning_at (DECL_SOURCE_LOCATION (node->decl), 0, |
469 | "ignoring %<#pragma omp declare simd%> on function " |
470 | "with %<_Atomic%> qualified non-%<uniform%> argument" ); |
471 | args.release (); |
472 | return NULL; |
473 | } |
474 | |
475 | return clone_info; |
476 | } |
477 | |
478 | /* Given a SIMD clone in NODE, calculate the characteristic data |
479 | type and return the coresponding type. The characteristic data |
480 | type is computed as described in the Intel Vector ABI. */ |
481 | |
482 | static tree |
483 | simd_clone_compute_base_data_type (struct cgraph_node *node, |
484 | struct cgraph_simd_clone *clone_info) |
485 | { |
486 | tree type = integer_type_node; |
487 | tree fndecl = node->decl; |
488 | |
489 | /* a) For non-void function, the characteristic data type is the |
490 | return type. */ |
491 | if (TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) != VOID_TYPE) |
492 | type = TREE_TYPE (TREE_TYPE (fndecl)); |
493 | |
494 | /* b) If the function has any non-uniform, non-linear parameters, |
495 | then the characteristic data type is the type of the first |
496 | such parameter. */ |
497 | else |
498 | { |
499 | auto_vec<tree> map; |
500 | simd_clone_vector_of_formal_parm_types (args: &map, fndecl); |
501 | for (unsigned int i = 0; i < clone_info->nargs; ++i) |
502 | if (clone_info->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) |
503 | { |
504 | type = map[i]; |
505 | break; |
506 | } |
507 | } |
508 | |
509 | /* c) If the characteristic data type determined by a) or b) above |
510 | is struct, union, or class type which is pass-by-value (except |
511 | for the type that maps to the built-in complex data type), the |
512 | characteristic data type is int. */ |
513 | if (RECORD_OR_UNION_TYPE_P (type) |
514 | && !aggregate_value_p (type, NULL) |
515 | && TREE_CODE (type) != COMPLEX_TYPE) |
516 | return integer_type_node; |
517 | |
518 | /* d) If none of the above three classes is applicable, the |
519 | characteristic data type is int. */ |
520 | |
521 | return type; |
522 | |
523 | /* e) For Intel Xeon Phi native and offload compilation, if the |
524 | resulting characteristic data type is 8-bit or 16-bit integer |
525 | data type, the characteristic data type is int. */ |
526 | /* Well, we don't handle Xeon Phi yet. */ |
527 | } |
528 | |
529 | static tree |
530 | simd_clone_mangle (struct cgraph_node *node, |
531 | struct cgraph_simd_clone *clone_info) |
532 | { |
533 | char vecsize_mangle = clone_info->vecsize_mangle; |
534 | char mask = clone_info->inbranch ? 'M' : 'N'; |
535 | poly_uint64 simdlen = clone_info->simdlen; |
536 | unsigned int n; |
537 | pretty_printer pp; |
538 | |
539 | gcc_assert (vecsize_mangle && maybe_ne (simdlen, 0U)); |
540 | |
541 | pp_string (&pp, "_ZGV" ); |
542 | pp_character (&pp, vecsize_mangle); |
543 | pp_character (&pp, mask); |
544 | /* For now, simdlen is always constant, while variable simdlen pp 'n'. */ |
545 | unsigned int len = simdlen.to_constant (); |
546 | pp_decimal_int (&pp, (len)); |
547 | |
548 | for (n = 0; n < clone_info->nargs; ++n) |
549 | { |
550 | struct cgraph_simd_clone_arg arg = clone_info->args[n]; |
551 | |
552 | switch (arg.arg_type) |
553 | { |
554 | case SIMD_CLONE_ARG_TYPE_UNIFORM: |
555 | pp_character (&pp, 'u'); |
556 | break; |
557 | case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: |
558 | pp_character (&pp, 'l'); |
559 | goto mangle_linear; |
560 | case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP: |
561 | pp_character (&pp, 'R'); |
562 | goto mangle_linear; |
563 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: |
564 | pp_character (&pp, 'L'); |
565 | goto mangle_linear; |
566 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP: |
567 | pp_character (&pp, 'U'); |
568 | goto mangle_linear; |
569 | mangle_linear: |
570 | gcc_assert (arg.linear_step != 0); |
571 | if (arg.linear_step > 1) |
572 | pp_unsigned_wide_integer (&pp, arg.linear_step); |
573 | else if (arg.linear_step < 0) |
574 | { |
575 | pp_character (&pp, 'n'); |
576 | pp_unsigned_wide_integer (&pp, (-(unsigned HOST_WIDE_INT) |
577 | arg.linear_step)); |
578 | } |
579 | break; |
580 | case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: |
581 | pp_string (&pp, "ls" ); |
582 | pp_unsigned_wide_integer (&pp, arg.linear_step); |
583 | break; |
584 | case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP: |
585 | pp_string (&pp, "Rs" ); |
586 | pp_unsigned_wide_integer (&pp, arg.linear_step); |
587 | break; |
588 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: |
589 | pp_string (&pp, "Ls" ); |
590 | pp_unsigned_wide_integer (&pp, arg.linear_step); |
591 | break; |
592 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: |
593 | pp_string (&pp, "Us" ); |
594 | pp_unsigned_wide_integer (&pp, arg.linear_step); |
595 | break; |
596 | default: |
597 | pp_character (&pp, 'v'); |
598 | } |
599 | if (arg.alignment) |
600 | { |
601 | pp_character (&pp, 'a'); |
602 | pp_decimal_int (&pp, arg.alignment); |
603 | } |
604 | } |
605 | |
606 | pp_underscore (&pp); |
607 | const char *str = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)); |
608 | if (*str == '*') |
609 | ++str; |
610 | pp_string (&pp, str); |
611 | str = pp_formatted_text (&pp); |
612 | |
613 | /* If there already is a SIMD clone with the same mangled name, don't |
614 | add another one. This can happen e.g. for |
615 | #pragma omp declare simd |
616 | #pragma omp declare simd simdlen(8) |
617 | int foo (int, int); |
618 | if the simdlen is assumed to be 8 for the first one, etc. */ |
619 | for (struct cgraph_node *clone = node->simd_clones; clone; |
620 | clone = clone->simdclone->next_clone) |
621 | if (id_equal (DECL_ASSEMBLER_NAME (clone->decl), str)) |
622 | return NULL_TREE; |
623 | |
624 | return get_identifier (str); |
625 | } |
626 | |
627 | /* Create a simd clone of OLD_NODE and return it. If FORCE_LOCAL is true, |
628 | create it as a local symbol, otherwise copy the symbol linkage and |
629 | visibility attributes from OLD_NODE. */ |
630 | |
631 | static struct cgraph_node * |
632 | simd_clone_create (struct cgraph_node *old_node, bool force_local) |
633 | { |
634 | struct cgraph_node *new_node; |
635 | if (old_node->definition) |
636 | { |
637 | if (!old_node->has_gimple_body_p ()) |
638 | return NULL; |
639 | old_node->get_body (); |
640 | new_node = old_node->create_version_clone_with_body (redirect_callers: vNULL, NULL, NULL, |
641 | NULL, NULL, |
642 | clone_name: "simdclone" ); |
643 | } |
644 | else |
645 | { |
646 | tree old_decl = old_node->decl; |
647 | tree new_decl = copy_node (old_node->decl); |
648 | DECL_NAME (new_decl) = clone_function_name_numbered (decl: old_decl, |
649 | suffix: "simdclone" ); |
650 | SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl)); |
651 | SET_DECL_RTL (new_decl, NULL); |
652 | DECL_STATIC_CONSTRUCTOR (new_decl) = 0; |
653 | DECL_STATIC_DESTRUCTOR (new_decl) = 0; |
654 | new_node = old_node->create_version_clone (new_decl, redirect_callers: vNULL, NULL); |
655 | if (old_node->in_other_partition) |
656 | new_node->in_other_partition = 1; |
657 | } |
658 | if (new_node == NULL) |
659 | return new_node; |
660 | |
661 | set_decl_built_in_function (decl: new_node->decl, fclass: NOT_BUILT_IN, fcode: 0); |
662 | if (force_local) |
663 | { |
664 | TREE_PUBLIC (new_node->decl) = 0; |
665 | DECL_COMDAT (new_node->decl) = 0; |
666 | DECL_WEAK (new_node->decl) = 0; |
667 | DECL_EXTERNAL (new_node->decl) = 0; |
668 | DECL_VISIBILITY_SPECIFIED (new_node->decl) = 0; |
669 | DECL_VISIBILITY (new_node->decl) = VISIBILITY_DEFAULT; |
670 | DECL_DLLIMPORT_P (new_node->decl) = 0; |
671 | } |
672 | else |
673 | { |
674 | TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl); |
675 | DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl); |
676 | DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl); |
677 | DECL_EXTERNAL (new_node->decl) = DECL_EXTERNAL (old_node->decl); |
678 | DECL_VISIBILITY_SPECIFIED (new_node->decl) |
679 | = DECL_VISIBILITY_SPECIFIED (old_node->decl); |
680 | DECL_VISIBILITY (new_node->decl) = DECL_VISIBILITY (old_node->decl); |
681 | DECL_DLLIMPORT_P (new_node->decl) = DECL_DLLIMPORT_P (old_node->decl); |
682 | if (DECL_ONE_ONLY (old_node->decl)) |
683 | make_decl_one_only (new_node->decl, |
684 | DECL_ASSEMBLER_NAME (new_node->decl)); |
685 | |
686 | /* The method cgraph_version_clone_with_body () will force the new |
687 | symbol local. Undo this, and inherit external visibility from |
688 | the old node. */ |
689 | new_node->local = old_node->local; |
690 | new_node->externally_visible = old_node->externally_visible; |
691 | new_node->calls_declare_variant_alt |
692 | = old_node->calls_declare_variant_alt; |
693 | } |
694 | |
695 | /* Mark clones with internal linkage as gc'able, so they will not be |
696 | emitted unless the vectorizer can actually use them. */ |
697 | if (!TREE_PUBLIC (new_node->decl)) |
698 | new_node->gc_candidate = true; |
699 | |
700 | return new_node; |
701 | } |
702 | |
703 | /* Adjust the return type of the given function to its appropriate |
704 | vector counterpart. */ |
705 | |
706 | static void |
707 | simd_clone_adjust_return_type (struct cgraph_node *node) |
708 | { |
709 | tree fndecl = node->decl; |
710 | tree orig_rettype = TREE_TYPE (TREE_TYPE (fndecl)); |
711 | poly_uint64 veclen; |
712 | tree t; |
713 | |
714 | /* Adjust the function return type. */ |
715 | if (orig_rettype == void_type_node) |
716 | return; |
717 | t = TREE_TYPE (TREE_TYPE (fndecl)); |
718 | if (INTEGRAL_TYPE_P (t) || POINTER_TYPE_P (t)) |
719 | veclen = node->simdclone->vecsize_int; |
720 | else |
721 | veclen = node->simdclone->vecsize_float; |
722 | if (known_eq (veclen, 0U)) |
723 | veclen = node->simdclone->simdlen; |
724 | else |
725 | veclen = exact_div (a: veclen, b: GET_MODE_BITSIZE (SCALAR_TYPE_MODE (t))); |
726 | if (multiple_p (a: veclen, b: node->simdclone->simdlen)) |
727 | veclen = node->simdclone->simdlen; |
728 | if (POINTER_TYPE_P (t)) |
729 | t = pointer_sized_int_node; |
730 | if (known_eq (veclen, node->simdclone->simdlen)) |
731 | t = build_vector_type (t, node->simdclone->simdlen); |
732 | else |
733 | { |
734 | t = build_vector_type (t, veclen); |
735 | t = build_array_type_nelts (t, exact_div (a: node->simdclone->simdlen, |
736 | b: veclen)); |
737 | } |
738 | TREE_TYPE (TREE_TYPE (fndecl)) = t; |
739 | } |
740 | |
741 | /* Each vector argument has a corresponding array to be used locally |
742 | as part of the eventual loop. Create such temporary array and |
743 | return it. |
744 | |
745 | PREFIX is the prefix to be used for the temporary. |
746 | |
747 | TYPE is the inner element type. |
748 | |
749 | SIMDLEN is the number of elements. */ |
750 | |
751 | static tree |
752 | create_tmp_simd_array (const char *prefix, tree type, poly_uint64 simdlen) |
753 | { |
754 | tree atype = build_array_type_nelts (type, simdlen); |
755 | tree avar = create_tmp_var_raw (atype, prefix); |
756 | gimple_add_tmp_var (avar); |
757 | return avar; |
758 | } |
759 | |
760 | /* Modify the function argument types to their corresponding vector |
761 | counterparts if appropriate. Also, create one array for each simd |
762 | argument to be used locally when using the function arguments as |
763 | part of the loop. |
764 | |
765 | NODE is the function whose arguments are to be adjusted. |
766 | |
767 | If NODE does not represent function definition, returns NULL. Otherwise |
768 | returns an adjustment class that will be filled describing how the argument |
769 | declarations will be remapped. New arguments which are not to be remapped |
770 | are marked with USER_FLAG. */ |
771 | |
772 | static void |
773 | simd_clone_adjust_argument_types (struct cgraph_node *node) |
774 | { |
775 | auto_vec<tree> args; |
776 | |
777 | if (node->definition) |
778 | push_function_arg_decls (args: &args, fndecl: node->decl); |
779 | else |
780 | simd_clone_vector_of_formal_parm_types (args: &args, fndecl: node->decl); |
781 | struct cgraph_simd_clone *sc = node->simdclone; |
782 | unsigned i, k; |
783 | poly_uint64 veclen; |
784 | |
785 | for (i = 0; i < sc->nargs; ++i) |
786 | { |
787 | tree parm = NULL_TREE; |
788 | tree parm_type = NULL_TREE; |
789 | if (i < args.length()) |
790 | { |
791 | parm = args[i]; |
792 | parm_type = node->definition ? TREE_TYPE (parm) : parm; |
793 | } |
794 | |
795 | sc->args[i].orig_arg = node->definition ? parm : NULL_TREE; |
796 | sc->args[i].orig_type = parm_type; |
797 | |
798 | switch (sc->args[i].arg_type) |
799 | { |
800 | default: |
801 | break; |
802 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP: |
803 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: |
804 | if (node->definition) |
805 | sc->args[i].simd_array |
806 | = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)), |
807 | TREE_TYPE (parm_type), |
808 | simdlen: sc->simdlen); |
809 | break; |
810 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: |
811 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: |
812 | case SIMD_CLONE_ARG_TYPE_VECTOR: |
813 | if (INTEGRAL_TYPE_P (parm_type) || POINTER_TYPE_P (parm_type)) |
814 | veclen = sc->vecsize_int; |
815 | else |
816 | veclen = sc->vecsize_float; |
817 | if (known_eq (veclen, 0U)) |
818 | veclen = sc->simdlen; |
819 | else |
820 | veclen |
821 | = exact_div (a: veclen, |
822 | b: GET_MODE_BITSIZE (SCALAR_TYPE_MODE (parm_type))); |
823 | if (multiple_p (a: veclen, b: sc->simdlen)) |
824 | veclen = sc->simdlen; |
825 | tree vtype; |
826 | if (POINTER_TYPE_P (parm_type)) |
827 | vtype = build_vector_type (pointer_sized_int_node, veclen); |
828 | else |
829 | vtype = build_vector_type (parm_type, veclen); |
830 | sc->args[i].vector_type = vtype; |
831 | |
832 | if (node->definition) |
833 | sc->args[i].simd_array |
834 | = create_tmp_simd_array (DECL_NAME (parm) |
835 | ? IDENTIFIER_POINTER (DECL_NAME (parm)) |
836 | : NULL, type: parm_type, simdlen: sc->simdlen); |
837 | } |
838 | } |
839 | |
840 | if (sc->inbranch) |
841 | { |
842 | tree base_type = simd_clone_compute_base_data_type (node: sc->origin, clone_info: sc); |
843 | tree mask_type; |
844 | if (INTEGRAL_TYPE_P (base_type) || POINTER_TYPE_P (base_type)) |
845 | veclen = sc->vecsize_int; |
846 | else |
847 | veclen = sc->vecsize_float; |
848 | if (known_eq (veclen, 0U)) |
849 | veclen = sc->simdlen; |
850 | else |
851 | veclen = exact_div (a: veclen, |
852 | b: GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type))); |
853 | if (multiple_p (a: veclen, b: sc->simdlen)) |
854 | veclen = sc->simdlen; |
855 | if (sc->mask_mode != VOIDmode) |
856 | mask_type |
857 | = lang_hooks.types.type_for_mode (sc->mask_mode, 1); |
858 | else if (POINTER_TYPE_P (base_type)) |
859 | mask_type = build_vector_type (pointer_sized_int_node, veclen); |
860 | else |
861 | mask_type = build_vector_type (base_type, veclen); |
862 | |
863 | k = vector_unroll_factor (sc->simdlen, veclen); |
864 | |
865 | /* We have previously allocated one extra entry for the mask. Use |
866 | it and fill it. */ |
867 | sc->nargs++; |
868 | if (sc->mask_mode != VOIDmode) |
869 | base_type = boolean_type_node; |
870 | if (node->definition) |
871 | { |
872 | sc->args[i].orig_arg |
873 | = build_decl (UNKNOWN_LOCATION, PARM_DECL, NULL, base_type); |
874 | if (sc->mask_mode == VOIDmode) |
875 | sc->args[i].simd_array |
876 | = create_tmp_simd_array (prefix: "mask" , type: base_type, simdlen: sc->simdlen); |
877 | else if (k > 1) |
878 | sc->args[i].simd_array |
879 | = create_tmp_simd_array (prefix: "mask" , type: mask_type, simdlen: k); |
880 | else |
881 | sc->args[i].simd_array = NULL_TREE; |
882 | } |
883 | sc->args[i].orig_type = base_type; |
884 | sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK; |
885 | sc->args[i].vector_type = mask_type; |
886 | } |
887 | |
888 | if (!node->definition) |
889 | { |
890 | tree new_arg_types = NULL_TREE, new_reversed; |
891 | bool last_parm_void = false; |
892 | if (args.length () > 0 && args.last () == void_type_node) |
893 | last_parm_void = true; |
894 | |
895 | gcc_assert (TYPE_ARG_TYPES (TREE_TYPE (node->decl))); |
896 | for (i = 0; i < sc->nargs; i++) |
897 | { |
898 | tree ptype; |
899 | switch (sc->args[i].arg_type) |
900 | { |
901 | default: |
902 | ptype = sc->args[i].orig_type; |
903 | break; |
904 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: |
905 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: |
906 | case SIMD_CLONE_ARG_TYPE_VECTOR: |
907 | ptype = sc->args[i].vector_type; |
908 | break; |
909 | } |
910 | new_arg_types = tree_cons (NULL_TREE, ptype, new_arg_types); |
911 | } |
912 | new_reversed = nreverse (new_arg_types); |
913 | if (last_parm_void) |
914 | { |
915 | if (new_reversed) |
916 | TREE_CHAIN (new_arg_types) = void_list_node; |
917 | else |
918 | new_reversed = void_list_node; |
919 | } |
920 | TYPE_ARG_TYPES (TREE_TYPE (node->decl)) = new_reversed; |
921 | } |
922 | } |
923 | |
924 | /* Initialize and copy the function arguments in NODE to their |
925 | corresponding local simd arrays. Returns a fresh gimple_seq with |
926 | the instruction sequence generated. */ |
927 | |
928 | static gimple_seq |
929 | simd_clone_init_simd_arrays (struct cgraph_node *node, |
930 | ipa_param_body_adjustments *adjustments) |
931 | { |
932 | gimple_seq seq = NULL; |
933 | unsigned i = 0, j = 0, k; |
934 | |
935 | for (tree arg = DECL_ARGUMENTS (node->decl); |
936 | arg; |
937 | arg = DECL_CHAIN (arg), i++, j++) |
938 | { |
939 | ipa_adjusted_param adj = (*adjustments->m_adj_params)[j]; |
940 | if (adj.op == IPA_PARAM_OP_COPY |
941 | || POINTER_TYPE_P (TREE_TYPE (arg))) |
942 | continue; |
943 | |
944 | node->simdclone->args[i].vector_arg = arg; |
945 | |
946 | tree array = node->simdclone->args[i].simd_array; |
947 | if (node->simdclone->mask_mode != VOIDmode |
948 | && adj.param_prefix_index == IPA_PARAM_PREFIX_MASK) |
949 | { |
950 | if (array == NULL_TREE) |
951 | continue; |
952 | unsigned int l |
953 | = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (array)))); |
954 | for (k = 0; k <= l; k++) |
955 | { |
956 | if (k) |
957 | { |
958 | arg = DECL_CHAIN (arg); |
959 | j++; |
960 | } |
961 | tree t = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (array)), |
962 | array, size_int (k), NULL, NULL); |
963 | t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg); |
964 | gimplify_and_add (t, &seq); |
965 | } |
966 | continue; |
967 | } |
968 | if (!VECTOR_TYPE_P (TREE_TYPE (arg)) |
969 | || known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)), |
970 | node->simdclone->simdlen)) |
971 | { |
972 | tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array))); |
973 | tree ptr = build_fold_addr_expr (array); |
974 | tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr, |
975 | build_int_cst (ptype, 0)); |
976 | t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg); |
977 | gimplify_and_add (t, &seq); |
978 | } |
979 | else |
980 | { |
981 | poly_uint64 simdlen = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)); |
982 | unsigned int times = vector_unroll_factor (node->simdclone->simdlen, |
983 | simdlen); |
984 | tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array))); |
985 | for (k = 0; k < times; k++) |
986 | { |
987 | tree ptr = build_fold_addr_expr (array); |
988 | int elemsize; |
989 | if (k) |
990 | { |
991 | arg = DECL_CHAIN (arg); |
992 | j++; |
993 | } |
994 | tree elemtype = TREE_TYPE (TREE_TYPE (arg)); |
995 | elemsize = GET_MODE_SIZE (SCALAR_TYPE_MODE (elemtype)); |
996 | tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr, |
997 | build_int_cst (ptype, k * elemsize * simdlen)); |
998 | t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg); |
999 | gimplify_and_add (t, &seq); |
1000 | } |
1001 | } |
1002 | } |
1003 | return seq; |
1004 | } |
1005 | |
1006 | /* Callback info for ipa_simd_modify_stmt_ops below. */ |
1007 | |
1008 | struct modify_stmt_info { |
1009 | ipa_param_body_adjustments *adjustments; |
1010 | gimple *stmt; |
1011 | gimple *after_stmt; |
1012 | /* True if the parent statement was modified by |
1013 | ipa_simd_modify_stmt_ops. */ |
1014 | bool modified; |
1015 | }; |
1016 | |
1017 | /* Callback for walk_gimple_op. |
1018 | |
1019 | Adjust operands from a given statement as specified in the |
1020 | adjustments vector in the callback data. */ |
1021 | |
1022 | static tree |
1023 | ipa_simd_modify_stmt_ops (tree *tp, int *walk_subtrees, void *data) |
1024 | { |
1025 | struct walk_stmt_info *wi = (struct walk_stmt_info *) data; |
1026 | struct modify_stmt_info *info = (struct modify_stmt_info *) wi->info; |
1027 | tree *orig_tp = tp; |
1028 | if (TREE_CODE (*tp) == ADDR_EXPR) |
1029 | tp = &TREE_OPERAND (*tp, 0); |
1030 | |
1031 | if (TREE_CODE (*tp) == BIT_FIELD_REF |
1032 | || TREE_CODE (*tp) == IMAGPART_EXPR |
1033 | || TREE_CODE (*tp) == REALPART_EXPR) |
1034 | tp = &TREE_OPERAND (*tp, 0); |
1035 | |
1036 | tree repl = NULL_TREE; |
1037 | ipa_param_body_replacement *pbr = NULL; |
1038 | |
1039 | if (TREE_CODE (*tp) == PARM_DECL) |
1040 | { |
1041 | pbr = info->adjustments->get_expr_replacement (expr: *tp, ignore_default_def: true); |
1042 | if (pbr) |
1043 | repl = pbr->repl; |
1044 | } |
1045 | else if (TYPE_P (*tp)) |
1046 | *walk_subtrees = 0; |
1047 | |
1048 | if (repl) |
1049 | repl = unshare_expr (repl); |
1050 | else |
1051 | { |
1052 | if (tp != orig_tp) |
1053 | { |
1054 | *walk_subtrees = 0; |
1055 | bool modified = info->modified; |
1056 | info->modified = false; |
1057 | walk_tree (tp, ipa_simd_modify_stmt_ops, wi, wi->pset); |
1058 | if (!info->modified) |
1059 | { |
1060 | info->modified = modified; |
1061 | return NULL_TREE; |
1062 | } |
1063 | info->modified = modified; |
1064 | repl = *tp; |
1065 | } |
1066 | else |
1067 | return NULL_TREE; |
1068 | } |
1069 | |
1070 | if (tp != orig_tp) |
1071 | { |
1072 | if (gimple_code (g: info->stmt) == GIMPLE_PHI |
1073 | && pbr |
1074 | && TREE_CODE (*orig_tp) == ADDR_EXPR |
1075 | && TREE_CODE (TREE_OPERAND (*orig_tp, 0)) == PARM_DECL |
1076 | && pbr->dummy) |
1077 | { |
1078 | gcc_assert (TREE_CODE (pbr->dummy) == SSA_NAME); |
1079 | *orig_tp = pbr->dummy; |
1080 | info->modified = true; |
1081 | return NULL_TREE; |
1082 | } |
1083 | |
1084 | repl = build_fold_addr_expr (repl); |
1085 | gimple *stmt; |
1086 | if (is_gimple_debug (gs: info->stmt)) |
1087 | { |
1088 | tree vexpr = build_debug_expr_decl (TREE_TYPE (repl)); |
1089 | stmt = gimple_build_debug_source_bind (vexpr, repl, NULL); |
1090 | repl = vexpr; |
1091 | } |
1092 | else |
1093 | { |
1094 | stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (repl)), repl); |
1095 | repl = gimple_assign_lhs (gs: stmt); |
1096 | } |
1097 | gimple_stmt_iterator gsi; |
1098 | if (gimple_code (g: info->stmt) == GIMPLE_PHI) |
1099 | { |
1100 | if (info->after_stmt) |
1101 | gsi = gsi_for_stmt (info->after_stmt); |
1102 | else |
1103 | gsi = gsi_after_labels (bb: single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); |
1104 | /* Cache SSA_NAME for next time. */ |
1105 | if (pbr |
1106 | && TREE_CODE (*orig_tp) == ADDR_EXPR |
1107 | && TREE_CODE (TREE_OPERAND (*orig_tp, 0)) == PARM_DECL) |
1108 | { |
1109 | gcc_assert (!pbr->dummy); |
1110 | pbr->dummy = repl; |
1111 | } |
1112 | } |
1113 | else |
1114 | gsi = gsi_for_stmt (info->stmt); |
1115 | if (info->after_stmt) |
1116 | gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); |
1117 | else |
1118 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
1119 | if (gimple_code (g: info->stmt) == GIMPLE_PHI) |
1120 | info->after_stmt = stmt; |
1121 | *orig_tp = repl; |
1122 | } |
1123 | else if (!useless_type_conversion_p (TREE_TYPE (*tp), TREE_TYPE (repl))) |
1124 | { |
1125 | tree vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (*tp), repl); |
1126 | *tp = vce; |
1127 | } |
1128 | else |
1129 | *tp = repl; |
1130 | |
1131 | info->modified = true; |
1132 | return NULL_TREE; |
1133 | } |
1134 | |
1135 | /* Traverse the function body and perform all modifications as |
1136 | described in ADJUSTMENTS. At function return, ADJUSTMENTS will be |
1137 | modified such that the replacement/reduction value will now be an |
1138 | offset into the corresponding simd_array. |
1139 | |
1140 | This function will replace all function argument uses with their |
1141 | corresponding simd array elements, and ajust the return values |
1142 | accordingly. */ |
1143 | |
1144 | static void |
1145 | ipa_simd_modify_function_body (struct cgraph_node *node, |
1146 | ipa_param_body_adjustments *adjustments, |
1147 | tree retval_array, tree iter) |
1148 | { |
1149 | basic_block bb; |
1150 | unsigned int i, j; |
1151 | |
1152 | |
1153 | /* Register replacements for every function argument use to an offset into |
1154 | the corresponding simd_array. */ |
1155 | for (i = 0, j = 0; i < node->simdclone->nargs; ++i, ++j) |
1156 | { |
1157 | if (!node->simdclone->args[i].vector_arg |
1158 | || (*adjustments->m_adj_params)[j].user_flag) |
1159 | continue; |
1160 | |
1161 | tree basetype = TREE_TYPE (node->simdclone->args[i].orig_arg); |
1162 | tree vectype = TREE_TYPE (node->simdclone->args[i].vector_arg); |
1163 | tree r = build4 (ARRAY_REF, basetype, node->simdclone->args[i].simd_array, |
1164 | iter, NULL_TREE, NULL_TREE); |
1165 | adjustments->register_replacement (apm: &(*adjustments->m_adj_params)[j], replacement: r); |
1166 | |
1167 | if (multiple_p (a: node->simdclone->simdlen, b: TYPE_VECTOR_SUBPARTS (node: vectype))) |
1168 | j += vector_unroll_factor (node->simdclone->simdlen, |
1169 | TYPE_VECTOR_SUBPARTS (vectype)) - 1; |
1170 | } |
1171 | adjustments->sort_replacements (); |
1172 | |
1173 | tree name; |
1174 | FOR_EACH_SSA_NAME (i, name, cfun) |
1175 | { |
1176 | tree base_var; |
1177 | if (SSA_NAME_VAR (name) |
1178 | && TREE_CODE (SSA_NAME_VAR (name)) == PARM_DECL |
1179 | && (base_var |
1180 | = adjustments->get_replacement_ssa_base (SSA_NAME_VAR (name)))) |
1181 | { |
1182 | if (SSA_NAME_IS_DEFAULT_DEF (name)) |
1183 | { |
1184 | tree old_decl = SSA_NAME_VAR (name); |
1185 | bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)); |
1186 | gimple_stmt_iterator gsi = gsi_after_labels (bb); |
1187 | tree repl = adjustments->lookup_replacement (base: old_decl, unit_offset: 0); |
1188 | gcc_checking_assert (repl); |
1189 | repl = unshare_expr (repl); |
1190 | set_ssa_default_def (cfun, old_decl, NULL_TREE); |
1191 | SET_SSA_NAME_VAR_OR_IDENTIFIER (name, base_var); |
1192 | SSA_NAME_IS_DEFAULT_DEF (name) = 0; |
1193 | gimple *stmt = gimple_build_assign (name, repl); |
1194 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
1195 | } |
1196 | else |
1197 | SET_SSA_NAME_VAR_OR_IDENTIFIER (name, base_var); |
1198 | } |
1199 | } |
1200 | |
1201 | struct modify_stmt_info info; |
1202 | info.adjustments = adjustments; |
1203 | |
1204 | FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) |
1205 | { |
1206 | gimple_stmt_iterator gsi; |
1207 | |
1208 | for (gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
1209 | { |
1210 | gphi *phi = as_a <gphi *> (p: gsi_stmt (i: gsi)); |
1211 | int i, n = gimple_phi_num_args (gs: phi); |
1212 | info.stmt = phi; |
1213 | info.after_stmt = NULL; |
1214 | struct walk_stmt_info wi; |
1215 | memset (s: &wi, c: 0, n: sizeof (wi)); |
1216 | info.modified = false; |
1217 | wi.info = &info; |
1218 | for (i = 0; i < n; ++i) |
1219 | { |
1220 | int walk_subtrees = 1; |
1221 | tree arg = gimple_phi_arg_def (gs: phi, index: i); |
1222 | tree op = arg; |
1223 | ipa_simd_modify_stmt_ops (tp: &op, walk_subtrees: &walk_subtrees, data: &wi); |
1224 | if (op != arg) |
1225 | { |
1226 | SET_PHI_ARG_DEF (phi, i, op); |
1227 | gcc_assert (TREE_CODE (op) == SSA_NAME); |
1228 | if (gimple_phi_arg_edge (phi, i)->flags & EDGE_ABNORMAL) |
1229 | SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op) = 1; |
1230 | } |
1231 | } |
1232 | } |
1233 | |
1234 | gsi = gsi_start_bb (bb); |
1235 | while (!gsi_end_p (i: gsi)) |
1236 | { |
1237 | gimple *stmt = gsi_stmt (i: gsi); |
1238 | info.stmt = stmt; |
1239 | info.after_stmt = NULL; |
1240 | struct walk_stmt_info wi; |
1241 | |
1242 | memset (s: &wi, c: 0, n: sizeof (wi)); |
1243 | info.modified = false; |
1244 | wi.info = &info; |
1245 | walk_gimple_op (stmt, ipa_simd_modify_stmt_ops, &wi); |
1246 | |
1247 | if (greturn *return_stmt = dyn_cast <greturn *> (p: stmt)) |
1248 | { |
1249 | tree retval = gimple_return_retval (gs: return_stmt); |
1250 | edge e = find_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun)); |
1251 | e->flags |= EDGE_FALLTHRU; |
1252 | if (!retval) |
1253 | { |
1254 | gsi_remove (&gsi, true); |
1255 | continue; |
1256 | } |
1257 | |
1258 | /* Replace `return foo' with `retval_array[iter] = foo'. */ |
1259 | tree ref = build4 (ARRAY_REF, TREE_TYPE (retval), |
1260 | retval_array, iter, NULL, NULL); |
1261 | stmt = gimple_build_assign (ref, retval); |
1262 | gsi_replace (&gsi, stmt, true); |
1263 | info.modified = true; |
1264 | } |
1265 | |
1266 | if (info.modified) |
1267 | { |
1268 | update_stmt (s: stmt); |
1269 | /* If the above changed the var of a debug bind into something |
1270 | different, remove the debug stmt. We could also for all the |
1271 | replaced parameters add VAR_DECLs for debug info purposes, |
1272 | add debug stmts for those to be the simd array accesses and |
1273 | replace debug stmt var operand with that var. Debugging of |
1274 | vectorized loops doesn't work too well, so don't bother for |
1275 | now. */ |
1276 | if ((gimple_debug_bind_p (s: stmt) |
1277 | && !DECL_P (gimple_debug_bind_get_var (stmt))) |
1278 | || (gimple_debug_source_bind_p (s: stmt) |
1279 | && !DECL_P (gimple_debug_source_bind_get_var (stmt)))) |
1280 | { |
1281 | gsi_remove (&gsi, true); |
1282 | continue; |
1283 | } |
1284 | if (maybe_clean_eh_stmt (stmt)) |
1285 | gimple_purge_dead_eh_edges (gimple_bb (g: stmt)); |
1286 | } |
1287 | gsi_next (i: &gsi); |
1288 | } |
1289 | } |
1290 | } |
1291 | |
1292 | /* Helper function of simd_clone_adjust, return linear step addend |
1293 | of Ith argument. */ |
1294 | |
1295 | static tree |
1296 | simd_clone_linear_addend (struct cgraph_node *node, unsigned int i, |
1297 | tree addtype, basic_block entry_bb) |
1298 | { |
1299 | tree ptype = NULL_TREE; |
1300 | switch (node->simdclone->args[i].arg_type) |
1301 | { |
1302 | case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: |
1303 | case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP: |
1304 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: |
1305 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP: |
1306 | return build_int_cst (addtype, node->simdclone->args[i].linear_step); |
1307 | case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: |
1308 | case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP: |
1309 | ptype = TREE_TYPE (node->simdclone->args[i].orig_arg); |
1310 | break; |
1311 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: |
1312 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: |
1313 | ptype = TREE_TYPE (TREE_TYPE (node->simdclone->args[i].orig_arg)); |
1314 | break; |
1315 | default: |
1316 | gcc_unreachable (); |
1317 | } |
1318 | |
1319 | unsigned int idx = node->simdclone->args[i].linear_step; |
1320 | tree arg = node->simdclone->args[idx].orig_arg; |
1321 | gcc_assert (is_gimple_reg_type (TREE_TYPE (arg))); |
1322 | gimple_stmt_iterator gsi = gsi_after_labels (bb: entry_bb); |
1323 | gimple *g; |
1324 | tree ret; |
1325 | if (is_gimple_reg (arg)) |
1326 | ret = get_or_create_ssa_default_def (cfun, arg); |
1327 | else |
1328 | { |
1329 | g = gimple_build_assign (make_ssa_name (TREE_TYPE (arg)), arg); |
1330 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1331 | ret = gimple_assign_lhs (gs: g); |
1332 | } |
1333 | if (TREE_CODE (TREE_TYPE (arg)) == REFERENCE_TYPE) |
1334 | { |
1335 | g = gimple_build_assign (make_ssa_name (TREE_TYPE (TREE_TYPE (arg))), |
1336 | build_simple_mem_ref (ret)); |
1337 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1338 | ret = gimple_assign_lhs (gs: g); |
1339 | } |
1340 | if (!useless_type_conversion_p (addtype, TREE_TYPE (ret))) |
1341 | { |
1342 | g = gimple_build_assign (make_ssa_name (var: addtype), NOP_EXPR, ret); |
1343 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1344 | ret = gimple_assign_lhs (gs: g); |
1345 | } |
1346 | if (POINTER_TYPE_P (ptype)) |
1347 | { |
1348 | tree size = TYPE_SIZE_UNIT (TREE_TYPE (ptype)); |
1349 | if (size && TREE_CODE (size) == INTEGER_CST) |
1350 | { |
1351 | g = gimple_build_assign (make_ssa_name (var: addtype), MULT_EXPR, |
1352 | ret, fold_convert (addtype, size)); |
1353 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1354 | ret = gimple_assign_lhs (gs: g); |
1355 | } |
1356 | } |
1357 | return ret; |
1358 | } |
1359 | |
1360 | /* Adjust the argument types in NODE to their appropriate vector |
1361 | counterparts. */ |
1362 | |
1363 | static void |
1364 | simd_clone_adjust (struct cgraph_node *node) |
1365 | { |
1366 | push_cfun (DECL_STRUCT_FUNCTION (node->decl)); |
1367 | |
1368 | tree orig_rettype = TREE_TYPE (TREE_TYPE (node->decl)); |
1369 | TREE_TYPE (node->decl) = build_distinct_type_copy (TREE_TYPE (node->decl)); |
1370 | simd_clone_adjust_return_type (node); |
1371 | simd_clone_adjust_argument_types (node); |
1372 | targetm.simd_clone.adjust (node); |
1373 | tree retval = NULL_TREE; |
1374 | if (orig_rettype != void_type_node) |
1375 | { |
1376 | poly_uint64 veclen; |
1377 | if (INTEGRAL_TYPE_P (orig_rettype) || POINTER_TYPE_P (orig_rettype)) |
1378 | veclen = node->simdclone->vecsize_int; |
1379 | else |
1380 | veclen = node->simdclone->vecsize_float; |
1381 | if (known_eq (veclen, 0U)) |
1382 | veclen = node->simdclone->simdlen; |
1383 | else |
1384 | veclen = exact_div (a: veclen, |
1385 | b: GET_MODE_BITSIZE (SCALAR_TYPE_MODE (orig_rettype))); |
1386 | if (multiple_p (a: veclen, b: node->simdclone->simdlen)) |
1387 | veclen = node->simdclone->simdlen; |
1388 | |
1389 | retval = DECL_RESULT (node->decl); |
1390 | /* Adjust the DECL_RESULT. */ |
1391 | TREE_TYPE (retval) = TREE_TYPE (TREE_TYPE (node->decl)); |
1392 | relayout_decl (retval); |
1393 | |
1394 | tree atype = build_array_type_nelts (orig_rettype, |
1395 | node->simdclone->simdlen); |
1396 | if (maybe_ne (a: veclen, b: node->simdclone->simdlen)) |
1397 | retval = build1 (VIEW_CONVERT_EXPR, atype, retval); |
1398 | else |
1399 | { |
1400 | /* Set up a SIMD array to use as the return value. */ |
1401 | retval = create_tmp_var_raw (atype, "retval" ); |
1402 | gimple_add_tmp_var (retval); |
1403 | } |
1404 | } |
1405 | |
1406 | struct cgraph_simd_clone *sc = node->simdclone; |
1407 | vec<ipa_adjusted_param, va_gc> *new_params = NULL; |
1408 | vec_safe_reserve (v&: new_params, nelems: sc->nargs); |
1409 | unsigned i, j, k; |
1410 | for (i = 0; i < sc->nargs; ++i) |
1411 | { |
1412 | ipa_adjusted_param adj; |
1413 | memset (s: &adj, c: 0, n: sizeof (adj)); |
1414 | poly_uint64 veclen; |
1415 | tree elem_type; |
1416 | |
1417 | adj.base_index = i; |
1418 | adj.prev_clone_index = i; |
1419 | switch (sc->args[i].arg_type) |
1420 | { |
1421 | default: |
1422 | /* No adjustment necessary for scalar arguments. */ |
1423 | adj.op = IPA_PARAM_OP_COPY; |
1424 | break; |
1425 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: |
1426 | adj.op = IPA_PARAM_OP_COPY; |
1427 | break; |
1428 | case SIMD_CLONE_ARG_TYPE_MASK: |
1429 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: |
1430 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: |
1431 | case SIMD_CLONE_ARG_TYPE_VECTOR: |
1432 | if (sc->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK |
1433 | && sc->mask_mode != VOIDmode) |
1434 | elem_type = boolean_type_node; |
1435 | else |
1436 | elem_type = TREE_TYPE (sc->args[i].vector_type); |
1437 | if (INTEGRAL_TYPE_P (elem_type) || POINTER_TYPE_P (elem_type)) |
1438 | veclen = sc->vecsize_int; |
1439 | else |
1440 | veclen = sc->vecsize_float; |
1441 | if (known_eq (veclen, 0U)) |
1442 | veclen = sc->simdlen; |
1443 | else |
1444 | veclen |
1445 | = exact_div (a: veclen, |
1446 | b: GET_MODE_BITSIZE (SCALAR_TYPE_MODE (elem_type))); |
1447 | if (multiple_p (a: veclen, b: sc->simdlen)) |
1448 | veclen = sc->simdlen; |
1449 | if (sc->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK) |
1450 | { |
1451 | adj.user_flag = 1; |
1452 | adj.param_prefix_index = IPA_PARAM_PREFIX_MASK; |
1453 | } |
1454 | else |
1455 | adj.param_prefix_index = IPA_PARAM_PREFIX_SIMD; |
1456 | adj.op = IPA_PARAM_OP_NEW; |
1457 | adj.type = sc->args[i].vector_type; |
1458 | k = vector_unroll_factor (sc->simdlen, veclen); |
1459 | for (j = 1; j < k; j++) |
1460 | { |
1461 | vec_safe_push (v&: new_params, obj: adj); |
1462 | if (j == 1) |
1463 | { |
1464 | memset (s: &adj, c: 0, n: sizeof (adj)); |
1465 | adj.op = IPA_PARAM_OP_NEW; |
1466 | adj.user_flag = 1; |
1467 | if (sc->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK) |
1468 | adj.param_prefix_index = IPA_PARAM_PREFIX_MASK; |
1469 | else |
1470 | adj.param_prefix_index = IPA_PARAM_PREFIX_SIMD; |
1471 | adj.base_index = i; |
1472 | adj.prev_clone_index = i; |
1473 | adj.type = sc->args[i].vector_type; |
1474 | } |
1475 | } |
1476 | } |
1477 | vec_safe_push (v&: new_params, obj: adj); |
1478 | } |
1479 | ipa_param_body_adjustments *adjustments |
1480 | = new ipa_param_body_adjustments (new_params, node->decl); |
1481 | adjustments->modify_formal_parameters (); |
1482 | |
1483 | push_gimplify_context (); |
1484 | |
1485 | gimple_seq seq = simd_clone_init_simd_arrays (node, adjustments); |
1486 | |
1487 | /* Adjust all uses of vector arguments accordingly. Adjust all |
1488 | return values accordingly. */ |
1489 | tree iter = create_tmp_var (unsigned_type_node, "iter" ); |
1490 | tree iter1 = make_ssa_name (var: iter); |
1491 | tree iter2 = NULL_TREE; |
1492 | ipa_simd_modify_function_body (node, adjustments, retval_array: retval, iter: iter1); |
1493 | delete adjustments; |
1494 | |
1495 | /* Initialize the iteration variable. */ |
1496 | basic_block entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)); |
1497 | basic_block body_bb = split_block_after_labels (entry_bb)->dest; |
1498 | gimple_stmt_iterator gsi = gsi_after_labels (bb: entry_bb); |
1499 | /* Insert the SIMD array and iv initialization at function |
1500 | entry. */ |
1501 | gsi_insert_seq_before (&gsi, seq, GSI_NEW_STMT); |
1502 | |
1503 | pop_gimplify_context (NULL); |
1504 | |
1505 | gimple *g; |
1506 | basic_block incr_bb = NULL; |
1507 | class loop *loop = NULL; |
1508 | |
1509 | /* Create a new BB right before the original exit BB, to hold the |
1510 | iteration increment and the condition/branch. */ |
1511 | if (EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)) |
1512 | { |
1513 | basic_block orig_exit = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0)->src; |
1514 | incr_bb = create_empty_bb (orig_exit); |
1515 | incr_bb->count = profile_count::zero (); |
1516 | add_bb_to_loop (incr_bb, body_bb->loop_father); |
1517 | while (EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)) |
1518 | { |
1519 | edge e = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0); |
1520 | redirect_edge_succ (e, incr_bb); |
1521 | incr_bb->count += e->count (); |
1522 | } |
1523 | } |
1524 | else if (node->simdclone->inbranch) |
1525 | { |
1526 | incr_bb = create_empty_bb (entry_bb); |
1527 | incr_bb->count = profile_count::zero (); |
1528 | add_bb_to_loop (incr_bb, body_bb->loop_father); |
1529 | } |
1530 | |
1531 | if (incr_bb) |
1532 | { |
1533 | make_single_succ_edge (incr_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); |
1534 | gsi = gsi_last_bb (bb: incr_bb); |
1535 | iter2 = make_ssa_name (var: iter); |
1536 | g = gimple_build_assign (iter2, PLUS_EXPR, iter1, |
1537 | build_int_cst (unsigned_type_node, 1)); |
1538 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1539 | |
1540 | /* Mostly annotate the loop for the vectorizer (the rest is done |
1541 | below). */ |
1542 | loop = alloc_loop (); |
1543 | cfun->has_force_vectorize_loops = true; |
1544 | /* For now, simdlen is always constant. */ |
1545 | loop->safelen = node->simdclone->simdlen.to_constant (); |
1546 | loop->force_vectorize = true; |
1547 | loop->header = body_bb; |
1548 | } |
1549 | |
1550 | /* Branch around the body if the mask applies. */ |
1551 | if (node->simdclone->inbranch) |
1552 | { |
1553 | gsi = gsi_last_bb (bb: loop->header); |
1554 | tree mask_array |
1555 | = node->simdclone->args[node->simdclone->nargs - 1].simd_array; |
1556 | tree mask; |
1557 | if (node->simdclone->mask_mode != VOIDmode) |
1558 | { |
1559 | tree shift_cnt; |
1560 | if (mask_array == NULL_TREE) |
1561 | { |
1562 | tree arg = node->simdclone->args[node->simdclone->nargs |
1563 | - 1].vector_arg; |
1564 | mask = get_or_create_ssa_default_def (cfun, arg); |
1565 | shift_cnt = iter1; |
1566 | } |
1567 | else |
1568 | { |
1569 | tree maskt = TREE_TYPE (mask_array); |
1570 | int c = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (maskt))); |
1571 | /* For now, c must be constant here. */ |
1572 | c = exact_div (a: node->simdclone->simdlen, b: c + 1).to_constant (); |
1573 | int s = exact_log2 (x: c); |
1574 | gcc_assert (s > 0); |
1575 | c--; |
1576 | tree idx = make_ssa_name (TREE_TYPE (iter1)); |
1577 | g = gimple_build_assign (idx, RSHIFT_EXPR, iter1, |
1578 | build_int_cst (NULL_TREE, s)); |
1579 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1580 | mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array))); |
1581 | tree aref = build4 (ARRAY_REF, |
1582 | TREE_TYPE (TREE_TYPE (mask_array)), |
1583 | mask_array, idx, NULL, NULL); |
1584 | g = gimple_build_assign (mask, aref); |
1585 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1586 | shift_cnt = make_ssa_name (TREE_TYPE (iter1)); |
1587 | g = gimple_build_assign (shift_cnt, BIT_AND_EXPR, iter1, |
1588 | build_int_cst (TREE_TYPE (iter1), c)); |
1589 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1590 | } |
1591 | tree shift_cnt_conv = shift_cnt; |
1592 | if (!useless_type_conversion_p (TREE_TYPE (mask), |
1593 | TREE_TYPE (shift_cnt))) |
1594 | { |
1595 | shift_cnt_conv = make_ssa_name (TREE_TYPE (mask)); |
1596 | g = gimple_build_assign (shift_cnt_conv, NOP_EXPR, shift_cnt); |
1597 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1598 | } |
1599 | g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)), |
1600 | RSHIFT_EXPR, mask, shift_cnt_conv); |
1601 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1602 | mask = gimple_assign_lhs (gs: g); |
1603 | g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)), |
1604 | BIT_AND_EXPR, mask, |
1605 | build_one_cst (TREE_TYPE (mask))); |
1606 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1607 | mask = gimple_assign_lhs (gs: g); |
1608 | } |
1609 | else |
1610 | { |
1611 | mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array))); |
1612 | tree aref = build4 (ARRAY_REF, |
1613 | TREE_TYPE (TREE_TYPE (mask_array)), |
1614 | mask_array, iter1, NULL, NULL); |
1615 | g = gimple_build_assign (mask, aref); |
1616 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1617 | int bitsize = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (aref))); |
1618 | if (!INTEGRAL_TYPE_P (TREE_TYPE (aref))) |
1619 | { |
1620 | aref = build1 (VIEW_CONVERT_EXPR, |
1621 | build_nonstandard_integer_type (bitsize, 0), |
1622 | mask); |
1623 | mask = make_ssa_name (TREE_TYPE (aref)); |
1624 | g = gimple_build_assign (mask, aref); |
1625 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1626 | } |
1627 | } |
1628 | |
1629 | g = gimple_build_cond (EQ_EXPR, mask, build_zero_cst (TREE_TYPE (mask)), |
1630 | NULL, NULL); |
1631 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1632 | edge e = make_edge (loop->header, incr_bb, EDGE_TRUE_VALUE); |
1633 | e->probability = profile_probability::unlikely ().guessed (); |
1634 | incr_bb->count += e->count (); |
1635 | edge fallthru = FALLTHRU_EDGE (loop->header); |
1636 | fallthru->flags = EDGE_FALSE_VALUE; |
1637 | fallthru->probability = profile_probability::likely ().guessed (); |
1638 | } |
1639 | |
1640 | basic_block latch_bb = NULL; |
1641 | basic_block new_exit_bb = NULL; |
1642 | |
1643 | /* Generate the condition. */ |
1644 | if (incr_bb) |
1645 | { |
1646 | gsi = gsi_last_bb (bb: incr_bb); |
1647 | g = gimple_build_cond (LT_EXPR, iter2, |
1648 | build_int_cst (unsigned_type_node, |
1649 | node->simdclone->simdlen), |
1650 | NULL, NULL); |
1651 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1652 | edge e = split_block (incr_bb, gsi_stmt (i: gsi)); |
1653 | latch_bb = e->dest; |
1654 | new_exit_bb = split_block_after_labels (latch_bb)->dest; |
1655 | loop->latch = latch_bb; |
1656 | |
1657 | redirect_edge_succ (FALLTHRU_EDGE (latch_bb), body_bb); |
1658 | |
1659 | edge new_e = make_edge (incr_bb, new_exit_bb, EDGE_FALSE_VALUE); |
1660 | |
1661 | /* FIXME: Do we need to distribute probabilities for the conditional? */ |
1662 | new_e->probability = profile_probability::guessed_never (); |
1663 | /* The successor of incr_bb is already pointing to latch_bb; just |
1664 | change the flags. |
1665 | make_edge (incr_bb, latch_bb, EDGE_TRUE_VALUE); */ |
1666 | FALLTHRU_EDGE (incr_bb)->flags = EDGE_TRUE_VALUE; |
1667 | } |
1668 | |
1669 | gphi *phi = create_phi_node (iter1, body_bb); |
1670 | edge = find_edge (entry_bb, body_bb); |
1671 | edge latch_edge = NULL; |
1672 | add_phi_arg (phi, build_zero_cst (unsigned_type_node), preheader_edge, |
1673 | UNKNOWN_LOCATION); |
1674 | if (incr_bb) |
1675 | { |
1676 | latch_edge = single_succ_edge (bb: latch_bb); |
1677 | add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION); |
1678 | |
1679 | /* Generate the new return. */ |
1680 | gsi = gsi_last_bb (bb: new_exit_bb); |
1681 | if (retval |
1682 | && TREE_CODE (retval) == VIEW_CONVERT_EXPR |
1683 | && TREE_CODE (TREE_OPERAND (retval, 0)) == RESULT_DECL) |
1684 | retval = TREE_OPERAND (retval, 0); |
1685 | else if (retval) |
1686 | { |
1687 | retval = build1 (VIEW_CONVERT_EXPR, |
1688 | TREE_TYPE (TREE_TYPE (node->decl)), |
1689 | retval); |
1690 | retval = force_gimple_operand_gsi (&gsi, retval, true, NULL, |
1691 | false, GSI_CONTINUE_LINKING); |
1692 | } |
1693 | g = gimple_build_return (retval); |
1694 | gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); |
1695 | } |
1696 | |
1697 | /* Handle aligned clauses by replacing default defs of the aligned |
1698 | uniform args with __builtin_assume_aligned (arg_N(D), alignment) |
1699 | lhs. Handle linear by adding PHIs. */ |
1700 | for (unsigned i = 0; i < node->simdclone->nargs; i++) |
1701 | if (node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM |
1702 | && (TREE_ADDRESSABLE (node->simdclone->args[i].orig_arg) |
1703 | || !is_gimple_reg_type |
1704 | (TREE_TYPE (node->simdclone->args[i].orig_arg)))) |
1705 | { |
1706 | tree orig_arg = node->simdclone->args[i].orig_arg; |
1707 | if (is_gimple_reg_type (TREE_TYPE (orig_arg))) |
1708 | iter1 = make_ssa_name (TREE_TYPE (orig_arg)); |
1709 | else |
1710 | { |
1711 | iter1 = create_tmp_var_raw (TREE_TYPE (orig_arg)); |
1712 | gimple_add_tmp_var (iter1); |
1713 | } |
1714 | gsi = gsi_after_labels (bb: entry_bb); |
1715 | g = gimple_build_assign (iter1, orig_arg); |
1716 | gsi_insert_before (&gsi, g, GSI_NEW_STMT); |
1717 | gsi = gsi_after_labels (bb: body_bb); |
1718 | g = gimple_build_assign (orig_arg, iter1); |
1719 | gsi_insert_before (&gsi, g, GSI_NEW_STMT); |
1720 | } |
1721 | else if (node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM |
1722 | && DECL_BY_REFERENCE (node->simdclone->args[i].orig_arg) |
1723 | && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg)) |
1724 | == REFERENCE_TYPE |
1725 | && TREE_ADDRESSABLE |
1726 | (TREE_TYPE (TREE_TYPE (node->simdclone->args[i].orig_arg)))) |
1727 | { |
1728 | tree orig_arg = node->simdclone->args[i].orig_arg; |
1729 | tree def = ssa_default_def (cfun, orig_arg); |
1730 | if (def && !has_zero_uses (var: def)) |
1731 | { |
1732 | iter1 = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (orig_arg))); |
1733 | gimple_add_tmp_var (iter1); |
1734 | gsi = gsi_after_labels (bb: entry_bb); |
1735 | g = gimple_build_assign (iter1, build_simple_mem_ref (def)); |
1736 | gsi_insert_before (&gsi, g, GSI_NEW_STMT); |
1737 | gsi = gsi_after_labels (bb: body_bb); |
1738 | g = gimple_build_assign (build_simple_mem_ref (def), iter1); |
1739 | gsi_insert_before (&gsi, g, GSI_NEW_STMT); |
1740 | } |
1741 | } |
1742 | else if (node->simdclone->args[i].alignment |
1743 | && node->simdclone->args[i].arg_type |
1744 | == SIMD_CLONE_ARG_TYPE_UNIFORM |
1745 | && (node->simdclone->args[i].alignment |
1746 | & (node->simdclone->args[i].alignment - 1)) == 0 |
1747 | && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg)) |
1748 | == POINTER_TYPE) |
1749 | { |
1750 | unsigned int alignment = node->simdclone->args[i].alignment; |
1751 | tree orig_arg = node->simdclone->args[i].orig_arg; |
1752 | tree def = ssa_default_def (cfun, orig_arg); |
1753 | if (def && !has_zero_uses (var: def)) |
1754 | { |
1755 | tree fn = builtin_decl_explicit (fncode: BUILT_IN_ASSUME_ALIGNED); |
1756 | gimple_seq seq = NULL; |
1757 | bool need_cvt = false; |
1758 | gcall *call |
1759 | = gimple_build_call (fn, 2, def, size_int (alignment)); |
1760 | g = call; |
1761 | if (!useless_type_conversion_p (TREE_TYPE (orig_arg), |
1762 | ptr_type_node)) |
1763 | need_cvt = true; |
1764 | tree t = make_ssa_name (var: need_cvt ? ptr_type_node : orig_arg); |
1765 | gimple_call_set_lhs (gs: g, lhs: t); |
1766 | gimple_seq_add_stmt_without_update (&seq, g); |
1767 | if (need_cvt) |
1768 | { |
1769 | t = make_ssa_name (var: orig_arg); |
1770 | g = gimple_build_assign (t, NOP_EXPR, gimple_call_lhs (gs: g)); |
1771 | gimple_seq_add_stmt_without_update (&seq, g); |
1772 | } |
1773 | gsi_insert_seq_on_edge_immediate |
1774 | (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)), seq); |
1775 | |
1776 | entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)); |
1777 | node->create_edge (callee: cgraph_node::get_create (fn), |
1778 | call_stmt: call, count: entry_bb->count); |
1779 | |
1780 | imm_use_iterator iter; |
1781 | use_operand_p use_p; |
1782 | gimple *use_stmt; |
1783 | tree repl = gimple_get_lhs (g); |
1784 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, def) |
1785 | if (is_gimple_debug (gs: use_stmt) || use_stmt == call) |
1786 | continue; |
1787 | else |
1788 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) |
1789 | SET_USE (use_p, repl); |
1790 | } |
1791 | } |
1792 | else if ((node->simdclone->args[i].arg_type |
1793 | == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP) |
1794 | || (node->simdclone->args[i].arg_type |
1795 | == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP) |
1796 | || (node->simdclone->args[i].arg_type |
1797 | == SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP) |
1798 | || (node->simdclone->args[i].arg_type |
1799 | == SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP)) |
1800 | { |
1801 | tree orig_arg = node->simdclone->args[i].orig_arg; |
1802 | gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (orig_arg)) |
1803 | || POINTER_TYPE_P (TREE_TYPE (orig_arg))); |
1804 | tree def = NULL_TREE; |
1805 | if (TREE_ADDRESSABLE (orig_arg)) |
1806 | { |
1807 | def = make_ssa_name (TREE_TYPE (orig_arg)); |
1808 | iter1 = make_ssa_name (TREE_TYPE (orig_arg)); |
1809 | if (incr_bb) |
1810 | iter2 = make_ssa_name (TREE_TYPE (orig_arg)); |
1811 | gsi = gsi_after_labels (bb: entry_bb); |
1812 | g = gimple_build_assign (def, orig_arg); |
1813 | gsi_insert_before (&gsi, g, GSI_NEW_STMT); |
1814 | } |
1815 | else |
1816 | { |
1817 | def = ssa_default_def (cfun, orig_arg); |
1818 | if (!def || has_zero_uses (var: def)) |
1819 | def = NULL_TREE; |
1820 | else |
1821 | { |
1822 | iter1 = make_ssa_name (var: orig_arg); |
1823 | if (incr_bb) |
1824 | iter2 = make_ssa_name (var: orig_arg); |
1825 | } |
1826 | } |
1827 | if (def) |
1828 | { |
1829 | phi = create_phi_node (iter1, body_bb); |
1830 | add_phi_arg (phi, def, preheader_edge, UNKNOWN_LOCATION); |
1831 | if (incr_bb) |
1832 | { |
1833 | add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION); |
1834 | enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg)) |
1835 | ? PLUS_EXPR : POINTER_PLUS_EXPR; |
1836 | tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg)) |
1837 | ? TREE_TYPE (orig_arg) : sizetype; |
1838 | tree addcst = simd_clone_linear_addend (node, i, addtype, |
1839 | entry_bb); |
1840 | gsi = gsi_last_bb (bb: incr_bb); |
1841 | g = gimple_build_assign (iter2, code, iter1, addcst); |
1842 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1843 | } |
1844 | |
1845 | imm_use_iterator iter; |
1846 | use_operand_p use_p; |
1847 | gimple *use_stmt; |
1848 | if (TREE_ADDRESSABLE (orig_arg)) |
1849 | { |
1850 | gsi = gsi_after_labels (bb: body_bb); |
1851 | g = gimple_build_assign (orig_arg, iter1); |
1852 | gsi_insert_before (&gsi, g, GSI_NEW_STMT); |
1853 | } |
1854 | else |
1855 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, def) |
1856 | if (use_stmt == phi) |
1857 | continue; |
1858 | else |
1859 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) |
1860 | SET_USE (use_p, iter1); |
1861 | } |
1862 | } |
1863 | else if (node->simdclone->args[i].arg_type |
1864 | == SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP |
1865 | || (node->simdclone->args[i].arg_type |
1866 | == SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP)) |
1867 | { |
1868 | tree orig_arg = node->simdclone->args[i].orig_arg; |
1869 | tree def = ssa_default_def (cfun, orig_arg); |
1870 | gcc_assert (!TREE_ADDRESSABLE (orig_arg) |
1871 | && TREE_CODE (TREE_TYPE (orig_arg)) == REFERENCE_TYPE); |
1872 | if (def && !has_zero_uses (var: def)) |
1873 | { |
1874 | tree rtype = TREE_TYPE (TREE_TYPE (orig_arg)); |
1875 | iter1 = make_ssa_name (var: orig_arg); |
1876 | if (incr_bb) |
1877 | iter2 = make_ssa_name (var: orig_arg); |
1878 | tree iter3 = make_ssa_name (var: rtype); |
1879 | tree iter4 = make_ssa_name (var: rtype); |
1880 | tree iter5 = incr_bb ? make_ssa_name (var: rtype) : NULL_TREE; |
1881 | gsi = gsi_after_labels (bb: entry_bb); |
1882 | gimple *load |
1883 | = gimple_build_assign (iter3, build_simple_mem_ref (def)); |
1884 | gsi_insert_before (&gsi, load, GSI_NEW_STMT); |
1885 | |
1886 | tree array = node->simdclone->args[i].simd_array; |
1887 | TREE_ADDRESSABLE (array) = 1; |
1888 | tree ptr = build_fold_addr_expr (array); |
1889 | phi = create_phi_node (iter1, body_bb); |
1890 | add_phi_arg (phi, ptr, preheader_edge, UNKNOWN_LOCATION); |
1891 | if (incr_bb) |
1892 | { |
1893 | add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION); |
1894 | g = gimple_build_assign (iter2, POINTER_PLUS_EXPR, iter1, |
1895 | TYPE_SIZE_UNIT (TREE_TYPE (iter3))); |
1896 | gsi = gsi_last_bb (bb: incr_bb); |
1897 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1898 | } |
1899 | |
1900 | phi = create_phi_node (iter4, body_bb); |
1901 | add_phi_arg (phi, iter3, preheader_edge, UNKNOWN_LOCATION); |
1902 | if (incr_bb) |
1903 | { |
1904 | add_phi_arg (phi, iter5, latch_edge, UNKNOWN_LOCATION); |
1905 | enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (iter3)) |
1906 | ? PLUS_EXPR : POINTER_PLUS_EXPR; |
1907 | tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (iter3)) |
1908 | ? TREE_TYPE (iter3) : sizetype; |
1909 | tree addcst = simd_clone_linear_addend (node, i, addtype, |
1910 | entry_bb); |
1911 | g = gimple_build_assign (iter5, code, iter4, addcst); |
1912 | gsi = gsi_last_bb (bb: incr_bb); |
1913 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1914 | } |
1915 | |
1916 | g = gimple_build_assign (build_simple_mem_ref (iter1), iter4); |
1917 | gsi = gsi_after_labels (bb: body_bb); |
1918 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1919 | |
1920 | imm_use_iterator iter; |
1921 | use_operand_p use_p; |
1922 | gimple *use_stmt; |
1923 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, def) |
1924 | if (use_stmt == load) |
1925 | continue; |
1926 | else |
1927 | FOR_EACH_IMM_USE_ON_STMT (use_p, iter) |
1928 | SET_USE (use_p, iter1); |
1929 | |
1930 | if (!TYPE_READONLY (rtype) && incr_bb) |
1931 | { |
1932 | tree v = make_ssa_name (var: rtype); |
1933 | tree aref = build4 (ARRAY_REF, rtype, array, |
1934 | size_zero_node, NULL_TREE, |
1935 | NULL_TREE); |
1936 | gsi = gsi_after_labels (bb: new_exit_bb); |
1937 | g = gimple_build_assign (v, aref); |
1938 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1939 | g = gimple_build_assign (build_simple_mem_ref (def), v); |
1940 | gsi_insert_before (&gsi, g, GSI_SAME_STMT); |
1941 | } |
1942 | } |
1943 | } |
1944 | |
1945 | calculate_dominance_info (CDI_DOMINATORS); |
1946 | if (loop) |
1947 | add_loop (loop, loop->header->loop_father); |
1948 | update_ssa (TODO_update_ssa); |
1949 | |
1950 | pop_cfun (); |
1951 | } |
1952 | |
1953 | /* If the function in NODE is tagged as an elemental SIMD function, |
1954 | create the appropriate SIMD clones. */ |
1955 | |
1956 | void |
1957 | expand_simd_clones (struct cgraph_node *node) |
1958 | { |
1959 | tree attr; |
1960 | bool explicit_p = true; |
1961 | |
1962 | if (node->inlined_to |
1963 | || lookup_attribute (attr_name: "noclone" , DECL_ATTRIBUTES (node->decl))) |
1964 | return; |
1965 | |
1966 | attr = lookup_attribute (attr_name: "omp declare simd" , |
1967 | DECL_ATTRIBUTES (node->decl)); |
1968 | |
1969 | /* See if we can add an "omp declare simd" directive implicitly |
1970 | before giving up. */ |
1971 | /* FIXME: OpenACC "#pragma acc routine" translates into |
1972 | "omp declare target", but appears also to have some other effects |
1973 | that conflict with generating SIMD clones, causing ICEs. So don't |
1974 | do this if we've got OpenACC instead of OpenMP. */ |
1975 | if (attr == NULL_TREE |
1976 | #ifdef ACCEL_COMPILER |
1977 | && (flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_ANY |
1978 | || flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_NOHOST) |
1979 | #else |
1980 | && (flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_ANY |
1981 | || flag_openmp_target_simd_clone == OMP_TARGET_SIMD_CLONE_HOST) |
1982 | #endif |
1983 | && !oacc_get_fn_attrib (fn: node->decl) |
1984 | && ok_for_auto_simd_clone (node)) |
1985 | { |
1986 | attr = tree_cons (get_identifier ("omp declare simd" ), NULL, |
1987 | DECL_ATTRIBUTES (node->decl)); |
1988 | DECL_ATTRIBUTES (node->decl) = attr; |
1989 | explicit_p = false; |
1990 | } |
1991 | |
1992 | if (attr == NULL_TREE) |
1993 | return; |
1994 | |
1995 | /* Ignore |
1996 | #pragma omp declare simd |
1997 | extern int foo (); |
1998 | in C, there we don't know the argument types at all. */ |
1999 | if (!node->definition |
2000 | && TYPE_ARG_TYPES (TREE_TYPE (node->decl)) == NULL_TREE) |
2001 | return; |
2002 | |
2003 | /* Call this before creating clone_info, as it might ggc_collect. */ |
2004 | if (node->definition && node->has_gimple_body_p ()) |
2005 | node->get_body (); |
2006 | |
2007 | do |
2008 | { |
2009 | /* Start with parsing the "omp declare simd" attribute(s). */ |
2010 | bool inbranch_clause_specified; |
2011 | struct cgraph_simd_clone *clone_info |
2012 | = simd_clone_clauses_extract (node, TREE_VALUE (attr), |
2013 | inbranch_specified: &inbranch_clause_specified); |
2014 | if (clone_info == NULL) |
2015 | continue; |
2016 | |
2017 | poly_uint64 orig_simdlen = clone_info->simdlen; |
2018 | tree base_type = simd_clone_compute_base_data_type (node, clone_info); |
2019 | |
2020 | /* The target can return 0 (no simd clones should be created), |
2021 | 1 (just one ISA of simd clones should be created) or higher |
2022 | count of ISA variants. In that case, clone_info is initialized |
2023 | for the first ISA variant. */ |
2024 | int count |
2025 | = targetm.simd_clone.compute_vecsize_and_simdlen (node, clone_info, |
2026 | base_type, 0, |
2027 | explicit_p); |
2028 | if (count == 0) |
2029 | continue; |
2030 | |
2031 | /* Loop over all COUNT ISA variants, and if !INBRANCH_CLAUSE_SPECIFIED, |
2032 | also create one inbranch and one !inbranch clone of it. */ |
2033 | for (int i = 0; i < count * 2; i++) |
2034 | { |
2035 | struct cgraph_simd_clone *clone = clone_info; |
2036 | if (inbranch_clause_specified && (i & 1) != 0) |
2037 | continue; |
2038 | |
2039 | if (i != 0) |
2040 | { |
2041 | clone = simd_clone_struct_alloc (nargs: clone_info->nargs |
2042 | + ((i & 1) != 0)); |
2043 | simd_clone_struct_copy (to: clone, from: clone_info); |
2044 | /* Undo changes targetm.simd_clone.compute_vecsize_and_simdlen |
2045 | and simd_clone_adjust_argument_types did to the first |
2046 | clone's info. */ |
2047 | clone->nargs -= clone_info->inbranch; |
2048 | clone->simdlen = orig_simdlen; |
2049 | /* And call the target hook again to get the right ISA. */ |
2050 | targetm.simd_clone.compute_vecsize_and_simdlen (node, clone, |
2051 | base_type, |
2052 | i / 2, |
2053 | explicit_p); |
2054 | if ((i & 1) != 0) |
2055 | clone->inbranch = 1; |
2056 | } |
2057 | |
2058 | /* simd_clone_mangle might fail if such a clone has been created |
2059 | already. */ |
2060 | tree id = simd_clone_mangle (node, clone_info: clone); |
2061 | if (id == NULL_TREE) |
2062 | { |
2063 | if (i == 0) |
2064 | clone->nargs += clone->inbranch; |
2065 | continue; |
2066 | } |
2067 | |
2068 | /* Only when we are sure we want to create the clone actually |
2069 | clone the function (or definitions) or create another |
2070 | extern FUNCTION_DECL (for prototypes without definitions). */ |
2071 | struct cgraph_node *n = simd_clone_create (old_node: node, force_local: !explicit_p); |
2072 | if (n == NULL) |
2073 | { |
2074 | if (i == 0) |
2075 | clone->nargs += clone->inbranch; |
2076 | continue; |
2077 | } |
2078 | |
2079 | n->simdclone = clone; |
2080 | clone->origin = node; |
2081 | clone->next_clone = NULL; |
2082 | if (node->simd_clones == NULL) |
2083 | { |
2084 | clone->prev_clone = n; |
2085 | node->simd_clones = n; |
2086 | } |
2087 | else |
2088 | { |
2089 | clone->prev_clone = node->simd_clones->simdclone->prev_clone; |
2090 | clone->prev_clone->simdclone->next_clone = n; |
2091 | node->simd_clones->simdclone->prev_clone = n; |
2092 | } |
2093 | symtab->change_decl_assembler_name (decl: n->decl, name: id); |
2094 | /* And finally adjust the return type, parameters and for |
2095 | definitions also function body. */ |
2096 | if (node->definition) |
2097 | simd_clone_adjust (node: n); |
2098 | else |
2099 | { |
2100 | TREE_TYPE (n->decl) |
2101 | = build_distinct_type_copy (TREE_TYPE (n->decl)); |
2102 | simd_clone_adjust_return_type (node: n); |
2103 | simd_clone_adjust_argument_types (node: n); |
2104 | targetm.simd_clone.adjust (n); |
2105 | } |
2106 | if (dump_file) |
2107 | fprintf (stream: dump_file, format: "\nGenerated %s clone %s\n" , |
2108 | (TREE_PUBLIC (n->decl) ? "global" : "local" ), |
2109 | IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl))); |
2110 | } |
2111 | } |
2112 | while ((attr = lookup_attribute (attr_name: "omp declare simd" , TREE_CHAIN (attr)))); |
2113 | } |
2114 | |
2115 | /* Entry point for IPA simd clone creation pass. */ |
2116 | |
2117 | static unsigned int |
2118 | ipa_omp_simd_clone (void) |
2119 | { |
2120 | struct cgraph_node *node; |
2121 | FOR_EACH_FUNCTION (node) |
2122 | expand_simd_clones (node); |
2123 | return 0; |
2124 | } |
2125 | |
2126 | namespace { |
2127 | |
2128 | const pass_data pass_data_omp_simd_clone = |
2129 | { |
2130 | .type: SIMPLE_IPA_PASS, /* type */ |
2131 | .name: "simdclone" , /* name */ |
2132 | .optinfo_flags: OPTGROUP_OMP, /* optinfo_flags */ |
2133 | .tv_id: TV_NONE, /* tv_id */ |
2134 | .properties_required: ( PROP_ssa | PROP_cfg ), /* properties_required */ |
2135 | .properties_provided: 0, /* properties_provided */ |
2136 | .properties_destroyed: 0, /* properties_destroyed */ |
2137 | .todo_flags_start: 0, /* todo_flags_start */ |
2138 | .todo_flags_finish: 0, /* todo_flags_finish */ |
2139 | }; |
2140 | |
2141 | class pass_omp_simd_clone : public simple_ipa_opt_pass |
2142 | { |
2143 | public: |
2144 | pass_omp_simd_clone(gcc::context *ctxt) |
2145 | : simple_ipa_opt_pass(pass_data_omp_simd_clone, ctxt) |
2146 | {} |
2147 | |
2148 | /* opt_pass methods: */ |
2149 | bool gate (function *) final override; |
2150 | unsigned int execute (function *) final override |
2151 | { |
2152 | return ipa_omp_simd_clone (); |
2153 | } |
2154 | }; |
2155 | |
2156 | bool |
2157 | pass_omp_simd_clone::gate (function *) |
2158 | { |
2159 | return targetm.simd_clone.compute_vecsize_and_simdlen != NULL; |
2160 | } |
2161 | |
2162 | } // anon namespace |
2163 | |
2164 | simple_ipa_opt_pass * |
2165 | make_pass_omp_simd_clone (gcc::context *ctxt) |
2166 | { |
2167 | return new pass_omp_simd_clone (ctxt); |
2168 | } |
2169 | |