/* Vectorizer
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Loop and basic block vectorizer.

  This file contains drivers for the three vectorizers:
  (1) loop vectorizer (inter-iteration parallelism),
  (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
      vectorizer)
  (3) BB vectorizer (out-of-loops), aka SLP

  The rest of the vectorizer's code is organized as follows:
  - tree-vect-loop.cc - loop specific parts such as reductions, etc.  These
    are used by drivers (1) and (2).
  - tree-vect-loop-manip.cc - vectorizer's loop control-flow utilities, used
    by drivers (1) and (2).
  - tree-vect-slp.cc - BB vectorization specific analysis and transformation,
    used by drivers (2) and (3).
  - tree-vect-stmts.cc - statements analysis and transformation (used by all).
  - tree-vect-data-refs.cc - vectorizer specific data-refs analysis and
    manipulations (used by all).
  - tree-vect-patterns.cc - vectorizable code patterns detector (used by all)

  Here's a poor attempt at illustrating that:

     tree-vectorizer.cc:
     loop_vect()  loop_aware_slp()  slp_vect()
          |        /           \          /
          |       /             \        /
          tree-vect-loop.cc  tree-vect-slp.cc
                | \      \  /      /   |
                |  \      \/      /    |
                |   \     /\     /     |
                |    \   /  \   /      |
         tree-vect-stmts.cc  tree-vect-data-refs.cc
                       \      /
                    tree-vect-patterns.cc
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-cfg.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "dbgcnt.h"
#include "tree-scalar-evolution.h"
#include "stringpool.h"
#include "attribs.h"
#include "gimple-pretty-print.h"
#include "opt-problem.h"
#include "internal-fn.h"
#include "tree-ssa-sccvn.h"
#include "tree-into-ssa.h"

/* Loop or bb location, with hotness information.  */
dump_user_location_t vect_location;

/* auto_purge_vect_location's dtor: reset the vect_location
   global, to avoid stale location_t values that could reference
   GC-ed blocks.  */

auto_purge_vect_location::~auto_purge_vect_location ()
{
  vect_location = dump_user_location_t ();
}
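
/* Usage sketch (illustrative; this mirrors try_vectorize_loop_1 below):
   declare a sentinel on the stack so that vect_location is cleared no
   matter how the scope is left:

     {
       auto_purge_vect_location sentinel;
       vect_location = find_loop_location (loop);
       ... analysis that may dump using vect_location ...
     }   // dtor runs here and resets vect_location
*/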

/* Dump a cost entry according to args to F.  */

void
dump_stmt_cost (FILE *f, int count, enum vect_cost_for_stmt kind,
                stmt_vec_info stmt_info, slp_tree node, tree,
                int misalign, unsigned cost,
                enum vect_cost_model_location where)
{
  if (stmt_info)
    {
      print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
      fprintf (f, " ");
    }
  else if (node)
    fprintf (f, "node %p ", (void *)node);
  else
    fprintf (f, "<unknown> ");
  fprintf (f, "%d times ", count);
  const char *ks = "unknown";
  switch (kind)
    {
    case scalar_stmt:
      ks = "scalar_stmt";
      break;
    case scalar_load:
      ks = "scalar_load";
      break;
    case scalar_store:
      ks = "scalar_store";
      break;
    case vector_stmt:
      ks = "vector_stmt";
      break;
    case vector_load:
      ks = "vector_load";
      break;
    case vector_gather_load:
      ks = "vector_gather_load";
      break;
    case unaligned_load:
      ks = "unaligned_load";
      break;
    case unaligned_store:
      ks = "unaligned_store";
      break;
    case vector_store:
      ks = "vector_store";
      break;
    case vector_scatter_store:
      ks = "vector_scatter_store";
      break;
    case vec_to_scalar:
      ks = "vec_to_scalar";
      break;
    case scalar_to_vec:
      ks = "scalar_to_vec";
      break;
    case cond_branch_not_taken:
      ks = "cond_branch_not_taken";
      break;
    case cond_branch_taken:
      ks = "cond_branch_taken";
      break;
    case vec_perm:
      ks = "vec_perm";
      break;
    case vec_promote_demote:
      ks = "vec_promote_demote";
      break;
    case vec_construct:
      ks = "vec_construct";
      break;
    }
  fprintf (f, "%s ", ks);
  if (kind == unaligned_load || kind == unaligned_store)
    fprintf (f, "(misalign %d) ", misalign);
  fprintf (f, "costs %u ", cost);
  const char *ws = "unknown";
  switch (where)
    {
    case vect_prologue:
      ws = "prologue";
      break;
    case vect_body:
      ws = "body";
      break;
    case vect_epilogue:
      ws = "epilogue";
      break;
    }
  fprintf (f, "in %s\n", ws);
}
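
/* As an illustration (statement text made up), a call such as
   dump_stmt_cost (f, 1, vector_stmt, info, NULL, NULL_TREE, 0, 4,
   vect_body) emits a line roughly of the form:

     _4 * 2 1 times vector_stmt costs 4 in body
*/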

/* For mapping simduid to vectorization factor.  */

class simduid_to_vf : public free_ptr_hash<simduid_to_vf>
{
public:
  unsigned int simduid;
  poly_uint64 vf;

  /* hash_table support.  */
  static inline hashval_t hash (const simduid_to_vf *);
  static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
};

inline hashval_t
simduid_to_vf::hash (const simduid_to_vf *p)
{
  return p->simduid;
}

inline int
simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
{
  return p1->simduid == p2->simduid;
}
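
/* Usage sketch (mirroring vect_transform_loops and adjust_simduid_builtins
   below): entries hash and compare on the simduid key alone, so a lookup
   only needs that field filled in:

     simduid_to_vf data, *p;
     data.simduid = DECL_UID (loop->simduid);
     p = simduid_to_vf_htab->find (&data);   // NULL if nothing recorded
     if (p)
       vf = p->vf;
*/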

/* This hash maps the OMP simd array to the corresponding simduid used
   to index into it.  Like so:

        _7 = GOMP_SIMD_LANE (simduid.0)
        ...
        ...
        D.1737[_7] = stuff;


   This hash maps from the OMP simd array (D.1737[]) to the DECL_UID of
   simduid.0.  */

struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
{
  tree decl;
  unsigned int simduid;

  /* hash_table support.  */
  static inline hashval_t hash (const simd_array_to_simduid *);
  static inline int equal (const simd_array_to_simduid *,
                           const simd_array_to_simduid *);
};

inline hashval_t
simd_array_to_simduid::hash (const simd_array_to_simduid *p)
{
  return DECL_UID (p->decl);
}

inline int
simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
                              const simd_array_to_simduid *p2)
{
  return p1->decl == p2->decl;
}

/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE,
   into their corresponding constants and remove
   IFN_GOMP_SIMD_ORDERED_{START,END}.  */

static void
adjust_simduid_builtins (hash_table<simduid_to_vf> *htab, function *fun)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator i;

      for (i = gsi_start_bb (bb); !gsi_end_p (i); )
        {
          poly_uint64 vf = 1;
          enum internal_fn ifn;
          gimple *stmt = gsi_stmt (i);
          tree t;
          if (!is_gimple_call (stmt)
              || !gimple_call_internal_p (stmt))
            {
              gsi_next (&i);
              continue;
            }
          ifn = gimple_call_internal_fn (stmt);
          switch (ifn)
            {
            case IFN_GOMP_SIMD_LANE:
            case IFN_GOMP_SIMD_VF:
            case IFN_GOMP_SIMD_LAST_LANE:
              break;
            case IFN_GOMP_SIMD_ORDERED_START:
            case IFN_GOMP_SIMD_ORDERED_END:
              if (integer_onep (gimple_call_arg (stmt, 0)))
                {
                  enum built_in_function bcode
                    = (ifn == IFN_GOMP_SIMD_ORDERED_START
                       ? BUILT_IN_GOMP_ORDERED_START
                       : BUILT_IN_GOMP_ORDERED_END);
                  gimple *g
                    = gimple_build_call (builtin_decl_explicit (bcode), 0);
                  gimple_move_vops (g, stmt);
                  gsi_replace (&i, g, true);
                  continue;
                }
              gsi_remove (&i, true);
              unlink_stmt_vdef (stmt);
              continue;
            default:
              gsi_next (&i);
              continue;
            }
          tree arg = gimple_call_arg (stmt, 0);
          gcc_assert (arg != NULL_TREE);
          gcc_assert (TREE_CODE (arg) == SSA_NAME);
          simduid_to_vf *p = NULL, data;
          data.simduid = DECL_UID (SSA_NAME_VAR (arg));
          /* Need to nullify the loop safelen field since its value is
             no longer valid after the transformation.  */
          if (bb->loop_father && bb->loop_father->safelen > 0)
            bb->loop_father->safelen = 0;
          if (htab)
            {
              p = htab->find (&data);
              if (p)
                vf = p->vf;
            }
          switch (ifn)
            {
            case IFN_GOMP_SIMD_VF:
              t = build_int_cst (unsigned_type_node, vf);
              break;
            case IFN_GOMP_SIMD_LANE:
              t = build_int_cst (unsigned_type_node, 0);
              break;
            case IFN_GOMP_SIMD_LAST_LANE:
              t = gimple_call_arg (stmt, 1);
              break;
            default:
              gcc_unreachable ();
            }
          tree lhs = gimple_call_lhs (stmt);
          if (lhs)
            replace_uses_by (lhs, t);
          release_defs (stmt);
          gsi_remove (&i, true);
        }
    }
}
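
/* For instance (a sketch; the SSA names are illustrative), if the loop
   with simduid.0 was vectorized with VF 8, the folding above turns

     _7 = GOMP_SIMD_VF (simduid.0);
     _8 = GOMP_SIMD_LANE (simduid.0);

   into constants: uses of _7 are replaced by 8, uses of _8 by 0, and
   both calls are removed.  */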

/* Helper structure for note_simd_array_uses.  */

struct note_simd_array_uses_struct
{
  hash_table<simd_array_to_simduid> **htab;
  unsigned int simduid;
};

/* Callback for note_simd_array_uses, called through walk_gimple_op.  */

static tree
note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct note_simd_array_uses_struct *ns
    = (struct note_simd_array_uses_struct *) wi->info;

  if (TYPE_P (*tp))
    *walk_subtrees = 0;
  else if (VAR_P (*tp)
           && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
           && DECL_CONTEXT (*tp) == current_function_decl)
    {
      simd_array_to_simduid data;
      if (!*ns->htab)
        *ns->htab = new hash_table<simd_array_to_simduid> (15);
      data.decl = *tp;
      data.simduid = ns->simduid;
      simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
      if (*slot == NULL)
        {
          simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
          *p = data;
          *slot = p;
        }
      else if ((*slot)->simduid != ns->simduid)
        (*slot)->simduid = -1U;
      *walk_subtrees = 0;
    }
  return NULL_TREE;
}

/* Find "omp simd array" temporaries and map them to corresponding
   simduid.  */

static void
note_simd_array_uses (hash_table<simd_array_to_simduid> **htab, function *fun)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  struct walk_stmt_info wi;
  struct note_simd_array_uses_struct ns;

  memset (&wi, 0, sizeof (wi));
  wi.info = &ns;
  ns.htab = htab;

  FOR_EACH_BB_FN (bb, fun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple *stmt = gsi_stmt (gsi);
        if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
          continue;
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_GOMP_SIMD_LANE:
          case IFN_GOMP_SIMD_VF:
          case IFN_GOMP_SIMD_LAST_LANE:
            break;
          default:
            continue;
          }
        tree lhs = gimple_call_lhs (stmt);
        if (lhs == NULL_TREE)
          continue;
        imm_use_iterator use_iter;
        gimple *use_stmt;
        ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
        FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
          if (!is_gimple_debug (use_stmt))
            walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
      }
}

/* Shrink arrays with "omp simd array" attribute to the corresponding
   vectorization factor.  */

static void
shrink_simd_arrays
  (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
   hash_table<simduid_to_vf> *simduid_to_vf_htab)
{
  for (hash_table<simd_array_to_simduid>::iterator iter
       = simd_array_to_simduid_htab->begin ();
       iter != simd_array_to_simduid_htab->end (); ++iter)
    if ((*iter)->simduid != -1U)
      {
        tree decl = (*iter)->decl;
        poly_uint64 vf = 1;
        if (simduid_to_vf_htab)
          {
            simduid_to_vf *p = NULL, data;
            data.simduid = (*iter)->simduid;
            p = simduid_to_vf_htab->find (&data);
            if (p)
              vf = p->vf;
          }
        tree atype
          = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
        TREE_TYPE (decl) = atype;
        relayout_decl (decl);
      }

  delete simd_array_to_simduid_htab;
}
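
/* E.g. (a sketch, reusing the D.1737 example above): an "omp simd array"
   temporary declared as

     float D.1737[64];

   whose simduid's loop was vectorized with VF 8 is re-laid-out as

     float D.1737[8];

   while arrays recorded with a conflicting simduid (-1U) are skipped and
   keep their original size.  */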

/* Initialize the vec_info with kind KIND_IN and shared data SHARED_.  */

vec_info::vec_info (vec_info::vec_kind kind_in, vec_info_shared *shared_)
  : kind (kind_in),
    shared (shared_),
    stmt_vec_info_ro (false)
{
  stmt_vec_infos.create (50);
}

vec_info::~vec_info ()
{
  for (slp_instance &instance : slp_instances)
    vect_free_slp_instance (instance);

  free_stmt_vec_infos ();
}

vec_info_shared::vec_info_shared ()
  : n_stmts (0),
    datarefs (vNULL),
    datarefs_copy (vNULL),
    ddrs (vNULL)
{
}

vec_info_shared::~vec_info_shared ()
{
  free_data_refs (datarefs);
  free_dependence_relations (ddrs);
  datarefs_copy.release ();
}

void
vec_info_shared::save_datarefs ()
{
  if (!flag_checking)
    return;
  datarefs_copy.reserve_exact (datarefs.length ());
  for (unsigned i = 0; i < datarefs.length (); ++i)
    datarefs_copy.quick_push (*datarefs[i]);
}

void
vec_info_shared::check_datarefs ()
{
  if (!flag_checking)
    return;
  gcc_assert (datarefs.length () == datarefs_copy.length ());
  for (unsigned i = 0; i < datarefs.length (); ++i)
    if (memcmp (&datarefs_copy[i], datarefs[i],
                offsetof (data_reference, alt_indices)) != 0)
      gcc_unreachable ();
}

/* Record that STMT belongs to the vectorizable region.  Create and return
   an associated stmt_vec_info.  */

stmt_vec_info
vec_info::add_stmt (gimple *stmt)
{
  stmt_vec_info res = new_stmt_vec_info (stmt);
  set_vinfo_for_stmt (stmt, res);
  return res;
}

/* Record that STMT belongs to the vectorizable region.  Create a new
   stmt_vec_info, mark VECINFO as being related to it, and return the
   new stmt_vec_info.  */

stmt_vec_info
vec_info::add_pattern_stmt (gimple *stmt, stmt_vec_info stmt_info)
{
  stmt_vec_info res = new_stmt_vec_info (stmt);
  set_vinfo_for_stmt (stmt, res, false);
  STMT_VINFO_RELATED_STMT (res) = stmt_info;
  return res;
}

/* If STMT has an associated stmt_vec_info, return that vec_info, otherwise
   return null.  It is safe to call this function on any statement, even if
   it might not be part of the vectorizable region.  */

stmt_vec_info
vec_info::lookup_stmt (gimple *stmt)
{
  unsigned int uid = gimple_uid (stmt);
  if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
    {
      stmt_vec_info res = stmt_vec_infos[uid - 1];
      if (res && res->stmt == stmt)
        return res;
    }
  return NULL;
}

/* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info,
   return that stmt_vec_info, otherwise return null.  It is safe to call
   this on arbitrary operands.  */

stmt_vec_info
vec_info::lookup_def (tree name)
{
  if (TREE_CODE (name) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (name))
    return lookup_stmt (SSA_NAME_DEF_STMT (name));
  return NULL;
}

/* See whether there is a single non-debug statement that uses LHS and
   whether that statement has an associated stmt_vec_info.  Return the
   stmt_vec_info if so, otherwise return null.  */

stmt_vec_info
vec_info::lookup_single_use (tree lhs)
{
  use_operand_p dummy;
  gimple *use_stmt;
  if (single_imm_use (lhs, &dummy, &use_stmt))
    return lookup_stmt (use_stmt);
  return NULL;
}

/* Return vectorization information about DR.  */

dr_vec_info *
vec_info::lookup_dr (data_reference *dr)
{
  stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr));
  /* DR_STMT should never refer to a stmt in a pattern replacement.  */
  gcc_checking_assert (!is_pattern_stmt_p (stmt_info));
  return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt);
}

/* Record that NEW_STMT_INFO now implements the same data reference
   as OLD_STMT_INFO.  */

void
vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
{
  gcc_assert (!is_pattern_stmt_p (old_stmt_info));
  STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info;
  new_stmt_info->dr_aux = old_stmt_info->dr_aux;
  STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info)
    = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info);
  STMT_VINFO_GATHER_SCATTER_P (new_stmt_info)
    = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
}

/* Permanently remove the statement described by STMT_INFO from the
   function.  */

void
vec_info::remove_stmt (stmt_vec_info stmt_info)
{
  gcc_assert (!stmt_info->pattern_stmt_p);
  set_vinfo_for_stmt (stmt_info->stmt, NULL);
  unlink_stmt_vdef (stmt_info->stmt);
  gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt);
  gsi_remove (&si, true);
  release_defs (stmt_info->stmt);
  free_stmt_vec_info (stmt_info);
}

/* Replace the statement at GSI by NEW_STMT, both the vectorization
   information and the function itself.  STMT_INFO describes the statement
   at GSI.  */

void
vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info,
                        gimple *new_stmt)
{
  gimple *old_stmt = stmt_info->stmt;
  gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi));
  gimple_set_uid (new_stmt, gimple_uid (old_stmt));
  stmt_info->stmt = new_stmt;
  gsi_replace (gsi, new_stmt, true);
}

/* Insert stmts in SEQ on the VEC_INFO region entry.  If CONTEXT is
   not NULL it specifies whether to use the sub-region entry
   determined by it, currently used for loop vectorization to insert
   on the inner loop entry vs. the outer loop entry.  */

void
vec_info::insert_seq_on_entry (stmt_vec_info context, gimple_seq seq)
{
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (this))
    {
      class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      basic_block new_bb;
      edge pe;

      if (context && nested_in_vect_loop_p (loop, context))
        loop = loop->inner;

      pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (this);
      gimple_stmt_iterator gsi_region_begin
        = gsi_after_labels (bb_vinfo->bbs[0]);
      gsi_insert_seq_before (&gsi_region_begin, seq, GSI_SAME_STMT);
    }
}

/* Like insert_seq_on_entry but just inserts the single stmt NEW_STMT.  */

void
vec_info::insert_on_entry (stmt_vec_info context, gimple *new_stmt)
{
  gimple_seq seq = NULL;
  gimple_stmt_iterator gsi = gsi_start (seq);
  gsi_insert_before_without_update (&gsi, new_stmt, GSI_SAME_STMT);
  insert_seq_on_entry (context, seq);
}

/* Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
vec_info::new_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info res = XCNEW (class _stmt_vec_info);
  res->stmt = stmt;

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
  STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
  STMT_VINFO_REDUC_FN (res) = IFN_LAST;
  STMT_VINFO_REDUC_IDX (res) = -1;
  STMT_VINFO_SLP_VECT_ONLY (res) = false;
  STMT_VINFO_SLP_VECT_ONLY_PATTERN (res) = false;
  STMT_VINFO_VEC_STMTS (res) = vNULL;
  res->reduc_initial_values = vNULL;
  res->reduc_scalar_results = vNULL;

  if (is_a <loop_vec_info> (this)
      && gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_SLP_TYPE (res) = loop_vect;

  /* This is really "uninitialized" until vect_compute_data_ref_alignment.  */
  res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;

  return res;
}

/* Associate STMT with INFO.  */

void
vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info, bool check_ro)
{
  unsigned int uid = gimple_uid (stmt);
  if (uid == 0)
    {
      gcc_assert (!check_ro || !stmt_vec_info_ro);
      gcc_checking_assert (info);
      uid = stmt_vec_infos.length () + 1;
      gimple_set_uid (stmt, uid);
      stmt_vec_infos.safe_push (info);
    }
  else
    {
      gcc_checking_assert (info == NULL);
      stmt_vec_infos[uid - 1] = info;
    }
}
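
/* To spell out the UID encoding used above: gimple UID 0 means "no
   stmt_vec_info associated", and a nonzero UID u denotes
   stmt_vec_infos[u - 1].  lookup_stmt relies on exactly this mapping.  */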

/* Free the contents of stmt_vec_infos.  */

void
vec_info::free_stmt_vec_infos (void)
{
  for (stmt_vec_info &info : stmt_vec_infos)
    if (info != NULL)
      free_stmt_vec_info (info);
  stmt_vec_infos.release ();
}

/* Free STMT_INFO.  */

void
vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
{
  if (stmt_info->pattern_stmt_p)
    {
      gimple_set_bb (stmt_info->stmt, NULL);
      tree lhs = gimple_get_lhs (stmt_info->stmt);
      if (lhs && TREE_CODE (lhs) == SSA_NAME)
        release_ssa_name (lhs);
    }

  stmt_info->reduc_initial_values.release ();
  stmt_info->reduc_scalar_results.release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  STMT_VINFO_VEC_STMTS (stmt_info).release ();
  free (stmt_info);
}

/* Returns true if S1 dominates S2.  */

bool
vect_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 != bb2)
    return dominated_by_p (CDI_DOMINATORS, bb2, bb1);

  /* PHIs in the same basic block are assumed to be
     executed all in parallel, if only one stmt is a PHI,
     it dominates the other stmt in the same basic block.  */
  if (gimple_code (s1) == GIMPLE_PHI)
    return true;

  if (gimple_code (s2) == GIMPLE_PHI)
    return false;

  /* Inserted vectorized stmts all have UID 0 while the original stmts
     in the IL have UID increasing within a BB.  Walk from both sides
     until we find the other stmt or a stmt with UID != 0.  */
  gimple_stmt_iterator gsi1 = gsi_for_stmt (s1);
  while (gimple_uid (gsi_stmt (gsi1)) == 0)
    {
      gsi_next (&gsi1);
      if (gsi_end_p (gsi1))
        return false;
      if (gsi_stmt (gsi1) == s2)
        return true;
    }
  if (gimple_uid (gsi_stmt (gsi1)) == -1u)
    return false;

  gimple_stmt_iterator gsi2 = gsi_for_stmt (s2);
  while (gimple_uid (gsi_stmt (gsi2)) == 0)
    {
      gsi_prev (&gsi2);
      if (gsi_end_p (gsi2))
        return false;
      if (gsi_stmt (gsi2) == s1)
        return true;
    }
  if (gimple_uid (gsi_stmt (gsi2)) == -1u)
    return false;

  if (gimple_uid (gsi_stmt (gsi1)) <= gimple_uid (gsi_stmt (gsi2)))
    return true;
  return false;
}
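
/* A worked example of the UID walk above (statement names illustrative):
   within one basic block containing

     a_1 = ...;   // UID 3, original stmt
     v_2 = ...;   // UID 0, inserted vectorized stmt
     b_3 = ...;   // UID 4, original stmt

   querying (v_2, b_3) walks forward from v_2 and reaches b_3 before any
   nonzero-UID stmt, so it returns true; querying (a_1, b_3) compares the
   UIDs directly and returns true because 3 <= 4.  */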

/* A helper function to free scev and LOOP niter information, as well as
   clear loop constraint LOOP_C_FINITE.  */

void
vect_free_loop_info_assumptions (class loop *loop)
{
  scev_reset_htab ();
  /* We need to explicitly reset upper bound information since it is
     used even after free_numbers_of_iterations_estimates.  */
  loop->any_upper_bound = false;
  loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (loop);
  loop_constraint_clear (loop, LOOP_C_FINITE);
}

/* If LOOP has been versioned during ifcvt, return the internal call
   guarding it.  */

gimple *
vect_loop_vectorized_call (class loop *loop, gcond **cond)
{
  basic_block bb = loop_preheader_edge (loop)->src;
  gimple *g;
  do
    {
      g = *gsi_last_bb (bb);
      if ((g && gimple_code (g) == GIMPLE_COND)
          || !single_succ_p (bb))
        break;
      if (!single_pred_p (bb))
        break;
      bb = single_pred (bb);
    }
  while (1);
  if (g && gimple_code (g) == GIMPLE_COND)
    {
      if (cond)
        *cond = as_a <gcond *> (g);
      gimple_stmt_iterator gsi = gsi_for_stmt (g);
      gsi_prev (&gsi);
      if (!gsi_end_p (gsi))
        {
          g = gsi_stmt (gsi);
          if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
              && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
                  || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
            return g;
        }
    }
  return NULL;
}
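
/* The shape matched above is the guard that if-conversion emits when it
   versions a loop (a sketch; N and M are the loop numbers of the two
   versions):

     _1 = .LOOP_VECTORIZED (N, M);
     if (_1 != 0)
       goto <loop N, to be vectorized>;
     else
       goto <loop M, scalar version>;
*/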

/* If LOOP has been versioned during loop distribution, return the guarding
   internal call.  */

static gimple *
vect_loop_dist_alias_call (class loop *loop, function *fun)
{
  basic_block bb;
  basic_block entry;
  class loop *outer, *orig;

  if (loop->orig_loop_num == 0)
    return NULL;

  orig = get_loop (fun, loop->orig_loop_num);
  if (orig == NULL)
    {
      /* The original loop is somehow destroyed.  Clear the information.  */
      loop->orig_loop_num = 0;
      return NULL;
    }

  if (loop != orig)
    bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
  else
    bb = loop_preheader_edge (loop)->src;

  outer = bb->loop_father;
  entry = ENTRY_BLOCK_PTR_FOR_FN (fun);

  /* Look upward in dominance tree.  */
  for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
       bb = get_immediate_dominator (CDI_DOMINATORS, bb))
    {
      gimple_stmt_iterator gsi = gsi_last_bb (bb);
      if (!safe_is_a <gcond *> (*gsi))
        continue;

      gsi_prev (&gsi);
      if (gsi_end_p (gsi))
        continue;

      gimple *g = gsi_stmt (gsi);
      /* The guarding internal function call must have the same distribution
         alias id.  */
      if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
          && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
        return g;
    }
  return NULL;
}
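
/* Sketch of the corresponding guard left behind by loop distribution:

     _1 = .LOOP_DIST_ALIAS (ALIAS_ID, GUARD_VALUE);
     if (_1 != 0) ...

   ALIAS_ID must equal LOOP's orig_loop_num; GUARD_VALUE is what the call
   is later folded to once the versioning is resolved (see
   try_vectorize_loop_1 below).  */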

/* Set the uids of all the statements in basic blocks inside loop
   represented by LOOP_VINFO.  LOOP_VECTORIZED_CALL is the internal
   call guarding the loop which has been if converted.  */
static void
set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call,
                  function *fun)
{
  tree arg = gimple_call_arg (loop_vectorized_call, 1);
  basic_block *bbs;
  unsigned int i;
  class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));

  LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
  LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo)
    = vec_init_loop_exit_info (scalar_loop);
  gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
                       == loop_vectorized_call);
  /* If we are going to vectorize the outer loop, prevent vectorization
     of the inner loop in the scalar loop - either the scalar loop is
     thrown away, so it is wasted work, or it is used only for
     a few iterations.  */
  if (scalar_loop->inner)
    {
      gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
      if (g)
        {
          arg = gimple_call_arg (g, 0);
          get_loop (fun, tree_to_shwi (arg))->dont_vectorize = true;
          fold_loop_internal_call (g, boolean_false_node);
        }
    }
  bbs = get_loop_body (scalar_loop);
  for (i = 0; i < scalar_loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *phi = gsi_stmt (gsi);
          gimple_set_uid (phi, 0);
        }
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          gimple_set_uid (stmt, 0);
        }
    }
  free (bbs);
}

/* Generate vectorized code for LOOP and its epilogues.  */

static unsigned
vect_transform_loops (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
                      loop_p loop, gimple *loop_vectorized_call,
                      function *fun)
{
  loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);

  if (loop_vectorized_call)
    set_uid_loop_bbs (loop_vinfo, loop_vectorized_call, fun);

  unsigned HOST_WIDE_INT bytes;
  if (dump_enabled_p ())
    {
      if (GET_MODE_SIZE (loop_vinfo->vector_mode).is_constant (&bytes))
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "loop vectorized using %wu byte vectors\n", bytes);
      else
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "loop vectorized using variable length vectors\n");
    }

  loop_p new_loop = vect_transform_loop (loop_vinfo,
                                         loop_vectorized_call);
  /* Now that the loop has been vectorized, allow it to be unrolled
     etc.  */
  loop->force_vectorize = false;

  if (loop->simduid)
    {
      simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
      if (!simduid_to_vf_htab)
        simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
      simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
      simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
      *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
        = simduid_to_vf_data;
    }

  /* We should not have to update virtual SSA form here but some
     transforms involve creating new virtual definitions which makes
     updating difficult.
     We delay the actual update to the end of the pass but avoid
     confusing ourselves by forcing need_ssa_update_p () to false.  */
  unsigned todo = 0;
  if (need_ssa_update_p (cfun))
    {
      gcc_assert (loop_vinfo->any_known_not_updated_vssa);
      fun->gimple_df->ssa_renaming_needed = false;
      todo |= TODO_update_ssa_only_virtuals;
    }
  gcc_assert (!need_ssa_update_p (cfun));

  /* Epilogue of vectorized loop must be vectorized too.  */
  if (new_loop)
    todo |= vect_transform_loops (simduid_to_vf_htab, new_loop, NULL, fun);

  return todo;
}

/* Try to vectorize LOOP.  */

static unsigned
try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
                      unsigned *num_vectorized_loops, loop_p loop,
                      gimple *loop_vectorized_call,
                      gimple *loop_dist_alias_call,
                      function *fun)
{
  unsigned ret = 0;
  vec_info_shared shared;
  auto_purge_vect_location sentinel;
  vect_location = find_loop_location (loop);

  if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
                 "\nAnalyzing loop at %s:%d\n",
                 LOCATION_FILE (vect_location.get_location_t ()),
                 LOCATION_LINE (vect_location.get_location_t ()));

  /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
  opt_loop_vec_info loop_vinfo = vect_analyze_loop (loop, &shared);
  loop->aux = loop_vinfo;

  if (!loop_vinfo)
    if (dump_enabled_p ())
      if (opt_problem *problem = loop_vinfo.get_problem ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "couldn't vectorize loop\n");
          problem->emit_and_clear ();
        }

  if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
    {
      /* Free existing information if loop is analyzed with some
         assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
        vect_free_loop_info_assumptions (loop);

      /* If we applied if-conversion then try to vectorize the
         BB of innermost loops.
         ??? Ideally BB vectorization would learn to vectorize
         control flow by applying if-conversion on-the-fly, the
         following retains the if-converted loop body even when
         only non-if-converted parts took part in BB vectorization.  */
      if (flag_tree_slp_vectorize != 0
          && loop_vectorized_call
          && ! loop->inner)
        {
          basic_block bb = loop->header;
          bool require_loop_vectorize = false;
          for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
               !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple *stmt = gsi_stmt (gsi);
              gcall *call = dyn_cast <gcall *> (stmt);
              if (call && gimple_call_internal_p (call))
                {
                  internal_fn ifn = gimple_call_internal_fn (call);
                  if (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE
                      /* Don't keep the if-converted parts when the ifn with
                         specific type is not supported by the backend.  */
                      || (direct_internal_fn_p (ifn)
                          && !direct_internal_fn_supported_p
                                (call, OPTIMIZE_FOR_SPEED)))
                    {
                      require_loop_vectorize = true;
                      break;
                    }
                }
              gimple_set_uid (stmt, -1);
              gimple_set_visited (stmt, false);
            }
          if (!require_loop_vectorize)
            {
              tree arg = gimple_call_arg (loop_vectorized_call, 1);
              class loop *scalar_loop = get_loop (fun, tree_to_shwi (arg));
              if (vect_slp_if_converted_bb (bb, scalar_loop))
                {
                  fold_loop_internal_call (loop_vectorized_call,
                                           boolean_true_node);
                  loop_vectorized_call = NULL;
                  ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
                }
            }
        }
      /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
         loop, don't vectorize its inner loop; we'll attempt to
         vectorize LOOP_VECTORIZED guarded inner loop of the scalar
         loop version.  */
      if (loop_vectorized_call && loop->inner)
        loop->inner->dont_vectorize = true;
      return ret;
    }

  if (!dbg_cnt (vect_loop))
    {
      /* Free existing information if loop is analyzed with some
         assumptions.  */
      if (loop_constraint_set_p (loop, LOOP_C_FINITE))
        vect_free_loop_info_assumptions (loop);
      return ret;
    }

  (*num_vectorized_loops)++;
  /* Transform LOOP and its epilogues.  */
  ret |= vect_transform_loops (simduid_to_vf_htab, loop,
                               loop_vectorized_call, fun);

  if (loop_vectorized_call)
    {
      fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
      ret |= TODO_cleanup_cfg;
    }
  if (loop_dist_alias_call)
    {
      tree value = gimple_call_arg (loop_dist_alias_call, 1);
      fold_loop_internal_call (loop_dist_alias_call, value);
      ret |= TODO_cleanup_cfg;
    }

  return ret;
}

/* Try to vectorize LOOP.  */

static unsigned
try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
                    unsigned *num_vectorized_loops, loop_p loop,
                    function *fun)
{
  if (!((flag_tree_loop_vectorize
         && optimize_loop_nest_for_speed_p (loop))
        || loop->force_vectorize))
    return 0;

  return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
                               vect_loop_vectorized_call (loop),
                               vect_loop_dist_alias_call (loop, fun), fun);
}


/* Loop autovectorization.  */

namespace {

const pass_data pass_data_vectorize =
{
  GIMPLE_PASS, /* type */
  "vect", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_VECTORIZATION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_vectorize : public gimple_opt_pass
{
public:
  pass_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *fun) final override
    {
      return flag_tree_loop_vectorize || fun->has_force_vectorize_loops;
    }

  unsigned int execute (function *) final override;

}; // class pass_vectorize

/* Function vectorize_loops.

   Entry point to loop vectorization phase.  */

unsigned
pass_vectorize::execute (function *fun)
{
  unsigned int i;
  unsigned int num_vectorized_loops = 0;
  unsigned int vect_loops_num;
  hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
  bool any_ifcvt_loops = false;
  unsigned ret = 0;

  vect_loops_num = number_of_loops (fun);

  /* Bail out if there are no loops.  */
  if (vect_loops_num <= 1)
    return 0;

  vect_slp_init ();

  if (fun->has_simduid_loops)
    note_simd_array_uses (&simd_array_to_simduid_htab, fun);

  /* ----------- Analyze loops. ----------- */

  /* If some loop was duplicated, it gets a bigger number
     than all previously defined loops.  This fact allows us to run
     only over the initial loops, skipping newly generated ones.  */
  for (auto loop : loops_list (fun, 0))
    if (loop->dont_vectorize)
      {
        any_ifcvt_loops = true;
        /* If-conversion sometimes versions both the outer loop
           (for the case when outer loop vectorization might be
           desirable) as well as the inner loop in the scalar version
           of the loop.  So we have:
            if (LOOP_VECTORIZED (1, 3))
              {
                loop1
                loop2
              }
            else
              loop3 (copy of loop1)
            if (LOOP_VECTORIZED (4, 5))
              loop4 (copy of loop2)
            else
              loop5 (copy of loop4)
           If loops' iteration gives us loop3 first (which has
           dont_vectorize set), make sure to process loop1 before loop4;
           so that we can prevent vectorization of loop4 if loop1
           is successfully vectorized.  */
        if (loop->inner)
          {
            gimple *loop_vectorized_call
              = vect_loop_vectorized_call (loop);
            if (loop_vectorized_call
                && vect_loop_vectorized_call (loop->inner))
              {
                tree arg = gimple_call_arg (loop_vectorized_call, 0);
                class loop *vector_loop
                  = get_loop (fun, tree_to_shwi (arg));
                if (vector_loop && vector_loop != loop)
                  {
                    /* Make sure we don't vectorize it twice.  */
                    vector_loop->dont_vectorize = true;
                    ret |= try_vectorize_loop (simduid_to_vf_htab,
                                               &num_vectorized_loops,
                                               vector_loop, fun);
                  }
              }
          }
      }
    else
      ret |= try_vectorize_loop (simduid_to_vf_htab, &num_vectorized_loops,
                                 loop, fun);

  vect_location = dump_user_location_t ();

  statistics_counter_event (fun, "Vectorized loops", num_vectorized_loops);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vectorized %u loops in function.\n",
                     num_vectorized_loops);

  /* ----------- Finalize. ----------- */

  if (any_ifcvt_loops)
    for (i = 1; i < number_of_loops (fun); i++)
      {
        class loop *loop = get_loop (fun, i);
        if (loop && loop->dont_vectorize)
          {
            gimple *g = vect_loop_vectorized_call (loop);
            if (g)
              {
                fold_loop_internal_call (g, boolean_false_node);
                ret |= TODO_cleanup_cfg;
                g = NULL;
              }
            else
              g = vect_loop_dist_alias_call (loop, fun);

            if (g)
              {
                fold_loop_internal_call (g, boolean_false_node);
                ret |= TODO_cleanup_cfg;
              }
          }
      }

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
  if (fun->has_simduid_loops)
    {
      adjust_simduid_builtins (simduid_to_vf_htab, fun);
      /* Avoid stale SCEV cache entries for the SIMD_LANE defs.  */
      scev_reset ();
    }
  /* Shrink any "omp array simd" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
  delete simduid_to_vf_htab;
  fun->has_simduid_loops = false;

  if (num_vectorized_loops > 0)
    {
      /* We are collecting some corner cases where we need to update
         virtual SSA form via the TODO but delete the queued update-SSA
         state.  Force renaming if we think that might be necessary.  */
      if (ret & TODO_update_ssa_only_virtuals)
        mark_virtual_operands_for_renaming (cfun);
      /* If we vectorized any loop only virtual SSA form needs to be updated.
         ??? Also while we try hard to update loop-closed SSA form we fail
         to properly do this in some corner-cases (see PR56286).  */
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
      ret |= TODO_cleanup_cfg;
    }

  for (i = 1; i < number_of_loops (fun); i++)
    {
      loop_vec_info loop_vinfo;
      bool has_mask_store;

      class loop *loop = get_loop (fun, i);
      if (!loop || !loop->aux)
        continue;
      loop_vinfo = (loop_vec_info) loop->aux;
      has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
      delete loop_vinfo;
      if (has_mask_store
          && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
        optimize_mask_stores (loop);

      auto_bitmap exit_bbs;
      /* Perform local CSE, this esp. helps because we emit code for
         predicates that need to be shared for optimal predicate usage.
         However reassoc will re-order them and prevent CSE from working
         as it should.  CSE only the loop body, not the entry.  */
      bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);

      edge entry = EDGE_PRED (loop_preheader_edge (loop)->src, 0);
      do_rpo_vn (fun, entry, exit_bbs);

      loop->aux = NULL;
    }

  vect_slp_fini ();

  return ret;
}

} // anon namespace

gimple_opt_pass *
make_pass_vectorize (gcc::context *ctxt)
{
  return new pass_vectorize (ctxt);
}

/* Entry point to the simduid cleanup pass.  */

namespace {

const pass_data pass_data_simduid_cleanup =
{
  GIMPLE_PASS, /* type */
  "simduid", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_simduid_cleanup : public gimple_opt_pass
{
public:
  pass_simduid_cleanup (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () final override
    {
      return new pass_simduid_cleanup (m_ctxt);
    }
  bool gate (function *fun) final override { return fun->has_simduid_loops; }
  unsigned int execute (function *) final override;

}; // class pass_simduid_cleanup

unsigned int
pass_simduid_cleanup::execute (function *fun)
{
  hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;

  note_simd_array_uses (&simd_array_to_simduid_htab, fun);

  /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
  adjust_simduid_builtins (NULL, fun);

  /* Shrink any "omp array simd" temporary arrays to the
     actual vectorization factors.  */
  if (simd_array_to_simduid_htab)
    shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
  fun->has_simduid_loops = false;
  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_simduid_cleanup (gcc::context *ctxt)
{
  return new pass_simduid_cleanup (ctxt);
}


/* Entry point to basic block SLP phase.  */

namespace {

const pass_data pass_data_slp_vectorize =
{
  GIMPLE_PASS, /* type */
  "slp", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_SLP_VECTORIZATION, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_slp_vectorize : public gimple_opt_pass
{
public:
  pass_slp_vectorize (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () final override { return new pass_slp_vectorize (m_ctxt); }
  bool gate (function *) final override { return flag_tree_slp_vectorize != 0; }
  unsigned int execute (function *) final override;

}; // class pass_slp_vectorize

unsigned int
pass_slp_vectorize::execute (function *fun)
{
  auto_purge_vect_location sentinel;
  basic_block bb;

  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    {
      loop_optimizer_init (LOOPS_NORMAL);
      scev_initialize ();
    }

  /* Mark all stmts as not belonging to the current region and unvisited.  */
  FOR_EACH_BB_FN (bb, fun)
    {
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gphi *stmt = gsi.phi ();
          gimple_set_uid (stmt, -1);
          gimple_set_visited (stmt, false);
        }
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          gimple_set_uid (stmt, -1);
          gimple_set_visited (stmt, false);
        }
    }

  vect_slp_init ();

  vect_slp_function (fun);

  vect_slp_fini ();

  if (!in_loop_pipeline)
    {
      scev_finalize ();
      loop_optimizer_finalize ();
    }

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_slp_vectorize (gcc::context *ctxt)
{
  return new pass_slp_vectorize (ctxt);
}


/* Increase alignment of global arrays to improve vectorization potential.
   TODO:
   - Consider also structs that have an array field.
   - Use ipa analysis to prune arrays that can't be vectorized?
     This should involve global alignment analysis and in the future also
     array padding.  */

static unsigned get_vec_alignment_for_type (tree);
static hash_map<tree, unsigned> *type_align_map;

/* Return alignment of array's vector type corresponding to scalar type.
   0 if no vector type exists.  */
static unsigned
get_vec_alignment_for_array_type (tree type)
{
  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
  poly_uint64 array_size, vector_size;

  tree scalar_type = strip_array_types (type);
  tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
  if (!vectype
      || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
      || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
      || maybe_lt (array_size, vector_size))
    return 0;

  return TYPE_ALIGN (vectype);
}
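
/* For example (illustrative and target-dependent): for 'float a[8]' on a
   target providing a 128-bit V4SF vector type, array_size is 256 bits and
   vector_size is 128 bits, so TYPE_ALIGN of V4SF is returned; for
   'float a[2]' the array is smaller than the vector and 0 is returned.  */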

/* Return the alignment of the field having the maximum alignment of the
   vector type corresponding to its scalar type.  For now, we only consider
   fields whose offset is a multiple of its vector alignment.
   0 if no suitable field is found.  */
static unsigned
get_vec_alignment_for_record_type (tree type)
{
  gcc_assert (TREE_CODE (type) == RECORD_TYPE);

  unsigned max_align = 0, alignment;
  HOST_WIDE_INT offset;
  tree offset_tree;

  if (TYPE_PACKED (type))
    return 0;

  unsigned *slot = type_align_map->get (type);
  if (slot)
    return *slot;

  for (tree field = first_field (type);
       field != NULL_TREE;
       field = DECL_CHAIN (field))
    {
      /* Skip if not FIELD_DECL or if alignment is set by user.  */
      if (TREE_CODE (field) != FIELD_DECL
          || DECL_USER_ALIGN (field)
          || DECL_ARTIFICIAL (field))
        continue;

      /* We don't need to process the type further if offset is variable,
         since the offsets of remaining members will also be variable.  */
      if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
          || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
        break;

      /* Similarly stop processing the type if offset_tree
         does not fit in unsigned HOST_WIDE_INT.  */
      offset_tree = bit_position (field);
      if (!tree_fits_uhwi_p (offset_tree))
        break;

      offset = tree_to_uhwi (offset_tree);
      alignment = get_vec_alignment_for_type (TREE_TYPE (field));

      /* Get maximum alignment of vectorized field/array among those members
         whose offset is multiple of the vector alignment.  */
      if (alignment
          && (offset % alignment == 0)
          && (alignment > max_align))
        max_align = alignment;
    }

  type_align_map->put (type, max_align);
  return max_align;
}

/* Return alignment of vector type corresponding to decl's scalar type
   or 0 if it doesn't exist or the vector alignment is less than
   decl's alignment.  */
1642 | static unsigned |
1643 | get_vec_alignment_for_type (tree type) |
1644 | { |
1645 | if (type == NULL_TREE) |
1646 | return 0; |
1647 | |
1648 | gcc_assert (TYPE_P (type)); |
1649 | |
  unsigned alignment = 0;
1651 | switch (TREE_CODE (type)) |
1652 | { |
1653 | case ARRAY_TYPE: |
1654 | alignment = get_vec_alignment_for_array_type (type); |
1655 | break; |
1656 | case RECORD_TYPE: |
1657 | alignment = get_vec_alignment_for_record_type (type); |
1658 | break; |
1659 | default: |
1660 | alignment = 0; |
1661 | break; |
1662 | } |
1663 | |
1664 | return (alignment > TYPE_ALIGN (type)) ? alignment : 0; |
1665 | } |
1666 | |
1667 | /* Entry point to increase_alignment pass. */ |
1668 | static unsigned int |
1669 | increase_alignment (void) |
1670 | { |
1671 | varpool_node *vnode; |
1672 | |
1673 | vect_location = dump_user_location_t (); |
1674 | type_align_map = new hash_map<tree, unsigned>; |
1675 | |
1676 | /* Increase the alignment of all global arrays for vectorization. */ |
1677 | FOR_EACH_DEFINED_VARIABLE (vnode) |
1678 | { |
1679 | tree decl = vnode->decl; |
1680 | unsigned int alignment; |
1681 | |
1682 | if ((decl_in_symtab_p (decl) |
1683 | && !symtab_node::get (decl)->can_increase_alignment_p ()) |
1684 | || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl)) |
1685 | continue; |
1686 | |
1687 | alignment = get_vec_alignment_for_type (TREE_TYPE (decl)); |
1688 | if (alignment && vect_can_force_dr_alignment_p (decl, alignment)) |
1689 | { |
	  vnode->increase_alignment (alignment);
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl);
1693 | } |
1694 | } |
1695 | |
1696 | delete type_align_map; |
1697 | return 0; |
1698 | } |
1699 | |
1700 | |
1701 | namespace { |
1702 | |
1703 | const pass_data pass_data_ipa_increase_alignment = |
1704 | { |
  SIMPLE_IPA_PASS, /* type */
  "increase_alignment", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_IPA_OPT, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
1714 | }; |
1715 | |
1716 | class pass_ipa_increase_alignment : public simple_ipa_opt_pass |
1717 | { |
1718 | public: |
1719 | pass_ipa_increase_alignment (gcc::context *ctxt) |
1720 | : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt) |
1721 | {} |
1722 | |
1723 | /* opt_pass methods: */ |
1724 | bool gate (function *) final override |
1725 | { |
1726 | return flag_section_anchors && flag_tree_loop_vectorize; |
1727 | } |
1728 | |
1729 | unsigned int execute (function *) final override |
1730 | { |
1731 | return increase_alignment (); |
1732 | } |
1733 | |
1734 | }; // class pass_ipa_increase_alignment |
1735 | |
1736 | } // anon namespace |
1737 | |
1738 | simple_ipa_opt_pass * |
1739 | make_pass_ipa_increase_alignment (gcc::context *ctxt) |
1740 | { |
1741 | return new pass_ipa_increase_alignment (ctxt); |
1742 | } |
1743 | |
1744 | /* If the condition represented by T is a comparison or the SSA name |
1745 | result of a comparison, extract the comparison's operands. Represent |
1746 | T as NE_EXPR <T, 0> otherwise. */ |
1747 | |
1748 | void |
1749 | scalar_cond_masked_key::get_cond_ops_from_tree (tree t) |
1750 | { |
1751 | if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison) |
1752 | { |
1753 | this->code = TREE_CODE (t); |
1754 | this->op0 = TREE_OPERAND (t, 0); |
1755 | this->op1 = TREE_OPERAND (t, 1); |
1756 | this->inverted_p = false; |
1757 | return; |
1758 | } |
1759 | |
1760 | if (TREE_CODE (t) == SSA_NAME) |
1761 | if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t))) |
1762 | { |
	tree_code code = gimple_assign_rhs_code (stmt);
1764 | if (TREE_CODE_CLASS (code) == tcc_comparison) |
1765 | { |
1766 | this->code = code; |
	    this->op0 = gimple_assign_rhs1 (stmt);
	    this->op1 = gimple_assign_rhs2 (stmt);
1769 | this->inverted_p = false; |
1770 | return; |
1771 | } |
1772 | else if (code == BIT_NOT_EXPR) |
1773 | { |
	    tree n_op = gimple_assign_rhs1 (stmt);
1775 | if ((stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (n_op)))) |
1776 | { |
		code = gimple_assign_rhs_code (stmt);
1778 | if (TREE_CODE_CLASS (code) == tcc_comparison) |
1779 | { |
1780 | this->code = code; |
		    this->op0 = gimple_assign_rhs1 (stmt);
		    this->op1 = gimple_assign_rhs2 (stmt);
1783 | this->inverted_p = true; |
1784 | return; |
1785 | } |
1786 | } |
1787 | } |
1788 | } |
1789 | |
1790 | this->code = NE_EXPR; |
1791 | this->op0 = t; |
1792 | this->op1 = build_zero_cst (TREE_TYPE (t)); |
1793 | this->inverted_p = false; |
1794 | } |
1795 | |
1796 | /* See the comment above the declaration for details. */ |
1797 | |
1798 | unsigned int |
1799 | vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, |
1800 | stmt_vec_info stmt_info, slp_tree, |
1801 | tree vectype, int misalign, |
1802 | vect_cost_model_location where) |
1803 | { |
1804 | unsigned int cost |
    = builtin_vectorization_cost (kind, vectype, misalign) * count;
1806 | return record_stmt_cost (stmt_info, where, cost); |
1807 | } |
1808 | |
1809 | /* See the comment above the declaration for details. */ |
1810 | |
1811 | void |
1812 | vector_costs::finish_cost (const vector_costs *) |
1813 | { |
1814 | gcc_assert (!m_finished); |
1815 | m_finished = true; |
1816 | } |
1817 | |
1818 | /* Record a base cost of COST units against WHERE. If STMT_INFO is |
1819 | nonnull, use it to adjust the cost based on execution frequency |
1820 | (where appropriate). */ |
1821 | |
1822 | unsigned int |
1823 | vector_costs::record_stmt_cost (stmt_vec_info stmt_info, |
1824 | vect_cost_model_location where, |
1825 | unsigned int cost) |
1826 | { |
1827 | cost = adjust_cost_for_freq (stmt_info, where, cost); |
1828 | m_costs[where] += cost; |
1829 | return cost; |
1830 | } |
1831 | |
1832 | /* COST is the base cost we have calculated for an operation in location WHERE. |
1833 | If STMT_INFO is nonnull, use it to adjust the cost based on execution |
1834 | frequency (where appropriate). Return the adjusted cost. */ |
1835 | |
1836 | unsigned int |
1837 | vector_costs::adjust_cost_for_freq (stmt_vec_info stmt_info, |
1838 | vect_cost_model_location where, |
1839 | unsigned int cost) |
1840 | { |
1841 | /* Statements in an inner loop relative to the loop being |
1842 | vectorized are weighted more heavily. The value here is |
1843 | arbitrary and could potentially be improved with analysis. */ |
1844 | if (where == vect_body |
1845 | && stmt_info |
1846 | && stmt_in_inner_loop_p (m_vinfo, stmt_info)) |
1847 | { |
      loop_vec_info loop_vinfo = as_a<loop_vec_info> (m_vinfo);
1849 | cost *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo); |
1850 | } |
1851 | return cost; |
1852 | } |
1853 | |
1854 | /* See the comment above the declaration for details. */ |
1855 | |
1856 | bool |
1857 | vector_costs::better_main_loop_than_p (const vector_costs *other) const |
1858 | { |
1859 | int diff = compare_inside_loop_cost (other); |
1860 | if (diff != 0) |
1861 | return diff < 0; |
1862 | |
1863 | /* If there's nothing to choose between the loop bodies, see whether |
1864 | there's a difference in the prologue and epilogue costs. */ |
1865 | diff = compare_outside_loop_cost (other); |
1866 | if (diff != 0) |
1867 | return diff < 0; |
1868 | |
1869 | return false; |
1870 | } |
1871 | |
1872 | |
1873 | /* See the comment above the declaration for details. */ |
1874 | |
1875 | bool |
1876 | vector_costs::better_epilogue_loop_than_p (const vector_costs *other, |
1877 | loop_vec_info main_loop) const |
1878 | { |
  loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
  loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);
1881 | |
1882 | poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo); |
1883 | poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo); |
1884 | |
1885 | poly_uint64 main_poly_vf = LOOP_VINFO_VECT_FACTOR (main_loop); |
1886 | unsigned HOST_WIDE_INT main_vf; |
1887 | unsigned HOST_WIDE_INT other_factor, this_factor, other_cost, this_cost; |
1888 | /* If we can determine how many iterations are left for the epilogue |
     loop, that is, if both the main loop's vectorization factor and number
     of iterations are constant, then we use them to calculate the cost of
     the epilogue loop together with a 'likely value' for the epilogue's
     vectorization factor.  Otherwise we use the main loop's vectorization
     factor and the maximum poly value for the epilogue's.  If the target
     has not provided a sensible upper bound, poly vectorization factors
     are likely to be favored over constant ones.  */
  if (main_poly_vf.is_constant (&main_vf)
1897 | && LOOP_VINFO_NITERS_KNOWN_P (main_loop)) |
1898 | { |
1899 | unsigned HOST_WIDE_INT niters |
1900 | = LOOP_VINFO_INT_NITERS (main_loop) % main_vf; |
      HOST_WIDE_INT other_likely_vf
	= estimated_poly_value (other_vf, POLY_VALUE_LIKELY);
      HOST_WIDE_INT this_likely_vf
	= estimated_poly_value (this_vf, POLY_VALUE_LIKELY);
1905 | |
1906 | /* If the epilogue is using partial vectors we account for the |
1907 | partial iteration here too. */ |
1908 | other_factor = niters / other_likely_vf; |
1909 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo) |
1910 | && niters % other_likely_vf != 0) |
1911 | other_factor++; |
1912 | |
1913 | this_factor = niters / this_likely_vf; |
1914 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo) |
1915 | && niters % this_likely_vf != 0) |
1916 | this_factor++; |
1917 | } |
1918 | else |
1919 | { |
      unsigned HOST_WIDE_INT main_vf_max
	= estimated_poly_value (main_poly_vf, POLY_VALUE_MAX);
      unsigned HOST_WIDE_INT other_vf_max
	= estimated_poly_value (other_vf, POLY_VALUE_MAX);
      unsigned HOST_WIDE_INT this_vf_max
	= estimated_poly_value (this_vf, POLY_VALUE_MAX);
1926 | |
1927 | other_factor = CEIL (main_vf_max, other_vf_max); |
1928 | this_factor = CEIL (main_vf_max, this_vf_max); |
1929 | |
1930 | /* If the loop is not using partial vectors then it will iterate one |
1931 | time less than one that does. It is safe to subtract one here, |
1932 | because the main loop's vf is always at least 2x bigger than that |
1933 | of an epilogue. */ |
1934 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo)) |
1935 | other_factor -= 1; |
1936 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo)) |
1937 | this_factor -= 1; |
1938 | } |
1939 | |
1940 | /* Compute the costs by multiplying the inside costs with the factor and |
1941 | add the outside costs for a more complete picture. The factor is the |
1942 | amount of times we are expecting to iterate this epilogue. */ |
1943 | other_cost = other->body_cost () * other_factor; |
1944 | this_cost = this->body_cost () * this_factor; |
1945 | other_cost += other->outside_cost (); |
1946 | this_cost += this->outside_cost (); |
1947 | return this_cost < other_cost; |
1948 | } |
1949 | |
1950 | /* A <=>-style subroutine of better_main_loop_than_p. Check whether we can |
1951 | determine the return value of better_main_loop_than_p by comparing the |
1952 | inside (loop body) costs of THIS and OTHER. Return: |
1953 | |
1954 | * -1 if better_main_loop_than_p should return true. |
1955 | * 1 if better_main_loop_than_p should return false. |
1956 | * 0 if we can't decide. */ |
1957 | |
1958 | int |
1959 | vector_costs::compare_inside_loop_cost (const vector_costs *other) const |
1960 | { |
  loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
  loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);
1963 | |
1964 | struct loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo); |
1965 | gcc_assert (LOOP_VINFO_LOOP (other_loop_vinfo) == loop); |
1966 | |
1967 | poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo); |
1968 | poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo); |
1969 | |
1970 | /* Limit the VFs to what is likely to be the maximum number of iterations, |
1971 | to handle cases in which at least one loop_vinfo is fully-masked. */ |
1972 | HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop); |
1973 | if (estimated_max_niter != -1) |
1974 | { |
      if (estimated_poly_value (this_vf, POLY_VALUE_MIN)
	  >= estimated_max_niter)
	this_vf = estimated_max_niter;
      if (estimated_poly_value (other_vf, POLY_VALUE_MIN)
1979 | >= estimated_max_niter) |
1980 | other_vf = estimated_max_niter; |
1981 | } |
1982 | |
1983 | /* Check whether the (fractional) cost per scalar iteration is lower or |
1984 | higher: this_inside_cost / this_vf vs. other_inside_cost / other_vf. */ |
1985 | poly_int64 rel_this = this_loop_vinfo->vector_costs->body_cost () * other_vf; |
1986 | poly_int64 rel_other |
1987 | = other_loop_vinfo->vector_costs->body_cost () * this_vf; |
1988 | |
  HOST_WIDE_INT est_rel_this_min
    = estimated_poly_value (rel_this, POLY_VALUE_MIN);
  HOST_WIDE_INT est_rel_this_max
    = estimated_poly_value (rel_this, POLY_VALUE_MAX);

  HOST_WIDE_INT est_rel_other_min
    = estimated_poly_value (rel_other, POLY_VALUE_MIN);
  HOST_WIDE_INT est_rel_other_max
    = estimated_poly_value (rel_other, POLY_VALUE_MAX);
1998 | |
  /* Check first if we can make out an unambiguous total order from the minimum
2000 | and maximum estimates. */ |
2001 | if (est_rel_this_min < est_rel_other_min |
2002 | && est_rel_this_max < est_rel_other_max) |
2003 | return -1; |
2004 | |
2005 | if (est_rel_other_min < est_rel_this_min |
2006 | && est_rel_other_max < est_rel_this_max) |
2007 | return 1; |
2008 | |
2009 | /* When other_loop_vinfo uses a variable vectorization factor, |
2010 | we know that it has a lower cost for at least one runtime VF. |
2011 | However, we don't know how likely that VF is. |
2012 | |
2013 | One option would be to compare the costs for the estimated VFs. |
     The problem is that this can put too much pressure on the cost
2015 | model. E.g. if the estimated VF is also the lowest possible VF, |
2016 | and if other_loop_vinfo is 1 unit worse than this_loop_vinfo |
2017 | for the estimated VF, we'd then choose this_loop_vinfo even |
2018 | though (a) this_loop_vinfo might not actually be better than |
2019 | other_loop_vinfo for that VF and (b) it would be significantly |
2020 | worse at larger VFs. |
2021 | |
2022 | Here we go for a hacky compromise: pick this_loop_vinfo if it is |
2023 | no more expensive than other_loop_vinfo even after doubling the |
2024 | estimated other_loop_vinfo VF. For all but trivial loops, this |
2025 | ensures that we only pick this_loop_vinfo if it is significantly |
2026 | better than other_loop_vinfo at the estimated VF. */ |
2027 | if (est_rel_other_min != est_rel_this_min |
2028 | || est_rel_other_max != est_rel_this_max) |
2029 | { |
      HOST_WIDE_INT est_rel_this_likely
	= estimated_poly_value (rel_this, POLY_VALUE_LIKELY);
      HOST_WIDE_INT est_rel_other_likely
	= estimated_poly_value (rel_other, POLY_VALUE_LIKELY);
2034 | |
2035 | return est_rel_this_likely * 2 <= est_rel_other_likely ? -1 : 1; |
2036 | } |
2037 | |
2038 | return 0; |
2039 | } |
2040 | |
2041 | /* A <=>-style subroutine of better_main_loop_than_p, used when there is |
2042 | nothing to choose between the inside (loop body) costs of THIS and OTHER. |
2043 | Check whether we can determine the return value of better_main_loop_than_p |
2044 | by comparing the outside (prologue and epilogue) costs of THIS and OTHER. |
2045 | Return: |
2046 | |
2047 | * -1 if better_main_loop_than_p should return true. |
2048 | * 1 if better_main_loop_than_p should return false. |
2049 | * 0 if we can't decide. */ |
2050 | |
2051 | int |
2052 | vector_costs::compare_outside_loop_cost (const vector_costs *other) const |
2053 | { |
2054 | auto this_outside_cost = this->outside_cost (); |
2055 | auto other_outside_cost = other->outside_cost (); |
2056 | if (this_outside_cost != other_outside_cost) |
2057 | return this_outside_cost < other_outside_cost ? -1 : 1; |
2058 | |
2059 | return 0; |
2060 | } |
2061 | |