1/* Vectorizer
2 Copyright (C) 2003-2025 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21/* Loop and basic block vectorizer.
22
23 This file contains drivers for the three vectorizers:
24 (1) loop vectorizer (inter-iteration parallelism),
25 (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
26 vectorizer)
27 (3) BB vectorizer (out-of-loops), aka SLP
28
29 The rest of the vectorizer's code is organized as follows:
30 - tree-vect-loop.cc - loop specific parts such as reductions, etc. These are
31 used by drivers (1) and (2).
32 - tree-vect-loop-manip.cc - vectorizer's loop control-flow utilities, used by
33 drivers (1) and (2).
34 - tree-vect-slp.cc - BB vectorization specific analysis and transformation,
35 used by drivers (2) and (3).
36 - tree-vect-stmts.cc - statements analysis and transformation (used by all).
37 - tree-vect-data-refs.cc - vectorizer specific data-refs analysis and
38 manipulations (used by all).
39 - tree-vect-patterns.cc - vectorizable code patterns detector (used by all)
40
41 Here's a poor attempt at illustrating that:
42
43 tree-vectorizer.cc:
44 loop_vect() loop_aware_slp() slp_vect()
45 | / \ /
46 | / \ /
47 tree-vect-loop.cc tree-vect-slp.cc
48 | \ \ / / |
49 | \ \/ / |
50 | \ /\ / |
51 | \ / \ / |
52 tree-vect-stmts.cc tree-vect-data-refs.cc
53 \ /
54 tree-vect-patterns.cc
55*/
56
57#include "config.h"
58#include "system.h"
59#include "coretypes.h"
60#include "backend.h"
61#include "tree.h"
62#include "gimple.h"
63#include "predict.h"
64#include "tree-pass.h"
65#include "ssa.h"
66#include "cgraph.h"
67#include "fold-const.h"
68#include "stor-layout.h"
69#include "gimple-iterator.h"
70#include "gimple-walk.h"
71#include "tree-ssa-loop-manip.h"
72#include "tree-ssa-loop-niter.h"
73#include "tree-cfg.h"
74#include "cfgloop.h"
75#include "tree-vectorizer.h"
76#include "tree-ssa-propagate.h"
77#include "dbgcnt.h"
78#include "tree-scalar-evolution.h"
79#include "stringpool.h"
80#include "attribs.h"
81#include "gimple-pretty-print.h"
82#include "opt-problem.h"
83#include "internal-fn.h"
84#include "tree-ssa-sccvn.h"
85#include "tree-into-ssa.h"
86
87/* Loop or bb location, with hotness information. */
88dump_user_location_t vect_location;
89
90/* auto_purge_vect_location's dtor: reset the vect_location
91 global, to avoid stale location_t values that could reference
92 GC-ed blocks. */
93
94auto_purge_vect_location::~auto_purge_vect_location ()
95{
96 vect_location = dump_user_location_t ();
97}
98
99/* Dump a cost entry according to args to F. */
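/* For illustration only (hypothetical statement and values), an entry
   produced by the fprintf calls below might look like

     _3 = *p_2 1 times unaligned_load (misalign 8) costs 2 in body

   i.e. the statement (or SLP node), the count, the cost kind, the
   misalignment for unaligned accesses, the cost and the location.  */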
100
101void
102dump_stmt_cost (FILE *f, int count, enum vect_cost_for_stmt kind,
103 stmt_vec_info stmt_info, slp_tree node, tree,
104 int misalign, unsigned cost,
105 enum vect_cost_model_location where)
106{
107 if (stmt_info)
108 {
109 print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM);
110 fprintf (stream: f, format: " ");
111 }
112 else if (node)
113 fprintf (stream: f, format: "node %p ", (void *)node);
114 else
115 fprintf (stream: f, format: "<unknown> ");
116 fprintf (stream: f, format: "%d times ", count);
117 const char *ks = "unknown";
118 switch (kind)
119 {
120 case scalar_stmt:
121 ks = "scalar_stmt";
122 break;
123 case scalar_load:
124 ks = "scalar_load";
125 break;
126 case scalar_store:
127 ks = "scalar_store";
128 break;
129 case vector_stmt:
130 ks = "vector_stmt";
131 break;
132 case vector_load:
133 ks = "vector_load";
134 break;
135 case vector_gather_load:
136 ks = "vector_gather_load";
137 break;
138 case unaligned_load:
139 ks = "unaligned_load";
140 break;
141 case unaligned_store:
142 ks = "unaligned_store";
143 break;
144 case vector_store:
145 ks = "vector_store";
146 break;
147 case vector_scatter_store:
148 ks = "vector_scatter_store";
149 break;
150 case vec_to_scalar:
151 ks = "vec_to_scalar";
152 break;
153 case scalar_to_vec:
154 ks = "scalar_to_vec";
155 break;
156 case cond_branch_not_taken:
157 ks = "cond_branch_not_taken";
158 break;
159 case cond_branch_taken:
160 ks = "cond_branch_taken";
161 break;
162 case vec_perm:
163 ks = "vec_perm";
164 break;
165 case vec_promote_demote:
166 ks = "vec_promote_demote";
167 break;
168 case vec_construct:
169 ks = "vec_construct";
170 break;
171 }
172 fprintf (stream: f, format: "%s ", ks);
173 if (kind == unaligned_load || kind == unaligned_store)
174 fprintf (stream: f, format: "(misalign %d) ", misalign);
175 fprintf (stream: f, format: "costs %u ", cost);
176 const char *ws = "unknown";
177 switch (where)
178 {
179 case vect_prologue:
180 ws = "prologue";
181 break;
182 case vect_body:
183 ws = "body";
184 break;
185 case vect_epilogue:
186 ws = "epilogue";
187 break;
188 }
189 fprintf (stream: f, format: "in %s\n", ws);
190}
191
192/* For mapping simduid to vectorization factor. */
193
194class simduid_to_vf : public free_ptr_hash<simduid_to_vf>
195{
196public:
197 unsigned int simduid;
198 poly_uint64 vf;
199
200 /* hash_table support. */
201 static inline hashval_t hash (const simduid_to_vf *);
202 static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
203};
204
205inline hashval_t
206simduid_to_vf::hash (const simduid_to_vf *p)
207{
208 return p->simduid;
209}
210
211inline int
212simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
213{
214 return p1->simduid == p2->simduid;
215}
216
/* This hash maps the OMP simd array to the corresponding simduid used
   to index into it.  For example:
219
220 _7 = GOMP_SIMD_LANE (simduid.0)
221 ...
222 ...
223 D.1737[_7] = stuff;
224
225
226 This hash maps from the OMP simd array (D.1737[]) to DECL_UID of
227 simduid.0. */
228
229struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
230{
231 tree decl;
232 unsigned int simduid;
233
234 /* hash_table support. */
235 static inline hashval_t hash (const simd_array_to_simduid *);
236 static inline int equal (const simd_array_to_simduid *,
237 const simd_array_to_simduid *);
238};
239
240inline hashval_t
241simd_array_to_simduid::hash (const simd_array_to_simduid *p)
242{
243 return DECL_UID (p->decl);
244}
245
246inline int
247simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
248 const simd_array_to_simduid *p2)
249{
250 return p1->decl == p2->decl;
251}
252
/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF and IFN_GOMP_SIMD_LAST_LANE
   calls into their corresponding constants and remove
   IFN_GOMP_SIMD_ORDERED_{START,END}.  */
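/* As an illustrative (hypothetical) example, assuming a recorded
   vectorization factor of 8 for simduid.0, the calls

     _5 = GOMP_SIMD_VF (simduid.0);
     _7 = GOMP_SIMD_LANE (simduid.0);
     _9 = GOMP_SIMD_LAST_LANE (simduid.0, _8);

   are folded so that uses of _5 become 8, uses of _7 become 0 and uses
   of _9 become _8, and the calls themselves are removed.  */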
256
257static void
258adjust_simduid_builtins (hash_table<simduid_to_vf> *htab, function *fun)
259{
260 basic_block bb;
261
262 FOR_EACH_BB_FN (bb, fun)
263 {
264 gimple_stmt_iterator i;
265
266 for (i = gsi_start_bb (bb); !gsi_end_p (i); )
267 {
268 poly_uint64 vf = 1;
269 enum internal_fn ifn;
270 gimple *stmt = gsi_stmt (i);
271 tree t;
272 if (!is_gimple_call (gs: stmt)
273 || !gimple_call_internal_p (gs: stmt))
274 {
275 gsi_next (i: &i);
276 continue;
277 }
278 ifn = gimple_call_internal_fn (gs: stmt);
279 switch (ifn)
280 {
281 case IFN_GOMP_SIMD_LANE:
282 case IFN_GOMP_SIMD_VF:
283 case IFN_GOMP_SIMD_LAST_LANE:
284 break;
285 case IFN_GOMP_SIMD_ORDERED_START:
286 case IFN_GOMP_SIMD_ORDERED_END:
287 if (integer_onep (gimple_call_arg (gs: stmt, index: 0)))
288 {
289 enum built_in_function bcode
290 = (ifn == IFN_GOMP_SIMD_ORDERED_START
291 ? BUILT_IN_GOMP_ORDERED_START
292 : BUILT_IN_GOMP_ORDERED_END);
293 gimple *g
294 = gimple_build_call (builtin_decl_explicit (fncode: bcode), 0);
295 gimple_move_vops (g, stmt);
296 gsi_replace (&i, g, true);
297 continue;
298 }
299 gsi_remove (&i, true);
300 unlink_stmt_vdef (stmt);
301 continue;
302 default:
303 gsi_next (i: &i);
304 continue;
305 }
306 tree arg = gimple_call_arg (gs: stmt, index: 0);
307 gcc_assert (arg != NULL_TREE);
308 gcc_assert (TREE_CODE (arg) == SSA_NAME);
309 simduid_to_vf *p = NULL, data;
310 data.simduid = DECL_UID (SSA_NAME_VAR (arg));
	  /* Need to nullify the loop safelen field since its value is
	     no longer valid after the transformation.  */
313 if (bb->loop_father && bb->loop_father->safelen > 0)
314 bb->loop_father->safelen = 0;
315 if (htab)
316 {
317 p = htab->find (value: &data);
318 if (p)
319 vf = p->vf;
320 }
321 switch (ifn)
322 {
323 case IFN_GOMP_SIMD_VF:
324 t = build_int_cst (unsigned_type_node, vf);
325 break;
326 case IFN_GOMP_SIMD_LANE:
327 t = build_int_cst (unsigned_type_node, 0);
328 break;
329 case IFN_GOMP_SIMD_LAST_LANE:
330 t = gimple_call_arg (gs: stmt, index: 1);
331 break;
332 default:
333 gcc_unreachable ();
334 }
335 tree lhs = gimple_call_lhs (gs: stmt);
336 if (lhs)
337 replace_uses_by (lhs, t);
338 release_defs (stmt);
339 gsi_remove (&i, true);
340 }
341 }
342}
343
344/* Helper structure for note_simd_array_uses. */
345
346struct note_simd_array_uses_struct
347{
348 hash_table<simd_array_to_simduid> **htab;
349 unsigned int simduid;
350};
351
352/* Callback for note_simd_array_uses, called through walk_gimple_op. */
353
354static tree
355note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
356{
357 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
358 struct note_simd_array_uses_struct *ns
359 = (struct note_simd_array_uses_struct *) wi->info;
360
361 if (TYPE_P (*tp))
362 *walk_subtrees = 0;
363 else if (VAR_P (*tp)
364 && lookup_attribute (attr_name: "omp simd array", DECL_ATTRIBUTES (*tp))
365 && DECL_CONTEXT (*tp) == current_function_decl)
366 {
367 simd_array_to_simduid data;
368 if (!*ns->htab)
369 *ns->htab = new hash_table<simd_array_to_simduid> (15);
370 data.decl = *tp;
371 data.simduid = ns->simduid;
372 simd_array_to_simduid **slot = (*ns->htab)->find_slot (value: &data, insert: INSERT);
373 if (*slot == NULL)
374 {
375 simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
376 *p = data;
377 *slot = p;
378 }
379 else if ((*slot)->simduid != ns->simduid)
380 (*slot)->simduid = -1U;
381 *walk_subtrees = 0;
382 }
383 return NULL_TREE;
384}
385
386/* Find "omp simd array" temporaries and map them to corresponding
387 simduid. */
388
389static void
390note_simd_array_uses (hash_table<simd_array_to_simduid> **htab, function *fun)
391{
392 basic_block bb;
393 gimple_stmt_iterator gsi;
394 struct walk_stmt_info wi;
395 struct note_simd_array_uses_struct ns;
396
397 memset (s: &wi, c: 0, n: sizeof (wi));
398 wi.info = &ns;
399 ns.htab = htab;
400
401 FOR_EACH_BB_FN (bb, fun)
402 for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
403 {
404 gimple *stmt = gsi_stmt (i: gsi);
405 if (!is_gimple_call (gs: stmt) || !gimple_call_internal_p (gs: stmt))
406 continue;
407 switch (gimple_call_internal_fn (gs: stmt))
408 {
409 case IFN_GOMP_SIMD_LANE:
410 case IFN_GOMP_SIMD_VF:
411 case IFN_GOMP_SIMD_LAST_LANE:
412 break;
413 default:
414 continue;
415 }
416 tree lhs = gimple_call_lhs (gs: stmt);
417 if (lhs == NULL_TREE)
418 continue;
419 imm_use_iterator use_iter;
420 gimple *use_stmt;
421 ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
422 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
423 if (!is_gimple_debug (gs: use_stmt))
424 walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
425 }
426}
427
428/* Shrink arrays with "omp simd array" attribute to the corresponding
429 vectorization factor. */
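/* For illustration (hypothetical declaration and sizes): if the loop
   using simduid.0 was vectorized with a vectorization factor of 8, a
   temporary declared as

     unsigned char D.1737[64];

   is given the element count 8, i.e. the type unsigned char[8], and is
   re-laid out accordingly.  */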
430
431static void
432shrink_simd_arrays
433 (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
434 hash_table<simduid_to_vf> *simduid_to_vf_htab)
435{
436 for (hash_table<simd_array_to_simduid>::iterator iter
437 = simd_array_to_simduid_htab->begin ();
438 iter != simd_array_to_simduid_htab->end (); ++iter)
439 if ((*iter)->simduid != -1U)
440 {
441 tree decl = (*iter)->decl;
442 poly_uint64 vf = 1;
443 if (simduid_to_vf_htab)
444 {
445 simduid_to_vf *p = NULL, data;
446 data.simduid = (*iter)->simduid;
447 p = simduid_to_vf_htab->find (value: &data);
448 if (p)
449 vf = p->vf;
450 }
451 tree atype
452 = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
453 TREE_TYPE (decl) = atype;
454 relayout_decl (decl);
455 }
456
457 delete simd_array_to_simduid_htab;
458}
459
/* Initialize the vec_info with kind KIND_IN and shared data SHARED_.  */
462
463vec_info::vec_info (vec_info::vec_kind kind_in, vec_info_shared *shared_)
464 : kind (kind_in),
465 shared (shared_),
466 stmt_vec_info_ro (false),
467 bbs (NULL),
468 nbbs (0),
469 inv_pattern_def_seq (NULL)
470{
471 stmt_vec_infos.create (nelems: 50);
472}
473
474vec_info::~vec_info ()
475{
476 for (slp_instance &instance : slp_instances)
477 vect_free_slp_instance (instance);
478
479 free_stmt_vec_infos ();
480}
481
482vec_info_shared::vec_info_shared ()
483 : datarefs (vNULL),
484 datarefs_copy (vNULL),
485 ddrs (vNULL)
486{
487}
488
489vec_info_shared::~vec_info_shared ()
490{
491 free_data_refs (datarefs);
492 free_dependence_relations (ddrs);
493 datarefs_copy.release ();
494}
495
496void
497vec_info_shared::save_datarefs ()
498{
499 if (!flag_checking)
500 return;
501 datarefs_copy.reserve_exact (nelems: datarefs.length ());
502 for (unsigned i = 0; i < datarefs.length (); ++i)
503 datarefs_copy.quick_push (obj: *datarefs[i]);
504}
505
506void
507vec_info_shared::check_datarefs ()
508{
509 if (!flag_checking)
510 return;
511 gcc_assert (datarefs.length () == datarefs_copy.length ());
512 for (unsigned i = 0; i < datarefs.length (); ++i)
513 if (memcmp (s1: &datarefs_copy[i], s2: datarefs[i],
514 offsetof (data_reference, alt_indices)) != 0)
515 gcc_unreachable ();
516}
517
518/* Record that STMT belongs to the vectorizable region. Create and return
519 an associated stmt_vec_info. */
520
521stmt_vec_info
522vec_info::add_stmt (gimple *stmt)
523{
524 stmt_vec_info res = new_stmt_vec_info (stmt);
525 set_vinfo_for_stmt (stmt, res);
526 return res;
527}
528
/* Record that STMT belongs to the vectorizable region.  Create a new
   stmt_vec_info, record STMT_INFO as its related statement, and return
   the new stmt_vec_info.  */
532
533stmt_vec_info
534vec_info::add_pattern_stmt (gimple *stmt, stmt_vec_info stmt_info)
535{
536 stmt_vec_info res = new_stmt_vec_info (stmt);
537 res->pattern_stmt_p = true;
538 set_vinfo_for_stmt (stmt, res, false);
539 STMT_VINFO_RELATED_STMT (res) = stmt_info;
540 return res;
541}
542
543/* If STMT was previously associated with a stmt_vec_info and STMT now resides
544 at a different address than before (e.g., because STMT is a phi node that has
545 been resized), update the stored address to match the new one. It is not
546 possible to use lookup_stmt () to perform this task, because that function
547 returns NULL if the stored stmt pointer does not match the one being looked
548 up. */
549
550stmt_vec_info
551vec_info::resync_stmt_addr (gimple *stmt)
552{
553 unsigned int uid = gimple_uid (g: stmt);
554 if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
555 {
556 stmt_vec_info res = stmt_vec_infos[uid - 1];
557 if (res && res->stmt)
558 {
559 res->stmt = stmt;
560 return res;
561 }
562 }
563 return nullptr;
564}
565
566/* If STMT has an associated stmt_vec_info, return that vec_info, otherwise
567 return null. It is safe to call this function on any statement, even if
568 it might not be part of the vectorizable region. */
569
570stmt_vec_info
571vec_info::lookup_stmt (gimple *stmt)
572{
573 unsigned int uid = gimple_uid (g: stmt);
574 if (uid > 0 && uid - 1 < stmt_vec_infos.length ())
575 {
576 stmt_vec_info res = stmt_vec_infos[uid - 1];
577 if (res && res->stmt == stmt)
578 return res;
579 }
580 return NULL;
581}
582
583/* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info,
584 return that stmt_vec_info, otherwise return null. It is safe to call
585 this on arbitrary operands. */
586
587stmt_vec_info
588vec_info::lookup_def (tree name)
589{
590 if (TREE_CODE (name) == SSA_NAME
591 && !SSA_NAME_IS_DEFAULT_DEF (name))
592 return lookup_stmt (SSA_NAME_DEF_STMT (name));
593 return NULL;
594}
595
596/* See whether there is a single non-debug statement that uses LHS and
597 whether that statement has an associated stmt_vec_info. Return the
598 stmt_vec_info if so, otherwise return null. */
599
600stmt_vec_info
601vec_info::lookup_single_use (tree lhs)
602{
603 use_operand_p dummy;
604 gimple *use_stmt;
605 if (single_imm_use (var: lhs, use_p: &dummy, stmt: &use_stmt))
606 return lookup_stmt (stmt: use_stmt);
607 return NULL;
608}
609
610/* Return vectorization information about DR. */
611
612dr_vec_info *
613vec_info::lookup_dr (data_reference *dr)
614{
615 stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr));
616 /* DR_STMT should never refer to a stmt in a pattern replacement. */
617 gcc_checking_assert (!is_pattern_stmt_p (stmt_info));
618 return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt);
619}
620
621/* Record that NEW_STMT_INFO now implements the same data reference
622 as OLD_STMT_INFO. */
623
624void
625vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
626{
627 gcc_assert (!is_pattern_stmt_p (old_stmt_info));
628 STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info;
629 new_stmt_info->dr_aux = old_stmt_info->dr_aux;
630 STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info)
631 = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info);
632 STMT_VINFO_GATHER_SCATTER_P (new_stmt_info)
633 = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
634 STMT_VINFO_STRIDED_P (new_stmt_info)
635 = STMT_VINFO_STRIDED_P (old_stmt_info);
636 STMT_VINFO_SIMD_LANE_ACCESS_P (new_stmt_info)
637 = STMT_VINFO_SIMD_LANE_ACCESS_P (old_stmt_info);
638}
639
640/* Permanently remove the statement described by STMT_INFO from the
641 function. */
642
643void
644vec_info::remove_stmt (stmt_vec_info stmt_info)
645{
646 gcc_assert (!stmt_info->pattern_stmt_p);
647 set_vinfo_for_stmt (stmt_info->stmt, NULL);
648 unlink_stmt_vdef (stmt_info->stmt);
649 gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt);
650 gsi_remove (&si, true);
651 release_defs (stmt_info->stmt);
652 free_stmt_vec_info (stmt_info);
653}
654
655/* Replace the statement at GSI by NEW_STMT, both the vectorization
656 information and the function itself. STMT_INFO describes the statement
657 at GSI. */
658
659void
660vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info,
661 gimple *new_stmt)
662{
663 gimple *old_stmt = stmt_info->stmt;
664 gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi));
665 gimple_set_uid (g: new_stmt, uid: gimple_uid (g: old_stmt));
666 stmt_info->stmt = new_stmt;
667 gsi_replace (gsi, new_stmt, true);
668}
669
/* Insert the stmts in SEQ on the VEC_INFO region entry.  If CONTEXT is
   not NULL, insert on the entry of the sub-region it determines; this is
   currently used by loop vectorization to choose between inserting on
   the inner loop entry vs. the outer loop entry.  */
674
675void
676vec_info::insert_seq_on_entry (stmt_vec_info context, gimple_seq seq)
677{
678 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: this))
679 {
680 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
681 basic_block new_bb;
682 edge pe;
683
684 if (context && nested_in_vect_loop_p (loop, stmt_info: context))
685 loop = loop->inner;
686
687 pe = loop_preheader_edge (loop);
688 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
689 gcc_assert (!new_bb);
690 }
691 else
692 {
693 gimple_stmt_iterator gsi_region_begin
694 = gsi_after_labels (bb: bbs[0]);
695 gsi_insert_seq_before (&gsi_region_begin, seq, GSI_SAME_STMT);
696 }
697}
698
699/* Like insert_seq_on_entry but just inserts the single stmt NEW_STMT. */
700
701void
702vec_info::insert_on_entry (stmt_vec_info context, gimple *new_stmt)
703{
704 gimple_seq seq = NULL;
705 gimple_stmt_iterator gsi = gsi_start (seq);
706 gsi_insert_before_without_update (&gsi, new_stmt, GSI_SAME_STMT);
707 insert_seq_on_entry (context, seq);
708}
709
710/* Create and initialize a new stmt_vec_info struct for STMT. */
711
712stmt_vec_info
713vec_info::new_stmt_vec_info (gimple *stmt)
714{
715 stmt_vec_info res = XCNEW (class _stmt_vec_info);
716 res->stmt = stmt;
717
718 STMT_VINFO_TYPE (res) = undef_vec_info_type;
719 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
720 STMT_VINFO_VECTORIZABLE (res) = true;
721 STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
722 STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
723 STMT_VINFO_REDUC_FN (res) = IFN_LAST;
724 STMT_VINFO_REDUC_IDX (res) = -1;
725 STMT_VINFO_SLP_VECT_ONLY (res) = false;
726 STMT_VINFO_SLP_VECT_ONLY_PATTERN (res) = false;
727 STMT_VINFO_VEC_STMTS (res) = vNULL;
728 res->reduc_initial_values = vNULL;
729 res->reduc_scalar_results = vNULL;
730
731 if (is_a <loop_vec_info> (p: this)
732 && gimple_code (g: stmt) == GIMPLE_PHI
733 && is_loop_header_bb_p (bb: gimple_bb (g: stmt)))
734 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
735 else
736 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
737
738 STMT_SLP_TYPE (res) = loop_vect;
739
740 /* This is really "uninitialized" until vect_compute_data_ref_alignment. */
741 res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
742
743 return res;
744}
745
746/* Associate STMT with INFO. */
747
748void
749vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info, bool check_ro)
750{
751 unsigned int uid = gimple_uid (g: stmt);
752 if (uid == 0)
753 {
754 gcc_assert (!check_ro || !stmt_vec_info_ro);
755 gcc_checking_assert (info);
756 uid = stmt_vec_infos.length () + 1;
757 gimple_set_uid (g: stmt, uid);
758 stmt_vec_infos.safe_push (obj: info);
759 }
760 else
761 {
762 gcc_checking_assert (info == NULL);
763 stmt_vec_infos[uid - 1] = info;
764 }
765}
766
767/* Free the contents of stmt_vec_infos. */
768
769void
770vec_info::free_stmt_vec_infos (void)
771{
772 for (stmt_vec_info &info : stmt_vec_infos)
773 if (info != NULL)
774 free_stmt_vec_info (info);
775 stmt_vec_infos.release ();
776}
777
778/* Free STMT_INFO. */
779
780void
781vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
782{
783 if (stmt_info->pattern_stmt_p)
784 {
785 gimple_set_bb (stmt_info->stmt, NULL);
786 tree lhs = gimple_get_lhs (stmt_info->stmt);
787 if (lhs && TREE_CODE (lhs) == SSA_NAME)
788 release_ssa_name (name: lhs);
789 }
790
791 stmt_info->reduc_initial_values.release ();
792 stmt_info->reduc_scalar_results.release ();
793 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
794 STMT_VINFO_VEC_STMTS (stmt_info).release ();
795 free (ptr: stmt_info);
796}
797
798/* Returns true if S1 dominates S2. */
799
800bool
801vect_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
802{
803 basic_block bb1 = gimple_bb (g: s1), bb2 = gimple_bb (g: s2);
804
  /* If bb1 is NULL, it should be the GIMPLE_NOP def stmt of a default
     definition (D) SSA_NAME.  Assume it lives at the beginning of the
     function and thus dominates everything.  */
808 if (!bb1 || s1 == s2)
809 return true;
810
811 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
812 if (!bb2)
813 return false;
814
815 if (bb1 != bb2)
816 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
817
  /* PHIs in the same basic block are assumed to be executed all in
     parallel; if only one of the two stmts is a PHI, it dominates the
     other stmt in the same basic block.  */
821 if (gimple_code (g: s1) == GIMPLE_PHI)
822 return true;
823
824 if (gimple_code (g: s2) == GIMPLE_PHI)
825 return false;
826
827 /* Inserted vectorized stmts all have UID 0 while the original stmts
828 in the IL have UID increasing within a BB. Walk from both sides
829 until we find the other stmt or a stmt with UID != 0. */
830 gimple_stmt_iterator gsi1 = gsi_for_stmt (s1);
831 while (gimple_uid (g: gsi_stmt (i: gsi1)) == 0)
832 {
833 gsi_next (i: &gsi1);
834 if (gsi_end_p (i: gsi1))
835 return false;
836 if (gsi_stmt (i: gsi1) == s2)
837 return true;
838 }
839 if (gimple_uid (g: gsi_stmt (i: gsi1)) == -1u)
840 return false;
841
842 gimple_stmt_iterator gsi2 = gsi_for_stmt (s2);
843 while (gimple_uid (g: gsi_stmt (i: gsi2)) == 0)
844 {
845 gsi_prev (i: &gsi2);
846 if (gsi_end_p (i: gsi2))
847 return false;
848 if (gsi_stmt (i: gsi2) == s1)
849 return true;
850 }
851 if (gimple_uid (g: gsi_stmt (i: gsi2)) == -1u)
852 return false;
853
854 if (gimple_uid (g: gsi_stmt (i: gsi1)) <= gimple_uid (g: gsi_stmt (i: gsi2)))
855 return true;
856 return false;
857}
858
859/* A helper function to free scev and LOOP niter information, as well as
860 clear loop constraint LOOP_C_FINITE. */
861
862void
863vect_free_loop_info_assumptions (class loop *loop)
864{
865 scev_reset_htab ();
  /* We need to explicitly reset the upper bound information since it is
     used even after free_numbers_of_iterations_estimates.  */
868 loop->any_upper_bound = false;
869 loop->any_likely_upper_bound = false;
870 free_numbers_of_iterations_estimates (loop);
871 loop_constraint_clear (loop, LOOP_C_FINITE);
872}
873
874/* If LOOP has been versioned during ifcvt, return the internal call
875 guarding it. */
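/* Schematically (loop numbers hypothetical), an if-converted loop is
   guarded like

     if (LOOP_VECTORIZED (2, 3))
       loop2   <- copy to be vectorized
     else
       loop3   <- scalar copy

   where the first argument is the number of the loop to be vectorized
   and the second the number of its scalar copy; the call is recognized
   by matching either argument against LOOP->num below.  */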
876
877gimple *
878vect_loop_vectorized_call (class loop *loop, gcond **cond)
879{
880 basic_block bb = loop_preheader_edge (loop)->src;
881 gimple *g;
882 do
883 {
884 g = *gsi_last_bb (bb);
885 if ((g && gimple_code (g) == GIMPLE_COND)
886 || !single_succ_p (bb))
887 break;
888 if (!single_pred_p (bb))
889 break;
890 bb = single_pred (bb);
891 }
892 while (1);
893 if (g && gimple_code (g) == GIMPLE_COND)
894 {
895 if (cond)
896 *cond = as_a <gcond *> (p: g);
897 gimple_stmt_iterator gsi = gsi_for_stmt (g);
898 gsi_prev (i: &gsi);
899 if (!gsi_end_p (i: gsi))
900 {
901 g = gsi_stmt (i: gsi);
902 if (gimple_call_internal_p (gs: g, fn: IFN_LOOP_VECTORIZED)
903 && (tree_to_shwi (gimple_call_arg (gs: g, index: 0)) == loop->num
904 || tree_to_shwi (gimple_call_arg (gs: g, index: 1)) == loop->num))
905 return g;
906 }
907 }
908 return NULL;
909}
910
/* If LOOP has been versioned during loop distribution, return the guarding
   internal call.  */
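/* Schematically (hypothetical numbers), loop distribution versions a
   loop as

     if (LOOP_DIST_ALIAS (3, <runtime alias check>))
       <distributed loops>
     else
       <loop with orig_loop_num == 3>

   and the guarding call is found below by matching its first argument
   against LOOP->orig_loop_num.  */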
913
914static gimple *
915vect_loop_dist_alias_call (class loop *loop, function *fun)
916{
917 basic_block bb;
918 basic_block entry;
919 class loop *outer, *orig;
920
921 if (loop->orig_loop_num == 0)
922 return NULL;
923
924 orig = get_loop (fn: fun, num: loop->orig_loop_num);
925 if (orig == NULL)
926 {
      /* The original loop has somehow been destroyed.  Clear the
	 information.  */
928 loop->orig_loop_num = 0;
929 return NULL;
930 }
931
932 if (loop != orig)
933 bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
934 else
935 bb = loop_preheader_edge (loop)->src;
936
937 outer = bb->loop_father;
938 entry = ENTRY_BLOCK_PTR_FOR_FN (fun);
939
940 /* Look upward in dominance tree. */
941 for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
942 bb = get_immediate_dominator (CDI_DOMINATORS, bb))
943 {
944 gimple_stmt_iterator gsi = gsi_last_bb (bb);
945 if (!safe_is_a <gcond *> (p: *gsi))
946 continue;
947
948 gsi_prev (i: &gsi);
949 if (gsi_end_p (i: gsi))
950 continue;
951
952 gimple *g = gsi_stmt (i: gsi);
953 /* The guarding internal function call must have the same distribution
954 alias id. */
955 if (gimple_call_internal_p (gs: g, fn: IFN_LOOP_DIST_ALIAS)
956 && (tree_to_shwi (gimple_call_arg (gs: g, index: 0)) == loop->orig_loop_num))
957 return g;
958 }
959 return NULL;
960}
961
/* Set the uids of all the statements in the basic blocks inside the loop
   represented by LOOP_VINFO.  LOOP_VECTORIZED_CALL is the internal call
   guarding the loop which has been if-converted.  */
965static void
966set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call,
967 function *fun)
968{
969 tree arg = gimple_call_arg (gs: loop_vectorized_call, index: 1);
970 basic_block *bbs;
971 unsigned int i;
972 class loop *scalar_loop = get_loop (fn: fun, num: tree_to_shwi (arg));
973
974 LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
975 LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo)
976 = vec_init_loop_exit_info (scalar_loop);
977 gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
978 == loop_vectorized_call);
  /* If we are going to vectorize the outer loop, prevent vectorization
     of the inner loop in the scalar loop - either the scalar loop is
     thrown away, so vectorizing it would be wasted work, or it is used
     only for a few iterations.  */
983 if (scalar_loop->inner)
984 {
985 gimple *g = vect_loop_vectorized_call (loop: scalar_loop->inner);
986 if (g)
987 {
988 arg = gimple_call_arg (gs: g, index: 0);
989 get_loop (fn: fun, num: tree_to_shwi (arg))->dont_vectorize = true;
990 fold_loop_internal_call (g, boolean_false_node);
991 }
992 }
993 bbs = get_loop_body (scalar_loop);
994 for (i = 0; i < scalar_loop->num_nodes; i++)
995 {
996 basic_block bb = bbs[i];
997 gimple_stmt_iterator gsi;
998 for (gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
999 {
1000 gimple *phi = gsi_stmt (i: gsi);
1001 gimple_set_uid (g: phi, uid: 0);
1002 }
1003 for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
1004 {
1005 gimple *stmt = gsi_stmt (i: gsi);
1006 gimple_set_uid (g: stmt, uid: 0);
1007 }
1008 }
1009 free (ptr: bbs);
1010}
1011
1012/* Generate vectorized code for LOOP and its epilogues. */
1013
1014static unsigned
1015vect_transform_loops (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
1016 loop_p loop, gimple *loop_vectorized_call,
1017 function *fun)
1018{
1019 loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
1020
1021 if (loop_vectorized_call)
1022 set_uid_loop_bbs (loop_vinfo, loop_vectorized_call, fun);
1023
1024 unsigned HOST_WIDE_INT bytes;
1025 if (dump_enabled_p ())
1026 {
1027 if (GET_MODE_SIZE (mode: loop_vinfo->vector_mode).is_constant (const_value: &bytes))
1028 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1029 "%sloop vectorized using %s%wu byte vectors and"
1030 " unroll factor %u\n",
1031 LOOP_VINFO_EPILOGUE_P (loop_vinfo)
1032 ? "epilogue " : "",
1033 LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
1034 ? "masked " : "", bytes,
1035 (unsigned int) LOOP_VINFO_VECT_FACTOR
1036 (loop_vinfo).to_constant ());
1037 else
1038 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1039 "%sloop vectorized using variable length vectors\n",
1040 LOOP_VINFO_EPILOGUE_P (loop_vinfo)
1041 ? "epilogue " : "");
1042 }
1043
1044 loop_p new_loop = vect_transform_loop (loop_vinfo,
1045 loop_vectorized_call);
1046 /* Now that the loop has been vectorized, allow it to be unrolled
1047 etc. */
1048 loop->force_vectorize = false;
1049
1050 if (loop->simduid)
1051 {
1052 simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
1053 if (!simduid_to_vf_htab)
1054 simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
1055 simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
1056 simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
1057 *simduid_to_vf_htab->find_slot (value: simduid_to_vf_data, insert: INSERT)
1058 = simduid_to_vf_data;
1059 }
1060
1061 /* We should not have to update virtual SSA form here but some
1062 transforms involve creating new virtual definitions which makes
1063 updating difficult.
1064 We delay the actual update to the end of the pass but avoid
1065 confusing ourselves by forcing need_ssa_update_p () to false. */
1066 unsigned todo = 0;
1067 if (need_ssa_update_p (cfun))
1068 {
1069 gcc_assert (loop_vinfo->any_known_not_updated_vssa);
1070 fun->gimple_df->ssa_renaming_needed = false;
1071 todo |= TODO_update_ssa_only_virtuals;
1072 }
1073 gcc_assert (!need_ssa_update_p (cfun));
1074
1075 /* Epilogue of vectorized loop must be vectorized too. */
1076 if (new_loop)
1077 todo |= vect_transform_loops (simduid_to_vf_htab, loop: new_loop, NULL, fun);
1078
1079 return todo;
1080}
1081
1082/* Try to vectorize LOOP. */
1083
1084static unsigned
1085try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
1086 unsigned *num_vectorized_loops, loop_p loop,
1087 gimple *loop_vectorized_call,
1088 gimple *loop_dist_alias_call,
1089 function *fun)
1090{
1091 unsigned ret = 0;
1092 vec_info_shared shared;
1093 auto_purge_vect_location sentinel;
1094 vect_location = find_loop_location (loop);
1095
1096 if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
1097 && dump_enabled_p ())
1098 dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
1099 "\nAnalyzing loop at %s:%d\n",
1100 LOCATION_FILE (vect_location.get_location_t ()),
1101 LOCATION_LINE (vect_location.get_location_t ()));
1102
1103 /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p. */
1104 opt_loop_vec_info loop_vinfo = vect_analyze_loop (loop, loop_vectorized_call,
1105 &shared);
1106 loop->aux = loop_vinfo;
1107
1108 if (!loop_vinfo)
1109 if (dump_enabled_p ())
1110 if (opt_problem *problem = loop_vinfo.get_problem ())
1111 {
1112 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1113 "couldn't vectorize loop\n");
1114 problem->emit_and_clear ();
1115 }
1116
1117 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
1118 {
      /* Free existing information if the loop was analyzed under some
	 assumptions.  */
1121 if (loop_constraint_set_p (loop, LOOP_C_FINITE))
1122 vect_free_loop_info_assumptions (loop);
1123
1124 /* If we applied if-conversion then try to vectorize the
1125 BB of innermost loops.
1126 ??? Ideally BB vectorization would learn to vectorize
1127 control flow by applying if-conversion on-the-fly, the
1128 following retains the if-converted loop body even when
1129 only non-if-converted parts took part in BB vectorization. */
1130 if (flag_tree_slp_vectorize != 0
1131 && loop_vectorized_call
1132 && ! loop->inner)
1133 {
1134 basic_block bb = loop->header;
1135 bool require_loop_vectorize = false;
1136 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
1137 !gsi_end_p (i: gsi); gsi_next (i: &gsi))
1138 {
1139 gimple *stmt = gsi_stmt (i: gsi);
1140 gcall *call = dyn_cast <gcall *> (p: stmt);
1141 if (call && gimple_call_internal_p (gs: call))
1142 {
1143 internal_fn ifn = gimple_call_internal_fn (gs: call);
1144 if (ifn == IFN_MASK_LOAD
1145 || ifn == IFN_MASK_STORE
1146 || ifn == IFN_MASK_CALL
		    /* Don't keep the if-converted parts when an ifn of this
		       specific type is not supported by the backend.  */
1149 || (direct_internal_fn_p (fn: ifn)
1150 && !direct_internal_fn_supported_p
1151 (call, OPTIMIZE_FOR_SPEED)))
1152 {
1153 require_loop_vectorize = true;
1154 break;
1155 }
1156 }
1157 gimple_set_uid (g: stmt, uid: -1);
1158 gimple_set_visited (stmt, visited_p: false);
1159 }
1160 if (!require_loop_vectorize)
1161 {
1162 tree arg = gimple_call_arg (gs: loop_vectorized_call, index: 1);
1163 class loop *scalar_loop = get_loop (fn: fun, num: tree_to_shwi (arg));
1164 if (vect_slp_if_converted_bb (bb, orig_loop: scalar_loop))
1165 {
1166 fold_loop_internal_call (loop_vectorized_call,
1167 boolean_true_node);
1168 loop_vectorized_call = NULL;
1169 ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
1170 }
1171 }
1172 }
1173 /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
1174 loop, don't vectorize its inner loop; we'll attempt to
1175 vectorize LOOP_VECTORIZED guarded inner loop of the scalar
1176 loop version. */
1177 if (loop_vectorized_call && loop->inner)
1178 loop->inner->dont_vectorize = true;
1179 return ret;
1180 }
1181
1182 if (!dbg_cnt (index: vect_loop))
1183 {
      /* Free existing information if the loop was analyzed under some
	 assumptions.  */
1186 if (loop_constraint_set_p (loop, LOOP_C_FINITE))
1187 vect_free_loop_info_assumptions (loop);
1188 return ret;
1189 }
1190
1191 (*num_vectorized_loops)++;
1192 /* Transform LOOP and its epilogues. */
1193 ret |= vect_transform_loops (simduid_to_vf_htab, loop,
1194 loop_vectorized_call, fun);
1195
1196 if (loop_vectorized_call)
1197 {
1198 fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
1199 ret |= TODO_cleanup_cfg;
1200 }
1201 if (loop_dist_alias_call)
1202 {
1203 tree value = gimple_call_arg (gs: loop_dist_alias_call, index: 1);
1204 fold_loop_internal_call (loop_dist_alias_call, value);
1205 ret |= TODO_cleanup_cfg;
1206 }
1207
1208 return ret;
1209}
1210
1211/* Try to vectorize LOOP. */
1212
1213static unsigned
1214try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
1215 unsigned *num_vectorized_loops, loop_p loop,
1216 function *fun)
1217{
1218 if (!((flag_tree_loop_vectorize
1219 && optimize_loop_nest_for_speed_p (loop))
1220 || loop->force_vectorize))
1221 return 0;
1222
1223 return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
1224 loop_vectorized_call: vect_loop_vectorized_call (loop),
1225 loop_dist_alias_call: vect_loop_dist_alias_call (loop, fun), fun);
1226}
1227
1228
1229/* Loop autovectorization. */
1230
1231namespace {
1232
1233const pass_data pass_data_vectorize =
1234{
  GIMPLE_PASS, /* type */
  "vect", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_VECTORIZATION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
1244};
1245
1246class pass_vectorize : public gimple_opt_pass
1247{
1248public:
1249 pass_vectorize (gcc::context *ctxt)
1250 : gimple_opt_pass (pass_data_vectorize, ctxt)
1251 {}
1252
1253 /* opt_pass methods: */
1254 bool gate (function *fun) final override
1255 {
1256 return flag_tree_loop_vectorize || fun->has_force_vectorize_loops;
1257 }
1258
1259 unsigned int execute (function *) final override;
1260
1261}; // class pass_vectorize
1262
/* Entry point to the loop vectorization phase.  */
1266
1267unsigned
1268pass_vectorize::execute (function *fun)
1269{
1270 unsigned int i;
1271 unsigned int num_vectorized_loops = 0;
1272 unsigned int vect_loops_num;
1273 hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
1274 hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
1275 bool any_ifcvt_loops = false;
1276 unsigned ret = 0;
1277
1278 vect_loops_num = number_of_loops (fn: fun);
1279
1280 /* Bail out if there are no loops. */
1281 if (vect_loops_num <= 1)
1282 return 0;
1283
1284 vect_slp_init ();
1285
1286 if (fun->has_simduid_loops)
1287 note_simd_array_uses (htab: &simd_array_to_simduid_htab, fun);
1288
1289 /* ----------- Analyze loops. ----------- */
1290
  /* If some loop was duplicated, it gets a bigger number
     than all previously defined loops.  This fact allows us to run
     only over the initial loops, skipping newly generated ones.  */
1294 for (auto loop : loops_list (fun, 0))
1295 if (loop->dont_vectorize)
1296 {
1297 any_ifcvt_loops = true;
	/* If-conversion sometimes versions both the outer loop
	   (for the case when outer loop vectorization might be
	   desirable) and the inner loop in the scalar version
	   of the loop.  So we have:
1302 if (LOOP_VECTORIZED (1, 3))
1303 {
1304 loop1
1305 loop2
1306 }
1307 else
1308 loop3 (copy of loop1)
1309 if (LOOP_VECTORIZED (4, 5))
1310 loop4 (copy of loop2)
1311 else
1312 loop5 (copy of loop4)
	   If the loop iteration order gives us loop3 first (which has
	   dont_vectorize set), make sure to process loop1 before loop4,
	   so that we can prevent vectorization of loop4 if loop1
	   is successfully vectorized.  */
1317 if (loop->inner)
1318 {
1319 gimple *loop_vectorized_call
1320 = vect_loop_vectorized_call (loop);
1321 if (loop_vectorized_call
1322 && vect_loop_vectorized_call (loop: loop->inner))
1323 {
1324 tree arg = gimple_call_arg (gs: loop_vectorized_call, index: 0);
1325 class loop *vector_loop
1326 = get_loop (fn: fun, num: tree_to_shwi (arg));
1327 if (vector_loop && vector_loop != loop)
1328 {
1329 /* Make sure we don't vectorize it twice. */
1330 vector_loop->dont_vectorize = true;
1331 ret |= try_vectorize_loop (simduid_to_vf_htab,
1332 num_vectorized_loops: &num_vectorized_loops,
1333 loop: vector_loop, fun);
1334 }
1335 }
1336 }
1337 }
1338 else
1339 ret |= try_vectorize_loop (simduid_to_vf_htab, num_vectorized_loops: &num_vectorized_loops,
1340 loop, fun);
1341
1342 vect_location = dump_user_location_t ();
1343
1344 statistics_counter_event (fun, "Vectorized loops", num_vectorized_loops);
1345 if (dump_enabled_p ()
1346 || (num_vectorized_loops > 0 && dump_enabled_p ()))
1347 dump_printf_loc (MSG_NOTE, vect_location,
1348 "vectorized %u loops in function.\n",
1349 num_vectorized_loops);
1350
1351 /* ----------- Finalize. ----------- */
1352
1353 if (any_ifcvt_loops)
1354 for (i = 1; i < number_of_loops (fn: fun); i++)
1355 {
1356 class loop *loop = get_loop (fn: fun, num: i);
1357 if (loop && loop->dont_vectorize)
1358 {
1359 gimple *g = vect_loop_vectorized_call (loop);
1360 if (g)
1361 {
1362 fold_loop_internal_call (g, boolean_false_node);
1363 loop->dont_vectorize = false;
1364 ret |= TODO_cleanup_cfg;
1365 g = NULL;
1366 }
1367 else
1368 g = vect_loop_dist_alias_call (loop, fun);
1369
1370 if (g)
1371 {
1372 fold_loop_internal_call (g, boolean_false_node);
1373 loop->dont_vectorize = false;
1374 ret |= TODO_cleanup_cfg;
1375 }
1376 }
1377 }
1378
1379 /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */
1380 if (fun->has_simduid_loops)
1381 {
1382 adjust_simduid_builtins (htab: simduid_to_vf_htab, fun);
1383 /* Avoid stale SCEV cache entries for the SIMD_LANE defs. */
1384 scev_reset ();
1385 }
  /* Shrink any "omp simd array" temporary arrays to the
     actual vectorization factors.  */
1388 if (simd_array_to_simduid_htab)
1389 shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
1390 delete simduid_to_vf_htab;
1391 fun->has_simduid_loops = false;
1392
1393 if (num_vectorized_loops > 0)
1394 {
1395 /* We are collecting some corner cases where we need to update
1396 virtual SSA form via the TODO but delete the queued update-SSA
1397 state. Force renaming if we think that might be necessary. */
1398 if (ret & TODO_update_ssa_only_virtuals)
1399 mark_virtual_operands_for_renaming (cfun);
      /* If we vectorized any loop, only virtual SSA form needs to be
	 updated.
	 ??? Also, while we try hard to update loop-closed SSA form we
	 fail to properly do this in some corner cases (see PR56286).  */
1403 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
1404 ret |= TODO_cleanup_cfg;
1405 }
1406
1407 for (i = 1; i < number_of_loops (fn: fun); i++)
1408 {
1409 loop_vec_info loop_vinfo;
1410 bool has_mask_store;
1411
1412 class loop *loop = get_loop (fn: fun, num: i);
1413 if (!loop || !loop->aux)
1414 continue;
1415 loop_vinfo = (loop_vec_info) loop->aux;
1416 has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
1417 delete loop_vinfo;
1418 if (has_mask_store
1419 && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
1420 optimize_mask_stores (loop);
1421
1422 auto_bitmap exit_bbs;
      /* Perform local CSE; this especially helps because we emit code for
	 predicates that need to be shared for optimal predicate usage.
	 However reassoc will re-order them and prevent CSE from working
	 as it should.  CSE only the loop body, not the entry.  */
1427 auto_vec<edge> exits = get_loop_exit_edges (loop);
1428 for (edge exit : exits)
1429 bitmap_set_bit (exit_bbs, exit->dest->index);
1430
1431 edge entry = EDGE_PRED (loop_preheader_edge (loop)->src, 0);
1432 do_rpo_vn (fun, entry, exit_bbs);
1433
1434 loop->aux = NULL;
1435 }
1436
1437 vect_slp_fini ();
1438
1439 return ret;
1440}
1441
1442} // anon namespace
1443
1444gimple_opt_pass *
1445make_pass_vectorize (gcc::context *ctxt)
1446{
1447 return new pass_vectorize (ctxt);
1448}
1449
1450/* Entry point to the simduid cleanup pass. */
1451
1452namespace {
1453
1454const pass_data pass_data_simduid_cleanup =
1455{
  GIMPLE_PASS, /* type */
  "simduid", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
1465};
1466
1467class pass_simduid_cleanup : public gimple_opt_pass
1468{
1469public:
1470 pass_simduid_cleanup (gcc::context *ctxt)
1471 : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
1472 {}
1473
1474 /* opt_pass methods: */
1475 opt_pass * clone () final override
1476 {
1477 return new pass_simduid_cleanup (m_ctxt);
1478 }
1479 bool gate (function *fun) final override { return fun->has_simduid_loops; }
1480 unsigned int execute (function *) final override;
1481
1482}; // class pass_simduid_cleanup
1483
1484unsigned int
1485pass_simduid_cleanup::execute (function *fun)
1486{
1487 hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
1488
1489 note_simd_array_uses (htab: &simd_array_to_simduid_htab, fun);
1490
1491 /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */
1492 adjust_simduid_builtins (NULL, fun);
1493
  /* Shrink any "omp simd array" temporary arrays to the
     actual vectorization factors.  */
1496 if (simd_array_to_simduid_htab)
1497 shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
1498 fun->has_simduid_loops = false;
1499 return 0;
1500}
1501
1502} // anon namespace
1503
1504gimple_opt_pass *
1505make_pass_simduid_cleanup (gcc::context *ctxt)
1506{
1507 return new pass_simduid_cleanup (ctxt);
1508}
1509
1510
1511/* Entry point to basic block SLP phase. */
1512
1513namespace {
1514
1515const pass_data pass_data_slp_vectorize =
1516{
  GIMPLE_PASS, /* type */
  "slp", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_TREE_SLP_VECTORIZATION, /* tv_id */
  ( PROP_ssa | PROP_cfg ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
1525 TODO_update_ssa, /* todo_flags_finish */
1526};
1527
1528class pass_slp_vectorize : public gimple_opt_pass
1529{
1530public:
1531 pass_slp_vectorize (gcc::context *ctxt)
1532 : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
1533 {}
1534
1535 /* opt_pass methods: */
1536 opt_pass * clone () final override { return new pass_slp_vectorize (m_ctxt); }
1537 bool gate (function *) final override { return flag_tree_slp_vectorize != 0; }
1538 unsigned int execute (function *) final override;
1539
1540}; // class pass_slp_vectorize
1541
1542unsigned int
1543pass_slp_vectorize::execute (function *fun)
1544{
1545 auto_purge_vect_location sentinel;
1546 basic_block bb;
1547
1548 bool in_loop_pipeline = scev_initialized_p ();
1549 if (!in_loop_pipeline)
1550 {
1551 loop_optimizer_init (LOOPS_NORMAL);
1552 scev_initialize ();
1553 }
1554
1555 /* Mark all stmts as not belonging to the current region and unvisited. */
1556 FOR_EACH_BB_FN (bb, fun)
1557 {
1558 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi);
1559 gsi_next (i: &gsi))
1560 {
1561 gphi *stmt = gsi.phi ();
1562 gimple_set_uid (g: stmt, uid: -1);
1563 gimple_set_visited (stmt, visited_p: false);
1564 }
1565 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi);
1566 gsi_next (i: &gsi))
1567 {
1568 gimple *stmt = gsi_stmt (i: gsi);
1569 gimple_set_uid (g: stmt, uid: -1);
1570 gimple_set_visited (stmt, visited_p: false);
1571 }
1572 }
1573
1574 vect_slp_init ();
1575
1576 vect_slp_function (fun);
1577
1578 vect_slp_fini ();
1579
1580 if (!in_loop_pipeline)
1581 {
1582 scev_finalize ();
1583 loop_optimizer_finalize ();
1584 }
1585
1586 return 0;
1587}
1588
1589} // anon namespace
1590
1591gimple_opt_pass *
1592make_pass_slp_vectorize (gcc::context *ctxt)
1593{
1594 return new pass_slp_vectorize (ctxt);
1595}
1596
1597
1598/* Increase alignment of global arrays to improve vectorization potential.
1599 TODO:
1600 - Consider also structs that have an array field.
1601 - Use ipa analysis to prune arrays that can't be vectorized?
1602 This should involve global alignment analysis and in the future also
1603 array padding. */
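/* For illustration only (hypothetical declaration and vector size): a
   file-scope array such as

     static double a[1024];

   whose natural alignment is smaller than that of the corresponding
   vector type (e.g. 32 bytes for a 4 x double vector) may have its
   alignment increased below so that vectorized accesses to it can use
   aligned loads and stores.  */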
1604
1605static unsigned get_vec_alignment_for_type (tree);
1606static hash_map<tree, unsigned> *type_align_map;
1607
/* Return the alignment of the vector type corresponding to TYPE's scalar
   element type, or 0 if no such vector type exists.  */
1610static unsigned
1611get_vec_alignment_for_array_type (tree type)
1612{
1613 gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
1614 poly_uint64 array_size, vector_size;
1615
1616 tree scalar_type = strip_array_types (type);
1617 tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
1618 if (!vectype
1619 || !poly_int_tree_p (TYPE_SIZE (type), value: &array_size)
1620 || !poly_int_tree_p (TYPE_SIZE (vectype), value: &vector_size)
1621 || maybe_lt (a: array_size, b: vector_size))
1622 return 0;
1623
1624 return TYPE_ALIGN (vectype);
1625}
1626
/* Return the alignment of the field with the maximum alignment of the
   vector type corresponding to its scalar type.  For now, we only consider
   fields whose offset is a multiple of their vector alignment.
   Return 0 if no suitable field is found.  */
1631static unsigned
1632get_vec_alignment_for_record_type (tree type)
1633{
1634 gcc_assert (TREE_CODE (type) == RECORD_TYPE);
1635
1636 unsigned max_align = 0, alignment;
1637 HOST_WIDE_INT offset;
1638 tree offset_tree;
1639
1640 if (TYPE_PACKED (type))
1641 return 0;
1642
1643 unsigned *slot = type_align_map->get (k: type);
1644 if (slot)
1645 return *slot;
1646
1647 for (tree field = first_field (type);
1648 field != NULL_TREE;
1649 field = DECL_CHAIN (field))
1650 {
1651 /* Skip if not FIELD_DECL or if alignment is set by user. */
1652 if (TREE_CODE (field) != FIELD_DECL
1653 || DECL_USER_ALIGN (field)
1654 || DECL_ARTIFICIAL (field))
1655 continue;
1656
1657 /* We don't need to process the type further if offset is variable,
1658 since the offsets of remaining members will also be variable. */
1659 if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
1660 || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
1661 break;
1662
1663 /* Similarly stop processing the type if offset_tree
1664 does not fit in unsigned HOST_WIDE_INT. */
1665 offset_tree = bit_position (field);
1666 if (!tree_fits_uhwi_p (offset_tree))
1667 break;
1668
1669 offset = tree_to_uhwi (offset_tree);
1670 alignment = get_vec_alignment_for_type (TREE_TYPE (field));
1671
      /* Get the maximum alignment of a vectorized field/array among those
	 members whose offset is a multiple of the vector alignment.  */
1674 if (alignment
1675 && (offset % alignment == 0)
1676 && (alignment > max_align))
1677 max_align = alignment;
1678 }
1679
1680 type_align_map->put (k: type, v: max_align);
1681 return max_align;
1682}
1683
/* Return the alignment of the vector type corresponding to TYPE's scalar
   type, or 0 if it doesn't exist or if the vector alignment is less than
   TYPE's alignment.  */
1687static unsigned
1688get_vec_alignment_for_type (tree type)
1689{
1690 if (type == NULL_TREE)
1691 return 0;
1692
1693 gcc_assert (TYPE_P (type));
1694
1695 static unsigned alignment = 0;
1696 switch (TREE_CODE (type))
1697 {
1698 case ARRAY_TYPE:
1699 alignment = get_vec_alignment_for_array_type (type);
1700 break;
1701 case RECORD_TYPE:
1702 alignment = get_vec_alignment_for_record_type (type);
1703 break;
1704 default:
1705 alignment = 0;
1706 break;
1707 }
1708
1709 return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
1710}
1711
1712/* Entry point to increase_alignment pass. */
1713static unsigned int
1714increase_alignment (void)
1715{
1716 varpool_node *vnode;
1717
1718 vect_location = dump_user_location_t ();
1719 type_align_map = new hash_map<tree, unsigned>;
1720
1721 /* Increase the alignment of all global arrays for vectorization. */
1722 FOR_EACH_DEFINED_VARIABLE (vnode)
1723 {
1724 tree decl = vnode->decl;
1725 unsigned int alignment;
1726
1727 if ((decl_in_symtab_p (decl)
1728 && !symtab_node::get (decl)->can_increase_alignment_p ())
1729 || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
1730 continue;
1731
1732 alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
1733 if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
1734 {
1735 vnode->increase_alignment (align: alignment);
1736 if (dump_enabled_p ())
1737 dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl);
1738 }
1739 }
1740
1741 delete type_align_map;
1742 return 0;
1743}
1744
1745
1746namespace {
1747
1748const pass_data pass_data_ipa_increase_alignment =
1749{
  SIMPLE_IPA_PASS, /* type */
  "increase_alignment", /* name */
  OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
  TV_IPA_OPT, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
1759};
1760
1761class pass_ipa_increase_alignment : public simple_ipa_opt_pass
1762{
1763public:
1764 pass_ipa_increase_alignment (gcc::context *ctxt)
1765 : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
1766 {}
1767
1768 /* opt_pass methods: */
1769 bool gate (function *) final override
1770 {
1771 return flag_section_anchors && flag_tree_loop_vectorize;
1772 }
1773
1774 unsigned int execute (function *) final override
1775 {
1776 return increase_alignment ();
1777 }
1778
1779}; // class pass_ipa_increase_alignment
1780
1781} // anon namespace
1782
1783simple_ipa_opt_pass *
1784make_pass_ipa_increase_alignment (gcc::context *ctxt)
1785{
1786 return new pass_ipa_increase_alignment (ctxt);
1787}
1788
1789/* If the condition represented by T is a comparison or the SSA name
1790 result of a comparison, extract the comparison's operands. Represent
1791 T as NE_EXPR <T, 0> otherwise. */
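/* For example (hypothetical SSA names): for T of the form a_1 < b_2 the
   operands are extracted directly; for T an SSA name defined by
   _3 = a_1 < b_2 they are taken from the defining statement (with
   inverted_p set if the definition is instead _4 = ~_3); otherwise T is
   represented as T != 0.  */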
1792
1793void
1794scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
1795{
1796 if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
1797 {
1798 this->code = TREE_CODE (t);
1799 this->op0 = TREE_OPERAND (t, 0);
1800 this->op1 = TREE_OPERAND (t, 1);
1801 this->inverted_p = false;
1802 return;
1803 }
1804
1805 if (TREE_CODE (t) == SSA_NAME)
1806 if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
1807 {
1808 tree_code code = gimple_assign_rhs_code (gs: stmt);
1809 if (TREE_CODE_CLASS (code) == tcc_comparison)
1810 {
1811 this->code = code;
1812 this->op0 = gimple_assign_rhs1 (gs: stmt);
1813 this->op1 = gimple_assign_rhs2 (gs: stmt);
1814 this->inverted_p = false;
1815 return;
1816 }
1817 else if (code == BIT_NOT_EXPR)
1818 {
1819 tree n_op = gimple_assign_rhs1 (gs: stmt);
1820 if ((stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (n_op))))
1821 {
1822 code = gimple_assign_rhs_code (gs: stmt);
1823 if (TREE_CODE_CLASS (code) == tcc_comparison)
1824 {
1825 this->code = code;
1826 this->op0 = gimple_assign_rhs1 (gs: stmt);
1827 this->op1 = gimple_assign_rhs2 (gs: stmt);
1828 this->inverted_p = true;
1829 return;
1830 }
1831 }
1832 }
1833 }
1834
1835 this->code = NE_EXPR;
1836 this->op0 = t;
1837 this->op1 = build_zero_cst (TREE_TYPE (t));
1838 this->inverted_p = false;
1839}
1840
1841/* See the comment above the declaration for details. */
1842
1843unsigned int
1844vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
1845 stmt_vec_info stmt_info, slp_tree,
1846 tree vectype, int misalign,
1847 vect_cost_model_location where)
1848{
1849 unsigned int cost
1850 = builtin_vectorization_cost (type_of_cost: kind, vectype, misalign) * count;
1851 return record_stmt_cost (stmt_info, where, cost);
1852}

/* See the comment above the declaration for details.  */

void
vector_costs::finish_cost (const vector_costs *)
{
  gcc_assert (!m_finished);
  m_finished = true;
}

/* Record a base cost of COST units against WHERE.  If STMT_INFO is
   nonnull, use it to adjust the cost based on execution frequency
   (where appropriate).  */

unsigned int
vector_costs::record_stmt_cost (stmt_vec_info stmt_info,
				vect_cost_model_location where,
				unsigned int cost)
{
  cost = adjust_cost_for_freq (stmt_info, where, cost);
  m_costs[where] += cost;
  return cost;
}

/* COST is the base cost we have calculated for an operation in location WHERE.
   If STMT_INFO is nonnull, use it to adjust the cost based on execution
   frequency (where appropriate).  Return the adjusted cost.  */

unsigned int
vector_costs::adjust_cost_for_freq (stmt_vec_info stmt_info,
				    vect_cost_model_location where,
				    unsigned int cost)
{
  /* Statements in an inner loop relative to the loop being
     vectorized are weighted more heavily.  The value here is
     arbitrary and could potentially be improved with analysis.  */
  if (where == vect_body
      && stmt_info
      && stmt_in_inner_loop_p (m_vinfo, stmt_info))
    {
      loop_vec_info loop_vinfo = as_a<loop_vec_info> (m_vinfo);
      cost *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo);
    }
  return cost;
}

/* See the comment above the declaration for details.  */

bool
vector_costs::better_main_loop_than_p (const vector_costs *other) const
{
  int diff = compare_inside_loop_cost (other);
  if (diff != 0)
    return diff < 0;

  /* If there's nothing to choose between the loop bodies, see whether
     there's a difference in the prologue and epilogue costs.  */
  diff = compare_outside_loop_cost (other);
  if (diff != 0)
    return diff < 0;

  return false;
}


/* See the comment above the declaration for details.  */

bool
vector_costs::better_epilogue_loop_than_p (const vector_costs *other,
					   loop_vec_info main_loop) const
{
  loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
  loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);

  poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
  poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);

  poly_uint64 main_poly_vf = LOOP_VINFO_VECT_FACTOR (main_loop);
  unsigned HOST_WIDE_INT main_vf;
  unsigned HOST_WIDE_INT other_factor, this_factor, other_cost, this_cost;
  /* If we can determine how many iterations are left for the epilogue
     loop, that is if both the main loop's vectorization factor and number
     of iterations are constant, then we use them to calculate the cost of
     the epilogue loop together with a 'likely value' for the epilogue's
     vectorization factor.  Otherwise we use the main loop's vectorization
     factor and the maximum poly value for the epilogue's.  If the target
     has not provided a sensible upper bound, poly vectorization factors
     are likely to be favored over constant ones.  */
  if (main_poly_vf.is_constant (&main_vf)
      && LOOP_VINFO_NITERS_KNOWN_P (main_loop))
    {
      unsigned HOST_WIDE_INT niters
	= LOOP_VINFO_INT_NITERS (main_loop) % main_vf;
      HOST_WIDE_INT other_likely_vf
	= estimated_poly_value (other_vf, POLY_VALUE_LIKELY);
      HOST_WIDE_INT this_likely_vf
	= estimated_poly_value (this_vf, POLY_VALUE_LIKELY);

      /* If the epilogue is using partial vectors we account for the
	 partial iteration here too.  */
      other_factor = niters / other_likely_vf;
      if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo)
	  && niters % other_likely_vf != 0)
	other_factor++;

      this_factor = niters / this_likely_vf;
      if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo)
	  && niters % this_likely_vf != 0)
	this_factor++;
    }
  else
    {
      unsigned HOST_WIDE_INT main_vf_max
	= estimated_poly_value (main_poly_vf, POLY_VALUE_MAX);
      unsigned HOST_WIDE_INT other_vf_max
	= estimated_poly_value (other_vf, POLY_VALUE_MAX);
      unsigned HOST_WIDE_INT this_vf_max
	= estimated_poly_value (this_vf, POLY_VALUE_MAX);

      other_factor = CEIL (main_vf_max, other_vf_max);
      this_factor = CEIL (main_vf_max, this_vf_max);
      /* If the loop is not using partial vectors then it will iterate one
	 time fewer than a loop that does.  It is safe to subtract one here,
	 because the main loop's vf is always at least twice as large as
	 that of an epilogue.  */
      if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo))
	other_factor -= 1;
      if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo))
	this_factor -= 1;
    }

  /* Compute the costs by multiplying the inside costs by the factor and
     adding the outside costs, to get a more complete picture.  The factor
     is the number of times we expect this epilogue to iterate.  */
  other_cost = other->body_cost () * other_factor;
  this_cost = this->body_cost () * this_factor;
  other_cost += other->outside_cost ();
  this_cost += this->outside_cost ();
  return this_cost < other_cost;
}

/* A <=>-style subroutine of better_main_loop_than_p.  Check whether we can
   determine the return value of better_main_loop_than_p by comparing the
   inside (loop body) costs of THIS and OTHER.  Return:

   * -1 if better_main_loop_than_p should return true.
   * 1 if better_main_loop_than_p should return false.
   * 0 if we can't decide.  */

int
vector_costs::compare_inside_loop_cost (const vector_costs *other) const
{
  loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->m_vinfo);
  loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);

  struct loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo);
  gcc_assert (LOOP_VINFO_LOOP (other_loop_vinfo) == loop);

  poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
  poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);

  /* Limit the VFs to what is likely to be the maximum number of iterations,
     to handle cases in which at least one loop_vinfo is fully-masked.  */
  HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop);
  if (estimated_max_niter != -1)
    {
      if (estimated_poly_value (this_vf, POLY_VALUE_MIN)
	  >= estimated_max_niter)
	this_vf = estimated_max_niter;
      if (estimated_poly_value (other_vf, POLY_VALUE_MIN)
	  >= estimated_max_niter)
	other_vf = estimated_max_niter;
    }

  /* Check whether the (fractional) cost per scalar iteration is lower or
     higher: this_inside_cost / this_vf vs. other_inside_cost / other_vf.  */
  poly_int64 rel_this = this_loop_vinfo->vector_costs->body_cost () * other_vf;
  poly_int64 rel_other
    = other_loop_vinfo->vector_costs->body_cost () * this_vf;

  HOST_WIDE_INT est_rel_this_min
    = estimated_poly_value (rel_this, POLY_VALUE_MIN);
  HOST_WIDE_INT est_rel_this_max
    = estimated_poly_value (rel_this, POLY_VALUE_MAX);

  HOST_WIDE_INT est_rel_other_min
    = estimated_poly_value (rel_other, POLY_VALUE_MIN);
  HOST_WIDE_INT est_rel_other_max
    = estimated_poly_value (rel_other, POLY_VALUE_MAX);

  /* Check first if we can make out an unambiguous total order from the
     minimum and maximum estimates.  */
  if (est_rel_this_min < est_rel_other_min
      && est_rel_this_max < est_rel_other_max)
    return -1;

  if (est_rel_other_min < est_rel_this_min
      && est_rel_other_max < est_rel_this_max)
    return 1;

  /* When other_loop_vinfo uses a variable vectorization factor,
     we know that it has a lower cost for at least one runtime VF.
     However, we don't know how likely that VF is.

     One option would be to compare the costs for the estimated VFs.
     The problem is that this can put too much pressure on the cost
     model.  E.g. if the estimated VF is also the lowest possible VF,
     and if other_loop_vinfo is 1 unit worse than this_loop_vinfo
     for the estimated VF, we'd then choose this_loop_vinfo even
     though (a) this_loop_vinfo might not actually be better than
     other_loop_vinfo for that VF and (b) it would be significantly
     worse at larger VFs.

     Here we go for a hacky compromise: pick this_loop_vinfo if it is
     no more expensive than other_loop_vinfo even after doubling the
     estimated other_loop_vinfo VF.  For all but trivial loops, this
     ensures that we only pick this_loop_vinfo if it is significantly
     better than other_loop_vinfo at the estimated VF.  */
  if (est_rel_other_min != est_rel_this_min
      || est_rel_other_max != est_rel_this_max)
    {
      HOST_WIDE_INT est_rel_this_likely
	= estimated_poly_value (rel_this, POLY_VALUE_LIKELY);
      HOST_WIDE_INT est_rel_other_likely
	= estimated_poly_value (rel_other, POLY_VALUE_LIKELY);

      return est_rel_this_likely * 2 <= est_rel_other_likely ? -1 : 1;
    }

  return 0;
}

/* A <=>-style subroutine of better_main_loop_than_p, used when there is
   nothing to choose between the inside (loop body) costs of THIS and OTHER.
   Check whether we can determine the return value of better_main_loop_than_p
   by comparing the outside (prologue and epilogue) costs of THIS and OTHER.
   Return:

   * -1 if better_main_loop_than_p should return true.
   * 1 if better_main_loop_than_p should return false.
   * 0 if we can't decide.  */

int
vector_costs::compare_outside_loop_cost (const vector_costs *other) const
{
  auto this_outside_cost = this->outside_cost ();
  auto other_outside_cost = other->outside_cost ();
  if (this_outside_cost != other_outside_cost)
    return this_outside_cost < other_outside_cost ? -1 : 1;

  return 0;
}