tree-vectorizer.cc source code [gcc/tree-vectorizer.cc]

1	/ Vectorizer*
2	Copyright (C) 2003-2025 Free Software Foundation, Inc.
3	Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5	This file is part of GCC.
6
7	GCC is free software; you can redistribute it and/or modify it under
8	the terms of the GNU General Public License as published by the Free
9	Software Foundation; either version 3, or (at your option) any later
10	version.
11
12	GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13	WARRANTY; without even the implied warranty of MERCHANTABILITY or
14	FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15	for more details.
16
17	You should have received a copy of the GNU General Public License
18	along with GCC; see the file COPYING3. If not see
19	<http://www.gnu.org/licenses/>. /*
20
21	/ Loop and basic block vectorizer.*
22
23	This file contains drivers for the three vectorizers:
24	(1) loop vectorizer (inter-iteration parallelism),
25	(2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
26	vectorizer)
27	(3) BB vectorizer (out-of-loops), aka SLP
28
29	The rest of the vectorizer's code is organized as follows:
30	- tree-vect-loop.cc - loop specific parts such as reductions, etc. These are
31	used by drivers (1) and (2).
32	- tree-vect-loop-manip.cc - vectorizer's loop control-flow utilities, used by
33	drivers (1) and (2).
34	- tree-vect-slp.cc - BB vectorization specific analysis and transformation,
35	used by drivers (2) and (3).
36	- tree-vect-stmts.cc - statements analysis and transformation (used by all).
37	- tree-vect-data-refs.cc - vectorizer specific data-refs analysis and
38	manipulations (used by all).
39	- tree-vect-patterns.cc - vectorizable code patterns detector (used by all)
40
41	Here's a poor attempt at illustrating that:
42
43	tree-vectorizer.cc:
44	loop_vect() loop_aware_slp() slp_vect()
45	\| / \ /
46	\| / \ /
47	tree-vect-loop.cc tree-vect-slp.cc
48	\| \ \ / / \|
49	\| \ \/ / \|
50	\| \ /\ / \|
51	\| \ / \ / \|
52	tree-vect-stmts.cc tree-vect-data-refs.cc
53	\ /
54	tree-vect-patterns.cc
55	*/
56
57	#include "config.h"
58	#include "system.h"
59	#include "coretypes.h"
60	#include "backend.h"
61	#include "tree.h"
62	#include "gimple.h"
63	#include "predict.h"
64	#include "tree-pass.h"
65	#include "ssa.h"
66	#include "cgraph.h"
67	#include "fold-const.h"
68	#include "stor-layout.h"
69	#include "gimple-iterator.h"
70	#include "gimple-walk.h"
71	#include "tree-ssa-loop-manip.h"
72	#include "tree-ssa-loop-niter.h"
73	#include "tree-cfg.h"
74	#include "cfgloop.h"
75	#include "tree-vectorizer.h"
76	#include "tree-ssa-propagate.h"
77	#include "dbgcnt.h"
78	#include "tree-scalar-evolution.h"
79	#include "stringpool.h"
80	#include "attribs.h"
81	#include "gimple-pretty-print.h"
82	#include "opt-problem.h"
83	#include "internal-fn.h"
84	#include "tree-ssa-sccvn.h"
85	#include "tree-into-ssa.h"
86
87	/ Loop or bb location, with hotness information. /
88	dump_user_location_t vect_location;
89
90	/ auto_purge_vect_location's dtor: reset the vect_location*
91	global, to avoid stale location_t values that could reference
92	GC-ed blocks. /*
93
94	auto_purge_vect_location::~auto_purge_vect_location ()
95	{
96	vect_location = dump_user_location_t ();
97	}
98
99	/ Dump a cost entry according to args to F. /
100
101	void
102	dump_stmt_cost (FILE f, int* count, enum vect_cost_for_stmt kind,
103	stmt_vec_info stmt_info, slp_tree node, tree,
104	int misalign, unsigned cost,
105	enum vect_cost_model_location where)
106	{
107	if (stmt_info)
108	{
109	print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), `0`, TDF_SLIM);
110	fprintf (stream: f, format: " ");
111	}
112	else if (node)
113	fprintf (stream: f, format: "node %p ", (void *)node);
114	else
115	fprintf (stream: f, format: "<unknown> ");
116	fprintf (stream: f, format: "%d times ", count);
117	const char *ks = "unknown";
118	switch (kind)
119	{
120	case scalar_stmt:
121	ks = "scalar_stmt";
122	break;
123	case scalar_load:
124	ks = "scalar_load";
125	break;
126	case scalar_store:
127	ks = "scalar_store";
128	break;
129	case vector_stmt:
130	ks = "vector_stmt";
131	break;
132	case vector_load:
133	ks = "vector_load";
134	break;
135	case vector_gather_load:
136	ks = "vector_gather_load";
137	break;
138	case unaligned_load:
139	ks = "unaligned_load";
140	break;
141	case unaligned_store:
142	ks = "unaligned_store";
143	break;
144	case vector_store:
145	ks = "vector_store";
146	break;
147	case vector_scatter_store:
148	ks = "vector_scatter_store";
149	break;
150	case vec_to_scalar:
151	ks = "vec_to_scalar";
152	break;
153	case scalar_to_vec:
154	ks = "scalar_to_vec";
155	break;
156	case cond_branch_not_taken:
157	ks = "cond_branch_not_taken";
158	break;
159	case cond_branch_taken:
160	ks = "cond_branch_taken";
161	break;
162	case vec_perm:
163	ks = "vec_perm";
164	break;
165	case vec_promote_demote:
166	ks = "vec_promote_demote";
167	break;
168	case vec_construct:
169	ks = "vec_construct";
170	break;
171	}
172	fprintf (stream: f, format: "%s ", ks);
173	if (kind == unaligned_load \|\| kind == unaligned_store)
174	fprintf (stream: f, format: "(misalign %d) ", misalign);
175	fprintf (stream: f, format: "costs %u ", cost);
176	const char *ws = "unknown";
177	switch (where)
178	{
179	case vect_prologue:
180	ws = "prologue";
181	break;
182	case vect_body:
183	ws = "body";
184	break;
185	case vect_epilogue:
186	ws = "epilogue";
187	break;
188	}
189	fprintf (stream: f, format: "in %s\n", ws);
190	}
191
192	/ For mapping simduid to vectorization factor. /
193
194	class simduid_to_vf : public free_ptr_hash<simduid_to_vf>
195	{
196	public:
197	unsigned int simduid;
198	poly_uint64 vf;
199
200	/ hash_table support. /
201	static inline hashval_t hash (const simduid_to_vf *);
202	static inline int equal (const simduid_to_vf , const* simduid_to_vf *);
203	};
204
205	inline hashval_t
206	simduid_to_vf::hash (const simduid_to_vf *p)
207	{
208	return p->simduid;
209	}
210
211	inline int
212	simduid_to_vf::equal (const simduid_to_vf p1, const* simduid_to_vf *p2)
213	{
214	return p1->simduid == p2->simduid;
215	}
216
217	/ This hash maps the OMP simd array to the corresponding simduid used*
218	to index into it. Like thus,
219
220	_7 = GOMP_SIMD_LANE (simduid.0)
221	...
222	...
223	D.1737[_7] = stuff;
224
225
226	This hash maps from the OMP simd array (D.1737[]) to DECL_UID of
227	simduid.0. /*
228
229	struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
230	{
231	tree decl;
232	unsigned int simduid;
233
234	/ hash_table support. /
235	static inline hashval_t hash (const simd_array_to_simduid *);
236	static inline int equal (const simd_array_to_simduid *,
237	const simd_array_to_simduid *);
238	};
239
240	inline hashval_t
241	simd_array_to_simduid::hash (const simd_array_to_simduid *p)
242	{
243	return DECL_UID (p->decl);
244	}
245
246	inline int
247	simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
248	const simd_array_to_simduid *p2)
249	{
250	return p1->decl == p2->decl;
251	}
252
253	/ Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE,*
254	into their corresponding constants and remove
255	IFN_GOMP_SIMD_ORDERED_{START,END}. /*
256
257	static void
258	adjust_simduid_builtins (hash_table<simduid_to_vf> htab, function fun)
259	{
260	basic_block bb;
261
262	FOR_EACH_BB_FN (bb, fun)
263	{
264	gimple_stmt_iterator i;
265
266	for (i = gsi_start_bb (bb); !gsi_end_p (i); )
267	{
268	poly_uint64 vf = `1`;
269	enum internal_fn ifn;
270	gimple *stmt = gsi_stmt (i);
271	tree t;
272	if (!is_gimple_call (gs: stmt)
273	\|\| !gimple_call_internal_p (gs: stmt))
274	{
275	gsi_next (i: &i);
276	continue;
277	}
278	ifn = gimple_call_internal_fn (gs: stmt);
279	switch (ifn)
280	{
281	case IFN_GOMP_SIMD_LANE:
282	case IFN_GOMP_SIMD_VF:
283	case IFN_GOMP_SIMD_LAST_LANE:
284	break;
285	case IFN_GOMP_SIMD_ORDERED_START:
286	case IFN_GOMP_SIMD_ORDERED_END:
287	if (integer_onep (gimple_call_arg (gs: stmt, index: `0`)))
288	{
289	enum built_in_function bcode
290	= (ifn == IFN_GOMP_SIMD_ORDERED_START
291	? BUILT_IN_GOMP_ORDERED_START
292	: BUILT_IN_GOMP_ORDERED_END);
293	gimple *g
294	= gimple_build_call (builtin_decl_explicit (fncode: bcode), `0`);
295	gimple_move_vops (g, stmt);
296	gsi_replace (&i, g, true);
297	continue;
298	}
299	gsi_remove (&i, true);
300	unlink_stmt_vdef (stmt);
301	continue;
302	default:
303	gsi_next (i: &i);
304	continue;
305	}
306	tree arg = gimple_call_arg (gs: stmt, index: `0`);
307	gcc_assert (arg != NULL_TREE);
308	gcc_assert (TREE_CODE (arg) == SSA_NAME);
309	simduid_to_vf *p = NULL, data;
310	data.simduid = DECL_UID (SSA_NAME_VAR (arg));
311	/ Need to nullify loop safelen field since it's value is not*
312	valid after transformation. /*
313	if (bb->loop_father && bb->loop_father->safelen > `0`)
314	bb->loop_father->safelen = `0`;
315	if (htab)
316	{
317	p = htab->find (value: &data);
318	if (p)
319	vf = p->vf;
320	}
321	switch (ifn)
322	{
323	case IFN_GOMP_SIMD_VF:
324	t = build_int_cst (unsigned_type_node, vf);
325	break;
326	case IFN_GOMP_SIMD_LANE:
327	t = build_int_cst (unsigned_type_node, `0`);
328	break;
329	case IFN_GOMP_SIMD_LAST_LANE:
330	t = gimple_call_arg (gs: stmt, index: `1`);
331	break;
332	default:
333	gcc_unreachable ();
334	}
335	tree lhs = gimple_call_lhs (gs: stmt);
336	if (lhs)
337	replace_uses_by (lhs, t);
338	release_defs (stmt);
339	gsi_remove (&i, true);
340	}
341	}
342	}
343
344	/ Helper structure for note_simd_array_uses. /
345
346	struct note_simd_array_uses_struct
347	{
348	hash_table<simd_array_to_simduid> **htab;
349	unsigned int simduid;
350	};
351
352	/ Callback for note_simd_array_uses, called through walk_gimple_op. /
353
354	static tree
355	note_simd_array_uses_cb (tree tp, int* walk_subtrees, void* *data)
356	{
357	struct walk_stmt_info wi = (struct* walk_stmt_info *) data;
358	struct note_simd_array_uses_struct *ns
359	= (struct note_simd_array_uses_struct *) wi->info;
360
361	if (TYPE_P (*tp))
362	*walk_subtrees = `0`;
363	else if (VAR_P (*tp)
364	&& lookup_attribute (attr_name: "omp simd array", DECL_ATTRIBUTES (*tp))
365	&& DECL_CONTEXT (*tp) == current_function_decl)
366	{
367	simd_array_to_simduid data;
368	if (!*ns->htab)
369	ns->htab = new* hash_table<simd_array_to_simduid> (`15`);
370	data.decl = *tp;
371	data.simduid = ns->simduid;
372	simd_array_to_simduid *slot = (ns->htab)->find_slot (value: &data, insert: INSERT);
373	if (*slot == NULL)
374	{
375	simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
376	*p = data;
377	*slot = p;
378	}
379	else if ((*slot)->simduid != ns->simduid)
380	(*slot)->simduid = -`1U`;
381	*walk_subtrees = `0`;
382	}
383	return NULL_TREE;
384	}
385
386	/ Find "omp simd array" temporaries and map them to corresponding*
387	simduid. /*
388
389	static void
390	note_simd_array_uses (hash_table<simd_array_to_simduid> *htab, function fun)
391	{
392	basic_block bb;
393	gimple_stmt_iterator gsi;
394	struct walk_stmt_info wi;
395	struct note_simd_array_uses_struct ns;
396
397	memset (s: &wi, c: `0`, n: sizeof (wi));
398	wi.info = &ns;
399	ns.htab = htab;
400
401	FOR_EACH_BB_FN (bb, fun)
402	for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
403	{
404	gimple *stmt = gsi_stmt (i: gsi);
405	if (!is_gimple_call (gs: stmt) \|\| !gimple_call_internal_p (gs: stmt))
406	continue;
407	switch (gimple_call_internal_fn (gs: stmt))
408	{
409	case IFN_GOMP_SIMD_LANE:
410	case IFN_GOMP_SIMD_VF:
411	case IFN_GOMP_SIMD_LAST_LANE:
412	break;
413	default:
414	continue;
415	}
416	tree lhs = gimple_call_lhs (gs: stmt);
417	if (lhs == NULL_TREE)
418	continue;
419	imm_use_iterator use_iter;
420	gimple *use_stmt;
421	ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, `0`)));
422	FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
423	if (!is_gimple_debug (gs: use_stmt))
424	walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
425	}
426	}
427
428	/ Shrink arrays with "omp simd array" attribute to the corresponding*
429	vectorization factor. /*
430
431	static void
432	shrink_simd_arrays
433	(hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
434	hash_table<simduid_to_vf> *simduid_to_vf_htab)
435	{
436	for (hash_table<simd_array_to_simduid>::iterator iter
437	= simd_array_to_simduid_htab->begin ();
438	iter != simd_array_to_simduid_htab->end (); ++iter)
439	if ((*iter)->simduid != -`1U`)
440	{
441	tree decl = (*iter)->decl;
442	poly_uint64 vf = `1`;
443	if (simduid_to_vf_htab)
444	{
445	simduid_to_vf *p = NULL, data;
446	data.simduid = (*iter)->simduid;
447	p = simduid_to_vf_htab->find (value: &data);
448	if (p)
449	vf = p->vf;
450	}
451	tree atype
452	= build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
453	TREE_TYPE (decl) = atype;
454	relayout_decl (decl);
455	}
456
457	delete simd_array_to_simduid_htab;
458	}
459
460	/ Initialize the vec_info with kind KIND_IN and target cost data*
461	TARGET_COST_DATA_IN. /*
462
463	vec_info::vec_info (vec_info::vec_kind kind_in, vec_info_shared *shared_)
464	: kind (kind_in),
465	shared (shared_),
466	stmt_vec_info_ro (false),
467	bbs (NULL),
468	nbbs (`0`),
469	inv_pattern_def_seq (NULL)
470	{
471	stmt_vec_infos.create (nelems: `50`);
472	}
473
474	vec_info::~vec_info ()
475	{
476	for (slp_instance &instance : slp_instances)
477	vect_free_slp_instance (instance);
478
479	free_stmt_vec_infos ();
480	}
481
482	vec_info_shared::vec_info_shared ()
483	: datarefs (vNULL),
484	datarefs_copy (vNULL),
485	ddrs (vNULL)
486	{
487	}
488
489	vec_info_shared::~vec_info_shared ()
490	{
491	free_data_refs (datarefs);
492	free_dependence_relations (ddrs);
493	datarefs_copy.release ();
494	}
495
496	void
497	vec_info_shared::save_datarefs ()
498	{
499	if (!flag_checking)
500	return;
501	datarefs_copy.reserve_exact (nelems: datarefs.length ());
502	for (unsigned i = `0`; i < datarefs.length (); ++i)
503	datarefs_copy.quick_push (obj: *datarefs [i]);
504	}
505
506	void
507	vec_info_shared::check_datarefs ()
508	{
509	if (!flag_checking)
510	return;
511	gcc_assert (datarefs.length () == datarefs_copy.length ());
512	for (unsigned i = `0`; i < datarefs.length (); ++i)
513	if (memcmp (s1: &datarefs_copy [i], s2: datarefs [i],
514	offsetof (data_reference, alt_indices)) != `0`)
515	gcc_unreachable ();
516	}
517
518	/ Record that STMT belongs to the vectorizable region. Create and return*
519	an associated stmt_vec_info. /*
520
521	stmt_vec_info
522	vec_info::add_stmt (gimple *stmt)
523	{
524	stmt_vec_info res = new_stmt_vec_info (stmt);
525	set_vinfo_for_stmt (stmt, res);
526	return res;
527	}
528
529	/ Record that STMT belongs to the vectorizable region. Create a new*
530	stmt_vec_info and mark VECINFO as being related and return the new
531	stmt_vec_info. /*
532
533	stmt_vec_info
534	vec_info::add_pattern_stmt (gimple *stmt, stmt_vec_info stmt_info)
535	{
536	stmt_vec_info res = new_stmt_vec_info (stmt);
537	res->pattern_stmt_p = true;
538	set_vinfo_for_stmt (stmt, res, false);
539	STMT_VINFO_RELATED_STMT (res) = stmt_info;
540	return res;
541	}
542
543	/ If STMT was previously associated with a stmt_vec_info and STMT now resides*
544	at a different address than before (e.g., because STMT is a phi node that has
545	been resized), update the stored address to match the new one. It is not
546	possible to use lookup_stmt () to perform this task, because that function
547	returns NULL if the stored stmt pointer does not match the one being looked
548	up. /*
549
550	stmt_vec_info
551	vec_info::resync_stmt_addr (gimple *stmt)
552	{
553	unsigned int uid = gimple_uid (g: stmt);
554	if (uid > `0` && uid - `1` < stmt_vec_infos.length ())
555	{
556	stmt_vec_info res = stmt_vec_infos [uid - `1`];
557	if (res && res->stmt)
558	{
559	res->stmt = stmt;
560	return res;
561	}
562	}
563	return nullptr;
564	}
565
566	/ If STMT has an associated stmt_vec_info, return that vec_info, otherwise*
567	return null. It is safe to call this function on any statement, even if
568	it might not be part of the vectorizable region. /*
569
570	stmt_vec_info
571	vec_info::lookup_stmt (gimple *stmt)
572	{
573	unsigned int uid = gimple_uid (g: stmt);
574	if (uid > `0` && uid - `1` < stmt_vec_infos.length ())
575	{
576	stmt_vec_info res = stmt_vec_infos [uid - `1`];
577	if (res && res->stmt == stmt)
578	return res;
579	}
580	return NULL;
581	}
582
583	/ If NAME is an SSA_NAME and its definition has an associated stmt_vec_info,*
584	return that stmt_vec_info, otherwise return null. It is safe to call
585	this on arbitrary operands. /*
586
587	stmt_vec_info
588	vec_info::lookup_def (tree name)
589	{
590	if (TREE_CODE (name) == SSA_NAME
591	&& !SSA_NAME_IS_DEFAULT_DEF (name))
592	return lookup_stmt (SSA_NAME_DEF_STMT (name));
593	return NULL;
594	}
595
596	/ See whether there is a single non-debug statement that uses LHS and*
597	whether that statement has an associated stmt_vec_info. Return the
598	stmt_vec_info if so, otherwise return null. /*
599
600	stmt_vec_info
601	vec_info::lookup_single_use (tree lhs)
602	{
603	use_operand_p dummy;
604	gimple *use_stmt;
605	if (single_imm_use (var: lhs, use_p: &dummy, stmt: &use_stmt))
606	return lookup_stmt (stmt: use_stmt);
607	return NULL;
608	}
609
610	/ Return vectorization information about DR. /
611
612	dr_vec_info *
613	vec_info::lookup_dr (data_reference *dr)
614	{
615	stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr));
616	/ DR_STMT should never refer to a stmt in a pattern replacement. /
617	gcc_checking_assert (!is_pattern_stmt_p (stmt_info));
618	return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt);
619	}
620
621	/ Record that NEW_STMT_INFO now implements the same data reference*
622	as OLD_STMT_INFO. /*
623
624	void
625	vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
626	{
627	gcc_assert (!is_pattern_stmt_p (old_stmt_info));
628	STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info;
629	new_stmt_info->dr_aux = old_stmt_info->dr_aux;
630	STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info)
631	= STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info);
632	STMT_VINFO_GATHER_SCATTER_P (new_stmt_info)
633	= STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
634	STMT_VINFO_STRIDED_P (new_stmt_info)
635	= STMT_VINFO_STRIDED_P (old_stmt_info);
636	STMT_VINFO_SIMD_LANE_ACCESS_P (new_stmt_info)
637	= STMT_VINFO_SIMD_LANE_ACCESS_P (old_stmt_info);
638	}
639
640	/ Permanently remove the statement described by STMT_INFO from the*
641	function. /*
642
643	void
644	vec_info::remove_stmt (stmt_vec_info stmt_info)
645	{
646	gcc_assert (!stmt_info->pattern_stmt_p);
647	set_vinfo_for_stmt (stmt_info->stmt, NULL);
648	unlink_stmt_vdef (stmt_info->stmt);
649	gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt);
650	gsi_remove (&si, true);
651	release_defs (stmt_info->stmt);
652	free_stmt_vec_info (stmt_info);
653	}
654
655	/ Replace the statement at GSI by NEW_STMT, both the vectorization*
656	information and the function itself. STMT_INFO describes the statement
657	at GSI. /*
658
659	void
660	vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info,
661	gimple *new_stmt)
662	{
663	gimple *old_stmt = stmt_info->stmt;
664	gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi));
665	gimple_set_uid (g: new_stmt, uid: gimple_uid (g: old_stmt));
666	stmt_info->stmt = new_stmt;
667	gsi_replace (gsi, new_stmt, true);
668	}
669
670	/ Insert stmts in SEQ on the VEC_INFO region entry. If CONTEXT is*
671	not NULL it specifies whether to use the sub-region entry
672	determined by it, currently used for loop vectorization to insert
673	on the inner loop entry vs. the outer loop entry. /*
674
675	void
676	vec_info::insert_seq_on_entry (stmt_vec_info context, gimple_seq seq)
677	{
678	if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: this))
679	{
680	class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
681	basic_block new_bb;
682	edge pe;
683
684	if (context && nested_in_vect_loop_p (loop, stmt_info: context))
685	loop = loop->inner;
686
687	pe = loop_preheader_edge (loop);
688	new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
689	gcc_assert (!new_bb);
690	}
691	else
692	{
693	gimple_stmt_iterator gsi_region_begin
694	= gsi_after_labels (bb: bbs[`0`]);
695	gsi_insert_seq_before (&gsi_region_begin, seq, GSI_SAME_STMT);
696	}
697	}
698
699	/ Like insert_seq_on_entry but just inserts the single stmt NEW_STMT. /
700
701	void
702	vec_info::insert_on_entry (stmt_vec_info context, gimple *new_stmt)
703	{
704	gimple_seq seq = NULL;
705	gimple_stmt_iterator gsi = gsi_start (seq);
706	gsi_insert_before_without_update (&gsi, new_stmt, GSI_SAME_STMT);
707	insert_seq_on_entry (context, seq);
708	}
709
710	/ Create and initialize a new stmt_vec_info struct for STMT. /
711
712	stmt_vec_info
713	vec_info::new_stmt_vec_info (gimple *stmt)
714	{
715	stmt_vec_info res = XCNEW (class _stmt_vec_info);
716	res->stmt = stmt;
717
718	STMT_VINFO_TYPE (res) = undef_vec_info_type;
719	STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
720	STMT_VINFO_VECTORIZABLE (res) = true;
721	STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
722	STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
723	STMT_VINFO_REDUC_FN (res) = IFN_LAST;
724	STMT_VINFO_REDUC_IDX (res) = -`1`;
725	STMT_VINFO_SLP_VECT_ONLY (res) = false;
726	STMT_VINFO_SLP_VECT_ONLY_PATTERN (res) = false;
727	STMT_VINFO_VEC_STMTS (res) = vNULL;
728	res->reduc_initial_values = vNULL;
729	res->reduc_scalar_results = vNULL;
730
731	if (is_a <loop_vec_info> (p: this)
732	&& gimple_code (g: stmt) == GIMPLE_PHI
733	&& is_loop_header_bb_p (bb: gimple_bb (g: stmt)))
734	STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
735	else
736	STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
737
738	STMT_SLP_TYPE (res) = loop_vect;
739
740	/ This is really "uninitialized" until vect_compute_data_ref_alignment. /
741	res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
742
743	return res;
744	}
745
746	/ Associate STMT with INFO. /
747
748	void
749	vec_info::set_vinfo_for_stmt (gimple stmt, stmt_vec_info info, bool* check_ro)
750	{
751	unsigned int uid = gimple_uid (g: stmt);
752	if (uid == `0`)
753	{
754	gcc_assert (!check_ro \|\| !stmt_vec_info_ro);
755	gcc_checking_assert (info);
756	uid = stmt_vec_infos.length () + `1`;
757	gimple_set_uid (g: stmt, uid);
758	stmt_vec_infos.safe_push (obj: info);
759	}
760	else
761	{
762	gcc_checking_assert (info == NULL);
763	stmt_vec_infos [uid - `1`] = info;
764	}
765	}
766
767	/ Free the contents of stmt_vec_infos. /
768
769	void
770	vec_info::free_stmt_vec_infos (void)
771	{
772	for (stmt_vec_info &info : stmt_vec_infos)
773	if (info != NULL)
774	free_stmt_vec_info (info);
775	stmt_vec_infos.release ();
776	}
777
778	/ Free STMT_INFO. /
779
780	void
781	vec_info::free_stmt_vec_info (stmt_vec_info stmt_info)
782	{
783	if (stmt_info->pattern_stmt_p)
784	{
785	gimple_set_bb (stmt_info->stmt, NULL);
786	tree lhs = gimple_get_lhs (stmt_info->stmt);
787	if (lhs && TREE_CODE (lhs) == SSA_NAME)
788	release_ssa_name (name: lhs);
789	}
790
791	stmt_info->reduc_initial_values.release ();
792	stmt_info->reduc_scalar_results.release ();
793	STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
794	STMT_VINFO_VEC_STMTS (stmt_info).release ();
795	free (ptr: stmt_info);
796	}
797
798	/ Returns true if S1 dominates S2. /
799
800	bool
801	vect_stmt_dominates_stmt_p (gimple s1, gimple s2)
802	{
803	basic_block bb1 = gimple_bb (g: s1), bb2 = gimple_bb (g: s2);
804
805	/ If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)*
806	SSA_NAME. Assume it lives at the beginning of function and
807	thus dominates everything. /*
808	if (!bb1 \|\| s1 == s2)
809	return true;
810
811	/ If bb2 is NULL, it doesn't dominate any stmt with a bb. /
812	if (!bb2)
813	return false;
814
815	if (bb1 != bb2)
816	return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
817
818	/ PHIs in the same basic block are assumed to be*
819	executed all in parallel, if only one stmt is a PHI,
820	it dominates the other stmt in the same basic block. /*
821	if (gimple_code (g: s1) == GIMPLE_PHI)
822	return true;
823
824	if (gimple_code (g: s2) == GIMPLE_PHI)
825	return false;
826
827	/ Inserted vectorized stmts all have UID 0 while the original stmts*
828	in the IL have UID increasing within a BB. Walk from both sides
829	until we find the other stmt or a stmt with UID != 0. /*
830	gimple_stmt_iterator gsi1 = gsi_for_stmt (s1);
831	while (gimple_uid (g: gsi_stmt (i: gsi1)) == `0`)
832	{
833	gsi_next (i: &gsi1);
834	if (gsi_end_p (i: gsi1))
835	return false;
836	if (gsi_stmt (i: gsi1) == s2)
837	return true;
838	}
839	if (gimple_uid (g: gsi_stmt (i: gsi1)) == -`1u`)
840	return false;
841
842	gimple_stmt_iterator gsi2 = gsi_for_stmt (s2);
843	while (gimple_uid (g: gsi_stmt (i: gsi2)) == `0`)
844	{
845	gsi_prev (i: &gsi2);
846	if (gsi_end_p (i: gsi2))
847	return false;
848	if (gsi_stmt (i: gsi2) == s1)
849	return true;
850	}
851	if (gimple_uid (g: gsi_stmt (i: gsi2)) == -`1u`)
852	return false;
853
854	if (gimple_uid (g: gsi_stmt (i: gsi1)) <= gimple_uid (g: gsi_stmt (i: gsi2)))
855	return true;
856	return false;
857	}
858
859	/ A helper function to free scev and LOOP niter information, as well as*
860	clear loop constraint LOOP_C_FINITE. /*
861
862	void
863	vect_free_loop_info_assumptions (class loop *loop)
864	{
865	scev_reset_htab ();
866	/ We need to explicitly reset upper bound information since they are*
867	used even after free_numbers_of_iterations_estimates. /*
868	loop->any_upper_bound = false;
869	loop->any_likely_upper_bound = false;
870	free_numbers_of_iterations_estimates (loop);
871	loop_constraint_clear (loop, LOOP_C_FINITE);
872	}
873
874	/ If LOOP has been versioned during ifcvt, return the internal call*
875	guarding it. /*
876
877	gimple *
878	vect_loop_vectorized_call (class loop loop, gcond *cond)
879	{
880	basic_block bb = loop_preheader_edge (loop)->src;
881	gimple *g;
882	do
883	{
884	g = *gsi_last_bb (bb);
885	if ((g && gimple_code (g) == GIMPLE_COND)
886	\|\| !single_succ_p (bb))
887	break;
888	if (!single_pred_p (bb))
889	break;
890	bb = single_pred (bb);
891	}
892	while (`1`);
893	if (g && gimple_code (g) == GIMPLE_COND)
894	{
895	if (cond)
896	cond = as_a <gcond > (p: g);
897	gimple_stmt_iterator gsi = gsi_for_stmt (g);
898	gsi_prev (i: &gsi);
899	if (!gsi_end_p (i: gsi))
900	{
901	g = gsi_stmt (i: gsi);
902	if (gimple_call_internal_p (gs: g, fn: IFN_LOOP_VECTORIZED)
903	&& (tree_to_shwi (gimple_call_arg (gs: g, index: `0`)) == loop->num
904	\|\| tree_to_shwi (gimple_call_arg (gs: g, index: `1`)) == loop->num))
905	return g;
906	}
907	}
908	return NULL;
909	}
910
911	/ If LOOP has been versioned during loop distribution, return the gurading*
912	internal call. /*
913
914	static gimple *
915	vect_loop_dist_alias_call (class loop loop, function fun)
916	{
917	basic_block bb;
918	basic_block entry;
919	class loop outer, orig;
920
921	if (loop->orig_loop_num == `0`)
922	return NULL;
923
924	orig = get_loop (fn: fun, num: loop->orig_loop_num);
925	if (orig == NULL)
926	{
927	/ The original loop is somehow destroyed. Clear the information. /
928	loop->orig_loop_num = `0`;
929	return NULL;
930	}
931
932	if (loop != orig)
933	bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
934	else
935	bb = loop_preheader_edge (loop)->src;
936
937	outer = bb->loop_father;
938	entry = ENTRY_BLOCK_PTR_FOR_FN (fun);
939
940	/ Look upward in dominance tree. /
941	for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
942	bb = get_immediate_dominator (CDI_DOMINATORS, bb))
943	{
944	gimple_stmt_iterator gsi = gsi_last_bb (bb);
945	if (!safe_is_a <gcond > (p: gsi))
946	continue;
947
948	gsi_prev (i: &gsi);
949	if (gsi_end_p (i: gsi))
950	continue;
951
952	gimple *g = gsi_stmt (i: gsi);
953	/ The guarding internal function call must have the same distribution*
954	alias id. /*
955	if (gimple_call_internal_p (gs: g, fn: IFN_LOOP_DIST_ALIAS)
956	&& (tree_to_shwi (gimple_call_arg (gs: g, index: `0`)) == loop->orig_loop_num))
957	return g;
958	}
959	return NULL;
960	}
961
962	/ Set the uids of all the statements in basic blocks inside loop*
963	represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal
964	call guarding the loop which has been if converted. /*
965	static void
966	set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call,
967	function *fun)
968	{
969	tree arg = gimple_call_arg (gs: loop_vectorized_call, index: `1`);
970	basic_block *bbs;
971	unsigned int i;
972	class loop *scalar_loop = get_loop (fn: fun, num: tree_to_shwi (arg));
973
974	LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
975	LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo)
976	= vec_init_loop_exit_info (scalar_loop);
977	gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
978	== loop_vectorized_call);
979	/ If we are going to vectorize outer loop, prevent vectorization*
980	of the inner loop in the scalar loop - either the scalar loop is
981	thrown away, so it is a wasted work, or is used only for
982	a few iterations. /*
983	if (scalar_loop->inner)
984	{
985	gimple *g = vect_loop_vectorized_call (loop: scalar_loop->inner);
986	if (g)
987	{
988	arg = gimple_call_arg (gs: g, index: `0`);
989	get_loop (fn: fun, num: tree_to_shwi (arg))->dont_vectorize = true;
990	fold_loop_internal_call (g, boolean_false_node);
991	}
992	}
993	bbs = get_loop_body (scalar_loop);
994	for (i = `0`; i < scalar_loop->num_nodes; i++)
995	{
996	basic_block bb = bbs[i];
997	gimple_stmt_iterator gsi;
998	for (gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
999	{
1000	gimple *phi = gsi_stmt (i: gsi);
1001	gimple_set_uid (g: phi, uid: `0`);
1002	}
1003	for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
1004	{
1005	gimple *stmt = gsi_stmt (i: gsi);
1006	gimple_set_uid (g: stmt, uid: `0`);
1007	}
1008	}
1009	free (ptr: bbs);
1010	}
1011
1012	/ Generate vectorized code for LOOP and its epilogues. /
1013
1014	static unsigned
1015	vect_transform_loops (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
1016	loop_p loop, gimple *loop_vectorized_call,
1017	function *fun)
1018	{
1019	loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
1020
1021	if (loop_vectorized_call)
1022	set_uid_loop_bbs (loop_vinfo, loop_vectorized_call, fun);
1023
1024	unsigned HOST_WIDE_INT bytes;
1025	if (dump_enabled_p ())
1026	{
1027	if (GET_MODE_SIZE (mode: loop_vinfo->vector_mode).is_constant (const_value: &bytes))
1028	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1029	"%sloop vectorized using %s%wu byte vectors and"
1030	" unroll factor %u\n",
1031	LOOP_VINFO_EPILOGUE_P (loop_vinfo)
1032	? "epilogue " : "",
1033	LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
1034	? "masked " : "", bytes,
1035	(unsigned int) LOOP_VINFO_VECT_FACTOR
1036	(loop_vinfo).to_constant ());
1037	else
1038	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1039	"%sloop vectorized using variable length vectors\n",
1040	LOOP_VINFO_EPILOGUE_P (loop_vinfo)
1041	? "epilogue " : "");
1042	}
1043
1044	loop_p new_loop = vect_transform_loop (loop_vinfo,
1045	loop_vectorized_call);
1046	/ Now that the loop has been vectorized, allow it to be unrolled*
1047	etc. /*
1048	loop->force_vectorize = false;
1049
1050	if (loop->simduid)
1051	{
1052	simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
1053	if (!simduid_to_vf_htab)
1054	simduid_to_vf_htab = new hash_table<simduid_to_vf> (`15`);
1055	simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
1056	simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
1057	*simduid_to_vf_htab->find_slot (value: simduid_to_vf_data, insert: INSERT)
1058	= simduid_to_vf_data;
1059	}
1060
1061	/ We should not have to update virtual SSA form here but some*
1062	transforms involve creating new virtual definitions which makes
1063	updating difficult.
1064	We delay the actual update to the end of the pass but avoid
1065	confusing ourselves by forcing need_ssa_update_p () to false. /*
1066	unsigned todo = `0`;
1067	if (need_ssa_update_p (cfun))
1068	{
1069	gcc_assert (loop_vinfo->any_known_not_updated_vssa);
1070	fun->gimple_df->ssa_renaming_needed = false;
1071	todo \|= TODO_update_ssa_only_virtuals;
1072	}
1073	gcc_assert (!need_ssa_update_p (cfun));
1074
1075	/ Epilogue of vectorized loop must be vectorized too. /
1076	if (new_loop)
1077	todo \|= vect_transform_loops (simduid_to_vf_htab, loop: new_loop, NULL, fun);
1078
1079	return todo;
1080	}
1081
1082	/ Try to vectorize LOOP. /
1083
1084	static unsigned
1085	try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
1086	unsigned *num_vectorized_loops, loop_p loop,
1087	gimple *loop_vectorized_call,
1088	gimple *loop_dist_alias_call,
1089	function *fun)
1090	{
1091	unsigned ret = `0`;
1092	vec_info_shared shared;
1093	auto_purge_vect_location sentinel;
1094	vect_location = find_loop_location (loop);
1095
1096	if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
1097	&& dump_enabled_p ())
1098	dump_printf (MSG_NOTE \| MSG_PRIORITY_INTERNALS,
1099	"\nAnalyzing loop at %s:%d\n",
1100	LOCATION_FILE (vect_location.get_location_t ()),
1101	LOCATION_LINE (vect_location.get_location_t ()));
1102
1103	/ Try to analyze the loop, retaining an opt_problem if dump_enabled_p. /
1104	opt_loop_vec_info loop_vinfo = vect_analyze_loop (loop, loop_vectorized_call,
1105	&shared);
1106	loop->aux = loop_vinfo;
1107
1108	if (!loop_vinfo)
1109	if (dump_enabled_p ())
1110	if (opt_problem *problem = loop_vinfo.get_problem ())
1111	{
1112	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1113	"couldn't vectorize loop\n");
1114	problem->emit_and_clear ();
1115	}
1116
1117	if (!loop_vinfo \|\| !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
1118	{
1119	/ Free existing information if loop is analyzed with some*
1120	assumptions. /*
1121	if (loop_constraint_set_p (loop, LOOP_C_FINITE))
1122	vect_free_loop_info_assumptions (loop);
1123
1124	/ If we applied if-conversion then try to vectorize the*
1125	BB of innermost loops.
1126	??? Ideally BB vectorization would learn to vectorize
1127	control flow by applying if-conversion on-the-fly, the
1128	following retains the if-converted loop body even when
1129	only non-if-converted parts took part in BB vectorization. /*
1130	if (flag_tree_slp_vectorize != `0`
1131	&& loop_vectorized_call
1132	&& ! loop->inner)
1133	{
1134	basic_block bb = loop->header;
1135	bool require_loop_vectorize = false;
1136	for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
1137	!gsi_end_p (i: gsi); gsi_next (i: &gsi))
1138	{
1139	gimple *stmt = gsi_stmt (i: gsi);
1140	gcall call = dyn_cast <gcall > (p: stmt);
1141	if (call && gimple_call_internal_p (gs: call))
1142	{
1143	internal_fn ifn = gimple_call_internal_fn (gs: call);
1144	if (ifn == IFN_MASK_LOAD
1145	\|\| ifn == IFN_MASK_STORE
1146	\|\| ifn == IFN_MASK_CALL
1147	/ Don't keep the if-converted parts when the ifn with*
1148	specifc type is not supported by the backend. /*
1149	\|\| (direct_internal_fn_p (fn: ifn)
1150	&& !direct_internal_fn_supported_p
1151	(call, OPTIMIZE_FOR_SPEED)))
1152	{
1153	require_loop_vectorize = true;
1154	break;
1155	}
1156	}
1157	gimple_set_uid (g: stmt, uid: -`1`);
1158	gimple_set_visited (stmt, visited_p: false);
1159	}
1160	if (!require_loop_vectorize)
1161	{
1162	tree arg = gimple_call_arg (gs: loop_vectorized_call, index: `1`);
1163	class loop *scalar_loop = get_loop (fn: fun, num: tree_to_shwi (arg));
1164	if (vect_slp_if_converted_bb (bb, orig_loop: scalar_loop))
1165	{
1166	fold_loop_internal_call (loop_vectorized_call,
1167	boolean_true_node);
1168	loop_vectorized_call = NULL;
1169	ret \|= TODO_cleanup_cfg \| TODO_update_ssa_only_virtuals;
1170	}
1171	}
1172	}
1173	/ If outer loop vectorization fails for LOOP_VECTORIZED guarded*
1174	loop, don't vectorize its inner loop; we'll attempt to
1175	vectorize LOOP_VECTORIZED guarded inner loop of the scalar
1176	loop version. /*
1177	if (loop_vectorized_call && loop->inner)
1178	loop->inner->dont_vectorize = true;
1179	return ret;
1180	}
1181
1182	if (!dbg_cnt (index: vect_loop))
1183	{
1184	/ Free existing information if loop is analyzed with some*
1185	assumptions. /*
1186	if (loop_constraint_set_p (loop, LOOP_C_FINITE))
1187	vect_free_loop_info_assumptions (loop);
1188	return ret;
1189	}
1190
1191	(*num_vectorized_loops)++;
1192	/ Transform LOOP and its epilogues. /
1193	ret \|= vect_transform_loops (simduid_to_vf_htab, loop,
1194	loop_vectorized_call, fun);
1195
1196	if (loop_vectorized_call)
1197	{
1198	fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
1199	ret \|= TODO_cleanup_cfg;
1200	}
1201	if (loop_dist_alias_call)
1202	{
1203	tree value = gimple_call_arg (gs: loop_dist_alias_call, index: `1`);
1204	fold_loop_internal_call (loop_dist_alias_call, value);
1205	ret \|= TODO_cleanup_cfg;
1206	}
1207
1208	return ret;
1209	}
1210
1211	/ Try to vectorize LOOP. /
1212
1213	static unsigned
1214	try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
1215	unsigned *num_vectorized_loops, loop_p loop,
1216	function *fun)
1217	{
1218	if (!((flag_tree_loop_vectorize
1219	&& optimize_loop_nest_for_speed_p (loop))
1220	\|\| loop->force_vectorize))
1221	return `0`;
1222
1223	return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
1224	loop_vectorized_call: vect_loop_vectorized_call (loop),
1225	loop_dist_alias_call: vect_loop_dist_alias_call (loop, fun), fun);
1226	}
1227
1228
1229	/ Loop autovectorization. /
1230
1231	namespace {
1232
1233	const pass_data pass_data_vectorize =
1234	{
1235	.type: GIMPLE_PASS, / type /
1236	.name: "vect", / name /
1237	.optinfo_flags: OPTGROUP_LOOP \| OPTGROUP_VEC, / optinfo_flags /
1238	.tv_id: TV_TREE_VECTORIZATION, / tv_id /
1239	.properties_required: ( PROP_cfg \| PROP_ssa ), / properties_required /
1240	.properties_provided: `0`, / properties_provided /
1241	.properties_destroyed: `0`, / properties_destroyed /
1242	.todo_flags_start: `0`, / todo_flags_start /
1243	.todo_flags_finish: `0`, / todo_flags_finish /
1244	};
1245
1246	class pass_vectorize : public gimple_opt_pass
1247	{
1248	public:
1249	pass_vectorize (gcc::context *ctxt)
1250	: gimple_opt_pass (pass_data_vectorize, ctxt)
1251	{}
1252
1253	/ opt_pass methods: /
1254	bool gate (function *fun) final override
1255	{
1256	return flag_tree_loop_vectorize \|\| fun->has_force_vectorize_loops;
1257	}
1258
1259	unsigned int execute (function *) final override;
1260
1261	}; // class pass_vectorize
1262
1263	/ Function vectorize_loops.*
1264
1265	Entry point to loop vectorization phase. /*
1266
1267	unsigned
1268	pass_vectorize::execute (function *fun)
1269	{
1270	unsigned int i;
1271	unsigned int num_vectorized_loops = `0`;
1272	unsigned int vect_loops_num;
1273	hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
1274	hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
1275	bool any_ifcvt_loops = false;
1276	unsigned ret = `0`;
1277
1278	vect_loops_num = number_of_loops (fn: fun);
1279
1280	/ Bail out if there are no loops. /
1281	if (vect_loops_num <= `1`)
1282	return `0`;
1283
1284	vect_slp_init ();
1285
1286	if (fun->has_simduid_loops)
1287	note_simd_array_uses (htab: &simd_array_to_simduid_htab, fun);
1288
1289	/ ----------- Analyze loops. ----------- /
1290
1291	/ If some loop was duplicated, it gets bigger number*
1292	than all previously defined loops. This fact allows us to run
1293	only over initial loops skipping newly generated ones. /*
1294	for (auto loop : loops_list (fun, `0`))
1295	if (loop->dont_vectorize)
1296	{
1297	any_ifcvt_loops = true;
1298	/ If-conversion sometimes versions both the outer loop*
1299	(for the case when outer loop vectorization might be
1300	desirable) as well as the inner loop in the scalar version
1301	of the loop. So we have:
1302	if (LOOP_VECTORIZED (1, 3))
1303	{
1304	loop1
1305	loop2
1306	}
1307	else
1308	loop3 (copy of loop1)
1309	if (LOOP_VECTORIZED (4, 5))
1310	loop4 (copy of loop2)
1311	else
1312	loop5 (copy of loop4)
1313	If loops' iteration gives us loop3 first (which has
1314	dont_vectorize set), make sure to process loop1 before loop4;
1315	so that we can prevent vectorization of loop4 if loop1
1316	is successfully vectorized. /*
1317	if (loop->inner)
1318	{
1319	gimple *loop_vectorized_call
1320	= vect_loop_vectorized_call (loop);
1321	if (loop_vectorized_call
1322	&& vect_loop_vectorized_call (loop: loop->inner))
1323	{
1324	tree arg = gimple_call_arg (gs: loop_vectorized_call, index: `0`);
1325	class loop *vector_loop
1326	= get_loop (fn: fun, num: tree_to_shwi (arg));
1327	if (vector_loop && vector_loop != loop)
1328	{
1329	/ Make sure we don't vectorize it twice. /
1330	vector_loop->dont_vectorize = true;
1331	ret \|= try_vectorize_loop (simduid_to_vf_htab,
1332	num_vectorized_loops: &num_vectorized_loops,
1333	loop: vector_loop, fun);
1334	}
1335	}
1336	}
1337	}
1338	else
1339	ret \|= try_vectorize_loop (simduid_to_vf_htab, num_vectorized_loops: &num_vectorized_loops,
1340	loop, fun);
1341
1342	vect_location = dump_user_location_t ();
1343
1344	statistics_counter_event (fun, "Vectorized loops", num_vectorized_loops);
1345	if (dump_enabled_p ()
1346	\|\| (num_vectorized_loops > `0` && dump_enabled_p ()))
1347	dump_printf_loc (MSG_NOTE, vect_location,
1348	"vectorized %u loops in function.\n",
1349	num_vectorized_loops);
1350
1351	/ ----------- Finalize. ----------- /
1352
1353	if (any_ifcvt_loops)
1354	for (i = `1`; i < number_of_loops (fn: fun); i++)
1355	{
1356	class loop *loop = get_loop (fn: fun, num: i);
1357	if (loop && loop->dont_vectorize)
1358	{
1359	gimple *g = vect_loop_vectorized_call (loop);
1360	if (g)
1361	{
1362	fold_loop_internal_call (g, boolean_false_node);
1363	loop->dont_vectorize = false;
1364	ret \|= TODO_cleanup_cfg;
1365	g = NULL;
1366	}
1367	else
1368	g = vect_loop_dist_alias_call (loop, fun);
1369
1370	if (g)
1371	{
1372	fold_loop_internal_call (g, boolean_false_node);
1373	loop->dont_vectorize = false;
1374	ret \|= TODO_cleanup_cfg;
1375	}
1376	}
1377	}
1378
1379	/ Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. /
1380	if (fun->has_simduid_loops)
1381	{
1382	adjust_simduid_builtins (htab: simduid_to_vf_htab, fun);
1383	/ Avoid stale SCEV cache entries for the SIMD_LANE defs. /
1384	scev_reset ();
1385	}
1386	/ Shrink any "omp array simd" temporary arrays to the*
1387	actual vectorization factors. /*
1388	if (simd_array_to_simduid_htab)
1389	shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
1390	delete simduid_to_vf_htab;
1391	fun->has_simduid_loops = false;
1392
1393	if (num_vectorized_loops > `0`)
1394	{
1395	/ We are collecting some corner cases where we need to update*
1396	virtual SSA form via the TODO but delete the queued update-SSA
1397	state. Force renaming if we think that might be necessary. /*
1398	if (ret & TODO_update_ssa_only_virtuals)
1399	mark_virtual_operands_for_renaming (cfun);
1400	/ If we vectorized any loop only virtual SSA form needs to be updated.*
1401	??? Also while we try hard to update loop-closed SSA form we fail
1402	to properly do this in some corner-cases (see PR56286). /*
1403	rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
1404	ret \|= TODO_cleanup_cfg;
1405	}
1406
1407	for (i = `1`; i < number_of_loops (fn: fun); i++)
1408	{
1409	loop_vec_info loop_vinfo;
1410	bool has_mask_store;
1411
1412	class loop *loop = get_loop (fn: fun, num: i);
1413	if (!loop \|\| !loop->aux)
1414	continue;
1415	loop_vinfo = (loop_vec_info) loop->aux;
1416	has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
1417	delete loop_vinfo;
1418	if (has_mask_store
1419	&& targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
1420	optimize_mask_stores (loop);
1421
1422	auto_bitmap exit_bbs;
1423	/ Perform local CSE, this esp. helps because we emit code for*
1424	predicates that need to be shared for optimal predicate usage.
1425	However reassoc will re-order them and prevent CSE from working
1426	as it should. CSE only the loop body, not the entry. /*
1427	auto_vec<edge> exits = get_loop_exit_edges (loop);
1428	for (edge exit : exits)
1429	bitmap_set_bit (exit_bbs, exit->dest->index);
1430
1431	edge entry = EDGE_PRED (loop_preheader_edge (loop)->src, `0`);
1432	do_rpo_vn (fun, entry, exit_bbs);
1433
1434	loop->aux = NULL;
1435	}
1436
1437	vect_slp_fini ();
1438
1439	return ret;
1440	}
1441
1442	} // anon namespace
1443
1444	gimple_opt_pass *
1445	make_pass_vectorize (gcc::context *ctxt)
1446	{
1447	return new pass_vectorize (ctxt);
1448	}
1449
1450	/ Entry point to the simduid cleanup pass. /
1451
1452	namespace {
1453
1454	const pass_data pass_data_simduid_cleanup =
1455	{
1456	.type: GIMPLE_PASS, / type /
1457	.name: "simduid", / name /
1458	.optinfo_flags: OPTGROUP_NONE, / optinfo_flags /
1459	.tv_id: TV_NONE, / tv_id /
1460	.properties_required: ( PROP_ssa \| PROP_cfg ), / properties_required /
1461	.properties_provided: `0`, / properties_provided /
1462	.properties_destroyed: `0`, / properties_destroyed /
1463	.todo_flags_start: `0`, / todo_flags_start /
1464	.todo_flags_finish: `0`, / todo_flags_finish /
1465	};
1466
1467	class pass_simduid_cleanup : public gimple_opt_pass
1468	{
1469	public:
1470	pass_simduid_cleanup (gcc::context *ctxt)
1471	: gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
1472	{}
1473
1474	/ opt_pass methods: /
1475	opt_pass * clone () final override
1476	{
1477	return new pass_simduid_cleanup (m_ctxt);
1478	}
1479	bool gate (function fun) final override { return* fun->has_simduid_loops; }
1480	unsigned int execute (function *) final override;
1481
1482	}; // class pass_simduid_cleanup
1483
1484	unsigned int
1485	pass_simduid_cleanup::execute (function *fun)
1486	{
1487	hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
1488
1489	note_simd_array_uses (htab: &simd_array_to_simduid_htab, fun);
1490
1491	/ Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. /
1492	adjust_simduid_builtins (NULL, fun);
1493
1494	/ Shrink any "omp array simd" temporary arrays to the*
1495	actual vectorization factors. /*
1496	if (simd_array_to_simduid_htab)
1497	shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
1498	fun->has_simduid_loops = false;
1499	return `0`;
1500	}
1501
1502	} // anon namespace
1503
1504	gimple_opt_pass *
1505	make_pass_simduid_cleanup (gcc::context *ctxt)
1506	{
1507	return new pass_simduid_cleanup (ctxt);
1508	}
1509
1510
1511	/ Entry point to basic block SLP phase. /
1512
1513	namespace {
1514
1515	const pass_data pass_data_slp_vectorize =
1516	{
1517	.type: GIMPLE_PASS, / type /
1518	.name: "slp", / name /
1519	.optinfo_flags: OPTGROUP_LOOP \| OPTGROUP_VEC, / optinfo_flags /
1520	.tv_id: TV_TREE_SLP_VECTORIZATION, / tv_id /
1521	.properties_required: ( PROP_ssa \| PROP_cfg ), / properties_required /
1522	.properties_provided: `0`, / properties_provided /
1523	.properties_destroyed: `0`, / properties_destroyed /
1524	.todo_flags_start: `0`, / todo_flags_start /
1525	TODO_update_ssa, / todo_flags_finish /
1526	};
1527
1528	class pass_slp_vectorize : public gimple_opt_pass
1529	{
1530	public:
1531	pass_slp_vectorize (gcc::context *ctxt)
1532	: gimple_opt_pass (pass_data_slp_vectorize, ctxt)
1533	{}
1534
1535	/ opt_pass methods: /
1536	opt_pass * clone () final override { return new pass_slp_vectorize (m_ctxt); }
1537	bool gate (function ) final override { return* flag_tree_slp_vectorize != `0`; }
1538	unsigned int execute (function *) final override;
1539
1540	}; // class pass_slp_vectorize
1541
1542	unsigned int
1543	pass_slp_vectorize::execute (function *fun)
1544	{
1545	auto_purge_vect_location sentinel;
1546	basic_block bb;
1547
1548	bool in_loop_pipeline = scev_initialized_p ();
1549	if (!in_loop_pipeline)
1550	{
1551	loop_optimizer_init (LOOPS_NORMAL);
1552	scev_initialize ();
1553	}
1554
1555	/ Mark all stmts as not belonging to the current region and unvisited. /
1556	FOR_EACH_BB_FN (bb, fun)
1557	{
1558	for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi);
1559	gsi_next (i: &gsi))
1560	{
1561	gphi *stmt = gsi.phi ();
1562	gimple_set_uid (g: stmt, uid: -`1`);
1563	gimple_set_visited (stmt, visited_p: false);
1564	}
1565	for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi);
1566	gsi_next (i: &gsi))
1567	{
1568	gimple *stmt = gsi_stmt (i: gsi);
1569	gimple_set_uid (g: stmt, uid: -`1`);
1570	gimple_set_visited (stmt, visited_p: false);
1571	}
1572	}
1573
1574	vect_slp_init ();
1575
1576	vect_slp_function (fun);
1577
1578	vect_slp_fini ();
1579
1580	if (!in_loop_pipeline)
1581	{
1582	scev_finalize ();
1583	loop_optimizer_finalize ();
1584	}
1585
1586	return `0`;
1587	}
1588
1589	} // anon namespace
1590
1591	gimple_opt_pass *
1592	make_pass_slp_vectorize (gcc::context *ctxt)
1593	{
1594	return new pass_slp_vectorize (ctxt);
1595	}
1596
1597
1598	/ Increase alignment of global arrays to improve vectorization potential.*
1599	TODO:
1600	- Consider also structs that have an array field.
1601	- Use ipa analysis to prune arrays that can't be vectorized?
1602	This should involve global alignment analysis and in the future also
1603	array padding. /*
1604
1605	static unsigned get_vec_alignment_for_type (tree);
1606	static hash_map<tree, unsigned> *type_align_map;
1607
1608	/ Return alignment of array's vector type corresponding to scalar type.*
1609	0 if no vector type exists. /*
1610	static unsigned
1611	get_vec_alignment_for_array_type (tree type)
1612	{
1613	gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
1614	poly_uint64 array_size, vector_size;
1615
1616	tree scalar_type = strip_array_types (type);
1617	tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
1618	if (!vectype
1619	\|\| !poly_int_tree_p (TYPE_SIZE (type), value: &array_size)
1620	\|\| !poly_int_tree_p (TYPE_SIZE (vectype), value: &vector_size)
1621	\|\| maybe_lt (a: array_size, b: vector_size))
1622	return `0`;
1623
1624	return TYPE_ALIGN (vectype);
1625	}
1626
1627	/ Return alignment of field having maximum alignment of vector type*
1628	corresponding to it's scalar type. For now, we only consider fields whose
1629	offset is a multiple of it's vector alignment.
1630	0 if no suitable field is found. /*
1631	static unsigned
1632	get_vec_alignment_for_record_type (tree type)
1633	{
1634	gcc_assert (TREE_CODE (type) == RECORD_TYPE);
1635
1636	unsigned max_align = `0`, alignment;
1637	HOST_WIDE_INT offset;
1638	tree offset_tree;
1639
1640	if (TYPE_PACKED (type))
1641	return `0`;
1642
1643	unsigned *slot = type_align_map->get (k: type);
1644	if (slot)
1645	return *slot;
1646
1647	for (tree field = first_field (type);
1648	field != NULL_TREE;
1649	field = DECL_CHAIN (field))
1650	{
1651	/ Skip if not FIELD_DECL or if alignment is set by user. /
1652	if (TREE_CODE (field) != FIELD_DECL
1653	\|\| DECL_USER_ALIGN (field)
1654	\|\| DECL_ARTIFICIAL (field))
1655	continue;
1656
1657	/ We don't need to process the type further if offset is variable,*
1658	since the offsets of remaining members will also be variable. /*
1659	if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
1660	\|\| TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
1661	break;
1662
1663	/ Similarly stop processing the type if offset_tree*
1664	does not fit in unsigned HOST_WIDE_INT. /*
1665	offset_tree = bit_position (field);
1666	if (!tree_fits_uhwi_p (offset_tree))
1667	break;
1668
1669	offset = tree_to_uhwi (offset_tree);
1670	alignment = get_vec_alignment_for_type (TREE_TYPE (field));
1671
1672	/ Get maximum alignment of vectorized field/array among those members*
1673	whose offset is multiple of the vector alignment. /*
1674	if (alignment
1675	&& (offset % alignment == `0`)
1676	&& (alignment > max_align))
1677	max_align = alignment;
1678	}
1679
1680	type_align_map->put (k: type, v: max_align);
1681	return max_align;
1682	}
1683
1684	/ Return alignment of vector type corresponding to decl's scalar type*
1685	or 0 if it doesn't exist or the vector alignment is lesser than
1686	decl's alignment. /*
1687	static unsigned
1688	get_vec_alignment_for_type (tree type)
1689	{
1690	if (type == NULL_TREE)
1691	return `0`;
1692
1693	gcc_assert (TYPE_P (type));
1694
1695	static unsigned alignment = `0`;
1696	switch (TREE_CODE (type))
1697	{
1698	case ARRAY_TYPE:
1699	alignment = get_vec_alignment_for_array_type (type);
1700	break;
1701	case RECORD_TYPE:
1702	alignment = get_vec_alignment_for_record_type (type);
1703	break;
1704	default:
1705	alignment = `0`;
1706	break;
1707	}
1708
1709	return (alignment > TYPE_ALIGN (type)) ? alignment : `0`;
1710	}
1711
1712	/ Entry point to increase_alignment pass. /
1713	static unsigned int
1714	increase_alignment (void)
1715	{
1716	varpool_node *vnode;
1717
1718	vect_location = dump_user_location_t ();
1719	type_align_map = new hash_map<tree, unsigned>;
1720
1721	/ Increase the alignment of all global arrays for vectorization. /
1722	FOR_EACH_DEFINED_VARIABLE (vnode)
1723	{
1724	tree decl = vnode->decl;
1725	unsigned int alignment;
1726
1727	if ((decl_in_symtab_p (decl)
1728	&& !symtab_node::get (decl)->can_increase_alignment_p ())
1729	\|\| DECL_USER_ALIGN (decl) \|\| DECL_ARTIFICIAL (decl))
1730	continue;
1731
1732	alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
1733	if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
1734	{
1735	vnode->increase_alignment (align: alignment);
1736	if (dump_enabled_p ())
1737	dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl);
1738	}
1739	}
1740
1741	delete type_align_map;
1742	return `0`;
1743	}
1744
1745
1746	namespace {
1747
1748	const pass_data pass_data_ipa_increase_alignment =
1749	{
1750	.type: SIMPLE_IPA_PASS, / type /
1751	.name: "increase_alignment", / name /
1752	.optinfo_flags: OPTGROUP_LOOP \| OPTGROUP_VEC, / optinfo_flags /
1753	.tv_id: TV_IPA_OPT, / tv_id /
1754	.properties_required: `0`, / properties_required /
1755	.properties_provided: `0`, / properties_provided /
1756	.properties_destroyed: `0`, / properties_destroyed /
1757	.todo_flags_start: `0`, / todo_flags_start /
1758	.todo_flags_finish: `0`, / todo_flags_finish /
1759	};
1760
1761	class pass_ipa_increase_alignment : public simple_ipa_opt_pass
1762	{
1763	public:
1764	pass_ipa_increase_alignment (gcc::context *ctxt)
1765	: simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
1766	{}
1767
1768	/ opt_pass methods: /
1769	bool gate (function *) final override
1770	{
1771	return flag_section_anchors && flag_tree_loop_vectorize;
1772	}
1773
1774	unsigned int execute (function *) final override
1775	{
1776	return increase_alignment ();
1777	}
1778
1779	}; // class pass_ipa_increase_alignment
1780
1781	} // anon namespace
1782
1783	simple_ipa_opt_pass *
1784	make_pass_ipa_increase_alignment (gcc::context *ctxt)
1785	{
1786	return new pass_ipa_increase_alignment (ctxt);
1787	}
1788
1789	/ If the condition represented by T is a comparison or the SSA name*
1790	result of a comparison, extract the comparison's operands. Represent
1791	T as NE_EXPR <T, 0> otherwise. /*
1792
1793	void
1794	scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
1795	{
1796	if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
1797	{
1798	this->code = TREE_CODE (t);
1799	this->op0 = TREE_OPERAND (t, `0`);
1800	this->op1 = TREE_OPERAND (t, `1`);
1801	this->inverted_p = false;
1802	return;
1803	}
1804
1805	if (TREE_CODE (t) == SSA_NAME)
1806	if (gassign stmt = dyn_cast<gassign > (SSA_NAME_DEF_STMT (t)))
1807	{
1808	tree_code code = gimple_assign_rhs_code (gs: stmt);
1809	if (TREE_CODE_CLASS (code) == tcc_comparison)
1810	{
1811	this->code = code;
1812	this->op0 = gimple_assign_rhs1 (gs: stmt);
1813	this->op1 = gimple_assign_rhs2 (gs: stmt);
1814	this->inverted_p = false;
1815	return;
1816	}
1817	else if (code == BIT_NOT_EXPR)
1818	{
1819	tree n_op = gimple_assign_rhs1 (gs: stmt);
1820	if ((stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (n_op))))
1821	{
1822	code = gimple_assign_rhs_code (gs: stmt);
1823	if (TREE_CODE_CLASS (code) == tcc_comparison)
1824	{
1825	this->code = code;
1826	this->op0 = gimple_assign_rhs1 (gs: stmt);
1827	this->op1 = gimple_assign_rhs2 (gs: stmt);
1828	this->inverted_p = true;
1829	return;
1830	}
1831	}
1832	}
1833	}
1834
1835	this->code = NE_EXPR;
1836	this->op0 = t;
1837	this->op1 = build_zero_cst (TREE_TYPE (t));
1838	this->inverted_p = false;
1839	}
1840
1841	/ See the comment above the declaration for details. /
1842
1843	unsigned int
1844	vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
1845	stmt_vec_info stmt_info, slp_tree,
1846	tree vectype, int misalign,
1847	vect_cost_model_location where)
1848	{
1849	unsigned int cost
1850	= builtin_vectorization_cost (type_of_cost: kind, vectype, misalign) * count;
1851	return record_stmt_cost (stmt_info, where, cost);
1852	}
1853
1854	/ See the comment above the declaration for details. /
1855
1856	void
1857	vector_costs::finish_cost (const vector_costs *)
1858	{
1859	gcc_assert (!m_finished);
1860	m_finished = true;
1861	}
1862
1863	/ Record a base cost of COST units against WHERE. If STMT_INFO is*
1864	nonnull, use it to adjust the cost based on execution frequency
1865	(where appropriate). /*
1866
1867	unsigned int
1868	vector_costs::record_stmt_cost (stmt_vec_info stmt_info,
1869	vect_cost_model_location where,
1870	unsigned int cost)
1871	{
1872	cost = adjust_cost_for_freq (stmt_info, where, cost);
1873	m_costs[where] += cost;
1874	return cost;
1875	}
1876
1877	/ COST is the base cost we have calculated for an operation in location WHERE.*
1878	If STMT_INFO is nonnull, use it to adjust the cost based on execution
1879	frequency (where appropriate). Return the adjusted cost. /*
1880
1881	unsigned int
1882	vector_costs::adjust_cost_for_freq (stmt_vec_info stmt_info,
1883	vect_cost_model_location where,
1884	unsigned int cost)
1885	{
1886	/ Statements in an inner loop relative to the loop being*
1887	vectorized are weighted more heavily. The value here is
1888	arbitrary and could potentially be improved with analysis. /*
1889	if (where == vect_body
1890	&& stmt_info
1891	&& stmt_in_inner_loop_p (m_vinfo, stmt_info))
1892	{
1893	loop_vec_info loop_vinfo = as_a<loop_vec_info> (p: m_vinfo);
1894	cost *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo);
1895	}
1896	return cost;
1897	}
1898
1899	/ See the comment above the declaration for details. /
1900
1901	bool
1902	vector_costs::better_main_loop_than_p (const vector_costs other) const*
1903	{
1904	int diff = compare_inside_loop_cost (other);
1905	if (diff != `0`)
1906	return diff < `0`;
1907
1908	/ If there's nothing to choose between the loop bodies, see whether*
1909	there's a difference in the prologue and epilogue costs. /*
1910	diff = compare_outside_loop_cost (other);
1911	if (diff != `0`)
1912	return diff < `0`;
1913
1914	return false;
1915	}
1916
1917
1918	/ See the comment above the declaration for details. /
1919
1920	bool
1921	vector_costs::better_epilogue_loop_than_p (const vector_costs *other,
1922	loop_vec_info main_loop) const
1923	{
1924	loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (p: this->m_vinfo);
1925	loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (p: other->m_vinfo);
1926
1927	poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
1928	poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);
1929
1930	poly_uint64 main_poly_vf = LOOP_VINFO_VECT_FACTOR (main_loop);
1931	unsigned HOST_WIDE_INT main_vf;
1932	unsigned HOST_WIDE_INT other_factor, this_factor, other_cost, this_cost;
1933	/ If we can determine how many iterations are left for the epilogue*
1934	loop, that is if both the main loop's vectorization factor and number
1935	of iterations are constant, then we use them to calculate the cost of
1936	the epilogue loop together with a 'likely value' for the epilogues
1937	vectorization factor. Otherwise we use the main loop's vectorization
1938	factor and the maximum poly value for the epilogue's. If the target
1939	has not provided with a sensible upper bound poly vectorization
1940	factors are likely to be favored over constant ones. /*
1941	if (main_poly_vf.is_constant (const_value: &main_vf)
1942	&& LOOP_VINFO_NITERS_KNOWN_P (main_loop))
1943	{
1944	unsigned HOST_WIDE_INT niters
1945	= LOOP_VINFO_INT_NITERS (main_loop) % main_vf;
1946	HOST_WIDE_INT other_likely_vf
1947	= estimated_poly_value (x: other_vf, kind: POLY_VALUE_LIKELY);
1948	HOST_WIDE_INT this_likely_vf
1949	= estimated_poly_value (x: this_vf, kind: POLY_VALUE_LIKELY);
1950
1951	/ If the epilogue is using partial vectors we account for the*
1952	partial iteration here too. /*
1953	other_factor = niters / other_likely_vf;
1954	if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo)
1955	&& niters % other_likely_vf != `0`)
1956	other_factor++;
1957
1958	this_factor = niters / this_likely_vf;
1959	if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo)
1960	&& niters % this_likely_vf != `0`)
1961	this_factor++;
1962	}
1963	else
1964	{
1965	unsigned HOST_WIDE_INT main_vf_max
1966	= estimated_poly_value (x: main_poly_vf, kind: POLY_VALUE_MAX);
1967	unsigned HOST_WIDE_INT other_vf_max
1968	= estimated_poly_value (x: other_vf, kind: POLY_VALUE_MAX);
1969	unsigned HOST_WIDE_INT this_vf_max
1970	= estimated_poly_value (x: this_vf, kind: POLY_VALUE_MAX);
1971
1972	other_factor = CEIL (main_vf_max, other_vf_max);
1973	this_factor = CEIL (main_vf_max, this_vf_max);
1974
1975	/ If the loop is not using partial vectors then it will iterate one*
1976	time less than one that does. It is safe to subtract one here,
1977	because the main loop's vf is always at least 2x bigger than that
1978	of an epilogue. /*
1979	if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo))
1980	other_factor -= `1`;
1981	if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (this_loop_vinfo))
1982	this_factor -= `1`;
1983	}
1984
1985	/ Compute the costs by multiplying the inside costs with the factor and*
1986	add the outside costs for a more complete picture. The factor is the
1987	amount of times we are expecting to iterate this epilogue. /*
1988	other_cost = other->body_cost () * other_factor;
1989	this_cost = this->body_cost () * this_factor;
1990	other_cost += other->outside_cost ();
1991	this_cost += this->outside_cost ();
1992	return this_cost < other_cost;
1993	}
1994
1995	/ A <=>-style subroutine of better_main_loop_than_p. Check whether we can*
1996	determine the return value of better_main_loop_than_p by comparing the
1997	inside (loop body) costs of THIS and OTHER. Return:
1998
1999	* -1 if better_main_loop_than_p should return true.
2000	* 1 if better_main_loop_than_p should return false.
2001	* 0 if we can't decide. */
2002
2003	int
2004	vector_costs::compare_inside_loop_cost (const vector_costs other) const*
2005	{
2006	loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (p: this->m_vinfo);
2007	loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (p: other->m_vinfo);
2008
2009	struct loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo);
2010	gcc_assert (LOOP_VINFO_LOOP (other_loop_vinfo) == loop);
2011
2012	poly_int64 this_vf = LOOP_VINFO_VECT_FACTOR (this_loop_vinfo);
2013	poly_int64 other_vf = LOOP_VINFO_VECT_FACTOR (other_loop_vinfo);
2014
2015	/ Limit the VFs to what is likely to be the maximum number of iterations,*
2016	to handle cases in which at least one loop_vinfo is fully-masked. /*
2017	HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop);
2018	if (estimated_max_niter != -`1`)
2019	{
2020	if (estimated_poly_value (x: this_vf, kind: POLY_VALUE_MIN)
2021	>= estimated_max_niter)
2022	this_vf = estimated_max_niter;
2023	if (estimated_poly_value (x: other_vf, kind: POLY_VALUE_MIN)
2024	>= estimated_max_niter)
2025	other_vf = estimated_max_niter;
2026	}
2027
2028	/ Check whether the (fractional) cost per scalar iteration is lower or*
2029	higher: this_inside_cost / this_vf vs. other_inside_cost / other_vf. /*
2030	poly_int64 rel_this = this_loop_vinfo->vector_costs->body_cost () * other_vf;
2031	poly_int64 rel_other
2032	= other_loop_vinfo->vector_costs->body_cost () * this_vf;
2033
2034	HOST_WIDE_INT est_rel_this_min
2035	= estimated_poly_value (x: rel_this, kind: POLY_VALUE_MIN);
2036	HOST_WIDE_INT est_rel_this_max
2037	= estimated_poly_value (x: rel_this, kind: POLY_VALUE_MAX);
2038
2039	HOST_WIDE_INT est_rel_other_min
2040	= estimated_poly_value (x: rel_other, kind: POLY_VALUE_MIN);
2041	HOST_WIDE_INT est_rel_other_max
2042	= estimated_poly_value (x: rel_other, kind: POLY_VALUE_MAX);
2043
2044	/ Check first if we can make out an unambigous total order from the minimum*
2045	and maximum estimates. /*
2046	if (est_rel_this_min < est_rel_other_min
2047	&& est_rel_this_max < est_rel_other_max)
2048	return -`1`;
2049
2050	if (est_rel_other_min < est_rel_this_min
2051	&& est_rel_other_max < est_rel_this_max)
2052	return `1`;
2053
2054	/ When other_loop_vinfo uses a variable vectorization factor,*
2055	we know that it has a lower cost for at least one runtime VF.
2056	However, we don't know how likely that VF is.
2057
2058	One option would be to compare the costs for the estimated VFs.
2059	The problem is that that can put too much pressure on the cost
2060	model. E.g. if the estimated VF is also the lowest possible VF,
2061	and if other_loop_vinfo is 1 unit worse than this_loop_vinfo
2062	for the estimated VF, we'd then choose this_loop_vinfo even
2063	though (a) this_loop_vinfo might not actually be better than
2064	other_loop_vinfo for that VF and (b) it would be significantly
2065	worse at larger VFs.
2066
2067	Here we go for a hacky compromise: pick this_loop_vinfo if it is
2068	no more expensive than other_loop_vinfo even after doubling the
2069	estimated other_loop_vinfo VF. For all but trivial loops, this
2070	ensures that we only pick this_loop_vinfo if it is significantly
2071	better than other_loop_vinfo at the estimated VF. /*
2072	if (est_rel_other_min != est_rel_this_min
2073	\|\| est_rel_other_max != est_rel_this_max)
2074	{
2075	HOST_WIDE_INT est_rel_this_likely
2076	= estimated_poly_value (x: rel_this, kind: POLY_VALUE_LIKELY);
2077	HOST_WIDE_INT est_rel_other_likely
2078	= estimated_poly_value (x: rel_other, kind: POLY_VALUE_LIKELY);
2079
2080	return est_rel_this_likely * `2` <= est_rel_other_likely ? -`1` : `1`;
2081	}
2082
2083	return `0`;
2084	}
2085
2086	/ A <=>-style subroutine of better_main_loop_than_p, used when there is*
2087	nothing to choose between the inside (loop body) costs of THIS and OTHER.
2088	Check whether we can determine the return value of better_main_loop_than_p
2089	by comparing the outside (prologue and epilogue) costs of THIS and OTHER.
2090	Return:
2091
2092	* -1 if better_main_loop_than_p should return true.
2093	* 1 if better_main_loop_than_p should return false.
2094	* 0 if we can't decide. */
2095
2096	int
2097	vector_costs::compare_outside_loop_cost (const vector_costs other) const*
2098	{
2099	auto this_outside_cost = this->outside_cost ();
2100	auto other_outside_cost = other->outside_cost ();
2101	if (this_outside_cost != other_outside_cost)
2102	return this_outside_cost < other_outside_cost ? -`1` : `1`;
2103
2104	return `0`;
2105	}
2106

source code of gcc/tree-vectorizer.cc